213 files changed, 79523 insertions, 0 deletions
diff --git a/video/csputils.c b/video/csputils.c
new file mode 100644
index 0000000..59200c5
--- /dev/null
+++ b/video/csputils.c
@@ -0,0 +1,1020 @@
+/*
+ * Common code related to colorspaces and conversion
+ *
+ * Copyleft (C) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ *
+ * mp_invert_cmat based on DarkPlaces engine (relicensed from GPL to LGPL)
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdint.h>
+#include <math.h>
+#include <assert.h>
+#include <libavutil/common.h>
+#include <libavcodec/avcodec.h>
+
+#include "mp_image.h"
+#include "csputils.h"
+#include "options/m_config.h"
+#include "options/m_option.h"
+
+// Name/value pairs for the MP_CSP_* colorspace constants, in
+// m_opt_choice_alternatives form. The final {0} entry terminates the list.
+const struct m_opt_choice_alternatives mp_csp_names[] = {
+    {"auto",        MP_CSP_AUTO},
+    {"bt.601",      MP_CSP_BT_601},
+    {"bt.709",      MP_CSP_BT_709},
+    {"smpte-240m",  MP_CSP_SMPTE_240M},
+    {"bt.2020-ncl", MP_CSP_BT_2020_NC},
+    {"bt.2020-cl",  MP_CSP_BT_2020_C},
+    {"rgb",         MP_CSP_RGB},
+    {"xyz",         MP_CSP_XYZ},
+    {"ycgco",       MP_CSP_YCGCO},
+    {0}
+};
+
+// Name/value pairs for the MP_CSP_LEVELS_* constants: "limited" stands for
+// MP_CSP_LEVELS_TV and "full" for MP_CSP_LEVELS_PC. The final {0} entry
+// terminates the list.
+const struct m_opt_choice_alternatives mp_csp_levels_names[] = {
+    {"auto",        MP_CSP_LEVELS_AUTO},
+    {"limited",     MP_CSP_LEVELS_TV},
+    {"full",        MP_CSP_LEVELS_PC},
+    {0}
+};
+
+// Name/value pairs for the MP_CSP_PRIM_* primaries constants. The final {0}
+// entry terminates the list.
+const struct m_opt_choice_alternatives mp_csp_prim_names[] = {
+    {"auto",        MP_CSP_PRIM_AUTO},
+    {"bt.601-525",  MP_CSP_PRIM_BT_601_525},
+    {"bt.601-625",  MP_CSP_PRIM_BT_601_625},
+    {"bt.709",      MP_CSP_PRIM_BT_709},
+    {"bt.2020",     MP_CSP_PRIM_BT_2020},
+    {"bt.470m",     MP_CSP_PRIM_BT_470M},
+    {"apple",       MP_CSP_PRIM_APPLE},
+    {"adobe",       MP_CSP_PRIM_ADOBE},
+    {"prophoto",    MP_CSP_PRIM_PRO_PHOTO},
+    {"cie1931",     MP_CSP_PRIM_CIE_1931},
+    {"dci-p3",      MP_CSP_PRIM_DCI_P3},
+    {"display-p3",  MP_CSP_PRIM_DISPLAY_P3},
+    {"v-gamut",     MP_CSP_PRIM_V_GAMUT},
+    {"s-gamut",     MP_CSP_PRIM_S_GAMUT},
+    {"ebu3213",     MP_CSP_PRIM_EBU_3213},
+    {"film-c",      MP_CSP_PRIM_FILM_C},
+    {"aces-ap0",    MP_CSP_PRIM_ACES_AP0},
+    {"aces-ap1",    MP_CSP_PRIM_ACES_AP1},
+    {0}
+};
+
+// Name/value pairs for the MP_CSP_TRC_* transfer characteristics constants.
+// The final {0} entry terminates the list.
+const struct m_opt_choice_alternatives mp_csp_trc_names[] = {
+    {"auto",        MP_CSP_TRC_AUTO},
+    {"bt.1886",     MP_CSP_TRC_BT_1886},
+    {"srgb",        MP_CSP_TRC_SRGB},
+    {"linear",      MP_CSP_TRC_LINEAR},
+    {"gamma1.8",    MP_CSP_TRC_GAMMA18},
+    {"gamma2.0",    MP_CSP_TRC_GAMMA20},
+    {"gamma2.2",    MP_CSP_TRC_GAMMA22},
+    {"gamma2.4",    MP_CSP_TRC_GAMMA24},
+    {"gamma2.6",    MP_CSP_TRC_GAMMA26},
+    {"gamma2.8",    MP_CSP_TRC_GAMMA28},
+    {"prophoto",    MP_CSP_TRC_PRO_PHOTO},
+    {"pq",          MP_CSP_TRC_PQ},
+    {"hlg",         MP_CSP_TRC_HLG},
+    {"v-log",       MP_CSP_TRC_V_LOG},
+    {"s-log1",      MP_CSP_TRC_S_LOG1},
+    {"s-log2",      MP_CSP_TRC_S_LOG2},
+    {"st428",       MP_CSP_TRC_ST428},
+    {0}
+};
+
+// Name/value pairs for the MP_CSP_LIGHT_* constants. The final {0} entry
+// terminates the list.
+const struct m_opt_choice_alternatives mp_csp_light_names[] = {
+    {"auto",        MP_CSP_LIGHT_AUTO},
+    {"display",     MP_CSP_LIGHT_DISPLAY},
+    {"hlg",         MP_CSP_LIGHT_SCENE_HLG},
+    {"709-1886",    MP_CSP_LIGHT_SCENE_709_1886},
+    {"gamma1.2",    MP_CSP_LIGHT_SCENE_1_2},
+    {0}
+};
+
+// Name/value pairs for the MP_CHROMA_* chroma location constants. Unlike the
+// other tables in this file, MP_CHROMA_AUTO is spelled "unknown" rather than
+// "auto". The final {0} entry terminates the list.
+const struct m_opt_choice_alternatives mp_chroma_names[] = {
+    {"unknown",     MP_CHROMA_AUTO},
+    {"uhd",         MP_CHROMA_TOPLEFT},
+    {"mpeg2/4/h264",MP_CHROMA_LEFT},
+    {"mpeg1/jpeg",  MP_CHROMA_CENTER},
+    {0}
+};
+
+// Name/value pairs for the MP_ALPHA_* constants. The final {0} entry
+// terminates the list.
+const struct m_opt_choice_alternatives mp_alpha_names[] = {
+    {"auto",        MP_ALPHA_AUTO},
+    {"straight",    MP_ALPHA_STRAIGHT},
+    {"premul",      MP_ALPHA_PREMUL},
+    {0}
+};
+
+// Complete *orig with values from *new: every field of *orig that is still
+// zero takes over the corresponding field of *new, while fields that are
+// already set are left alone. The HDR metadata of both is combined through
+// pl_hdr_metadata_merge().
+void mp_colorspace_merge(struct mp_colorspace *orig, struct mp_colorspace *new)
+{
+    if (orig->space == 0)
+        orig->space = new->space;
+
+    if (orig->levels == 0)
+        orig->levels = new->levels;
+
+    if (orig->primaries == 0)
+        orig->primaries = new->primaries;
+
+    if (orig->gamma == 0)
+        orig->gamma = new->gamma;
+
+    if (orig->light == 0)
+        orig->light = new->light;
+
+    pl_hdr_metadata_merge(&orig->hdr, &new->hdr);
+}
+
+// Name/value pairs for the stereo 3D modes. The final {0} entry terminates
+// the list.
+// The short name _must_ match what vf_stereo3d accepts (if supported).
+// The long name in the comments is closer to the Matroska spec (StereoMode
+// element). The numeric value matches the Matroska StereoMode value. If you
+// add entries that don't match Matroska, make sure demux_mkv.c rejects them
+// properly.
+const struct m_opt_choice_alternatives mp_stereo3d_names[] = {
+    {"no",     -1}, // disable/invalid
+    {"mono",    0},
+    {"sbs2l",   1}, // "side_by_side_left"
+    {"ab2r",    2}, // "top_bottom_right"
+    {"ab2l",    3}, // "top_bottom_left"
+    {"checkr",  4}, // "checkboard_right" (unsupported by vf_stereo3d)
+    {"checkl",  5}, // "checkboard_left"  (unsupported by vf_stereo3d)
+    {"irr",     6}, // "row_interleaved_right"
+    {"irl",     7}, // "row_interleaved_left"
+    {"icr",     8}, // "column_interleaved_right" (unsupported by vf_stereo3d)
+    {"icl",     9}, // "column_interleaved_left" (unsupported by vf_stereo3d)
+    {"arcc",   10}, // "anaglyph_cyan_red" (Matroska: unclear which mode)
+    {"sbs2r",  11}, // "side_by_side_right"
+    {"agmc",   12}, // "anaglyph_green_magenta" (Matroska: unclear which mode)
+    {"al",     13}, // "alternating frames left first"
+    {"ar",     14}, // "alternating frames right first"
+    {0}
+};
+
+// Translate a libavutil AVCOL_SPC_* value into the corresponding MP_CSP_*
+// value. Values without a listed counterpart yield MP_CSP_AUTO.
+enum mp_csp avcol_spc_to_mp_csp(int avcolorspace)
+{
+    enum mp_csp csp = MP_CSP_AUTO;
+
+    switch (avcolorspace) {
+    // Both of these are mapped to MP_CSP_BT_601
+    case AVCOL_SPC_BT470BG:
+    case AVCOL_SPC_SMPTE170M:
+        csp = MP_CSP_BT_601;
+        break;
+    case AVCOL_SPC_BT709:
+        csp = MP_CSP_BT_709;
+        break;
+    case AVCOL_SPC_SMPTE240M:
+        csp = MP_CSP_SMPTE_240M;
+        break;
+    case AVCOL_SPC_BT2020_NCL:
+        csp = MP_CSP_BT_2020_NC;
+        break;
+    case AVCOL_SPC_BT2020_CL:
+        csp = MP_CSP_BT_2020_C;
+        break;
+    case AVCOL_SPC_RGB:
+        csp = MP_CSP_RGB;
+        break;
+    case AVCOL_SPC_YCOCG:
+        csp = MP_CSP_YCGCO;
+        break;
+    default:
+        break;
+    }
+
+    return csp;
+}
+
+// Translate a libavutil AVCOL_RANGE_* value into MP_CSP_LEVELS_*: MPEG range
+// becomes TV levels, JPEG range becomes PC levels, and everything else
+// becomes MP_CSP_LEVELS_AUTO.
+enum mp_csp_levels avcol_range_to_mp_csp_levels(int avrange)
+{
+    if (avrange == AVCOL_RANGE_MPEG)
+        return MP_CSP_LEVELS_TV;
+    if (avrange == AVCOL_RANGE_JPEG)
+        return MP_CSP_LEVELS_PC;
+    return MP_CSP_LEVELS_AUTO;
+}
+
+// Translate a libavutil AVCOL_PRI_* value into the corresponding
+// MP_CSP_PRIM_* value. Values missing from the table yield MP_CSP_PRIM_AUTO.
+enum mp_csp_prim avcol_pri_to_mp_csp_prim(int avpri)
+{
+    static const struct {
+        int av;
+        enum mp_csp_prim mp;
+    } map[] = {
+        // SMPTE 240M is mapped to the same primaries as SMPTE 170M
+        {AVCOL_PRI_SMPTE240M, MP_CSP_PRIM_BT_601_525},
+        {AVCOL_PRI_SMPTE170M, MP_CSP_PRIM_BT_601_525},
+        {AVCOL_PRI_BT470BG,   MP_CSP_PRIM_BT_601_625},
+        {AVCOL_PRI_BT709,     MP_CSP_PRIM_BT_709},
+        {AVCOL_PRI_BT2020,    MP_CSP_PRIM_BT_2020},
+        {AVCOL_PRI_BT470M,    MP_CSP_PRIM_BT_470M},
+        {AVCOL_PRI_SMPTE431,  MP_CSP_PRIM_DCI_P3},
+        {AVCOL_PRI_SMPTE432,  MP_CSP_PRIM_DISPLAY_P3},
+    };
+
+    for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
+        if (map[i].av == avpri)
+            return map[i].mp;
+    }
+
+    return MP_CSP_PRIM_AUTO;
+}
+
+// Translate a libavutil AVCOL_TRC_* value into the corresponding
+// MP_CSP_TRC_* value. Values missing from the table yield MP_CSP_TRC_AUTO.
+enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc)
+{
+    static const struct {
+        int av;
+        enum mp_csp_trc mp;
+    } map[] = {
+        // All of these are treated as BT.1886
+        {AVCOL_TRC_BT709,        MP_CSP_TRC_BT_1886},
+        {AVCOL_TRC_SMPTE170M,    MP_CSP_TRC_BT_1886},
+        {AVCOL_TRC_SMPTE240M,    MP_CSP_TRC_BT_1886},
+        {AVCOL_TRC_BT1361_ECG,   MP_CSP_TRC_BT_1886},
+        {AVCOL_TRC_BT2020_10,    MP_CSP_TRC_BT_1886},
+        {AVCOL_TRC_BT2020_12,    MP_CSP_TRC_BT_1886},
+        // One-to-one mappings
+        {AVCOL_TRC_IEC61966_2_1, MP_CSP_TRC_SRGB},
+        {AVCOL_TRC_LINEAR,       MP_CSP_TRC_LINEAR},
+        {AVCOL_TRC_GAMMA22,      MP_CSP_TRC_GAMMA22},
+        {AVCOL_TRC_GAMMA28,      MP_CSP_TRC_GAMMA28},
+        {AVCOL_TRC_SMPTEST2084,  MP_CSP_TRC_PQ},
+        {AVCOL_TRC_ARIB_STD_B67, MP_CSP_TRC_HLG},
+        {AVCOL_TRC_SMPTE428,     MP_CSP_TRC_ST428},
+    };
+
+    for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
+        if (map[i].av == avtrc)
+            return map[i].mp;
+    }
+
+    return MP_CSP_TRC_AUTO;
+}
+
+// Translate an MP_CSP_* value into the corresponding libavutil AVCOL_SPC_*
+// value. MP_CSP_BT_601 is reported as AVCOL_SPC_BT470BG; values without a
+// listed counterpart yield AVCOL_SPC_UNSPECIFIED.
+int mp_csp_to_avcol_spc(enum mp_csp colorspace)
+{
+    int avspc = AVCOL_SPC_UNSPECIFIED;
+
+    switch (colorspace) {
+    case MP_CSP_BT_601:
+        avspc = AVCOL_SPC_BT470BG;
+        break;
+    case MP_CSP_BT_709:
+        avspc = AVCOL_SPC_BT709;
+        break;
+    case MP_CSP_SMPTE_240M:
+        avspc = AVCOL_SPC_SMPTE240M;
+        break;
+    case MP_CSP_BT_2020_NC:
+        avspc = AVCOL_SPC_BT2020_NCL;
+        break;
+    case MP_CSP_BT_2020_C:
+        avspc = AVCOL_SPC_BT2020_CL;
+        break;
+    case MP_CSP_RGB:
+        avspc = AVCOL_SPC_RGB;
+        break;
+    case MP_CSP_YCGCO:
+        avspc = AVCOL_SPC_YCOCG;
+        break;
+    default:
+        break;
+    }
+
+    return avspc;
+}
+
+// Translate an MP_CSP_LEVELS_* value into libavutil AVCOL_RANGE_*: TV levels
+// become MPEG range, PC levels become JPEG range, and everything else becomes
+// AVCOL_RANGE_UNSPECIFIED.
+int mp_csp_levels_to_avcol_range(enum mp_csp_levels range)
+{
+    if (range == MP_CSP_LEVELS_TV)
+        return AVCOL_RANGE_MPEG;
+    if (range == MP_CSP_LEVELS_PC)
+        return AVCOL_RANGE_JPEG;
+    return AVCOL_RANGE_UNSPECIFIED;
+}
+
+// Translate an MP_CSP_PRIM_* value into the corresponding libavutil
+// AVCOL_PRI_* value. Values missing from the table yield
+// AVCOL_PRI_UNSPECIFIED.
+int mp_csp_prim_to_avcol_pri(enum mp_csp_prim prim)
+{
+    static const struct {
+        enum mp_csp_prim mp;
+        int av;
+    } map[] = {
+        {MP_CSP_PRIM_BT_601_525, AVCOL_PRI_SMPTE170M},
+        {MP_CSP_PRIM_BT_601_625, AVCOL_PRI_BT470BG},
+        {MP_CSP_PRIM_BT_709,     AVCOL_PRI_BT709},
+        {MP_CSP_PRIM_BT_2020,    AVCOL_PRI_BT2020},
+        {MP_CSP_PRIM_BT_470M,    AVCOL_PRI_BT470M},
+        {MP_CSP_PRIM_DCI_P3,     AVCOL_PRI_SMPTE431},
+        {MP_CSP_PRIM_DISPLAY_P3, AVCOL_PRI_SMPTE432},
+    };
+
+    for (unsigned i = 0; i < sizeof(map) / sizeof(map[0]); i++) {
+        if (map[i].mp == prim)
+            return map[i].av;
+    }
+
+    return AVCOL_PRI_UNSPECIFIED;
+}
+
+// Translate an MP_CSP_TRC_* value into the corresponding libavutil
+// AVCOL_TRC_* value. Values without a listed counterpart yield
+// AVCOL_TRC_UNSPECIFIED.
+int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc)
+{
+    int avtrc = AVCOL_TRC_UNSPECIFIED;
+
+    switch (trc) {
+    case MP_CSP_TRC_BT_1886:
+        // We just call it BT.1886 since we're decoding, but it's still BT.709
+        avtrc = AVCOL_TRC_BT709;
+        break;
+    case MP_CSP_TRC_SRGB:
+        avtrc = AVCOL_TRC_IEC61966_2_1;
+        break;
+    case MP_CSP_TRC_LINEAR:
+        avtrc = AVCOL_TRC_LINEAR;
+        break;
+    case MP_CSP_TRC_GAMMA22:
+        avtrc = AVCOL_TRC_GAMMA22;
+        break;
+    case MP_CSP_TRC_GAMMA28:
+        avtrc = AVCOL_TRC_GAMMA28;
+        break;
+    case MP_CSP_TRC_PQ:
+        avtrc = AVCOL_TRC_SMPTEST2084;
+        break;
+    case MP_CSP_TRC_HLG:
+        avtrc = AVCOL_TRC_ARIB_STD_B67;
+        break;
+    case MP_CSP_TRC_ST428:
+        avtrc = AVCOL_TRC_SMPTE428;
+        break;
+    default:
+        break;
+    }
+
+    return avtrc;
+}
+
+// Guess the colorspace from the frame size alone: anything at least 1280
+// pixels wide or more than 576 pixels high is taken as BT.709, everything
+// else as BT.601.
+enum mp_csp mp_csp_guess_colorspace(int width, int height)
+{
+    if (width >= 1280 || height > 576)
+        return MP_CSP_BT_709;
+
+    return MP_CSP_BT_601;
+}
+
+// Guess the primaries from the frame size alone. Frames at least 1280 pixels
+// wide or more than 576 pixels high are taken as BT.709; below that, only the
+// exact heights 576, 480 and 486 select BT.601 primaries.
+enum mp_csp_prim mp_csp_guess_primaries(int width, int height)
+{
+    // HD content
+    if (width >= 1280 || height > 576)
+        return MP_CSP_PRIM_BT_709;
+
+    // Typical PAL content, including anamorphic/squared
+    if (height == 576)
+        return MP_CSP_PRIM_BT_601_625;
+
+    // Typical NTSC content (480, including squared) and NTSC Pro or
+    // anamorphic NTSC (486)
+    if (height == 480 || height == 486)
+        return MP_CSP_PRIM_BT_601_525;
+
+    // No good metric, just pick BT.709 to minimize damage
+    return MP_CSP_PRIM_BT_709;
+}
+
+// Translate a libavutil AVCHROMA_LOC_* value into MP_CHROMA_*. Only the
+// top-left, left and center locations are recognized; everything else
+// becomes MP_CHROMA_AUTO.
+enum mp_chroma_location avchroma_location_to_mp(int avloc)
+{
+    if (avloc == AVCHROMA_LOC_TOPLEFT)
+        return MP_CHROMA_TOPLEFT;
+    if (avloc == AVCHROMA_LOC_LEFT)
+        return MP_CHROMA_LEFT;
+    if (avloc == AVCHROMA_LOC_CENTER)
+        return MP_CHROMA_CENTER;
+    return MP_CHROMA_AUTO;
+}
+
+// Translate an MP_CHROMA_* value into libavutil AVCHROMA_LOC_*. Values other
+// than top-left, left and center become AVCHROMA_LOC_UNSPECIFIED.
+int mp_chroma_location_to_av(enum mp_chroma_location mploc)
+{
+    int avloc = AVCHROMA_LOC_UNSPECIFIED;
+
+    switch (mploc) {
+    case MP_CHROMA_TOPLEFT:
+        avloc = AVCHROMA_LOC_TOPLEFT;
+        break;
+    case MP_CHROMA_LEFT:
+        avloc = AVCHROMA_LOC_LEFT;
+        break;
+    case MP_CHROMA_CENTER:
+        avloc = AVCHROMA_LOC_CENTER;
+        break;
+    default:
+        break;
+    }
+
+    return avloc;
+}
+
+// Return the location of chroma samples relative to luma samples in *x / *y.
+// 0/0 means centered, and -1 means shifted towards the left (x) or the top
+// (y). MP_CHROMA_LEFT gives -1/0, MP_CHROMA_TOPLEFT gives -1/-1, and any
+// other value gives 0/0.
+void mp_get_chroma_location(enum mp_chroma_location loc, int *x, int *y)
+{
+    // Start out centered, then shift as required by the location
+    *x = 0;
+    *y = 0;
+
+    switch (loc) {
+    case MP_CHROMA_TOPLEFT:
+        *x = -1;
+        *y = -1;
+        break;
+    case MP_CHROMA_LEFT:
+        *x = -1;
+        break;
+    default:
+        break;
+    }
+}
+
+// Invert the 3x3 matrix m in place, computed as adjugate(m) / det(m).
+// Singular input is not detected: a zero determinant results in a division
+// by zero.
+void mp_invert_matrix3x3(float m[3][3])
+{
+    // Keep a copy of the input, since m is overwritten while still needed
+    float in[3][3];
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++)
+            in[r][c] = m[r][c];
+    }
+
+    // Adjugate (transposed cofactor matrix), written straight into m
+    m[0][0] =  (in[1][1] * in[2][2] - in[2][1] * in[1][2]);
+    m[0][1] = -(in[0][1] * in[2][2] - in[2][1] * in[0][2]);
+    m[0][2] =  (in[0][1] * in[1][2] - in[1][1] * in[0][2]);
+    m[1][0] = -(in[1][0] * in[2][2] - in[2][0] * in[1][2]);
+    m[1][1] =  (in[0][0] * in[2][2] - in[2][0] * in[0][2]);
+    m[1][2] = -(in[0][0] * in[1][2] - in[1][0] * in[0][2]);
+    m[2][0] =  (in[1][0] * in[2][1] - in[2][0] * in[1][1]);
+    m[2][1] = -(in[0][0] * in[2][1] - in[2][0] * in[0][1]);
+    m[2][2] =  (in[0][0] * in[1][1] - in[1][0] * in[0][1]);
+
+    // Since adjugate * m == det * identity, the first row of the adjugate
+    // dotted with the first column of the input gives the determinant
+    float det = in[0][0] * m[0][0] + in[1][0] * m[0][1] + in[2][0] * m[0][2];
+    float scale = 1.0f / det;
+
+    // inverse == adjugate / det
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++)
+            m[r][c] *= scale;
+    }
+}
+
+// A := A * B
+// Multiplies the 3x3 matrix a by b from the right and stores the product
+// back into a. b is only read.
+static void mp_mul_matrix3x3(float a[3][3], float b[3][3])
+{
+    // Copy of the left operand; a is overwritten as the product is built
+    float lhs[3][3];
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++)
+            lhs[r][c] = a[r][c];
+    }
+
+    // The result is produced one column at a time
+    for (int c = 0; c < 3; c++) {
+        for (int r = 0; r < 3; r++) {
+            a[r][c] = lhs[r][0] * b[0][c] + lhs[r][1] * b[1][c] +
+                      lhs[r][2] * b[2][c];
+        }
+    }
+}
+
+// Return the xy coordinates of the red, green and blue primaries and of the
+// white point that belong to an mp_csp_prim value. MP_CSP_PRIM_AUTO is
+// answered with the BT.709 values; values that are not listed yield an
+// all-zero struct.
+struct mp_csp_primaries mp_get_csp_primaries(enum mp_csp_prim spc)
+{
+    /*
+    Values from: ITU-R Recommendations BT.470-6, BT.601-7, BT.709-5, BT.2020-0
+
+    https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-6-199811-S!!PDF-E.pdf
+    https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.601-7-201103-I!!PDF-E.pdf
+    https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-5-200204-I!!PDF-E.pdf
+    https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2020-0-201208-I!!PDF-E.pdf
+
+    Other colorspaces from https://en.wikipedia.org/wiki/RGB_color_space#Specifications
+    */
+
+    // CIE standard illuminant series
+    static const struct mp_csp_col_xy d50 = {0.34577, 0.35850};
+    static const struct mp_csp_col_xy d65 = {0.31271, 0.32902};
+    static const struct mp_csp_col_xy c   = {0.31006, 0.31616};
+    static const struct mp_csp_col_xy dci = {0.31400, 0.35100};
+    static const struct mp_csp_col_xy e   = {1.0/3.0, 1.0/3.0};
+
+    // Stays all-zero for values not handled below
+    struct mp_csp_primaries prim = {{0}};
+
+    switch (spc) {
+    case MP_CSP_PRIM_BT_470M:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.670, 0.330}, .green = {0.210, 0.710},
+            .blue = {0.140, 0.080}, .white = c,
+        };
+        break;
+    case MP_CSP_PRIM_BT_601_525:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.630, 0.340}, .green = {0.310, 0.595},
+            .blue = {0.155, 0.070}, .white = d65,
+        };
+        break;
+    case MP_CSP_PRIM_BT_601_625:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.640, 0.330}, .green = {0.290, 0.600},
+            .blue = {0.150, 0.060}, .white = d65,
+        };
+        break;
+    // BT.709 is the default assumption if no colorspace information could
+    // be determined, eg. for files which have no video channel.
+    case MP_CSP_PRIM_AUTO:
+    case MP_CSP_PRIM_BT_709:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.640, 0.330}, .green = {0.300, 0.600},
+            .blue = {0.150, 0.060}, .white = d65,
+        };
+        break;
+    case MP_CSP_PRIM_BT_2020:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.708, 0.292}, .green = {0.170, 0.797},
+            .blue = {0.131, 0.046}, .white = d65,
+        };
+        break;
+    case MP_CSP_PRIM_APPLE:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.625, 0.340}, .green = {0.280, 0.595},
+            .blue = {0.115, 0.070}, .white = d65,
+        };
+        break;
+    case MP_CSP_PRIM_ADOBE:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.640, 0.330}, .green = {0.210, 0.710},
+            .blue = {0.150, 0.060}, .white = d65,
+        };
+        break;
+    case MP_CSP_PRIM_PRO_PHOTO:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.7347, 0.2653}, .green = {0.1596, 0.8404},
+            .blue = {0.0366, 0.0001}, .white = d50,
+        };
+        break;
+    case MP_CSP_PRIM_CIE_1931:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.7347, 0.2653}, .green = {0.2738, 0.7174},
+            .blue = {0.1666, 0.0089}, .white = e,
+        };
+        break;
+    // From SMPTE RP 431-2 and 432-1
+    case MP_CSP_PRIM_DCI_P3:
+    case MP_CSP_PRIM_DISPLAY_P3:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.680, 0.320}, .green = {0.265, 0.690},
+            .blue = {0.150, 0.060},
+        };
+        // Same primaries for both, only the white point differs
+        if (spc == MP_CSP_PRIM_DCI_P3)
+            prim.white = dci;
+        else
+            prim.white = d65;
+        break;
+    // From Panasonic VARICAM reference manual
+    case MP_CSP_PRIM_V_GAMUT:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.730, 0.280}, .green = {0.165, 0.840},
+            .blue = {0.100, -0.03}, .white = d65,
+        };
+        break;
+    // From Sony S-Log reference manual
+    case MP_CSP_PRIM_S_GAMUT:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.730, 0.280}, .green = {0.140, 0.855},
+            .blue = {0.100, -0.05}, .white = d65,
+        };
+        break;
+    // From EBU Tech. 3213-E
+    case MP_CSP_PRIM_EBU_3213:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.630, 0.340}, .green = {0.295, 0.605},
+            .blue = {0.155, 0.077}, .white = d65,
+        };
+        break;
+    // From H.273, traditional film with Illuminant C
+    case MP_CSP_PRIM_FILM_C:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.681, 0.319}, .green = {0.243, 0.692},
+            .blue = {0.145, 0.049}, .white = c,
+        };
+        break;
+    // From libplacebo source code
+    case MP_CSP_PRIM_ACES_AP0:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.7347, 0.2653},  .green = {0.0000, 1.0000},
+            .blue = {0.0001, -0.0770}, .white = {0.32168, 0.33767},
+        };
+        break;
+    // From libplacebo source code
+    case MP_CSP_PRIM_ACES_AP1:
+        prim = (struct mp_csp_primaries) {
+            .red  = {0.713, 0.293}, .green = {0.165, 0.830},
+            .blue = {0.128, 0.044}, .white = {0.32168, 0.33767},
+        };
+        break;
+    default:
+        break;
+    }
+
+    return prim;
+}
+
+// Get the nominal peak of a transfer curve, relative to the reference white
+// level, i.e. the brightest encodable value the curve can represent.
+// Curves without an explicit entry below report a peak of 1.0.
+float mp_trc_nom_peak(enum mp_csp_trc trc)
+{
+    if (trc == MP_CSP_TRC_PQ)
+        return 10000.0 / MP_REF_WHITE;
+    if (trc == MP_CSP_TRC_HLG)
+        return 12.0 / MP_REF_WHITE_HLG;
+    if (trc == MP_CSP_TRC_V_LOG)
+        return 46.0855;
+    if (trc == MP_CSP_TRC_S_LOG1)
+        return 6.52;
+    if (trc == MP_CSP_TRC_S_LOG2)
+        return 9.212;
+
+    return 1.0;
+}
+
+// A transfer curve counts as HDR when its nominal peak, as reported by
+// mp_trc_nom_peak(), lies above 1.0.
+bool mp_trc_is_hdr(enum mp_csp_trc trc)
+{
+    float peak = mp_trc_nom_peak(trc);
+    return peak > 1.0;
+}
+
+// Compute the RGB-to-XYZ matrix for the given primaries and white point and
+// store it in m, following the method described here:
+// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html
+void mp_get_rgb2xyz_matrix(struct mp_csp_primaries space, float m[3][3])
+{
+    // Index 0..2 = red, green and blue primary, index 3 = white point
+    const struct mp_csp_col_xy pts[4] = {
+        space.red, space.green, space.blue, space.white,
+    };
+    float X[4], Z[4], S[3];
+
+    // Convert from CIE xyY to XYZ. Y=1 is used for all four points, so only
+    // X and Z have to be computed
+    for (int k = 0; k < 4; k++) {
+        X[k] = pts[k].x / pts[k].y;
+        Z[k] = (1 - pts[k].x - pts[k].y) / pts[k].y;
+    }
+
+    // S = XYZ^-1 * W, where the columns of XYZ are the three primaries
+    for (int col = 0; col < 3; col++) {
+        m[0][col] = X[col];
+        m[1][col] = 1;
+        m[2][col] = Z[col];
+    }
+
+    mp_invert_matrix3x3(m);
+
+    for (int row = 0; row < 3; row++)
+        S[row] = m[row][0] * X[3] + m[row][1] * 1 + m[row][2] * Z[3];
+
+    // M = [Sc * XYZc]: every primary's column gets scaled by its S entry
+    for (int col = 0; col < 3; col++) {
+        m[0][col] = S[col] * X[col];
+        m[1][col] = S[col] * 1;
+        m[2][col] = S[col] * Z[col];
+    }
+}
+
+// M := M * XYZd<-XYZs
+// Multiplies m from the right by the chromatic adaptation matrix that takes
+// the white point src to the white point dest. m is left untouched if both
+// white points are (nearly) the same.
+static void mp_apply_chromatic_adaptation(struct mp_csp_col_xy src,
+                                          struct mp_csp_col_xy dest, float m[3][3])
+{
+    // With nearly identical white points the adaptation would be a wasteful
+    // identity operation, so bail out early.
+    double dx = fabs(src.x - dest.x);
+    double dy = fabs(src.y - dest.y);
+    if (dx < 1e-6 && dy < 1e-6)
+        return;
+
+    // XYZd<-XYZs = Ma^-1 * (I*[Cd/Cs]) * Ma
+    // http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html
+
+    // Ma = Bradford matrix, arguably the most popular method in use today.
+    // It was derived experimentally, hence the hard-coded values.
+    float bradford[3][3] = {
+        {  0.8951,  0.2664, -0.1614 },
+        { -0.7502,  1.7135,  0.0367 },
+        {  0.0389, -0.0685,  1.0296 },
+    };
+
+    // Diagonal matrix holding the dest/source cone response ratios
+    float scaled[3][3] = {{0}};
+
+    for (int i = 0; i < 3; i++) {
+        float cone_src = bradford[i][0] * mp_xy_X(src)
+                       + bradford[i][1] * 1
+                       + bradford[i][2] * mp_xy_Z(src);
+
+        float cone_dst = bradford[i][0] * mp_xy_X(dest)
+                       + bradford[i][1] * 1
+                       + bradford[i][2] * mp_xy_Z(dest);
+
+        scaled[i][i] = cone_dst / cone_src;
+    }
+
+    // scaled := I * [Cd/Cs] * Ma
+    mp_mul_matrix3x3(scaled, bradford);
+
+    // M := M * Ma^-1 * scaled (bradford holds Ma^-1 from here on)
+    mp_invert_matrix3x3(bradford);
+    mp_mul_matrix3x3(m, bradford);
+    mp_mul_matrix3x3(m, scaled);
+}
+
+// Get the coefficients of the source -> dest color management matrix.
+//  src:    primaries and white point of the source
+//  dest:   primaries and white point of the destination
+//  intent: rendering intent; MP_INTENT_SATURATION gives an identity matrix,
+//          MP_INTENT_ABSOLUTE_COLORIMETRIC skips the white point adaptation
+//  m:      receives the resulting matrix; all nine elements are written, so
+//          the previous contents do not matter
+void mp_get_cms_matrix(struct mp_csp_primaries src, struct mp_csp_primaries dest,
+                       enum mp_render_intent intent, float m[3][3])
+{
+    float tmp[3][3];
+
+    // In saturation mapping, we don't care about accuracy and just want
+    // primaries to map to primaries, making this an identity transformation.
+    // Write the off-diagonal zeros as well, so that the result does not
+    // depend on the caller having cleared m beforehand.
+    if (intent == MP_INTENT_SATURATION) {
+        for (int i = 0; i < 3; i++) {
+            for (int j = 0; j < 3; j++)
+                m[i][j] = (i == j) ? 1 : 0;
+        }
+        return;
+    }
+
+    // RGBd<-RGBs = RGBd<-XYZd * XYZd<-XYZs * XYZs<-RGBs
+    // Equations from: http://www.brucelindbloom.com/index.html?Math.html
+    // Note: Perceptual is treated like relative colorimetric. There's no
+    // definition for perceptual other than "make it look good".
+
+    // RGBd<-XYZd, inverted from XYZd<-RGBd
+    mp_get_rgb2xyz_matrix(dest, m);
+    mp_invert_matrix3x3(m);
+
+    // Chromatic adaptation, except in absolute colorimetric intent
+    if (intent != MP_INTENT_ABSOLUTE_COLORIMETRIC)
+        mp_apply_chromatic_adaptation(src.white, dest.white, m);
+
+    // XYZs<-RGBs
+    mp_get_rgb2xyz_matrix(src, tmp);
+    mp_mul_matrix3x3(m, tmp);
+}
+
+// Get the coefficients of an ST 428-1 xyz -> rgb conversion matrix. The
+// target is always DCI-P3.
+//  params: only the brightness setting is read
+//  intent: the rendering intent used to convert to the target primaries
+//  m:      receives the matrix (m->m) and the constant offsets (m->c)
+static void mp_get_xyz2rgb_coeffs(struct mp_csp_params *params,
+                                  enum mp_render_intent intent, struct mp_cmat *m)
+{
+    const float brightness = params->brightness;
+
+    // RGB<-XYZ for DCI-P3, obtained by inverting XYZ<-RGB
+    struct mp_csp_primaries p3 = mp_get_csp_primaries(MP_CSP_PRIM_DCI_P3);
+    mp_get_rgb2xyz_matrix(p3, m->m);
+    mp_invert_matrix3x3(m->m);
+
+    // All non-absolute mappings want to map source white to target white
+    if (intent != MP_INTENT_ABSOLUTE_COLORIMETRIC) {
+        // SMPTE EG 432-1 Annex H defines the white point as equal energy
+        static const struct mp_csp_col_xy smpte432 = {1.0/3.0, 1.0/3.0};
+        mp_apply_chromatic_adaptation(smpte432, p3.white, m->m);
+    }
+
+    // The output is linear RGB rather than companded RGB, so the brightness
+    // offset gets linearized too. Squaring it (with the sign kept) stands in
+    // for whatever gamma function might be in use; 2 is a reasonable
+    // approximation, and as this is an aesthetic setting only, exact values
+    // do not matter.
+    float offset = brightness * fabs(brightness);
+
+    m->c[0] = offset;
+    m->c[1] = offset;
+    m->c[2] = offset;
+}
+
+// Get the multiplication factor required if image data with input_bits
+// significant bits is stored in the LSBs of a fixed-point texture with a
+// higher bit depth (texture_bits). texture_bits must not be smaller than
+// input_bits.
+// This is broken. Use mp_get_csp_uint_mul().
+double mp_get_csp_mul(enum mp_csp csp, int input_bits, int texture_bits)
+{
+    assert(texture_bits >= input_bits);
+
+    // Convenience for some irrelevant cases, e.g. rgb565 or disabling expansion.
+    if (input_bits == 0)
+        return 1;
+
+    // XYZ is never scaled
+    if (csp == MP_CSP_XYZ)
+        return 1;
+
+    // Largest value the texture can hold
+    const double tex_max = (1LL << texture_bits) - 1.;
+
+    // RGB always uses the full range available.
+    if (csp == MP_CSP_RGB)
+        return ((1LL << input_bits) - 1.) / tex_max;
+
+    // High bit depth YUV uses a range shifted from 8 bit.
+    return (1LL << input_bits) / tex_max * 255 / 256;
+}
+
+// Return information about the fixed point representation of a color
+// component. This is needed for converting color from integer formats to or
+// from float. Use as follows:
+//      float_val = uint_val * m + o
+//      uint_val = clamp(round((float_val - o) / m))
+// See H.264/5 Annex E.
+//  csp: colorspace
+//  levels: full range flag
+//  bits: number of significant bits, e.g. 10 for yuv420p10, 16 for p010
+//  component: ID of the channel, as in mp_regular_imgfmt:
+//             1 is red/luminance/gray, 2 is green/Cb, 3 is blue/Cr, 4 is alpha.
+//  out_m: returns factor to multiply the uint number with
+//  out_o: returns offset to add after multiplication
+void mp_get_csp_uint_mul(enum mp_csp csp, enum mp_csp_levels levels,
+                         int bits, int component, double *out_m, double *out_o)
+{
+    // Integer range, and the float value the lower end is mapped to
+    uint16_t lo = 0;
+    uint16_t hi = (1u << bits) - 1;
+    double f_lo = 0;
+
+    // RGB data and the alpha channel always keep the plain full range
+    const int adjustable = csp != MP_CSP_RGB && component != 4;
+    const int chroma = component == 2 || component == 3;
+    // Limited range is only applied for depths of at least 8 bits
+    const int limited = levels != MP_CSP_LEVELS_PC && bits >= 8;
+
+    if (adjustable && chroma) {
+        f_lo = (1u << (bits - 1)) / -(double)hi; // force center => 0
+
+        if (limited) {
+            lo = 16  << (bits - 8); // => -0.5
+            hi = 240 << (bits - 8); // =>  0.5
+            f_lo = -0.5;
+        }
+    } else if (adjustable && limited) {
+        lo = 16  << (bits - 8); // => 0
+        hi = 235 << (bits - 8); // => 1
+    }
+
+    // The line through (lo, f_lo) and (hi, 1 + f_lo)
+    *out_m = 1.0 / (hi - lo);
+    *out_o = (1 + f_lo) - hi * *out_m;
+}
+
+/* Set up the matrix part of a yuv-to-rgb conversion from the luma weights
+ * of the R, G and B components (lr, lg, lb), which must add up to 1.
+ * The rows of mat->m are the R, G, B outputs, the columns the Y, U, V inputs.
+ * This is meant for colorspaces which fulfil the following conditions
+ * (true for the common YUV colorspaces):
+ * - The mapping from input [Y, U, V] to output [R, G, B] is linear.
+ * - Y is the vector [1, 1, 1], i.e. input Y maps to 1R+1G+1B.
+ * - U maps to a value with zero R and positive B ([0, x, y], y > 0;
+ *   blue and green only).
+ * - V maps to a value with zero B and positive R ([x, y, 0], x > 0;
+ *   red and green only).
+ * - U and V are orthogonal to the luma vector [lr, lg, lb].
+ * - The magnitudes of the vectors U and V are the minimal ones for which
+ *   the image of the set Y=[0...1],U=[-0.5...0.5],V=[-0.5...0.5] under the
+ *   conversion function covers the set R=[0...1],G=[0...1],B=[0...1]
+ *   (adapting the matrix to other input/output ranges is left to the
+ *   caller).
+ * Under these conditions lr, lg and lb uniquely determine the mapping of
+ * Y, U, V to R, G, B.
+ * The constant coefficients (mat->c) get no meaningful value here; they are
+ * merely zeroed.
+ */
+static void luma_coeffs(struct mp_cmat *mat, float lr, float lg, float lb)
+{
+    assert(fabs(lr+lg+lb - 1) < 1e-6);
+
+    struct mp_cmat coeffs = {{{0}}};
+
+    // Y contributes equally to R, G and B
+    for (int row = 0; row < 3; row++)
+        coeffs.m[row][0] = 1;
+
+    // U column: nothing for R, the rest split between G and B
+    coeffs.m[1][1] = -2 * (1-lb) * lb/lg;
+    coeffs.m[2][1] =  2 * (1-lb);
+
+    // V column: nothing for B, the rest split between R and G
+    coeffs.m[0][2] =  2 * (1-lr);
+    coeffs.m[1][2] = -2 * (1-lr) * lr/lg;
+
+    *mat = coeffs;
+}
+
+// get the coefficients of the yuv -> rgb conversion matrix
+void mp_get_csp_matrix(struct mp_csp_params *params, struct mp_cmat *m)
+{
+    enum mp_csp colorspace = params->color.space;
+    if (colorspace <= MP_CSP_AUTO || colorspace >= MP_CSP_COUNT)
+        colorspace = MP_CSP_BT_601;
+    enum mp_csp_levels levels_in = params->color.levels;
+    if (levels_in <= MP_CSP_LEVELS_AUTO || levels_in >= MP_CSP_LEVELS_COUNT)
+        levels_in = MP_CSP_LEVELS_TV;
+
+    switch (colorspace) {
+    case MP_CSP_BT_601:     luma_coeffs(m, 0.299,  0.587,  0.114 ); break;
+    case MP_CSP_BT_709:     luma_coeffs(m, 0.2126, 0.7152, 0.0722); break;
+    case MP_CSP_SMPTE_240M: luma_coeffs(m, 0.2122, 0.7013, 0.0865); break;
+    case MP_CSP_BT_2020_NC: luma_coeffs(m, 0.2627, 0.6780, 0.0593); break;
+    case MP_CSP_BT_2020_C: {
+        // Note: This outputs into the [-0.5,0.5] range for chroma information.
+        // If this clips on any VO, a constant 0.5 coefficient can be added
+        // to the chroma channels to normalize them into [0,1]. This is not
+        // currently needed by anything, though.
+        *m = (struct mp_cmat){{{0, 0, 1}, {1, 0, 0}, {0, 1, 0}}};
+        break;
+    }
+    case MP_CSP_RGB: {
+        *m = (struct mp_cmat){{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}};
+        levels_in = -1;
+        break;
+    }
+    case MP_CSP_XYZ: {
+        // The vo should probably not be using a matrix generated by this
+        // function for XYZ sources, but if it does, let's just convert it to
+        // an equivalent RGB space based on the colorimetry metadata it
+        // provided in mp_csp_params. (At the risk of clipping, if the
+        // chosen primaries are too small to fit the actual data)
+        mp_get_xyz2rgb_coeffs(params, MP_INTENT_RELATIVE_COLORIMETRIC, m);
+        levels_in = -1;
+        break;
+    }
+    case MP_CSP_YCGCO: {
+        *m = (struct mp_cmat) {
+            {{1,  -1,  1},
+             {1,   1,  0},
+             {1,  -1, -1}},
+        };
+        break;
+    }
+    default:
+        MP_ASSERT_UNREACHABLE();
+    };
+
+    if (params->is_float)
+        levels_in = -1;
+
+    if ((colorspace == MP_CSP_BT_601 || colorspace == MP_CSP_BT_709 ||
+         colorspace == MP_CSP_SMPTE_240M || colorspace == MP_CSP_BT_2020_NC))
+    {
+        // Hue is equivalent to rotating input [U, V] subvector around the origin.
+        // Saturation scales [U, V].
+        float huecos = params->gray ? 0 : params->saturation * cos(params->hue);
+        float huesin = params->gray ? 0 : params->saturation * sin(params->hue);
+        for (int i = 0; i < 3; i++) {
+            float u = m->m[i][1], v = m->m[i][2];
+            m->m[i][1] = huecos * u - huesin * v;
+            m->m[i][2] = huesin * u + huecos * v;
+        }
+    }
+
+    // The values below are written in 0-255 scale - thus bring s into range.
+    double s =
+        mp_get_csp_mul(colorspace, params->input_bits, params->texture_bits) / 255;
+    // NOTE: The yuvfull ranges as presented here are arguably ambiguous,
+    // and conflict with at least the full-range YCbCr/ICtCp values as defined
+    // by ITU-R BT.2100. If somebody ever complains about full-range YUV looking
+    // different from their reference display, this comment is probably why.
+    struct yuvlevels { double ymin, ymax, cmax, cmid; }
+        yuvlim =  { 16*s, 235*s, 240*s, 128*s },
+        yuvfull = {  0*s, 255*s, 255*s, 128*s },
+        anyfull = {  0*s, 255*s, 255*s/2, 0 }, // cmax picked to make cmul=ymul
+        yuvlev;
+    switch (levels_in) {
+    case MP_CSP_LEVELS_TV: yuvlev = yuvlim; break;
+    case MP_CSP_LEVELS_PC: yuvlev = yuvfull; break;
+    case -1: yuvlev = anyfull; break;
+    default:
+        MP_ASSERT_UNREACHABLE();
+    }
+
+    int levels_out = params->levels_out;
+    if (levels_out <= MP_CSP_LEVELS_AUTO || levels_out >= MP_CSP_LEVELS_COUNT)
+        levels_out = MP_CSP_LEVELS_PC;
+    struct rgblevels { double min, max; }
+        rgblim =  { 16/255., 235/255. },
+        rgbfull = {      0,        1  },
+        rgblev;
+    switch (levels_out) {
+    case MP_CSP_LEVELS_TV: rgblev = rgblim; break;
+    case MP_CSP_LEVELS_PC: rgblev = rgbfull; break;
+    default:
+        MP_ASSERT_UNREACHABLE();
+    }
+
+    double ymul = (rgblev.max - rgblev.min) / (yuvlev.ymax - yuvlev.ymin);
+    double cmul = (rgblev.max - rgblev.min) / (yuvlev.cmax - yuvlev.cmid) / 2;
+
+    // Contrast scales the output value range (gain)
+    ymul *= params->contrast;
+    cmul *= params->contrast;
+
+    for (int i = 0; i < 3; i++) {
+        m->m[i][0] *= ymul;
+        m->m[i][1] *= cmul;
+        m->m[i][2] *= cmul;
+        // Set c so that Y=ymin,UV=cmid maps to RGB=min (black to black),
+        // also add brightness offset (black lift)
+        m->c[i] = rgblev.min - m->m[i][0] * yuvlev.ymin
+                  - (m->m[i][1] + m->m[i][2]) * yuvlev.cmid
+                  + params->brightness;
+    }
+}
+
+// Set the colorspace field (params->color) from imgparams. Don't touch other fields.
+void mp_csp_set_image_params(struct mp_csp_params *params,
+                             const struct mp_image_params *imgparams)
+{
+    struct mp_image_params p = *imgparams; // work on a copy; imgparams stays untouched
+    mp_image_params_guess_csp(&p); // ensure consistency
+    params->color = p.color;
+}
+
+bool mp_colorspace_equal(struct mp_colorspace c1, struct mp_colorspace c2)
+{
+    if (c1.space != c2.space || c1.levels != c2.levels ||
+        c1.primaries != c2.primaries || c1.gamma != c2.gamma ||
+        c1.light != c2.light)
+        return false;
+    // All enum fields match; the HDR metadata decides.
+    return pl_hdr_metadata_equal(&c1.hdr, &c2.hdr);
+}
+
+enum mp_csp_equalizer_param { // indices into mp_csp_equalizer_opts.values[]
+    MP_CSP_EQ_BRIGHTNESS,
+    MP_CSP_EQ_CONTRAST,
+    MP_CSP_EQ_HUE,
+    MP_CSP_EQ_SATURATION,
+    MP_CSP_EQ_GAMMA,
+    MP_CSP_EQ_COUNT, // number of parameters; dimensions values[]
+};
+
+// User-facing equalizer options. Default initialization with 0 is enough.
+struct mp_csp_equalizer_opts {
+    // Value for each property is in the range [-100.0, 100.0].
+    // 0.0 is default, meaning neutral or no change. Indexed by MP_CSP_EQ_*.
+    float values[MP_CSP_EQ_COUNT];
+    int output_levels; // an mp_csp_levels value; 0 is MP_CSP_LEVELS_AUTO
+};
+
+#define OPT_BASE_STRUCT struct mp_csp_equalizer_opts
+
+const struct m_sub_options mp_csp_equalizer_conf = { // option table backing mp_csp_equalizer_opts
+    .opts = (const m_option_t[]) {
+        {"brightness", OPT_FLOAT(values[MP_CSP_EQ_BRIGHTNESS]),
+            M_RANGE(-100, 100)},
+        {"saturation", OPT_FLOAT(values[MP_CSP_EQ_SATURATION]),
+            M_RANGE(-100, 100)},
+        {"contrast", OPT_FLOAT(values[MP_CSP_EQ_CONTRAST]),
+            M_RANGE(-100, 100)},
+        {"hue", OPT_FLOAT(values[MP_CSP_EQ_HUE]),
+            M_RANGE(-100, 100)},
+        {"gamma", OPT_FLOAT(values[MP_CSP_EQ_GAMMA]),
+            M_RANGE(-100, 100)},
+        {"video-output-levels",
+            OPT_CHOICE_C(output_levels, mp_csp_levels_names)}, // "auto", "limited" or "full"
+        {0} // terminator entry
+    },
+    .size = sizeof(struct mp_csp_equalizer_opts),
+};
+
+// Copy settings from eq into params, rescaling from the [-100, 100] option range.
+static void mp_csp_copy_equalizer_values(struct mp_csp_params *params,
+                                         const struct mp_csp_equalizer_opts *eq)
+{
+    params->brightness = eq->values[MP_CSP_EQ_BRIGHTNESS] / 100.0; // [-1, 1], 0 is neutral
+    params->contrast = (eq->values[MP_CSP_EQ_CONTRAST] + 100) / 100.0; // [0, 2], 1 is neutral
+    params->hue = eq->values[MP_CSP_EQ_HUE] / 100.0 * M_PI; // radians, [-pi, pi]
+    params->saturation = (eq->values[MP_CSP_EQ_SATURATION] + 100) / 100.0; // [0, 2], 1 is neutral
+    params->gamma = exp(log(8.0) * eq->values[MP_CSP_EQ_GAMMA] / 100.0); // 8^(v/100): [1/8, 8]
+    params->levels_out = eq->output_levels; // passed through unscaled
+}
+
+struct mp_csp_equalizer_state *mp_csp_equalizer_create(void *ta_parent,
+                                                    struct mpv_global *global)
+{
+    struct m_config_cache *c = m_config_cache_alloc(ta_parent, global,
+                                                    &mp_csp_equalizer_conf);
+    // The terrible, terrible truth: the opaque state is just the config cache.
+    return (struct mp_csp_equalizer_state *)c;
+}
+
+bool mp_csp_equalizer_state_changed(struct mp_csp_equalizer_state *state)
+{
+    // The state handle is really the m_config_cache made by mp_csp_equalizer_create().
+    return m_config_cache_update((struct m_config_cache *)state);
+}
+
+void mp_csp_equalizer_state_get(struct mp_csp_equalizer_state *state,
+                                struct mp_csp_params *params)
+{
+    struct m_config_cache *cache = (struct m_config_cache *)state;
+    // Update the cache first, then convert its option struct into params.
+    m_config_cache_update(cache);
+    mp_csp_copy_equalizer_values(params, cache->opts);
+}
+
+void mp_invert_cmat(struct mp_cmat *out, struct mp_cmat *in)
+{
+    // Invert the affine map rgb = M * yuv + C. out may alias in: the input
+    // offsets are copied before out->c gets overwritten.
+    const float c[3] = {in->c[0], in->c[1], in->c[2]};
+    *out = *in;
+    mp_invert_matrix3x3(out->m);
+    // fix the constant coefficient:
+    // M^-1 * rgb = yuv + M^-1 * C
+    // yuv = M^-1 * rgb - M^-1 * C
+    //                  ^^^^^^^^^^
+    for (int i = 0; i < 3; i++)
+        out->c[i] = -(out->m[i][0] * c[0] + out->m[i][1] * c[1] + out->m[i][2] * c[2]);
+}
+
+// Multiply the color in `in` with the given matrix and store it in `out`.
+// i/o is {R, G, B} or {Y, U, V} (depending on input/output and matrix), using
+// a fixed point representation with the given number of bits (so for bits==8,
+// [0,255] maps to [0,1]). The output is clipped to the range as needed.
+void mp_map_fixp_color(struct mp_cmat *matrix, int ibits, int in[3],
+                                               int obits, int out[3])
+{
+    const int in_max = (1 << ibits) - 1, out_max = (1 << obits) - 1;
+    for (int row = 0; row < 3; row++) {
+        double acc = matrix->c[row];
+        for (int col = 0; col < 3; col++)
+            acc += matrix->m[row][col] * in[col] / in_max;
+        out[row] = av_clip((int)lrint(acc * out_max), 0, out_max);
+    }
+}
diff --git a/video/csputils.h b/video/csputils.h
new file mode 100644
index 0000000..3a904cb
--- /dev/null
+++ b/video/csputils.h
@@ -0,0 +1,290 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_CSPUTILS_H
+#define MPLAYER_CSPUTILS_H
+
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <libplacebo/colorspace.h>
+
+#include "options/m_option.h"
+
+/* NOTE: the csp and levels AUTO values are converted to specific ones
+ * above vf/vo level. At least vf_scale relies on all valid settings being
+ * nonzero at vf/vo level.
+ */
+
+enum mp_csp { // trailing strings are the option names listed in mp_csp_names[]
+    MP_CSP_AUTO,        // "auto"; value 0, see the NOTE above
+    MP_CSP_BT_601,      // "bt.601"
+    MP_CSP_BT_709,      // "bt.709"
+    MP_CSP_SMPTE_240M,  // "smpte-240m"
+    MP_CSP_BT_2020_NC,  // "bt.2020-ncl"
+    MP_CSP_BT_2020_C,   // "bt.2020-cl"
+    MP_CSP_RGB,         // "rgb"
+    MP_CSP_XYZ,         // "xyz"
+    MP_CSP_YCGCO,       // "ycgco"
+    MP_CSP_COUNT
+};
+
+extern const struct m_opt_choice_alternatives mp_csp_names[];
+
+enum mp_csp_levels { // trailing strings are the option names in mp_csp_levels_names[]
+    MP_CSP_LEVELS_AUTO, // "auto"; value 0, see the NOTE above
+    MP_CSP_LEVELS_TV,   // "limited"
+    MP_CSP_LEVELS_PC,   // "full"
+    MP_CSP_LEVELS_COUNT,
+};
+
+extern const struct m_opt_choice_alternatives mp_csp_levels_names[];
+
+enum mp_csp_prim {
+    MP_CSP_PRIM_AUTO,
+    MP_CSP_PRIM_BT_601_525,
+    MP_CSP_PRIM_BT_601_625,
+    MP_CSP_PRIM_BT_709,
+    MP_CSP_PRIM_BT_2020,
+    MP_CSP_PRIM_BT_470M,
+    MP_CSP_PRIM_APPLE,
+    MP_CSP_PRIM_ADOBE,
+    MP_CSP_PRIM_PRO_PHOTO,
+    MP_CSP_PRIM_CIE_1931,
+    MP_CSP_PRIM_DCI_P3,
+    MP_CSP_PRIM_DISPLAY_P3,
+    MP_CSP_PRIM_V_GAMUT,
+    MP_CSP_PRIM_S_GAMUT,
+    MP_CSP_PRIM_EBU_3213,
+    MP_CSP_PRIM_FILM_C,
+    MP_CSP_PRIM_ACES_AP0,
+    MP_CSP_PRIM_ACES_AP1,
+    MP_CSP_PRIM_COUNT
+};
+
+extern const struct m_opt_choice_alternatives mp_csp_prim_names[];
+
+enum mp_csp_trc {
+    MP_CSP_TRC_AUTO,
+    MP_CSP_TRC_BT_1886,
+    MP_CSP_TRC_SRGB,
+    MP_CSP_TRC_LINEAR,
+    MP_CSP_TRC_GAMMA18,
+    MP_CSP_TRC_GAMMA20,
+    MP_CSP_TRC_GAMMA22,
+    MP_CSP_TRC_GAMMA24,
+    MP_CSP_TRC_GAMMA26,
+    MP_CSP_TRC_GAMMA28,
+    MP_CSP_TRC_PRO_PHOTO,
+    MP_CSP_TRC_PQ,
+    MP_CSP_TRC_HLG,
+    MP_CSP_TRC_V_LOG,
+    MP_CSP_TRC_S_LOG1,
+    MP_CSP_TRC_S_LOG2,
+    MP_CSP_TRC_ST428,
+    MP_CSP_TRC_COUNT
+};
+
+extern const struct m_opt_choice_alternatives mp_csp_trc_names[];
+
+enum mp_csp_light {
+    MP_CSP_LIGHT_AUTO,
+    MP_CSP_LIGHT_DISPLAY,
+    MP_CSP_LIGHT_SCENE_HLG,
+    MP_CSP_LIGHT_SCENE_709_1886,
+    MP_CSP_LIGHT_SCENE_1_2,
+    MP_CSP_LIGHT_COUNT
+};
+
+extern const struct m_opt_choice_alternatives mp_csp_light_names[];
+
+// These constants are based on the ICC specification (Table 23) and match
+// up with the API of LittleCMS, which treats them as integers.
+enum mp_render_intent {
+    MP_INTENT_PERCEPTUAL = 0,
+    MP_INTENT_RELATIVE_COLORIMETRIC = 1,
+    MP_INTENT_SATURATION = 2,
+    MP_INTENT_ABSOLUTE_COLORIMETRIC = 3
+};
+
+// The numeric values (except -1) match the Matroska StereoMode element value.
+enum mp_stereo3d_mode {
+    MP_STEREO3D_INVALID = -1,
+    /* only modes explicitly referenced in the code are listed */
+    MP_STEREO3D_MONO = 0,
+    MP_STEREO3D_SBS2L = 1,
+    MP_STEREO3D_AB2R = 2,
+    MP_STEREO3D_AB2L = 3,
+    MP_STEREO3D_SBS2R = 11,
+    /* no explicit enum entries for most valid values */
+    MP_STEREO3D_COUNT = 15, // 14 is last valid mode
+};
+
+extern const struct m_opt_choice_alternatives mp_stereo3d_names[];
+
+#define MP_STEREO3D_NAME(x) m_opt_choice_str(mp_stereo3d_names, x)
+
+#define MP_STEREO3D_NAME_DEF(x, def) \
+    (MP_STEREO3D_NAME(x) ? MP_STEREO3D_NAME(x) : (def))
+
+struct mp_colorspace {
+    enum mp_csp space;
+    enum mp_csp_levels levels;
+    enum mp_csp_prim primaries;
+    enum mp_csp_trc gamma;
+    enum mp_csp_light light;
+    struct pl_hdr_metadata hdr;
+};
+
+// For many colorspace conversions, in particular those involving HDR, an
+// implicit reference white level is needed. Since this magic constant shows up
+// a lot, give it an explicit name. The value of 203 cd/m² comes from ITU-R
+// Report BT.2408, and the value for HLG comes from the cited HLG 75% level
+// (transferred to scene space).
+#define MP_REF_WHITE 203.0
+#define MP_REF_WHITE_HLG 3.17955
+
+// Replaces unknown values in the first struct by those of the second struct
+void mp_colorspace_merge(struct mp_colorspace *orig, struct mp_colorspace *new);
+
+struct mp_csp_params { // inputs of mp_get_csp_matrix()
+    struct mp_colorspace color; // input colorspace
+    enum mp_csp_levels levels_out; // output device
+    float brightness; // offset added to every output channel (black lift)
+    float contrast; // gain applied to the output value range
+    float hue; // angle in radians by which the [U, V] subvector is rotated
+    float saturation; // scale factor applied to [U, V]
+    float gamma;
+    // discard U/V components
+    bool gray;
+    // input is already centered and range-expanded
+    bool is_float;
+    // texture_bits/input_bits is for rescaling fixed point input to range [0,1]
+    int texture_bits;
+    int input_bits;
+};
+
+#define MP_CSP_PARAMS_DEFAULTS {                                \
+    .color = { .space = MP_CSP_BT_601,                          \
+               .levels = MP_CSP_LEVELS_TV },                    \
+    .levels_out = MP_CSP_LEVELS_PC,                             \
+    .brightness = 0, .contrast = 1, .hue = 0, .saturation = 1,  \
+    .gamma = 1, .texture_bits = 8, .input_bits = 8}
+
+struct mp_image_params;
+void mp_csp_set_image_params(struct mp_csp_params *params,
+                             const struct mp_image_params *imgparams);
+
+bool mp_colorspace_equal(struct mp_colorspace c1, struct mp_colorspace c2);
+
+enum mp_chroma_location {
+    MP_CHROMA_AUTO,
+    MP_CHROMA_TOPLEFT,  // uhd
+    MP_CHROMA_LEFT,     // mpeg2/4, h264
+    MP_CHROMA_CENTER,   // mpeg1, jpeg
+    MP_CHROMA_COUNT,
+};
+
+extern const struct m_opt_choice_alternatives mp_chroma_names[];
+
+enum mp_alpha_type {
+    MP_ALPHA_AUTO,
+    MP_ALPHA_STRAIGHT,
+    MP_ALPHA_PREMUL,
+};
+
+extern const struct m_opt_choice_alternatives mp_alpha_names[];
+
+extern const struct m_sub_options mp_csp_equalizer_conf;
+
+struct mpv_global;
+struct mp_csp_equalizer_state *mp_csp_equalizer_create(void *ta_parent,
+                                                    struct mpv_global *global);
+bool mp_csp_equalizer_state_changed(struct mp_csp_equalizer_state *state);
+void mp_csp_equalizer_state_get(struct mp_csp_equalizer_state *state,
+                                struct mp_csp_params *params);
+
+struct mp_csp_col_xy {
+    float x, y;
+};
+
+static inline float mp_xy_X(struct mp_csp_col_xy xy) { // X for Y = 1; no guard against xy.y == 0
+    return xy.x / xy.y;
+}
+
+static inline float mp_xy_Z(struct mp_csp_col_xy xy) { // Z for Y = 1; no guard against xy.y == 0
+    return (1 - xy.x - xy.y) / xy.y;
+}
+
+struct mp_csp_primaries {
+    struct mp_csp_col_xy red, green, blue, white;
+};
+
+enum mp_csp avcol_spc_to_mp_csp(int avcolorspace);
+enum mp_csp_levels avcol_range_to_mp_csp_levels(int avrange);
+enum mp_csp_prim avcol_pri_to_mp_csp_prim(int avpri);
+enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc);
+
+int mp_csp_to_avcol_spc(enum mp_csp colorspace);
+int mp_csp_levels_to_avcol_range(enum mp_csp_levels range);
+int mp_csp_prim_to_avcol_pri(enum mp_csp_prim prim);
+int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc);
+
+enum mp_csp mp_csp_guess_colorspace(int width, int height);
+enum mp_csp_prim mp_csp_guess_primaries(int width, int height);
+
+enum mp_chroma_location avchroma_location_to_mp(int avloc);
+int mp_chroma_location_to_av(enum mp_chroma_location mploc);
+void mp_get_chroma_location(enum mp_chroma_location loc, int *x, int *y);
+
+struct mp_csp_primaries mp_get_csp_primaries(enum mp_csp_prim csp);
+float mp_trc_nom_peak(enum mp_csp_trc trc);
+bool mp_trc_is_hdr(enum mp_csp_trc trc);
+
+/* Color conversion matrix: RGB = m * YUV + c
+ * m is in row-major matrix, with m[row][col], e.g.:
+ *     [ a11 a12 a13 ]     float m[3][3] = { { a11, a12, a13 },
+ *     [ a21 a22 a23 ]                       { a21, a22, a23 },
+ *     [ a31 a32 a33 ]                       { a31, a32, a33 } };
+ * This is accessed as e.g.: m[2-1][1-1] = a21
+ * In particular, each row contains all the coefficients for one of R, G, B,
+ * while each column contains all the coefficients for one of Y, U, V:
+ *     m[r,g,b][y,u,v] = ...
+ * The matrix could also be viewed as group of 3 vectors, e.g. the 1st column
+ * is the Y vector (1, 1, 1), the 2nd is the U vector, the 3rd the V vector.
+ * The matrix might also be used for other conversions and colorspaces.
+ */
+struct mp_cmat {
+    float m[3][3];
+    float c[3];
+};
+
+void mp_get_rgb2xyz_matrix(struct mp_csp_primaries space, float m[3][3]);
+void mp_get_cms_matrix(struct mp_csp_primaries src, struct mp_csp_primaries dest,
+                       enum mp_render_intent intent, float cms_matrix[3][3]);
+
+double mp_get_csp_mul(enum mp_csp csp, int input_bits, int texture_bits);
+void mp_get_csp_uint_mul(enum mp_csp csp, enum mp_csp_levels levels,
+                         int bits, int component, double *out_m, double *out_o);
+void mp_get_csp_matrix(struct mp_csp_params *params, struct mp_cmat *out);
+
+void mp_invert_matrix3x3(float m[3][3]);
+void mp_invert_cmat(struct mp_cmat *out, struct mp_cmat *in);
+void mp_map_fixp_color(struct mp_cmat *matrix, int ibits, int in[3],
+                                               int obits, int out[3]);
+
+#endif /* MPLAYER_CSPUTILS_H */
diff --git a/video/cuda.c b/video/cuda.c
new file mode 100644
index 0000000..3b7a2d8
--- /dev/null
+++ b/video/cuda.c
@@ -0,0 +1,44 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hwdec.h"
+#include "options/m_config.h"
+#include "options/options.h"
+
+#include <libavutil/hwcontext.h>
+
+static struct AVBufferRef *cuda_create_standalone(struct mpv_global *global,
+        struct mp_log *log, struct hwcontext_create_dev_params *params)
+{
+    struct cuda_opts *opts = mp_get_config_group(NULL, global, &cuda_conf);
+
+    // A cuda_device of -1 means no explicit device: pass a NULL device string.
+    char *decode_dev = opts->cuda_device != -1
+        ? talloc_asprintf(NULL, "%d", opts->cuda_device) : NULL;
+
+    AVBufferRef *ref = NULL; // result code below is ignored; ref is returned as-is
+    av_hwdevice_ctx_create(&ref, AV_HWDEVICE_TYPE_CUDA, decode_dev, NULL, 0);
+
+    talloc_free(decode_dev);
+    talloc_free(opts);
+    return ref;
+}
+
+const struct hwcontext_fns hwcontext_fns_cuda = {
+    .av_hwdevice_type = AV_HWDEVICE_TYPE_CUDA,
+    .create_dev = cuda_create_standalone,
+};
diff --git a/video/d3d.c b/video/d3d.c
new file mode 100644
index 0000000..ceddcf3
--- /dev/null
+++ b/video/d3d.c
@@ -0,0 +1,273 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <libavcodec/avcodec.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_d3d11va.h>
+
+#if HAVE_D3D9_HWACCEL
+#include <libavutil/hwcontext_dxva2.h>
+#endif
+
+#include "common/av_common.h"
+#include "common/common.h"
+#include "osdep/threads.h"
+#include "osdep/windows_utils.h"
+#include "video/fmt-conversion.h"
+#include "video/hwdec.h"
+#include "video/mp_image_pool.h"
+#include "video/mp_image.h"
+
+#include "d3d.h"
+
+HMODULE d3d11_dll, d3d9_dll, dxva2_dll;
+PFN_D3D11_CREATE_DEVICE d3d11_D3D11CreateDevice;
+
+static mp_once d3d_load_once = MP_STATIC_ONCE_INITIALIZER;
+
+#if !HAVE_UWP
+static void d3d_do_load(void)
+{
+    d3d11_dll = LoadLibrary(L"d3d11.dll"); // a handle stays NULL if its DLL fails to load;
+    d3d9_dll  = LoadLibrary(L"d3d9.dll");  // the users of these globals check for that
+    dxva2_dll = LoadLibrary(L"dxva2.dll");
+
+    if (d3d11_dll) {
+        d3d11_D3D11CreateDevice =
+            (void *)GetProcAddress(d3d11_dll, "D3D11CreateDevice");
+    }
+}
+#else
+static void d3d_do_load(void)
+{
+
+    d3d11_D3D11CreateDevice = D3D11CreateDevice; // linked directly; the *_dll handles are never set
+}
+#endif
+
+void d3d_load_dlls(void)
+{
+    mp_exec_once(&d3d_load_once, d3d_do_load); // called from several entry points; presumably loads only once
+}
+
+// Test if Direct3D11 can be used by us. Basically, this prevents trying to use
+// D3D11 on Win7, and then failing somewhere in the process.
+bool d3d11_check_decoding(ID3D11Device *dev)
+{
+    // We assume that NV12 is always supported, if hw decoding is supported at
+    // all. CheckFormatSupport() reports D3D11_FORMAT_SUPPORT_* bits (not
+    // D3D11_BIND_* ones), so test the decoder-output capability bit.
+    UINT supported = 0;
+    HRESULT hr = ID3D11Device_CheckFormatSupport(dev, DXGI_FORMAT_NV12, &supported);
+    return !FAILED(hr) && (supported & D3D11_FORMAT_SUPPORT_DECODER_OUTPUT);
+}
+
+static void d3d11_refine_hwframes(AVBufferRef *hw_frames_ctx)
+{
+    AVHWFramesContext *fctx = (void *)hw_frames_ctx->data;
+
+    if (fctx->format == AV_PIX_FMT_D3D11) {
+        AVD3D11VAFramesContext *hwctx = fctx->hwctx;
+
+        // According to hwcontext_d3d11va.h, yuv420p means DXGI_FORMAT_420_OPAQUE,
+        // which has no shader support, so only other formats get the shader bind flag.
+        if (fctx->sw_format != AV_PIX_FMT_YUV420P)
+            hwctx->BindFlags |= D3D11_BIND_SHADER_RESOURCE;
+    }
+}
+
+AVBufferRef *d3d11_wrap_device_ref(ID3D11Device *device)
+{
+    AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA);
+    if (!device_ref)
+        return NULL;
+
+    AVHWDeviceContext *ctx = (void *)device_ref->data;
+    AVD3D11VADeviceContext *hwctx = ctx->hwctx;
+
+    ID3D11Device_AddRef(device); // the context takes its own reference; the caller keeps theirs
+    hwctx->device = device;
+
+    if (av_hwdevice_ctx_init(device_ref) < 0)
+        av_buffer_unref(&device_ref); // leaves device_ref NULL, which is then returned
+
+    return device_ref;
+}
+
+static struct AVBufferRef *d3d11_create_standalone(struct mpv_global *global,
+        struct mp_log *plog, struct hwcontext_create_dev_params *params)
+{
+    ID3D11Device *device = NULL;
+    HRESULT hr;
+
+    d3d_load_dlls();
+    if (!d3d11_D3D11CreateDevice) { // entry point could not be resolved
+        mp_err(plog, "Failed to load D3D11 library\n");
+        return NULL;
+    }
+
+    hr = d3d11_D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL,
+                                 D3D11_CREATE_DEVICE_VIDEO_SUPPORT, NULL, 0,
+                                 D3D11_SDK_VERSION, &device, NULL, NULL);
+    if (FAILED(hr)) {
+        mp_err(plog, "Failed to create D3D11 Device: %s\n",
+               mp_HRESULT_to_str(hr));
+        return NULL;
+    }
+
+    AVBufferRef *avref = d3d11_wrap_device_ref(device);
+    ID3D11Device_Release(device); // drop our reference; the wrapper AddRef'd its own
+    if (!avref)
+        mp_err(plog, "Failed to allocate AVHWDeviceContext.\n");
+
+    return avref; // NULL on failure
+}
+
+const struct hwcontext_fns hwcontext_fns_d3d11 = {
+    .av_hwdevice_type       = AV_HWDEVICE_TYPE_D3D11VA,
+    .refine_hwframes        = d3d11_refine_hwframes,
+    .create_dev             = d3d11_create_standalone,
+};
+
+#if HAVE_D3D9_HWACCEL
+
+#define DXVA2API_USE_BITFIELDS
+#include <libavutil/common.h>
+
+#include <libavutil/hwcontext_dxva2.h>
+
+static void d3d9_free_av_device_ref(AVHWDeviceContext *ctx) // installed as ctx->free by d3d9_wrap_device_ref()
+{
+    AVDXVA2DeviceContext *hwctx = ctx->hwctx;
+
+    if (hwctx->devmgr)
+        IDirect3DDeviceManager9_Release(hwctx->devmgr); // pointer is not cleared: must run only once per context
+}
+
+AVBufferRef *d3d9_wrap_device_ref(IDirect3DDevice9 *device)
+{
+    // Wrap device in a new DXVA2 AVHWDeviceContext backed by a freshly created
+    // device manager. Returns NULL on any failure.
+    d3d_load_dlls();
+    if (!dxva2_dll)
+        return NULL;
+
+    HRESULT (WINAPI *DXVA2CreateDirect3DDeviceManager9)(UINT *, IDirect3DDeviceManager9 **) =
+        (void *)GetProcAddress(dxva2_dll, "DXVA2CreateDirect3DDeviceManager9");
+    if (!DXVA2CreateDirect3DDeviceManager9)
+        return NULL;
+
+    AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DXVA2);
+    if (!device_ref)
+        return NULL;
+
+    AVHWDeviceContext *ctx = (void *)device_ref->data;
+    AVDXVA2DeviceContext *hwctx = ctx->hwctx;
+
+    // Install the destructor before creating devmgr, so that every failure
+    // path releases it exactly once (via av_buffer_unref) instead of twice.
+    ctx->free = d3d9_free_av_device_ref;
+    UINT reset_token = 0;
+    HRESULT hr = DXVA2CreateDirect3DDeviceManager9(&reset_token, &hwctx->devmgr);
+    if (FAILED(hr))
+        goto fail;
+
+    hr = IDirect3DDeviceManager9_ResetDevice(hwctx->devmgr, device, reset_token);
+    if (FAILED(hr))
+        goto fail;
+
+    if (av_hwdevice_ctx_init(device_ref) < 0)
+        goto fail;
+
+    return device_ref;
+
+fail:
+    av_buffer_unref(&device_ref);
+    return NULL;
+}
+
+static struct AVBufferRef *d3d9_create_standalone(struct mpv_global *global,
+        struct mp_log *plog, struct hwcontext_create_dev_params *params)
+{
+    d3d_load_dlls();
+    if (!d3d9_dll || !dxva2_dll) {
+        mp_err(plog, "Failed to load D3D9 library\n");
+        return NULL;
+    }
+
+    HRESULT (WINAPI *Direct3DCreate9Ex)(UINT, IDirect3D9Ex **) =
+        (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex");
+    if (!Direct3DCreate9Ex) {
+        mp_err(plog, "Failed to locate Direct3DCreate9Ex\n");
+        return NULL;
+    }
+
+    IDirect3D9Ex *d3d9ex = NULL;
+    HRESULT hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9ex);
+    if (FAILED(hr)) {
+        mp_err(plog, "Failed to create IDirect3D9Ex object\n");
+        return NULL;
+    }
+
+    UINT adapter = D3DADAPTER_DEFAULT;
+    D3DDISPLAYMODEEX modeex = {0}; // NOTE(review): .Size stays 0; D3D9Ex docs ask for sizeof(D3DDISPLAYMODEEX) - confirm
+    IDirect3D9Ex_GetAdapterDisplayModeEx(d3d9ex, adapter, &modeex, NULL); // result unchecked
+
+    D3DPRESENT_PARAMETERS present_params = {
+        .Windowed         = TRUE,
+        .BackBufferWidth  = 640,
+        .BackBufferHeight = 480,
+        .BackBufferCount  = 0,
+        .BackBufferFormat = modeex.Format, // whatever the query above left in modeex
+        .SwapEffect       = D3DSWAPEFFECT_DISCARD,
+        .Flags            = D3DPRESENTFLAG_VIDEO,
+    };
+
+    IDirect3DDevice9Ex *exdev = NULL;
+    hr = IDirect3D9Ex_CreateDeviceEx(d3d9ex, adapter,
+                                     D3DDEVTYPE_HAL,
+                                     GetShellWindow(),
+                                     D3DCREATE_SOFTWARE_VERTEXPROCESSING |
+                                     D3DCREATE_MULTITHREADED |
+                                     D3DCREATE_FPU_PRESERVE,
+                                     &present_params,
+                                     NULL,
+                                     &exdev);
+    IDirect3D9_Release(d3d9ex); // released on both the success and the failure path
+    if (FAILED(hr)) {
+        mp_err(plog, "Failed to create Direct3D device: %s\n",
+               mp_HRESULT_to_str(hr));
+        return NULL;
+    }
+
+    AVBufferRef *avref = d3d9_wrap_device_ref((IDirect3DDevice9 *)exdev);
+    IDirect3DDevice9Ex_Release(exdev); // drop our reference; presumably the device manager keeps its own
+    if (!avref)
+        mp_err(plog, "Failed to allocate AVHWDeviceContext.\n");
+
+    return avref; // NULL on failure
+}
+
+const struct hwcontext_fns hwcontext_fns_dxva2 = {
+    .av_hwdevice_type       = AV_HWDEVICE_TYPE_DXVA2,
+    .create_dev             = d3d9_create_standalone,
+};
+
+#endif /* HAVE_D3D9_HWACCEL */
diff --git a/video/d3d.h b/video/d3d.h
new file mode 100644
index 0000000..0058905
--- /dev/null
+++ b/video/d3d.h
@@ -0,0 +1,42 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPV_DECODE_D3D_H
+#define MPV_DECODE_D3D_H
+
+#include <windows.h>
+#include <d3d11.h>
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+// Must call d3d_load_dlls() before accessing. Once this is done, the DLLs
+// remain loaded forever.
+extern HMODULE d3d11_dll, d3d9_dll, dxva2_dll;
+extern PFN_D3D11_CREATE_DEVICE d3d11_D3D11CreateDevice;
+
+void d3d_load_dlls(void);
+
+bool d3d11_check_decoding(ID3D11Device *dev);
+
+struct AVBufferRef;
+struct IDirect3DDevice9;
+
+struct AVBufferRef *d3d11_wrap_device_ref(ID3D11Device *device);
+struct AVBufferRef *d3d9_wrap_device_ref(struct IDirect3DDevice9 *device);
+
+#endif
diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c
new file mode 100644
index 0000000..b971d26
--- /dev/null
+++ b/video/decode/vd_lavc.c
@@ -0,0 +1,1457 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <float.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <stdbool.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavformat/version.h>
+#include <libavutil/common.h>
+#include <libavutil/hwcontext.h>
+#include <libavutil/opt.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/pixdesc.h>
+
+#include "mpv_talloc.h"
+#include "common/global.h"
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "options/options.h"
+#include "osdep/threads.h"
+#include "misc/bstr.h"
+#include "common/av_common.h"
+#include "common/codecs.h"
+
+#include "video/fmt-conversion.h"
+
+#include "filters/f_decoder_wrapper.h"
+#include "filters/filter_internal.h"
+#include "video/hwdec.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+#include "video/mp_image_pool.h"
+#include "demux/demux.h"
+#include "demux/stheader.h"
+#include "demux/packet.h"
+#include "video/csputils.h"
+#include "video/sws_utils.h"
+#include "video/out/vo.h"
+
+#include "options/m_option.h"
+
+static void init_avctx(struct mp_filter *vd);
+static void uninit_avctx(struct mp_filter *vd);
+
+static int get_buffer2_direct(AVCodecContext *avctx, AVFrame *pic, int flags);
+static enum AVPixelFormat get_format_hwdec(struct AVCodecContext *avctx,
+                                           const enum AVPixelFormat *pix_fmt);
+static int hwdec_opt_help(struct mp_log *log, const m_option_t *opt,
+                          struct bstr name);
+
+#define HWDEC_DELAY_QUEUE_COUNT 2 // max_delay_queue value for copying hwdecs
+
+#define OPT_BASE_STRUCT struct vd_lavc_params
+
+struct vd_lavc_params { // storage for the options listed in vd_lavc_conf
+    bool fast;              // sets AV_CODEC_FLAG2_FAST
+    int film_grain;         // -1: auto, 0: cpu, 1: gpu
+    bool show_all;          // sets AV_CODEC_FLAG_OUTPUT_CORRUPT
+    int skip_loop_filter;   // AVDISCARD_* value for avctx->skip_loop_filter
+    int skip_idct;          // AVDISCARD_* value for avctx->skip_idct
+    int skip_frame;         // AVDISCARD_* value for avctx->skip_frame
+    int framedrop;          // AVDISCARD_* value (--vd-lavc-framedrop)
+    int threads;            // passed to mp_set_avcodec_threads() for sw decoding
+    bool bitexact;          // sets AV_CODEC_FLAG_BITEXACT
+    bool old_x264;          // forces x264_build=150 on H.264 decoders
+    bool apply_cropping;    // copied to avctx->apply_cropping
+    bool check_hw_profile;  // false: allow hwaccel profile mismatches
+    int software_fallback;  // INT_MAX means "no"; otherwise a number >= 1
+    char **avopts;          // key/value list applied with mp_set_avopts()
+    int dr;                 // -1: auto, 0: no, 1: yes (get_buffer2_direct)
+    char **hwdec_api;       // NULL-terminated list of --hwdec entries
+    char *hwdec_codecs;     // comma-separated codec whitelist, or "all"
+    int hwdec_image_format; // if set, overrides sw_format of the hw frames pool
+    int hwdec_extra_frames; // extra surfaces added to the initial pool size
+};
+
+static const struct m_opt_choice_alternatives discard_names[] = {
+    {"none",        AVDISCARD_NONE},    // option value name -> enum AVDiscard
+    {"default",     AVDISCARD_DEFAULT},
+    {"nonref",      AVDISCARD_NONREF},
+    {"bidir",       AVDISCARD_BIDIR},
+    {"nonkey",      AVDISCARD_NONKEY},
+    {"all",         AVDISCARD_ALL},
+    {0}                                 // terminator
+};
+#define OPT_DISCARD(field) OPT_CHOICE_C(field, discard_names)
+
+const struct m_sub_options vd_lavc_conf = { // options backing vd_lavc_params
+    .opts = (const m_option_t[]){
+        {"vd-lavc-fast", OPT_BOOL(fast)},
+        {"vd-lavc-film-grain", OPT_CHOICE(film_grain,
+            {"auto", -1}, {"cpu", 0}, {"gpu", 1})},
+        {"vd-lavc-show-all", OPT_BOOL(show_all)},
+        {"vd-lavc-skiploopfilter", OPT_DISCARD(skip_loop_filter)},
+        {"vd-lavc-skipidct", OPT_DISCARD(skip_idct)},
+        {"vd-lavc-skipframe", OPT_DISCARD(skip_frame)},
+        {"vd-lavc-framedrop", OPT_DISCARD(framedrop)},
+        {"vd-lavc-threads", OPT_INT(threads), M_RANGE(0, DBL_MAX)},
+        {"vd-lavc-bitexact", OPT_BOOL(bitexact)},
+        {"vd-lavc-assume-old-x264", OPT_BOOL(old_x264)},
+        {"vd-lavc-check-hw-profile", OPT_BOOL(check_hw_profile)},
+        {"vd-lavc-software-fallback", OPT_CHOICE(software_fallback,
+            {"no", INT_MAX}, {"yes", 1}), M_RANGE(1, INT_MAX)}, // or a number
+        {"vd-lavc-o", OPT_KEYVALUELIST(avopts)},
+        {"vd-lavc-dr", OPT_CHOICE(dr,
+            {"auto", -1}, {"no", 0}, {"yes", 1})},
+        {"vd-apply-cropping", OPT_BOOL(apply_cropping)},
+        {"hwdec", OPT_STRINGLIST(hwdec_api),
+            .help = hwdec_opt_help,
+            .flags = M_OPT_OPTIONAL_PARAM | UPDATE_HWDEC},
+        {"hwdec-codecs", OPT_STRING(hwdec_codecs)},
+        {"hwdec-image-format", OPT_IMAGEFORMAT(hwdec_image_format)},
+        {"hwdec-extra-frames", OPT_INT(hwdec_extra_frames), M_RANGE(0, 256)},
+        {0}
+    },
+    .size = sizeof(struct vd_lavc_params),
+    .defaults = &(const struct vd_lavc_params){
+        .film_grain = -1 /*auto*/,
+        .check_hw_profile = true,
+        .software_fallback = 3,
+        .skip_loop_filter = AVDISCARD_DEFAULT,
+        .skip_idct = AVDISCARD_DEFAULT,
+        .skip_frame = AVDISCARD_DEFAULT,
+        .framedrop = AVDISCARD_NONREF,
+        .dr = -1, // auto
+        .hwdec_api = (char *[]){"no", NULL,}, // hardware decoding is opt-in
+        .hwdec_codecs = "h264,vc1,hevc,vp8,vp9,av1,prores",
+        // Maximum number of surfaces the player wants to buffer. This number
+        // might require adjustment depending on whatever the player does;
+        // for example, if vo_gpu increases the number of reference surfaces for
+        // interpolation, this value has to be increased too.
+        .hwdec_extra_frames = 6,
+        .apply_cropping = true,
+    },
+};
+
+struct hwdec_info {
+    char name[64];        // full name: "<codec name>-<method_name>"
+    char method_name[24]; // non-unique name describing the hwdec method
+    const AVCodec *codec; // implemented by this codec
+    enum AVHWDeviceType lavc_device; // if not NONE, get a hwdevice
+    bool copying; // if true, outputs sw frames, or copy to sw ourselves
+    enum AVPixelFormat pix_fmt; // if not NONE, select in get_format
+    bool use_hw_frames; // set AVCodecContext.hw_frames_ctx
+    bool use_hw_device; // set AVCodecContext.hw_device_ctx
+    unsigned int flags; // HWDEC_FLAG_*
+
+    // for internal sorting (see hwdec_compare())
+    int auto_pos; // hwdec_autoprobe_info index, or INT_MAX if not autoprobed
+    int rank;     // position at which the entry was appended to the list
+};
+
+typedef struct lavc_ctx {
+    struct mp_log *log;
+    struct m_config_cache *opts_cache;
+    struct vd_lavc_params *opts;
+    struct mp_codec_params *codec;
+    AVCodecContext *avctx; // NULL while no decoder is initialized
+    AVFrame *pic;
+    AVPacket *avpkt;
+    bool use_hwdec;
+    struct hwdec_info hwdec; // valid only if use_hwdec==true
+    bstr *attempted_hwdecs; // names of hwdecs already looked at (see reinit())
+    int num_attempted_hwdecs;
+    AVRational codec_timebase;
+    enum AVDiscard skip_frame; // avctx->skip_frame as set up by init_avctx()
+    bool flushing;
+    struct lavc_state state;
+    const char *decoder; // name of the decoder used for software decoding
+    bool hwdec_failed;
+    bool hwdec_notified;
+    bool force_eof; // set if no hwdec is usable and sw fallback is disabled
+
+    bool intra_only; // codec descriptor has AV_CODEC_PROP_INTRA_ONLY
+    int framedrop_flags;
+
+    bool hw_probing; // set by init_avctx() for hwdec, cleared on uninit
+    struct demux_packet **sent_packets;
+    int num_sent_packets;
+
+    struct demux_packet **requeue_packets;
+    int num_requeue_packets;
+
+    struct mp_image **delay_queue;
+    int num_delay_queue;
+    int max_delay_queue; // HWDEC_DELAY_QUEUE_COUNT for copying hwdecs, else 0
+
+    // From VO
+    struct vo *vo;
+    struct mp_hwdec_devices *hwdec_devs;
+
+    // Wrapped AVHWDeviceContext* used for decoding.
+    AVBufferRef *hwdec_dev;
+
+    bool hwdec_request_reinit;
+    int hwdec_fail_count;
+
+    struct mp_image_pool *hwdec_swpool;
+
+    AVBufferRef *cached_hw_frames_ctx; // pool reused by init_generic_hwaccel()
+
+    // --- The following fields are protected by dr_lock.
+    mp_mutex dr_lock;
+    bool dr_failed;
+    struct mp_image_pool *dr_pool;
+    int dr_imgfmt, dr_w, dr_h, dr_stride_align;
+
+    struct mp_decoder public;
+} vd_ffmpeg_ctx;
+
+enum {
+    HWDEC_FLAG_AUTO         = (1 << 0), // prioritize in autoprobe order
+    HWDEC_FLAG_WHITELIST    = (1 << 1), // whitelist for auto-safe
+};
+
+struct autoprobe_info {
+    const char *method_name;    // compared against hwdec_info.method_name
+    unsigned int flags;         // HWDEC_FLAG_*
+};
+
+// Things not included in this list will be tried last (see hwdec_compare()).
+const struct autoprobe_info hwdec_autoprobe_info[] = {
+    {"d3d11va",         HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"dxva2",           HWDEC_FLAG_AUTO},
+    {"d3d11va-copy",    HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"dxva2-copy",      HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"nvdec",           HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"nvdec-copy",      HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"vaapi",           HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"vaapi-copy",      HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"vdpau",           HWDEC_FLAG_AUTO},
+    {"vdpau-copy",      HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"drm",             HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"drm-copy",        HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"mmal",            HWDEC_FLAG_AUTO},
+    {"mmal-copy",       HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"mediacodec",      HWDEC_FLAG_AUTO},
+    {"mediacodec-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"videotoolbox",    HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {"videotoolbox-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST},
+    {0} // terminator: method_name == NULL ends the lookup in add_hwdec_item()
+};
+
+static int hwdec_compare(const void *p1, const void *p2)
+{
+    const struct hwdec_info *a = p1, *b = p2;
+    bool a_unlisted = a->auto_pos == INT_MAX;
+    bool b_unlisted = b->auto_pos == INT_MAX;
+
+    if (a == b)
+        return 0;
+    // Entries without autoprobe position strictly go to the end of the list.
+    if (a_unlisted != b_unlisted)
+        return a_unlisted ? 1 : -1;
+    // Non-copying entries come first, so --hwdec=auto picks them.
+    if (a->copying != b->copying)
+        return a->copying ? 1 : -1;
+    // Then follow the autoprobe preference order.
+    if (a->auto_pos != b->auto_pos)
+        return a->auto_pos < b->auto_pos ? -1 : 1;
+    // Entries that have a lavc device type precede those that don't.
+    if (!a->lavc_device != !b->lavc_device)
+        return a->lavc_device ? -1 : 1;
+    // Tie-break on insertion rank to keep the resulting order deterministic.
+    return a->rank > b->rank ? 1 : -1;
+}
+
+// Completes an entry (name, "-copy" suffix, rank, autoprobe data) and appends
+// it to *infos. info is taken by value, so the caller's copy stays untouched.
+static void add_hwdec_item(struct hwdec_info **infos, int *num_infos,
+                           struct hwdec_info info)
+{
+    const struct autoprobe_info *probe = hwdec_autoprobe_info;
+    info.rank = *num_infos;
+    info.auto_pos = INT_MAX;
+    if (info.copying)
+        mp_snprintf_cat(info.method_name, sizeof(info.method_name), "-copy");
+
+    // The codec name is always part of the full name, so that wrapper
+    // decoders cannot clash with hwaccels and others.
+    snprintf(info.name, sizeof(info.name), "%s-%s",
+             info.codec->name, info.method_name);
+
+    for (int pos = 0; probe[pos].method_name; pos++) {
+        if (strcmp(probe[pos].method_name, info.method_name) != 0)
+            continue;
+        info.flags |= probe[pos].flags;
+        if (info.flags & HWDEC_FLAG_AUTO)
+            info.auto_pos = pos;
+    }
+
+    MP_TARRAY_APPEND(NULL, *infos, *num_infos, info);
+}
+
+// Collects the hw decoding methods offered by libavcodec's video decoders in
+// *infos (usually a direct and a "-copy" variant), sorted by hwdec_compare().
+static void add_all_hwdec_methods(struct hwdec_info **infos, int *num_infos)
+{
+    const AVCodec *codec = NULL;
+    void *iter = NULL;
+    while (1) {
+        codec = av_codec_iterate(&iter);
+        if (!codec)
+            break;
+        if (codec->type != AVMEDIA_TYPE_VIDEO || !av_codec_is_decoder(codec))
+            continue;
+
+        struct hwdec_info info_template = {
+            .pix_fmt = AV_PIX_FMT_NONE,
+            .codec = codec,
+        };
+        // The wrapper name is only considered for hardware/hybrid decoders.
+        const char *wrapper = NULL;
+        if (codec->capabilities & (AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_HYBRID))
+            wrapper = codec->wrapper_name;
+
+        // A decoder can provide multiple methods. In particular, hwaccels
+        // provide various methods (e.g. native h264 with vaapi & d3d11), but
+        // even wrapper decoders could provide multiple methods.
+        bool found_any = false;
+        for (int n = 0; ; n++) {
+            const AVCodecHWConfig *cfg = avcodec_get_hw_config(codec, n);
+            if (!cfg)
+                break;
+            if ((cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) ||
+                (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX))
+            {
+                struct hwdec_info info = info_template;
+                info.lavc_device = cfg->device_type;
+                info.pix_fmt = cfg->pix_fmt;
+
+                const char *name = av_hwdevice_get_type_name(cfg->device_type);
+                assert(name); // API violation by libavcodec
+
+                // nvdec hwaccels and the cuvid full decoder clash with their
+                // naming, so fix it here; we also prefer nvdec for the hwaccel.
+                if (strcmp(name, "cuda") == 0 && !wrapper)
+                    name = "nvdec";
+
+                snprintf(info.method_name, sizeof(info.method_name), "%s", name);
+
+                // Usually we want to prefer using hw_frames_ctx for true
+                // hwaccels only, but we actually don't have any way to detect
+                // those, so always use hw_frames_ctx if offered.
+                if (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) {
+                    info.use_hw_frames = true;
+                } else {
+                    info.use_hw_device = true;
+                }
+
+                // Direct variant.
+                add_hwdec_item(infos, num_infos, info);
+
+                // Copy variant. Uses a plain hw_device_ctx whenever offered.
+                info.copying = true;
+                if (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) {
+                    info.use_hw_frames = false;
+                    info.use_hw_device = true;
+                }
+                add_hwdec_item(infos, num_infos, info);
+                found_any = true;
+            } else if (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) {
+                struct hwdec_info info = info_template;
+                info.pix_fmt = cfg->pix_fmt;
+
+                // Named after the wrapper, or else after the hw pixel format.
+                const char *name = wrapper;
+                if (!name)
+                    name = av_get_pix_fmt_name(info.pix_fmt);
+                assert(name); // API violation by libavcodec
+
+                snprintf(info.method_name, sizeof(info.method_name), "%s", name);
+
+                // Direct variant.
+                add_hwdec_item(infos, num_infos, info);
+
+                // Copy variant.
+                info.copying = true;
+                info.pix_fmt = AV_PIX_FMT_NONE; // trust it can do sw output
+                add_hwdec_item(infos, num_infos, info);
+
+                found_any = true;
+            }
+        }
+
+        if (!found_any && wrapper) {
+            // We _know_ there's something supported here, usually outputting
+            // sw surfaces. E.g. mediacodec (before hw_device_ctx support).
+
+            struct hwdec_info info = info_template;
+            info.copying = true; // probably
+
+            snprintf(info.method_name, sizeof(info.method_name), "%s", wrapper);
+            add_hwdec_item(infos, num_infos, info);
+        }
+    }
+
+    qsort(*infos, *num_infos, sizeof(struct hwdec_info), hwdec_compare);
+}
+
+// Whether codec is on the comma-separated --hwdec-codecs list ("all" = any).
+static bool hwdec_codec_allowed(struct mp_filter *vd, const char *codec)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    bstr rest = bstr0(ctx->opts->hwdec_codecs);
+    for (bstr entry; rest.len; ) {
+        bstr_split_tok(rest, ",", &entry, &rest);
+        if (bstr_equals0(entry, codec) || bstr_equals0(entry, "all"))
+            return true;
+    }
+    return false;
+}
+
+// Returns a new reference to the hw device to decode with, or NULL on failure.
+// Copying hwdecs get a device of their own, others use one from hwdec_devs.
+static AVBufferRef *hwdec_create_dev(struct mp_filter *vd,
+                                     struct hwdec_info *hwdec,
+                                     bool autoprobe)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    assert(hwdec->lavc_device);
+
+    if (hwdec->copying) {
+        const struct hwcontext_fns *fns =
+            hwdec_get_hwcontext_fns(hwdec->lavc_device);
+        if (!fns || !fns->create_dev) {
+            // No custom creation hook: let libavutil create a default device.
+            AVBufferRef *dev = NULL;
+            av_hwdevice_ctx_create(&dev, hwdec->lavc_device, NULL, NULL, 0);
+            return dev;
+        }
+        struct hwcontext_create_dev_params args = {.probing = autoprobe};
+        return fns->create_dev(vd->global, vd->log, &args);
+    }
+
+    if (!ctx->hwdec_devs)
+        return NULL;
+
+    int imgfmt = pixfmt2imgfmt(hwdec->pix_fmt);
+    struct hwdec_imgfmt_request request = {
+        .imgfmt = imgfmt,
+        .probing = autoprobe,
+    };
+    hwdec_devices_request_for_img_fmt(ctx->hwdec_devs, &request);
+    const struct mp_hwdec_ctx *hw_ctx =
+        hwdec_devices_get_by_imgfmt(ctx->hwdec_devs, imgfmt);
+    if (!hw_ctx || !hw_ctx->av_device_ref)
+        return NULL;
+    return av_buffer_ref(hw_ctx->av_device_ref);
+}
+
+// Select if and which hwdec to use. Also makes sure to get the decode device.
+// The result is stored in ctx->use_hwdec, ctx->hwdec and ctx->hwdec_dev.
+static void select_and_set_hwdec(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    const char *codec = ctx->codec->codec;
+
+    m_config_cache_update(ctx->opts_cache);
+
+    struct hwdec_info *hwdecs = NULL;
+    int num_hwdecs = 0;
+    add_all_hwdec_methods(&hwdecs, &num_hwdecs);
+
+    char **hwdec_api = ctx->opts->hwdec_api;
+    for (int i = 0; hwdec_api[i]; i++) {
+        bstr opt = bstr0(hwdec_api[i]);
+        // Classify the entry. Note that "" maps to auto and "yes" to auto-safe.
+        bool hwdec_requested = !bstr_equals0(opt, "no");
+        bool hwdec_auto_all = bstr_equals0(opt, "auto") ||
+                            bstr_equals0(opt, "");
+        bool hwdec_auto_safe = bstr_equals0(opt, "auto-safe") ||
+                            bstr_equals0(opt, "auto-copy-safe") ||
+                            bstr_equals0(opt, "yes");
+        bool hwdec_auto_copy = bstr_equals0(opt, "auto-copy") ||
+                            bstr_equals0(opt, "auto-copy-safe");
+        bool hwdec_auto = hwdec_auto_all || hwdec_auto_copy || hwdec_auto_safe;
+
+        if (!hwdec_requested) {
+            MP_VERBOSE(vd, "No hardware decoding requested.\n");
+            break;
+        } else if (!hwdec_codec_allowed(vd, codec)) {
+            MP_VERBOSE(vd, "Not trying to use hardware decoding: codec %s is not "
+                    "on whitelist.\n", codec);
+            break;
+        } else {
+            bool hwdec_name_supported = false;  // relevant only if !hwdec_auto
+            for (int n = 0; n < num_hwdecs; n++) {
+                struct hwdec_info *hwdec = &hwdecs[n];
+
+                if (!hwdec_auto && !(bstr_equals0(opt, hwdec->method_name) ||
+                                    bstr_equals0(opt, hwdec->name)))
+                    continue;
+                hwdec_name_supported = true;
+
+                bool already_attempted = false;
+                for (int j = 0; j < ctx->num_attempted_hwdecs; j++) {
+                    if (bstr_equals0(ctx->attempted_hwdecs[j], hwdec->name)) {
+                        MP_DBG(vd, "Skipping previously attempted hwdec: %s\n",
+                               hwdec->name);
+                        already_attempted = true;
+                        break;
+                    }
+                }
+                if (already_attempted)
+                    continue;
+
+                const char *hw_codec = mp_codec_from_av_codec_id(hwdec->codec->id);
+                if (!hw_codec || strcmp(hw_codec, codec) != 0)
+                    continue;
+
+                if (hwdec_auto_safe && !(hwdec->flags & HWDEC_FLAG_WHITELIST))
+                    continue;
+
+                MP_VERBOSE(vd, "Looking at hwdec %s...\n", hwdec->name);
+
+                /*
+                 * Past this point, any kind of failure that results in us
+                 * looking for a new hwdec should not lead to us trying this
+                 * hwdec again - so add it to the list, regardless of whether
+                 * initialisation will succeed or not.
+                 */
+                MP_TARRAY_APPEND(ctx, ctx->attempted_hwdecs,
+                                 ctx->num_attempted_hwdecs,
+                                 bstrdup(ctx, bstr0(hwdec->name)));
+
+                if (hwdec_auto_copy && !hwdec->copying) {
+                    MP_VERBOSE(vd, "Not using this for auto-copy.\n");
+                    continue;
+                }
+
+                if (hwdec->lavc_device) {
+                    ctx->hwdec_dev = hwdec_create_dev(vd, hwdec, hwdec_auto);
+                    if (!ctx->hwdec_dev) {
+                        MP_VERBOSE(vd, "Could not create device.\n");
+                        continue;
+                    }
+
+                    const struct hwcontext_fns *fns =
+                                hwdec_get_hwcontext_fns(hwdec->lavc_device);
+                    if (fns && fns->is_emulated && fns->is_emulated(ctx->hwdec_dev)) {
+                        if (hwdec_auto) {
+                            MP_VERBOSE(vd, "Not using emulated API.\n");
+                            av_buffer_unref(&ctx->hwdec_dev);
+                            continue;
+                        }
+                        MP_WARN(vd, "Using emulated hardware decoding API.\n");
+                    }
+                } else if (!hwdec->copying) {
+                    // Most likely METHOD_INTERNAL, which often use delay-loaded
+                    // VO support as well.
+                    if (ctx->hwdec_devs) {
+                        struct hwdec_imgfmt_request params = {
+                            .imgfmt = pixfmt2imgfmt(hwdec->pix_fmt),
+                            .probing = hwdec_auto,
+                        };
+                        hwdec_devices_request_for_img_fmt(
+                            ctx->hwdec_devs, &params);
+                    }
+                }
+
+                ctx->use_hwdec = true;
+                ctx->hwdec = *hwdec; // copied, as hwdecs is freed below
+                break;
+            }
+            if (ctx->use_hwdec)
+                break;
+            else if (!hwdec_auto && !hwdec_name_supported)
+                MP_WARN(vd, "Unsupported hwdec: %.*s\n", BSTR_P(opt));
+        }
+    }
+    talloc_free(hwdecs);
+
+    if (ctx->use_hwdec) {
+        MP_VERBOSE(vd, "Trying hardware decoding via %s.\n", ctx->hwdec.name);
+        if (strcmp(ctx->decoder, ctx->hwdec.codec->name) != 0)
+            MP_VERBOSE(vd, "Using underlying hw-decoder '%s'\n",
+                       ctx->hwdec.codec->name);
+    } else {
+        // If software fallback is disabled and we get here, all hwdec must
+        // have failed. Tell the ctx to always force an eof.
+        if (ctx->opts->software_fallback == INT_MAX) {
+            MP_WARN(ctx, "Software decoding fallback is disabled.\n");
+            ctx->force_eof = true;
+        } else {
+            MP_VERBOSE(vd, "Using software decoding.\n");
+        }
+    }
+}
+
+// Prints the list of valid --hwdec values: all methods known to libavcodec,
+// followed by the special values handled by select_and_set_hwdec().
+static int hwdec_opt_help(struct mp_log *log, const m_option_t *opt,
+                          struct bstr name)
+{
+    struct hwdec_info *hwdecs = NULL;
+    int num_hwdecs = 0;
+    add_all_hwdec_methods(&hwdecs, &num_hwdecs);
+
+    mp_info(log, "Valid values (with alternative full names):\n");
+
+    for (int n = 0; n < num_hwdecs; n++)
+        mp_info(log, "  %s (%s)\n", hwdecs[n].method_name, hwdecs[n].name);
+
+    talloc_free(hwdecs);
+
+    // Aliases in parentheses: '' selects auto, while yes selects auto-safe.
+    mp_info(log, "  auto ('')\n");
+    mp_info(log, "  no\n");
+    mp_info(log, "  auto-safe (yes)\n");
+    mp_info(log, "  auto-copy\n");
+    mp_info(log, "  auto-copy-safe\n");
+
+    return M_OPT_EXIT;
+}
+
+// Drops the failed decoder and initializes the next decoding method instead.
+static void force_fallback(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    uninit_avctx(vd);
+    mp_msg(vd->log, ctx->hwdec_notified ? MSGL_WARN : MSGL_V,
+           "Attempting next decoding method after failure of %.*s.\n",
+           BSTR_P(ctx->attempted_hwdecs[ctx->num_attempted_hwdecs - 1]));
+    select_and_set_hwdec(vd);
+    init_avctx(vd);
+}
+
+// (Re)create the decoder from scratch. ctx->avctx stays NULL if every method,
+// including software decoding, failed to initialize.
+static void reinit(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    uninit_avctx(vd);
+
+    // Forget attempted hwdecs, so that a reconfigured hwdec list (e.g. ctrl+h
+    // toggling between `no` and `auto-safe`) is evaluated from a clean slate.
+    TA_FREEP(&ctx->attempted_hwdecs);
+    ctx->num_attempted_hwdecs = 0;
+    ctx->hwdec_notified = false;
+
+    select_and_set_hwdec(vd);
+    bool use_hwdec = ctx->use_hwdec; // init_avctx() resets it on failure
+    init_avctx(vd);
+
+    // Walk through the remaining methods. A fallback that picked no new hwdec
+    // tried software decoding; if that failed too, retrying would spin forever.
+    while (!ctx->avctx && use_hwdec) {
+        int attempted = ctx->num_attempted_hwdecs;
+        force_fallback(vd);
+        if (ctx->num_attempted_hwdecs == attempted)
+            break;
+    }
+}
+
+// Creates and opens ctx->avctx for the current selection: ctx->hwdec.codec if
+// use_hwdec is set, else ctx->decoder. On failure, ctx->avctx is left NULL.
+static void init_avctx(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    struct vd_lavc_params *lavc_param = ctx->opts;
+    struct mp_codec_params *c = ctx->codec;
+
+    m_config_cache_update(ctx->opts_cache);
+
+    assert(!ctx->avctx);
+    const AVCodec *lavc_codec = NULL;
+    if (ctx->use_hwdec) {
+        lavc_codec = ctx->hwdec.codec;
+    } else {
+        lavc_codec = avcodec_find_decoder_by_name(ctx->decoder);
+    }
+    if (!lavc_codec)
+        return;
+
+    const AVCodecDescriptor *desc = avcodec_descriptor_get(lavc_codec->id);
+    ctx->intra_only = desc && (desc->props & AV_CODEC_PROP_INTRA_ONLY);
+
+    ctx->codec_timebase = mp_get_codec_timebase(ctx->codec);
+
+    // This decoder does not read pkt_timebase correctly yet.
+    if (strstr(lavc_codec->name, "_mmal"))
+        ctx->codec_timebase = (AVRational){1, 1000000};
+
+    ctx->hwdec_failed = false;
+    ctx->hwdec_request_reinit = false;
+    ctx->avctx = avcodec_alloc_context3(lavc_codec);
+    AVCodecContext *avctx = ctx->avctx;
+    if (!ctx->avctx)
+        goto error;
+    avctx->codec_type = AVMEDIA_TYPE_VIDEO;
+    avctx->codec_id = lavc_codec->id;
+    avctx->pkt_timebase = ctx->codec_timebase;
+
+    ctx->pic = av_frame_alloc();
+    if (!ctx->pic)
+        goto error;
+
+    ctx->avpkt = av_packet_alloc();
+    if (!ctx->avpkt)
+        goto error;
+
+    if (ctx->use_hwdec) {
+        avctx->opaque = vd;
+        avctx->thread_count = 1;
+        avctx->hwaccel_flags |= AV_HWACCEL_FLAG_IGNORE_LEVEL;
+        if (!lavc_param->check_hw_profile)
+            avctx->hwaccel_flags |= AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH;
+
+#ifdef AV_HWACCEL_FLAG_UNSAFE_OUTPUT
+        /*
+         * This flag primarily exists for nvdec which has a very limited
+         * output frame pool, which can get exhausted if consumers don't
+         * release frames quickly. However, as an implementation
+         * requirement, we have to copy the frames anyway, so we don't
+         * need this extra implicit copy.
+         */
+        avctx->hwaccel_flags |= AV_HWACCEL_FLAG_UNSAFE_OUTPUT;
+#endif
+
+        if (ctx->hwdec.use_hw_device) {
+            if (ctx->hwdec_dev)
+                avctx->hw_device_ctx = av_buffer_ref(ctx->hwdec_dev);
+            if (!avctx->hw_device_ctx)
+                goto error;
+        }
+        if (ctx->hwdec.use_hw_frames) {
+            if (!ctx->hwdec_dev)
+                goto error;
+        }
+
+        if (ctx->hwdec.pix_fmt != AV_PIX_FMT_NONE)
+            avctx->get_format = get_format_hwdec;
+
+        // Some APIs benefit from this, for others it's additional bloat.
+        if (ctx->hwdec.copying)
+            ctx->max_delay_queue = HWDEC_DELAY_QUEUE_COUNT;
+        ctx->hw_probing = true;
+    } else {
+        mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads);
+    }
+    // The direct get_buffer2 hook is only set for sw decoding with a VO.
+    if (!ctx->use_hwdec && ctx->vo && lavc_param->dr) {
+        avctx->opaque = vd;
+        avctx->get_buffer2 = get_buffer2_direct;
+#if LIBAVCODEC_VERSION_MAJOR < 60
+        AV_NOWARN_DEPRECATED({
+            avctx->thread_safe_callbacks = 1;
+        });
+#endif
+    }
+
+    avctx->flags |= lavc_param->bitexact ? AV_CODEC_FLAG_BITEXACT : 0;
+    avctx->flags2 |= lavc_param->fast ? AV_CODEC_FLAG2_FAST : 0;
+
+    if (lavc_param->show_all)
+        avctx->flags |= AV_CODEC_FLAG_OUTPUT_CORRUPT;
+
+    avctx->skip_loop_filter = lavc_param->skip_loop_filter;
+    avctx->skip_idct = lavc_param->skip_idct;
+    avctx->skip_frame = lavc_param->skip_frame;
+    avctx->apply_cropping = lavc_param->apply_cropping;
+
+    if (lavc_codec->id == AV_CODEC_ID_H264 && lavc_param->old_x264)
+        av_opt_set(avctx, "x264_build", "150", AV_OPT_SEARCH_CHILDREN);
+
+#ifndef AV_CODEC_EXPORT_DATA_FILM_GRAIN
+    if (ctx->opts->film_grain == 1)
+        MP_WARN(vd, "GPU film grain requested, but FFmpeg too old to expose "
+                    "film grain parameters. Please update to latest master, "
+                    "or at least to release 4.4.\n");
+#else
+    switch(ctx->opts->film_grain) {
+    case 0: /*CPU*/
+        // default lavc flags handle film grain within the decoder.
+        break;
+    case 1: /*GPU*/
+        if (!ctx->vo ||
+            (ctx->vo && !(ctx->vo->driver->caps & VO_CAP_FILM_GRAIN))) {
+            MP_MSG(vd, ctx->vo ? MSGL_WARN : MSGL_V,
+                   "GPU film grain requested, but VO %s, expect wrong output.\n",
+                   ctx->vo ?
+                   "does not support applying film grain" :
+                   "is not available at decoder initialization to verify support");
+        }
+
+        avctx->export_side_data |= AV_CODEC_EXPORT_DATA_FILM_GRAIN;
+        break;
+    default: /*auto: export only if the VO reports film grain support*/
+        if (ctx->vo && (ctx->vo->driver->caps & VO_CAP_FILM_GRAIN))
+            avctx->export_side_data |= AV_CODEC_EXPORT_DATA_FILM_GRAIN;
+
+        break;
+    }
+#endif
+
+    mp_set_avopts(vd->log, avctx, lavc_param->avopts);
+
+    // Do this after the above avopt handling in case it changes values
+    ctx->skip_frame = avctx->skip_frame;
+
+    if (mp_set_avctx_codec_headers(avctx, c) < 0) {
+        MP_ERR(vd, "Could not set codec parameters.\n");
+        goto error;
+    }
+
+    /* open it */
+    if (avcodec_open2(avctx, lavc_codec, NULL) < 0)
+        goto error;
+
+    // Sometimes, the first packet contains information required for correct
+    // decoding of the rest of the stream. The only currently known case is the
+    // x264 build number (encoded in a SEI element), needed to enable a
+    // workaround for broken 4:4:4 streams produced by older x264 versions.
+    if (lavc_codec->id == AV_CODEC_ID_H264 && c->first_packet) {
+        mp_set_av_packet(ctx->avpkt, c->first_packet, &ctx->codec_timebase);
+        avcodec_send_packet(avctx, ctx->avpkt);
+        avcodec_receive_frame(avctx, ctx->pic);
+        av_frame_unref(ctx->pic);
+        avcodec_flush_buffers(ctx->avctx);
+    }
+
+    return;
+
+error: // uninit_avctx() frees whatever was set up so far, including ctx->avctx
+    MP_ERR(vd, "Could not open codec.\n");
+    uninit_avctx(vd);
+}
+
+// Flushes the decoder (if one is open) and clears the flushing/reinit flags.
+static void reset_avctx(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    AVCodecContext *avctx = ctx->avctx;
+    if (avctx && avcodec_is_open(avctx))
+        avcodec_flush_buffers(avctx);
+    ctx->hwdec_request_reinit = ctx->flushing = false;
+}
+
+// Discards all queued images and packets, then resets the decoder state.
+static void flush_all(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    int i;
+
+    for (i = 0; i < ctx->num_delay_queue; i++)
+        talloc_free(ctx->delay_queue[i]);
+    for (i = 0; i < ctx->num_sent_packets; i++)
+        talloc_free(ctx->sent_packets[i]);
+    for (i = 0; i < ctx->num_requeue_packets; i++)
+        talloc_free(ctx->requeue_packets[i]);
+
+    // Only the counts are reset; the arrays themselves are left allocated.
+    ctx->num_delay_queue = ctx->num_sent_packets = 0;
+    ctx->num_requeue_packets = 0;
+    reset_avctx(vd);
+}
+
+// Frees the decoder context with everything attached to it and clears the
+// hwdec selection, so that select_and_set_hwdec() can start over.
+static void uninit_avctx(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+
+    // Drop queued packets and images before the libav* objects go away.
+    flush_all(vd);
+
+    av_frame_free(&ctx->pic);
+    mp_free_av_packet(&ctx->avpkt);
+    av_buffer_unref(&ctx->cached_hw_frames_ctx);
+    avcodec_free_context(&ctx->avctx);
+    av_buffer_unref(&ctx->hwdec_dev);
+
+    // Per-decoder hwdec bookkeeping starts from scratch.
+    ctx->hwdec = (struct hwdec_info){0};
+    ctx->use_hwdec = ctx->hw_probing = ctx->hwdec_failed = false;
+    ctx->hwdec_fail_count = ctx->max_delay_queue = 0;
+}
+
+// Set up avctx->hw_frames_ctx for hw_fmt. Returns 0 on success, -1 on error.
+static int init_generic_hwaccel(struct mp_filter *vd, enum AVPixelFormat hw_fmt)
+{
+    struct lavc_ctx *ctx = vd->priv;
+    AVBufferRef *new_frames_ctx = NULL;
+    // Nothing to do for hwdecs that do not use a hw frames context.
+    if (!ctx->hwdec.use_hw_frames)
+        return 0;
+
+    if (!ctx->hwdec_dev) {
+        MP_ERR(ctx, "Missing device context.\n");
+        goto error;
+    }
+    // Let libavcodec derive the frames context parameters for this stream.
+    if (avcodec_get_hw_frames_parameters(ctx->avctx,
+                                ctx->hwdec_dev, hw_fmt, &new_frames_ctx) < 0)
+    {
+        MP_VERBOSE(ctx, "Hardware decoding of this stream is unsupported?\n");
+        goto error;
+    }
+
+    AVHWFramesContext *new_fctx = (void *)new_frames_ctx->data;
+    // A configured hwdec image format overrides the software format.
+    if (ctx->opts->hwdec_image_format)
+        new_fctx->sw_format = imgfmt2pixfmt(ctx->opts->hwdec_image_format);
+
+    // 1 surface is already included by libavcodec. The field is 0 if the
+    // hwaccel supports dynamic surface allocation.
+    if (new_fctx->initial_pool_size)
+        new_fctx->initial_pool_size += ctx->opts->hwdec_extra_frames - 1;
+    // Give the hwcontext backend a chance to adjust the frames context.
+    const struct hwcontext_fns *fns =
+        hwdec_get_hwcontext_fns(new_fctx->device_ctx->type);
+
+    if (fns && fns->refine_hwframes)
+        fns->refine_hwframes(new_frames_ctx);
+
+    // We might be able to reuse a previously allocated frame pool.
+    if (ctx->cached_hw_frames_ctx) {
+        AVHWFramesContext *old_fctx = (void *)ctx->cached_hw_frames_ctx->data;
+        if (new_fctx->format            != old_fctx->format ||
+            new_fctx->sw_format         != old_fctx->sw_format ||
+            new_fctx->width             != old_fctx->width ||
+            new_fctx->height            != old_fctx->height ||
+            new_fctx->initial_pool_size != old_fctx->initial_pool_size)
+            av_buffer_unref(&ctx->cached_hw_frames_ctx);
+    }
+    // No (matching) cached pool: initialize the new one and keep it.
+    if (!ctx->cached_hw_frames_ctx) {
+        if (av_hwframe_ctx_init(new_frames_ctx) < 0) {
+            MP_ERR(ctx, "Failed to allocate hw frames.\n");
+            goto error;
+        }
+
+        ctx->cached_hw_frames_ctx = new_frames_ctx;
+        new_frames_ctx = NULL;
+    }
+    // The decoder gets its own reference to the cached pool.
+    ctx->avctx->hw_frames_ctx = av_buffer_ref(ctx->cached_hw_frames_ctx);
+    if (!ctx->avctx->hw_frames_ctx)
+        goto error;
+    // Still set only if the cached pool was reused; the new context is unneeded.
+    av_buffer_unref(&new_frames_ctx);
+    return 0;
+
+error:
+    av_buffer_unref(&new_frames_ctx);
+    av_buffer_unref(&ctx->cached_hw_frames_ctx);
+    return -1;
+}
+
+// get_format callback for hwdec: pick the hwdec's pixfmt, else lavc's default.
+static enum AVPixelFormat get_format_hwdec(struct AVCodecContext *avctx,
+                                           const enum AVPixelFormat *fmt)
+{
+    struct mp_filter *vd = avctx->opaque;
+    vd_ffmpeg_ctx *p = vd->priv;
+
+    MP_VERBOSE(vd, "Pixel formats supported by decoder:");
+    for (const enum AVPixelFormat *f = fmt; *f != AV_PIX_FMT_NONE; f++)
+        MP_VERBOSE(vd, " %s", av_get_pix_fmt_name(*f));
+    MP_VERBOSE(vd, "\n");
+
+    const char *prof = avcodec_profile_name(avctx->codec_id, avctx->profile);
+    MP_VERBOSE(vd, "Codec profile: %s (0x%x)\n", prof ? prof : "unknown",
+               avctx->profile);
+    assert(p->use_hwdec);
+
+    // A failure recorded earlier becomes a reinit request; then start afresh.
+    p->hwdec_request_reinit |= p->hwdec_failed;
+    p->hwdec_failed = false;
+
+    // Find the hwdec's format among the offered ones and try to set it up.
+    enum AVPixelFormat chosen = AV_PIX_FMT_NONE;
+    for (const enum AVPixelFormat *f = fmt; *f != AV_PIX_FMT_NONE; f++) {
+        if (*f != p->hwdec.pix_fmt)
+            continue;
+        if (init_generic_hwaccel(vd, *f) >= 0)
+            chosen = *f;
+        break;
+    }
+    if (chosen == AV_PIX_FMT_NONE) {
+        p->hwdec_failed = true;
+        chosen = avcodec_default_get_format(avctx, fmt);
+    }
+
+    const char *name = av_get_pix_fmt_name(chosen);
+    MP_VERBOSE(vd, "Requesting pixfmt '%s' from decoder.\n", name ? name : "-");
+    return chosen;
+}
+
+// get_buffer2 callback for direct rendering: lets libavcodec decode into
+// images obtained from the VO. Any failure sets dr_failed, after which every
+// call is passed on to avcodec_default_get_buffer2(). Returns 0 or its result.
+static int get_buffer2_direct(AVCodecContext *avctx, AVFrame *pic, int flags)
+{
+    struct mp_filter *vd = avctx->opaque;
+    vd_ffmpeg_ctx *p = vd->priv;
+    mp_mutex_lock(&p->dr_lock);
+
+    int w = pic->width;
+    int h = pic->height;
+    int linesize_align[AV_NUM_DATA_POINTERS] = {0};
+    avcodec_align_dimensions2(avctx, &w, &h, linesize_align);
+
+    // We assume that different alignments are just different power-of-2s.
+    // Thus, a higher alignment always satisfies a lower alignment.
+    int stride_align = MP_IMAGE_BYTE_ALIGN;
+    for (int n = 0; n < AV_NUM_DATA_POINTERS; n++)
+        stride_align = MPMAX(stride_align, linesize_align[n]);
+
+    // Note: texel sizes may be NPOT, so use full lcm instead of max.
+    // av_pix_fmt_desc_get() returns NULL for unknown formats; skip this step
+    // for them instead of dereferencing NULL (they fall back just below).
+    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format);
+    if (desc && !(desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)) {
+        for (int n = 0; n < desc->nb_components; n++)
+            stride_align = mp_lcm(stride_align, desc->comp[n].step);
+    }
+
+    // No mpv image format for this pixfmt, or DR has already failed before.
+    int imgfmt = pixfmt2imgfmt(pic->format);
+    if (!imgfmt || p->dr_failed)
+        goto fallback;
+
+    // (For simplicity, we realloc on any parameter change, instead of trying
+    // to be clever.)
+    if (stride_align != p->dr_stride_align || w != p->dr_w || h != p->dr_h ||
+        imgfmt != p->dr_imgfmt)
+    {
+        mp_image_pool_clear(p->dr_pool);
+        p->dr_imgfmt = imgfmt;
+        p->dr_w = w;
+        p->dr_h = h;
+        p->dr_stride_align = stride_align;
+        MP_DBG(p, "DR parameter change to %dx%d %s align=%d\n", w, h,
+               mp_imgfmt_to_name(imgfmt), stride_align);
+    }
+
+    struct mp_image *img = mp_image_pool_get_no_alloc(p->dr_pool, imgfmt, w, h);
+    if (!img) {
+        bool host_cached = p->opts->dr == -1; // auto
+        int dr_flags = host_cached ? VO_DR_FLAG_HOST_CACHED : 0;
+        MP_DBG(p, "Allocating new%s DR image...\n", host_cached ? " (host-cached)" : "");
+        img = vo_get_image(p->vo, imgfmt, w, h, stride_align, dr_flags);
+        if (!img) {
+            MP_DBG(p, "...failed..\n");
+            goto fallback;
+        }
+
+        // Now make the mp_image part of the pool. This requires doing magic to
+        // the image, so just add it to the pool and get it back to avoid
+        // dealing with magic ourselves. (Normally this never fails.)
+        mp_image_pool_add(p->dr_pool, img);
+        img = mp_image_pool_get_no_alloc(p->dr_pool, imgfmt, w, h);
+        if (!img)
+            goto fallback;
+    }
+
+    // get_buffer2 callers seem very unappreciative of overwriting pic with a
+    // new reference. The AVCodecContext.get_buffer2 comments tell us exactly
+    // what we should do, so follow that.
+    for (int n = 0; n < 4; n++) {
+        pic->data[n] = img->planes[n];
+        pic->linesize[n] = img->stride[n];
+        pic->buf[n] = img->bufs[n];
+        img->bufs[n] = NULL;
+    }
+    talloc_free(img);
+
+    mp_mutex_unlock(&p->dr_lock);
+    return 0;
+
+fallback:
+    if (!p->dr_failed)
+        MP_VERBOSE(p, "DR failed - disabling.\n");
+    p->dr_failed = true;
+    mp_mutex_unlock(&p->dr_lock);
+    return avcodec_default_get_buffer2(avctx, pic, flags);
+}
+
+// Apply the current frame-drop mode to the decoder and carry out a pending
+// reset request. Does nothing if there is no decoder.
+static void prepare_decoding(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *p = vd->priv;
+    AVCodecContext *dec = p->avctx;
+    if (!dec)
+        return;
+
+    switch (p->framedrop_flags) {
+    case 1:     // normal framedrop
+        dec->skip_frame = p->opts->framedrop;
+        break;
+    case 2:     // hr-seek framedrop; true intra codecs can skip everything
+        dec->skip_frame = p->intra_only ? AVDISCARD_ALL : AVDISCARD_NONREF;
+        break;
+    default:    // normal playback
+        dec->skip_frame = p->skip_frame;
+        break;
+    }
+
+    if (p->hwdec_request_reinit)
+        reset_avctx(vd);
+}
+
+static void handle_err(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    struct vd_lavc_params *opts = ctx->opts;
+
+    MP_WARN(vd, "Error while decoding frame%s!\n",
+            ctx->use_hwdec ? " (hardware decoding)" : "");
+
+    if (ctx->use_hwdec) {
+        ctx->hwdec_fail_count += 1;
+        if (ctx->hwdec_fail_count >= opts->software_fallback)
+            ctx->hwdec_failed = true;
+    }
+}
+
+// Feed one packet to the decoder (a NULL pkt is passed on as NULL). Returns 0
+// or an AVERROR code; AVERROR(EAGAIN) means the packet was not consumed.
+static int send_packet(struct mp_filter *vd, struct demux_packet *pkt)
+{
+    vd_ffmpeg_ctx *p = vd->priv;
+    AVCodecContext *dec = p->avctx;
+    // While packets are requeued, only the head of that queue is accepted.
+    if (p->num_requeue_packets && p->requeue_packets[0] != pkt)
+        return AVERROR(EAGAIN); // cannot consume the packet
+    // A failed hwdec is replaced in receive_frame(); accept nothing until then.
+    if (p->hwdec_failed)
+        return AVERROR(EAGAIN);
+    if (!dec)
+        return AVERROR_EOF;
+
+    prepare_decoding(vd);
+    if (dec->skip_frame == AVDISCARD_ALL)
+        return 0;
+
+    mp_set_av_packet(p->avpkt, pkt, &p->codec_timebase);
+    int err = avcodec_send_packet(dec, pkt ? p->avpkt : NULL);
+    if (err == AVERROR(EAGAIN) || err == AVERROR_EOF)
+        return err;
+
+    // During hwdec probing keep copies of the first packets (at most 32), so
+    // they can be decoded again if a fallback becomes necessary.
+    if (p->hw_probing && p->num_sent_packets < 32 &&
+        p->opts->software_fallback <= 32)
+    {
+        struct demux_packet *copy = pkt ? demux_copy_packet(pkt) : NULL;
+        MP_TARRAY_APPEND(p, p->sent_packets, p->num_sent_packets, copy);
+    }
+    if (err < 0)
+        handle_err(vd);
+    return err;
+}
+
+// Try to send the oldest requeued packet; drop it from the queue unless the
+// decoder refused it with EAGAIN.
+static void send_queued_packet(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *p = vd->priv;
+    assert(p->num_requeue_packets);
+    if (send_packet(vd, p->requeue_packets[0]) == AVERROR(EAGAIN))
+        return;
+    talloc_free(p->requeue_packets[0]);
+    MP_TARRAY_REMOVE_AT(p->requeue_packets, p->num_requeue_packets, 0);
+}
+
+// Receive one frame from the decoder and append it to the delay queue.
+// Returns the libavcodec result: 0, or an AVERROR code (EAGAIN, EOF, errors).
+static int decode_frame(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    AVCodecContext *avctx = ctx->avctx;
+
+    if (!avctx || ctx->force_eof)
+        return AVERROR_EOF;
+
+    prepare_decoding(vd);
+    // Re-send old packets (typically after a hwdec fallback during init).
+    if (ctx->num_requeue_packets)
+        send_queued_packet(vd);
+
+    int ret = avcodec_receive_frame(avctx, ctx->pic);
+    if (ret < 0) {
+        if (ret == AVERROR_EOF) {
+            // If flushing was initialized earlier and has ended now, make it
+            // start over in case we get new packets at some point in the future.
+            // This must take the delay queue into account, so avctx returns EOF
+            // until the delay queue has been drained.
+            if (!ctx->num_delay_queue)
+                reset_avctx(vd);
+        } else if (ret == AVERROR(EAGAIN)) {
+            // just retry after caller writes a packet
+        } else {
+            handle_err(vd);
+        }
+        return ret;
+    }
+
+    // If something was decoded successfully, it must return a frame with valid
+    // data.
+    assert(ctx->pic->buf[0]);
+
+    struct mp_image *mpi = mp_image_from_av_frame(ctx->pic);
+    if (!mpi) {
+        av_frame_unref(ctx->pic);
+        return ret;
+    }
+
+    if (mpi->imgfmt == IMGFMT_CUDA && !mpi->planes[0]) {
+        MP_ERR(vd, "CUDA frame without data. This is a FFmpeg bug.\n");
+        talloc_free(mpi);
+        // Release the decoder's frame here too, like the other exit paths do.
+        av_frame_unref(ctx->pic);
+        handle_err(vd);
+        return AVERROR_BUG;
+    }
+
+    ctx->hwdec_fail_count = 0;
+    mpi->pts = mp_pts_from_av(ctx->pic->pts, &ctx->codec_timebase);
+    mpi->dts = mp_pts_from_av(ctx->pic->pkt_dts, &ctx->codec_timebase);
+    // NOTE(review): the gate below uses the libavformat version for an AVFrame field - confirm.
+    mpi->pkt_duration =
+#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(59, 30, 100)
+        mp_pts_from_av(ctx->pic->duration, &ctx->codec_timebase);
+#else
+        mp_pts_from_av(ctx->pic->pkt_duration, &ctx->codec_timebase);
+#endif
+
+    av_frame_unref(ctx->pic);
+    MP_TARRAY_APPEND(ctx, ctx->delay_queue, ctx->num_delay_queue, mpi);
+    return ret;
+}
+
+// Decode one frame and, if one is due, return it in *out_frame. Returns 0 if
+// a frame was output or the caller should simply retry, else an AVERROR code.
+static int receive_frame(struct mp_filter *vd, struct mp_frame *out_frame)
+{
+    vd_ffmpeg_ctx *ctx = vd->priv;
+    int ret = decode_frame(vd);
+
+    if (ctx->hwdec_failed) {
+        // Failed hardware decoding? Try the next one, and eventually software.
+        struct demux_packet **pkts = ctx->sent_packets;
+        int num_pkts = ctx->num_sent_packets;
+        ctx->sent_packets = NULL;
+        ctx->num_sent_packets = 0;
+        /*
+         * We repeatedly force_fallback until we get an avctx, because there are
+         * certain hwdecs that are really full decoders, and so if these fail,
+         * they also fail to give us a valid avctx, and the early return path
+         * here will simply give up on decoding completely if there is no
+         * decoder. We should never hit an infinite loop as the hwdec list is
+         * finite and we will eventually exhaust it and fall back to software
+         * decoding (and in practice, most hwdecs are hwaccels and so the
+         * decoder will successfully init even if the hwaccel fails later.)
+         */
+        do {
+            force_fallback(vd);
+        } while (!ctx->avctx);
+        // Queue the saved packets; decode_frame() sends them to the new decoder.
+        ctx->requeue_packets = pkts;
+        ctx->num_requeue_packets = num_pkts;
+
+        return 0; // force retry
+    }
+
+    if (ret == AVERROR(EAGAIN) && ctx->num_requeue_packets)
+        return 0; // force retry, so send_queued_packet() gets called
+    // Hold frames back until the delay queue exceeds its limit, or until EOF.
+    if (ctx->num_delay_queue <= ctx->max_delay_queue && ret != AVERROR_EOF)
+        return ret;
+
+    if (!ctx->num_delay_queue)
+        return ret;
+    // Output the oldest queued frame.
+    struct mp_image *res = ctx->delay_queue[0];
+    MP_TARRAY_REMOVE_AT(ctx->delay_queue, ctx->num_delay_queue, 0);
+
+    res = res ? mp_img_swap_to_native(res) : NULL;
+    if (!res)
+        return AVERROR_UNKNOWN;
+    // Copying hwdecs: download the hardware frame into a software image.
+    if (ctx->use_hwdec && ctx->hwdec.copying && res->hwctx) {
+        struct mp_image *sw = mp_image_hw_download(res, ctx->hwdec_swpool);
+        mp_image_unrefp(&res);
+        res = sw;
+        if (!res) {
+            MP_ERR(vd, "Could not copy back hardware decoded frame.\n");
+            ctx->hwdec_fail_count = INT_MAX - 1; // force fallback
+            handle_err(vd);
+            return AVERROR_UNKNOWN;
+        }
+    }
+    // Report the decoding method once.
+    if (!ctx->hwdec_notified) {
+        if (ctx->use_hwdec) {
+            MP_INFO(vd, "Using hardware decoding (%s).\n",
+                    ctx->hwdec.method_name);
+        } else {
+            MP_VERBOSE(vd, "Using software decoding.\n");
+        }
+        ctx->hwdec_notified = true;
+    }
+    // A frame is being output, so probing is over; drop the saved packets.
+    if (ctx->hw_probing) {
+        for (int n = 0; n < ctx->num_sent_packets; n++)
+            talloc_free(ctx->sent_packets[n]);
+        ctx->num_sent_packets = 0;
+        ctx->hw_probing = false;
+    }
+
+    *out_frame = MAKE_FRAME(MP_FRAME_VIDEO, res);
+    return 0;
+}
+
+// Decoder control entry point (VDCTRL_*). Returns CONTROL_UNKNOWN for
+// commands that are not handled or cannot be answered.
+static int control(struct mp_filter *vd, enum dec_ctrl cmd, void *arg)
+{
+    vd_ffmpeg_ctx *p = vd->priv;
+
+    switch (cmd) {
+    case VDCTRL_SET_FRAMEDROP:
+        p->framedrop_flags = *(int *)arg;
+        return CONTROL_TRUE;
+    case VDCTRL_CHECK_FORCED_EOF:
+        *(bool *)arg = p->force_eof;
+        return CONTROL_TRUE;
+    case VDCTRL_GET_BFRAMES:
+        if (!p->avctx)
+            break;
+        // MMAL has arbitrary buffering, thus unknown
+        if (p->use_hwdec && strcmp(p->hwdec.method_name, "mmal") == 0)
+            break;
+        *(int *)arg = p->avctx->has_b_frames;
+        return CONTROL_TRUE;
+    case VDCTRL_GET_HWDEC:
+        *(char **)arg = p->use_hwdec ? p->hwdec.method_name : NULL;
+        return CONTROL_TRUE;
+    case VDCTRL_FORCE_HWDEC_FALLBACK:
+        if (!p->use_hwdec)
+            return CONTROL_FALSE;
+        force_fallback(vd);
+        return p->avctx ? CONTROL_OK : CONTROL_ERROR;
+    case VDCTRL_REINIT:
+        reinit(vd);
+        return CONTROL_TRUE;
+    }
+
+    return CONTROL_UNKNOWN;
+}
+
+// Filter process callback: hand our send/receive callbacks to lavc_process().
+static void process(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *p = vd->priv;
+    lavc_process(vd, &p->state, send_packet, receive_frame);
+}
+
+// Filter reset callback: drop all queued data and clear the processing state.
+static void reset(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *p = vd->priv;
+    flush_all(vd);
+
+    p->framedrop_flags = 0;
+    p->state = (struct lavc_state){0};
+}
+
+// Filter destroy callback: free the decoder, then the DR mutex.
+static void destroy(struct mp_filter *vd)
+{
+    vd_ffmpeg_ctx *p = vd->priv;
+    uninit_avctx(vd);
+
+    mp_mutex_destroy(&p->dr_lock);
+}
+
+static const struct mp_filter_info vd_lavc_filter = { // used by create()
+    .name = "vd_lavc",
+    .priv_size = sizeof(vd_ffmpeg_ctx), // vd->priv is used as vd_ffmpeg_ctx
+    .process = process,
+    .reset = reset,
+    .destroy = destroy,
+};
+
+// Create a decoder instance for the given codec parameters and decoder name.
+// Returns NULL if the filter cannot be created or no codec context results.
+static struct mp_decoder *create(struct mp_filter *parent,
+                                 struct mp_codec_params *codec,
+                                 const char *decoder)
+{
+    struct mp_filter *vd = mp_filter_create(parent, &vd_lavc_filter);
+    if (!vd)
+        return NULL;
+
+    mp_filter_add_pin(vd, MP_PIN_IN, "in");
+    mp_filter_add_pin(vd, MP_PIN_OUT, "out");
+    vd->log = mp_log_new(vd, parent->log, NULL);
+
+    vd_ffmpeg_ctx *p = vd->priv;
+    p->log = vd->log;
+    p->opts_cache = m_config_cache_alloc(p, vd->global, &vd_lavc_conf);
+    p->opts = p->opts_cache->opts;
+    p->codec = codec;
+    p->decoder = talloc_strdup(p, decoder);
+    p->hwdec_swpool = mp_image_pool_new(p);
+    p->dr_pool = mp_image_pool_new(p);
+
+    p->public.f = vd;
+    p->public.control = control;
+    mp_mutex_init(&p->dr_lock);
+
+    // hwdec/DR
+    struct mp_stream_info *si = mp_filter_find_stream_info(vd);
+    if (si) {
+        p->hwdec_devs = si->hwdec_devs;
+        p->vo = si->dr_vo;
+    }
+
+    // reinit() must leave a codec context behind, otherwise creation failed.
+    reinit(vd);
+    if (p->avctx)
+        return &p->public;
+
+    talloc_free(vd);
+    return NULL;
+}
+
+static void add_decoders(struct mp_decoder_list *list)
+{
+    mp_add_lavc_decoders(list, AVMEDIA_TYPE_VIDEO); // video decoders only
+}
+
+const struct mp_decoder_fns vd_lavc = { // entry points of this decoder wrapper
+    .create = create,
+    .add_decoders = add_decoders,
+};
diff --git a/video/drmprime.c b/video/drmprime.c
new file mode 100644
index 0000000..64d793f
--- /dev/null
+++ b/video/drmprime.c
@@ -0,0 +1,43 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libavutil/hwcontext.h>
+
+#include "hwdec.h"
+#include "options/m_config.h"
+#include "video/out/drm_common.h"
+
+extern const struct m_sub_options drm_conf;
+// Create a libavutil DRM device for the configured (or default) device path.
+static struct AVBufferRef *drm_create_standalone(struct mpv_global *global,
+        struct mp_log *log, struct hwcontext_create_dev_params *params)
+{
+    void *tmp = talloc_new(NULL);
+    struct drm_opts *cfg = mp_get_config_group(tmp, global, &drm_conf);
+    const char *path = cfg->device_path;
+    if (!path)
+        path = "/dev/dri/renderD128";
+    AVBufferRef *dev = NULL;
+    av_hwdevice_ctx_create(&dev, AV_HWDEVICE_TYPE_DRM, path, NULL, 0);
+    talloc_free(tmp);
+    return dev;
+}
+
+const struct hwcontext_fns hwcontext_fns_drmprime = { // hwcontext hooks for DRM
+    .av_hwdevice_type = AV_HWDEVICE_TYPE_DRM,
+    .create_dev = drm_create_standalone,
+};
diff --git a/video/filter/refqueue.c b/video/filter/refqueue.c
new file mode 100644
index 0000000..d018e38
--- /dev/null
+++ b/video/filter/refqueue.c
@@ -0,0 +1,356 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <libavutil/buffer.h>
+
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter_internal.h"
+#include "video/mp_image.h"
+
+#include "refqueue.h"
+
+struct mp_refqueue {
+    struct mp_filter *filter;       // filter passed to mp_refqueue_alloc()
+    struct mp_autoconvert *conv;    // converter created for that filter
+    struct mp_pin *in, *out;        // conv->f->pins[1] / filter->ppins[1]
+
+    struct mp_image *in_format;     // current input format (ref without data)
+
+    // Buffered frame in case of format changes.
+    struct mp_image *next;
+
+    int needed_past_frames;         // set by mp_refqueue_set_refs()
+    int needed_future_frames;       // ditto; always at least 1
+    int flags;                      // MP_MODE_* flags
+
+    bool second_field; // second field of the current frame is being output
+    bool eof;          // set on input EOF and while draining for a new format
+
+    // Queue of input frames, used to determine past/current/future frames.
+    // queue[0] is the newest frame, queue[num_queue - 1] the oldest.
+    struct mp_image **queue;
+    int num_queue;
+    // queue[pos] is the current frame, unless pos is an invalid index.
+    int pos;
+};
+
+static bool mp_refqueue_has_output(struct mp_refqueue *q);
+
+static void refqueue_dtor(void *p) // talloc destructor, see mp_refqueue_alloc()
+{
+    struct mp_refqueue *queue = p;
+    mp_refqueue_flush(queue);               // queued and pending frames
+    mp_image_unrefp(&queue->in_format);     // format reference
+    talloc_free(queue->conv->f);            // the converter's filter
+}
+
+// Allocate a refqueue (talloc child of f) fed through an autoconvert filter.
+struct mp_refqueue *mp_refqueue_alloc(struct mp_filter *f)
+{
+    struct mp_refqueue *q = talloc_zero(f, struct mp_refqueue);
+    talloc_set_destructor(q, refqueue_dtor);
+    q->filter = f;
+    q->conv = mp_autoconvert_create(f);
+    MP_HANDLE_OOM(q->conv);
+
+    struct mp_filter *cf = q->conv->f;
+    mp_pin_connect(cf->pins[0], f->ppins[0]);
+    q->in = cf->pins[1];
+    q->out = f->ppins[1];
+    mp_refqueue_flush(q);
+    return q;
+}
+
+void mp_refqueue_add_in_format(struct mp_refqueue *q, int fmt, int subfmt)
+{
+    mp_autoconvert_add_imgfmt(q->conv, fmt, subfmt); // forwarded to the converter
+}
+
+// Set the minimum number of frames kept before and after the current frame.
+void mp_refqueue_set_refs(struct mp_refqueue *q, int past, int future)
+{
+    assert(past >= 0 && future >= 0);
+    q->needed_future_frames = future < 1 ? 1 : future; // >= 1, needed for PTS
+    q->needed_past_frames = past;
+}
+
+// Replace the mode flags with the given combination of MP_MODE_* flags.
+void mp_refqueue_set_mode(struct mp_refqueue *q, int flags)
+{
+    q->flags = flags;
+}
+
+// True if deinterlacing is enabled and applies to the current output frame.
+bool mp_refqueue_should_deint(struct mp_refqueue *q)
+{
+    if (!(q->flags & MP_MODE_DEINT) || !mp_refqueue_has_output(q))
+        return false;
+    if (!(q->flags & MP_MODE_INTERLACED_ONLY))
+        return true; // every frame gets deinterlaced
+    return !!(q->queue[q->pos]->fields & MP_IMGFIELD_INTERLACED);
+}
+
+// Whether the field currently being output is the top field (otherwise it is
+// the bottom field). Assumes the caller forces deinterlacing.
+bool mp_refqueue_is_top_field(struct mp_refqueue *q)
+{
+    if (!mp_refqueue_has_output(q))
+        return false;
+    bool tff = q->queue[q->pos]->fields & MP_IMGFIELD_TOP_FIRST;
+    return tff != q->second_field; // the second field is the opposite one
+}
+
+// Whether the current frame is flagged top-field-first (false if no output).
+bool mp_refqueue_top_field_first(struct mp_refqueue *q)
+{
+    if (!mp_refqueue_has_output(q))
+        return false;
+    struct mp_image *cur = q->queue[q->pos];
+    return !!(cur->fields & MP_IMGFIELD_TOP_FIRST);
+}
+
+// Discard all state: queued frames, position, field/EOF flags, pending frame.
+void mp_refqueue_flush(struct mp_refqueue *q)
+{
+    for (int i = 0; i < q->num_queue; i++)
+        talloc_free(q->queue[i]);
+    q->num_queue = 0;
+    q->pos = -1;
+    q->eof = false;
+    q->second_field = false;
+    mp_image_unrefp(&q->next);
+}
+
+// Queue a new input frame as the newest entry.
+static void mp_refqueue_add_input(struct mp_refqueue *q, struct mp_image *img)
+{
+    assert(img);
+    // New frames go to the front, so the current frame's index moves up by one.
+    MP_TARRAY_INSERT_AT(q, q->queue, q->num_queue, 0, img);
+    q->pos += 1;
+    assert(q->pos >= 0 && q->pos < q->num_queue);
+}
+
+static bool mp_refqueue_need_input(struct mp_refqueue *q)
+{
+    return !q->eof && q->pos < q->needed_future_frames; // future frames missing
+}
+
+static bool mp_refqueue_has_output(struct mp_refqueue *q)
+{
+    return !(q->pos < 0 || mp_refqueue_need_input(q)); // valid current frame
+}
+
+// Try to switch to the second field of the current frame. On success, the
+// frame's PTS is moved forward by half a frame duration and true is returned.
+static bool output_next_field(struct mp_refqueue *q)
+{
+    if (q->second_field || !(q->flags & MP_MODE_OUTPUT_FIELDS) ||
+        !mp_refqueue_should_deint(q))
+        return false;
+
+    assert(q->pos >= 0);
+
+    // If there's no (reasonable) timestamp, also skip the field.
+    if (q->pos == 0)
+        return false;
+
+    struct mp_image *cur = q->queue[q->pos];
+    struct mp_image *following = q->queue[q->pos - 1];
+    if (cur->pts == MP_NOPTS_VALUE || following->pts == MP_NOPTS_VALUE)
+        return false;
+
+    double frametime = following->pts - cur->pts;
+    if (frametime <= 0.0 || frametime >= 1.0)
+        return false;
+
+    // The second field is timed half a frame duration after the first one.
+    cur->pts += frametime / 2;
+    q->second_field = true;
+    return true;
+}
+
+// Make the next (newer) input frame current, even in field output mode.
+static void mp_refqueue_next(struct mp_refqueue *q)
+{
+    if (!mp_refqueue_has_output(q))
+        return;
+    q->second_field = false;
+    q->pos -= 1;
+    assert(q->pos >= -1 && q->pos < q->num_queue);
+    // Frames older than the current one sit at the end of the queue; free
+    // those that exceed the requested number of past frames.
+    for (;;) {
+        int past = q->num_queue - (q->pos + 1);
+        if (past <= q->needed_past_frames)
+            break;
+        assert(q->num_queue > 0);
+        talloc_free(q->queue[q->num_queue - 1]);
+        q->num_queue -= 1;
+    }
+    assert(q->pos >= -1 && q->pos < q->num_queue);
+}
+
+// Advance to the second field if one is due, otherwise to the next frame.
+static void mp_refqueue_next_field(struct mp_refqueue *q)
+{
+    if (!mp_refqueue_has_output(q))
+        return;
+    bool second = output_next_field(q);
+    if (!second)
+        mp_refqueue_next(q);
+}
+
+// Return a frame by position relative to the current one:
+//  -1: first past frame
+//   0: current frame
+//   1: first future frame
+// The queue keeps ownership. Returns NULL if there is no such frame.
+struct mp_image *mp_refqueue_get(struct mp_refqueue *q, int pos)
+{
+    int idx = q->pos - pos; // newer frames have lower queue indexes
+    return (idx < 0 || idx >= q->num_queue) ? NULL : q->queue[idx];
+}
+
+// Like mp_refqueue_get(), but pos counts fields relative to the current field;
+// the frame containing that field is returned.
+struct mp_image *mp_refqueue_get_field(struct mp_refqueue *q, int pos)
+{
+    // If the current field is the second field (conceptually), then pos=1
+    // needs to get the next frame. Similarly, pos=-1 needs to get the current
+    // frame, so round towards negative infinity.
+    int second = mp_refqueue_top_field_first(q) != mp_refqueue_is_top_field(q);
+    int fields = pos < 0 ? pos - 1 + second : pos + second;
+    return mp_refqueue_get(q, fields / 2);
+}
+
+bool mp_refqueue_is_second_field(struct mp_refqueue *q)
+{
+    return q->second_field && mp_refqueue_has_output(q); // false without output
+}
+
+// Returns non-NULL if a format change happened. A format change means a
+// change in image parameters; the checks are broad enough to be sufficient
+// for all users of refqueue.
+// On a format change, the refqueue first drains the remaining frames on its
+// own. Once that is done, this function returns a reference to the first
+// frame of the new format. Reinit the low level video processor from it, then
+// leave the reference alone and continue normally.
+// All frames returned afterwards will have a compatible format.
+struct mp_image *mp_refqueue_execute_reinit(struct mp_refqueue *q)
+{
+    struct mp_image *pending = q->next;
+    if (!pending || mp_refqueue_has_output(q))
+        return NULL;
+
+    // Detach the frame first, since mp_refqueue_flush() unrefs q->next.
+    q->next = NULL;
+    mp_image_unrefp(&q->in_format);
+    mp_refqueue_flush(q);
+
+    // Remember the new format as a reference without image data.
+    q->in_format = mp_image_new_ref(pending);
+    mp_image_unref_data(q->in_format);
+    mp_refqueue_add_input(q, pending);
+    return pending;
+}
+
+// Main processing function. Call this in the filter process function.
+// Returns true if enough input frames are available for filtering and the
+// output pin needs data; in other words, if this returns true, you render a
+// frame and output it.
+// If this returns true, you must call mp_refqueue_write_out_pin() to make
+// progress.
+bool mp_refqueue_can_output(struct mp_refqueue *q)
+{
+    if (!mp_pin_in_needs_data(q->out))
+        return false;
+
+    // Strictly return any output first to reduce latency.
+    if (mp_refqueue_has_output(q))
+        return true;
+
+    if (q->next) {
+        // Make it call again for mp_refqueue_execute_reinit().
+        mp_filter_internal_mark_progress(q->filter);
+        return false;
+    }
+
+    struct mp_frame frame = mp_pin_out_read(q->in);
+    if (frame.type == MP_FRAME_NONE)
+        return false;
+    // EOF: output the frames that became available, then forward the EOF.
+    if (frame.type == MP_FRAME_EOF) {
+        q->eof = true;
+        if (mp_refqueue_has_output(q)) {
+            mp_pin_out_unread(q->in, frame);
+            return true;
+        }
+        mp_pin_in_write(q->out, frame);
+        mp_refqueue_flush(q);
+        return false;
+    }
+
+    if (frame.type != MP_FRAME_VIDEO) {
+        MP_ERR(q->filter, "unsupported frame type\n");
+        mp_frame_unref(&frame);
+        mp_filter_internal_mark_failed(q->filter);
+        return false;
+    }
+
+    struct mp_image *img = frame.data;
+    // Format change: park the frame in q->next and drain the queue via eof.
+    if (!q->in_format || !!q->in_format->hwctx != !!img->hwctx ||
+        (img->hwctx && img->hwctx->data != q->in_format->hwctx->data) ||
+        !mp_image_params_equal(&q->in_format->params, &img->params))
+    {
+        q->next = img;
+        q->eof = true;
+        mp_filter_internal_mark_progress(q->filter);
+        return false;
+    }
+
+    mp_refqueue_add_input(q, img);
+
+    if (mp_refqueue_has_output(q))
+        return true;
+    // Not enough frames queued yet: ask for more input.
+    mp_pin_out_request_data(q->in);
+    return false;
+}
+
+// Output mpi and advance the queue; a NULL mpi marks the filter as failed.
+void mp_refqueue_write_out_pin(struct mp_refqueue *q, struct mp_image *mpi)
+{
+    if (!mpi) {
+        MP_WARN(q->filter, "failed to output frame\n");
+        mp_filter_internal_mark_failed(q->filter);
+    } else {
+        mp_pin_in_write(q->out, MAKE_FRAME(MP_FRAME_VIDEO, mpi));
+    }
+    mp_refqueue_next_field(q);
+}
+
+// Return the image describing the current input format (a reference without
+// data), or NULL if none yet. Owned by q; further queue accesses may free it.
+struct mp_image *mp_refqueue_get_format(struct mp_refqueue *q)
+{
+    return q->in_format;
+}
diff --git a/video/filter/refqueue.h b/video/filter/refqueue.h
new file mode 100644
index 0000000..0a8ace0
--- /dev/null
+++ b/video/filter/refqueue.h
@@ -0,0 +1,39 @@
+#ifndef MP_REFQUEUE_H_
+#define MP_REFQUEUE_H_
+
+#include <stdbool.h>
+
+#include "filters/filter.h"
+
+// A helper for deinterlacers which require past/future reference frames.
+
+struct mp_refqueue;
+
+struct mp_refqueue *mp_refqueue_alloc(struct mp_filter *f);
+
+void mp_refqueue_add_in_format(struct mp_refqueue *q, int fmt, int subfmt);
+
+void mp_refqueue_set_refs(struct mp_refqueue *q, int past, int future);
+void mp_refqueue_flush(struct mp_refqueue *q);
+struct mp_image *mp_refqueue_get(struct mp_refqueue *q, int pos);
+
+struct mp_image *mp_refqueue_execute_reinit(struct mp_refqueue *q);
+bool mp_refqueue_can_output(struct mp_refqueue *q);
+void mp_refqueue_write_out_pin(struct mp_refqueue *q, struct mp_image *mpi);
+
+struct mp_image *mp_refqueue_get_format(struct mp_refqueue *q);
+
+enum { // flags for mp_refqueue_set_mode()
+    MP_MODE_DEINT = (1 << 0),           // deinterlacing enabled
+    MP_MODE_OUTPUT_FIELDS = (1 << 1),   // output each field separately
+    MP_MODE_INTERLACED_ONLY = (1 << 2), // only deinterlace frames flagged interlaced
+};
+
+void mp_refqueue_set_mode(struct mp_refqueue *q, int flags);
+bool mp_refqueue_should_deint(struct mp_refqueue *q);
+bool mp_refqueue_is_top_field(struct mp_refqueue *q);
+bool mp_refqueue_top_field_first(struct mp_refqueue *q);
+bool mp_refqueue_is_second_field(struct mp_refqueue *q);
+struct mp_image *mp_refqueue_get_field(struct mp_refqueue *q, int pos);
+
+#endif
diff --git a/video/filter/vf_d3d11vpp.c b/video/filter/vf_d3d11vpp.c
new file mode 100644
index 0000000..3f00c5a
--- /dev/null
+++ b/video/filter/vf_d3d11vpp.c
@@ -0,0 +1,506 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <windows.h>
+#include <d3d11.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_d3d11va.h>
+
+#include "common/common.h"
+#include "osdep/timer.h"
+#include "osdep/windows_utils.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "refqueue.h"
+#include "video/hwdec.h"
+#include "video/mp_image.h"
+#include "video/mp_image_pool.h"
+
+// missing in MinGW
+// These bits are tested against the ProcessorCaps field of
+// D3D11_VIDEO_PROCESSOR_RATE_CONVERSION_CAPS when picking a rate conversion
+// caps entry. Most of them are also the values of the "mode" filter option.
+#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BLEND 0x1
+#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB 0x2
+#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_ADAPTIVE 0x4
+#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_MOTION_COMPENSATION 0x8
+#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_INVERSE_TELECINE 0x10
+#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_FRAME_RATE_CONVERSION 0x20
+
+// User options of the d3d11vpp filter.
+struct opts {
+    bool deint_enabled;     // "deint": sets MP_MODE_DEINT on the refqueue
+    bool interlaced_only;   // "interlaced-only": sets MP_MODE_INTERLACED_ONLY
+    int mode;               // "mode": one of the *_PROCESSOR_CAPS_* bits, or 0
+                            // for "none"; matched against the rate
+                            // conversion caps of the video processor
+};
+
+// Per-instance state of the d3d11vpp filter.
+struct priv {
+    // Either the user options (owned by this struct via talloc), or a static
+    // all-zero default if the filter was created without options.
+    struct opts *opts;
+
+    // D3D11 device taken from the hwdec device context; an extra reference is
+    // held and released in uninit().
+    ID3D11Device *vo_dev;
+
+    ID3D11DeviceContext *device_ctx;    // immediate context of vo_dev
+    ID3D11VideoDevice *video_dev;       // queried from vo_dev
+    ID3D11VideoContext *video_ctx;      // queried from device_ctx
+
+    // Created by recreate_video_proc(), released by destroy_video_proc().
+    ID3D11VideoProcessor *video_proc;
+    ID3D11VideoProcessorEnumerator *vp_enum;
+    // Input frame format the current enumerator was created with.
+    D3D11_VIDEO_FRAME_FORMAT d3d_frame_format;
+
+    // DXGI format used for textures allocated by alloc_pool().
+    DXGI_FORMAT out_format;
+
+    // Set if the input hw_subfmt differs from the output hw_subfmt, i.e. the
+    // frame must go through render() even without deinterlacing.
+    bool require_filtering;
+
+    // params: image parameters of the current input format.
+    // out_params: copy of params with hw_subfmt replaced by the output one.
+    struct mp_image_params params, out_params;
+    // Size of the input texture the video processor was created for.
+    int c_w, c_h;
+
+    // Pool of output images; allocates through alloc_pool().
+    struct mp_image_pool *pool;
+
+    struct mp_refqueue *queue;
+};
+
+// Free callback attached to pooled output images: arg is the
+// ID3D11Texture2D that backs the image; drop the reference held on it.
+static void release_tex(void *arg)
+{
+    ID3D11Texture2D *tex = arg;
+    ID3D11Texture2D_Release(tex);
+}
+
+// Allocator callback for the output image pool; pctx is the filter.
+// Creates a w x h texture in p->out_format on the VO device and wraps it in
+// an IMGFMT_D3D11 image. The fmt argument is not used.
+// Returns NULL if the texture cannot be created.
+static struct mp_image *alloc_pool(void *pctx, int fmt, int w, int h)
+{
+    struct mp_filter *filter = pctx;
+    struct priv *priv = filter->priv;
+
+    D3D11_TEXTURE2D_DESC desc = {
+        .Width = w,
+        .Height = h,
+        .MipLevels = 1,
+        .ArraySize = 1,
+        .Format = priv->out_format,
+        .SampleDesc = { .Count = 1 },
+        .Usage = D3D11_USAGE_DEFAULT,
+        .BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE,
+    };
+
+    ID3D11Texture2D *tex = NULL;
+    HRESULT res = ID3D11Device_CreateTexture2D(priv->vo_dev, &desc, NULL, &tex);
+    if (FAILED(res))
+        return NULL;
+
+    // The texture reference is handed to the image; release_tex() is
+    // registered as its free callback.
+    struct mp_image *img = mp_image_new_custom_ref(NULL, tex, release_tex);
+    MP_HANDLE_OOM(img);
+
+    mp_image_setfmt(img, IMGFMT_D3D11);
+    mp_image_set_size(img, w, h);
+    img->params.hw_subfmt = priv->out_params.hw_subfmt;
+
+    // Plane 0 carries the texture pointer, plane 1 the array slice (always 0
+    // for textures created here).
+    img->planes[0] = (void *)tex;
+    img->planes[1] = (void *)(intptr_t)0;
+
+    return img;
+}
+
+// Flush the reference queue. Installed as the filter's reset callback and
+// also called from uninit().
+static void flush_frames(struct mp_filter *vf)
+{
+    struct priv *priv = vf->priv;
+    struct mp_refqueue *queue = priv->queue;
+
+    mp_refqueue_flush(queue);
+}
+
+// Release the video processor and its enumerator (if present) and clear the
+// pointers, so calling this again is harmless.
+static void destroy_video_proc(struct mp_filter *vf)
+{
+    struct priv *priv = vf->priv;
+
+    if (priv->video_proc) {
+        ID3D11VideoProcessor_Release(priv->video_proc);
+        priv->video_proc = NULL;
+    }
+
+    if (priv->vp_enum) {
+        ID3D11VideoProcessorEnumerator_Release(priv->vp_enum);
+        priv->vp_enum = NULL;
+    }
+}
+
+// (Re)build the video processor and its enumerator for the current input
+// texture size (c_w/c_h), input frame format and output size, then apply the
+// fixed per-stream settings (source rect, auto processing off, normal output
+// rate, color space). Any existing processor is destroyed first.
+// Returns 0 on success and -1 on failure; on failure no processor or
+// enumerator is left behind.
+static int recreate_video_proc(struct mp_filter *vf)
+{
+    struct priv *priv = vf->priv;
+    HRESULT res;
+
+    destroy_video_proc(vf);
+
+    D3D11_VIDEO_PROCESSOR_CONTENT_DESC content = {
+        .InputFrameFormat = priv->d3d_frame_format,
+        .InputWidth = priv->c_w,
+        .InputHeight = priv->c_h,
+        .OutputWidth = priv->params.w,
+        .OutputHeight = priv->params.h,
+    };
+    res = ID3D11VideoDevice_CreateVideoProcessorEnumerator(priv->video_dev,
+                                                           &content,
+                                                           &priv->vp_enum);
+    if (FAILED(res))
+        goto fail;
+
+    D3D11_VIDEO_PROCESSOR_CAPS vp_caps;
+    res = ID3D11VideoProcessorEnumerator_GetVideoProcessorCaps(priv->vp_enum,
+                                                               &vp_caps);
+    if (FAILED(res))
+        goto fail;
+
+    MP_VERBOSE(vf, "Found %d rate conversion caps. Looking for caps=0x%x.\n",
+               (int)vp_caps.RateConversionCapsCount, priv->opts->mode);
+
+    // Walk all rate conversion caps entries (logging each one) and remember
+    // the first whose ProcessorCaps contains the requested mode bit.
+    int chosen = -1;
+    for (int i = 0; i < vp_caps.RateConversionCapsCount; i++) {
+        D3D11_VIDEO_PROCESSOR_RATE_CONVERSION_CAPS rate_caps;
+        res = ID3D11VideoProcessorEnumerator_GetVideoProcessorRateConversionCaps
+                (priv->vp_enum, i, &rate_caps);
+        if (FAILED(res))
+            goto fail;
+        MP_VERBOSE(vf, "  - %d: 0x%08x\n", i, (unsigned)rate_caps.ProcessorCaps);
+        if (!(rate_caps.ProcessorCaps & priv->opts->mode))
+            continue;
+        MP_VERBOSE(vf, "       (matching)\n");
+        if (chosen < 0)
+            chosen = i;
+    }
+
+    if (chosen < 0) {
+        MP_WARN(vf, "No fitting video processor found, picking #0.\n");
+        chosen = 0;
+    }
+
+    // TODO: so, how do we select which rate conversion mode the processor uses?
+
+    res = ID3D11VideoDevice_CreateVideoProcessor(priv->video_dev, priv->vp_enum,
+                                                 chosen, &priv->video_proc);
+    if (FAILED(res)) {
+        MP_ERR(vf, "Failed to create D3D11 video processor.\n");
+        goto fail;
+    }
+
+    // Note: libavcodec does not support cropping left/top with hwaccel.
+    RECT source_rect = {
+        .right = priv->params.w,
+        .bottom = priv->params.h,
+    };
+    ID3D11VideoContext_VideoProcessorSetStreamSourceRect(priv->video_ctx,
+                                                         priv->video_proc,
+                                                         0, TRUE, &source_rect);
+
+    // This is supposed to stop drivers from fucking up the video quality.
+    ID3D11VideoContext_VideoProcessorSetStreamAutoProcessingMode(priv->video_ctx,
+                                                                 priv->video_proc,
+                                                                 0, FALSE);
+
+    ID3D11VideoContext_VideoProcessorSetStreamOutputRate(priv->video_ctx,
+                                                         priv->video_proc,
+                                                         0,
+                                                         D3D11_VIDEO_PROCESSOR_OUTPUT_RATE_NORMAL,
+                                                         FALSE, 0);
+
+    // The same color space description is used for the input stream and the
+    // output.
+    D3D11_VIDEO_PROCESSOR_COLOR_SPACE colorspace = {
+        .YCbCr_Matrix = priv->params.color.space != MP_CSP_BT_601,
+        .Nominal_Range = priv->params.color.levels == MP_CSP_LEVELS_TV ? 1 : 2,
+    };
+    ID3D11VideoContext_VideoProcessorSetStreamColorSpace(priv->video_ctx,
+                                                         priv->video_proc,
+                                                         0, &colorspace);
+    ID3D11VideoContext_VideoProcessorSetOutputColorSpace(priv->video_ctx,
+                                                         priv->video_proc,
+                                                         &colorspace);
+
+    return 0;
+
+fail:
+    destroy_video_proc(vf);
+    return -1;
+}
+
+// Run the queue's current frame (position 0) through the D3D11 video
+// processor into a freshly allocated pool image.
+// Returns the output image, or NULL on any failure (allocation, no input
+// frame, processor creation, view creation or the blit itself).
+static struct mp_image *render(struct mp_filter *vf)
+{
+    struct priv *p = vf->priv;
+    // Stays negative until the blit succeeded; decides in cleanup whether the
+    // output image is freed.
+    int res = -1;
+    HRESULT hr;
+    ID3D11VideoProcessorInputView *in_view = NULL;
+    ID3D11VideoProcessorOutputView *out_view = NULL;
+    struct mp_image *in = NULL, *out = NULL;
+    out = mp_image_pool_get(p->pool, IMGFMT_D3D11, p->params.w, p->params.h);
+    if (!out) {
+        MP_WARN(vf, "failed to allocate frame\n");
+        goto cleanup;
+    }
+
+    // Pool images keep their texture pointer in planes[0] (see alloc_pool()).
+    ID3D11Texture2D *d3d_out_tex = (void *)out->planes[0];
+
+    // The input frame is only borrowed from the queue; it is not freed here.
+    in = mp_refqueue_get(p->queue, 0);
+    if (!in)
+        goto cleanup;
+    // planes[0] is the texture, planes[1] the array slice within it.
+    ID3D11Texture2D *d3d_tex = (void *)in->planes[0];
+    int d3d_subindex = (intptr_t)in->planes[1];
+
+    mp_image_copy_attributes(out, in);
+
+    // Frame format used for creating the processor: chosen by the frame's
+    // field order (top field first or not), or progressive if not
+    // deinterlacing.
+    D3D11_VIDEO_FRAME_FORMAT d3d_frame_format;
+    if (!mp_refqueue_should_deint(p->queue)) {
+        d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE;
+    } else if (mp_refqueue_top_field_first(p->queue)) {
+        d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST;
+    } else {
+        d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_BOTTOM_FIELD_FIRST;
+    }
+
+    // Recreate the processor if there is none, or if the input texture size
+    // or frame format differs from what it was created for.
+    D3D11_TEXTURE2D_DESC texdesc;
+    ID3D11Texture2D_GetDesc(d3d_tex, &texdesc);
+    if (!p->video_proc || p->c_w != texdesc.Width || p->c_h != texdesc.Height ||
+        p->d3d_frame_format != d3d_frame_format)
+    {
+        p->c_w = texdesc.Width;
+        p->c_h = texdesc.Height;
+        p->d3d_frame_format = d3d_frame_format;
+        if (recreate_video_proc(vf) < 0)
+            goto cleanup;
+    }
+
+    // Frame format set on the stream for this call: unlike above, this is
+    // chosen by which field is currently being output (is_top_field), not by
+    // the frame's field order.
+    if (!mp_refqueue_should_deint(p->queue)) {
+        d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE;
+    } else if (mp_refqueue_is_top_field(p->queue)) {
+        d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST;
+    } else {
+        d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_BOTTOM_FIELD_FIRST;
+    }
+
+    ID3D11VideoContext_VideoProcessorSetStreamFrameFormat(p->video_ctx,
+                                                          p->video_proc,
+                                                          0, d3d_frame_format);
+
+    // Input view onto the selected array slice of the input texture.
+    D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC indesc = {
+        .ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D,
+        .Texture2D = {
+            .ArraySlice = d3d_subindex,
+        },
+    };
+    hr = ID3D11VideoDevice_CreateVideoProcessorInputView(p->video_dev,
+                                                         (ID3D11Resource *)d3d_tex,
+                                                         p->vp_enum, &indesc,
+                                                         &in_view);
+    if (FAILED(hr)) {
+        MP_ERR(vf, "Could not create ID3D11VideoProcessorInputView\n");
+        goto cleanup;
+    }
+
+    // Output view onto the pool texture.
+    D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC outdesc = {
+        .ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2D,
+    };
+    hr = ID3D11VideoDevice_CreateVideoProcessorOutputView(p->video_dev,
+                                                          (ID3D11Resource *)d3d_out_tex,
+                                                          p->vp_enum, &outdesc,
+                                                          &out_view);
+    if (FAILED(hr)) {
+        MP_ERR(vf, "Could not create ID3D11VideoProcessorOutputView\n");
+        goto cleanup;
+    }
+
+    // Single input stream; no past/future reference surfaces are passed.
+    D3D11_VIDEO_PROCESSOR_STREAM stream = {
+        .Enable = TRUE,
+        .pInputSurface = in_view,
+    };
+    // 0 for the first field, 1 for the second; passed as the output frame
+    // index of the blit.
+    int frame = mp_refqueue_is_second_field(p->queue);
+    hr = ID3D11VideoContext_VideoProcessorBlt(p->video_ctx, p->video_proc,
+                                              out_view, frame, 1, &stream);
+    if (FAILED(hr)) {
+        MP_ERR(vf, "VideoProcessorBlt failed.\n");
+        goto cleanup;
+    }
+
+    res = 0;
+cleanup:
+    // The views are released on every path; the output image only survives if
+    // the blit succeeded.
+    if (in_view)
+        ID3D11VideoProcessorInputView_Release(in_view);
+    if (out_view)
+        ID3D11VideoProcessorOutputView_Release(out_view);
+    if (res < 0)
+        TA_FREEP(&out);
+    return out;
+}
+
+// Filter process callback. Handles input format changes reported by the
+// refqueue, then, once the queue can output, either passes the current frame
+// through untouched or sends it through render().
+static void vf_d3d11vpp_process(struct mp_filter *vf)
+{
+    struct priv *priv = vf->priv;
+
+    struct mp_image *new_fmt = mp_refqueue_execute_reinit(priv->queue);
+    if (new_fmt) {
+        // New input format: drop cached output images and the processor, and
+        // recompute the output parameters (output is always NV12).
+        mp_image_pool_clear(priv->pool);
+        destroy_video_proc(vf);
+
+        priv->params = new_fmt->params;
+
+        priv->out_format = DXGI_FORMAT_NV12;
+        priv->out_params = priv->params;
+        priv->out_params.hw_subfmt = IMGFMT_NV12;
+
+        priv->require_filtering =
+            priv->params.hw_subfmt != priv->out_params.hw_subfmt;
+    }
+
+    if (!mp_refqueue_can_output(priv->queue))
+        return;
+
+    if (mp_refqueue_should_deint(priv->queue) || priv->require_filtering) {
+        mp_refqueue_write_out_pin(priv->queue, render(vf));
+        return;
+    }
+
+    // no filtering
+    struct mp_image *passthrough =
+        mp_image_new_ref(mp_refqueue_get(priv->queue, 0));
+    if (!passthrough) {
+        mp_filter_internal_mark_failed(vf);
+        return;
+    }
+    mp_refqueue_write_out_pin(priv->queue, passthrough);
+}
+
+// Filter destroy callback: tears down the video processor, the queue and the
+// image pool, then drops every D3D11 interface reference held in priv.
+// Interfaces which were never acquired are NULL and skipped.
+static void uninit(struct mp_filter *vf)
+{
+    struct priv *priv = vf->priv;
+
+    destroy_video_proc(vf);
+
+    mp_refqueue_flush(priv->queue);
+    talloc_free(priv->queue);
+    talloc_free(priv->pool);
+
+    if (priv->video_ctx) {
+        ID3D11VideoContext_Release(priv->video_ctx);
+    }
+    if (priv->video_dev) {
+        ID3D11VideoDevice_Release(priv->video_dev);
+    }
+    if (priv->device_ctx) {
+        ID3D11DeviceContext_Release(priv->device_ctx);
+    }
+    if (priv->vo_dev) {
+        ID3D11Device_Release(priv->vo_dev);
+    }
+}
+
+// Filter description passed to mp_filter_create(): wires up the callbacks
+// above and makes the framework allocate a struct priv per instance.
+static const struct mp_filter_info vf_d3d11vpp_filter = {
+    .name = "d3d11vpp",
+    .process = vf_d3d11vpp_process,
+    .reset = flush_frames,
+    .destroy = uninit,
+    .priv_size = sizeof(struct priv),
+};
+
+// Create a d3d11vpp filter below parent. Takes ownership of options, which
+// may be NULL (then everything except format conversion is disabled).
+// Returns NULL if the filter cannot be created or if no D3D11 device with
+// the required video interfaces is available.
+static struct mp_filter *vf_d3d11vpp_create(struct mp_filter *parent,
+                                            void *options)
+{
+    struct mp_filter *vf = mp_filter_create(parent, &vf_d3d11vpp_filter);
+    if (!vf) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(vf, MP_PIN_IN, "in");
+    mp_filter_add_pin(vf, MP_PIN_OUT, "out");
+
+    struct priv *priv = vf->priv;
+    priv->opts = talloc_steal(priv, options);
+
+    // Special path for vf_d3d11_create_outconv(): disable all processing except
+    // possibly surface format conversions.
+    if (!priv->opts) {
+        static const struct opts no_opts = {0};
+        priv->opts = (struct opts *)&no_opts;
+    }
+
+    priv->queue = mp_refqueue_alloc(vf);
+
+    struct mp_stream_info *sinfo = mp_filter_find_stream_info(vf);
+    if (!sinfo || !sinfo->hwdec_devs)
+        goto fail;
+
+    // Ask for a D3D11 hwdec device to be made available, then look it up.
+    struct hwdec_imgfmt_request request = {
+        .imgfmt = IMGFMT_D3D11,
+        .probing = false,
+    };
+    hwdec_devices_request_for_img_fmt(sinfo->hwdec_devs, &request);
+
+    struct mp_hwdec_ctx *hwdec =
+        hwdec_devices_get_by_imgfmt(sinfo->hwdec_devs, IMGFMT_D3D11);
+    if (!hwdec || !hwdec->av_device_ref)
+        goto fail;
+    AVHWDeviceContext *av_dev = (void *)hwdec->av_device_ref->data;
+    AVD3D11VADeviceContext *d3d_dev = av_dev->hwctx;
+
+    // Keep our own reference to the device; uninit() releases it.
+    priv->vo_dev = d3d_dev->device;
+    ID3D11Device_AddRef(priv->vo_dev);
+
+    HRESULT res;
+
+    res = ID3D11Device_QueryInterface(priv->vo_dev, &IID_ID3D11VideoDevice,
+                                      (void **)&priv->video_dev);
+    if (FAILED(res))
+        goto fail;
+
+    ID3D11Device_GetImmediateContext(priv->vo_dev, &priv->device_ctx);
+    if (!priv->device_ctx)
+        goto fail;
+    res = ID3D11DeviceContext_QueryInterface(priv->device_ctx,
+                                             &IID_ID3D11VideoContext,
+                                             (void **)&priv->video_ctx);
+    if (FAILED(res))
+        goto fail;
+
+    priv->pool = mp_image_pool_new(vf);
+    mp_image_pool_set_allocator(priv->pool, alloc_pool, vf);
+    mp_image_pool_set_lru(priv->pool);
+
+    mp_refqueue_add_in_format(priv->queue, IMGFMT_D3D11, 0);
+
+    mp_refqueue_set_refs(priv->queue, 0, 0);
+
+    // Fields are always output separately; the other flags follow the options.
+    int mode_flags = MP_MODE_OUTPUT_FIELDS;
+    if (priv->opts->deint_enabled)
+        mode_flags |= MP_MODE_DEINT;
+    if (priv->opts->interlaced_only)
+        mode_flags |= MP_MODE_INTERLACED_ONLY;
+    mp_refqueue_set_mode(priv->queue, mode_flags);
+
+    return vf;
+
+fail:
+    // Freeing the filter is the only cleanup done here.
+    talloc_free(vf);
+    return NULL;
+}
+
+// Option table of the d3d11vpp filter; the fields refer to struct opts.
+// The "mode" choices map directly to the *_PROCESSOR_CAPS_* bits that
+// recreate_video_proc() looks for; "none" is 0 and therefore matches no entry.
+#define OPT_BASE_STRUCT struct opts
+static const m_option_t vf_opts_fields[] = {
+    {"deint", OPT_BOOL(deint_enabled)},
+    {"interlaced-only", OPT_BOOL(interlaced_only)},
+    {"mode", OPT_CHOICE(mode,
+        {"blend", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BLEND},
+        {"bob", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB},
+        {"adaptive", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_ADAPTIVE},
+        {"mocomp", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_MOTION_COMPENSATION},
+        {"ivctc", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_INVERSE_TELECINE},
+        {"none", 0})},
+    {0}
+};
+
+// User-visible filter entry for "d3d11vpp". The defaults enable
+// deinterlacing with the "bob" mode; interlaced_only defaults to false.
+const struct mp_user_filter_entry vf_d3d11vpp = {
+    .desc = {
+        .description = "D3D11 Video Post-Process Filter",
+        .name = "d3d11vpp",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        .priv_defaults = &(const OPT_BASE_STRUCT) {
+            .deint_enabled = true,
+            .mode = D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB,
+        },
+        .options = vf_opts_fields,
+    },
+    .create = vf_d3d11vpp_create,
+};
diff --git a/video/filter/vf_fingerprint.c b/video/filter/vf_fingerprint.c
new file mode 100644
index 0000000..8714382
--- /dev/null
+++ b/video/filter/vf_fingerprint.c
@@ -0,0 +1,229 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+
+#include "common/common.h"
+#include "common/tags.h"
+#include "filters/filter.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+#include "video/img_format.h"
+#include "video/sws_utils.h"
+#include "video/zimg.h"
+
+#include "osdep/timer.h"
+
+// Maximum number of fingerprints kept at a time; when the list is full, the
+// oldest entry is dropped before a new one is added.
+#define PRINT_ENTRY_NUM 10
+
+// User options of the fingerprint filter.
+struct f_opts {
+    int type;       // fingerprint type; the value is the side length in
+                    // pixels of the scaled-down image (see type_names)
+    bool clear;     // "clear-on-query": reset the list after a metadata query
+    bool print;     // "print": log every computed fingerprint
+};
+
+// Names of the fingerprint types and the image side length they stand for.
+// NOTE(review): this has external linkage under a very generic name; it
+// looks like it could be static - confirm nothing else references it.
+const struct m_opt_choice_alternatives type_names[] = {
+    {"gray-hex-8x8",    8},
+    {"gray-hex-16x16",  16},
+    {0}
+};
+
+// Option table; the fields refer to struct f_opts.
+#define OPT_BASE_STRUCT struct f_opts
+static const struct m_option f_opts_list[] = {
+    {"type", OPT_CHOICE_C(type, type_names)},
+    {"clear-on-query", OPT_BOOL(clear)},
+    {"print", OPT_BOOL(print)},
+    {0}
+};
+
+// Defaults: 16x16 fingerprints, cleared on query, not printed.
+static const struct f_opts f_opts_def = {
+    .type = 16,
+    .clear = true,
+};
+
+// One stored fingerprint.
+struct print_entry {
+    double pts;     // pts of the source frame (may be MP_NOPTS_VALUE)
+    char *print;    // hex string, 2 characters per pixel; talloc-allocated
+};
+
+// Per-instance state of the fingerprint filter.
+struct priv {
+    struct f_opts *opts;
+    // Small IMGFMT_Y8 image every input frame is scaled into.
+    struct mp_image *scaled;
+    struct mp_sws_context *sws;     // libswscale fallback scaler
+    struct mp_zimg_context *zimg;   // preferred scaler
+    // The first num_entries elements are valid, oldest first.
+    struct print_entry entries[PRINT_ENTRY_NUM];
+    int num_entries;
+    // Set once the libswscale fallback warning has been shown.
+    bool fallback_warning;
+};
+
+// Drop all stored fingerprints and mark the list as empty.
+// (This is the filter's reset callback, but other code in this file calls it
+// too in order to clear the list.)
+static void f_reset(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+    struct print_entry *entry = priv->entries;
+    struct print_entry *end = entry + priv->num_entries;
+
+    for (; entry < end; entry++)
+        talloc_free(entry->print);
+
+    priv->num_entries = 0;
+}
+
+// Filter process callback. Every video frame is scaled into the small gray
+// image, the pixels of that image are stored as a hex string together with
+// the frame's pts, and the frame itself is passed on unchanged. Signaling
+// frames are forwarded as they are. Anything else (or a failed conversion)
+// marks the filter as failed after forwarding the frame.
+static void f_process(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0]))
+        return;
+
+    struct mp_frame frame = mp_pin_out_read(f->ppins[0]);
+
+    if (mp_frame_is_signaling(frame)) {
+        mp_pin_in_write(f->ppins[1], frame);
+        return;
+    }
+
+    if (frame.type != MP_FRAME_VIDEO)
+        goto error;
+
+    struct mp_image *src = frame.data;
+    struct mp_image *small = priv->scaled;
+
+    // Try to achieve minimum conversion, even if it makes the fingerprints less
+    // "portable" across source video.
+    small->params.color = src->params.color;
+    // Make output always full range; no reason to lose precision.
+    small->params.color.levels = MP_CSP_LEVELS_PC;
+
+    if (!mp_zimg_convert(priv->zimg, small, src)) {
+        if (!priv->fallback_warning) {
+            MP_WARN(f, "Falling back to libswscale.\n");
+            priv->fallback_warning = true;
+        }
+        if (mp_sws_scale(priv->sws, small, src) < 0)
+            goto error;
+    }
+
+    // List full: discard the oldest entry to make room.
+    if (priv->num_entries >= PRINT_ENTRY_NUM) {
+        talloc_free(priv->entries[0].print);
+        MP_TARRAY_REMOVE_AT(priv->entries, priv->num_entries, 0);
+    }
+
+    int dim = small->w;
+
+    struct print_entry *entry = &priv->entries[priv->num_entries++];
+    entry->pts = src->pts;
+    // Two hex digits per pixel plus the terminating NUL.
+    entry->print = talloc_array(priv, char, dim * dim * 2 + 1);
+
+    for (int row = 0; row < dim; row++) {
+        for (int col = 0; col < dim; col++) {
+            uint8_t pixel = small->planes[0][row * small->stride[0] + col];
+            char *dst = &entry->print[(row * dim + col) * 2];
+            snprintf(dst, 3, "%02x", pixel);
+        }
+    }
+
+    if (priv->opts->print)
+        MP_INFO(f, "%f: %s\n", entry->pts, entry->print);
+
+    mp_pin_in_write(f->ppins[1], frame);
+    return;
+
+error:
+    MP_ERR(f, "unsupported video format\n");
+    mp_pin_in_write(f->ppins[1], frame);
+    mp_filter_internal_mark_failed(f);
+}
+
+// Filter command callback. Only MP_FILTER_COMMAND_GET_META is handled: it
+// returns (through cmd->res) a newly allocated tag list with "fpN.pts" (if
+// the pts is known) and "fpN.hex" for every stored fingerprint, plus "type".
+// With the clear-on-query option set, the stored list is reset afterwards.
+// Returns false for every other command.
+static bool f_command(struct mp_filter *f, struct mp_filter_command *cmd)
+{
+    if (cmd->type != MP_FILTER_COMMAND_GET_META)
+        return false;
+
+    struct priv *priv = f->priv;
+    struct mp_tags *tags = talloc_zero(NULL, struct mp_tags);
+
+    for (int i = 0; i < priv->num_entries; i++) {
+        struct print_entry *entry = &priv->entries[i];
+
+        if (entry->pts != MP_NOPTS_VALUE) {
+            mp_tags_set_str(tags, mp_tprintf(80, "fp%d.pts", i),
+                                  mp_tprintf(80, "%f", entry->pts));
+        }
+        mp_tags_set_str(tags, mp_tprintf(80, "fp%d.hex", i), entry->print);
+    }
+
+    mp_tags_set_str(tags, "type",
+                    m_opt_choice_str(type_names, priv->opts->type));
+
+    if (priv->opts->clear)
+        f_reset(f);
+
+    *(struct mp_tags **)cmd->res = tags;
+    return true;
+}
+
+// Filter description passed to mp_filter_create(). No destroy callback is
+// set; f_create() parents the allocations it makes to priv.
+static const struct mp_filter_info filter = {
+    .name = "fingerprint",
+    .process = f_process,
+    .command = f_command,
+    .reset = f_reset,
+    .priv_size = sizeof(struct priv),
+};
+
+// Create a fingerprint filter below parent. Takes ownership of options.
+// Allocates the scaled-down gray image (side length given by the "type"
+// option) and both scaler contexts; all of them are parented to priv.
+static struct mp_filter *f_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *flt = mp_filter_create(parent, &filter);
+    if (!flt) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(flt, MP_PIN_IN, "in");
+    mp_filter_add_pin(flt, MP_PIN_OUT, "out");
+
+    struct priv *priv = flt->priv;
+    priv->opts = talloc_steal(priv, options);
+
+    int dim = priv->opts->type;
+    priv->scaled = mp_image_alloc(IMGFMT_Y8, dim, dim);
+    MP_HANDLE_OOM(priv->scaled);
+    talloc_steal(priv, priv->scaled);
+
+    priv->sws = mp_sws_alloc(priv);
+    MP_HANDLE_OOM(priv->sws);
+
+    priv->zimg = mp_zimg_alloc();
+    talloc_steal(priv, priv->zimg);
+    // Cheap settings: bilinear scaling, no dithering, fast mode.
+    priv->zimg->opts = (struct zimg_opts){
+        .scaler = ZIMG_RESIZE_BILINEAR,
+        .scaler_chroma = ZIMG_RESIZE_BILINEAR,
+        .scaler_params = {NAN, NAN},
+        .scaler_chroma_params = {NAN, NAN},
+        .dither = ZIMG_DITHER_NONE,
+        .fast = true,
+    };
+
+    return flt;
+}
+
+// User-visible filter entry for "fingerprint"; options default to
+// f_opts_def.
+const struct mp_user_filter_entry vf_fingerprint = {
+    .desc = {
+        .description = "Compute video frame fingerprints",
+        .name = "fingerprint",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        .priv_defaults = &f_opts_def,
+        .options = f_opts_list,
+    },
+    .create = f_create,
+};
diff --git a/video/filter/vf_format.c b/video/filter/vf_format.c
new file mode 100644
index 0000000..4997d6f
--- /dev/null
+++ b/video/filter/vf_format.c
@@ -0,0 +1,245 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <math.h>
+
+#include <libavutil/rational.h>
+#include <libavutil/buffer.h>
+
+#include "common/msg.h"
+#include "common/common.h"
+#include "filters/f_autoconvert.h"
+#include "filters/filter.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+
+#include "options/m_option.h"
+
+// Per-instance state of the format filter.
+struct priv {
+    struct vf_format_opts *opts;
+    // Internal conversion filter all frames are routed through.
+    struct mp_autoconvert *conv;
+};
+
+// User options of the format filter. Unless noted otherwise, a value of 0
+// means "leave the corresponding image parameter alone".
+struct vf_format_opts {
+    int fmt;                // output image format (0: keep)
+    int colormatrix;        // overrides color.space
+    int colorlevels;        // overrides color.levels
+    int primaries;          // overrides color.primaries
+    int gamma;              // overrides color.gamma
+    float sig_peak;         // sets color.hdr.max_luma to sig_peak * MP_REF_WHITE
+    int light;              // overrides color.light
+    int chroma_location;    // overrides chroma_location
+    int stereo_in;          // overrides stereo3d
+    int rotate;             // overrides rotate if >= 0 (default is -1)
+    int alpha;              // overrides alpha
+    int w, h;               // image size, used if > 0 (only when converting)
+    int dw, dh;             // display size components, used if > 0
+    double dar;             // display aspect ratio, used if > 0; takes
+                            // precedence over dw/dh
+    bool convert;           // if set, convert the image data to the requested
+                            // parameters instead of only relabeling frames
+    int force_scaler;       // copied to the autoconvert filter's force_scaler
+    bool dovi;              // if false, Dolby Vision data is stripped
+    bool film_grain;        // if false, film grain data is stripped
+};
+
+// Apply the user's overrides in p to the image parameters in out.
+//
+// Each option only takes effect if it is set (non-zero, or >= 0 for rotate).
+// If the gamma option actually changes the transfer function, the HDR
+// metadata and the light type are reset so they can be re-inferred; explicit
+// sig_peak/light options are applied afterwards and still win.
+//
+// p:        filter options
+// out:      parameters to modify in place
+// set_size: whether the w/h options may change the image size; the display
+//           size options (dw/dh/dar) are applied regardless
+static void set_params(struct vf_format_opts *p, struct mp_image_params *out,
+                       bool set_size)
+{
+    if (p->colormatrix)
+        out->color.space = p->colormatrix;
+    if (p->colorlevels)
+        out->color.levels = p->colorlevels;
+    if (p->primaries)
+        out->color.primaries = p->primaries;
+    if (p->gamma) {
+        // Remember the previous transfer function *before* overwriting it.
+        // (Taking it from p->gamma made the comparison below always false, so
+        // the reset never happened.)
+        enum mp_csp_trc in_gamma = out->color.gamma;
+        out->color.gamma = p->gamma;
+        if (in_gamma != out->color.gamma) {
+            // When changing the gamma function explicitly, also reset stuff
+            // related to the gamma function since that information will almost
+            // surely be false now and have to be re-inferred
+            out->color.hdr = (struct pl_hdr_metadata){0};
+            out->color.light = MP_CSP_LIGHT_AUTO;
+        }
+    }
+    if (p->sig_peak)
+        out->color.hdr = (struct pl_hdr_metadata){ .max_luma = p->sig_peak * MP_REF_WHITE };
+    if (p->light)
+        out->color.light = p->light;
+    if (p->chroma_location)
+        out->chroma_location = p->chroma_location;
+    if (p->stereo_in)
+        out->stereo3d = p->stereo_in;
+    if (p->rotate >= 0)
+        out->rotate = p->rotate;
+    if (p->alpha)
+        out->alpha = p->alpha;
+
+    if (p->w > 0 && set_size)
+        out->w = p->w;
+    if (p->h > 0 && set_size)
+        out->h = p->h;
+
+    // Start from the current display size, then let dw/dh replace single
+    // components; dar (if set) replaces the whole ratio.
+    AVRational dsize;
+    mp_image_params_get_dsize(out, &dsize.num, &dsize.den);
+    if (p->dw > 0)
+        dsize.num = p->dw;
+    if (p->dh > 0)
+        dsize.den = p->dh;
+    if (p->dar > 0)
+        dsize = av_d2q(p->dar, INT_MAX);
+    mp_image_params_set_dsize(out, dsize.num, dsize.den);
+}
+
+// Filter process callback. Frames travel: input pin -> autoconvert filter ->
+// output pin.
+//  - On the way in (only with the convert option), the target parameters for
+//    the conversion are derived from the frame and the user options.
+//  - On the way out, video frames are relabeled (if not converting) and
+//    stripped of Dolby Vision / film grain data if those options are off.
+static void vf_format_process(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+    struct vf_format_opts *opts = priv->opts;
+
+    if (mp_pin_can_transfer_data(priv->conv->f->pins[0], f->ppins[0])) {
+        struct mp_frame in_frame = mp_pin_out_read(f->ppins[0]);
+
+        if (opts->convert && in_frame.type == MP_FRAME_VIDEO) {
+            struct mp_image *src = in_frame.data;
+            struct mp_image_params target = src->params;
+            int want_fmt = opts->fmt;
+
+            // If we convert from RGB to YUV, default to limited range.
+            if (mp_imgfmt_get_forced_csp(src->imgfmt) == MP_CSP_RGB &&
+                want_fmt && mp_imgfmt_get_forced_csp(want_fmt) == MP_CSP_AUTO)
+            {
+                target.color.levels = MP_CSP_LEVELS_TV;
+            }
+
+            set_params(opts, &target, true);
+
+            if (want_fmt && target.imgfmt != want_fmt) {
+                target.imgfmt = want_fmt;
+                target.hw_subfmt = 0;
+            }
+            mp_image_params_guess_csp(&target);
+
+            mp_autoconvert_set_target_image_params(priv->conv, &target);
+        }
+
+        mp_pin_in_write(priv->conv->f->pins[0], in_frame);
+    }
+
+    if (mp_pin_can_transfer_data(f->ppins[1], priv->conv->f->pins[1])) {
+        struct mp_frame out_frame = mp_pin_out_read(priv->conv->f->pins[1]);
+
+        // Non-video frames are passed on untouched.
+        if (out_frame.type == MP_FRAME_VIDEO) {
+            struct mp_image *dst = out_frame.data;
+
+            if (!opts->convert) {
+                set_params(opts, &dst->params, false);
+                mp_image_params_guess_csp(&dst->params);
+            }
+
+            if (!opts->dovi) {
+                av_buffer_unref(&dst->dovi);
+                av_buffer_unref(&dst->dovi_buf);
+            }
+
+            if (!opts->film_grain)
+                av_buffer_unref(&dst->film_grain);
+        }
+
+        mp_pin_in_write(f->ppins[1], out_frame);
+    }
+}
+
+// Filter description passed to mp_filter_create(); only a process callback
+// is needed.
+static const struct mp_filter_info vf_format_filter = {
+    .name = "format",
+    .process = vf_format_process,
+    .priv_size = sizeof(struct priv),
+};
+
+// Create a format filter below parent. Takes ownership of options.
+// Sets up the internal autoconvert filter, restricted to the requested
+// image format if one was given. Returns NULL on failure.
+static struct mp_filter *vf_format_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *flt = mp_filter_create(parent, &vf_format_filter);
+    if (!flt) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    struct priv *state = flt->priv;
+    state->opts = talloc_steal(state, options);
+
+    mp_filter_add_pin(flt, MP_PIN_IN, "in");
+    mp_filter_add_pin(flt, MP_PIN_OUT, "out");
+
+    struct mp_autoconvert *conv = mp_autoconvert_create(flt);
+    state->conv = conv;
+    if (!conv) {
+        talloc_free(flt);
+        return NULL;
+    }
+
+    conv->force_scaler = state->opts->force_scaler;
+
+    int want_fmt = state->opts->fmt;
+    if (want_fmt)
+        mp_autoconvert_add_imgfmt(conv, want_fmt, 0);
+
+    return flt;
+}
+
+// Option table of the format filter; the fields refer to struct
+// vf_format_opts. "rotate" accepts -1..359, where -1 (the default) means
+// "do not override".
+#define OPT_BASE_STRUCT struct vf_format_opts
+static const m_option_t vf_opts_fields[] = {
+    {"fmt", OPT_IMAGEFORMAT(fmt)},
+    {"colormatrix", OPT_CHOICE_C(colormatrix, mp_csp_names)},
+    {"colorlevels", OPT_CHOICE_C(colorlevels, mp_csp_levels_names)},
+    {"primaries", OPT_CHOICE_C(primaries, mp_csp_prim_names)},
+    {"gamma", OPT_CHOICE_C(gamma, mp_csp_trc_names)},
+    {"sig-peak", OPT_FLOAT(sig_peak)},
+    {"light", OPT_CHOICE_C(light, mp_csp_light_names)},
+    {"chroma-location", OPT_CHOICE_C(chroma_location, mp_chroma_names)},
+    {"stereo-in", OPT_CHOICE_C(stereo_in, mp_stereo3d_names)},
+    {"rotate", OPT_INT(rotate), M_RANGE(-1, 359)},
+    {"alpha", OPT_CHOICE_C(alpha, mp_alpha_names)},
+    {"w", OPT_INT(w)},
+    {"h", OPT_INT(h)},
+    {"dw", OPT_INT(dw)},
+    {"dh", OPT_INT(dh)},
+    {"dar", OPT_DOUBLE(dar)},
+    {"convert", OPT_BOOL(convert)},
+    {"dolbyvision", OPT_BOOL(dovi)},
+    {"film-grain", OPT_BOOL(film_grain)},
+    {"force-scaler", OPT_CHOICE(force_scaler,
+                                {"auto", MP_SWS_AUTO},
+                                {"sws", MP_SWS_SWS},
+                                {"zimg", MP_SWS_ZIMG})},
+    {0}
+};
+
+// User-visible filter entry for "format". By default nothing is overridden
+// (rotate = -1) and Dolby Vision / film grain data is kept.
+const struct mp_user_filter_entry vf_format = {
+    .desc = {
+        .description = "force output format",
+        .name = "format",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        .priv_defaults = &(const OPT_BASE_STRUCT){
+            .rotate = -1,
+            .dovi = true,
+            .film_grain = true,
+        },
+        .options = vf_opts_fields,
+    },
+    .create = vf_format_create,
+};
diff --git a/video/filter/vf_gpu.c b/video/filter/vf_gpu.c
new file mode 100644
index 0000000..fb11941
--- /dev/null
+++ b/video/filter/vf_gpu.c
@@ -0,0 +1,373 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "common/common.h"
+#include "filters/filter.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/m_config.h"
+#include "options/m_option.h"
+#include "options/options.h"
+#include "video/out/aspect.h"
+#include "video/out/gpu/video.h"
+#include "video/out/opengl/egl_helpers.h"
+#include "video/out/opengl/ra_gl.h"
+// Backend-independent handle for an offscreen rendering context.
+struct offscreen_ctx {
+    struct mp_log *log;
+    struct ra *ra;          // created by the backend; freed in its destructor
+    void *priv;             // backend state (struct gl_offscreen_ctx for EGL)
+    // Optional hook: make the context current (enable) or release it.
+    void (*set_context)(struct offscreen_ctx *ctx, bool enable);
+};
+// EGL/OpenGL backend state, stored in offscreen_ctx.priv.
+struct gl_offscreen_ctx {
+    GL gl;                      // filled in by mpegl_load_functions()
+    EGLDisplay egl_display;     // from eglGetDisplay(EGL_DEFAULT_DISPLAY)
+    EGLContext egl_context;     // destroyed in gl_ctx_destroy() if set
+};
+
+// talloc destructor of the offscreen context: frees the ra, then the EGL context.
+static void gl_ctx_destroy(void *p)
+{
+    struct offscreen_ctx *octx = p;
+    struct gl_offscreen_ctx *egl = octx->priv;
+    ra_free(&octx->ra);
+    if (!egl->egl_context)
+        return;
+    eglDestroyContext(egl->egl_display, egl->egl_context);
+}
+
+static void gl_ctx_set_context(struct offscreen_ctx *ctx, bool enable)
+{
+    struct gl_offscreen_ctx *gl = ctx->priv;
+    EGLContext c = enable ? gl->egl_context : EGL_NO_CONTEXT;
+
+    if (!eglMakeCurrent(gl->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, c))
+        MP_ERR(ctx, "Could not make EGL context current.\n");
+}
+
+// Create an EGL offscreen ra context (not left current); NULL on failure.
+static struct offscreen_ctx *gl_offscreen_ctx_create(struct mpv_global *global,
+                                                     struct mp_log *log)
+{
+    struct offscreen_ctx *ctx = talloc_zero(NULL, struct offscreen_ctx);
+    struct gl_offscreen_ctx *gl = talloc_zero(ctx, struct gl_offscreen_ctx);
+    talloc_set_destructor(ctx, gl_ctx_destroy);
+    *ctx = (struct offscreen_ctx){
+        .log = log,
+        .priv = gl,
+        .set_context = gl_ctx_set_context,
+    };
+
+    // This appears to work with Mesa. EGL 1.5 doesn't specify what a "default
+    // display" is at all.
+    gl->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+    if (!eglInitialize(gl->egl_display, NULL, NULL)) {
+        MP_ERR(ctx, "Could not initialize EGL.\n");
+        goto error;
+    }
+
+    // Unfortunately, mpegl_create_context() is entangled with ra_ctx.
+    // Fortunately, it does not need much, and we can provide a stub.
+    struct ra_ctx ractx = {
+        .log = ctx->log,
+        .global = global,
+    };
+    EGLConfig config;
+    if (!mpegl_create_context(&ractx, gl->egl_display, &gl->egl_context, &config))
+    {
+        MP_ERR(ctx, "Could not create EGL context.\n");
+        goto error;
+    }
+
+    if (!eglMakeCurrent(gl->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE,
+                        gl->egl_context))
+    {
+        MP_ERR(ctx, "Could not make EGL context current.\n");
+        goto error;
+    }
+
+    mpegl_load_functions(&gl->gl, ctx->log);
+    ctx->ra = ra_create_gl(&gl->gl, ctx->log);
+    // Unbind on both outcomes, so a failed ra creation does not leave the
+    // about-to-be-destroyed context current on this thread.
+    gl_ctx_set_context(ctx, false);
+    if (!ctx->ra)
+        goto error;
+    return ctx;
+
+error:
+    talloc_free(ctx);
+    return NULL;
+}
+// Run the backend's set_context hook; does nothing if the hook is unset.
+static void offscreen_ctx_set_current(struct offscreen_ctx *ctx, bool enable)
+{
+    if (ctx->set_context != NULL)
+        (*ctx->set_context)(ctx, enable);
+}
+// User options of the "gpu" filter.
+struct gpu_opts {
+    int w, h;   // output size; values <= 0 are replaced by the input frame size
+};
+// Per-instance state of the "gpu" filter.
+struct priv {
+    struct gpu_opts *opts;                  // filter options, owned by priv
+    struct m_config_cache *vo_opts_cache;   // polled for changes on each frame
+    struct mp_vo_opts *vo_opts;             // points at vo_opts_cache->opts
+
+    struct offscreen_ctx *ctx;
+    struct gl_video *renderer;
+    struct ra_tex *target;                  // render target, created on first use
+
+    struct mp_image_params img_params;      // params last passed to gl_video_config()
+    uint64_t next_frame_id;                 // incremented for every rendered frame
+};
+// Render `in` offscreen; returns a new IMGFMT_RGB0 image, or NULL on error.
+static struct mp_image *gpu_render_frame(struct mp_filter *f, struct mp_image *in)
+{
+    struct priv *priv = f->priv;
+    bool ok = false;
+    struct mp_image *res = NULL;
+    struct ra *ra = priv->ctx->ra;
+    // Non-positive output dimensions are replaced (persistently) by this frame's.
+    if (priv->opts->w <= 0)
+        priv->opts->w = in->w;
+    if (priv->opts->h <= 0)
+        priv->opts->h = in->h;
+
+    int w = priv->opts->w;
+    int h = priv->opts->h;
+    // Wrap the input image in a single-frame vo_frame for the renderer.
+    struct vo_frame frame = {
+        .pts = in->pts,
+        .duration = -1,
+        .num_vsyncs = 1,
+        .current = in,
+        .num_frames = 1,
+        .frames = {in},
+        .frame_id = ++(priv->next_frame_id),
+    };
+
+    bool need_reconfig = m_config_cache_update(priv->vo_opts_cache);
+    // Changed image parameters also force a renderer reconfiguration.
+    if (!mp_image_params_equal(&priv->img_params, &in->params)) {
+        priv->img_params = in->params;
+        gl_video_config(priv->renderer, &in->params);
+        need_reconfig = true;
+    }
+
+    if (need_reconfig) {
+        struct mp_rect src, dst;
+        struct mp_osd_res osd;
+
+        struct mp_stream_info *info = mp_filter_find_stream_info(f);
+        struct osd_state *osd_state = info ? info->osd : NULL;
+        if (osd_state) {
+            osd_set_render_subs_in_filter(osd_state, true);
+            // Assume the osd_state doesn't somehow disappear.
+            gl_video_set_osd_source(priv->renderer, osd_state);
+        }
+
+        mp_get_src_dst_rects(f->log, priv->vo_opts, VO_CAP_ROTATE90, &in->params,
+                             w, h, 1, &src, &dst, &osd);
+
+        gl_video_resize(priv->renderer, &src, &dst, &osd);
+    }
+    // The render target is created once (w x h) and reused for later frames.
+    if (!priv->target) {
+        struct ra_tex_params params = {
+            .dimensions = 2,
+            .downloadable = true,
+            .w = w,
+            .h = h,
+            .d = 1,
+            .render_dst = true,
+        };
+
+        params.format = ra_find_unorm_format(ra, 1, 4);
+
+        if (!params.format || !params.format->renderable)
+            goto done;
+
+        priv->target = ra_tex_create(ra, &params);
+        if (!priv->target)
+            goto done;
+    }
+    // Render into the target with the subtitle flags set.
+    // (it doesn't have access to the OSD though)
+    int flags = RENDER_FRAME_SUBS | RENDER_FRAME_VF_SUBS;
+    gl_video_render_frame(priv->renderer, &frame, (struct ra_fbo){priv->target},
+                          flags);
+    // Download the rendered texture into a newly allocated image.
+    res = mp_image_alloc(IMGFMT_RGB0, w, h);
+    if (!res)
+        goto done;
+
+    struct ra_tex_download_params download_params = {
+        .tex = priv->target,
+        .dst = res->planes[0],
+        .stride = res->stride[0],
+    };
+    if (!ra->fns->tex_download(ra, &download_params))
+        goto done;
+
+    ok = true;
+done:
+    if (!ok)
+        TA_FREEP(&res);     // drop a partially produced image on failure
+    return res;
+}
+
+// Process hook: pass signaling frames through, render video frames on the GPU.
+static void gpu_process(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0]))
+        return;
+
+    struct mp_frame frame = mp_pin_out_read(f->ppins[0]);
+    if (mp_frame_is_signaling(frame)) {
+        mp_pin_in_write(f->ppins[1], frame);
+        return;
+    }
+
+    if (frame.type != MP_FRAME_VIDEO)
+        goto error;
+
+    offscreen_ctx_set_current(priv->ctx, true);
+    struct mp_image *mpi = frame.data;
+    struct mp_image *res = gpu_render_frame(f, mpi);
+    if (!res) {
+        MP_ERR(f, "Could not render or retrieve frame.\n");
+        goto error;
+    }
+    // Release the context on success too, matching gpu_reset()/gpu_destroy().
+    offscreen_ctx_set_current(priv->ctx, false);
+
+    // It's not clear which parameters to copy.
+    res->pts = mpi->pts;
+    res->dts = mpi->dts;
+    res->nominal_fps = mpi->nominal_fps;
+    talloc_free(mpi);
+
+    mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_VIDEO, res));
+    return;
+
+error:
+    mp_frame_unref(&frame);
+    mp_filter_internal_mark_failed(f);
+    offscreen_ctx_set_current(priv->ctx, false);
+}
+// Reset hook: resets the renderer while the offscreen context is current.
+static void gpu_reset(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    struct offscreen_ctx *octx = p->ctx;
+    offscreen_ctx_set_current(octx, true);
+    gl_video_reset(p->renderer);
+    offscreen_ctx_set_current(octx, false);
+}
+
+// Destroy hook: free renderer and target texture, then the context itself.
+static void gpu_destroy(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    struct offscreen_ctx *octx = p->ctx;
+
+    // The GPU objects are released with their context made current.
+    if (octx) {
+        offscreen_ctx_set_current(octx, true);
+        gl_video_uninit(p->renderer);
+        ra_tex_free(octx->ra, &p->target);
+        offscreen_ctx_set_current(octx, false);
+    }
+    talloc_free(octx);
+}
+// Callback table handed to mp_filter_create() by gpu_create().
+static const struct mp_filter_info gpu_filter = {
+    .name = "gpu",
+    .process = gpu_process,
+    .reset = gpu_reset,
+    .destroy = gpu_destroy,
+    .priv_size = sizeof(struct priv),   // f->priv is used as struct priv
+};
+
+// Constructor of the "gpu" filter. Takes ownership of options; returns NULL
+// if the filter or its offscreen rendering context cannot be set up.
+static struct mp_filter *gpu_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &gpu_filter);
+    if (f == NULL) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    p->opts = talloc_steal(p, options);
+    p->vo_opts_cache = m_config_cache_alloc(f, f->global, &vo_sub_opts);
+    p->vo_opts = p->vo_opts_cache->opts;
+
+    struct offscreen_ctx *octx = gl_offscreen_ctx_create(f->global, f->log);
+    p->ctx = octx;
+    if (octx == NULL) {
+        MP_FATAL(f, "Could not create offscreen ra context.\n");
+        goto error;
+    }
+    if (octx->ra->fns->tex_download == NULL) {
+        MP_FATAL(f, "Offscreen ra context does not support image retrieval.\n");
+        goto error;
+    }
+
+    offscreen_ctx_set_current(octx, true);
+    p->renderer = gl_video_init(octx->ra, f->log, f->global);
+    assert(p->renderer); // can't fail (strangely)
+    offscreen_ctx_set_current(octx, false);
+
+    MP_WARN(f, "This is experimental. Keep in mind:\n");
+    MP_WARN(f, " - OSD rendering is done in software.\n");
+    MP_WARN(f, " - Encoding will convert the RGB output to yuv420p in software.\n");
+    MP_WARN(f, " - Using this with --vo=gpu will filter the video twice!\n");
+    MP_WARN(f, "   (And you can't prevent this; they use the same options.)\n");
+    MP_WARN(f, " - Some features are simply not supported.\n");
+
+    return f;
+
+error:
+    talloc_free(f);
+    return NULL;
+}
+// Registration entry of the "gpu" filter, with its option table inline.
+#define OPT_BASE_STRUCT struct gpu_opts
+const struct mp_user_filter_entry vf_gpu = {
+    .desc = {
+        .description = "vo_gpu as filter",
+        .name = "gpu",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        .options = (const struct m_option[]){
+            {"w", OPT_INT(w)},  // output width; <= 0 means "use the input width"
+            {"h", OPT_INT(h)},  // output height; <= 0 means "use the input height"
+            {0}
+        },
+    },
+    .create = gpu_create,
+};
diff --git a/video/filter/vf_sub.c b/video/filter/vf_sub.c
new file mode 100644
index 0000000..de7f787
--- /dev/null
+++ b/video/filter/vf_sub.c
@@ -0,0 +1,164 @@
+/*
+ * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov@gmail.com>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+#include <libavutil/common.h>
+
+#include "common/msg.h"
+#include "filters/filter.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "options/options.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+#include "video/mp_image_pool.h"
+#include "sub/osd.h"
+#include "sub/dec_sub.h"
+
+#include "video/sws_utils.h"
+
+#include "options/m_option.h"
+// User options of the "sub" filter.
+struct vf_sub_opts {
+    int top_margin, bottom_margin;  // rows added above/below the video frame
+};
+// Per-instance state of the "sub" filter.
+struct priv {
+    struct vf_sub_opts *opts;
+    struct mp_image_pool *pool; // enlarged frames; also passed to the OSD drawing call
+};
+// Process hook: add the configured margins to the frame and draw subtitles on it.
+static void vf_sub_process(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0]))
+        return;
+
+    struct mp_frame frame = mp_pin_out_read(f->ppins[0]);
+
+    if (mp_frame_is_signaling(frame)) {
+        mp_pin_in_write(f->ppins[1], frame);
+        return;
+    }
+    // Subtitles are drawn from the OSD state attached to the stream info.
+    struct mp_stream_info *info = mp_filter_find_stream_info(f);
+    struct osd_state *osd = info ? info->osd : NULL;
+
+    if (!osd)
+        goto error;
+
+    osd_set_render_subs_in_filter(osd, true);
+
+    if (frame.type != MP_FRAME_VIDEO)
+        goto error;
+
+    struct mp_image *mpi = frame.data;
+    // Output geometry: the frame grows by the top and bottom margins.
+    struct mp_osd_res dim = {
+        .w = mpi->w,
+        .h = mpi->h + priv->opts->top_margin + priv->opts->bottom_margin,
+        .mt = priv->opts->top_margin,
+        .mb = priv->opts->bottom_margin,
+        .display_par = mpi->params.p_w / (double)mpi->params.p_h,
+    };
+    // With margins: copy the video into a taller frame and clear the bars.
+    if (dim.w != mpi->w || dim.h != mpi->h) {
+        struct mp_image *dmpi =
+            mp_image_pool_get(priv->pool, mpi->imgfmt, dim.w, dim.h);
+        if (!dmpi)
+            goto error;
+        mp_image_copy_attributes(dmpi, mpi);
+        int y1 = MP_ALIGN_DOWN(priv->opts->top_margin, mpi->fmt.align_y);
+        int y2 = MP_ALIGN_DOWN(y1 + mpi->h, mpi->fmt.align_y);
+        struct mp_image cropped = *dmpi;
+        mp_image_crop(&cropped, 0, y1, mpi->w, y1 + mpi->h);
+        mp_image_copy(&cropped, mpi);
+        mp_image_clear(dmpi, 0, 0, dmpi->w, y1);
+        mp_image_clear(dmpi, 0, y2, dmpi->w, dim.h);
+        mp_frame_unref(&frame);     // releases the original image (old mpi)
+        mpi = dmpi;
+        frame = (struct mp_frame){MP_FRAME_VIDEO, mpi};
+    }
+
+    osd_draw_on_image_p(osd, dim, mpi->pts, OSD_DRAW_SUB_FILTER, priv->pool, mpi);
+
+    mp_pin_in_write(f->ppins[1], frame);
+    return;
+
+error:
+    MP_ERR(f, "unsupported format, missing OSD, or failed allocation\n");
+    mp_frame_unref(&frame);
+    mp_filter_internal_mark_failed(f);
+}
+// Destroy hook: switch in-filter subtitle rendering off again on the stream's OSD.
+static void vf_sub_destroy(struct mp_filter *f)
+{
+    struct mp_stream_info *sinfo = mp_filter_find_stream_info(f);
+    if (!sinfo || !sinfo->osd)
+        return;
+    osd_set_render_subs_in_filter(sinfo->osd, false);
+}
+// Callback table handed to mp_filter_create() by vf_sub_create().
+static const struct mp_filter_info vf_sub_filter = {
+    .name = "sub",
+    .process = vf_sub_process,
+    .destroy = vf_sub_destroy,
+    .priv_size = sizeof(struct priv),
+};
+
+// Constructor of the "sub" filter; takes ownership of options.
+static struct mp_filter *vf_sub_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &vf_sub_filter);
+    if (f == NULL) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    p->opts = talloc_steal(p, options);
+    p->pool = mp_image_pool_new(p);
+    return f;
+}
+// Option table of the "sub" filter; both margins are limited to 0..2000.
+#define OPT_BASE_STRUCT struct vf_sub_opts
+static const m_option_t vf_opts_fields[] = {
+    {"bottom-margin", OPT_INT(bottom_margin), M_RANGE(0, 2000)},
+    {"top-margin", OPT_INT(top_margin), M_RANGE(0, 2000)},
+    {0}
+};
+// Registration entry of the "sub" filter: metadata, option table, constructor.
+const struct mp_user_filter_entry vf_sub = {
+    .desc = {
+        .description = "Render subtitles",
+        .name = "sub",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        .options = vf_opts_fields,
+    },
+    .create = vf_sub_create,
+};
diff --git a/video/filter/vf_vapoursynth.c b/video/filter/vf_vapoursynth.c
new file mode 100644
index 0000000..583a196
--- /dev/null
+++ b/video/filter/vf_vapoursynth.c
@@ -0,0 +1,892 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <assert.h>
+
+#include <VapourSynth.h>
+#include <VSHelper.h>
+
+#include <libavutil/rational.h>
+#include <libavutil/cpu.h>
+
+#include "common/msg.h"
+#include "filters/f_autoconvert.h"
+#include "filters/f_utils.h"
+#include "filters/filter_internal.h"
+#include "filters/filter.h"
+#include "filters/user_filters.h"
+#include "options/m_option.h"
+#include "options/path.h"
+#include "osdep/threads.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+#include "video/sws_utils.h"
+// User options of the vapoursynth filter.
+struct vapoursynth_opts {
+    char *file;         // presumably the script path -- used outside this chunk
+    int maxbuffer;      // NOTE(review): looks like the input queue size -- confirm
+    int maxrequests;    // NOTE(review): looks like the request limit -- confirm
+    // Script backend to use (see struct script_driver).
+    const struct script_driver *drv;
+};
+// Per-instance state of the vapoursynth filter.
+struct priv {
+    struct mp_log *log;
+    struct vapoursynth_opts *opts;
+    char *script_path;
+
+    VSCore *vscore;             // made available by drv->load_core()
+    const VSAPI *vsapi;         // made available by drv->load_core()
+    VSNodeRef *out_node;        // set by drv->load(); output frames come from it
+    VSNodeRef *in_node;
+
+    const struct script_driver *drv;
+    // drv_vss
+    bool vs_initialized;
+    struct VSScript *se;
+
+    struct mp_filter *f;        // woken up when a frame request completes
+    struct mp_pin *in_pin;      // input frames are read from this pin
+
+    // Format for which VS is currently configured.
+    struct mp_image_params fmt_in;
+
+    mp_mutex lock;
+    mp_cond wakeup;             // broadcast on input, EOF and request completion
+
+    // --- the following members are all protected by lock
+    struct mp_image **buffered; // oldest image first
+    int num_buffered;
+    int in_frameno;             // frame number of buffered[0] (the oldest)
+    int requested_frameno;      // last frame number for which we woke up core
+    int out_frameno;            // frame number of first requested/ready frame
+    double out_pts;             // pts corresponding to first requested/ready frame
+    struct mp_image **requested;// frame callback results (can point to dummy_img)
+                                // requested[0] is the frame to return first
+    int max_requests;           // upper bound for requested[] array
+    bool failed;                // frame callback returned with an error
+    bool shutdown;              // ask node to return
+    bool eof;                   // drain remaining data
+    int64_t frames_sent;        // total nr. of frames ever added to input queue
+    bool initializing;          // filters are being built
+    bool in_node_active;        // node might still be called
+};
+
+// Sentinel: priv->requested[n] points to this while frame n's request is pending.
+static const struct mp_image dummy_img;
+// Sentinel: stored in requested[n] when the request failed while p->eof was set.
+static const struct mp_image dummy_img_eof;
+// Forward declarations.
+static void destroy_vs(struct priv *p);
+static int reinit_vs(struct priv *p, struct mp_image *input);
+// Hooks a script backend provides for loading VapourSynth and the user script.
+struct script_driver {
+    int (*init)(struct priv *p);                // first time init
+    void (*uninit)(struct priv *p);             // last time uninit
+    int (*load_core)(struct priv *p);           // make vsapi/vscore available
+    int (*load)(struct priv *p, VSMap *vars);   // also sets p->out_node
+    void (*unload)(struct priv *p);             // unload script and maybe vs
+};
+// One VapourSynth preset format plus the properties compare_fmt() matches on.
+struct mpvs_fmt {
+    VSPresetFormat vs;
+    int bits, xs, ys;   // bit depth; compared with chroma_xs/chroma_ys of mpv formats
+};
+// Columns: VS preset, bits, xs, ys. The {pfNone} entry (bits == 0) ends the table.
+static const struct mpvs_fmt mpvs_fmt_table[] = {
+    {pfYUV420P8,  8,  1, 1},
+    {pfYUV420P9,  9,  1, 1},
+    {pfYUV420P10, 10, 1, 1},
+    {pfYUV420P16, 16, 1, 1},
+    {pfYUV422P8,  8,  1, 0},
+    {pfYUV422P9,  9,  1, 0},
+    {pfYUV422P10, 10, 1, 0},
+    {pfYUV422P16, 16, 1, 0},
+    {pfYUV410P8,  8,  2, 2},
+    {pfYUV411P8,  8,  2, 0},
+    {pfYUV440P8,  8,  0, 1},
+    {pfYUV444P8,  8,  0, 0},
+    {pfYUV444P9,  9,  0, 0},
+    {pfYUV444P10, 10, 0, 0},
+    {pfYUV444P16, 16, 0, 0},
+    {pfNone}
+};
+
+// True if imgfmt is a regular 3-plane format (one component per plane, in
+// order, no positive padding) with the entry's bit depth and chroma shifts.
+static bool compare_fmt(int imgfmt, const struct mpvs_fmt *vs)
+{
+    struct mp_regular_imgfmt desc;
+    if (!mp_get_regular_imgfmt(&desc, imgfmt))
+        return false;
+
+    int depth = desc.component_size * 8 + desc.component_pad;
+    bool layout_ok = desc.component_pad <= 0 && desc.num_planes == 3 &&
+                     desc.chroma_xs == vs->xs && desc.chroma_ys == vs->ys &&
+                     depth == vs->bits;
+    if (!layout_ok)
+        return false;
+    for (int plane = 0; plane < 3; plane++) {
+        if (desc.planes[plane].num_components != 1 ||
+            desc.planes[plane].components[0] != plane + 1)
+            return false;
+    }
+    return true;
+}
+
+// Map an mpv image format to the matching VS preset, or pfNone if none matches.
+static VSPresetFormat mp_to_vs(int imgfmt)
+{
+    for (const struct mpvs_fmt *e = mpvs_fmt_table; e->bits; e++) {
+        if (compare_fmt(imgfmt, e))
+            return e->vs;
+    }
+    return pfNone;
+}
+
+// Map a VS preset to the first mpv image format that matches it; 0 if none.
+static int mp_from_vs(VSPresetFormat vs)
+{
+    const struct mpvs_fmt *e = mpvs_fmt_table;
+    while (e->bits && e->vs != vs)
+        e++;
+    if (!e->bits)
+        return 0;   // preset is not in the table
+    for (int fmt = IMGFMT_START; fmt < IMGFMT_END; fmt++) {
+        if (compare_fmt(fmt, e))
+            return fmt;
+    }
+    return 0;
+}
+
+// Export img's sample aspect ratio, color info, picture type and field order
+// as underscore-prefixed VapourSynth frame properties in map.
+static void copy_mp_to_vs_frame_props_map(struct priv *p, VSMap *map,
+                                          struct mp_image *img)
+{
+    const VSAPI *api = p->vsapi;
+    const struct mp_image_params *par = &img->params;
+
+    api->propSetInt(map, "_SARNum", par->p_w, 0);
+    api->propSetInt(map, "_SARDen", par->p_h, 0);
+    if (par->color.levels != 0) {
+        bool limited = par->color.levels == MP_CSP_LEVELS_TV;
+        api->propSetInt(map, "_ColorRange", limited, 0);
+    }
+    // The docs explicitly say it uses libavcodec values.
+    api->propSetInt(map, "_ColorSpace", mp_csp_to_avcol_spc(par->color.space), 0);
+    if (par->chroma_location != 0) {
+        bool center = par->chroma_location == MP_CHROMA_CENTER;
+        api->propSetInt(map, "_ChromaLocation", center, 0);
+    }
+    // pict_type 1/2/3 map to 'I'/'P'/'B'; other values set no property.
+    char type_char = 0;
+    if (img->pict_type >= 1 && img->pict_type <= 3)
+        type_char = "IPB"[img->pict_type - 1];
+    if (type_char)
+        api->propSetData(map, "_PictType", &type_char, 1, 0);
+    bool interlaced = img->fields & MP_IMGFIELD_INTERLACED;
+    bool tff = img->fields & MP_IMGFIELD_TOP_FIRST;
+    api->propSetInt(map, "_FieldBased", interlaced ? (tff ? 2 : 1) : 0, 0);
+}
+// Set duration and image properties on frame; -1 if it has no writable prop map.
+static int set_vs_frame_props(struct priv *p, VSFrameRef *frame,
+                              struct mp_image *img, int dur_num, int dur_den)
+{
+    VSMap *props = p->vsapi->getFramePropsRW(frame);
+    if (props) {
+        p->vsapi->propSetInt(props, "_DurationNum", dur_num, 0);
+        p->vsapi->propSetInt(props, "_DurationDen", dur_den, 0);
+        copy_mp_to_vs_frame_props_map(p, props, img);
+    }
+    return props ? 0 : -1;
+}
+// Allocate a VS frame for fmt; NULL if fmt has no VS preset or allocation fails.
+static VSFrameRef *alloc_vs_frame(struct priv *p, struct mp_image_params *fmt)
+{
+    const VSAPI *api = p->vsapi;
+    const VSFormat *vf = api->getFormatPreset(mp_to_vs(fmt->imgfmt), p->vscore);
+    return vf ? api->newVideoFrame(vf, fmt->w, fmt->h, NULL, p->vscore) : NULL;
+}
+
+// Describe the planes of a VS frame as an mp_image without copying pixel
+// data; w selects the frame's write pointers instead of its read pointers.
+static struct mp_image map_vs_frame(struct priv *p, const VSFrameRef *ref,
+                                    bool w)
+{
+    const VSAPI *api = p->vsapi;
+    const VSFormat *vsfmt = api->getFrameFormat(ref);
+
+    struct mp_image wrapped = {0};
+    mp_image_setfmt(&wrapped, mp_from_vs(vsfmt->id));
+    int width = api->getFrameWidth(ref, 0);
+    int height = api->getFrameHeight(ref, 0);
+    mp_image_set_size(&wrapped, width, height);
+
+    for (int plane = 0; plane < wrapped.num_planes; plane++) {
+        wrapped.planes[plane] = w ? api->getWritePtr((VSFrameRef *)ref, plane)
+                                  : (uint8_t *)api->getReadPtr(ref, plane);
+        wrapped.stride[plane] = api->getStride(ref, plane);
+    }
+    return wrapped;
+}
+// Free buffered[0] (if any), shift the queue down by one, advance in_frameno.
+static void drain_oldest_buffered_frame(struct priv *p)
+{
+    if (p->num_buffered == 0)
+        return;
+    talloc_free(p->buffered[0]);
+    p->num_buffered--;
+    for (int i = 0; i < p->num_buffered; i++)
+        p->buffered[i] = p->buffered[i + 1];
+    p->in_frameno++;
+}
+// getFrameAsync() callback: store frame n's result in requested[], wake the filter.
+static void VS_CC vs_frame_done(void *userData, const VSFrameRef *f, int n,
+                                VSNodeRef *node, const char *errorMsg)
+{
+    struct priv *p = userData;
+    struct mp_image *res = NULL;
+    if (f) {
+        struct mp_image img = map_vs_frame(p, f, false);
+        struct mp_image dummy = {.params = p->fmt_in};
+        if (p->fmt_in.w != img.w || p->fmt_in.h != img.h)
+            dummy.params.crop = (struct mp_rect){0, 0, img.w, img.h};
+        mp_image_copy_attributes(&img, &dummy);
+        img.pkt_duration = -1;
+        const VSMap *map = p->vsapi->getFramePropsRO(f);
+        if (map) {
+            int err1, err2;
+            int num = p->vsapi->propGetInt(map, "_DurationNum", 0, &err1);
+            int den = p->vsapi->propGetInt(map, "_DurationDen", 0, &err2);
+            // A zero denominator is not a usable duration; treat it as missing.
+            if (!err1 && !err2 && den != 0)
+                img.pkt_duration = num / (double)den;
+        }
+        if (img.pkt_duration < 0) {
+            MP_ERR(p, "No PTS after filter at frame %d!\n", n);
+        } else if (img.pkt_duration > 0) {
+            // Zero-length frames have no meaningful frame rate.
+            img.nominal_fps = 1.0 / img.pkt_duration;
+        }
+        res = mp_image_new_copy(&img);
+        p->vsapi->freeFrame(f);
+    }
+
+    mp_mutex_lock(&p->lock);
+    // If these assertions fail, n is an unrequested frame (or filtered twice).
+    assert(n >= p->out_frameno && n < p->out_frameno + p->max_requests);
+    int index = n - p->out_frameno;
+    MP_TRACE(p, "filtered frame %d (%d)\n", n, index);
+    assert(p->requested[index] == &dummy_img);
+    if (!res && !p->shutdown) {
+        if (p->eof) {
+            res = (struct mp_image *)&dummy_img_eof;
+        } else {
+            p->failed = true;
+            MP_ERR(p, "Filter error at frame %d: %s\n", n,
+                   errorMsg ? errorMsg : "could not copy frame");
+        }
+    }
+    p->requested[index] = res;
+    mp_cond_broadcast(&p->wakeup);
+    mp_mutex_unlock(&p->lock);
+    mp_filter_wakeup(p->f);
+}
+// Process hook: queue input for VS, return finished frames, request new ones.
+static void vf_vapoursynth_process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+
+    mp_mutex_lock(&p->lock);
+
+    if (p->failed) {
+        // Not sure what we do on errors, but at least don't deadlock.
+        MP_ERR(f, "failed, no action taken\n");
+        mp_filter_internal_mark_failed(f);
+        goto done;
+    }
+
+    // Read input and pass it to the input queue VS reads.
+    while (p->num_buffered < MP_TALLOC_AVAIL(p->buffered) && !p->eof) {
+        // Note: this requests new input frames even if no output was ever
+        // requested. Normally this is not how mp_filter works, but since VS
+        // works asynchronously, it's probably ok.
+        struct mp_frame frame = mp_pin_out_read(p->in_pin);
+        if (frame.type == MP_FRAME_EOF) {
+            if (p->out_node && !p->eof) {
+                MP_VERBOSE(p, "initiate EOF\n");
+                p->eof = true;
+                mp_cond_broadcast(&p->wakeup);
+            }
+            if (!p->out_node && mp_pin_in_needs_data(f->ppins[1])) {
+                MP_VERBOSE(p, "return EOF\n");
+                mp_pin_in_write(f->ppins[1], frame);
+            } else {
+                // Keep it until we can propagate it.
+                mp_pin_out_unread(p->in_pin, frame);
+                break;
+            }
+        } else if (frame.type == MP_FRAME_VIDEO) {
+            struct mp_image *mpi = frame.data;
+            // Init VS script, or reinit it to change video format. (This
+            // includes derived parameters we pass manually to the script.)
+            if (!p->out_node || mpi->imgfmt != p->fmt_in.imgfmt ||
+                mpi->w != p->fmt_in.w || mpi->h != p->fmt_in.h ||
+                mpi->params.p_w != p->fmt_in.p_w ||
+                mpi->params.p_h != p->fmt_in.p_h)
+            {
+                if (p->out_node) {
+                    // Drain still buffered frames.
+                    MP_VERBOSE(p, "draining VS for format change\n");
+                    mp_pin_out_unread(p->in_pin, frame);
+                    p->eof = true;
+                    mp_cond_broadcast(&p->wakeup);
+                    mp_filter_internal_mark_progress(f);
+                    goto done;
+                }
+                mp_mutex_unlock(&p->lock);  // (re)init below runs without the lock
+                if (p->out_node) // NOTE(review): looks unreachable, see check above
+                    destroy_vs(p);
+                p->fmt_in = mpi->params;
+                if (reinit_vs(p, mpi) < 0) {
+                    MP_ERR(p, "could not init VS\n");
+                    mp_frame_unref(&frame);
+                    mp_filter_internal_mark_failed(f);
+                    return;                 // the lock is not held at this point
+                }
+                mp_mutex_lock(&p->lock);
+            }
+            if (p->out_pts == MP_NOPTS_VALUE)
+                p->out_pts = mpi->pts;
+            p->frames_sent++;
+            p->buffered[p->num_buffered++] = mpi;
+            mp_cond_broadcast(&p->wakeup);
+        } else if (frame.type != MP_FRAME_NONE) {
+            MP_ERR(p, "discarding unknown frame type\n");
+            mp_frame_unref(&frame);
+            goto done;
+        } else {
+            break; // no new data available
+        }
+    }
+
+    // Read output and return them from the VS output queue.
+    if (mp_pin_in_needs_data(f->ppins[1]) && p->requested[0] &&
+        p->requested[0] != &dummy_img &&
+        p->requested[0] != &dummy_img_eof)
+    {
+        struct mp_image *out = p->requested[0];
+
+        out->pts = p->out_pts;
+        if (p->out_pts != MP_NOPTS_VALUE && out->pkt_duration >= 0)
+            p->out_pts += out->pkt_duration;
+
+        mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_VIDEO, out));
+
+        for (int n = 0; n < p->max_requests - 1; n++)
+            p->requested[n] = p->requested[n + 1];
+        p->requested[p->max_requests - 1] = NULL;
+        p->out_frameno++;
+    }
+
+    // This happens on EOF draining and format changes.
+    if (p->requested[0] == &dummy_img_eof) {
+        MP_VERBOSE(p, "finishing up\n");
+        assert(p->eof);
+        mp_mutex_unlock(&p->lock);
+        destroy_vs(p);
+        mp_filter_internal_mark_progress(f);
+        return;
+    }
+
+    // Don't request frames if we haven't sent any input yet.
+    if (p->frames_sent && p->out_node) {
+        // Request new future frames as far as possible.
+        for (int n = 0; n < p->max_requests; n++) {
+            if (!p->requested[n]) {
+                // Note: this assumes getFrameAsync() will never call
+                //       infiltGetFrame (if it does, we would deadlock)
+                p->requested[n] = (struct mp_image *)&dummy_img;
+                p->failed = false;
+                MP_TRACE(p, "requesting frame %d (%d)\n", p->out_frameno + n, n);
+                p->vsapi->getFrameAsync(p->out_frameno + n, p->out_node,
+                                        vs_frame_done, p);
+            }
+        }
+    }
+
+done:
+    mp_mutex_unlock(&p->lock);
+}
+
+// Filter "init" callback of our input node: publishes the video format of the
+// frames mpv will feed in, or reports an error through the out map if the
+// input image format has no VapourSynth format preset.
+// Note: this is called from createFilter, so no need for locking.
+static void VS_CC infiltInit(VSMap *in, VSMap *out, void **instanceData,
+                             VSNode *node, VSCore *core, const VSAPI *vsapi)
+{
+    struct priv *p = *instanceData;
+
+    VSVideoInfo vi = {0};
+    vi.format = p->vsapi->getFormatPreset(mp_to_vs(p->fmt_in.imgfmt), p->vscore);
+    vi.width = p->fmt_in.w;
+    vi.height = p->fmt_in.h;
+    // The number of frames of our input node is obviously unknown. The user
+    // could for example seek any time, randomly "ending" the clip.
+    // This specific value was suggested by the VapourSynth developer.
+    vi.numFrames = INT_MAX / 16;
+
+    if (vi.format) {
+        p->vsapi->setVideoInfo(&vi, 1, node);
+        p->in_node_active = true;
+    } else {
+        p->vsapi->setError(out, "Unsupported input format.\n");
+    }
+}
+
+// Filter "getFrame" callback of our input node. Waits on p->wakeup until the
+// requested frame is present in p->buffered[], then returns a VapourSynth
+// frame holding a copy of it.
+// Returns NULL (after calling setFilterError) if the filter is shutting down,
+// the frame lies before the oldest buffered frame, EOF was reached before the
+// frame arrived, or the VS frame could not be allocated.
+// Only frameno and frameCtx are used from the callback arguments; the API
+// table is taken from p->vsapi.
+static const VSFrameRef *VS_CC infiltGetFrame(int frameno, int activationReason,
+    void **instanceData, void **frameData,
+    VSFrameContext *frameCtx, VSCore *core,
+    const VSAPI *vsapi)
+{
+    struct priv *p = *instanceData;
+    VSFrameRef *ret = NULL;
+
+    mp_mutex_lock(&p->lock);
+    MP_TRACE(p, "VS asking for frame %d (at %d)\n", frameno, p->in_frameno);
+    // Each iteration re-evaluates the state; the loop is left with "break"
+    // once a frame or an error has been produced.
+    while (1) {
+        if (p->shutdown) {
+            p->vsapi->setFilterError("EOF or filter reset/uninit", frameCtx);
+            MP_DBG(p, "returning error on reset/uninit\n");
+            break;
+        }
+        if (p->initializing) {
+            // No real input exists yet: hand back a cleared frame in the input
+            // format with duration 0/1 instead of blocking.
+            MP_WARN(p, "Frame requested during init! This is unsupported.\n"
+                        "Returning black dummy frame with 0 duration.\n");
+            ret = alloc_vs_frame(p, &p->fmt_in);
+            if (!ret) {
+                p->vsapi->setFilterError("Could not allocate VS frame", frameCtx);
+                break;
+            }
+            struct mp_image vsframe = map_vs_frame(p, ret, true);
+            mp_image_clear(&vsframe, 0, 0, p->fmt_in.w, p->fmt_in.h);
+            struct mp_image dummy = {0};
+            mp_image_set_params(&dummy, &p->fmt_in);
+            set_vs_frame_props(p, ret, &dummy, 0, 1);
+            break;
+        }
+        // The request lies before the oldest frame we still hold.
+        if (frameno < p->in_frameno) {
+            char msg[180];
+            snprintf(msg, sizeof(msg),
+                "Frame %d requested, but only have frames starting from %d. "
+                "Try increasing the buffered-frames suboption.",
+                frameno, p->in_frameno);
+            MP_FATAL(p, "%s\n", msg);
+            p->vsapi->setFilterError(msg, frameCtx);
+            break;
+        }
+        if (frameno >= p->in_frameno + MP_TALLOC_AVAIL(p->buffered)) {
+            // Too far in the future. Remove frames, so that the main thread can
+            // queue new frames.
+            if (p->num_buffered) {
+                drain_oldest_buffered_frame(p);
+                mp_cond_broadcast(&p->wakeup);
+                mp_filter_wakeup(p->f);
+                continue;
+            }
+        }
+        if (frameno >= p->in_frameno + p->num_buffered) {
+            // If there won't be any new frames, abort the request.
+            if (p->eof) {
+                p->vsapi->setFilterError("EOF or filter EOF/reinit", frameCtx);
+                MP_DBG(p, "returning error on EOF/reinit\n");
+                break;
+            }
+            // Request more frames.
+            if (p->requested_frameno <= p->in_frameno + p->num_buffered) {
+                p->requested_frameno = p->in_frameno + p->num_buffered + 1;
+                mp_filter_wakeup(p->f);
+            }
+        } else {
+            // The frame is buffered: copy it into a newly allocated VS frame.
+            struct mp_image *img = p->buffered[frameno - p->in_frameno];
+            ret = alloc_vs_frame(p, &img->params);
+            if (!ret) {
+                p->vsapi->setFilterError("Could not allocate VS frame", frameCtx);
+                break;
+            }
+
+            // The lock is dropped while the image data is copied.
+            mp_mutex_unlock(&p->lock);
+            struct mp_image vsframe = map_vs_frame(p, ret, true);
+            mp_image_copy(&vsframe, img);
+            // Duration is passed as the pair dur/res; +0.5 rounds the scaled
+            // value before it is truncated to int.
+            int res = 1e6;
+            int dur = img->pkt_duration * res + 0.5;
+            set_vs_frame_props(p, ret, img, dur, res);
+            mp_mutex_lock(&p->lock);
+            break;
+        }
+        // Nothing to return yet; sleep until the state changes.
+        mp_cond_wait(&p->wakeup, &p->lock);
+    }
+    // Wake any other waiters on p->wakeup before leaving.
+    mp_cond_broadcast(&p->wakeup);
+    mp_mutex_unlock(&p->lock);
+    return ret;
+}
+
+// Filter "free" callback of our input node: records that the node is no longer
+// active and wakes everything waiting on the wakeup condition.
+static void VS_CC infiltFree(void *instanceData, VSCore *core, const VSAPI *vsapi)
+{
+    struct priv *priv = instanceData;
+
+    mp_mutex_lock(&priv->lock);
+    priv->in_node_active = false;
+    mp_cond_broadcast(&priv->wakeup);
+    mp_mutex_unlock(&priv->lock);
+}
+
+// Number of getFrameAsync() requests still in progress, i.e. the number of
+// slots in p->requested[] that still hold the dummy_img placeholder.
+// Must be called with p->lock held.
+static int num_requested(struct priv *p)
+{
+    int pending = 0;
+    for (int slot = 0; slot < p->max_requests; slot++) {
+        if (p->requested[slot] == &dummy_img)
+            pending++;
+    }
+    return pending;
+}
+
+// Tears down the VapourSynth side of the filter and resets all per-script
+// state in p, so that reinit_vs() can start from scratch.
+// Does nothing if there is neither an output node nor an init in progress.
+// Must be called without p->lock held (the lock is taken internally).
+static void destroy_vs(struct priv *p)
+{
+    if (!p->out_node && !p->initializing)
+        return;
+
+    MP_DBG(p, "destroying VS filters\n");
+
+    // Wait until our frame callbacks return.
+    // Setting shutdown makes infiltGetFrame() fail its requests instead of
+    // waiting for more input.
+    mp_mutex_lock(&p->lock);
+    p->initializing = false;
+    p->shutdown = true;
+    mp_cond_broadcast(&p->wakeup);
+    while (num_requested(p))
+        mp_cond_wait(&p->wakeup, &p->lock);
+    mp_mutex_unlock(&p->lock);
+
+    MP_DBG(p, "all requests terminated\n");
+
+    // Release our node references before the driver unloads the script.
+    if (p->in_node)
+        p->vsapi->freeNode(p->in_node);
+    if (p->out_node)
+        p->vsapi->freeNode(p->out_node);
+    p->in_node = p->out_node = NULL;
+
+    p->drv->unload(p);
+
+    // infiltFree() clears in_node_active; it is expected to have run by now.
+    assert(!p->in_node_active);
+    assert(num_requested(p) == 0); // async callback didn't return?
+
+    p->shutdown = false;
+    p->eof = false;
+    p->frames_sent = 0;
+    // Kill filtered images that weren't returned yet
+    // (the EOF marker is a static dummy and must not be unreferenced)
+    for (int n = 0; n < p->max_requests; n++) {
+        if (p->requested[n] != &dummy_img_eof)
+            mp_image_unrefp(&p->requested[n]);
+        p->requested[n] = NULL;
+    }
+    // Kill queued frames too
+    for (int n = 0; n < p->num_buffered; n++)
+        talloc_free(p->buffered[n]);
+    p->num_buffered = 0;
+    p->out_frameno = p->in_frameno = 0;
+    p->requested_frameno = 0;
+    p->failed = false;
+
+    MP_DBG(p, "uninitialized.\n");
+}
+
+// (Re)creates the VapourSynth filter graph for the input format in p->fmt_in:
+// destroys any previous instance, loads the core through the script driver,
+// creates our "Input" source filter, exports it and some playback properties
+// as script variables, runs the user script and validates its output node.
+//
+// input: only input->nominal_fps is read (exported as "container_fps").
+// Returns 0 on success and -1 on failure. On failure no VapourSynth state is
+// left behind.
+static int reinit_vs(struct priv *p, struct mp_image *input)
+{
+    VSMap *vars = NULL, *in = NULL, *out = NULL;
+    int res = -1;
+
+    destroy_vs(p);
+
+    MP_DBG(p, "initializing...\n");
+
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(p->fmt_in.imgfmt);
+    if (p->fmt_in.w % desc.align_x || p->fmt_in.h % desc.align_y) {
+        MP_FATAL(p, "VapourSynth does not allow unaligned/cropped video sizes.\n");
+        return -1;
+    }
+
+    // From here on destroy_vs() has work to do, so all failures go through
+    // the error label.
+    p->initializing = true;
+    p->out_pts = MP_NOPTS_VALUE;
+
+    if (p->drv->load_core(p) < 0 || !p->vsapi || !p->vscore) {
+        MP_FATAL(p, "Could not get vapoursynth API handle.\n");
+        goto error;
+    }
+
+    in = p->vsapi->createMap();
+    out = p->vsapi->createMap();
+    vars = p->vsapi->createMap();
+    if (!in || !out || !vars)
+        goto error;
+
+    // Create the source filter that feeds mpv's frames into the script; its
+    // node is returned in the "clip" key of the out map.
+    p->vsapi->createFilter(in, out, "Input", infiltInit, infiltGetFrame,
+                           infiltFree, fmSerial, 0, p, p->vscore);
+    int vserr;
+    p->in_node = p->vsapi->propGetNode(out, "clip", 0, &vserr);
+    if (!p->in_node) {
+        MP_FATAL(p, "Could not get our own input node.\n");
+        goto error;
+    }
+
+    if (p->vsapi->propSetNode(vars, "video_in", p->in_node, 0))
+        goto error;
+
+    int d_w, d_h;
+    mp_image_params_get_dsize(&p->fmt_in, &d_w, &d_h);
+
+    p->vsapi->propSetInt(vars, "video_in_dw", d_w, 0);
+    p->vsapi->propSetInt(vars, "video_in_dh", d_h, 0);
+
+    // Display properties stay 0 if no stream info (or no getter) is available.
+    struct mp_stream_info *info = mp_filter_find_stream_info(p->f);
+    double container_fps = input->nominal_fps;
+    double display_fps = 0;
+    int64_t display_res[2] = {0};
+    if (info) {
+        if (info->get_display_fps)
+            display_fps = info->get_display_fps(info);
+        if (info->get_display_res) {
+            int tmp[2] = {0};
+            info->get_display_res(info, tmp);
+            display_res[0] = tmp[0];
+            display_res[1] = tmp[1];
+        }
+    }
+    p->vsapi->propSetFloat(vars, "container_fps", container_fps, 0);
+    p->vsapi->propSetFloat(vars, "display_fps", display_fps, 0);
+    p->vsapi->propSetIntArray(vars, "display_res", display_res, 2);
+
+    if (p->drv->load(p, vars) < 0)
+        goto error;
+    if (!p->out_node) {
+        MP_FATAL(p, "Could not get script output node.\n");
+        goto error;
+    }
+
+    // VapourSynth reports a NULL format for clips whose format can vary
+    // between frames; such output cannot be mapped to a single mpv image
+    // format, so treat it like any other unsupported format.
+    const VSVideoInfo *vi = p->vsapi->getVideoInfo(p->out_node);
+    if (!vi || !vi->format || !mp_from_vs(vi->format->id)) {
+        MP_FATAL(p, "Unsupported output format.\n");
+        goto error;
+    }
+
+    mp_mutex_lock(&p->lock);
+    p->initializing = false;
+    mp_mutex_unlock(&p->lock);
+    MP_DBG(p, "initialized.\n");
+    res = 0;
+error:
+    // The maps are only needed during setup; free them on every path that got
+    // far enough to have an API table.
+    if (p->vsapi) {
+        p->vsapi->freeMap(in);
+        p->vsapi->freeMap(out);
+        p->vsapi->freeMap(vars);
+    }
+    if (res < 0)
+        destroy_vs(p);
+    return res;
+}
+
+// Filter reset callback: drops the whole VapourSynth instance via destroy_vs().
+static void vf_vapoursynth_reset(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+    destroy_vs(priv);
+}
+
+// Filter destroy callback: tears down the VapourSynth instance, uninitializes
+// the script driver, destroys the synchronization primitives and frees the
+// child filters.
+static void vf_vapoursynth_destroy(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    destroy_vs(priv);
+    priv->drv->uninit(priv);
+
+    mp_cond_destroy(&priv->wakeup);
+    mp_mutex_destroy(&priv->lock);
+
+    mp_filter_free_children(f);
+}
+
+// Filter description passed to mp_filter_create(): callbacks and the size of
+// the per-instance private data (struct priv).
+static const struct mp_filter_info vf_vapoursynth_filter = {
+    .name = "vapoursynth",
+    .process = vf_vapoursynth_process,
+    .reset = vf_vapoursynth_reset,
+    .destroy = vf_vapoursynth_destroy,
+    .priv_size = sizeof(struct priv),
+};
+
+// Creates a "vapoursynth" filter instance below parent.
+// options is freed if the filter itself cannot be created; otherwise it is
+// reparented to the filter's private data.
+// Returns the new filter, or NULL on failure (the partially set up filter is
+// freed in that case).
+static struct mp_filter *vf_vapoursynth_create(struct mp_filter *parent,
+                                               void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &vf_vapoursynth_filter);
+    if (f == NULL) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    // In theory, we could allow multiple inputs and outputs, but since this
+    // wrapper is for --vf only, we don't.
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    p->f = f;
+    p->log = f->log;
+    p->opts = talloc_steal(p, options);
+    p->drv = p->opts->drv;
+
+    // Initialized before the first possible "goto error", since the destroy
+    // callback destroys both unconditionally.
+    mp_mutex_init(&p->lock);
+    mp_cond_init(&p->wakeup);
+
+    bool have_file = p->opts->file && p->opts->file[0];
+    if (!have_file) {
+        MP_FATAL(p, "'file' parameter must be set.\n");
+        goto error;
+    }
+    p->script_path = mp_get_user_path(p, f->global, p->opts->file);
+
+    // A negative request count means "auto": use the CPU count.
+    p->max_requests = p->opts->maxrequests;
+    if (p->max_requests < 0)
+        p->max_requests = av_cpu_count();
+    MP_VERBOSE(p, "using %d concurrent requests.\n", p->max_requests);
+
+    // The input buffer holds buffered-frames entries per concurrent request.
+    int buffer_slots = p->opts->maxbuffer * p->max_requests;
+    p->buffered = talloc_array(p, struct mp_image *, buffer_slots);
+    p->requested = talloc_zero_array(p, struct mp_image *, p->max_requests);
+
+    struct mp_autoconvert *autoconv = mp_autoconvert_create(f);
+    if (autoconv == NULL)
+        goto error;
+
+    // Allow every format of the table that maps to an mpv image format.
+    int idx = 0;
+    while (mpvs_fmt_table[idx].bits) {
+        int imgfmt = mp_from_vs(mpvs_fmt_table[idx].vs);
+        if (imgfmt != 0)
+            mp_autoconvert_add_imgfmt(autoconv, imgfmt, 0);
+        idx++;
+    }
+
+    struct mp_filter *frame_dur = mp_compute_frame_duration_create(f);
+    if (frame_dur == NULL)
+        goto error;
+
+    // Chain: our input pin -> autoconvert -> frame duration filter, whose
+    // second pin becomes the pin this filter uses as in_pin.
+    mp_pin_connect(autoconv->f->pins[0], f->ppins[0]);
+    mp_pin_connect(frame_dur->pins[0], autoconv->f->pins[1]);
+    p->in_pin = frame_dur->pins[1];
+
+    if (p->drv->init(p) < 0)
+        goto error;
+
+    return f;
+
+error:
+    talloc_free(f);
+    return NULL;
+}
+
+
+// Suboptions of the vapoursynth filter; fields refer to struct vapoursynth_opts.
+#define OPT_BASE_STRUCT struct vapoursynth_opts
+static const m_option_t vf_opts_fields[] = {
+    // Script to evaluate; vf_vapoursynth_create() rejects an empty value.
+    {"file", OPT_STRING(file), .flags = M_OPT_FILE},
+    // Multiplied by the concurrent request count to size the input buffer.
+    {"buffered-frames", OPT_INT(maxbuffer), M_RANGE(1, 9999),
+        OPTDEF_INT(4)},
+    // "auto" (-1) is replaced by av_cpu_count() in vf_vapoursynth_create().
+    {"concurrent-frames", OPT_CHOICE(maxrequests, {"auto", -1}),
+        M_RANGE(1, 99), OPTDEF_INT(-1)},
+    {0}
+};
+
+#include <VSScript.h>
+
+// Script driver init: initializes VSScript and remembers that it has to be
+// finalized later. Returns 0 on success, -1 if VSScript could not be set up.
+static int drv_vss_init(struct priv *p)
+{
+    if (vsscript_init()) {
+        p->vs_initialized = true;
+        return 0;
+    }
+
+    MP_FATAL(p, "Could not initialize VapourSynth scripting.\n");
+    return -1;
+}
+
+// Script driver uninit: finalizes VSScript if drv_vss_init() succeeded before.
+static void drv_vss_uninit(struct priv *p)
+{
+    if (p->vs_initialized) {
+        vsscript_finalize();
+        p->vs_initialized = false;
+    }
+}
+
+// Script driver load_core: creates an empty script environment purely to
+// obtain the API table (p->vsapi) and core (p->vscore).
+// Returns 0 on success, -1 if the script environment could not be created.
+static int drv_vss_load_core(struct priv *p)
+{
+    int err = vsscript_createScript(&p->se);
+    if (err)
+        return -1;
+
+    p->vsapi = vsscript_getVSApi();
+    p->vscore = vsscript_getCore(p->se);
+    return 0;
+}
+
+// Script driver load: exports vars to the script environment, evaluates the
+// script file and stores output node 0 in p->out_node.
+// Returns 0 on success, -1 if script evaluation failed (the script error is
+// logged).
+static int drv_vss_load(struct priv *p, VSMap *vars)
+{
+    vsscript_setVariable(p->se, vars);
+
+    int err = vsscript_evaluateFile(&p->se, p->script_path, 0);
+    if (!err) {
+        p->out_node = vsscript_getOutput(p->se, 0);
+        return 0;
+    }
+
+    MP_FATAL(p, "Script evaluation failed:\n%s\n", vsscript_getError(p->se));
+    return -1;
+}
+
+// Script driver unload: frees the script environment (if any) and clears the
+// API/core pointers that were obtained through it.
+static void drv_vss_unload(struct priv *p)
+{
+    if (p->se) {
+        vsscript_freeScript(p->se);
+        p->se = NULL;
+    }
+    p->vscore = NULL;
+    p->vsapi = NULL;
+}
+
+// VSScript-based script driver; used as the default driver of the filter.
+static const struct script_driver drv_vss = {
+    .init = drv_vss_init,               // once per filter instance
+    .uninit = drv_vss_uninit,
+    .load_core = drv_vss_load_core,     // called by reinit_vs() before load
+    .load = drv_vss_load,
+    .unload = drv_vss_unload,           // called by destroy_vs()
+};
+
+// User filter entry for "vapoursynth": option description plus constructor.
+const struct mp_user_filter_entry vf_vapoursynth = {
+    .desc = {
+        .description = "VapourSynth bridge",
+        .name = "vapoursynth",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        // Default options only preselect the VSScript driver.
+        .priv_defaults = &(const OPT_BASE_STRUCT){
+            .drv = &drv_vss,
+        },
+        .options = vf_opts_fields,
+    },
+    .create = vf_vapoursynth_create,
+};
diff --git a/video/filter/vf_vavpp.c b/video/filter/vf_vavpp.c
new file mode 100644
index 0000000..52be148
--- /dev/null
+++ b/video/filter/vf_vavpp.c
@@ -0,0 +1,503 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <va/va.h>
+#include <va/va_vpp.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_vaapi.h>
+
+#include "options/options.h"
+#include "filters/filter.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "refqueue.h"
+
+#include "video/fmt-conversion.h"
+#include "video/vaapi.h"
+#include "video/hwdec.h"
+#include "video/mp_image_pool.h"
+
+// List of reference surfaces for one direction (forward or backward).
+struct surface_refs {
+    VASurfaceID *surfaces;  // array grown by add_surfaces()
+    int num_surfaces;       // entries used; reset to 0 by update_pipeline()
+    int max_surfaces;       // limit taken from the driver's pipeline caps
+};
+
+// Currently active processing pipeline, recomputed by update_pipeline().
+struct pipeline {
+    VABufferID *filters;    // points into priv.buffers; NULL if no filtering
+    int num_filters;
+    // Storage handed to vaQueryVideoProcPipelineCaps() for the color standards.
+    VAProcColorStandardType input_colors[VAProcColorStandardCount];
+    VAProcColorStandardType output_colors[VAProcColorStandardCount];
+    int num_input_colors, num_output_colors;
+    struct surface_refs forward, backward;
+};
+
+// Suboptions of the vavpp filter.
+struct opts {
+    int deint_type;         // index into deint_algorithm[]; -1 = auto-select
+    bool interlaced_only;   // sets MP_MODE_INTERLACED_ONLY on the refqueue
+    bool reversal_bug;      // swaps the forward/backward reference direction
+};
+
+// Per-instance state of the vavpp filter.
+struct priv {
+    struct opts *opts;
+    bool do_deint;                          // set in initialize() from deint_type
+    VABufferID buffers[VAProcFilterCount];  // filter parameter buffers
+    int num_buffers;                        // used entries in buffers[]
+    VAConfigID config;                      // VA_INVALID_ID until created
+    VAContextID context;                    // VA_INVALID_ID until created
+    struct mp_image_params params;
+    VADisplay display;                      // taken from av_device_ref
+    AVBufferRef *av_device_ref;
+    struct pipeline pipe;
+    AVBufferRef *hw_pool;                   // pool for output surfaces
+
+    struct mp_refqueue *queue;
+};
+
+// Appends up to refs->max_surfaces reference surfaces to refs, taken from the
+// refqueue at positions dir, 2*dir, 3*dir, ... Stops at the first position
+// that has no frame or no valid VA surface.
+static void add_surfaces(struct priv *p, struct surface_refs *refs, int dir)
+{
+    int n = 0;
+    while (n < refs->max_surfaces) {
+        struct mp_image *ref = mp_refqueue_get(p->queue, (1 + n) * dir);
+        VASurfaceID surface = ref ? va_surface_id(ref) : VA_INVALID_ID;
+        if (surface == VA_INVALID_ID)
+            return;
+        MP_TARRAY_APPEND(p, refs->surfaces, refs->num_surfaces, surface);
+        n++;
+    }
+}
+
+// The array items must match with the "deint" suboption values.
+// They're also sorted by quality.
+// Auto-selection in initialize() scans this table from the last entry down to
+// index 1 and picks the first algorithm the driver reports.
+static const int deint_algorithm[] = {
+    [0] = VAProcDeinterlacingNone,
+    [1] = VAProcDeinterlacingBob, // first-field, special-cased
+    [2] = VAProcDeinterlacingBob,
+    [3] = VAProcDeinterlacingWeave,
+    [4] = VAProcDeinterlacingMotionAdaptive,
+    [5] = VAProcDeinterlacingMotionCompensated,
+};
+
+// Flushes the refqueue of the filter; also used as the filter's reset callback.
+static void flush_frames(struct mp_filter *f)
+{
+    struct priv *priv = f->priv;
+
+    mp_refqueue_flush(priv->queue);
+}
+
+// Recomputes p->pipe (active filter list and reference surface limits) from
+// the driver's pipeline caps and configures the refqueue to match.
+// If there are no filters to run, or the caps query fails, the refqueue is
+// set to no references and mode 0, and p->pipe.filters stays NULL.
+static void update_pipeline(struct mp_filter *vf)
+{
+    struct priv *p = vf->priv;
+    VABufferID *filters = p->buffers;
+    int num_filters = p->num_buffers;
+    // Deinterlacing was configured but is switched off: skip the first buffer
+    // (initialize() stores the deinterlacing buffer first, if it created one).
+    if (p->opts->deint_type && !p->do_deint) {
+        filters++;
+        num_filters--;
+    }
+    p->pipe.forward.num_surfaces = p->pipe.backward.num_surfaces = 0;
+    p->pipe.num_input_colors = p->pipe.num_output_colors = 0;
+    p->pipe.num_filters = 0;
+    p->pipe.filters = NULL;
+    // <= 0 rather than == 0: the skip above yields -1 when no deinterlacing
+    // buffer exists, and a negative count must never reach libva.
+    if (num_filters <= 0)
+        goto nodeint;
+    VAProcPipelineCaps caps = {
+        .input_color_standards = p->pipe.input_colors,
+        .output_color_standards = p->pipe.output_colors,
+        .num_input_color_standards = VAProcColorStandardCount,
+        .num_output_color_standards = VAProcColorStandardCount,
+    };
+    // Note: the variable must be named "status" for CHECK_VA_STATUS().
+    VAStatus status = vaQueryVideoProcPipelineCaps(p->display, p->context,
+                                                   filters, num_filters, &caps);
+    if (!CHECK_VA_STATUS(vf, "vaQueryVideoProcPipelineCaps()"))
+        goto nodeint;
+    p->pipe.filters = filters;
+    p->pipe.num_filters = num_filters;
+    p->pipe.num_input_colors = caps.num_input_color_standards;
+    p->pipe.num_output_colors = caps.num_output_color_standards;
+    p->pipe.forward.max_surfaces = caps.num_forward_references;
+    p->pipe.backward.max_surfaces = caps.num_backward_references;
+    if (p->opts->reversal_bug) {
+        // With swapped directions either side may need the larger count.
+        int max = MPMAX(caps.num_forward_references, caps.num_backward_references);
+        mp_refqueue_set_refs(p->queue, max, max);
+    } else {
+        mp_refqueue_set_refs(p->queue, p->pipe.backward.max_surfaces,
+                                       p->pipe.forward.max_surfaces);
+    }
+    mp_refqueue_set_mode(p->queue,
+        (p->do_deint ? MP_MODE_DEINT : 0) |
+        (p->opts->deint_type >= 2 ? MP_MODE_OUTPUT_FIELDS : 0) |
+        (p->opts->interlaced_only ? MP_MODE_INTERLACED_ONLY : 0));
+    return;
+
+nodeint:
+    mp_refqueue_set_refs(p->queue, 0, 0);
+    mp_refqueue_set_mode(p->queue, 0);
+}
+
+// Allocates an output image from the filter's hardware frame pool, creating
+// or updating the pool as needed. The pool uses the full surface size of the
+// input frames context; the returned image is then set to the input's
+// visible size. Returns NULL on failure.
+static struct mp_image *alloc_out(struct mp_filter *vf)
+{
+    struct priv *p = vf->priv;
+
+    struct mp_image *fmt = mp_refqueue_get_format(p->queue);
+    if (fmt == NULL || fmt->hwctx == NULL)
+        return NULL;
+
+    // VAAPI requires the full surface size to match for input and output.
+    AVHWFramesContext *in_frames = (void *)fmt->hwctx->data;
+    bool pool_ok = mp_update_av_hw_frames_pool(&p->hw_pool, p->av_device_ref,
+                                               IMGFMT_VAAPI, IMGFMT_NV12,
+                                               in_frames->width,
+                                               in_frames->height, false);
+    if (!pool_ok) {
+        MP_ERR(vf, "Failed to create hw pool.\n");
+        return NULL;
+    }
+
+    AVFrame *frame = av_frame_alloc();
+    MP_HANDLE_OOM(frame);
+
+    struct mp_image *out = NULL;
+    bool got_buffer = av_hwframe_get_buffer(p->hw_pool, frame, 0) >= 0;
+    if (got_buffer) {
+        out = mp_image_from_av_frame(frame);
+    } else {
+        MP_ERR(vf, "Failed to allocate frame from hw pool.\n");
+    }
+    // The AVFrame is released on every path.
+    av_frame_free(&frame);
+
+    if (!got_buffer)
+        return NULL;
+    if (out == NULL) {
+        MP_ERR(vf, "Unknown error.\n");
+        return NULL;
+    }
+
+    mp_image_set_size(out, fmt->w, fmt->h);
+    return out;
+}
+
+// Runs the VA-API processing pipeline on the current refqueue frame and
+// returns a newly allocated output image, or NULL on any failure.
+// All failures funnel through the cleanup label, which ends the picture if
+// one was begun, destroys the pipeline parameter buffer and frees the output
+// image.
+static struct mp_image *render(struct mp_filter *vf)
+{
+    struct priv *p = vf->priv;
+
+    struct mp_image *in = mp_refqueue_get(p->queue, 0);
+    struct mp_image *img = NULL;
+    bool need_end_picture = false;
+    bool success = false;
+    VABufferID buffer = VA_INVALID_ID;
+
+    VASurfaceID in_id = va_surface_id(in);
+    if (!p->pipe.filters || in_id == VA_INVALID_ID)
+        goto cleanup;
+
+    img = alloc_out(vf);
+    if (!img)
+        goto cleanup;
+
+    mp_image_copy_attributes(img, in);
+
+    // NOTE(review): p->params is not assigned anywhere in this file, so this
+    // appears to always pass a zero-initialized colorspace value - confirm
+    // whether the input frame's params were intended here.
+    unsigned int flags = va_get_colorspace_flag(p->params.color.space);
+    if (!mp_refqueue_should_deint(p->queue)) {
+        flags |= VA_FRAME_PICTURE;
+    } else if (mp_refqueue_is_top_field(p->queue)) {
+        flags |= VA_TOP_FIELD;
+    } else {
+        flags |= VA_BOTTOM_FIELD;
+    }
+
+    VASurfaceID id = va_surface_id(img);
+    if (id == VA_INVALID_ID)
+        goto cleanup;
+
+    // Note: CHECK_VA_STATUS() evaluates the local variable named "status".
+    VAStatus status = vaBeginPicture(p->display, p->context, id);
+    if (!CHECK_VA_STATUS(vf, "vaBeginPicture()"))
+        goto cleanup;
+
+    need_end_picture = true;
+
+    VAProcPipelineParameterBuffer *param = NULL;
+    status = vaCreateBuffer(p->display, p->context,
+                            VAProcPipelineParameterBufferType,
+                            sizeof(*param), 1, NULL, &buffer);
+    if (!CHECK_VA_STATUS(vf, "vaCreateBuffer()"))
+        goto cleanup;
+
+    // The first active filter buffer is treated as the deinterlacing
+    // parameter buffer; update its field flags for the field being output.
+    VAProcFilterParameterBufferDeinterlacing *filter_params;
+    status = vaMapBuffer(p->display, *(p->pipe.filters), (void**)&filter_params);
+    if (!CHECK_VA_STATUS(vf, "vaMapBuffer()"))
+        goto cleanup;
+
+    filter_params->flags = flags & VA_TOP_FIELD ? 0 : VA_DEINTERLACING_BOTTOM_FIELD;
+    if (!mp_refqueue_top_field_first(p->queue))
+        filter_params->flags |= VA_DEINTERLACING_BOTTOM_FIELD_FIRST;
+
+    vaUnmapBuffer(p->display, *(p->pipe.filters));
+
+    // Fill in the pipeline parameters for this frame.
+    status = vaMapBuffer(p->display, buffer, (void**)&param);
+    if (!CHECK_VA_STATUS(vf, "vaMapBuffer()"))
+        goto cleanup;
+
+    *param = (VAProcPipelineParameterBuffer){0};
+    param->surface = in_id;
+    param->surface_region = &(VARectangle){0, 0, in->w, in->h};
+    param->output_region = &(VARectangle){0, 0, img->w, img->h};
+    param->output_background_color = 0;
+    param->filter_flags = flags;
+    param->filters = p->pipe.filters;
+    param->num_filters = p->pipe.num_filters;
+
+    // With the reversal-bug option the two reference lists are filled from
+    // the opposite refqueue direction.
+    int dir = p->opts->reversal_bug ? -1 : 1;
+
+    add_surfaces(p, &p->pipe.forward, 1 * dir);
+    param->forward_references = p->pipe.forward.surfaces;
+    param->num_forward_references = p->pipe.forward.num_surfaces;
+
+    add_surfaces(p, &p->pipe.backward, -1 * dir);
+    param->backward_references = p->pipe.backward.surfaces;
+    param->num_backward_references = p->pipe.backward.num_surfaces;
+
+    MP_TRACE(vf, "in=0x%x\n", (unsigned)in_id);
+    for (int n = 0; n < param->num_backward_references; n++)
+        MP_TRACE(vf, " b%d=0x%x\n", n, (unsigned)param->backward_references[n]);
+    for (int n = 0; n < param->num_forward_references; n++)
+        MP_TRACE(vf, " f%d=0x%x\n", n, (unsigned)param->forward_references[n]);
+
+    vaUnmapBuffer(p->display, buffer);
+
+    status = vaRenderPicture(p->display, p->context, &buffer, 1);
+    if (!CHECK_VA_STATUS(vf, "vaRenderPicture()"))
+        goto cleanup;
+
+    success = true;
+
+cleanup:
+    if (need_end_picture)
+        vaEndPicture(p->display, p->context);
+    // buffer may still be VA_INVALID_ID here if an early step failed.
+    vaDestroyBuffer(p->display, buffer);
+    if (success)
+        return img;
+    talloc_free(img);
+    return NULL;
+}
+
+// Filter process callback: refreshes the pipeline, lets the refqueue handle
+// pending reinit, and if a frame can be output, writes either the rendered
+// (deinterlaced) image or a new reference to the unmodified input frame.
+static void vf_vavpp_process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+
+    update_pipeline(f);
+
+    mp_refqueue_execute_reinit(p->queue);
+
+    if (!mp_refqueue_can_output(p->queue))
+        return;
+
+    struct mp_image *out;
+    if (p->pipe.num_filters && mp_refqueue_should_deint(p->queue)) {
+        out = render(f);
+    } else {
+        // no filtering
+        out = mp_image_new_ref(mp_refqueue_get(p->queue, 0));
+    }
+    mp_refqueue_write_out_pin(p->queue, out);
+}
+
+// Filter destroy callback: releases the filter parameter buffers, the VA
+// context and config (if they were created), the output surface pool, the
+// refqueue and finally the device reference.
+static void uninit(struct mp_filter *vf)
+{
+    struct priv *p = vf->priv;
+
+    int idx = 0;
+    while (idx < p->num_buffers) {
+        vaDestroyBuffer(p->display, p->buffers[idx]);
+        idx++;
+    }
+
+    if (p->context != VA_INVALID_ID)
+        vaDestroyContext(p->display, p->context);
+    if (p->config != VA_INVALID_ID)
+        vaDestroyConfig(p->display, p->config);
+
+    av_buffer_unref(&p->hw_pool);
+
+    // Drop all queued frames before the queue itself goes away.
+    mp_refqueue_flush(p->queue);
+    talloc_free(p->queue);
+
+    av_buffer_unref(&p->av_device_ref);
+}
+
+// Queries the driver capabilities for the given filter type into caps, which
+// has room for count entries. Returns the number of entries reported by the
+// driver, or 0 if the query failed.
+static int va_query_filter_caps(struct mp_filter *vf, VAProcFilterType type,
+                                void *caps, unsigned int count)
+{
+    struct priv *p = vf->priv;
+    // Note: the variable must be named "status" for CHECK_VA_STATUS().
+    VAStatus status = vaQueryVideoProcFilterCaps(p->display, p->context, type,
+                                                 caps, &count);
+    if (!CHECK_VA_STATUS(vf, "vaQueryVideoProcFilterCaps()"))
+        return 0;
+    return count;
+}
+
+// Creates a filter parameter buffer of num elements with bytes bytes each,
+// initialized from data. Returns the buffer ID, or VA_INVALID_ID on failure.
+static VABufferID va_create_filter_buffer(struct mp_filter *vf, int bytes,
+                                          int num, void *data)
+{
+    struct priv *p = vf->priv;
+    VABufferID buffer;
+    // Note: the variable must be named "status" for CHECK_VA_STATUS().
+    VAStatus status = vaCreateBuffer(p->display, p->context,
+                                     VAProcFilterParameterBufferType,
+                                     bytes, num, data, &buffer);
+    if (!CHECK_VA_STATUS(vf, "vaCreateBuffer()"))
+        return VA_INVALID_ID;
+    return buffer;
+}
+
+// Creates the VA-API video processing config and context and, if the driver
+// offers a deinterlacing filter, a parameter buffer for the selected (or
+// auto-selected) algorithm. Also resolves deint_type "auto" (-1) to a
+// concrete value and sets p->do_deint.
+// Returns false if config/context creation or the filter query fails; what
+// was created so far is stored in p and released by uninit().
+static bool initialize(struct mp_filter *vf)
+{
+    struct priv *p = vf->priv;
+    VAStatus status; // must be named "status" for CHECK_VA_STATUS()
+
+    VAConfigID config;
+    status = vaCreateConfig(p->display, VAProfileNone, VAEntrypointVideoProc,
+                            NULL, 0, &config);
+    if (!CHECK_VA_STATUS(vf, "vaCreateConfig()")) // no entrypoint for video proc
+        return false;
+    p->config = config;
+
+    VAContextID context;
+    status = vaCreateContext(p->display, p->config, 0, 0, 0, NULL, 0, &context);
+    if (!CHECK_VA_STATUS(vf, "vaCreateContext()"))
+        return false;
+    p->context = context;
+
+    VAProcFilterType filters[VAProcFilterCount];
+    // libva takes the in/out element count as unsigned int *.
+    unsigned int num_filters = VAProcFilterCount;
+    status = vaQueryVideoProcFilters(p->display, p->context, filters, &num_filters);
+    if (!CHECK_VA_STATUS(vf, "vaQueryVideoProcFilters()"))
+        return false;
+
+    // One potential parameter buffer per filter type, indexed by the type.
+    VABufferID buffers[VAProcFilterCount];
+    for (int i = 0; i < VAProcFilterCount; i++)
+        buffers[i] = VA_INVALID_ID;
+    for (unsigned int i = 0; i < num_filters; i++) {
+        if (filters[i] == VAProcFilterDeinterlacing) {
+            VAProcFilterCapDeinterlacing caps[VAProcDeinterlacingCount];
+            int num = va_query_filter_caps(vf, VAProcFilterDeinterlacing, caps,
+                                           VAProcDeinterlacingCount);
+            if (!num)
+                continue;
+            if (p->opts->deint_type < 0) {
+                // Auto mode: take the last (best) entry of deint_algorithm[]
+                // that the driver supports; index 0 ("none") is never picked.
+                for (int n = MP_ARRAY_SIZE(deint_algorithm) - 1; n > 0; n--) {
+                    for (int x = 0; x < num; x++) {
+                        if (caps[x].type == deint_algorithm[n]) {
+                            p->opts->deint_type = n;
+                            MP_VERBOSE(vf, "Selected deinterlacing algorithm: "
+                                       "%d\n", deint_algorithm[n]);
+                            goto found;
+                        }
+                    }
+                }
+                found: ;
+            }
+            if (p->opts->deint_type <= 0)
+                continue;
+            VAProcDeinterlacingType algorithm =
+                deint_algorithm[p->opts->deint_type];
+            for (int n = 0; n < num; n++) { // find the algorithm
+                if (caps[n].type != algorithm)
+                    continue;
+                VAProcFilterParameterBufferDeinterlacing param = {0};
+                param.type = VAProcFilterDeinterlacing;
+                param.algorithm = algorithm;
+                buffers[VAProcFilterDeinterlacing] =
+                    va_create_filter_buffer(vf, sizeof(param), 1, &param);
+                // Stop at the first match, so a duplicate caps entry cannot
+                // create (and leak) a second buffer.
+                break;
+            }
+            if (buffers[VAProcFilterDeinterlacing] == VA_INVALID_ID)
+                MP_WARN(vf, "Selected deinterlacing algorithm not supported.\n");
+        } // check other filters
+    }
+    // Auto mode found nothing usable: deinterlacing stays disabled.
+    if (p->opts->deint_type < 0)
+        p->opts->deint_type = 0;
+    p->num_buffers = 0;
+    if (buffers[VAProcFilterDeinterlacing] != VA_INVALID_ID)
+        p->buffers[p->num_buffers++] = buffers[VAProcFilterDeinterlacing];
+    p->do_deint = !!p->opts->deint_type;
+    // next filters: p->buffers[p->num_buffers++] = buffers[next_filter];
+    return true;
+}
+
+// Filter description passed to mp_filter_create(): callbacks and the size of
+// the per-instance private data (struct priv).
+static const struct mp_filter_info vf_vavpp_filter = {
+    .name = "vavpp",
+    .process = vf_vavpp_process,
+    .reset = flush_frames,
+    .destroy = uninit,
+    .priv_size = sizeof(struct priv),
+};
+
+// Creates a "vavpp" filter instance below parent.
+// options is freed if the filter itself cannot be created; otherwise it is
+// reparented to the filter's private data.
+// Returns the new filter, or NULL if no VAAPI device is available or VA-API
+// initialization fails (the partially set up filter is freed in that case).
+static struct mp_filter *vf_vavpp_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &vf_vavpp_filter);
+    if (f == NULL) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    p->opts = talloc_steal(p, options);
+    // Mark the VA handles as "not created", so that uninit() skips them.
+    p->context = VA_INVALID_ID;
+    p->config = VA_INVALID_ID;
+
+    p->queue = mp_refqueue_alloc(f);
+
+    struct mp_hwdec_ctx *hwdec = mp_filter_load_hwdec_device(f, IMGFMT_VAAPI);
+    if (hwdec == NULL || hwdec->av_device_ref == NULL)
+        goto error;
+
+    // Keep our own reference to the device for the lifetime of the filter.
+    p->av_device_ref = av_buffer_ref(hwdec->av_device_ref);
+    if (p->av_device_ref == NULL)
+        goto error;
+
+    AVHWDeviceContext *device = (void *)p->av_device_ref->data;
+    AVVAAPIDeviceContext *va_device = device->hwctx;
+    p->display = va_device->display;
+
+    mp_refqueue_add_in_format(p->queue, IMGFMT_VAAPI, 0);
+
+    if (initialize(f))
+        return f;
+
+error:
+    talloc_free(f);
+    return NULL;
+}
+
+// Suboptions of the vavpp filter; fields refer to struct opts.
+#define OPT_BASE_STRUCT struct opts
+static const m_option_t vf_opts_fields[] = {
+    {"deint", OPT_CHOICE(deint_type,
+        // The values >=0 must match with deint_algorithm[].
+        // "auto" is resolved to one of the other values in initialize().
+        {"auto", -1},
+        {"no", 0},
+        {"first-field", 1},
+        {"bob", 2},
+        {"weave", 3},
+        {"motion-adaptive", 4},
+        {"motion-compensated", 5})},
+    {"interlaced-only", OPT_BOOL(interlaced_only)},
+    {"reversal-bug", OPT_BOOL(reversal_bug)},
+    {0}
+};
+
+// User filter entry for "vavpp": option description plus constructor.
+const struct mp_user_filter_entry vf_vavpp = {
+    .desc = {
+        .description = "VA-API Video Post-Process Filter",
+        .name = "vavpp",
+        .priv_size = sizeof(OPT_BASE_STRUCT),
+        // Defaults: auto-select the deinterlacing algorithm, and enable the
+        // reversal-bug handling.
+        .priv_defaults = &(const OPT_BASE_STRUCT){
+            .deint_type = -1,
+            .reversal_bug = true,
+        },
+        .options = vf_opts_fields,
+    },
+    .create = vf_vavpp_create,
+};
diff --git a/video/filter/vf_vdpaupp.c b/video/filter/vf_vdpaupp.c
new file mode 100644
index 0000000..0519f5a
--- /dev/null
+++ b/video/filter/vf_vdpaupp.c
@@ -0,0 +1,195 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include <assert.h>
+
+#include <libavutil/hwcontext.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "options/m_option.h"
+#include "filters/filter.h"
+#include "filters/filter_internal.h"
+#include "filters/user_filters.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+#include "video/hwdec.h"
+#include "video/vdpau.h"
+#include "video/vdpau_mixer.h"
+#include "refqueue.h"
+
+// Note: this filter does no actual filtering; it merely sets appropriate
+//       flags on vdpau images (mp_vdpau_mixer_frame) to do the appropriate
+//       processing on the final rendering process in the VO.
+
+struct opts {
+    bool deint_enabled;   // option "deint"; if false, create() forces opts.deint to 0
+    bool interlaced_only; // option "interlaced-only"; becomes MP_MODE_INTERLACED_ONLY
+    struct mp_vdpau_mixer_opts opts; // copied into every output frame by process()
+};
+
+struct priv {
+    struct opts *opts;         // user options, talloc_steal()ed onto this struct in create()
+    struct mp_vdpau_ctx *ctx;  // looked up from the hwdec device's av_device_ref
+    struct mp_refqueue *queue; // source of the current/past/future fields
+    struct mp_pin *in_pin;     // never referenced elsewhere in this file
+};
+
+static VdpVideoSurface ref_field(struct priv *p,
+                                 struct mp_vdpau_mixer_frame *frame, int pos)
+{
+    // New reference to the queued field at pos, made a talloc child of frame.
+    struct mp_image *ref = mp_image_new_ref(mp_refqueue_get_field(p->queue, pos));
+    if (ref)
+        talloc_steal(frame, ref);
+    return ref ? (uintptr_t)ref->planes[3] : VDP_INVALID_HANDLE;
+}
+
+static void vf_vdpaupp_process(struct mp_filter *f)
+{
+    struct priv *p = f->priv;
+    struct mp_refqueue *q = p->queue;
+
+    mp_refqueue_execute_reinit(q);
+    if (!mp_refqueue_can_output(q))
+        return; // the queue reports that no output is possible right now
+
+    // The output image is created from the field at queue position 0.
+    struct mp_image *out =
+        mp_vdpau_mixed_frame_create(mp_refqueue_get_field(q, 0));
+    if (!out)
+        return; // OOM
+    struct mp_vdpau_mixer_frame *mix = mp_vdpau_mixed_frame_get(out);
+
+    // Whole frame unless deinterlacing; otherwise top or bottom field.
+    mix->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME;
+    if (mp_refqueue_should_deint(q)) {
+        mix->field = mp_refqueue_is_top_field(q)
+            ? VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD
+            : VDP_VIDEO_MIXER_PICTURE_STRUCTURE_BOTTOM_FIELD;
+    }
+
+    // Surfaces stored in the frame: 1 future, the current one, 2 past.
+    mix->future[0] = ref_field(p, mix, 1);
+    mix->current = ref_field(p, mix, 0);
+    mix->past[0] = ref_field(p, mix, -1);
+    mix->past[1] = ref_field(p, mix, -2);
+
+    mix->opts = p->opts->opts;
+    out->planes[3] = (void *)(uintptr_t)mix->current;
+    out->params.hw_subfmt = 0; // force mixer
+
+    mp_refqueue_write_out_pin(q, out);
+}
+
+static void vf_vdpaupp_reset(struct mp_filter *f)
+{
+    // Flush the reference queue kept in this filter's private data.
+    mp_refqueue_flush(((struct priv *)f->priv)->queue);
+}
+
+static void vf_vdpaupp_destroy(struct mp_filter *f)
+{
+    // Free the reference queue allocated in vf_vdpaupp_create().
+    talloc_free(((struct priv *)f->priv)->queue);
+}
+
+static const struct mp_filter_info vf_vdpaupp_filter = { // passed to mp_filter_create()
+    .name = "vdpaupp",
+    .process = vf_vdpaupp_process,
+    .reset = vf_vdpaupp_reset,
+    .destroy = vf_vdpaupp_destroy,
+    .priv_size = sizeof(struct priv), // size of the object f->priv points to
+};
+
+static struct mp_filter *vf_vdpaupp_create(struct mp_filter *parent, void *options)
+{
+    struct mp_filter *f = mp_filter_create(parent, &vf_vdpaupp_filter);
+    if (!f) {
+        talloc_free(options);
+        return NULL;
+    }
+
+    mp_filter_add_pin(f, MP_PIN_IN, "in");
+    mp_filter_add_pin(f, MP_PIN_OUT, "out");
+
+    struct priv *p = f->priv;
+    struct opts *o = p->opts = talloc_steal(p, options);
+    p->queue = mp_refqueue_alloc(f);
+
+    struct mp_hwdec_ctx *dev = mp_filter_load_hwdec_device(f, IMGFMT_VDPAU);
+    if (!dev || !dev->av_device_ref)
+        goto fail;
+    p->ctx = mp_vdpau_get_ctx_from_av(dev->av_device_ref);
+    if (!p->ctx)
+        goto fail;
+
+    if (!o->deint_enabled)
+        o->opts.deint = 0;
+
+    // Modes >= 2 output fields and keep references around the current frame.
+    bool field_mode = o->opts.deint >= 2;
+    int nrefs = field_mode ? 1 : 0; // (1, 1): 2 past fields, 1 future field
+    mp_refqueue_set_refs(p->queue, nrefs, nrefs);
+    int mode = 0;
+    if (o->deint_enabled)
+        mode |= MP_MODE_DEINT;
+    if (o->interlaced_only)
+        mode |= MP_MODE_INTERLACED_ONLY;
+    if (field_mode)
+        mode |= MP_MODE_OUTPUT_FIELDS;
+    mp_refqueue_set_mode(p->queue, mode);
+
+    mp_refqueue_add_in_format(p->queue, IMGFMT_VDPAU, 0);
+    return f;
+
+fail:
+    talloc_free(f);
+    return NULL;
+}
+
+#define OPT_BASE_STRUCT struct opts
+static const m_option_t vf_opts_fields[] = { // option name -> member of struct opts
+    {"deint-mode", OPT_CHOICE(opts.deint,
+        {"first-field", 1},
+        {"bob", 2}, // values >= 2 make vf_vdpaupp_create() enable field output
+        {"temporal", 3},
+        {"temporal-spatial", 4}),
+        OPTDEF_INT(3)}, // default value 3, i.e. "temporal"
+    {"deint", OPT_BOOL(deint_enabled)}, // if off, create() forces opts.deint to 0
+    {"chroma-deint", OPT_BOOL(opts.chroma_deint), OPTDEF_INT(1)},
+    {"pullup", OPT_BOOL(opts.pullup)},
+    {"denoise", OPT_FLOAT(opts.denoise), M_RANGE(0, 1)},
+    {"sharpen", OPT_FLOAT(opts.sharpen), M_RANGE(-1, 1)},
+    {"hqscaling", OPT_INT(opts.hqscaling), M_RANGE(0, 9)},
+    {"interlaced-only", OPT_BOOL(interlaced_only)},
+    {0} // terminating entry
+};
+
+const struct mp_user_filter_entry vf_vdpaupp = { // registration entry of the "vdpaupp" filter
+    .desc = {
+        .description = "vdpau postprocessing",
+        .name = "vdpaupp",
+        .priv_size = sizeof(OPT_BASE_STRUCT), // OPT_BASE_STRUCT is struct opts here
+        .options = vf_opts_fields,
+    },
+    .create = vf_vdpaupp_create, // receives the parsed options as its void * argument
+};
diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c
new file mode 100644
index 0000000..aa7d857
--- /dev/null
+++ b/video/fmt-conversion.c
@@ -0,0 +1,112 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libavutil/pixdesc.h>
+#include <libavutil/avutil.h>
+
+#include "video/img_format.h"
+#include "fmt-conversion.h"
+
+static const struct { // one pairing of an mpv format with a libavutil format
+    int fmt;                     // IMGFMT_* value
+    enum AVPixelFormat pix_fmt;  // AV_PIX_FMT_* value
+} conversion_map[] = { // both lookups scan front to back; the first match wins
+    {IMGFMT_ARGB, AV_PIX_FMT_ARGB},
+    {IMGFMT_BGRA, AV_PIX_FMT_BGRA},
+    {IMGFMT_BGR24, AV_PIX_FMT_BGR24},
+    {IMGFMT_RGB565, AV_PIX_FMT_RGB565},
+    {IMGFMT_ABGR, AV_PIX_FMT_ABGR},
+    {IMGFMT_RGBA, AV_PIX_FMT_RGBA},
+    {IMGFMT_RGB24, AV_PIX_FMT_RGB24},
+    {IMGFMT_PAL8,  AV_PIX_FMT_PAL8},
+    {IMGFMT_UYVY,  AV_PIX_FMT_UYVY422},
+    {IMGFMT_NV12,  AV_PIX_FMT_NV12},
+    {IMGFMT_Y8,    AV_PIX_FMT_GRAY8},
+    {IMGFMT_Y16, AV_PIX_FMT_GRAY16},
+    {IMGFMT_420P,  AV_PIX_FMT_YUV420P},
+    {IMGFMT_444P,  AV_PIX_FMT_YUV444P},
+
+    // YUVJ are YUV formats that use the full Y range. Decoder color range
+    // information is used instead. Deprecated in ffmpeg.
+    {IMGFMT_420P,  AV_PIX_FMT_YUVJ420P}, // only hit by pixfmt2imgfmt(); see order note
+    {IMGFMT_444P,  AV_PIX_FMT_YUVJ444P}, // imgfmt2pixfmt() finds the non-J entry first
+
+    {IMGFMT_BGR0,  AV_PIX_FMT_BGR0},
+    {IMGFMT_0RGB,  AV_PIX_FMT_0RGB},
+    {IMGFMT_RGB0,  AV_PIX_FMT_RGB0},
+    {IMGFMT_0BGR,  AV_PIX_FMT_0BGR},
+
+    {IMGFMT_RGBA64, AV_PIX_FMT_RGBA64},
+
+#ifdef AV_PIX_FMT_X2RGB10
+    {IMGFMT_RGB30,  AV_PIX_FMT_X2RGB10},
+#endif
+
+    {IMGFMT_VDPAU, AV_PIX_FMT_VDPAU},
+    {IMGFMT_VIDEOTOOLBOX,   AV_PIX_FMT_VIDEOTOOLBOX},
+    {IMGFMT_MEDIACODEC, AV_PIX_FMT_MEDIACODEC},
+    {IMGFMT_VAAPI, AV_PIX_FMT_VAAPI},
+    {IMGFMT_DXVA2, AV_PIX_FMT_DXVA2_VLD},
+    {IMGFMT_D3D11, AV_PIX_FMT_D3D11},
+    {IMGFMT_MMAL, AV_PIX_FMT_MMAL},
+    {IMGFMT_CUDA, AV_PIX_FMT_CUDA},
+    {IMGFMT_P010, AV_PIX_FMT_P010},
+    {IMGFMT_DRMPRIME, AV_PIX_FMT_DRM_PRIME},
+#if HAVE_VULKAN_INTEROP
+    {IMGFMT_VULKAN, AV_PIX_FMT_VULKAN},
+#endif
+
+    {0, AV_PIX_FMT_NONE} // terminator: ends the scans in both lookup functions
+};
+
+enum AVPixelFormat imgfmt2pixfmt(int fmt)
+{
+    if (fmt == IMGFMT_NONE)
+        return AV_PIX_FMT_NONE;
+
+    if (fmt < IMGFMT_AVPIXFMT_START || fmt >= IMGFMT_AVPIXFMT_END) {
+        // Regular format: the first table entry with this IMGFMT_* wins.
+        int n = 0;
+        while (conversion_map[n].fmt && conversion_map[n].fmt != fmt)
+            n++;
+        return conversion_map[n].pix_fmt; // terminator holds AV_PIX_FMT_NONE
+    }
+
+    // Generic range: the value is an AV_PIX_FMT_* plus a fixed offset. It is
+    // accepted only if the reverse mapping yields the same IMGFMT again.
+    enum AVPixelFormat cand = fmt - IMGFMT_AVPIXFMT_START;
+    if (pixfmt2imgfmt(cand) != fmt || !av_pix_fmt_desc_get(cand))
+        cand = AV_PIX_FMT_NONE;
+    return cand;
+}
+
+int pixfmt2imgfmt(enum AVPixelFormat pix_fmt)
+{
+    if (pix_fmt == AV_PIX_FMT_NONE)
+        return IMGFMT_NONE;
+
+    // Formats listed in conversion_map take precedence.
+    int n = 0;
+    while (conversion_map[n].pix_fmt != AV_PIX_FMT_NONE &&
+           conversion_map[n].pix_fmt != pix_fmt)
+        n++;
+    if (conversion_map[n].pix_fmt == pix_fmt)
+        return conversion_map[n].fmt;
+    // Otherwise use the generic range, if libavutil has a descriptor for it.
+    int generic = IMGFMT_AVPIXFMT_START + pix_fmt;
+    return (generic < IMGFMT_AVPIXFMT_END && av_pix_fmt_desc_get(pix_fmt)) ? generic : 0;
+}
diff --git a/video/fmt-conversion.h b/video/fmt-conversion.h
new file mode 100644
index 0000000..962e4b8
--- /dev/null
+++ b/video/fmt-conversion.h
@@ -0,0 +1,26 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_FMT_CONVERSION_H
+#define MPLAYER_FMT_CONVERSION_H
+
+#include <libavutil/pixfmt.h>
+
+enum AVPixelFormat imgfmt2pixfmt(int fmt);
+int pixfmt2imgfmt(enum AVPixelFormat pix_fmt);
+
+#endif /* MPLAYER_FMT_CONVERSION_H */
diff --git a/video/hwdec.c b/video/hwdec.c
new file mode 100644
index 0000000..f397f3b
--- /dev/null
+++ b/video/hwdec.c
@@ -0,0 +1,140 @@
+#include <assert.h>
+
+#include <libavutil/hwcontext.h>
+
+#include "config.h"
+#include "hwdec.h"
+#include "osdep/threads.h"
+
+struct mp_hwdec_devices {
+    mp_mutex lock; // held by the get/add/remove functions while touching hwctxs
+    // Registered devices; appended to in hwdec_devices_add().
+    struct mp_hwdec_ctx **hwctxs;
+    int num_hwctxs;
+    // Optional loader set via hwdec_devices_set_loader(), called with load_api_ctx.
+    void (*load_api)(void *ctx,
+                     struct hwdec_imgfmt_request *params);
+    void *load_api_ctx;
+};
+
+struct mp_hwdec_devices *hwdec_devices_create(void)
+{
+    struct mp_hwdec_devices *devs = talloc_zero(NULL, struct mp_hwdec_devices);
+    mp_mutex_init(&devs->lock); // destroyed again in hwdec_devices_destroy()
+    return devs; // freed with hwdec_devices_destroy()
+}
+
+void hwdec_devices_destroy(struct mp_hwdec_devices *devs)
+{
+    if (devs) { // a NULL list is accepted and ignored
+        assert(!devs->num_hwctxs); // must have been hwdec_devices_remove()ed
+        assert(!devs->load_api); // must have been unset
+        mp_mutex_destroy(&devs->lock);
+        talloc_free(devs);
+    }
+}
+
+struct mp_hwdec_ctx *hwdec_devices_get_by_imgfmt(struct mp_hwdec_devices *devs,
+                                                 int hw_imgfmt)
+{
+    // Linear search under the lock. The first device whose hw_imgfmt
+    // matches is returned; NULL if there is none.
+    struct mp_hwdec_ctx *found = NULL;
+
+    mp_mutex_lock(&devs->lock);
+    for (int i = 0; !found && i < devs->num_hwctxs; i++) {
+        if (devs->hwctxs[i]->hw_imgfmt == hw_imgfmt)
+            found = devs->hwctxs[i];
+    }
+    mp_mutex_unlock(&devs->lock);
+    return found;
+}
+
+struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs)
+{
+    return hwdec_devices_get_n(devs, 0); // NULL when no device is registered
+}
+
+struct mp_hwdec_ctx *hwdec_devices_get_n(struct mp_hwdec_devices *devs, int n)
+{
+    mp_mutex_lock(&devs->lock); // any n outside [0, num_hwctxs) yields NULL, never hwctxs[n]
+    struct mp_hwdec_ctx *res = (n >= 0 && n < devs->num_hwctxs) ? devs->hwctxs[n] : NULL;
+    mp_mutex_unlock(&devs->lock);
+    return res;
+}
+
+void hwdec_devices_add(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx)
+{
+    mp_mutex_lock(&devs->lock);
+    MP_TARRAY_APPEND(devs, devs->hwctxs, devs->num_hwctxs, ctx); // no duplicate check here
+    mp_mutex_unlock(&devs->lock);
+}
+
+void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx)
+{
+    mp_mutex_lock(&devs->lock);
+    int idx = 0; // position of ctx, or num_hwctxs if ctx is not in the list
+    while (idx < devs->num_hwctxs && devs->hwctxs[idx] != ctx)
+        idx++;
+    if (idx < devs->num_hwctxs) {
+        MP_TARRAY_REMOVE_AT(devs->hwctxs, devs->num_hwctxs, idx);
+    }
+    mp_mutex_unlock(&devs->lock);
+}
+
+void hwdec_devices_set_loader(struct mp_hwdec_devices *devs,
+    void (*load_api)(void *ctx, struct hwdec_imgfmt_request *params),
+    void *load_api_ctx)
+{
+    devs->load_api = load_api; // plain stores; devs->lock is not taken here
+    devs->load_api_ctx = load_api_ctx; // handed back as the callback's first argument
+}
+
+void hwdec_devices_request_for_img_fmt(struct mp_hwdec_devices *devs,
+                                       struct hwdec_imgfmt_request *params)
+{
+    if (devs->load_api) // does nothing when no loader has been set
+        devs->load_api(devs->load_api_ctx, params);
+}
+
+char *hwdec_devices_get_names(struct mp_hwdec_devices *devs)
+{
+    char *names = NULL; // NOTE(review): hwctxs is walked without taking devs->lock
+    for (int i = 0; i < devs->num_hwctxs; i++) {
+        if (names) // separator only once something has been appended
+            ta_xstrdup_append(&names, ",");
+        ta_xstrdup_append(&names, devs->hwctxs[i]->driver_name);
+    }
+    return names;
+}
+
+static const struct hwcontext_fns *const hwcontext_fns[] = { // entries depend on HAVE_* build flags
+#if HAVE_CUDA_HWACCEL
+    &hwcontext_fns_cuda,
+#endif
+#if HAVE_D3D_HWACCEL
+    &hwcontext_fns_d3d11,
+#endif
+#if HAVE_D3D9_HWACCEL
+    &hwcontext_fns_dxva2,
+#endif
+#if HAVE_DRM
+    &hwcontext_fns_drmprime,
+#endif
+#if HAVE_VAAPI
+    &hwcontext_fns_vaapi,
+#endif
+#if HAVE_VDPAU
+    &hwcontext_fns_vdpau,
+#endif
+    NULL, // terminator; hwdec_get_hwcontext_fns() stops here
+};
+
+const struct hwcontext_fns *hwdec_get_hwcontext_fns(int av_hwdevice_type)
+{
+    // Walk the NULL-terminated table; ends on the match or on the terminator.
+    const struct hwcontext_fns *const *it = hwcontext_fns;
+    while (*it && (*it)->av_hwdevice_type != av_hwdevice_type)
+        it++;
+    return *it;
+}
diff --git a/video/hwdec.h b/video/hwdec.h
new file mode 100644
index 0000000..723c60f
--- /dev/null
+++ b/video/hwdec.h
@@ -0,0 +1,108 @@
+#ifndef MP_HWDEC_H_
+#define MP_HWDEC_H_
+
+#include <libavutil/buffer.h>
+
+#include "options/m_option.h"
+
+struct mp_image_pool;
+
+struct mp_hwdec_ctx { // one device entry as stored in struct mp_hwdec_devices
+    const char *driver_name; // NULL if unknown/not loaded
+
+    // libavutil-wrapped context, if available.
+    struct AVBufferRef *av_device_ref; // AVHWDeviceContext*
+
+    // List of allowed IMGFMT_s, terminated with 0.
+    // If NULL, all software formats are considered to be supported.
+    const int *supported_formats;
+    // HW format used by the hwdec
+    int hw_imgfmt; // the key compared by hwdec_devices_get_by_imgfmt()
+
+    // The name of this hwdec's matching conversion filter if available.
+    // This will be used for hardware conversion of frame formats.
+    // NULL otherwise.
+    const char *conversion_filter_name;
+
+    // The libavutil hwconfig to be used when querying constraints for the
+    // conversion filter. Can be NULL if no special config is required.
+    void *conversion_config;
+};
+
+// Used to communicate hardware decoder device handles from VO to video decoder.
+struct mp_hwdec_devices;
+
+struct mp_hwdec_devices *hwdec_devices_create(void);
+void hwdec_devices_destroy(struct mp_hwdec_devices *devs);
+
+struct mp_hwdec_ctx *hwdec_devices_get_by_imgfmt(struct mp_hwdec_devices *devs,
+                                                 int hw_imgfmt);
+
+// For code which still strictly assumes there is 1 (or none) device.
+struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs);
+
+// Return the n-th device. NULL if none.
+struct mp_hwdec_ctx *hwdec_devices_get_n(struct mp_hwdec_devices *devs, int n);
+
+// Add this to the list of internal devices. Adding the same pointer twice must
+// be avoided.
+void hwdec_devices_add(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx);
+
+// Remove this from the list of internal devices. Idempotent/ignores entries
+// not added yet. This is not thread-safe.
+void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx);
+
+struct hwdec_imgfmt_request { // argument of the load_api callback
+    int imgfmt;   // IMGFMT_* to load devices for; IMGFMT_NONE requests all devices
+    bool probing; // NOTE(review): presumably suppresses error logging - confirm with users
+};
+
+// Can be used to enable lazy API loading via hwdec_devices_request_for_img_fmt().
+// If used at all, this must be set/unset during initialization/uninitialization,
+// as concurrent use with hwdec_devices_request_for_img_fmt() is a race condition.
+void hwdec_devices_set_loader(struct mp_hwdec_devices *devs,
+    void (*load_api)(void *ctx, struct hwdec_imgfmt_request *params),
+    void *load_api_ctx);
+
+// Cause VO to lazily load all devices for a specified img format, and
+// block until this is done (even if not available). Pass IMGFMT_NONE to load
+// all available devices.
+void hwdec_devices_request_for_img_fmt(struct mp_hwdec_devices *devs,
+                                       struct hwdec_imgfmt_request *params);
+
+// Return "," concatenated list (for introspection/debugging). Use talloc_free().
+char *hwdec_devices_get_names(struct mp_hwdec_devices *devs);
+
+struct mp_image;
+struct mpv_global;
+
+struct hwcontext_create_dev_params {
+    bool probing;   // if true, don't log errors if unavailable
+};
+
+// Per AV_HWDEVICE_TYPE_* functions, queryable via hwdec_get_hwcontext_fns().
+// All entries are strictly optional.
+struct hwcontext_fns {
+    int av_hwdevice_type; // key matched by hwdec_get_hwcontext_fns()
+    // Fill in special format-specific requirements.
+    void (*refine_hwframes)(struct AVBufferRef *hw_frames_ctx);
+    // Returns an AVHWDeviceContext*. Used for copy hwdecs.
+    struct AVBufferRef *(*create_dev)(struct mpv_global *global,
+                                      struct mp_log *log,
+                                      struct hwcontext_create_dev_params *params);
+    // Return whether this is using some sort of sub-optimal emulation layer.
+    bool (*is_emulated)(struct AVBufferRef *hw_device_ctx);
+};
+
+// The parameter is of type enum AVHWDeviceType (as in int to avoid extensive
+// recursive includes). May return NULL for unknown device types.
+const struct hwcontext_fns *hwdec_get_hwcontext_fns(int av_hwdevice_type);
+
+extern const struct hwcontext_fns hwcontext_fns_cuda;
+extern const struct hwcontext_fns hwcontext_fns_d3d11;
+extern const struct hwcontext_fns hwcontext_fns_drmprime;
+extern const struct hwcontext_fns hwcontext_fns_dxva2;
+extern const struct hwcontext_fns hwcontext_fns_vaapi;
+extern const struct hwcontext_fns hwcontext_fns_vdpau;
+
+#endif
diff --git a/video/image_loader.c b/video/image_loader.c
new file mode 100644
index 0000000..ba4d62a
--- /dev/null
+++ b/video/image_loader.c
@@ -0,0 +1,48 @@
+#include <libavcodec/avcodec.h>
+
+#include "common/common.h"
+#include "mp_image.h"
+#include "player/screenshot.h"
+
+#include "image_loader.h"
+
+struct mp_image *load_image_png_buf(void *buffer, size_t buffer_size, int imgfmt)
+{
+    // Decodes the PNG data in buffer and converts it to imgfmt; NULL on failure.
+    struct mp_image *res = NULL;
+    AVPacket *pkt = NULL;
+    AVFrame *frame = NULL;
+    AVCodecContext *avctx = NULL;
+
+    const AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_PNG);
+    if (codec)
+        avctx = avcodec_alloc_context3(codec);
+    if (!avctx || avcodec_open2(avctx, codec, NULL) < 0)
+        goto done;
+
+    // av_new_packet() takes an int size. If buffer_size does not survive the
+    // conversion, copying buffer_size bytes would overrun the packet buffer.
+    pkt = av_packet_alloc();
+    if (!pkt || av_new_packet(pkt, buffer_size) < 0 ||
+        (size_t)pkt->size != buffer_size)
+        goto done;
+    memcpy(pkt->data, buffer, buffer_size);
+
+    // (There is only 1 outcome: either it takes it and decodes it, or not.)
+    avcodec_send_packet(avctx, pkt);
+    avcodec_send_packet(avctx, NULL);
+
+    frame = av_frame_alloc();
+    if (frame && avcodec_receive_frame(avctx, frame) >= 0) {
+        struct mp_image *r = mp_image_from_av_frame(frame);
+        if (r)
+            res = convert_image(r, imgfmt, NULL, mp_null_log);
+        talloc_free(r);
+    }
+
+done:
+    av_frame_free(&frame);
+    av_packet_free(&pkt);
+    avcodec_free_context(&avctx);
+    return res;
+}
diff --git a/video/image_loader.h b/video/image_loader.h
new file mode 100644
index 0000000..f8b20c8
--- /dev/null
+++ b/video/image_loader.h
@@ -0,0 +1,9 @@
+#ifndef MP_IMAGE_LOADER_H_
+#define MP_IMAGE_LOADER_H_
+
+#include <stddef.h>
+
+struct mp_image;
+struct mp_image *load_image_png_buf(void *buffer, size_t buffer_size, int imgfmt);
+
+#endif
diff --git a/video/image_writer.c b/video/image_writer.c
new file mode 100644
index 0000000..288d809
--- /dev/null
+++ b/video/image_writer.c
@@ -0,0 +1,757 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavformat/avformat.h>
+#include <libavutil/mem.h>
+#include <libavutil/opt.h>
+#include <libavutil/pixdesc.h>
+
+#include "common/msg.h"
+#include "config.h"
+
+#if HAVE_JPEG
+#include <setjmp.h>
+#include <jpeglib.h>
+#endif
+
+#include "osdep/io.h"
+
+#include "common/av_common.h"
+#include "common/msg.h"
+#include "image_writer.h"
+#include "mpv_talloc.h"
+#include "video/fmt-conversion.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+#include "video/sws_utils.h"
+
+#include "options/m_option.h"
+
+const struct image_writer_opts image_writer_opts_defaults = {
+    .format = AV_CODEC_ID_MJPEG, // "jpg"/"jpeg" in mp_image_writer_formats
+    .high_bit_depth = true,
+    .png_compression = 7,  // option range is 0-9
+    .png_filter = 5,       // option range is 0-5
+    .jpeg_quality = 90,    // option range is 0-100
+    .jpeg_source_chroma = true,
+    .webp_quality = 75,    // option range is 0-100
+    .webp_compression = 4, // option range is 0-6
+    .jxl_distance = 1.0,   // option range is 0.0-15.0
+    .jxl_effort = 4,       // option range is 1-9
+    .avif_encoder = "libaom-av1",
+    .avif_pixfmt = "yuv420p",
+    .avif_opts = (char*[]){ // flat key, value, key, value, ... list ending in NULL
+        "usage",    "allintra",
+        "crf",      "32",
+        "cpu-used", "8",
+        "tune",     "ssim",
+        NULL
+    },
+    .tag_csp = true,
+};
+
+const struct m_opt_choice_alternatives mp_image_writer_formats[] = { // name -> AV_CODEC_ID_*
+    {"jpg",  AV_CODEC_ID_MJPEG},
+    {"jpeg", AV_CODEC_ID_MJPEG}, // second name for the same codec id as "jpg"
+    {"png",  AV_CODEC_ID_PNG},
+    {"webp", AV_CODEC_ID_WEBP},
+#if HAVE_JPEGXL
+    {"jxl",  AV_CODEC_ID_JPEGXL},
+#endif
+#if HAVE_AVIF_MUXER
+    {"avif",  AV_CODEC_ID_AV1}, // the choice value is the AV1 codec id
+#endif
+    {0} // terminating entry
+};
+
+#define OPT_BASE_STRUCT struct image_writer_opts
+
+const struct m_option image_writer_opts[] = { // option name -> member of struct image_writer_opts
+    {"format", OPT_CHOICE_C(format, mp_image_writer_formats)},
+    {"jpeg-quality", OPT_INT(jpeg_quality), M_RANGE(0, 100)},
+    {"jpeg-source-chroma", OPT_BOOL(jpeg_source_chroma)},
+    {"png-compression", OPT_INT(png_compression), M_RANGE(0, 9)},
+    {"png-filter", OPT_INT(png_filter), M_RANGE(0, 5)},
+    {"webp-lossless", OPT_BOOL(webp_lossless)},
+    {"webp-quality", OPT_INT(webp_quality), M_RANGE(0, 100)},
+    {"webp-compression", OPT_INT(webp_compression), M_RANGE(0, 6)},
+#if HAVE_JPEGXL
+    {"jxl-distance", OPT_DOUBLE(jxl_distance), M_RANGE(0.0, 15.0)},
+    {"jxl-effort", OPT_INT(jxl_effort), M_RANGE(1, 9)},
+#endif
+#if HAVE_AVIF_MUXER
+    {"avif-encoder", OPT_STRING(avif_encoder)},
+    {"avif-opts", OPT_KEYVALUELIST(avif_opts)},
+    {"avif-pixfmt", OPT_STRING(avif_pixfmt)},
+#endif
+    {"high-bit-depth", OPT_BOOL(high_bit_depth)},
+    {"tag-colorspace", OPT_BOOL(tag_csp)},
+    {0}, // terminating entry
+};
+
+struct image_writer_ctx {
+    struct mp_log *log;                    // log target (e.g. mp_msg_test(ctx->log, ...))
+    const struct image_writer_opts *opts;  // format selection and per-codec settings
+    struct mp_imgfmt_desc original_format; // write_jpeg() reads chroma_xs/chroma_ys from it
+};
+
+static enum AVPixelFormat replace_j_format(enum AVPixelFormat fmt)
+{
+    // Map the three planar YUV formats to their YUVJ twins; others pass through.
+    if (fmt == AV_PIX_FMT_YUV420P)
+        return AV_PIX_FMT_YUVJ420P;
+    if (fmt == AV_PIX_FMT_YUV422P)
+        return AV_PIX_FMT_YUVJ422P;
+    return fmt == AV_PIX_FMT_YUV444P ? AV_PIX_FMT_YUVJ444P : fmt;
+}
+
+static void prepare_avframe(AVFrame *pic, AVCodecContext *avctx,
+                            mp_image_t *image, bool tag_csp,
+                            struct mp_log *log)
+{
+    for (int plane = 0; plane < 4; plane++) {
+        pic->linesize[plane] = image->stride[plane];
+        pic->data[plane] = image->planes[plane];
+    }
+    pic->width = avctx->width;
+    pic->height = avctx->height;
+    pic->format = avctx->pix_fmt;
+    // The color range is always tagged; everything else only if tag_csp is set.
+    pic->color_range = mp_csp_levels_to_avcol_range(image->params.color.levels);
+    avctx->color_range = pic->color_range;
+    if (!tag_csp)
+        return;
+
+    pic->color_primaries = mp_csp_prim_to_avcol_pri(image->params.color.primaries);
+    avctx->color_primaries = pic->color_primaries;
+    pic->color_trc = mp_csp_trc_to_avcol_trc(image->params.color.gamma);
+    avctx->color_trc = pic->color_trc;
+    pic->colorspace = mp_csp_to_avcol_spc(image->params.color.space);
+    avctx->colorspace = pic->colorspace;
+    pic->chroma_location = mp_chroma_location_to_av(image->params.chroma_location);
+    avctx->chroma_sample_location = pic->chroma_location;
+    mp_dbg(log, "mapped color params:\n"
+        "  trc = %s\n"
+        "  primaries = %s\n"
+        "  range = %s\n"
+        "  colorspace = %s\n"
+        "  chroma_location = %s\n",
+        av_color_transfer_name(avctx->color_trc),
+        av_color_primaries_name(avctx->color_primaries),
+        av_color_range_name(avctx->color_range),
+        av_color_space_name(avctx->colorspace),
+        av_chroma_location_name(avctx->chroma_sample_location));
+}
+
+static bool write_lavc(struct image_writer_ctx *ctx, mp_image_t *image, const char *filename)
+{
+    FILE *fp = fopen(filename, "wb");
+    if (!fp) {
+        MP_ERR(ctx, "Error opening '%s' for writing!\n", filename);
+        return false;
+    }
+    // Encodes image with the libavcodec encoder for opts->format into fp.
+    bool success = false;
+    AVFrame *pic = NULL;
+    AVPacket *pkt = NULL;
+
+    const AVCodec *codec;
+    if (ctx->opts->format == AV_CODEC_ID_WEBP) {
+        codec = avcodec_find_encoder_by_name("libwebp"); // non-animated encoder
+    } else {
+        codec = avcodec_find_encoder(ctx->opts->format);
+    }
+
+    AVCodecContext *avctx = NULL;
+    if (!codec)
+        goto print_open_fail; // label sits inside the avcodec_open2() failure branch
+    avctx = avcodec_alloc_context3(codec);
+    if (!avctx)
+        goto print_open_fail;
+
+    avctx->time_base = AV_TIME_BASE_Q;
+    avctx->width = image->w;
+    avctx->height = image->h;
+    avctx->pix_fmt = imgfmt2pixfmt(image->imgfmt);
+    if (codec->id == AV_CODEC_ID_MJPEG) {
+        // Annoying deprecated garbage for the jpg encoder.
+        if (image->params.color.levels == MP_CSP_LEVELS_PC)
+            avctx->pix_fmt = replace_j_format(avctx->pix_fmt);
+    }
+    if (avctx->pix_fmt == AV_PIX_FMT_NONE) {
+        MP_ERR(ctx, "Image format %s not supported by lavc.\n",
+               mp_imgfmt_to_name(image->imgfmt));
+        goto error_exit;
+    }
+    // Per-codec encoder settings taken from the user options.
+    if (codec->id == AV_CODEC_ID_MJPEG) {
+        avctx->flags |= AV_CODEC_FLAG_QSCALE;
+        // jpeg_quality is set below
+    } else if (codec->id == AV_CODEC_ID_PNG) {
+        avctx->compression_level = ctx->opts->png_compression;
+        av_opt_set_int(avctx, "pred", ctx->opts->png_filter,
+                       AV_OPT_SEARCH_CHILDREN);
+    } else if (codec->id == AV_CODEC_ID_WEBP) {
+        avctx->compression_level = ctx->opts->webp_compression;
+        av_opt_set_int(avctx, "lossless", ctx->opts->webp_lossless,
+                       AV_OPT_SEARCH_CHILDREN);
+        av_opt_set_int(avctx, "quality", ctx->opts->webp_quality,
+                       AV_OPT_SEARCH_CHILDREN);
+#if HAVE_JPEGXL
+    } else if (codec->id == AV_CODEC_ID_JPEGXL) {
+        av_opt_set_double(avctx, "distance", ctx->opts->jxl_distance,
+                          AV_OPT_SEARCH_CHILDREN);
+        av_opt_set_int(avctx, "effort", ctx->opts->jxl_effort,
+                       AV_OPT_SEARCH_CHILDREN);
+#endif
+    }
+
+    if (avcodec_open2(avctx, codec, NULL) < 0) {
+     print_open_fail:
+        MP_ERR(ctx, "Could not open libavcodec encoder for saving images\n");
+        goto error_exit;
+    }
+
+    pic = av_frame_alloc();
+    if (!pic)
+        goto error_exit;
+    prepare_avframe(pic, avctx, image, ctx->opts->tag_csp, ctx->log);
+    if (codec->id == AV_CODEC_ID_MJPEG) {
+        int qscale = 1 + (100 - ctx->opts->jpeg_quality) * 30 / 100; // quality 100 -> 1, 0 -> 31
+        pic->quality = qscale * FF_QP2LAMBDA;
+    }
+    // One frame in, then EOF, then exactly one packet is read back.
+    int ret = avcodec_send_frame(avctx, pic);
+    if (ret < 0)
+        goto error_exit;
+    ret = avcodec_send_frame(avctx, NULL); // send EOF
+    if (ret < 0)
+        goto error_exit;
+    pkt = av_packet_alloc();
+    if (!pkt)
+        goto error_exit;
+    ret = avcodec_receive_packet(avctx, pkt);
+    if (ret < 0)
+        goto error_exit;
+
+    success = fwrite(pkt->data, pkt->size, 1, fp) == 1;
+
+error_exit: // shared cleanup, also reached on success; success is false on every error path
+    avcodec_free_context(&avctx);
+    av_frame_free(&pic);
+    av_packet_free(&pkt);
+    return !fclose(fp) && success; // a failing fclose() also makes the result false
+}
+
+#if HAVE_JPEG
+
+static void write_jpeg_error_exit(j_common_ptr cinfo)
+{
+  // NOTE: do not write error message, too much effort to connect the libjpeg
+  //       log callbacks with mplayer's log function mp_msg()
+  // client_data is the jmp_buf pointer stored by write_jpeg().
+  // Return control to the setjmp point
+  longjmp(*(jmp_buf*)cinfo->client_data, 1);
+}
+
+static bool write_jpeg(struct image_writer_ctx *ctx, mp_image_t *image,
+                       const char *filename)
+{
+    // Encodes plane 0 of image as 3-component RGB JPEG; false on any failure.
+    FILE *fp = fopen(filename, "wb");
+    if (!fp) {
+        MP_ERR(ctx, "Error opening '%s' for writing!\n", filename);
+        return false;
+    }
+
+    struct jpeg_compress_struct cinfo;
+    struct jpeg_error_mgr jerr;
+
+    cinfo.err = jpeg_std_error(&jerr);
+    jerr.error_exit = write_jpeg_error_exit;
+    // write_jpeg_error_exit() longjmp()s back here via cinfo.client_data.
+    // setjmp() is given the jmp_buf object itself, not a void * to it.
+    jmp_buf error_return_jmpbuf;
+    cinfo.client_data = &error_return_jmpbuf;
+    if (setjmp(error_return_jmpbuf)) {
+        jpeg_destroy_compress(&cinfo);
+        fclose(fp);
+        return false;
+    }
+
+    jpeg_create_compress(&cinfo);
+    jpeg_stdio_dest(&cinfo, fp);
+
+    cinfo.image_width = image->w;
+    cinfo.image_height = image->h;
+    cinfo.input_components = 3;
+    cinfo.in_color_space = JCS_RGB;
+
+    cinfo.write_JFIF_header = TRUE;
+    cinfo.JFIF_major_version = 1;
+    cinfo.JFIF_minor_version = 2;
+
+    jpeg_set_defaults(&cinfo);
+    jpeg_set_quality(&cinfo, ctx->opts->jpeg_quality, 0);
+    // Optionally take component 0's sampling factors from the original format.
+    if (ctx->opts->jpeg_source_chroma) {
+        cinfo.comp_info[0].h_samp_factor = 1 << ctx->original_format.chroma_xs;
+        cinfo.comp_info[0].v_samp_factor = 1 << ctx->original_format.chroma_ys;
+    }
+
+    jpeg_start_compress(&cinfo, TRUE);
+    // Feed the encoder one scanline at a time.
+    while (cinfo.next_scanline < cinfo.image_height) {
+        JSAMPROW row_pointer[1];
+        row_pointer[0] = image->planes[0] +
+                         (ptrdiff_t)cinfo.next_scanline * image->stride[0];
+        jpeg_write_scanlines(&cinfo, row_pointer,1);
+    }
+
+    jpeg_finish_compress(&cinfo);
+    jpeg_destroy_compress(&cinfo);
+    return !fclose(fp);
+}
+
+#endif
+
+#if HAVE_AVIF_MUXER
+
+static void log_side_data(struct image_writer_ctx *ctx, AVPacketSideData *data,
+                          size_t size)
+{
+    // Debug helper: hex-dumps up to the first 64 bytes of each side data entry.
+    if (!mp_msg_test(ctx->log, MSGL_DEBUG) || !size)
+        return;
+    MP_DBG(ctx, "write_avif() packet side data:\n");
+    for (size_t i = 0; i < size; i++) {
+        AVPacketSideData *sd = &data[i];
+        char dbgbuff[129] = ""; // must be a valid string even if sd->size is 0
+        for (int k = 0; k < MPMIN(sd->size, 64); k++)
+            snprintf(dbgbuff + k*2, 3, "%02x", (int)sd->data[k]);
+        MP_DBG(ctx, "  [%d] = {[%s], '%s'}\n",
+               (int)i, av_packet_side_data_name(sd->type), dbgbuff);
+    }
+}
+
+// Encode `image` as a single-picture AVIF file at `filename`.
+//
+// The frame is encoded with the libavcodec encoder named by
+// opts->avif_encoder and muxed with the libavformat output format "avif".
+// Returns true only if every step succeeded, including closing the output.
+// All error paths jump to free_data, which releases whatever was allocated.
+static bool write_avif(struct image_writer_ctx *ctx, mp_image_t *image,
+                       const char *filename)
+{
+    const AVCodec *codec = NULL;
+    const AVOutputFormat *ofmt = NULL;
+    AVCodecContext *avctx = NULL;
+    AVIOContext *avioctx = NULL;
+    AVFormatContext *fmtctx = NULL;
+    AVStream *stream = NULL;
+    AVFrame *pic = NULL;
+    AVPacket *pkt = NULL;
+    int ret;
+    bool success = false;
+
+    codec = avcodec_find_encoder_by_name(ctx->opts->avif_encoder);
+    if (!codec) {
+        MP_ERR(ctx, "Could not find encoder '%s', for saving images\n",
+               ctx->opts->avif_encoder);
+        goto free_data;
+    }
+
+    ofmt = av_guess_format("avif", NULL, NULL);
+    if (!ofmt) {
+        MP_ERR(ctx, "Could not guess output format 'avif'\n");
+        goto free_data;
+    }
+
+    avctx = avcodec_alloc_context3(codec);
+    if (!avctx) {
+        MP_ERR(ctx, "Failed to allocate AVContext.\n");
+        goto free_data;
+    }
+
+    avctx->width = image->w;
+    avctx->height = image->h;
+    avctx->time_base = (AVRational){1, 30};
+    avctx->pkt_timebase = (AVRational){1, 30};
+    avctx->codec_type = AVMEDIA_TYPE_VIDEO;
+    avctx->pix_fmt = imgfmt2pixfmt(image->imgfmt);
+    if (avctx->pix_fmt == AV_PIX_FMT_NONE) {
+        MP_ERR(ctx, "Image format %s not supported by lavc.\n",
+               mp_imgfmt_to_name(image->imgfmt));
+        goto free_data;
+    }
+
+    // Encoder options: set "still-picture", then apply the user-supplied
+    // opts->avif_opts. The results of both calls are not checked.
+    av_opt_set_int(avctx, "still-picture", 1, AV_OPT_SEARCH_CHILDREN);
+
+    AVDictionary *avd = NULL;
+    mp_set_avdict(&avd, ctx->opts->avif_opts);
+    av_opt_set_dict2(avctx, &avd, AV_OPT_SEARCH_CHILDREN);
+    av_dict_free(&avd);
+
+    pic = av_frame_alloc();
+    if (!pic) {
+        MP_ERR(ctx, "Could not allocate AVFrame\n");
+        goto free_data;
+    }
+
+    prepare_avframe(pic, avctx, image, ctx->opts->tag_csp, ctx->log);
+    // Not setting this flag caused ffmpeg to output avif that was not passing
+    // standard checks but ffmpeg would still read and not complain...
+    avctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER;
+
+    ret = avcodec_open2(avctx, codec, NULL);
+    if (ret < 0) {
+        MP_ERR(ctx, "Could not open libavcodec encoder for saving images\n");
+        goto free_data;
+    }
+
+    ret = avio_open(&avioctx, filename, AVIO_FLAG_WRITE);
+    if (ret < 0) {
+        MP_ERR(ctx, "Could not open file '%s' for saving images\n", filename);
+        goto free_data;
+    }
+
+    fmtctx = avformat_alloc_context();
+    if (!fmtctx) {
+        MP_ERR(ctx, "Could not allocate format context\n");
+        goto free_data;
+    }
+    // The muxer writes through avioctx, which is closed separately at
+    // free_data.
+    fmtctx->pb = avioctx;
+    fmtctx->oformat = ofmt;
+
+    stream = avformat_new_stream(fmtctx, codec);
+    if (!stream) {
+        MP_ERR(ctx, "Could not allocate stream\n");
+        goto free_data;
+    }
+
+    ret = avcodec_parameters_from_context(stream->codecpar, avctx);
+    if (ret < 0) {
+        MP_ERR(ctx, "Could not copy parameters from context\n");
+        goto free_data;
+    }
+
+    ret = avformat_init_output(fmtctx, NULL);
+    if (ret < 0) {
+        MP_ERR(ctx, "Could not initialize output\n");
+        goto free_data;
+    }
+
+    ret = avformat_write_header(fmtctx, NULL);
+    if (ret < 0) {
+        MP_ERR(ctx, "Could not write format header\n");
+        goto free_data;
+    }
+
+    pkt = av_packet_alloc();
+    if (!pkt) {
+        MP_ERR(ctx, "Could not allocate packet\n");
+        goto free_data;
+    }
+
+    // Submit the single frame, then signal end of stream so the encoder
+    // emits everything it has buffered.
+    ret = avcodec_send_frame(avctx, pic);
+    if (ret < 0) {
+        MP_ERR(ctx, "Error sending frame\n");
+        goto free_data;
+    }
+    ret = avcodec_send_frame(avctx, NULL); // send EOF
+    if (ret < 0)
+        goto free_data;
+
+    // Drain the encoder. Each packet gets the next timestamp (starting at 1)
+    // for both pts and dts before it is written.
+    int pts = 0;
+    log_side_data(ctx, avctx->coded_side_data, avctx->nb_coded_side_data);
+    while (ret >= 0) {
+        ret = avcodec_receive_packet(avctx, pkt);
+        if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF)
+            break;
+        if (ret < 0) {
+            MP_ERR(ctx, "Error receiving packet\n");
+            goto free_data;
+        }
+        pkt->dts = pkt->pts = ++pts;
+        pkt->stream_index = stream->index;
+        log_side_data(ctx, pkt->side_data, pkt->side_data_elems);
+
+        ret = av_write_frame(fmtctx, pkt);
+        if (ret < 0) {
+            MP_ERR(ctx, "Error writing frame\n");
+            goto free_data;
+        }
+        av_packet_unref(pkt);
+    }
+
+    ret = av_write_trailer(fmtctx);
+    if (ret < 0) {
+        MP_ERR(ctx, "Could not write trailer\n");
+        goto free_data;
+    }
+    MP_DBG(ctx, "write_avif(): avio_size() = %"PRIi64"\n", avio_size(avioctx));
+
+    success = true;
+
+free_data:
+    // A non-zero result from closing the output turns success into failure.
+    success = !avio_closep(&avioctx) && success;
+    avformat_free_context(fmtctx);
+    avcodec_free_context(&avctx);
+    av_packet_free(&pkt);
+    av_frame_free(&pic);
+
+    return success;
+}
+
+#endif
+
+// Choose, among the pixel formats advertised by `codec`, the image format
+// that mp_imgfmt_select_best() ranks best for converting from `srcfmt`.
+// Unless `highdepth` is set, formats whose components are larger than one
+// byte (or whose layout cannot be determined) are not considered.
+// Returns 0 if no usable format was found.
+static int get_encoder_format(const AVCodec *codec, int srcfmt, bool highdepth)
+{
+    const enum AVPixelFormat *supported = codec->pix_fmts;
+    int best = 0;
+    if (!supported)
+        return best;
+
+    for (; *supported != AV_PIX_FMT_NONE; supported++) {
+        int candidate = pixfmt2imgfmt(*supported);
+        if (!candidate)
+            continue;
+        if (!highdepth) {
+            // Try the format itself first, then its other-endian variant.
+            struct mp_regular_imgfmt rdesc;
+            bool known = mp_get_regular_imgfmt(&rdesc, candidate) ||
+                mp_get_regular_imgfmt(&rdesc, mp_find_other_endian(candidate));
+            if (!known || rdesc.component_size > 1)
+                continue;
+        }
+        if (best) {
+            best = mp_imgfmt_select_best(best, candidate, srcfmt);
+        } else {
+            best = candidate;
+        }
+    }
+    return best;
+}
+
+// Determine the image format to convert to before handing the image to the
+// libavcodec encoder for opts->format. Prefers a format honoring
+// opts->high_bit_depth, retries with high-depth formats allowed, and falls
+// back to IMGFMT_RGB0 if the encoder is missing or offers nothing usable.
+static int get_target_format(struct image_writer_ctx *ctx)
+{
+    int target = 0;
+    const AVCodec *codec = avcodec_find_encoder(ctx->opts->format);
+
+    if (codec) {
+        int srcfmt = ctx->original_format.id;
+        target = get_encoder_format(codec, srcfmt, ctx->opts->high_bit_depth);
+        if (!target) {
+            mp_dbg(ctx->log, "Falling back to high-depth format.\n");
+            target = get_encoder_format(codec, srcfmt, true);
+        }
+    }
+
+    return target ? target : IMGFMT_RGB0;
+}
+
+// Return the name registered in mp_image_writer_formats for opts->format.
+// A NULL opts selects the format from image_writer_opts_defaults.
+const char *image_writer_file_ext(const struct image_writer_opts *opts)
+{
+    struct image_writer_opts defs = image_writer_opts_defaults;
+    const struct image_writer_opts *effective = opts ? opts : &defs;
+
+    return m_opt_choice_str(mp_image_writer_formats, effective->format);
+}
+
+// Report whether opts->format is one of the formats treated as capable of
+// more than 8 bits per component: PNG always, JPEG XL and AVIF (AV1) when
+// support for them is compiled in. opts must not be NULL.
+bool image_writer_high_depth(const struct image_writer_opts *opts)
+{
+    if (opts->format == AV_CODEC_ID_PNG)
+        return true;
+#if HAVE_JPEGXL
+    if (opts->format == AV_CODEC_ID_JPEGXL)
+        return true;
+#endif
+#if HAVE_AVIF_MUXER
+    if (opts->format == AV_CODEC_ID_AV1)
+        return true;
+#endif
+    return false;
+}
+
+// Report whether colorspace information will be tagged in the output.
+// This requires opts->tag_csp and a format treated as able to carry the
+// tags: JPEG XL and AVIF (AV1) when compiled in, and PNG when libavcodec is
+// at least version 59.58.100. opts must not be NULL.
+bool image_writer_flexible_csp(const struct image_writer_opts *opts)
+{
+    if (!opts->tag_csp)
+        return false;
+#if HAVE_JPEGXL
+    if (opts->format == AV_CODEC_ID_JPEGXL)
+        return true;
+#endif
+#if HAVE_AVIF_MUXER
+    if (opts->format == AV_CODEC_ID_AV1)
+        return true;
+#endif
+#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 58, 100)
+    // This version added support for cICP tag writing
+    if (opts->format == AV_CODEC_ID_PNG)
+        return true;
+#endif
+    return false;
+}
+
+// Look up `ext` among the names in mp_image_writer_formats and return the
+// associated format value. Returns 0 if ext is NULL or not listed.
+int image_writer_format_from_ext(const char *ext)
+{
+    if (!ext)
+        return 0;
+
+    int idx = 0;
+    while (mp_image_writer_formats[idx].name) {
+        if (!strcmp(mp_image_writer_formats[idx].name, ext))
+            return mp_image_writer_formats[idx].value;
+        idx++;
+    }
+    return 0;
+}
+
+// Convert `image` to pixel format `destfmt` for writing.
+//
+// The target uses the size reported by mp_image_params_get_dsize() with a
+// 1:1 pixel aspect ratio and starts from the source's color parameters. If
+// the output format cannot tag colorspaces (image_writer_flexible_csp()
+// returns false), fixed color parameters are forced instead; for non-RGB
+// targets this includes `yuv_levels`. opts must not be NULL. If `global` is
+// non-NULL, command line scaler options are enabled on the scaler.
+//
+// If the image has a valid crop rectangle that differs from its size, only
+// the cropped area is converted.
+//
+// Returns a new reference to `image` if its parameters already match, a
+// newly allocated converted image otherwise, or NULL on failure. The caller
+// owns the returned image.
+static struct mp_image *convert_image(struct mp_image *image, int destfmt,
+                                      enum mp_csp_levels yuv_levels,
+                                      const struct image_writer_opts *opts,
+                                      struct mpv_global *global,
+                                      struct mp_log *log)
+{
+    int d_w, d_h;
+    mp_image_params_get_dsize(&image->params, &d_w, &d_h);
+
+    struct mp_image_params p = {
+        .imgfmt = destfmt,
+        .w = d_w,
+        .h = d_h,
+        .p_w = 1,
+        .p_h = 1,
+        .color = image->params.color,
+    };
+    mp_image_params_guess_csp(&p);
+
+    if (!image_writer_flexible_csp(opts)) {
+        // If our format can't tag csps, set something sane
+        p.color.primaries = MP_CSP_PRIM_BT_709;
+        p.color.gamma = MP_CSP_TRC_AUTO;
+        p.color.light = MP_CSP_LIGHT_DISPLAY;
+        p.color.hdr = (struct pl_hdr_metadata){0};
+        if (p.color.space != MP_CSP_RGB) {
+            p.color.levels = yuv_levels;
+            p.color.space = MP_CSP_BT_601;
+            p.chroma_location = MP_CHROMA_CENTER;
+        }
+        mp_image_params_guess_csp(&p);
+    }
+
+    if (mp_image_params_equal(&p, &image->params))
+        return mp_image_new_ref(image);
+
+    mp_dbg(log, "will convert image to %s\n", mp_imgfmt_to_name(p.imgfmt));
+
+    // Work on a private reference when cropping, so the caller's image is
+    // left untouched.
+    struct mp_image *src = image;
+    if (mp_image_crop_valid(&src->params) &&
+        (mp_rect_w(src->params.crop) != src->w ||
+         mp_rect_h(src->params.crop) != src->h))
+    {
+        src = mp_image_new_ref(src);
+        if (!src) {
+            mp_err(log, "mp_image_new_ref failed!\n");
+            return NULL;
+        }
+        mp_image_crop_rc(src, src->params.crop);
+    }
+
+    struct mp_image *dst = mp_image_alloc(p.imgfmt, p.w, p.h);
+    if (!dst) {
+        mp_err(log, "Out of memory.\n");
+        // Do not leak the private cropped reference on this error path.
+        if (src != image)
+            talloc_free(src);
+        return NULL;
+    }
+    mp_image_copy_attributes(dst, src);
+
+    // Overrides the parameters copied from src with the intended target.
+    dst->params = p;
+
+    struct mp_sws_context *sws = mp_sws_alloc(NULL);
+    sws->log = log;
+    if (global)
+        mp_sws_enable_cmdline_opts(sws, global);
+    bool ok = mp_sws_scale(sws, dst, src) >= 0;
+    talloc_free(sws);
+
+    if (src != image)
+        talloc_free(src);
+
+    if (!ok) {
+        mp_err(log, "Error when converting image.\n");
+        talloc_free(dst);
+        return NULL;
+    }
+
+    return dst;
+}
+
+// Save `image` to `filename` in the format selected by opts->format.
+// A NULL opts selects image_writer_opts_defaults. The image is first
+// converted to a pixel format suitable for the chosen writer, then encoded.
+// `global` is passed on to the conversion step and may be NULL.
+// Returns true on success; failures are logged to `log`.
+bool write_image(struct mp_image *image, const struct image_writer_opts *opts,
+                 const char *filename, struct mpv_global *global,
+                 struct mp_log *log)
+{
+    struct image_writer_opts defs = image_writer_opts_defaults;
+    if (!opts)
+        opts = &defs;
+
+    mp_dbg(log, "input: %s\n", mp_image_params_to_str(&image->params));
+
+    struct image_writer_ctx ctx = { log, opts, image->fmt };
+    // Writer backend; libavcodec unless a dedicated writer is picked below.
+    bool (*writer)(struct image_writer_ctx *, mp_image_t *, const char *);
+    writer = write_lavc;
+    // 0 means the target format has not been decided yet.
+    int destfmt = 0;
+
+#if HAVE_JPEG
+    if (opts->format == AV_CODEC_ID_MJPEG) {
+        writer = write_jpeg;
+        destfmt = IMGFMT_RGB24;
+    }
+#endif
+#if HAVE_AVIF_MUXER
+    if (opts->format == AV_CODEC_ID_AV1) {
+        writer = write_avif;
+        destfmt = mp_imgfmt_from_name(bstr0(opts->avif_pixfmt));
+    }
+#endif
+    bool lossy_webp = opts->format == AV_CODEC_ID_WEBP && !opts->webp_lossless;
+    if (lossy_webp) {
+        // For lossy images, libwebp has its own RGB->YUV conversion.
+        // We don't want that, so force YUV/YUVA here.
+        bool has_alpha = image->fmt.flags & MP_IMGFLAG_ALPHA;
+        destfmt = has_alpha ? pixfmt2imgfmt(AV_PIX_FMT_YUVA420P) : IMGFMT_420P;
+    }
+
+    if (destfmt == 0)
+        destfmt = get_target_format(&ctx);
+
+    // Ignored if destfmt is a RGB format
+    enum mp_csp_levels levels = MP_CSP_LEVELS_PC;
+    if (opts->format == AV_CODEC_ID_WEBP)
+        levels = MP_CSP_LEVELS_TV;
+
+    struct mp_image *dst = convert_image(image, destfmt, levels, opts, global, log);
+    if (!dst)
+        return false;
+
+    bool success = writer(&ctx, dst, filename);
+    if (!success)
+        mp_err(log, "Error writing file '%s'!\n", filename);
+
+    talloc_free(dst);
+    return success;
+}
+
+// Debugging helper: write `image` to `filename` as PNG, using the default
+// writer options otherwise. The result of the write is not reported.
+void dump_png(struct mp_image *image, const char *filename, struct mp_log *log)
+{
+    struct image_writer_opts png_opts = image_writer_opts_defaults;
+    png_opts.format = AV_CODEC_ID_PNG;
+
+    write_image(image, &png_opts, filename, NULL, log);
+}
diff --git a/video/image_writer.h b/video/image_writer.h
new file mode 100644
index 0000000..72d1602
--- /dev/null
+++ b/video/image_writer.h
@@ -0,0 +1,74 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "options/m_option.h"
+
+struct mp_image;
+struct mp_log;
+
+// Settings that control how write_image() encodes an image file.
+// NOTE(review): the png_*, webp_quality, webp_compression and jxl_* fields
+// are presumably forwarded to the respective encoders; their exact meaning
+// and ranges should be confirmed against the option table.
+struct image_writer_opts {
+    // Output format; compared against AV_CODEC_ID_* values.
+    int format;
+    // Allow target pixel formats with components larger than 8 bits.
+    bool high_bit_depth;
+    int png_compression;
+    int png_filter;
+    // Quality value passed to the JPEG encoder.
+    int jpeg_quality;
+    // Derive JPEG sampling factors from the source format's chroma shift.
+    bool jpeg_source_chroma;
+    // If false, WebP input is forced to a YUV/YUVA 4:2:0 format.
+    bool webp_lossless;
+    int webp_quality;
+    int webp_compression;
+    double jxl_distance;
+    int jxl_effort;
+    // Name of the libavcodec encoder used for AVIF output.
+    char *avif_encoder;
+    // Name of the image format AVIF input is converted to.
+    char *avif_pixfmt;
+    // Extra options applied to the AVIF encoder context.
+    char **avif_opts;
+    // Tag colorspace information in the output if the format supports it.
+    bool tag_csp;
+};
+
+// Default settings; used whenever a NULL opts pointer is passed.
+extern const struct image_writer_opts image_writer_opts_defaults;
+
+// Option table for struct image_writer_opts.
+extern const struct m_option image_writer_opts[];
+
+// Return the file extension that will be used, e.g. "png".
+// opts may be NULL, in which case the default format is assumed.
+const char *image_writer_file_ext(const struct image_writer_opts *opts);
+
+// Return whether the selected format likely supports >8 bit per component.
+bool image_writer_high_depth(const struct image_writer_opts *opts);
+
+// Return whether the selected format likely supports non-sRGB colorspaces
+// (always false if opts->tag_csp is not set).
+bool image_writer_flexible_csp(const struct image_writer_opts *opts);
+
+// Map file extension to format ID - return 0 (which is invalid) if unknown.
+int image_writer_format_from_ext(const char *ext);
+
+/*
+ * Save the given image under the given filename. The opts parameter is
+ * optional; if it is NULL, image_writer_opts_defaults is used. All pixel
+ * formats supported by swscale are supported.
+ *
+ * File format and compression settings are controlled via the opts parameter.
+ *
+ * If global!=NULL, use command line scaler options etc.
+ *
+ * Returns true on success, false if converting or writing the image failed.
+ *
+ * NOTE: The fields w/h/width/height of the passed mp_image must be all set
+ *       accordingly. Setting w and width or h and height to different values
+ *       can be used to store snapshots of anamorphic video.
+ */
+bool write_image(struct mp_image *image, const struct image_writer_opts *opts,
+                const char *filename, struct mpv_global *global,
+                 struct mp_log *log);
+
+// Debugging helper: write the image as PNG with default settings.
+void dump_png(struct mp_image *image, const char *filename, struct mp_log *log);
diff --git a/video/img_format.c b/video/img_format.c
new file mode 100644
index 0000000..6b7857f
--- /dev/null
+++ b/video/img_format.c
@@ -0,0 +1,824 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <string.h>
+
+#include <libavcodec/avcodec.h>
+#include <libavutil/imgutils.h>
+#include <libavutil/pixfmt.h>
+#include <libavutil/pixdesc.h>
+
+#include "video/img_format.h"
+#include "video/mp_image.h"
+#include "video/fmt-conversion.h"
+
+// One slot of mp_imgfmt_list[]: an image format that gets an mpv-specific
+// name and, optionally, a hand-written descriptor.
+struct mp_imgfmt_entry {
+    // Format name. A NULL name marks an unused slot (see get_mp_desc()).
+    const char *name;
+    // Valid if flags!=0.
+    // This can be incomplete, and missing fields are filled in:
+    //  - sets num_planes and bpp[], derived from comps[] (rounds to bytes)
+    //  - sets MP_IMGFLAG_GRAY, derived from comps[]
+    //  - sets MP_IMGFLAG_ALPHA, derived from comps[]
+    //  - sets align_x/y if 0, derived from chroma shift
+    //  - sets xs[]/ys[] always, derived from num_planes/chroma_shift
+    //  - sets MP_IMGFLAG_HAS_COMPS|MP_IMGFLAG_NE if num_planes>0
+    //  - sets MP_IMGFLAG_TYPE_UINT if no other type set
+    //  - sets id to mp_imgfmt_list[] implied format
+    struct mp_imgfmt_desc desc;
+};
+
+// Table entry for format ID `def`, named `dname`: an RGB format with three
+// components stored in separate planes (comps[0..2] use planes 2, 0 and 1),
+// each at offset 0 with size 8 and a fourth initializer value of (b) - 8.
+// NOTE(review): the fourth value is presumably the pad field, making `b` the
+// number of significant bits per component - confirm against the
+// definition of struct mp_imgfmt_comp_desc.
+#define FRINGE_GBRP(def, dname, b)                                          \
+    [def - IMGFMT_CUST_BASE] = {                                            \
+        .name = dname,                                                      \
+        .desc = { .flags = MP_IMGFLAG_COLOR_RGB,                            \
+                  .comps = { {2, 0, 8, (b) - 8}, {0, 0, 8, (b) - 8},        \
+                             {1, 0, 8, (b) - 8}, }, }}
+
+// Table entry for format ID `def`, named `dname`: a YUV format with float
+// components of 32 bits each, one plane per component. `xs` and `ys` are the
+// horizontal and vertical chroma shifts. `a` must be 0 or 1: with 1 a fourth
+// 32-bit component in plane 3 is added, with 0 that component stays empty.
+#define FLOAT_YUV(def, dname, xs, ys, a)                                    \
+    [def - IMGFMT_CUST_BASE] = {                                            \
+        .name = dname,                                                      \
+        .desc = { .flags = MP_IMGFLAG_COLOR_YUV | MP_IMGFLAG_TYPE_FLOAT,    \
+                   .chroma_xs = xs, .chroma_ys = ys,                        \
+                   .comps = { {0, 0, 32}, {1, 0, 32}, {2, 0, 32},           \
+                              {3 * (a), 0, 32 * (a)} }, }}
+
+// Image formats that get an mpv-specific name and/or descriptor. The array
+// is indexed by (format ID - IMGFMT_CUST_BASE); slots that are not listed
+// here are zero-initialized and therefore have a NULL name.
+static const struct mp_imgfmt_entry mp_imgfmt_list[] = {
+    // not in ffmpeg
+    [IMGFMT_VDPAU_OUTPUT - IMGFMT_CUST_BASE] = {
+        .name = "vdpau_output",
+        .desc = {
+            .flags = MP_IMGFLAG_NE | MP_IMGFLAG_RGB | MP_IMGFLAG_HWACCEL,
+        },
+    },
+    [IMGFMT_RGB30 - IMGFMT_CUST_BASE] = {
+        .name = "rgb30",
+        .desc = {
+            .flags = MP_IMGFLAG_RGB,
+            .comps = { {0, 20, 10}, {0, 10, 10}, {0, 0, 10} },
+        },
+    },
+    [IMGFMT_YAP8 - IMGFMT_CUST_BASE] = {
+        .name = "yap8",
+        .desc = {
+            .flags = MP_IMGFLAG_COLOR_YUV,
+            .comps = { {0, 0, 8}, {0}, {0}, {1, 0, 8} },
+        },
+    },
+    [IMGFMT_YAP16 - IMGFMT_CUST_BASE] = {
+        .name = "yap16",
+        .desc = {
+            .flags = MP_IMGFLAG_COLOR_YUV,
+            .comps = { {0, 0, 16}, {0}, {0}, {1, 0, 16} },
+        },
+    },
+    [IMGFMT_Y1 - IMGFMT_CUST_BASE] = {
+        .name = "y1",
+        .desc = {
+            .flags = MP_IMGFLAG_COLOR_RGB,
+            .comps = { {0, 0, 8, -7} },
+        },
+    },
+    [IMGFMT_YAPF - IMGFMT_CUST_BASE] = {
+        .name = "grayaf32", // try to mimic ffmpeg naming convention
+        .desc = {
+            .flags = MP_IMGFLAG_COLOR_YUV | MP_IMGFLAG_TYPE_FLOAT,
+            .comps = { {0, 0, 32}, {0}, {0}, {1, 0, 32} },
+        },
+    },
+    // Planar float YUV: (ID, name, chroma_xs, chroma_ys, has alpha)
+    FLOAT_YUV(IMGFMT_444PF,  "yuv444pf",  0, 0, 0),
+    FLOAT_YUV(IMGFMT_444APF, "yuva444pf", 0, 0, 1),
+    FLOAT_YUV(IMGFMT_420PF,  "yuv420pf",  1, 1, 0),
+    FLOAT_YUV(IMGFMT_420APF, "yuva420pf", 1, 1, 1),
+    FLOAT_YUV(IMGFMT_422PF,  "yuv422pf",  1, 0, 0),
+    FLOAT_YUV(IMGFMT_422APF, "yuva422pf", 1, 0, 1),
+    FLOAT_YUV(IMGFMT_440PF,  "yuv440pf",  0, 1, 0),
+    FLOAT_YUV(IMGFMT_440APF, "yuva440pf", 0, 1, 1),
+    FLOAT_YUV(IMGFMT_410PF,  "yuv410pf",  2, 2, 0),
+    FLOAT_YUV(IMGFMT_410APF, "yuva410pf", 2, 2, 1),
+    FLOAT_YUV(IMGFMT_411PF,  "yuv411pf",  2, 0, 0),
+    FLOAT_YUV(IMGFMT_411APF, "yuva411pf", 2, 0, 1),
+    FRINGE_GBRP(IMGFMT_GBRP1, "gbrp1", 1),
+    FRINGE_GBRP(IMGFMT_GBRP2, "gbrp2", 2),
+    FRINGE_GBRP(IMGFMT_GBRP3, "gbrp3", 3),
+    FRINGE_GBRP(IMGFMT_GBRP4, "gbrp4", 4),
+    FRINGE_GBRP(IMGFMT_GBRP5, "gbrp5", 5),
+    FRINGE_GBRP(IMGFMT_GBRP6, "gbrp6", 6),
+    // in FFmpeg, but FFmpeg names have an annoying "_vld" suffix
+    // (name only; no descriptor is provided for these entries)
+    [IMGFMT_VIDEOTOOLBOX - IMGFMT_CUST_BASE] = {
+        .name = "videotoolbox",
+    },
+    [IMGFMT_VAAPI - IMGFMT_CUST_BASE] = {
+        .name = "vaapi",
+    },
+};
+
+// Return the mp_imgfmt_list[] entry for `imgfmt`, or NULL if the ID lies
+// outside the table or its slot has no name.
+static const struct mp_imgfmt_entry *get_mp_desc(int imgfmt)
+{
+    if (imgfmt >= IMGFMT_CUST_BASE) {
+        int slot = imgfmt - IMGFMT_CUST_BASE;
+        if (slot < MP_ARRAY_SIZE(mp_imgfmt_list) && mp_imgfmt_list[slot].name)
+            return &mp_imgfmt_list[slot];
+    }
+    return NULL;
+}
+
+// Return a NULL-terminated, talloc-allocated array with the names of all
+// image formats in [IMGFMT_START, IMGFMT_END) that have a known name. The
+// strings are allocated as children of the array.
+char **mp_imgfmt_name_list(void)
+{
+    int capacity = IMGFMT_END - IMGFMT_START;
+    char **names = talloc_zero_array(NULL, char *, capacity + 1);
+    char **next = names;
+    for (int fmt = IMGFMT_START; fmt < IMGFMT_END; fmt++) {
+        const char *name = mp_imgfmt_to_name(fmt);
+        if (strcmp(name, "unknown") == 0)
+            continue;
+        *next++ = talloc_strdup(names, name);
+    }
+    return names;
+}
+
+// Map a format name to its image format ID. "none" maps to 0. Names listed
+// in mp_imgfmt_list[] take precedence; anything else is looked up through
+// FFmpeg's pixel format names. The result for an unknown name is whatever
+// pixfmt2imgfmt() returns for FFmpeg's failed lookup.
+int mp_imgfmt_from_name(bstr name)
+{
+    if (bstr_equals0(name, "none"))
+        return 0;
+
+    for (int idx = 0; idx < MP_ARRAY_SIZE(mp_imgfmt_list); idx++) {
+        const char *custom = mp_imgfmt_list[idx].name;
+        if (!custom || !bstr_equals0(name, custom))
+            continue;
+        return IMGFMT_CUST_BASE + idx;
+    }
+
+    // FFmpeg needs a NUL-terminated string; the copy is limited to 80 bytes.
+    const char *cname = mp_tprintf(80, "%.*s", BSTR_P(name));
+    return pixfmt2imgfmt(av_get_pix_fmt(cname));
+}
+
+// Write the name of image format `fmt` into buf (of buf_size bytes) and
+// return buf. Names from mp_imgfmt_list[] are preferred, then FFmpeg's pixel
+// format name; "unknown" is used if neither exists. A trailing "le" (on
+// little endian hosts) or "be" (on big endian hosts) is stripped, so that
+// native-endian formats are named without suffix.
+char *mp_imgfmt_to_name_buf(char *buf, size_t buf_size, int fmt)
+{
+    const char *name = NULL;
+    const struct mp_imgfmt_entry *entry = get_mp_desc(fmt);
+    if (entry)
+        name = entry->name;
+    if (!name) {
+        const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(imgfmt2pixfmt(fmt));
+        name = pixdesc ? pixdesc->name : NULL;
+    }
+    snprintf(buf, buf_size, "%s", name ? name : "unknown");
+
+    int len = strlen(buf);
+    bool native_suffix = len > 2 &&
+                         buf[len - 2] == MP_SELECT_LE_BE('l', 'b') &&
+                         buf[len - 1] == 'e';
+    if (native_suffix)
+        buf[len - 2] = '\0';
+    return buf;
+}
+
+// Fill in the memory layout part of *desc (num_planes, bpp[], comps[],
+// endian_shift and the HAS_COMPS/PACKED_SS_YUV flags) from the FFmpeg pixel
+// format descriptor pd of format fmt.
+//
+// bpp[], num_planes and endian_shift are read before they are written, so
+// the caller is expected to pass them zeroed.
+//
+// Formats that are paletted or hwaccel, that have an unexpected number of
+// components, or whose pixdesc data turns out inconsistent take the fail
+// path: comps[] is cleared, and only num_planes plus an average bits-per-pixel
+// value per plane are set.
+static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc,
+                                enum AVPixelFormat fmt,
+                                const AVPixFmtDescriptor *pd)
+{
+    if (pd->flags & AV_PIX_FMT_FLAG_PAL ||
+        pd->flags & AV_PIX_FMT_FLAG_HWACCEL)
+        goto fail;
+
+    // Only gray or 3-component formats, each optionally with alpha.
+    bool has_alpha = pd->flags & AV_PIX_FMT_FLAG_ALPHA;
+    if (pd->nb_components != 1 + has_alpha &&
+        pd->nb_components != 3 + has_alpha)
+        goto fail;
+
+    // Very convenient: we assume we're always on little endian, and FFmpeg
+    // explicitly marks big endian formats => don't need to guess whether a
+    // format is little endian, or not affected by byte order.
+    bool is_be = pd->flags & AV_PIX_FMT_FLAG_BE;
+    bool is_ne = MP_SELECT_LE_BE(false, true) == is_be;
+
+    // Packed sub-sampled YUV is very... special.
+    bool is_packed_ss_yuv = pd->log2_chroma_w && !pd->log2_chroma_h &&
+        pd->comp[1].plane == 0 && pd->comp[2].plane == 0 &&
+        pd->nb_components == 3;
+
+    if (is_packed_ss_yuv)
+        desc->bpp[0] = pd->comp[1].step * 8;
+
+    // Determine if there are any byte overlaps => relevant for determining
+    // access unit for endian, since pixdesc does not expose this, and assumes
+    // a weird model where you do separate memory fetches for each component.
+    bool any_shared_bytes = !!(pd->flags & AV_PIX_FMT_FLAG_BITSTREAM);
+    for (int c = 0; c < pd->nb_components; c++) {
+        for (int i = 0; i < c; i++) {
+            const AVComponentDescriptor *d1 = &pd->comp[c];
+            const AVComponentDescriptor *d2 = &pd->comp[i];
+            if (d1->plane == d2->plane) {
+                if (d1->offset + (d1->depth + 7) / 8u > d2->offset &&
+                    d2->offset + (d2->depth + 7) / 8u > d1->offset)
+                    any_shared_bytes = true;
+            }
+        }
+    }
+
+    // Pixdesc offsets/steps count bits for bitstream formats, bytes otherwise.
+    int el_bits = (pd->flags & AV_PIX_FMT_FLAG_BITSTREAM) ? 1 : 8;
+    for (int c = 0; c < pd->nb_components; c++) {
+        const AVComponentDescriptor *d = &pd->comp[c];
+        if (d->plane >= MP_MAX_PLANES)
+            goto fail;
+
+        desc->num_planes = MPMAX(desc->num_planes, d->plane + 1);
+
+        int plane_bits = desc->bpp[d->plane];
+        int c_bits = d->step * el_bits;
+
+        // The first component wins, because either all components result in
+        // the same value, or luma wins (luma always comes before chroma).
+        if (plane_bits) {
+            if (c_bits > plane_bits)
+                goto fail; // inconsistent
+        } else {
+            desc->bpp[d->plane] = plane_bits = c_bits;
+        }
+
+        int shift = d->shift;
+        // Oddity: for some inexplicable reason, MONOB uses shift=7
+        // in pixdesc, which is basically out of bounds. Pixdesc bug?
+        // Make it behave like MONOW. (No, the bit-order is not different.)
+        if (fmt == AV_PIX_FMT_MONOBLACK)
+            shift = 0;
+
+        int offset = d->offset * el_bits;
+        // The pixdesc logic for reading and endian swapping is as follows
+        // (reverse engineered from av_read_image_line2()):
+        // - determine a word size that will include the component fully;
+        //   this includes the "active" bits and the amount "shifted" away
+        //   (for example shift=7/depth=18 => 32 bit word reading [31:0])
+        // - the same format can use different word sizes (e.g. bgr565: the R
+        //   component at offset 0 is read as 8 bit; BG is read as 16 bits)
+        // - if BE flag is set, swap the word before proceeding
+        // - extract via shift and mask derived by depth
+        int word = mp_round_next_power_of_2(MPMAX(d->depth + shift, 8));
+        // The purpose of this is unknown. It's an absurdity fished out of
+        // av_read_image_line2()'s implementation. It seems technically
+        // unnecessary, and provides no information. On the other hand, it
+        // compensates for seemingly bogus packed integer pixdescs; this
+        // is "why" some formats use d->offset = -1.
+        if (is_be && el_bits == 8 && word == 8)
+            offset += 8;
+        // Pixdesc's model sometimes requires accesses with varying word-sizes,
+        // as seen in bgr565 and other formats. Also, it makes you read some
+        // formats with multiple endian-dependent accesses, where accessing a
+        // larger unit would make more sense. (Consider X2RGB10BE, for which
+        // pixdesc wants you to perform 3 * 2 byte accesses, and swap each of
+        // the read 16 bit words. What you really want is to swap the entire 4
+        // byte thing, and then extract the components with bit shifts).
+        // This is impractical, so we transform it into word swaps before
+        // further processing. Care needs to be taken to not change formats like
+        // P010 or YA16 (prefer component accesses for them; P010 isn't even
+        // representable, because endian_shift is for all planes).
+        // As a heuristic, assume that if any components share a byte, the whole
+        // pixel is read as a single memory access and endian swapped at once.
+        int access_size = 8;
+        if (plane_bits > 8) {
+            if (any_shared_bytes) {
+                access_size = plane_bits;
+                if (is_be && word != access_size) {
+                    // Before: offset = 8*byte_offset (with word bits of data)
+                    // After: offset = bit_offset into swapped endian_size word
+                    offset = access_size - word - offset;
+                }
+            } else {
+                access_size = word;
+            }
+        }
+        // endian_shift is log2 of the swap unit in bytes; it must be the same
+        // for every component, since desc has only one field for it.
+        int endian_size = (access_size && !is_ne) ? access_size : 8;
+        int endian_shift = mp_log2(endian_size) - 3;
+        if (!MP_IS_POWER_OF_2(endian_size) || endian_shift < 0 || endian_shift > 3)
+            goto fail;
+        if (desc->endian_shift && desc->endian_shift != endian_shift)
+            goto fail;
+        desc->endian_shift = endian_shift;
+
+        // We always use bit offsets; this doesn't lose any information,
+        // and pixdesc is merely more redundant.
+        offset += shift;
+        if (offset < 0 || offset >= (1 << 6))
+            goto fail;
+        if (offset + d->depth > plane_bits)
+            goto fail;
+        if (d->depth < 0 || d->depth >= (1 << 6))
+            goto fail;
+        desc->comps[c] = (struct mp_imgfmt_comp_desc){
+            .plane = d->plane,
+            .offset = offset,
+            .size = d->depth,
+        };
+    }
+
+    for (int p = 0; p < desc->num_planes; p++) {
+        if (!desc->bpp[p])
+            goto fail; // plane doesn't exist
+    }
+
+    // Oddity: this is probably a pixdesc bug, so fix it.
+    if (fmt == AV_PIX_FMT_RGB8) {
+        desc->comps[2] = (struct mp_imgfmt_comp_desc){0, 0, 2};
+        desc->comps[1] = (struct mp_imgfmt_comp_desc){0, 2, 3};
+        desc->comps[0] = (struct mp_imgfmt_comp_desc){0, 5, 3};
+    }
+
+    // Overlap test. If any shared bits are happening, this is not a format we
+    // can represent (or it's something like Bayer: components in the same bits,
+    // but different alternating lines).
+    bool any_shared_bits = false;
+    for (int c = 0; c < pd->nb_components; c++) {
+        for (int i = 0; i < c; i++) {
+            struct mp_imgfmt_comp_desc *c1 = &desc->comps[c];
+            struct mp_imgfmt_comp_desc *c2 = &desc->comps[i];
+            if (c1->plane == c2->plane) {
+                if (c1->offset + c1->size > c2->offset &&
+                    c2->offset + c2->size > c1->offset)
+                    any_shared_bits = true;
+            }
+        }
+    }
+
+    if (any_shared_bits) {
+        for (int c = 0; c < pd->nb_components; c++)
+            desc->comps[c] = (struct mp_imgfmt_comp_desc){0};
+    }
+
+    // Many important formats have padding within an access word. For example
+    // yuv420p10 has the upper 6 bit cleared to 0; P010 has the lower 6 bits
+    // cleared to 0. Pixdesc cannot represent that these bits are 0. There are
+    // other formats where padding is not guaranteed to be 0, but they are
+    // described in the same way.
+    // Apply a heuristic that is supposed to identify formats which use
+    // guaranteed 0 padding. This could fail, but nobody said pixdesc is
+    // robust.
+    for (int c = 0; c < pd->nb_components; c++) {
+        struct mp_imgfmt_comp_desc *cd = &desc->comps[c];
+        // Note: rgb444 would defeat our heuristic if we checked only per comp.
+        //       also, exclude "bitstream" formats due to monow/monob
+        int fsize = MP_ALIGN_UP(cd->size, 8);
+        if (!any_shared_bytes && el_bits == 8 && fsize != cd->size &&
+            fsize - cd->size <= (1 << 3))
+        {
+            // Negative pad when the component starts on a byte boundary,
+            // positive pad when it ends on one; the component is widened to
+            // whole bytes in both cases.
+            if (!(cd->offset % 8u)) {
+                cd->pad = -(fsize - cd->size);
+                cd->size = fsize;
+            } else if (!((cd->offset + cd->size) % 8u)) {
+                cd->pad = fsize - cd->size;
+                cd->size = fsize;
+                cd->offset = MP_ALIGN_DOWN(cd->offset, 8);
+            }
+        }
+    }
+
+    // The alpha component always has ID 4 (index 3) in our representation, so
+    // move the alpha component to there.
+    if (has_alpha && pd->nb_components < 4) {
+        desc->comps[3] = desc->comps[pd->nb_components - 1];
+        desc->comps[pd->nb_components - 1] = (struct mp_imgfmt_comp_desc){0};
+    }
+
+    if (is_packed_ss_yuv) {
+        desc->flags |= MP_IMGFLAG_PACKED_SS_YUV;
+        desc->bpp[0] /= 1 << pd->log2_chroma_w;
+    } else if (!any_shared_bits) {
+        desc->flags |= MP_IMGFLAG_HAS_COMPS;
+    }
+
+    return;
+
+fail:
+    for (int n = 0; n < 4; n++)
+        desc->comps[n] = (struct mp_imgfmt_comp_desc){0};
+    // Average bit size fallback.
+    desc->num_planes = av_pix_fmt_count_planes(fmt);
+    for (int p = 0; p < desc->num_planes; p++) {
+        int ls = av_image_get_linesize(fmt, 256, p);
+        desc->bpp[p] = ls > 0 ? ls * 8 / 256 : 0;
+    }
+}
+
+// Build a format description for mpfmt from the pixdesc entry of the
+// AVPixelFormat that imgfmt2pixfmt() maps it to.
+// Returns false (without touching *out) if there is no pixdesc entry or the
+// entry has more than 4 components. Otherwise the description is written to
+// *out and true is returned.
+static bool mp_imgfmt_get_desc_from_pixdesc(int mpfmt, struct mp_imgfmt_desc *out)
+{
+    enum AVPixelFormat fmt = imgfmt2pixfmt(mpfmt);
+    const AVPixFmtDescriptor *pd = av_pix_fmt_desc_get(fmt);
+    if (!pd || pd->nb_components > 4)
+        return false;
+
+    struct mp_imgfmt_desc desc = {
+        .id = mpfmt,
+        .chroma_xs = pd->log2_chroma_w,
+        .chroma_ys = pd->log2_chroma_h,
+    };
+
+    if (pd->flags & AV_PIX_FMT_FLAG_ALPHA)
+        desc.flags |= MP_IMGFLAG_ALPHA;
+
+    if (pd->flags & AV_PIX_FMT_FLAG_HWACCEL)
+        desc.flags |= MP_IMGFLAG_TYPE_HW;
+
+    // Color class. The first matching rule wins, so the order matters.
+    // Pixdesc does not provide a flag for XYZ, so this is the best we can do.
+    if (strncmp(pd->name, "xyz", 3) == 0) {
+        desc.flags |= MP_IMGFLAG_COLOR_XYZ;
+    } else if (pd->flags & AV_PIX_FMT_FLAG_RGB) {
+        desc.flags |= MP_IMGFLAG_COLOR_RGB;
+    } else if (fmt == AV_PIX_FMT_MONOBLACK || fmt == AV_PIX_FMT_MONOWHITE) {
+        desc.flags |= MP_IMGFLAG_COLOR_RGB;
+    } else if (fmt == AV_PIX_FMT_PAL8) {
+        desc.flags |= MP_IMGFLAG_COLOR_RGB | MP_IMGFLAG_TYPE_PAL8;
+    }
+
+    if (pd->flags & AV_PIX_FMT_FLAG_FLOAT)
+        desc.flags |= MP_IMGFLAG_TYPE_FLOAT;
+
+    // Educated guess: anything without a color class that is not a hardware
+    // format is treated as YUV.
+    if (!(desc.flags & MP_IMGFLAG_COLOR_MASK) &&
+        !(desc.flags & MP_IMGFLAG_TYPE_HW))
+        desc.flags |= MP_IMGFLAG_COLOR_YUV;
+
+    // Default alignment is derived from the chroma shifts.
+    desc.align_x = 1 << desc.chroma_xs;
+    desc.align_y = 1 << desc.chroma_ys;
+
+    fill_pixdesc_layout(&desc, fmt, pd);
+
+    // If the layout pass marked the components as usable and no type flag was
+    // set above, default to unsigned integer samples.
+    if (desc.flags & (MP_IMGFLAG_HAS_COMPS | MP_IMGFLAG_PACKED_SS_YUV)) {
+        if (!(desc.flags & MP_IMGFLAG_TYPE_MASK))
+            desc.flags |= MP_IMGFLAG_TYPE_UINT;
+    }
+
+    // Bitstream formats whose pixels are not a whole number of bytes: align to
+    // the number of pixels that fit in one byte.
+    if (desc.bpp[0] % 8u && (pd->flags & AV_PIX_FMT_FLAG_BITSTREAM))
+        desc.align_x = 8 / desc.bpp[0]; // expect power of 2
+
+    // Very heuristical.
+    // An endian_shift of 0 is treated as "already native endian".
+    bool is_ne = !desc.endian_shift;
+    // Byte order is considered relevant if component 0 is not a whole number
+    // of bytes while the pixel is wider than 8 bits, or if component 0 itself
+    // is wider than 8 bits.
+    bool need_endian = (desc.comps[0].size % 8u && desc.bpp[0] > 8) ||
+                       desc.comps[0].size > 8;
+
+    if (need_endian) {
+        bool is_le = MP_SELECT_LE_BE(is_ne, !is_ne);
+        desc.flags |= is_le ? MP_IMGFLAG_LE : MP_IMGFLAG_BE;
+    } else {
+        // Endian independent: flag as valid for both byte orders.
+        desc.flags |= MP_IMGFLAG_LE | MP_IMGFLAG_BE;
+    }
+
+    *out = desc;
+    return true;
+}
+
+// For packed sub-sampled YUV formats, compute the bit offset of every luma
+// sample within one pixel group and store them in luma_offsets[].
+// Returns false if imgfmt is not MP_IMGFLAG_PACKED_SS_YUV, or if no free slot
+// can be found for one of the additional luma samples (in which case
+// luma_offsets[] may have been partially written).
+bool mp_imgfmt_get_packed_yuv_locations(int imgfmt, uint8_t *luma_offsets)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
+    if ((desc.flags & MP_IMGFLAG_PACKED_SS_YUV) == 0)
+        return false;
+
+    assert(desc.num_planes == 1);
+
+    // Pixdesc only describes the first luma sample and the chroma samples.
+    // The remaining luma samples are guessed: scan the pixel group from bit 0
+    // in luma-sized steps and claim each slot that does not overlap any
+    // described component. This is a heuristic and may fail for odd layouts.
+    const int luma_bits = desc.comps[0].size;
+    const int group_bits = desc.bpp[0] * desc.align_x;
+    const int num_luma = 1 << desc.chroma_xs;
+    int pos = 0;
+
+    for (int idx = 1; idx < num_luma; idx++) {
+        // Advance pos until it names a free slot (or runs off the group).
+        for (;; pos += luma_bits) {
+            if (pos + luma_bits > group_bits)
+                return false;
+
+            bool occupied = false;
+            for (int c = 0; c < 3 && !occupied; c++) {
+                const struct mp_imgfmt_comp_desc *cd = &desc.comps[c];
+                // Absent components (size 0) never block a slot.
+                occupied = cd->size &&
+                           cd->offset + cd->size > pos &&
+                           pos + luma_bits > cd->offset;
+            }
+            if (!occupied)
+                break;
+        }
+        luma_offsets[idx] = pos;
+        pos += luma_bits;
+    }
+
+    luma_offsets[0] = desc.comps[0].offset;
+    return true;
+}
+
+// Look up mpfmt via get_mp_desc() and, if an entry with non-zero flags
+// exists, copy its description to *desc and fill in the derived fields.
+// Returns false (leaving *desc untouched) if there is no usable entry.
+static bool get_native_desc(int mpfmt, struct mp_imgfmt_desc *desc)
+{
+    const struct mp_imgfmt_entry *entry = get_mp_desc(mpfmt);
+    if (!entry)
+        return false;
+    if (!entry->desc.flags)
+        return false;
+
+    *desc = entry->desc;
+
+    // The remaining fields are ones mp_imgfmt_entry.desc need not provide.
+    desc->id = mpfmt;
+
+    for (int i = 0; i < MP_NUM_COMPONENTS; i++) {
+        const struct mp_imgfmt_comp_desc *comp = &desc->comps[i];
+
+        // Plane count: highest plane index used by a present component, + 1.
+        if (comp->size && comp->plane + 1 > desc->num_planes)
+            desc->num_planes = comp->plane + 1;
+
+        // Plane bpp: end of the furthest component, rounded up to whole bytes.
+        int end_bits = MP_ALIGN_UP(comp->offset + comp->size, 8);
+        if (end_bits > desc->bpp[comp->plane])
+            desc->bpp[comp->plane] = end_bits;
+    }
+
+    // Only derive the alignment if the entry specified neither value.
+    if (!desc->align_x && !desc->align_y) {
+        desc->align_x = 1 << desc->chroma_xs;
+        desc->align_y = 1 << desc->chroma_ys;
+    }
+
+    if (desc->num_planes)
+        desc->flags |= MP_IMGFLAG_HAS_COMPS | MP_IMGFLAG_NE;
+
+    // Default to unsigned integer components when no type was given.
+    if ((desc->flags & MP_IMGFLAG_TYPE_MASK) == 0)
+        desc->flags |= MP_IMGFLAG_TYPE_UINT;
+
+    return true;
+}
+
+// Return the number of component types of the format described by desc:
+// 1 for gray or 3 otherwise, plus 1 if alpha is present. Returns 0 if no
+// color class flag is set.
+int mp_imgfmt_desc_get_num_comps(struct mp_imgfmt_desc *desc)
+{
+    const int flags = desc->flags;
+
+    if ((flags & MP_IMGFLAG_COLOR_MASK) == 0)
+        return 0;
+
+    int count = (flags & MP_IMGFLAG_GRAY) ? 1 : 3;
+    if (flags & MP_IMGFLAG_ALPHA)
+        count++;
+    return count;
+}
+
+// Return the description of image format mpfmt.
+// The base description comes from get_native_desc(), or failing that from
+// mp_imgfmt_get_desc_from_pixdesc(). If both fail, an all-zero descriptor is
+// returned. On success, the per-plane chroma shifts and the derived flags
+// (BYTE_ALIGNED, ALPHA, GRAY, BYTES, YUV_P, RGB_P, YUV_NV) are filled in here.
+struct mp_imgfmt_desc mp_imgfmt_get_desc(int mpfmt)
+{
+    struct mp_imgfmt_desc desc;
+
+    if (!get_native_desc(mpfmt, &desc) &&
+        !mp_imgfmt_get_desc_from_pixdesc(mpfmt, &desc))
+        return (struct mp_imgfmt_desc){0};
+
+    // Only planes 1 and 2 are treated as sub-sampled chroma planes.
+    for (int p = 0; p < desc.num_planes; p++) {
+        desc.xs[p] = (p == 1 || p == 2) ? desc.chroma_xs : 0;
+        desc.ys[p] = (p == 1 || p == 2) ? desc.chroma_ys : 0;
+    }
+
+    // Byte-aligned only if every plane has a whole number of bytes per pixel.
+    // The result must be accumulated over all planes; otherwise the last
+    // plane alone would decide the flag.
+    bool is_ba = desc.num_planes > 0;
+    for (int p = 0; p < desc.num_planes; p++)
+        is_ba = is_ba && !(desc.bpp[p] % 8u);
+
+    if (is_ba)
+        desc.flags |= MP_IMGFLAG_BYTE_ALIGNED;
+
+    if (desc.flags & MP_IMGFLAG_HAS_COMPS) {
+        // Component index 3 is alpha.
+        if (desc.comps[3].size)
+            desc.flags |= MP_IMGFLAG_ALPHA;
+
+        // Assuming all colors are (CCC+[A]) or (C+[A]), the latter being gray.
+        if (!desc.comps[1].size)
+            desc.flags |= MP_IMGFLAG_GRAY;
+
+        // BYTES requires every component to start on a byte boundary and to
+        // span a whole number of bytes (absent components trivially pass).
+        bool bb = true;
+        for (int n = 0; n < MP_NUM_COMPONENTS; n++) {
+            if (desc.comps[n].offset % 8u || desc.comps[n].size % 8u)
+                bb = false;
+        }
+        if (bb)
+            desc.flags |= MP_IMGFLAG_BYTES;
+    }
+
+    // Classify planar / semi-planar layouts for byte-sized unsigned integer
+    // YUV or RGB formats.
+    if ((desc.flags & (MP_IMGFLAG_YUV | MP_IMGFLAG_RGB))
+        && (desc.flags & MP_IMGFLAG_HAS_COMPS)
+        && (desc.flags & MP_IMGFLAG_BYTES)
+        && ((desc.flags & MP_IMGFLAG_TYPE_MASK) == MP_IMGFLAG_TYPE_UINT))
+    {
+        int cnt = mp_imgfmt_desc_get_num_comps(&desc);
+        bool same_depth = true;
+        for (int p = 0; p < desc.num_planes; p++)
+            same_depth &= desc.bpp[p] == desc.bpp[0];
+        // Fully planar: one plane per component, all of the same depth.
+        if (same_depth && cnt == desc.num_planes) {
+            if (desc.flags & MP_IMGFLAG_YUV) {
+                desc.flags |= MP_IMGFLAG_YUV_P;
+            } else {
+                desc.flags |= MP_IMGFLAG_RGB_P;
+            }
+        }
+        // Semi-planar: luma plane plus one plane twice as wide per pixel.
+        if (cnt == 3 && desc.num_planes == 2 &&
+            desc.bpp[1] == desc.bpp[0] * 2 &&
+            (desc.flags & MP_IMGFLAG_YUV))
+        {
+            desc.flags |= MP_IMGFLAG_YUV_NV;
+        }
+    }
+
+    return desc;
+}
+
+// Check that fmt obeys the invariants of a regular image format: no empty or
+// padding-only planes, at most MP_NUM_COMPONENTS component slots in total,
+// valid and unique component IDs, luma/alpha and chroma on separate planes
+// when chroma is sub-sampled, and no gaps in the component sequence.
+static bool validate_regular_imgfmt(const struct mp_regular_imgfmt *fmt)
+{
+    bool seen[MP_NUM_COMPONENTS] = {0};
+    int total_slots = 0;
+    const bool subsampled = fmt->chroma_xs > 0 || fmt->chroma_ys > 0;
+
+    for (int p = 0; p < fmt->num_planes; p++) {
+        const struct mp_regular_imgfmt_plane *pl = &fmt->planes[p];
+
+        if (pl->num_components == 0)
+            return false; // no empty planes in between allowed
+        total_slots += pl->num_components;
+        if (total_slots > MP_NUM_COMPONENTS)
+            return false;
+
+        bool has_luma = false;   // component 1 or 4 on this plane
+        bool has_chroma = false; // component 2 or 3 on this plane
+        for (int i = 0; i < pl->num_components; i++) {
+            const int id = pl->components[i];
+            if (id > MP_NUM_COMPONENTS)
+                return false;
+            if (id == 0)
+                continue; // padding slot
+            if (seen[id - 1])
+                return false; // no duplicates
+            seen[id - 1] = true;
+            if (id == 2 || id == 3) {
+                has_chroma = true;
+            } else {
+                has_luma = true;
+            }
+        }
+
+        if (!has_luma && !has_chroma)
+            return false; // no planes with only padding allowed
+        if (subsampled && has_luma && has_chroma)
+            return false; // separate chroma/luma planes required
+    }
+
+    if (!seen[0] && !seen[3])
+        return false; // at least component 1 or alpha needed
+    if (seen[1] && !seen[0])
+        return false; // component 2 requires component 1
+    if (seen[2] && !seen[1])
+        return false; // component 3 requires component 2
+
+    return true;
+}
+
+// Map the color class bits of flags to a forced colorspace: XYZ takes
+// precedence over RGB; anything else yields MP_CSP_AUTO.
+static enum mp_csp get_forced_csp_from_flags(int flags)
+{
+    return (flags & MP_IMGFLAG_COLOR_XYZ) ? MP_CSP_XYZ :
+           (flags & MP_IMGFLAG_COLOR_RGB) ? MP_CSP_RGB :
+                                            MP_CSP_AUTO;
+}
+
+// Return the colorspace implied by imgfmt itself (MP_CSP_XYZ or MP_CSP_RGB),
+// or MP_CSP_AUTO if the format does not force one.
+enum mp_csp mp_imgfmt_get_forced_csp(int imgfmt)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
+    return get_forced_csp_from_flags(desc.flags);
+}
+
+// Map the component type bits of flags to an mp_component_type. UINT is
+// checked before FLOAT; other types yield MP_COMPONENT_TYPE_UNKNOWN.
+static enum mp_component_type get_component_type_from_flags(int flags)
+{
+    return (flags & MP_IMGFLAG_TYPE_UINT)  ? MP_COMPONENT_TYPE_UINT :
+           (flags & MP_IMGFLAG_TYPE_FLOAT) ? MP_COMPONENT_TYPE_FLOAT :
+                                             MP_COMPONENT_TYPE_UNKNOWN;
+}
+
+// Return the component type (uint/float) of imgfmt, or
+// MP_COMPONENT_TYPE_UNKNOWN if it is neither.
+enum mp_component_type mp_imgfmt_get_component_type(int imgfmt)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
+    return get_component_type_from_flags(desc.flags);
+}
+
+// Return the format that av_pix_fmt_swap_endianness() reports as the
+// opposite-endian counterpart of imgfmt, converted back to an IMGFMT_* value.
+int mp_find_other_endian(int imgfmt)
+{
+    enum AVPixelFormat native = imgfmt2pixfmt(imgfmt);
+    enum AVPixelFormat swapped = av_pix_fmt_swap_endianness(native);
+    return pixfmt2imgfmt(swapped);
+}
+
+// Try to express imgfmt as a "regular" format: native endian, with uniform,
+// byte-sized, size-aligned components. On success *dst is filled in and true
+// is returned; on any failure *dst is left untouched and false is returned.
+bool mp_get_regular_imgfmt(struct mp_regular_imgfmt *dst, int imgfmt)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
+
+    if (!desc.num_planes)
+        return false;
+    if (desc.endian_shift)
+        return false;
+    if (!(desc.flags & MP_IMGFLAG_HAS_COMPS))
+        return false;
+
+    struct mp_regular_imgfmt out = {0};
+    out.num_planes = desc.num_planes;
+
+    out.component_type = get_component_type_from_flags(desc.flags);
+    if (out.component_type == MP_COMPONENT_TYPE_UNKNOWN)
+        return false;
+
+    // Component 0 serves as the reference for size and padding.
+    const struct mp_imgfmt_comp_desc *ref = &desc.comps[0];
+    const int ref_bits = ref->size;
+    if (ref_bits < 1 || ref_bits > 64 || (ref_bits % 8u))
+        return false;
+
+    out.component_size = ref_bits / 8u;
+    out.component_pad = ref->pad;
+
+    // Each plane must hold a whole number of component-sized slots.
+    for (int p = 0; p < out.num_planes; p++) {
+        if (desc.bpp[p] % ref_bits)
+            return false;
+        out.planes[p].num_components = desc.bpp[p] / ref_bits;
+    }
+
+    for (int c = 0; c < MP_NUM_COMPONENTS; c++) {
+        const struct mp_imgfmt_comp_desc *cd = &desc.comps[c];
+        if (cd->size == 0)
+            continue;
+
+        if (cd->plane + 1 > out.num_planes)
+            out.num_planes = cd->plane + 1;
+
+        // We support uniform depth only.
+        if (cd->size != ref_bits || cd->pad != ref->pad)
+            return false;
+
+        // Size-aligned only.
+        const int slot = cd->offset / cd->size;
+        if (cd->offset % cd->size != 0 || slot >= MP_NUM_COMPONENTS)
+            return false;
+
+        // Two components must not land in the same slot.
+        uint8_t *entry = &out.planes[cd->plane].components[slot];
+        if (*entry)
+            return false;
+        *entry = c + 1;
+    }
+
+    out.chroma_xs = desc.chroma_xs;
+    out.chroma_ys = desc.chroma_ys;
+    out.forced_csp = get_forced_csp_from_flags(desc.flags);
+
+    if (!validate_regular_imgfmt(&out))
+        return false;
+
+    *dst = out;
+    return true;
+}
+
+// Return whether a and b describe the same regular format: identical scalar
+// properties and, for every plane, the same component list.
+static bool regular_imgfmt_equals(struct mp_regular_imgfmt *a,
+                                  struct mp_regular_imgfmt *b)
+{
+    const bool same_props =
+        a->component_type == b->component_type &&
+        a->component_size == b->component_size &&
+        a->num_planes     == b->num_planes &&
+        a->component_pad  == b->component_pad &&
+        a->forced_csp     == b->forced_csp &&
+        a->chroma_xs      == b->chroma_xs &&
+        a->chroma_ys      == b->chroma_ys;
+    if (!same_props)
+        return false;
+
+    for (int p = 0; p < a->num_planes; p++) {
+        const struct mp_regular_imgfmt_plane *pa = &a->planes[p];
+        const struct mp_regular_imgfmt_plane *pb = &b->planes[p];
+
+        if (pa->num_components != pb->num_components)
+            return false;
+        // Only the slots actually in use are compared.
+        for (int i = 0; i < pa->num_components; i++) {
+            if (pa->components[i] != pb->components[i])
+                return false;
+        }
+    }
+
+    return true;
+}
+
+// Search all image formats between IMGFMT_START and IMGFMT_END (exclusive)
+// for the first one whose regular description equals *src.
+// Returns that format, or 0 if none matches.
+int mp_find_regular_imgfmt(struct mp_regular_imgfmt *src)
+{
+    int fmt = IMGFMT_START + 1;
+    while (fmt < IMGFMT_END) {
+        struct mp_regular_imgfmt candidate;
+        if (mp_get_regular_imgfmt(&candidate, fmt) &&
+            regular_imgfmt_equals(src, &candidate))
+        {
+            return fmt;
+        }
+        fmt++;
+    }
+    return 0;
+}
+
+// Pick whichever of dst1 and dst2 can carry more of the data in src (e.g.
+// higher depth, more color components, less chroma subsampling). The decision
+// is delegated to avcodec_find_best_pix_fmt_of_list().
+// Returns the chosen imgfmt, or 0 on error.
+int mp_imgfmt_select_best(int dst1, int dst2, int src)
+{
+    enum AVPixelFormat candidates[] = {
+        imgfmt2pixfmt(dst1),
+        imgfmt2pixfmt(dst2),
+        AV_PIX_FMT_NONE, // list terminator
+    };
+    enum AVPixelFormat source = imgfmt2pixfmt(src);
+    enum AVPixelFormat winner =
+        avcodec_find_best_pix_fmt_of_list(candidates, source, 1, 0);
+    return pixfmt2imgfmt(winner);
+}
+
+// Like mp_imgfmt_select_best(), but chooses among the num_dst formats in
+// dst[]. Returns 0 if the list is empty.
+int mp_imgfmt_select_best_list(int *dst, int num_dst, int src)
+{
+    int winner = 0;
+    for (int i = 0; i < num_dst; i++) {
+        // While there is no winner yet (0), the next entry is taken as-is.
+        if (winner) {
+            winner = mp_imgfmt_select_best(winner, dst[i], src);
+        } else {
+            winner = dst[i];
+        }
+    }
+    return winner;
+}
diff --git a/video/img_format.h b/video/img_format.h
new file mode 100644
index 0000000..0753829
--- /dev/null
+++ b/video/img_format.h
@@ -0,0 +1,342 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_IMG_FORMAT_H
+#define MPLAYER_IMG_FORMAT_H
+
+#include <inttypes.h>
+
+#include "config.h"
+#include "osdep/endian.h"
+#include "misc/bstr.h"
+#include "video/csputils.h"
+
+#define MP_MAX_PLANES 4
+#define MP_NUM_COMPONENTS 4
+
+// mp_imgfmt_desc.comps[] is set to useful values. Some types of formats will
+// use comps[], but not set this flag, because it doesn't cover all requirements
+// (for example MP_IMGFLAG_PACKED_SS_YUV).
+#define MP_IMGFLAG_HAS_COMPS    (1 << 0)
+
+// all components start on byte boundaries and are a whole number of bytes wide
+#define MP_IMGFLAG_BYTES        (1 << 1)
+
+// all pixels start on byte boundaries
+#define MP_IMGFLAG_BYTE_ALIGNED (1 << 2)
+
+// set if in little endian, or endian independent
+#define MP_IMGFLAG_LE           (1 << 3)
+
+// set if in big endian, or endian independent
+#define MP_IMGFLAG_BE           (1 << 4)
+
+// set if in native (host) endian, or endian independent
+#define MP_IMGFLAG_NE           MP_SELECT_LE_BE(MP_IMGFLAG_LE, MP_IMGFLAG_BE)
+
+// set if an alpha component is included
+#define MP_IMGFLAG_ALPHA        (1 << 5)
+
+// color class flags - can use via bit tests, or use the mask and compare
+#define MP_IMGFLAG_COLOR_MASK   (15 << 6)
+#define MP_IMGFLAG_COLOR_YUV    (1 << 6)
+#define MP_IMGFLAG_COLOR_RGB    (2 << 6)
+#define MP_IMGFLAG_COLOR_XYZ    (4 << 6)
+
+// component type flags (same access conventions as MP_IMGFLAG_COLOR_*)
+#define MP_IMGFLAG_TYPE_MASK    (15 << 10)
+#define MP_IMGFLAG_TYPE_UINT    (1 << 10)
+#define MP_IMGFLAG_TYPE_FLOAT   (2 << 10)
+#define MP_IMGFLAG_TYPE_PAL8    (4 << 10)
+#define MP_IMGFLAG_TYPE_HW      (8 << 10)
+
+#define MP_IMGFLAG_YUV          MP_IMGFLAG_COLOR_YUV
+#define MP_IMGFLAG_RGB          MP_IMGFLAG_COLOR_RGB
+#define MP_IMGFLAG_PAL          MP_IMGFLAG_TYPE_PAL8
+#define MP_IMGFLAG_HWACCEL      MP_IMGFLAG_TYPE_HW
+
+// 1 component format (or 2 components if MP_IMGFLAG_ALPHA is set).
+// This should probably be a separate MP_IMGFLAG_COLOR_GRAY, but for now it
+// is too much of a mess.
+#define MP_IMGFLAG_GRAY         (1 << 14)
+
+// Packed, sub-sampled YUV format. Does not apply to packed non-subsampled YUV.
+// These formats pack multiple pixels into one sample with strange organization.
+// In this specific case, mp_imgfmt_desc.align_x gives the size of a "full"
+// pixel, which has align_x luma samples, and 1 chroma sample of each Cb and Cr.
+// mp_imgfmt_desc.comps describes the chroma samples, and the first luma sample.
+// All luma samples have the same configuration as the first one, and you can
+// get their offsets with mp_imgfmt_get_packed_yuv_locations(). Note that the
+// component offsets can be >= bpp[0]; the actual range is bpp[0]*align_x.
+// These formats have no alpha.
+#define MP_IMGFLAG_PACKED_SS_YUV (1 << 15)
+
+// set if the format is in a standard YUV format:
+// - planar and yuv colorspace
+// - chroma shift 0-2
+// - 1-4 planes (1: gray, 2: gray/alpha, 3: yuv, 4: yuv/alpha)
+// - 8-16 bit per pixel/plane, all planes have same depth,
+//   each plane has exactly one component
+#define MP_IMGFLAG_YUV_P        (1 << 16)
+
+// Like MP_IMGFLAG_YUV_P, but RGB. This can be e.g. AV_PIX_FMT_GBRP. The planes
+// are always shuffled (G - B - R [- A]).
+#define MP_IMGFLAG_RGB_P        (1 << 17)
+
+// Semi-planar YUV formats, like AV_PIX_FMT_NV12.
+#define MP_IMGFLAG_YUV_NV       (1 << 18)
+
+// Location and size of one component within a pixel group.
+// A component that is not present has size == 0.
+struct mp_imgfmt_comp_desc {
+    // Plane on which this component is.
+    uint8_t plane;
+    // Bit offset of first sample, from start of the pixel group (little endian).
+    // 6-bit field, so the representable range is 0-63.
+    uint8_t offset : 6;
+    // Number of bits used by each sample.
+    // 6-bit field, so the representable range is 0-63.
+    uint8_t size : 6;
+    // Internal padding. See mp_regular_imgfmt.component_pad.
+    // Signed 4-bit field.
+    int8_t pad : 4;
+};
+
+// Description of an image format, as returned by mp_imgfmt_get_desc().
+// An all-zero struct is returned for formats that cannot be described.
+struct mp_imgfmt_desc {
+    int id;                 // IMGFMT_*
+    int flags;              // MP_IMGFLAG_* bitfield
+    int8_t num_planes;
+    int8_t chroma_xs, chroma_ys; // chroma shift (i.e. log2 of chroma pixel size)
+    int8_t align_x, align_y;     // pixel count to get byte alignment and to get
+                                 // to a pixel pos where luma & chroma aligns
+                                 // always power of 2
+    int8_t bpp[MP_MAX_PLANES];   // bits per pixel (may be "average"; the real
+                                 // byte value is determined by align_x*bpp/8
+                                 // for align_x pixels)
+    // chroma shifts per plane (provided for convenience with planar formats)
+    // Packed YUV always uses xs[0]=ys[0]=0, because plane 0 contains luma in
+    // addition to chroma, and thus is not sub-sampled (uses align_x=2 instead).
+    int8_t xs[MP_MAX_PLANES];
+    int8_t ys[MP_MAX_PLANES];
+
+    // Description for each component. Generally valid only if flags has
+    // MP_IMGFLAG_HAS_COMPS set.
+    // This is indexed by component_type-1 (so 0=R, 1=G, etc.), see
+    // mp_regular_imgfmt_plane.components[x] for component_type. Components not
+    // present use size=0. Bits not covered by any component are random and not
+    // interpreted by any software.
+    // In particular, don't make the mistake to index this by plane.
+    struct mp_imgfmt_comp_desc comps[MP_NUM_COMPONENTS];
+
+    // log(2) of the word size in bytes for endian swapping that needs to be
+    // performed for converting to native endian. This is performed before any
+    // other unpacking steps, and for all data covered by bits.
+    // Always 0 if IMGFLAG_NE is set.
+    // 2-bit field, so the representable range is 0-3.
+    uint8_t endian_shift : 2;
+};
+
+struct mp_imgfmt_desc mp_imgfmt_get_desc(int imgfmt);
+
+// Return the number of component types, or 0 if unknown.
+int mp_imgfmt_desc_get_num_comps(struct mp_imgfmt_desc *desc);
+
+// For MP_IMGFLAG_PACKED_SS_YUV formats (packed sub-sampled YUV): positions of
+// further luma samples. luma_offsets must be an array of align_x size, and the
+// function will return the offset (like in mp_imgfmt_comp_desc.offset) of each
+// luma pixel. luma_offsets[0] == mp_imgfmt_desc.comps[0].offset.
+bool mp_imgfmt_get_packed_yuv_locations(int imgfmt, uint8_t *luma_offsets);
+
+// MP_CSP_AUTO for YUV, MP_CSP_RGB or MP_CSP_XYZ otherwise.
+// (Because IMGFMT/AV_PIX_FMT conflate format and csp for RGB and XYZ.)
+enum mp_csp mp_imgfmt_get_forced_csp(int imgfmt);
+
+// Numeric type of the samples of a format's components.
+enum mp_component_type {
+    // Must stay 0: callers test for "unknown" with a plain truth check.
+    MP_COMPONENT_TYPE_UNKNOWN = 0,
+    // Unsigned integer samples.
+    MP_COMPONENT_TYPE_UINT,
+    // Floating point samples.
+    MP_COMPONENT_TYPE_FLOAT,
+};
+
+enum mp_component_type mp_imgfmt_get_component_type(int imgfmt);
+
+// Component layout of a single plane of a regular image format.
+struct mp_regular_imgfmt_plane {
+    // Number of valid entries in components[].
+    uint8_t num_components;
+    // 1 is red/luminance/gray, 2 is green/Cb, 3 is blue/Cr, 4 is alpha.
+    // 0 is used for padding (undefined contents).
+    // It is guaranteed that non-0 values occur only once in the whole format.
+    uint8_t components[MP_NUM_COMPONENTS];
+};
+
+// This describes pixel formats that are byte aligned, have byte aligned
+// components, native endian, etc.
+// Filled in by mp_get_regular_imgfmt(); mp_find_regular_imgfmt() performs the
+// reverse lookup.
+struct mp_regular_imgfmt {
+    // Type of each component.
+    enum mp_component_type component_type;
+
+    // See mp_imgfmt_get_forced_csp(). Normally code should use
+    // mp_image_params.colors. This field is only needed to map the format
+    // unambiguously to FFmpeg formats.
+    enum mp_csp forced_csp;
+
+    // Size of each component in bytes.
+    uint8_t component_size;
+
+    // If >0, LSB padding, if <0, MSB padding. The padding bits are always 0.
+    // This applies: bit_depth = component_size * 8 - abs(component_pad)
+    //               bit_size  = component_size * 8 + MPMIN(0, component_pad)
+    //  E.g. P010: component_pad=6 (LSB always implied 0, all data in MSB)
+    //          => has a "depth" of 10 bit, but usually treated as 16 bit value
+    //       yuv420p10: component_pad=-6 (like a 10 bit value 0-extended to 16)
+    //          => has depth of 10 bit, needs <<6 to get a 16 bit value
+    int8_t component_pad;
+
+    // Number of valid entries in planes[].
+    uint8_t num_planes;
+    struct mp_regular_imgfmt_plane planes[MP_MAX_PLANES];
+
+    // Chroma shifts for chroma planes. 0/0 is 4:4:4 YUV or RGB. If not 0/0,
+    // then this is always a yuv format, with components 2/3 on separate planes
+    // (reduced by the shift), and planes for components 1/4 are full sized.
+    uint8_t chroma_xs, chroma_ys;
+};
+
+bool mp_get_regular_imgfmt(struct mp_regular_imgfmt *dst, int imgfmt);
+int mp_find_regular_imgfmt(struct mp_regular_imgfmt *src);
+
+// If imgfmt is valid, and there exists a format that is exactly the same, but
+// has inverse endianness, return this other format. Otherwise return 0.
+int mp_find_other_endian(int imgfmt);
+
+// Image format IDs. Valid formats lie strictly between IMGFMT_START and
+// IMGFMT_END. Enumerator values are assigned sequentially, so the order of
+// the entries below determines their numeric values.
+enum mp_imgfmt {
+    IMGFMT_NONE = 0,
+
+    // Offset to make it harder to confuse these with ffmpeg formats
+    IMGFMT_START = 1000,
+
+    // Planar YUV formats
+    IMGFMT_444P,                // 1x1
+    IMGFMT_420P,                // 2x2
+
+    // Gray
+    IMGFMT_Y8,
+    IMGFMT_Y16,
+
+    // Packed YUV formats (components are byte-accessed)
+    IMGFMT_UYVY,                // U  Y0 V  Y1
+
+    // Y plane + packed plane for chroma
+    IMGFMT_NV12,
+
+    // Like IMGFMT_NV12, but with 10 bits per component (and 6 bits of padding)
+    IMGFMT_P010,
+
+    // RGB/BGR Formats
+
+    // Byte accessed (low address to high address)
+    IMGFMT_ARGB,
+    IMGFMT_BGRA,
+    IMGFMT_ABGR,
+    IMGFMT_RGBA,
+    IMGFMT_BGR24,               // 3 bytes per pixel
+    IMGFMT_RGB24,
+
+    // Like e.g. IMGFMT_ARGB, but has a padding byte instead of alpha
+    IMGFMT_0RGB,
+    IMGFMT_BGR0,
+    IMGFMT_0BGR,
+    IMGFMT_RGB0,
+
+    // Like IMGFMT_RGBA, but 2 bytes per component.
+    IMGFMT_RGBA64,
+
+    // Accessed with bit-shifts after endian-swapping the uint16_t pixel
+    IMGFMT_RGB565,              // 5r 6g 5b (MSB to LSB)
+
+    // AV_PIX_FMT_PAL8
+    IMGFMT_PAL8,
+
+    // Hardware accelerated formats. Plane data points to special data
+    // structures, instead of pixel data.
+    IMGFMT_VDPAU,           // VdpVideoSurface
+    // plane 0: ID3D11Texture2D
+    // plane 1: slice index casted to pointer
+    IMGFMT_D3D11,
+    IMGFMT_DXVA2,           // IDirect3DSurface9 (NV12/P010/P016)
+    IMGFMT_MMAL,            // MMAL_BUFFER_HEADER_T
+    IMGFMT_MEDIACODEC,      // AVMediaCodecBuffer
+    IMGFMT_CUDA,            // CUDA Buffer
+
+    // Not an actual format; base for mpv-specific descriptor table.
+    // Some may still map to AV_PIX_FMT_*.
+    IMGFMT_CUST_BASE,
+
+    // Planar gray/alpha.
+    IMGFMT_YAP8,
+    IMGFMT_YAP16,
+
+    // Planar YUV/alpha formats. Sometimes useful for internal processing. There
+    // should be one for each subsampling factor, with and without alpha, gray.
+    IMGFMT_YAPF, // Note: non-alpha version exists in ffmpeg
+    IMGFMT_444PF,
+    IMGFMT_444APF,
+    IMGFMT_420PF,
+    IMGFMT_420APF,
+    IMGFMT_422PF,
+    IMGFMT_422APF,
+    IMGFMT_440PF,
+    IMGFMT_440APF,
+    IMGFMT_410PF,
+    IMGFMT_410APF,
+    IMGFMT_411PF,
+    IMGFMT_411APF,
+
+    // Accessed with bit-shifts, uint32_t units.
+    IMGFMT_RGB30,               // 2pad 10r 10g 10b (MSB to LSB)
+
+    // Fringe formats for fringe RGB format repacking.
+    IMGFMT_Y1,      // gray with 1 bit per pixel
+    IMGFMT_GBRP1,   // planar RGB with N bits per color component
+    IMGFMT_GBRP2,
+    IMGFMT_GBRP3,
+    IMGFMT_GBRP4,
+    IMGFMT_GBRP5,
+    IMGFMT_GBRP6,
+
+    // Hardware accelerated formats (again).
+    IMGFMT_VDPAU_OUTPUT,    // VdpOutputSurface
+    IMGFMT_VAAPI,
+    IMGFMT_VIDEOTOOLBOX,    // CVPixelBufferRef
+    // Note: this entry only exists in some builds, which shifts the numeric
+    // values of all following entries.
+#if HAVE_VULKAN_INTEROP
+    IMGFMT_VULKAN,          // VKImage
+#endif
+    IMGFMT_DRMPRIME,        // AVDRMFrameDescriptor
+
+    // Generic pass-through of AV_PIX_FMT_*. Used for formats which don't have
+    // a corresponding IMGFMT_ value.
+    IMGFMT_AVPIXFMT_START,
+    IMGFMT_AVPIXFMT_END = IMGFMT_AVPIXFMT_START + 500,
+
+    // One past the last valid format; used as the exclusive upper bound when
+    // iterating over all formats.
+    IMGFMT_END,
+};
+
+#define IMGFMT_IS_HWACCEL(fmt) (!!(mp_imgfmt_get_desc(fmt).flags & MP_IMGFLAG_HWACCEL))
+
+int mp_imgfmt_from_name(bstr name);
+char *mp_imgfmt_to_name_buf(char *buf, size_t buf_size, int fmt);
+#define mp_imgfmt_to_name(fmt) mp_imgfmt_to_name_buf((char[16]){0}, 16, (fmt))
+
+char **mp_imgfmt_name_list(void);
+
+#define vo_format_name mp_imgfmt_to_name
+
+int mp_imgfmt_select_best(int dst1, int dst2, int src);
+int mp_imgfmt_select_best_list(int *dst, int num_dst, int src);
+
+#endif /* MPLAYER_IMG_FORMAT_H */
diff --git a/video/mp_image.c b/video/mp_image.c
new file mode 100644
index 0000000..dff2051
--- /dev/null
+++ b/video/mp_image.c
@@ -0,0 +1,1289 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <limits.h>
+#include <assert.h>
+
+#include <libavutil/mem.h>
+#include <libavutil/common.h>
+#include <libavutil/display.h>
+#include <libavutil/bswap.h>
+#include <libavutil/hwcontext.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/rational.h>
+#include <libavcodec/avcodec.h>
+#include <libavutil/mastering_display_metadata.h>
+#include <libplacebo/utils/libav.h>
+
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100)
+# include <libavutil/dovi_meta.h>
+#endif
+
+#include "mpv_talloc.h"
+
+#include "common/av_common.h"
+#include "common/common.h"
+#include "fmt-conversion.h"
+#include "hwdec.h"
+#include "mp_image.h"
+#include "osdep/threads.h"
+#include "sws_utils.h"
+#include "out/placebo/utils.h"
+
+// Compute the memory layout of an image allocation: per-plane stride, byte
+// offset and byte size. Returns the sum of all plane sizes on success, or a
+// value <0 if the parameters are rejected. All MP_MAX_PLANES entries of the
+// output arrays are written; unused planes get a stride and size of 0 and an
+// offset of -1.
+static int mp_image_layout(int imgfmt, int w, int h, int stride_align,
+                           int out_stride[MP_MAX_PLANES],
+                           int out_plane_offset[MP_MAX_PLANES],
+                           int out_plane_size[MP_MAX_PLANES])
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
+
+    // Round the dimensions up to the format's alignment before validating.
+    w = MP_ALIGN_UP(w, desc.align_x);
+    h = MP_ALIGN_UP(h, desc.align_y);
+
+    struct mp_image_params params = {.imgfmt = imgfmt, .w = w, .h = h};
+    if (!mp_image_params_valid(&params))
+        return -1;
+    if (desc.flags & MP_IMGFLAG_HWACCEL)
+        return -1;
+
+    // Note: for non-mod-2 4:2:0 YUV frames, we have to allocate an additional
+    //       top/right border. This is needed for correct handling of such
+    //       images in filter and VO code (e.g. vo_vdpau or vo_gpu).
+
+    for (int p = 0; p < MP_MAX_PLANES; p++) {
+        int plane_w = mp_chroma_div_up(w, desc.xs[p]);
+        // The height is padded to a multiple of 32 before chroma shifting.
+        int plane_h = MP_ALIGN_UP(h, 32) >> desc.ys[p];
+        int row_bytes = (plane_w * desc.bpp[p] + 7) / 8;
+        out_stride[p] = MP_ALIGN_NPOT(row_bytes, stride_align);
+        out_plane_size[p] = out_stride[p] * plane_h;
+    }
+
+    // Paletted formats keep the palette in plane 1.
+    if (desc.flags & MP_IMGFLAG_PAL)
+        out_plane_size[1] = AVPALETTE_SIZE;
+
+    // Planes are laid out back to back; empty planes get offset -1.
+    int total = 0;
+    for (int p = 0; p < MP_MAX_PLANES; p++) {
+        if (out_plane_size[p]) {
+            out_plane_offset[p] = total;
+        } else {
+            out_plane_offset[p] = -1;
+        }
+        total += out_plane_size[p];
+    }
+
+    return total;
+}
+
+// Compute the number of bytes an image allocation with the given imgfmt, w,
+// h and stride alignment needs. Returns -1 on error.
+// The result assumes the allocation itself starts on a stride_align boundary;
+// if it does not, the caller has to account for the extra padding.
+int mp_image_get_alloc_size(int imgfmt, int w, int h, int stride_align)
+{
+    // Only the total is of interest; the per-plane results are discarded.
+    int strides[MP_MAX_PLANES];
+    int offsets[MP_MAX_PLANES];
+    int sizes[MP_MAX_PLANES];
+    int total = mp_image_layout(imgfmt, w, h, stride_align,
+                                strides, offsets, sizes);
+    return total;
+}
+
+// Point mpi->planes[] and set mpi->stride[] into the given buffer, using the
+// layout implied by mpi's w/h/imgfmt fields. See mp_image_from_buffer for
+// remarks about how to allocate/use buffer/buffer_size.
+// The buffer is not freed or taken over here. The caller is expected to set
+// up refcounting by setting mp_image.bufs before or after calling this.
+// Returns true on success, false on failure.
+static bool mp_image_fill_alloc(struct mp_image *mpi, int stride_align,
+                                void *buffer, int buffer_size)
+{
+    int strides[MP_MAX_PLANES];
+    int offsets[MP_MAX_PLANES];
+    int sizes[MP_MAX_PLANES];
+    int needed = mp_image_layout(mpi->imgfmt, mpi->w, mpi->h, stride_align,
+                                 strides, offsets, sizes);
+    if (needed < 0)
+        return false;
+    if (needed > buffer_size)
+        return false;
+
+    // Number of bytes to skip so that the first plane is aligned.
+    int pad = MP_ALIGN_UP((uintptr_t)buffer, stride_align) - (uintptr_t)buffer;
+    if (buffer_size - needed < pad)
+        return false;
+
+    uint8_t *base = (uint8_t *)buffer + pad;
+    for (int p = 0; p < MP_MAX_PLANES; p++) {
+        if (offsets[p] >= 0) {
+            mpi->planes[p] = base + offsets[p];
+        } else {
+            mpi->planes[p] = NULL;
+        }
+        mpi->stride[p] = strides[p];
+    }
+
+    return true;
+}
+
+// Wrap a caller-provided buffer in a new mp_image. Plane pointers and strides
+// are derived from imgfmt/w/h and honor stride_align. When the last reference
+// to the returned image goes away, free(free_opaque, buffer) is called. (Be
+// aware that this can happen from any thread.)
+// buffer_size is the allocated size of buffer and should be at least what
+// mp_image_get_alloc_size() returns. If buffer is not aligned to
+// stride_align, the plane data is placed at the next aligned address inside
+// the buffer; this fails if buffer_size leaves no room for that. To be safe,
+// overallocate the buffer by stride_align bytes and include the
+// overallocation in buffer_size.
+// Returns NULL on failure. On failure, the free() callback is not called.
+struct mp_image *mp_image_from_buffer(int imgfmt, int w, int h, int stride_align,
+                                      uint8_t *buffer, int buffer_size,
+                                      void *free_opaque,
+                                      void (*free)(void *opaque, uint8_t *data))
+{
+    struct mp_image *img = mp_image_new_dummy_ref(NULL);
+    mp_image_setfmt(img, imgfmt);
+    mp_image_set_size(img, w, h);
+
+    if (mp_image_fill_alloc(img, stride_align, buffer, buffer_size)) {
+        img->bufs[0] = av_buffer_create(buffer, buffer_size, free,
+                                        free_opaque, 0);
+        if (img->bufs[0])
+            return img;
+    }
+
+    // Either the layout did not fit or the AVBufferRef could not be created.
+    talloc_free(img);
+    return NULL;
+}
+
+// Allocate a single refcounted buffer for all planes of mpi (whose format and
+// size must already be set) and point planes[]/stride[] into it.
+// Returns false if the size cannot be computed or the allocation fails.
+static bool mp_image_alloc_planes(struct mp_image *mpi)
+{
+    assert(!mpi->planes[0]);
+    assert(!mpi->bufs[0]);
+
+    const int align = MP_IMAGE_BYTE_ALIGN;
+
+    int needed = mp_image_get_alloc_size(mpi->imgfmt, mpi->w, mpi->h, align);
+    if (needed < 0)
+        return false;
+
+    // Note: mp_image_pool assumes this creates only 1 AVBufferRef.
+    // The extra align bytes leave room for aligning the start pointer.
+    mpi->bufs[0] = av_buffer_alloc(needed + align);
+    if (!mpi->bufs[0])
+        return false;
+
+    bool ok = mp_image_fill_alloc(mpi, align, mpi->bufs[0]->data,
+                                  mpi->bufs[0]->size);
+    if (!ok)
+        av_buffer_unref(&mpi->bufs[0]);
+
+    return ok;
+}
+
+// Set the image format of mpi: looks up the descriptor for out_fmt and
+// updates mpi->fmt, mpi->imgfmt, mpi->num_planes and mpi->params.imgfmt from
+// it. Other fields of mpi->params are left as they are.
+void mp_image_setfmt(struct mp_image *mpi, int out_fmt)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(out_fmt);
+
+    mpi->fmt = desc;
+    mpi->imgfmt = desc.id;
+    mpi->num_planes = desc.num_planes;
+    mpi->params.imgfmt = desc.id;
+}
+
+// talloc destructor for mp_image: drops every AVBufferRef the image holds
+// (plane buffers, hwctx, metadata buffers and ff_side_data entries) and frees
+// the ff_side_data array itself.
+static void mp_image_destructor(void *ptr)
+{
+    mp_image_t *img = ptr;
+
+    for (int i = 0; i < MP_MAX_PLANES; i++)
+        av_buffer_unref(&img->bufs[i]);
+
+    AVBufferRef **extra[] = {
+        &img->hwctx, &img->icc_profile, &img->a53_cc,
+        &img->dovi, &img->film_grain, &img->dovi_buf,
+    };
+    for (int i = 0; i < MP_ARRAY_SIZE(extra); i++)
+        av_buffer_unref(extra[i]);
+
+    for (int i = 0; i < img->num_ff_side_data; i++)
+        av_buffer_unref(&img->ff_side_data[i].buf);
+    talloc_free(img->ff_side_data);
+}
+
+// Divide size by 2^shift, rounding the result up.
+int mp_chroma_div_up(int size, int shift)
+{
+    int round = (1 << shift) - 1;
+    return (size + round) >> shift;
+}
+
+// Storage width of the given plane in pixels: the image width divided by the
+// plane's horizontal subsampling, rounded up.
+int mp_image_plane_w(struct mp_image *mpi, int plane)
+{
+    int shift = mpi->fmt.xs[plane];
+    return mp_chroma_div_up(mpi->w, shift);
+}
+
+// Storage height of the given plane in pixels: the image height divided by
+// the plane's vertical subsampling, rounded up.
+int mp_image_plane_h(struct mp_image *mpi, int plane)
+{
+    int shift = mpi->fmt.ys[plane];
+    return mp_chroma_div_up(mpi->h, shift);
+}
+
+// Set the image dimensions, both in mpi->w/h and in mpi->params.w/h.
+// Nothing is reallocated: the caller has to make sure the new size doesn't
+// exceed the allocated plane data/strides.
+void mp_image_set_size(struct mp_image *mpi, int w, int h)
+{
+    assert(w >= 0 && h >= 0);
+
+    mpi->params.w = w;
+    mpi->params.h = h;
+    mpi->w = w;
+    mpi->h = h;
+}
+
+// Apply params to image: the format and size go through mp_image_setfmt()
+// and mp_image_set_size() so the derived fields are updated as well, then the
+// whole parameter struct is copied into image->params.
+void mp_image_set_params(struct mp_image *image,
+                         const struct mp_image_params *params)
+{
+    // possibly initialize other stuff
+    mp_image_setfmt(image, params->imgfmt);
+
+    const int new_w = params->w;
+    const int new_h = params->h;
+    mp_image_set_size(image, new_w, new_h);
+
+    image->params = *params;
+}
+
+// Allocate a new image of the given format and size, including its plane
+// data. Returns NULL if the plane allocation fails.
+struct mp_image *mp_image_alloc(int imgfmt, int w, int h)
+{
+    struct mp_image *img = talloc_zero(NULL, struct mp_image);
+    talloc_set_destructor(img, mp_image_destructor);
+
+    mp_image_set_size(img, w, h);
+    mp_image_setfmt(img, imgfmt);
+
+    if (mp_image_alloc_planes(img))
+        return img;
+
+    talloc_free(img);
+    return NULL;
+}
+
+// Rough memory footprint of img: the size of the struct itself plus the
+// sizes of all plane buffers referenced through img->bufs[].
+int mp_image_approx_byte_size(struct mp_image *img)
+{
+    int bytes = sizeof(*img);
+
+    for (int i = 0; i < MP_MAX_PLANES; i++) {
+        struct AVBufferRef *ref = img->bufs[i];
+        if (!ref)
+            continue;
+        bytes += ref->size;
+    }
+
+    return bytes;
+}
+
+// Allocate a new image with the same format and size as img, and copy both
+// the pixel data and the attributes over. Returns NULL if allocation fails.
+struct mp_image *mp_image_new_copy(struct mp_image *img)
+{
+    struct mp_image *copy = mp_image_alloc(img->imgfmt, img->w, img->h);
+    if (copy) {
+        mp_image_copy(copy, img);
+        mp_image_copy_attributes(copy, img);
+    }
+    return copy;
+}
+
+// Make dst take over the image data of src, and free src.
+// This is basically a safe version of *dst = *src; free(src);
+// Only works with ref-counted images, and can't change image size/format.
+// After the call, src must not be used anymore.
+void mp_image_steal_data(struct mp_image *dst, struct mp_image *src)
+{
+    assert(dst->imgfmt == src->imgfmt && dst->w == src->w && dst->h == src->h);
+    assert(dst->bufs[0] && src->bufs[0]);
+
+    // Release everything dst currently references. The destructor is invoked
+    // by hand, since dst itself stays allocated.
+    mp_image_destructor(dst); // unref old
+    talloc_free_children(dst);
+
+    // Move all fields, including the buffer references, over to dst.
+    *dst = *src;
+
+    // Zero src first, so that the destructor run by talloc_free() does not
+    // unref the references which dst owns now.
+    *src = (struct mp_image){0};
+    talloc_free(src);
+}
+
+// Drop the plane buffer references and reset planes[]/stride[], but keep all
+// other fields. In particular, mp_image.hwctx is preserved.
+void mp_image_unref_data(struct mp_image *img)
+{
+    for (int p = 0; p < MP_MAX_PLANES; p++) {
+        av_buffer_unref(&img->bufs[p]);
+        img->stride[p] = 0;
+        img->planes[p] = NULL;
+    }
+}
+
+// Replace *dst (if set) by a new reference to the same buffer. Used after a
+// struct copy, where the pointer still belongs to the copied-from image.
+// Allocation failure is handled by MP_HANDLE_OOM.
+static void ref_buffer(AVBufferRef **dst)
+{
+    if (!*dst)
+        return;
+
+    *dst = av_buffer_ref(*dst);
+    MP_HANDLE_OOM(*dst);
+}
+
+// Create a new reference to img. The caller owns the returned image; img
+// itself is left untouched. Returns NULL if img is NULL. An image that is not
+// refcounted (no bufs[0]) is duplicated with mp_image_new_copy() instead.
+struct mp_image *mp_image_new_ref(struct mp_image *img)
+{
+    if (!img)
+        return NULL;
+
+    if (!img->bufs[0])
+        return mp_image_new_copy(img);
+
+    struct mp_image *ref = talloc_ptrtype(NULL, ref);
+    talloc_set_destructor(ref, mp_image_destructor);
+    *ref = *img;
+
+    // The struct copy duplicated the pointers only; take real references.
+    for (int i = 0; i < MP_MAX_PLANES; i++)
+        ref_buffer(&ref->bufs[i]);
+
+    AVBufferRef **extra[] = {
+        &ref->hwctx, &ref->icc_profile, &ref->a53_cc,
+        &ref->dovi, &ref->film_grain, &ref->dovi_buf,
+    };
+    for (int i = 0; i < MP_ARRAY_SIZE(extra); i++)
+        ref_buffer(extra[i]);
+
+    // The side data array needs its own copy, with its own references.
+    ref->ff_side_data = talloc_memdup(NULL, ref->ff_side_data,
+                        ref->num_ff_side_data * sizeof(ref->ff_side_data[0]));
+    for (int i = 0; i < ref->num_ff_side_data; i++)
+        ref_buffer(&ref->ff_side_data[i].buf);
+
+    return ref;
+}
+
+// Closure passed as opaque pointer to call_free(): a user callback together
+// with the argument it is invoked with.
+struct free_args {
+    void *arg;                  // passed to free() unchanged
+    void (*free)(void *arg);    // user callback, invoked by call_free()
+};
+
+// AVBuffer free callback: opaque is a struct free_args. Invokes the stored
+// user callback with its argument, then frees the free_args itself. The data
+// pointer is not used.
+static void call_free(void *opaque, uint8_t *data)
+{
+    struct free_args *fa = opaque;
+    void (*release)(void *arg) = fa->free;
+    void *user_arg = fa->arg;
+
+    release(user_arg);
+    talloc_free(fa);
+}
+
+// Create a new mp_image with the same fields as img (or an all-zero one if
+// img is NULL), but without any buffer references or side data.
+// If img is given, the plane pointers are copied without a reference backing
+// them. So the result is only valid until the original img is unreferenced
+// (including implicit unreferencing of the data by
+// mp_image_make_writeable()), unless a new reference is set.
+struct mp_image *mp_image_new_dummy_ref(struct mp_image *img)
+{
+    struct mp_image *dummy = talloc_ptrtype(NULL, dummy);
+    talloc_set_destructor(dummy, mp_image_destructor);
+
+    if (img) {
+        *dummy = *img;
+    } else {
+        *dummy = (struct mp_image){0};
+    }
+
+    // Drop everything the destructor would otherwise unref or free.
+    for (int i = 0; i < MP_MAX_PLANES; i++)
+        dummy->bufs[i] = NULL;
+    dummy->hwctx = NULL;
+    dummy->icc_profile = NULL;
+    dummy->a53_cc = NULL;
+    dummy->dovi = NULL;
+    dummy->film_grain = NULL;
+    dummy->dovi_buf = NULL;
+    dummy->ff_side_data = NULL;
+    dummy->num_ff_side_data = 0;
+
+    return dummy;
+}
+
+// Return a reference counted reference to img. If the reference count reaches
+// 0, call free(free_arg). The data passed by img must not be free'd before
+// that.
+// The plane data is not owned by an AVBufferRef; bufs[0] is an empty buffer
+// which only exists to carry the free callback.
+// On allocation failure, the new image is released and NULL is returned; the
+// free callback is NOT invoked in this case, so free_arg stays with the
+// caller.
+// This is only used for hw decoding; this is important, because libav* expects
+// all plane data to be accounted for by AVBufferRefs.
+struct mp_image *mp_image_new_custom_ref(struct mp_image *img, void *free_arg,
+                                         void (*free)(void *arg))
+{
+    struct mp_image *new = mp_image_new_dummy_ref(img);
+
+    struct free_args *args = talloc_ptrtype(NULL, args);
+    *args = (struct free_args){free_arg, free};
+    new->bufs[0] = av_buffer_create(NULL, 0, call_free, args,
+                                    AV_BUFFER_FLAG_READONLY);
+    if (new->bufs[0])
+        return new;
+
+    // call_free() will never run for this args struct, so it has to be
+    // released here; otherwise it would leak.
+    talloc_free(args);
+    talloc_free(new);
+    return NULL;
+}
+
+// Return whether the image data may be written to. Images that are not
+// ref-counted (no bufs[0]) are always considered writeable; otherwise every
+// buffer up to the first unset bufs[] entry has to be writable.
+bool mp_image_is_writeable(struct mp_image *img)
+{
+    for (int p = 0; p < MP_MAX_PLANES && img->bufs[p]; p++) {
+        if (!av_buffer_is_writable(img->bufs[p]))
+            return false;
+    }
+    return true;
+}
+
+// Ensure the image data referenced by img can be written to. If it is not
+// writeable yet, the data is copied into a newly allocated image, and
+// img->planes[] and img->stride[] are set to the copy.
+// Returns success; if false is returned, the image could not be made writeable.
+bool mp_image_make_writeable(struct mp_image *img)
+{
+    if (!mp_image_is_writeable(img)) {
+        struct mp_image *copy = mp_image_new_copy(img);
+        if (!copy)
+            return false;
+        // img takes over the data of the copy; the copy itself is freed.
+        mp_image_steal_data(img, copy);
+        assert(mp_image_is_writeable(img));
+    }
+    return true;
+}
+
+// Helper function: frees *p_img, and sets *p_img to a new reference of
+// new_value (or to NULL if new_value is NULL). Nothing happens if *p_img
+// already is new_value.
+// If creating the new reference fails, *p_img ends up as NULL.
+void mp_image_setrefp(struct mp_image **p_img, struct mp_image *new_value)
+{
+    if (*p_img == new_value)
+        return;
+
+    talloc_free(*p_img);
+    if (new_value) {
+        *p_img = mp_image_new_ref(new_value);
+    } else {
+        *p_img = NULL;
+    }
+}
+
+// Convenience helper: free *p_img and reset the pointer to NULL.
+// (A mp_image can also be free'd directly with talloc_free.)
+void mp_image_unrefp(struct mp_image **p_img)
+{
+    struct mp_image *img = *p_img;
+    talloc_free(img);
+    *p_img = NULL;
+}
+
+// Copy height lines of bytesPerLine bytes each from src to dst. Consecutive
+// lines are srcStride bytes apart in src and dstStride bytes apart in dst.
+// If the lines are packed without gaps in both buffers, everything is copied
+// with a single memcpy().
+void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height,
+                int dstStride, int srcStride)
+{
+    uint8_t *out = dst;
+    const uint8_t *in = src;
+
+    int contiguous = height && bytesPerLine == dstStride &&
+                     dstStride == srcStride;
+
+    if (!contiguous) {
+        for (int row = 0; row < height; row++) {
+            memcpy(out, in, bytesPerLine);
+            in += srcStride;
+            out += dstStride;
+        }
+        return;
+    }
+
+    if (srcStride < 0) {
+        // Flipped layout: start from the line with the lowest address.
+        in += (height - 1) * srcStride;
+        out += (height - 1) * dstStride;
+        srcStride = -srcStride;
+    }
+
+    memcpy(out, in, srcStride * (height - 1) + bytesPerLine);
+}
+
+// Copy the pixel data of all planes from src to dst. Both images must have
+// the same format and size, and dst must be writeable. For paletted formats,
+// the palette in plane 1 is copied as well.
+void mp_image_copy(struct mp_image *dst, struct mp_image *src)
+{
+    assert(dst->imgfmt == src->imgfmt);
+    assert(dst->w == src->w && dst->h == src->h);
+    assert(mp_image_is_writeable(dst));
+
+    for (int p = 0; p < dst->num_planes; p++) {
+        int plane_w = mp_image_plane_w(dst, p);
+        // Size of a line in bytes, rounded up to whole bytes.
+        int row_bytes = (plane_w * dst->fmt.bpp[p] + 7) / 8;
+        int rows = mp_image_plane_h(dst, p);
+        memcpy_pic(dst->planes[p], src->planes[p], row_bytes, rows,
+                   dst->stride[p], src->stride[p]);
+    }
+
+    if (dst->fmt.flags & MP_IMGFLAG_PAL)
+        memcpy(dst->planes[1], src->planes[1], AVPALETTE_SIZE);
+}
+
+// Return the colorspace forced by the pixel format of params. If hw_subfmt
+// is set, it is used for the lookup instead of imgfmt.
+static enum mp_csp mp_image_params_get_forced_csp(struct mp_image_params *params)
+{
+    int fmt = params->imgfmt;
+    if (params->hw_subfmt)
+        fmt = params->hw_subfmt;
+    return mp_imgfmt_get_forced_csp(fmt);
+}
+
+// Drop the reference held in *dst and, if new is not NULL, store a new
+// reference to new in it. Allocation failure is handled by MP_HANDLE_OOM.
+static void assign_bufref(AVBufferRef **dst, AVBufferRef *new)
+{
+    av_buffer_unref(dst);
+    if (!new)
+        return;
+
+    *dst = av_buffer_ref(new);
+    MP_HANDLE_OOM(*dst);
+}
+
+// Copy the per-frame attributes (timestamps, picture type, color parameters,
+// metadata buffers, side data, ...) from src to dst. Image format, size and
+// pixel data of dst are not changed, except for the palette if both images
+// use a paletted format. dst and src must be different images.
+void mp_image_copy_attributes(struct mp_image *dst, struct mp_image *src)
+{
+    assert(dst != src);
+
+    dst->pict_type = src->pict_type;
+    dst->fields = src->fields;
+    dst->pts = src->pts;
+    dst->dts = src->dts;
+    dst->pkt_duration = src->pkt_duration;
+    dst->params.rotate = src->params.rotate;
+    dst->params.stereo3d = src->params.stereo3d;
+    dst->params.p_w = src->params.p_w;
+    dst->params.p_h = src->params.p_h;
+    dst->params.color = src->params.color;
+    dst->params.chroma_location = src->params.chroma_location;
+    dst->params.alpha = src->params.alpha;
+    dst->params.crop = src->params.crop;
+    dst->nominal_fps = src->nominal_fps;
+
+    // ensure colorspace consistency
+    // If the two formats force different colorspaces, the color.space copied
+    // above does not fit dst: use the one forced by dst's format, or guess
+    // one from the source size if dst's format forces none.
+    enum mp_csp dst_forced_csp = mp_image_params_get_forced_csp(&dst->params);
+    if (mp_image_params_get_forced_csp(&src->params) != dst_forced_csp) {
+        dst->params.color.space = dst_forced_csp != MP_CSP_AUTO ?
+                                    dst_forced_csp :
+                                    mp_csp_guess_colorspace(src->w, src->h);
+    }
+
+    // The palette lives in plane 1; it is only copied if both images are
+    // paletted and dst could be made writeable.
+    if ((dst->fmt.flags & MP_IMGFLAG_PAL) && (src->fmt.flags & MP_IMGFLAG_PAL)) {
+        if (dst->planes[1] && src->planes[1]) {
+            if (mp_image_make_writeable(dst))
+                memcpy(dst->planes[1], src->planes[1], AVPALETTE_SIZE);
+        }
+    }
+    // Replace dst's metadata buffers by new references to those of src.
+    assign_bufref(&dst->icc_profile, src->icc_profile);
+    assign_bufref(&dst->dovi, src->dovi);
+    assign_bufref(&dst->dovi_buf, src->dovi_buf);
+    assign_bufref(&dst->film_grain, src->film_grain);
+    assign_bufref(&dst->a53_cc, src->a53_cc);
+
+    // Drop the old side data references before the array is resized.
+    for (int n = 0; n < dst->num_ff_side_data; n++)
+        av_buffer_unref(&dst->ff_side_data[n].buf);
+
+    MP_RESIZE_ARRAY(NULL, dst->ff_side_data, src->num_ff_side_data);
+    dst->num_ff_side_data = src->num_ff_side_data;
+
+    // Every side data entry gets its own reference to the source buffer.
+    for (int n = 0; n < dst->num_ff_side_data; n++) {
+        dst->ff_side_data[n].type = src->ff_side_data[n].type;
+        dst->ff_side_data[n].buf = av_buffer_ref(src->ff_side_data[n].buf);
+        MP_HANDLE_OOM(dst->ff_side_data[n].buf);
+    }
+}
+
+// Restrict img to the rectangle (x0, y0)-(x1, y1) (bottom/right border
+// exclusive). No pixel data is copied: the plane pointers are moved to the
+// new top/left corner and the image size is updated.
+// x0/y0 must be naturally aligned.
+void mp_image_crop(struct mp_image *img, int x0, int y0, int x1, int y1)
+{
+    assert(x0 >= 0 && y0 >= 0);
+    assert(x0 <= x1 && y0 <= y1);
+    assert(x1 <= img->w && y1 <= img->h);
+    assert(!(x0 & (img->fmt.align_x - 1)));
+    assert(!(y0 & (img->fmt.align_y - 1)));
+
+    for (int p = 0; p < img->num_planes; ++p) {
+        // Byte offsets of the new origin within this plane.
+        int row_off = (y0 >> img->fmt.ys[p]) * img->stride[p];
+        int col_off = (x0 >> img->fmt.xs[p]) * img->fmt.bpp[p] / 8;
+        img->planes[p] += row_off + col_off;
+    }
+
+    mp_image_set_size(img, x1 - x0, y1 - y0);
+}
+
+// Same as mp_image_crop(), with the rectangle passed as struct mp_rect.
+void mp_image_crop_rc(struct mp_image *img, struct mp_rect rc)
+{
+    int left = rc.x0, top = rc.y0;
+    int right = rc.x1, bottom = rc.y1;
+    mp_image_crop(img, left, top, right, bottom);
+}
+
+// Fill p with count consecutive copies of the src_size bytes at src.
+// For src_size == 2, p is written through a uint16_t pointer.
+static void memset_pattern(void *p, size_t count, uint8_t *src, size_t src_size)
+{
+    assert(src_size >= 1);
+
+    switch (src_size) {
+    case 1:
+        memset(p, src[0], count);
+        break;
+    case 2: { // >8 bit YUV => common, be slightly less naive
+        uint16_t pattern;
+        memcpy(&pattern, src, 2);
+        uint16_t *out = p;
+        for (size_t i = 0; i < count; i++)
+            out[i] = pattern;
+        break;
+    }
+    default: {
+        char *out = p;
+        for (size_t i = 0; i < count; i++) {
+            memcpy(out, src, src_size);
+            out += src_size;
+        }
+        break;
+    }
+    }
+}
+
+// Reverse the byte order of every word_size sized word in the buffer d, in
+// place. bytes is the buffer size; a trailing partial word is left untouched.
+// Only word sizes of 2 and 4 are supported. Returns false (without touching
+// the buffer) for any other word size, true otherwise.
+static bool endian_swap_bytes(void *d, size_t bytes, size_t word_size)
+{
+    if (word_size != 2 && word_size != 4)
+        return false;
+
+    size_t num_words = bytes / word_size;
+    uint8_t *word = d;
+
+    // Each word starts word_size bytes after the previous one. (Stepping by
+    // 2 bytes for 4 byte words made the words overlap and left the second
+    // half of the buffer unswapped.)
+    for (size_t x = 0; x < num_words; x++, word += word_size) {
+        for (size_t lo = 0, hi = word_size - 1; lo < hi; lo++, hi--) {
+            uint8_t tmp = word[lo];
+            word[lo] = word[hi];
+            word[hi] = tmp;
+        }
+    }
+
+    return true;
+}
+
+// Clear the rectangle (x0, y0)-(x1, y1) of img to "black".
+// x0/y0 must be aligned to the format's align_x/align_y.
+// Bottom/right border is allowed not to be aligned, but it might implicitly
+// overwrite pixel data until the alignment (align_x/align_y) is reached.
+// Alpha is cleared to 0 (fully transparent).
+void mp_image_clear(struct mp_image *img, int x0, int y0, int x1, int y1)
+{
+    assert(x0 >= 0 && y0 >= 0);
+    assert(x0 <= x1 && y0 <= y1);
+    assert(x1 <= img->w && y1 <= img->h);
+    assert(!(x0 & (img->fmt.align_x - 1)));
+    assert(!(y0 & (img->fmt.align_y - 1)));
+
+    // Work on a cropped shallow copy, so that img itself keeps its plane
+    // pointers and size.
+    struct mp_image area = *img;
+    struct mp_imgfmt_desc *fmt = &area.fmt;
+    mp_image_crop(&area, x0, y0, x1, y1);
+
+    // "Black" color for each plane.
+    // plane_size[p] is the length of the pattern in plane_clear[p] in bytes;
+    // 0 means that the plane is simply filled with zero bytes.
+    uint8_t plane_clear[MP_MAX_PLANES][8] = {0};
+    int plane_size[MP_MAX_PLANES] = {0};
+    int misery = 1; // pixel group width
+
+    // YUV integer chroma needs special consideration, and technically luma is
+    // usually not 0 either.
+    if ((fmt->flags & (MP_IMGFLAG_HAS_COMPS | MP_IMGFLAG_PACKED_SS_YUV)) &&
+        (fmt->flags & MP_IMGFLAG_TYPE_MASK) == MP_IMGFLAG_TYPE_UINT &&
+        (fmt->flags & MP_IMGFLAG_COLOR_MASK) == MP_IMGFLAG_COLOR_YUV)
+    {
+        // The pattern of each plane is assembled as an integer first.
+        uint64_t plane_clear_i[MP_MAX_PLANES] = {0};
+
+        // Need to handle "multiple" pixels with packed YUV.
+        uint8_t luma_offsets[4] = {0};
+        if (fmt->flags & MP_IMGFLAG_PACKED_SS_YUV) {
+            misery = fmt->align_x;
+            if (misery <= MP_ARRAY_SIZE(luma_offsets)) // ignore if out of bounds
+                mp_imgfmt_get_packed_yuv_locations(fmt->id, luma_offsets);
+        }
+
+        for (int c = 0; c < 4; c++) {
+            struct mp_imgfmt_comp_desc *cd = &fmt->comps[c];
+            // Bits covered by one pattern repetition in this plane. Only
+            // whole-byte patterns of up to 64 bits are handled; components
+            // with size 0 are skipped.
+            int plane_bits = fmt->bpp[cd->plane] * misery;
+            if (plane_bits <= 64 && plane_bits % 8u == 0 && cd->size) {
+                plane_size[cd->plane] = plane_bits / 8u;
+                // A negative pad reduces the depth used for the lookup.
+                int depth = cd->size + MPMIN(cd->pad, 0);
+                double m, o;
+                mp_get_csp_uint_mul(area.params.color.space,
+                                    area.params.color.levels,
+                                    depth, c + 1, &m, &o);
+                // NOTE(review): presumably m/o map integer values to
+                // normalized ones via value * m + o, which would make val the
+                // integer representing 0 - confirm against
+                // mp_get_csp_uint_mul().
+                uint64_t val = MPCLAMP(lrint((0 - o) / m), 0, 1ull << depth);
+                plane_clear_i[cd->plane] |= val << cd->offset;
+                // Only for the first component (c == 0): repeat the value at
+                // the offsets of the other pixels in the group.
+                for (int x = 1; x < (c ? 0 : misery); x++)
+                    plane_clear_i[cd->plane] |= val << luma_offsets[x];
+            }
+        }
+
+        for (int p = 0; p < MP_MAX_PLANES; p++) {
+            // An all-zero pattern is handled by the plain memset() path.
+            if (!plane_clear_i[p])
+                plane_size[p] = 0;
+            memcpy(&plane_clear[p][0], &plane_clear_i[p], 8); // endian dependent
+
+            // The return value is ignored: nothing is swapped if the word
+            // size is not supported by endian_swap_bytes().
+            if (fmt->endian_shift) {
+                endian_swap_bytes(&plane_clear[p][0], plane_size[p],
+                                  1 << fmt->endian_shift);
+            }
+        }
+    }
+
+    // Fill each line of each plane, either with the pattern (once per pixel
+    // group) or with zero bytes.
+    for (int p = 0; p < area.num_planes; p++) {
+        int p_h = mp_image_plane_h(&area, p);
+        int p_w = mp_image_plane_w(&area, p);
+        for (int y = 0; y < p_h; y++) {
+            void *ptr = area.planes[p] + (ptrdiff_t)area.stride[p] * y;
+            if (plane_size[p]) {
+                memset_pattern(ptr, p_w / misery, plane_clear[p], plane_size[p]);
+            } else {
+                memset(ptr, 0, mp_image_plane_bytes(&area, p, 0, area.w));
+            }
+        }
+    }
+}
+
+// Same as mp_image_clear(), with the rectangle passed as struct mp_rect.
+void mp_image_clear_rc(struct mp_image *mpi, struct mp_rect rc)
+{
+    int left = rc.x0, top = rc.y0;
+    int right = rc.x1, bottom = rc.y1;
+    mp_image_clear(mpi, left, top, right, bottom);
+}
+
+// Clear the area of the image _not_ covered by rc.
+void mp_image_clear_rc_inv(struct mp_image *mpi, struct mp_rect rc)
+{
+    // Subtracting rc from the full image rectangle yields up to 4 pieces.
+    struct mp_rect full = {0, 0, mpi->w, mpi->h};
+    struct mp_rect pieces[4];
+    int num_pieces = mp_rect_subtract(&full, &rc, pieces);
+
+    for (int i = 0; i < num_pieces; i++)
+        mp_image_clear_rc(mpi, pieces[i]);
+}
+
+// Flip the image vertically without touching the pixel data: each plane
+// pointer is moved to the last line of the plane, and the stride is negated.
+void mp_image_vflip(struct mp_image *img)
+{
+    for (int p = 0; p < img->num_planes; p++) {
+        int last_row = mp_image_plane_h(img, p) - 1;
+        img->planes[p] += img->stride[p] * last_row;
+        img->stride[p] = -img->stride[p];
+    }
+}
+
+// Return whether p->crop is a non-empty rectangle that lies completely
+// within the image (0, 0)-(p->w, p->h).
+bool mp_image_crop_valid(const struct mp_image_params *p)
+{
+    const struct mp_rect *rc = &p->crop;
+
+    // Empty or inverted rectangle.
+    if (rc->x1 <= rc->x0 || rc->y1 <= rc->y0)
+        return false;
+    // Starts outside of the image.
+    if (rc->x0 < 0 || rc->y0 < 0)
+        return false;
+    // Must not extend past the right/bottom border.
+    return rc->x1 <= p->w && rc->y1 <= p->h;
+}
+
+// Compute the display size from the image size (or the crop rectangle, if it
+// is valid) and the pixel aspect ratio p_w:p_h. The aspect ratio only ever
+// enlarges one of the two dimensions; results are clamped to [1, INT_MAX].
+void mp_image_params_get_dsize(const struct mp_image_params *p,
+                               int *d_w, int *d_h)
+{
+    bool cropped = mp_image_crop_valid(p);
+    *d_w = cropped ? mp_rect_w(p->crop) : p->w;
+    *d_h = cropped ? mp_rect_h(p->crop) : p->h;
+
+    if (p->p_h >= 1 && p->p_w > p->p_h) {
+        // Wide pixels: stretch the width.
+        *d_w = MPCLAMP(*d_w * (int64_t)p->p_w / p->p_h, 1, INT_MAX);
+    } else if (p->p_w >= 1 && p->p_h > p->p_w) {
+        // Tall pixels: stretch the height.
+        *d_h = MPCLAMP(*d_h * (int64_t)p->p_h / p->p_w, 1, INT_MAX);
+    }
+}
+
+// Set the pixel aspect ratio p_w:p_h such that an image of size p->w x p->h
+// is displayed with the aspect ratio d_w:d_h.
+void mp_image_params_set_dsize(struct mp_image_params *p, int d_w, int d_h)
+{
+    AVRational display = {d_w, d_h};
+    AVRational storage = {p->w, p->h};
+    AVRational par = av_div_q(display, storage);
+
+    p->p_w = par.num;
+    p->p_h = par.den;
+}
+
+// Write a human readable description of p into the buffer b of size bs, and
+// return b. If p is NULL or has no image format set, the result is "???".
+char *mp_image_params_to_str_buf(char *b, size_t bs,
+                                 const struct mp_image_params *p)
+{
+    if (!p || !p->imgfmt) {
+        snprintf(b, bs, "???");
+        return b;
+    }
+
+    snprintf(b, bs, "%dx%d", p->w, p->h);
+
+    // The pixel aspect ratio is only printed if it is not 1:1, or unset.
+    bool square_pixels = p->p_w == p->p_h && p->p_w;
+    if (!square_pixels)
+        mp_snprintf_cat(b, bs, " [%d:%d]", p->p_w, p->p_h);
+
+    mp_snprintf_cat(b, bs, " %s", mp_imgfmt_to_name(p->imgfmt));
+    if (p->hw_subfmt)
+        mp_snprintf_cat(b, bs, "[%s]", mp_imgfmt_to_name(p->hw_subfmt));
+
+    const char *space = m_opt_choice_str(mp_csp_names, p->color.space);
+    const char *prim = m_opt_choice_str(mp_csp_prim_names, p->color.primaries);
+    const char *trc = m_opt_choice_str(mp_csp_trc_names, p->color.gamma);
+    const char *levels = m_opt_choice_str(mp_csp_levels_names, p->color.levels);
+    const char *light = m_opt_choice_str(mp_csp_light_names, p->color.light);
+    mp_snprintf_cat(b, bs, " %s/%s/%s/%s/%s", space, prim, trc, levels, light);
+
+    mp_snprintf_cat(b, bs, " CL=%s",
+                    m_opt_choice_str(mp_chroma_names, p->chroma_location));
+
+    if (mp_image_crop_valid(p)) {
+        mp_snprintf_cat(b, bs, " crop=%dx%d+%d+%d", mp_rect_w(p->crop),
+                        mp_rect_h(p->crop), p->crop.x0, p->crop.y0);
+    }
+
+    if (p->rotate)
+        mp_snprintf_cat(b, bs, " rot=%d", p->rotate);
+
+    if (p->stereo3d > 0) {
+        mp_snprintf_cat(b, bs, " stereo=%s",
+                        MP_STEREO3D_NAME_DEF(p->stereo3d, "?"));
+    }
+
+    if (p->alpha) {
+        mp_snprintf_cat(b, bs, " A=%s",
+                        m_opt_choice_str(mp_alpha_names, p->alpha));
+    }
+
+    return b;
+}
+
+// Check whether the image parameters are usable.
+// Some non-essential fields are allowed to be unset (like colorspace flags).
+bool mp_image_params_valid(const struct mp_image_params *p)
+{
+    if (p->w <= 0 || p->h <= 0)
+        return false;
+
+    // av_image_check_size has similar checks and triggers around 16000*16000
+    // It's mostly needed to deal with the fact that offsets are sometimes
+    // ints. We also should (for now) do the same as FFmpeg, to be sure large
+    // images don't crash with libswscale or when wrapping with AVFrame and
+    // passing the result to filters.
+    if ((p->w + 128LL) * (p->h + 128LL) >= INT_MAX / 8)
+        return false;
+
+    if (p->p_w < 0 || p->p_h < 0)
+        return false;
+
+    if (p->rotate < 0 || p->rotate >= 360)
+        return false;
+
+    // The format must be known.
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(p->imgfmt);
+    if (!desc.id)
+        return false;
+
+    // A hw_subfmt is only allowed with hwaccel formats.
+    return !p->hw_subfmt || (desc.flags & MP_IMGFLAG_HWACCEL);
+}
+
+// Return whether two parameter sets are equal in format, size, pixel aspect
+// ratio, force_window, color parameters, chroma location, rotation, stereo
+// mode, alpha mode and crop rectangle.
+bool mp_image_params_equal(const struct mp_image_params *p1,
+                           const struct mp_image_params *p2)
+{
+    if (p1->imgfmt != p2->imgfmt || p1->hw_subfmt != p2->hw_subfmt)
+        return false;
+    if (p1->w != p2->w || p1->h != p2->h)
+        return false;
+    if (p1->p_w != p2->p_w || p1->p_h != p2->p_h)
+        return false;
+    if (p1->force_window != p2->force_window)
+        return false;
+    if (!mp_colorspace_equal(p1->color, p2->color))
+        return false;
+    if (p1->chroma_location != p2->chroma_location)
+        return false;
+    if (p1->rotate != p2->rotate || p1->stereo3d != p2->stereo3d)
+        return false;
+    if (p1->alpha != p2->alpha)
+        return false;
+    return mp_rect_equals(&p1->crop, &p2->crop);
+}
+
+// Apply params to image, but keep the image's own format and size.
+// If the format in params differs from the image's format, the color
+// parameters are reset instead of being taken over.
+void mp_image_set_attributes(struct mp_image *image,
+                             const struct mp_image_params *params)
+{
+    struct mp_image_params merged = *params;
+
+    if (image->imgfmt != params->imgfmt)
+        merged.color = (struct mp_colorspace){0};
+
+    merged.imgfmt = image->imgfmt;
+    merged.w = image->w;
+    merged.h = image->h;
+
+    mp_image_set_params(image, &merged);
+}
+
+// Pick default color levels for a format: MP_CSP_LEVELS_PC for the YUVJ and
+// gray (with or without alpha) pixel formats listed below, MP_CSP_LEVELS_TV
+// for everything else.
+static enum mp_csp_levels infer_levels(enum mp_imgfmt imgfmt)
+{
+    static const int full_range_fmts[] = {
+        AV_PIX_FMT_YUVJ420P,
+        AV_PIX_FMT_YUVJ411P,
+        AV_PIX_FMT_YUVJ422P,
+        AV_PIX_FMT_YUVJ444P,
+        AV_PIX_FMT_YUVJ440P,
+        AV_PIX_FMT_GRAY8,
+        AV_PIX_FMT_YA8,
+        AV_PIX_FMT_GRAY9LE,
+        AV_PIX_FMT_GRAY9BE,
+        AV_PIX_FMT_GRAY10LE,
+        AV_PIX_FMT_GRAY10BE,
+        AV_PIX_FMT_GRAY12LE,
+        AV_PIX_FMT_GRAY12BE,
+        AV_PIX_FMT_GRAY14LE,
+        AV_PIX_FMT_GRAY14BE,
+        AV_PIX_FMT_GRAY16LE,
+        AV_PIX_FMT_GRAY16BE,
+        AV_PIX_FMT_YA16BE,
+        AV_PIX_FMT_YA16LE,
+    };
+
+    const int pixfmt = imgfmt2pixfmt(imgfmt);
+    for (int i = 0; i < MP_ARRAY_SIZE(full_range_fmts); i++) {
+        if (full_range_fmts[i] == pixfmt)
+            return MP_CSP_LEVELS_PC;
+    }
+    return MP_CSP_LEVELS_TV;
+}
+
+// If details like params->color.space/params->color.levels are missing, guess
+// them from the other settings. Also, even if they are set, make them
+// consistent with the colorspace as implied by the pixel format.
+// The struct is modified in place. Besides the colorspace fields, this also
+// fills in color.hdr.max_luma, chroma_location and color.light if unset.
+void mp_image_params_guess_csp(struct mp_image_params *params)
+{
+    // MP_CSP_AUTO here means the pixel format does not dictate a colorspace.
+    enum mp_csp forced_csp = mp_image_params_get_forced_csp(params);
+    if (forced_csp == MP_CSP_AUTO) { // YUV/other
+        if (params->color.space != MP_CSP_BT_601 &&
+            params->color.space != MP_CSP_BT_709 &&
+            params->color.space != MP_CSP_BT_2020_NC &&
+            params->color.space != MP_CSP_BT_2020_C &&
+            params->color.space != MP_CSP_SMPTE_240M &&
+            params->color.space != MP_CSP_YCGCO)
+        {
+            // Makes no sense, so guess instead
+            // YCGCO should be separate, but libavcodec disagrees
+            params->color.space = MP_CSP_AUTO;
+        }
+        if (params->color.space == MP_CSP_AUTO)
+            params->color.space = mp_csp_guess_colorspace(params->w, params->h);
+        if (params->color.levels == MP_CSP_LEVELS_AUTO) {
+            // V-Log is assumed to be full range; otherwise the pixel format
+            // decides.
+            if (params->color.gamma == MP_CSP_TRC_V_LOG) {
+                params->color.levels = MP_CSP_LEVELS_PC;
+            } else {
+                params->color.levels = infer_levels(params->imgfmt);
+            }
+        }
+        if (params->color.primaries == MP_CSP_PRIM_AUTO) {
+            // Guess based on the colormatrix as a first priority
+            if (params->color.space == MP_CSP_BT_2020_NC ||
+                params->color.space == MP_CSP_BT_2020_C) {
+                params->color.primaries = MP_CSP_PRIM_BT_2020;
+            } else if (params->color.space == MP_CSP_BT_709) {
+                params->color.primaries = MP_CSP_PRIM_BT_709;
+            } else {
+                // Ambiguous colormatrix for BT.601, guess based on res
+                params->color.primaries = mp_csp_guess_primaries(params->w, params->h);
+            }
+        }
+        if (params->color.gamma == MP_CSP_TRC_AUTO)
+            params->color.gamma = MP_CSP_TRC_BT_1886;
+    } else if (forced_csp == MP_CSP_RGB) {
+        // Space and levels are forced; primaries/gamma are only defaulted.
+        params->color.space = MP_CSP_RGB;
+        params->color.levels = MP_CSP_LEVELS_PC;
+
+        // The majority of RGB content is either sRGB or (rarely) some other
+        // color space which we don't even handle, like AdobeRGB or
+        // ProPhotoRGB. The only reasonable thing we can do is assume it's
+        // sRGB and hope for the best, which should usually just work out fine.
+        // Note: sRGB primaries = BT.709 primaries
+        if (params->color.primaries == MP_CSP_PRIM_AUTO)
+            params->color.primaries = MP_CSP_PRIM_BT_709;
+        if (params->color.gamma == MP_CSP_TRC_AUTO)
+            params->color.gamma = MP_CSP_TRC_SRGB;
+    } else if (forced_csp == MP_CSP_XYZ) {
+        params->color.space = MP_CSP_XYZ;
+        params->color.levels = MP_CSP_LEVELS_PC;
+        // Force gamma to ST428 as this is the only correct for DCDM X'Y'Z'
+        params->color.gamma = MP_CSP_TRC_ST428;
+        // Don't care about primaries, they shouldn't be used, or if anything
+        // MP_CSP_PRIM_ST428 should be defined.
+    } else {
+        // We have no clue.
+        params->color.space = MP_CSP_AUTO;
+        params->color.levels = MP_CSP_LEVELS_AUTO;
+        params->color.primaries = MP_CSP_PRIM_AUTO;
+        params->color.gamma = MP_CSP_TRC_AUTO;
+    }
+
+    // Provide a signal peak if none was given. Note that for non-HDR transfer
+    // functions the whole HDR metadata struct (including this value) is reset
+    // again right below.
+    if (!params->color.hdr.max_luma) {
+        if (params->color.gamma == MP_CSP_TRC_HLG) {
+            params->color.hdr.max_luma = 1000; // reference display
+        } else {
+            // If the signal peak is unknown, we're forced to pick the TRC's
+            // nominal range as the signal peak to prevent clipping
+            params->color.hdr.max_luma = mp_trc_nom_peak(params->color.gamma) * MP_REF_WHITE;
+        }
+    }
+
+    if (!mp_trc_is_hdr(params->color.gamma)) {
+        // Some clips have leftover HDR metadata after conversion to SDR, so to
+        // avoid blowing up the tone mapping code, strip/sanitize it
+        params->color.hdr = pl_hdr_metadata_empty;
+    }
+
+    // Default chroma siting follows the levels: TV -> left, PC -> center.
+    // If the levels are still AUTO (the "no clue" case above), the chroma
+    // location is left at AUTO as well.
+    if (params->chroma_location == MP_CHROMA_AUTO) {
+        if (params->color.levels == MP_CSP_LEVELS_TV)
+            params->chroma_location = MP_CHROMA_LEFT;
+        if (params->color.levels == MP_CSP_LEVELS_PC)
+            params->chroma_location = MP_CHROMA_CENTER;
+    }
+
+    if (params->color.light == MP_CSP_LIGHT_AUTO) {
+        // HLG is always scene-referred (using its own OOTF), everything else
+        // we assume is display-referred by default.
+        if (params->color.gamma == MP_CSP_TRC_HLG) {
+            params->color.light = MP_CSP_LIGHT_SCENE_HLG;
+        } else {
+            params->color.light = MP_CSP_LIGHT_DISPLAY;
+        }
+    }
+}
+
+// Create a new mp_image reference to av_frame.
+// The image is first assembled in a temporary mp_image that merely borrows
+// the buffer pointers of src; the returned image is then created from it with
+// mp_image_new_ref(). src is not modified. The return value is whatever
+// mp_image_new_ref() returns (presumably NULL on failure - see its definition).
+struct mp_image *mp_image_from_av_frame(struct AVFrame *src)
+{
+    // Zero-initialized temporary with automatic storage; never returned.
+    struct mp_image *dst = &(struct mp_image){0};
+    AVFrameSideData *sd;
+
+    // Borrow the data buffers and the hw frames context from the AVFrame.
+    for (int p = 0; p < MP_MAX_PLANES; p++)
+        dst->bufs[p] = src->buf[p];
+
+    dst->hwctx = src->hw_frames_ctx;
+
+    mp_image_setfmt(dst, pixfmt2imgfmt(src->format));
+    mp_image_set_size(dst, src->width, src->height);
+
+    // Pixel aspect ratio.
+    dst->params.p_w = src->sample_aspect_ratio.num;
+    dst->params.p_h = src->sample_aspect_ratio.den;
+
+    for (int i = 0; i < 4; i++) {
+        dst->planes[i] = src->data[i];
+        dst->stride[i] = src->linesize[i];
+    }
+
+    dst->pict_type = src->pict_type;
+
+    // AVFrame stores crop as margins from each edge; mp_rect stores the
+    // coordinates of the crop rectangle.
+    dst->params.crop.x0 = src->crop_left;
+    dst->params.crop.y0 = src->crop_top;
+    dst->params.crop.x1 = src->width - src->crop_right;
+    dst->params.crop.y1 = src->height - src->crop_bottom;
+
+    // Translate interlacing information into MP_IMGFIELD_* flags. Which
+    // AVFrame members carry it depends on the libavutil version.
+    dst->fields = 0;
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 7, 100)
+    if (src->flags & AV_FRAME_FLAG_INTERLACED)
+        dst->fields |= MP_IMGFIELD_INTERLACED;
+    if (src->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST)
+        dst->fields |= MP_IMGFIELD_TOP_FIRST;
+#else
+    if (src->interlaced_frame)
+        dst->fields |= MP_IMGFIELD_INTERLACED;
+    if (src->top_field_first)
+        dst->fields |= MP_IMGFIELD_TOP_FIRST;
+#endif
+    if (src->repeat_pict == 1)
+        dst->fields |= MP_IMGFIELD_REPEAT_FIRST;
+
+    // Fields not listed here (light, hdr, ...) start out zeroed and may be
+    // filled in further below.
+    dst->params.color = (struct mp_colorspace){
+        .space = avcol_spc_to_mp_csp(src->colorspace),
+        .levels = avcol_range_to_mp_csp_levels(src->color_range),
+        .primaries = avcol_pri_to_mp_csp_prim(src->color_primaries),
+        .gamma = avcol_trc_to_mp_csp_trc(src->color_trc),
+    };
+
+    dst->params.chroma_location = avchroma_location_to_mp(src->chroma_location);
+
+    // opaque_ref is interpreted as a struct mp_image_params, which is what
+    // mp_image_to_av_frame() stores there. Only parameters that AVFrame has
+    // no field for are taken from it.
+    if (src->opaque_ref) {
+        struct mp_image_params *p = (void *)src->opaque_ref->data;
+        dst->params.stereo3d = p->stereo3d;
+        // Might be incorrect if colorspace changes.
+        dst->params.color.light = p->color.light;
+        dst->params.alpha = p->alpha;
+    }
+
+    sd = av_frame_get_side_data(src, AV_FRAME_DATA_DISPLAYMATRIX);
+    if (sd) {
+        double r = av_display_rotation_get((int32_t *)(sd->data));
+        // Negate the angle and normalize it into the range [0, 359].
+        if (!isnan(r))
+            dst->params.rotate = (((int)(-r) % 360) + 360) % 360;
+    }
+
+    sd = av_frame_get_side_data(src, AV_FRAME_DATA_ICC_PROFILE);
+    if (sd)
+        dst->icc_profile = sd->buf;
+
+    // Collect whatever HDR side data is present and convert it in one go;
+    // missing entries are passed as NULL.
+    AVFrameSideData *mdm = av_frame_get_side_data(src, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA);
+    AVFrameSideData *clm = av_frame_get_side_data(src, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL);
+    AVFrameSideData *dhp = av_frame_get_side_data(src, AV_FRAME_DATA_DYNAMIC_HDR_PLUS);
+    pl_map_hdr_metadata(&dst->params.color.hdr, &(struct pl_av_hdr_metadata) {
+        .mdm = (void *)(mdm ? mdm->data : NULL),
+        .clm = (void *)(clm ? clm->data : NULL),
+        .dhp = (void *)(dhp ? dhp->data : NULL),
+    });
+
+    sd = av_frame_get_side_data(src, AV_FRAME_DATA_A53_CC);
+    if (sd)
+        dst->a53_cc = sd->buf;
+
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100)
+    sd = av_frame_get_side_data(src, AV_FRAME_DATA_DOVI_METADATA);
+    if (sd)
+        dst->dovi = sd->buf;
+
+    sd = av_frame_get_side_data(src, AV_FRAME_DATA_DOVI_RPU_BUFFER);
+    if (sd)
+        dst->dovi_buf = sd->buf;
+#endif
+
+    sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS);
+    if (sd)
+        dst->film_grain = sd->buf;
+
+    // Record every side data entry (including the ones handled specially
+    // above) in a temporary array of borrowed buffer pointers.
+    for (int n = 0; n < src->nb_side_data; n++) {
+        sd = src->side_data[n];
+        struct mp_ff_side_data mpsd = {
+            .type = sd->type,
+            .buf = sd->buf,
+        };
+        MP_TARRAY_APPEND(NULL, dst->ff_side_data, dst->num_ff_side_data, mpsd);
+    }
+
+    // For hardware frames, remember the underlying software pixel format.
+    if (dst->hwctx) {
+        AVHWFramesContext *fctx = (void *)dst->hwctx->data;
+        dst->params.hw_subfmt = pixfmt2imgfmt(fctx->sw_format);
+    }
+
+    struct mp_image *res = mp_image_new_ref(dst);
+
+    // Allocated, but non-refcounted data.
+    talloc_free(dst->ff_side_data);
+
+    return res;
+}
+
+
+// Convert the mp_image reference to a AVFrame reference.
+// src is left untouched: a temporary new reference to it is created, and the
+// buffer references of that temporary are moved into the returned AVFrame.
+// Returns NULL if creating the reference or the AVFrame fails, or if the image
+// format has no AVPixelFormat equivalent.
+struct AVFrame *mp_image_to_av_frame(struct mp_image *src)
+{
+    struct mp_image *new_ref = mp_image_new_ref(src);
+    AVFrame *dst = av_frame_alloc();
+    if (!dst || !new_ref) {
+        talloc_free(new_ref);
+        av_frame_free(&dst);
+        return NULL;
+    }
+
+    // Move the buffer references into the AVFrame. The pointers in new_ref
+    // are cleared so that new_ref no longer refers to them when it is freed
+    // at the end.
+    for (int p = 0; p < MP_MAX_PLANES; p++) {
+        dst->buf[p] = new_ref->bufs[p];
+        new_ref->bufs[p] = NULL;
+    }
+
+    dst->hw_frames_ctx = new_ref->hwctx;
+    new_ref->hwctx = NULL;
+
+    dst->format = imgfmt2pixfmt(src->imgfmt);
+    dst->width = src->w;
+    dst->height = src->h;
+
+    // mp_rect holds crop rectangle coordinates; AVFrame wants margins.
+    dst->crop_left = src->params.crop.x0;
+    dst->crop_top = src->params.crop.y0;
+    dst->crop_right = dst->width - src->params.crop.x1;
+    dst->crop_bottom = dst->height - src->params.crop.y1;
+
+    dst->sample_aspect_ratio.num = src->params.p_w;
+    dst->sample_aspect_ratio.den = src->params.p_h;
+
+    for (int i = 0; i < 4; i++) {
+        dst->data[i] = src->planes[i];
+        dst->linesize[i] = src->stride[i];
+    }
+    dst->extended_data = dst->data;
+
+    dst->pict_type = src->pict_type;
+    // Translate MP_IMGFIELD_* flags back; the target AVFrame members depend
+    // on the libavutil version.
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 7, 100)
+    if (src->fields & MP_IMGFIELD_INTERLACED)
+        dst->flags |= AV_FRAME_FLAG_INTERLACED;
+    if (src->fields & MP_IMGFIELD_TOP_FIRST)
+        dst->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST;
+#else
+    if (src->fields & MP_IMGFIELD_INTERLACED)
+        dst->interlaced_frame = 1;
+    if (src->fields & MP_IMGFIELD_TOP_FIRST)
+        dst->top_field_first = 1;
+#endif
+    if (src->fields & MP_IMGFIELD_REPEAT_FIRST)
+        dst->repeat_pict = 1;
+
+    dst->colorspace = mp_csp_to_avcol_spc(src->params.color.space);
+    dst->color_range = mp_csp_levels_to_avcol_range(src->params.color.levels);
+    dst->color_primaries =
+        mp_csp_prim_to_avcol_pri(src->params.color.primaries);
+    dst->color_trc = mp_csp_trc_to_avcol_trc(src->params.color.gamma);
+
+    dst->chroma_location = mp_chroma_location_to_av(src->params.chroma_location);
+
+    // Stash a full copy of the image parameters in opaque_ref, so that
+    // mp_image_from_av_frame() can recover fields AVFrame cannot represent.
+    dst->opaque_ref = av_buffer_alloc(sizeof(struct mp_image_params));
+    MP_HANDLE_OOM(dst->opaque_ref);
+    *(struct mp_image_params *)dst->opaque_ref->data = src->params;
+
+    // Hand the ICC profile buffer reference of new_ref over to the side data.
+    if (src->icc_profile) {
+        AVFrameSideData *sd =
+            av_frame_new_side_data_from_buf(dst, AV_FRAME_DATA_ICC_PROFILE,
+                                            new_ref->icc_profile);
+        MP_HANDLE_OOM(sd);
+        new_ref->icc_profile = NULL;
+    }
+
+    pl_avframe_set_color(dst, (struct pl_color_space){
+        .primaries = mp_prim_to_pl(src->params.color.primaries),
+        .transfer = mp_trc_to_pl(src->params.color.gamma),
+        .hdr = src->params.color.hdr,
+    });
+
+    // A display matrix (3x3 int32_t) is always attached, even for rotate==0.
+    {
+        AVFrameSideData *sd = av_frame_new_side_data(dst,
+                                                     AV_FRAME_DATA_DISPLAYMATRIX,
+                                                     sizeof(int32_t) * 9);
+        MP_HANDLE_OOM(sd);
+        av_display_rotation_set((int32_t *)sd->data, src->params.rotate);
+    }
+
+    // Add back side data, but only for types which are not specially handled
+    // above. Keep in mind that the types above will be out of sync anyway.
+    for (int n = 0; n < new_ref->num_ff_side_data; n++) {
+        struct mp_ff_side_data *mpsd = &new_ref->ff_side_data[n];
+        if (!av_frame_get_side_data(dst, mpsd->type)) {
+            AVFrameSideData *sd = av_frame_new_side_data_from_buf(dst, mpsd->type,
+                                                                  mpsd->buf);
+            MP_HANDLE_OOM(sd);
+            // The buffer reference now belongs to the AVFrame.
+            mpsd->buf = NULL;
+        }
+    }
+
+    talloc_free(new_ref);
+
+    // Formats without an AVPixelFormat equivalent cannot be represented;
+    // av_frame_free() also resets dst to NULL, which is then returned.
+    if (dst->format == AV_PIX_FMT_NONE)
+        av_frame_free(&dst);
+    return dst;
+}
+
+// Convenience wrapper around mp_image_to_av_frame() which additionally
+// releases the caller's reference to img, regardless of whether the
+// conversion succeeded.
+struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img)
+{
+    AVFrame *converted = mp_image_to_av_frame(img);
+
+    talloc_free(img);
+    return converted;
+}
+
+// Fill a picture of "height" rows with the byte value "fill".
+//  dst:          start of the first row
+//  bytesPerLine: number of bytes to set in each row
+//  stride:       distance in bytes between the starts of consecutive rows
+// A non-positive height or bytesPerLine makes this a no-op (previously the
+// contiguous fast path could pass a negative, i.e. huge, length to memset()).
+void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride)
+{
+    if (height <= 0 || bytesPerLine <= 0)
+        return;
+
+    if (bytesPerLine == stride) {
+        // Rows are contiguous: clear everything with a single call. The size
+        // is computed in size_t to avoid signed overflow on large planes.
+        memset(dst, fill, (size_t)stride * (size_t)height);
+    } else {
+        uint8_t *row = dst;
+        for (int i = 0; i < height; i++) {
+            memset(row, fill, bytesPerLine);
+            row += stride;
+        }
+    }
+}
+
+// Fill a picture made of 16 bit units with the value "fill". unitsPerLine
+// counts 16 bit units per row, while stride is the row pitch in bytes.
+void memset16_pic(void *dst, int fill, int unitsPerLine, int height, int stride)
+{
+    // An all-zero fill is uniform on the byte level, so the 8 bit routine
+    // can handle it.
+    if (fill == 0) {
+        memset_pic(dst, 0, unitsPerLine * 2, height, stride);
+        return;
+    }
+
+    uint8_t *row = dst;
+    for (int y = 0; y < height; y++) {
+        uint16_t *units = (uint16_t *)row;
+        for (int x = 0; x < unitsPerLine; x++)
+            units[x] = fill;
+        row += stride;
+    }
+}
+
+// Return a pointer to the pixel at luma position x/y on the given plane.
+// x/y are always given in non-subsampled coordinates, even when addressing a
+// chroma plane, and must be aligned to mp_imgfmt_desc.align_x/y (byte and
+// chroma boundaries); this is checked with assert().
+// As a consequence, e.g. individual luma pixels on the luma plane of yuv420p
+// cannot be addressed with this function.
+void *mp_image_pixel_ptr(struct mp_image *img, int plane, int x, int y)
+{
+    assert(MP_IS_ALIGNED(x, img->fmt.align_x));
+    assert(MP_IS_ALIGNED(y, img->fmt.align_y));
+
+    void *pixel = mp_image_pixel_ptr_ny(img, plane, x, y);
+    return pixel;
+}
+
+// Variant of mp_image_pixel_ptr() with a relaxed Y requirement: y only has to
+// be aligned to the vertical subsampling of the addressed plane, not to the
+// alignment of the whole image. Use with care.
+// This is what makes addressing single luma rows possible.
+void *mp_image_pixel_ptr_ny(struct mp_image *img, int plane, int x, int y)
+{
+    assert(MP_IS_ALIGNED(x, img->fmt.align_x));
+    assert(MP_IS_ALIGNED(y, 1 << img->fmt.ys[plane]));
+
+    // Start of the (subsampled) row; the stride may be negative.
+    uint8_t *row = img->planes[plane] +
+                   img->stride[plane] * (ptrdiff_t)(y >> img->fmt.ys[plane]);
+    // Byte offset of the (subsampled) column within that row.
+    size_t x_bytes = (x >> img->fmt.xs[plane]) * (size_t)img->fmt.bpp[plane] / 8;
+
+    return row + x_bytes;
+}
+
+// Size in bytes of the pixel range [x0, x0+w-1] on the given plane. x0 and w
+// are in non-subsampled coordinates (i.e. those of plane 0). The range is
+// widened to the chroma/byte alignment of the whole image, even if plane!=0.
+// A non-zero x0 matters for the rounding, e.g. 8 bpp, x0=7, w=7 covers
+// 0..15, which is 2 bytes.
+size_t mp_image_plane_bytes(struct mp_image *img, int plane, int x0, int w)
+{
+    int first = MP_ALIGN_DOWN(x0, img->fmt.align_x);
+    int last = MP_ALIGN_UP(x0 + w, img->fmt.align_x);
+    int xs = img->fmt.xs[plane];
+    size_t bpp = img->fmt.bpp[plane];
+
+    size_t first_byte = (first >> xs) * bpp / 8;
+    size_t last_byte = (last >> xs) * bpp / 8;
+    return last_byte - first_byte;
+}
diff --git a/video/mp_image.h b/video/mp_image.h
new file mode 100644
index 0000000..0408aab
--- /dev/null
+++ b/video/mp_image.h
@@ -0,0 +1,203 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_MP_IMAGE_H
+#define MPLAYER_MP_IMAGE_H
+
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <inttypes.h>
+#include "common/common.h"
+#include "common/msg.h"
+#include "csputils.h"
+#include "video/img_format.h"
+
+// Assumed minimum align needed for image allocation. It's notable that FFmpeg's
+// libraries except libavcodec don't really know what alignment they want.
+// Things will randomly crash or get slower if the alignment is not satisfied.
+// Whatever. This value should be pretty safe with current CPU architectures.
+#define MP_IMAGE_BYTE_ALIGN 64
+
+#define MP_IMGFIELD_TOP_FIRST 0x02
+#define MP_IMGFIELD_REPEAT_FIRST 0x04
+#define MP_IMGFIELD_INTERLACED 0x20
+
+// Describes image parameters that usually stay constant.
+// New fields can be added in the future. Code changing the parameters should
+// usually copy the whole struct, so that fields added later will be preserved.
+struct mp_image_params {
+    enum mp_imgfmt imgfmt;      // pixel format
+    enum mp_imgfmt hw_subfmt;   // underlying format for some hwaccel pixfmts
+    int w, h;                   // image dimensions
+    int p_w, p_h;               // define pixel aspect ratio (undefined: 0/0)
+    bool force_window;          // fake image created by handle_force_window
+    // Colorspace description; unset (AUTO) members can be filled in with
+    // mp_image_params_guess_csp().
+    struct mp_colorspace color;
+    // Chroma sample siting; also guessed by mp_image_params_guess_csp() if
+    // left at auto.
+    enum mp_chroma_location chroma_location;
+    // The image should be rotated clockwise (0-359 degrees).
+    int rotate;
+    enum mp_stereo3d_mode stereo3d; // image is encoded with this mode
+    enum mp_alpha_type alpha;   // usually auto; only set if explicitly known
+    struct mp_rect crop;        // crop applied on image
+};
+
+/* Memory management:
+ * - mp_image is a light-weight reference to the actual image data (pixels).
+ *   The actual image data is reference counted and can outlive mp_image
+ *   allocations. mp_image references can be created with mp_image_new_ref()
+ *   and freed with talloc_free() (the helpers mp_image_setrefp() and
+ *   mp_image_unrefp() can also be used). The actual image data is freed when
+ *   the last mp_image reference to it is freed.
+ * - Each mp_image has a clear owner. The owner can do anything with it, such
+ *   as changing mp_image fields. Instead of making ownership ambiguous by
+ *   sharing a mp_image reference, new references should be created.
+ * - Write access to the actual image data is allowed only after calling
+ *   mp_image_make_writeable(), or if mp_image_is_writeable() returns true.
+ *   Conceptually, images can be changed by their owner only, and copy-on-write
+ *   is used to ensure that other references do not see any changes to the
+ *   image data. mp_image_make_writeable() will do that copy if required.
+ */
+typedef struct mp_image {
+    int w, h;  // visible dimensions (redundant with params.w/h)
+
+    struct mp_image_params params;
+
+    // fields redundant to params.imgfmt, for convenience or compatibility
+    struct mp_imgfmt_desc fmt;
+    enum mp_imgfmt imgfmt;
+    int num_planes;
+
+    // Per-plane start pointers and row pitches in bytes.
+    uint8_t *planes[MP_MAX_PLANES];
+    int stride[MP_MAX_PLANES];
+
+    int pict_type; // 0->unknown, 1->I, 2->P, 3->B
+    int fields;    // bitmask of MP_IMGFIELD_* flags
+
+    /* only inside filter chain */
+    double pts;
+    /* only after decoder */
+    double dts, pkt_duration;
+    /* container reported FPS; can be incorrect, or 0 if unknown */
+    double nominal_fps;
+    /* for private use */
+    void* priv;
+
+    // Reference-counted data references.
+    // These do not necessarily map directly to planes[]. They can have
+    // different order or count. There shouldn't be more buffers than planes.
+    // If bufs[n] is NULL, bufs[n+1] must also be NULL.
+    // All mp_* functions manage this automatically; do not mess with it.
+    // (See also AVFrame.buf.)
+    struct AVBufferRef *bufs[MP_MAX_PLANES];
+    // Points to AVHWFramesContext* (same as AVFrame.hw_frames_ctx)
+    struct AVBufferRef *hwctx;
+    // Embedded ICC profile, if any
+    struct AVBufferRef *icc_profile;
+    // Closed captions packet, if any (only after decoder)
+    struct AVBufferRef *a53_cc;
+    // Dolby Vision metadata, if any
+    struct AVBufferRef *dovi;
+    // Film grain data, if any
+    struct AVBufferRef *film_grain;
+    // Dolby Vision RPU buffer, if any
+    struct AVBufferRef *dovi_buf;
+    // Other side data we don't care about.
+    // (Array with num_ff_side_data entries.)
+    struct mp_ff_side_data *ff_side_data;
+    int num_ff_side_data;
+} mp_image_t;
+
+// One FFmpeg frame side data entry, as found in AVFrame.side_data[].
+struct mp_ff_side_data {
+    int type;                   // AVFrameSideData.type
+    struct AVBufferRef *buf;    // AVFrameSideData.buf
+};
+
+int mp_chroma_div_up(int size, int shift);
+
+int mp_image_get_alloc_size(int imgfmt, int w, int h, int stride_align);
+struct mp_image *mp_image_from_buffer(int imgfmt, int w, int h, int stride_align,
+                                      uint8_t *buffer, int buffer_size,
+                                      void *free_opaque,
+                                      void (*free)(void *opaque, uint8_t *data));
+
+struct mp_image *mp_image_alloc(int fmt, int w, int h);
+void mp_image_copy(struct mp_image *dmpi, struct mp_image *mpi);
+void mp_image_copy_attributes(struct mp_image *dmpi, struct mp_image *mpi);
+struct mp_image *mp_image_new_copy(struct mp_image *img);
+struct mp_image *mp_image_new_ref(struct mp_image *img);
+bool mp_image_is_writeable(struct mp_image *img);
+bool mp_image_make_writeable(struct mp_image *img);
+void mp_image_setrefp(struct mp_image **p_img, struct mp_image *new_value);
+void mp_image_unrefp(struct mp_image **p_img);
+
+void mp_image_clear(struct mp_image *mpi, int x0, int y0, int x1, int y1);
+void mp_image_clear_rc(struct mp_image *mpi, struct mp_rect rc);
+void mp_image_clear_rc_inv(struct mp_image *mpi, struct mp_rect rc);
+void mp_image_crop(struct mp_image *img, int x0, int y0, int x1, int y1);
+void mp_image_crop_rc(struct mp_image *img, struct mp_rect rc);
+void mp_image_vflip(struct mp_image *img);
+
+void mp_image_set_size(struct mp_image *mpi, int w, int h);
+int mp_image_plane_w(struct mp_image *mpi, int plane);
+int mp_image_plane_h(struct mp_image *mpi, int plane);
+
+void mp_image_setfmt(mp_image_t* mpi, int out_fmt);
+void mp_image_steal_data(struct mp_image *dst, struct mp_image *src);
+void mp_image_unref_data(struct mp_image *img);
+
+int mp_image_approx_byte_size(struct mp_image *img);
+
+struct mp_image *mp_image_new_dummy_ref(struct mp_image *img);
+struct mp_image *mp_image_new_custom_ref(struct mp_image *img, void *arg,
+                                         void (*free)(void *arg));
+
+// Fill in missing colorspace related parameters with guesses (in place).
+void mp_image_params_guess_csp(struct mp_image_params *params);
+
+char *mp_image_params_to_str_buf(char *b, size_t bs,
+                                 const struct mp_image_params *p);
+// Same as above, but formats into a temporary 256 byte buffer.
+#define mp_image_params_to_str(p) mp_image_params_to_str_buf((char[256]){0}, 256, p)
+
+bool mp_image_crop_valid(const struct mp_image_params *p);
+bool mp_image_params_valid(const struct mp_image_params *p);
+bool mp_image_params_equal(const struct mp_image_params *p1,
+                           const struct mp_image_params *p2);
+
+void mp_image_params_get_dsize(const struct mp_image_params *p,
+                               int *d_w, int *d_h);
+void mp_image_params_set_dsize(struct mp_image_params *p, int d_w, int d_h);
+
+void mp_image_set_params(struct mp_image *image,
+                         const struct mp_image_params *params);
+
+void mp_image_set_attributes(struct mp_image *image,
+                             const struct mp_image_params *params);
+
+// Conversion from/to FFmpeg frames. The _and_unref variant frees img even if
+// the conversion fails.
+struct AVFrame;
+struct mp_image *mp_image_from_av_frame(struct AVFrame *av_frame);
+struct AVFrame *mp_image_to_av_frame(struct mp_image *img);
+struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img);
+
+// Row-wise copy/fill helpers. All strides are in bytes; memset16_pic() takes
+// the line width in 16 bit units.
+void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height,
+                int dstStride, int srcStride);
+void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride);
+void memset16_pic(void *dst, int fill, int unitsPerLine, int height, int stride);
+
+// Pixel addressing helpers; x/y/x0/w are in non-subsampled coordinates.
+void *mp_image_pixel_ptr(struct mp_image *img, int plane, int x, int y);
+void *mp_image_pixel_ptr_ny(struct mp_image *img, int plane, int x, int y);
+size_t mp_image_plane_bytes(struct mp_image *img, int plane, int x0, int w);
+
+#endif /* MPLAYER_MP_IMAGE_H */
diff --git a/video/mp_image_pool.c b/video/mp_image_pool.c
new file mode 100644
index 0000000..0b5e520
--- /dev/null
+++ b/video/mp_image_pool.c
@@ -0,0 +1,472 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#include <stddef.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include <libavutil/buffer.h>
+#include <libavutil/hwcontext.h>
+#if HAVE_VULKAN_INTEROP
+#include <libavutil/hwcontext_vulkan.h>
+#endif
+#include <libavutil/mem.h>
+#include <libavutil/pixdesc.h>
+
+#include "mpv_talloc.h"
+
+#include "common/common.h"
+
+#include "fmt-conversion.h"
+#include "mp_image_pool.h"
+#include "mp_image.h"
+#include "osdep/threads.h"
+
+// A single global mutex, shared by all pools. It guards the per-image
+// image_flags state, which can be touched from other threads when they
+// release image references.
+static mp_static_mutex pool_mutex = MP_STATIC_MUTEX_INITIALIZER;
+#define pool_lock() mp_mutex_lock(&pool_mutex)
+#define pool_unlock() mp_mutex_unlock(&pool_mutex)
+
+// Thread-safety: the pool itself is not thread-safe, but pool-allocated images
+// can be referenced and unreferenced from other threads. (As long as the image
+// destructors are thread-safe.)
+
+struct mp_image_pool {
+    // Images managed by the pool (appended by mp_image_pool_add()).
+    struct mp_image **images;
+    int num_images;
+
+    // Parameters of the most recent allocation done by mp_image_pool_get().
+    // A request with different parameters clears the pool first.
+    int fmt, w, h;
+
+    // Optional custom allocator (see mp_image_pool_set_allocator()). If it
+    // is NULL, mp_image_alloc() is used.
+    mp_image_allocator allocator;
+    void *allocator_ctx;
+
+    // If set, the free image with the lowest image_flags.order is handed out,
+    // instead of the first one that matches.
+    bool use_lru;
+    // Incremented for every image handed out; source of image_flags.order.
+    unsigned int lru_counter;
+};
+
+// Used to gracefully handle the case when the pool is freed while image
+// references allocated from the image pool are still held by someone.
+// One instance is attached to each pool image via mp_image.priv. The two
+// bools are read and cleared under pool_mutex by mp_image_pool_clear() and
+// unref_image().
+struct image_flags {
+    // If both of these are false, the image must be freed.
+    bool referenced;            // outside mp_image reference exists
+    bool pool_alive;            // the mp_image_pool references this
+    unsigned int order;         // for LRU allocation (basically a timestamp)
+};
+
+// talloc destructor of the pool: makes the pool let go of all its images.
+static void image_pool_destructor(void *ptr)
+{
+    mp_image_pool_clear((struct mp_image_pool *)ptr);
+}
+
+// Create a new, empty image pool. tparent is used as talloc parent of the
+// pool; pass NULL for a pool without parent.
+struct mp_image_pool *mp_image_pool_new(void *tparent)
+{
+    struct mp_image_pool *self = talloc_ptrtype(tparent, self);
+
+    talloc_set_destructor(self, image_pool_destructor);
+    *self = (struct mp_image_pool) {0};
+    return self;
+}
+
+// Remove all images from the pool. Images which are currently not handed out
+// are freed immediately. Images that are still referenced from outside are
+// only marked as no longer owned by the pool; unref_image() frees them once
+// their last outside reference is released.
+void mp_image_pool_clear(struct mp_image_pool *pool)
+{
+    for (int n = 0; n < pool->num_images; n++) {
+        struct mp_image *img = pool->images[n];
+        struct image_flags *it = img->priv;
+        bool referenced;
+        // The flags may be accessed concurrently by unref_image().
+        pool_lock();
+        assert(it->pool_alive);
+        it->pool_alive = false;
+        referenced = it->referenced;
+        pool_unlock();
+        // Free outside of the lock, and only if nobody else holds the image.
+        if (!referenced)
+            talloc_free(img);
+    }
+    pool->num_images = 0;
+}
+
+// This is the only function that is allowed to run in a different thread.
+// (Consider passing an image to another thread, which frees it.)
+// It is installed as free callback of the wrapper buffer created in
+// mp_image_pool_get_no_alloc(); opaque is the pool's own image. The data
+// pointer is unused, since the memory belongs to that image.
+static void unref_image(void *opaque, uint8_t *data)
+{
+    struct mp_image *img = opaque;
+    struct image_flags *it = img->priv;
+    bool alive;
+    pool_lock();
+    assert(it->referenced);
+    it->referenced = false;
+    alive = it->pool_alive;
+    pool_unlock();
+    // If the pool is gone (or was cleared) in the meantime, we were the last
+    // user and have to free the image ourselves.
+    if (!alive)
+        talloc_free(img);
+}
+
+// Return a new image of given format/size. Unlike mp_image_pool_get(), this
+// returns NULL if there is no free image of this format/size.
+// NULL is also returned if creating the wrapper buffer fails.
+struct mp_image *mp_image_pool_get_no_alloc(struct mp_image_pool *pool, int fmt,
+                                            int w, int h)
+{
+    struct mp_image *new = NULL;
+    // Search for an unreferenced image with matching parameters. The lock is
+    // needed because other threads may clear "referenced" at any time.
+    pool_lock();
+    for (int n = 0; n < pool->num_images; n++) {
+        struct mp_image *img = pool->images[n];
+        struct image_flags *img_it = img->priv;
+        assert(img_it->pool_alive);
+        if (!img_it->referenced) {
+            if (img->imgfmt == fmt && img->w == w && img->h == h) {
+                if (pool->use_lru) {
+                    // Keep scanning, and remember the candidate with the
+                    // lowest order value.
+                    struct image_flags *new_it = new ? new->priv : NULL;
+                    if (!new_it || new_it->order > img_it->order)
+                        new = img;
+                } else {
+                    new = img;
+                    break;
+                }
+            }
+        }
+    }
+    pool_unlock();
+    if (!new)
+        return NULL;
+
+    // Reference the new image. Since mp_image_pool is not declared thread-safe,
+    // and unreffing images from other threads does not allocate new images,
+    // no synchronization is required here.
+    for (int p = 0; p < MP_MAX_PLANES; p++)
+        assert(!!new->bufs[p] == !p); // only 1 AVBufferRef
+
+    struct mp_image *ref = mp_image_new_dummy_ref(new);
+
+    // This assumes the buffer is at this point exclusively owned by us: we
+    // can't track whether the buffer is unique otherwise.
+    // (av_buffer_is_writable() checks the refcount of the new buffer only.)
+    int flags = av_buffer_is_writable(new->bufs[0]) ? 0 : AV_BUFFER_FLAG_READONLY;
+    // Wrap the same memory in a new buffer, whose free callback notifies the
+    // pool (unref_image()) instead of releasing the memory.
+    ref->bufs[0] = av_buffer_create(new->bufs[0]->data, new->bufs[0]->size,
+                                    unref_image, new, flags);
+    if (!ref->bufs[0]) {
+        talloc_free(ref);
+        return NULL;
+    }
+
+    // Mark the image as handed out, and stamp it for LRU selection.
+    struct image_flags *it = new->priv;
+    assert(!it->referenced && it->pool_alive);
+    it->referenced = true;
+    it->order = ++pool->lru_counter;
+    return ref;
+}
+
+// Add the image "new" to the pool. A fresh image_flags struct (allocated as
+// talloc child of the image) is attached to new->priv, with the image marked
+// as owned by the pool and not handed out.
+void mp_image_pool_add(struct mp_image_pool *pool, struct mp_image *new)
+{
+    struct image_flags *flags = talloc_ptrtype(new, flags);
+
+    *flags = (struct image_flags) { .pool_alive = true };
+    new->priv = flags;
+    MP_TARRAY_APPEND(pool, pool->images, pool->num_images, new);
+}
+
+// Get an image of the given format/size. This works like mp_image_alloc(),
+// except that image data allocations are transparently recycled through the
+// pool.
+// With pool==NULL, this simply calls mp_image_alloc() (for convenience).
+// The returned image can be freed with talloc_free().
+// Returns NULL on OOM.
+struct mp_image *mp_image_pool_get(struct mp_image_pool *pool, int fmt,
+                                   int w, int h)
+{
+    if (!pool)
+        return mp_image_alloc(fmt, w, h);
+
+    // Fast path: recycle a free image.
+    struct mp_image *img = mp_image_pool_get_no_alloc(pool, fmt, w, h);
+    if (img)
+        return img;
+
+    // The image parameters changed: drop the images with the old ones.
+    bool same_params = fmt == pool->fmt && w == pool->w && h == pool->h;
+    if (!same_params)
+        mp_image_pool_clear(pool);
+    pool->fmt = fmt;
+    pool->w = w;
+    pool->h = h;
+
+    // Allocate a new image, hand it to the pool, and fetch it from there.
+    if (pool->allocator) {
+        img = pool->allocator(pool->allocator_ctx, fmt, w, h);
+    } else {
+        img = mp_image_alloc(fmt, w, h);
+    }
+    if (!img)
+        return NULL;
+
+    mp_image_pool_add(pool, img);
+    return mp_image_pool_get_no_alloc(pool, fmt, w, h);
+}
+
+// Pool-backed variant of mp_image_new_copy(): the copy of img (image data
+// and attributes) is allocated from the pool.
+// With pool==NULL, a plain copy is made (for convenience).
+// Returns NULL on OOM.
+struct mp_image *mp_image_pool_new_copy(struct mp_image_pool *pool,
+                                        struct mp_image *img)
+{
+    struct mp_image *copy = mp_image_pool_get(pool, img->imgfmt, img->w, img->h);
+    if (!copy)
+        return NULL;
+
+    mp_image_copy(copy, img);
+    mp_image_copy_attributes(copy, img);
+    return copy;
+}
+
+// Pool-backed variant of mp_image_make_writeable(): if img is not writeable,
+// a copy is allocated from the pool, and img takes over the data of the copy.
+// With pool==NULL, the copy is a plain allocation (for convenience).
+// Returns false on failure (see mp_image_make_writeable()).
+bool mp_image_pool_make_writeable(struct mp_image_pool *pool,
+                                  struct mp_image *img)
+{
+    if (!mp_image_is_writeable(img)) {
+        struct mp_image *copy = mp_image_pool_new_copy(pool, img);
+        if (!copy)
+            return false;
+        mp_image_steal_data(img, copy);
+        assert(mp_image_is_writeable(img));
+    }
+    return true;
+}
+
+// Install a custom allocator: the pool will call cb(cb_data, fmt, w, h)
+// whenever it needs a new image. Images returned by cb must use exactly 1
+// AVBufferRef, and must be owned exclusively by the image pool; otherwise
+// mp_image_is_writeable() will not work due to FFmpeg restrictions.
+void mp_image_pool_set_allocator(struct mp_image_pool *pool,
+                                 mp_image_allocator cb, void  *cb_data)
+{
+    pool->allocator_ctx = cb_data;
+    pool->allocator = cb;
+}
+
+// Put into LRU mode. (Likely better for hwaccel surfaces, but worse for memory.)
+void mp_image_pool_set_lru(struct mp_image_pool *pool)
+{
+    pool->use_lru = true;   // one-way switch: this function never clears it
+}
+
+// Return the sw image format mp_image_hw_download() would use (0 if none).
+// This can be different from src->params.hw_subfmt in obscure cases.
+int mp_image_hw_download_get_sw_format(struct mp_image *src)
+{
+    if (!src->hwctx)
+        return 0;
+
+    // Ask for the formats the surface can be transferred to.
+    enum AVPixelFormat *list;
+    int err = av_hwframe_transfer_get_formats(src->hwctx,
+                  AV_HWFRAME_TRANSFER_DIRECTION_FROM, &list, 0);
+    if (err < 0)
+        return 0;
+
+    // Take the first entry that maps to an image format we know.
+    int found = 0;
+    for (enum AVPixelFormat *p = list; !found && *p != AV_PIX_FMT_NONE; p++)
+        found = pixfmt2imgfmt(*p);
+    av_free(list);
+
+    return found;
+}
+
+// Copies the contents of the HW surface src to system memory and returns it.
+// If swpool is not NULL, it's used to allocate the target image.
+// src must be a hw surface with an AVHWFramesContext attached.
+// The returned image is sized to src->w x src->h and carries src's attributes.
+// Returns NULL on failure.
+struct mp_image *mp_image_hw_download(struct mp_image *src,
+                                      struct mp_image_pool *swpool)
+{
+    int imgfmt = mp_image_hw_download_get_sw_format(src);
+    if (!imgfmt)
+        return NULL;
+
+    assert(src->hwctx);   // holds here: the helper above returns 0 without it
+    AVHWFramesContext *fctx = (void *)src->hwctx->data;
+
+    struct mp_image *dst =   // allocated at the frames context's full size
+        mp_image_pool_get(swpool, imgfmt, fctx->width, fctx->height);
+    if (!dst)
+        return NULL;
+
+    // Target image must be writable, so unref it.
+    AVFrame *dstav = mp_image_to_av_frame_and_unref(dst);
+    if (!dstav)
+        return NULL;
+
+    AVFrame *srcav = mp_image_to_av_frame(src);
+    if (!srcav) {
+        av_frame_free(&dstav);   // av_frame_unref() would leak the AVFrame itself
+        return NULL;
+    }
+
+    int res = av_hwframe_transfer_data(dstav, srcav, 0);
+    av_frame_free(&srcav);
+    dst = mp_image_from_av_frame(dstav);
+    av_frame_free(&dstav);
+    if (res >= 0 && dst) {
+        mp_image_set_size(dst, src->w, src->h);   // crop to the visible size
+        mp_image_copy_attributes(dst, src);
+    } else {
+        mp_image_unrefp(&dst);   // transfer or wrapping failed
+    }
+    return dst;
+}
+
+bool mp_image_hw_upload(struct mp_image *hw_img, struct mp_image *src)
+{
+    // Transfer the contents of src into the hw surface hw_img. Both images
+    // must have the same size, and hw_img must carry a hw frames context.
+    // Returns true on success; src's attributes are then copied to hw_img.
+    if (!(hw_img->w == src->w && hw_img->h == src->h))
+        return false;
+    if (!hw_img->hwctx)
+        return false;
+
+    // This means the destination image will not be "writable", which would be
+    // a pain if Libav enforced this - fortunately it doesn't care. We can
+    // transfer data to it even if there are multiple refs.
+    AVFrame *dstav = mp_image_to_av_frame(hw_img);
+    // Only wrap the source once the destination wrapper exists.
+    AVFrame *srcav = NULL;
+    if (dstav)
+        srcav = mp_image_to_av_frame(src);
+
+    // The transfer is attempted only when both wrappers were created.
+    bool ok = false;
+    if (dstav && srcav)
+        ok = av_hwframe_transfer_data(dstav, srcav, 0) >= 0;
+
+    // Both frees run on every path, including when a wrapper is NULL.
+    av_frame_free(&srcav);
+    av_frame_free(&dstav);
+
+    if (!ok)
+        return false;
+    mp_image_copy_attributes(hw_img, src);
+    return true;
+}
+
+bool mp_update_av_hw_frames_pool(struct AVBufferRef **hw_frames_ctx,
+                                 struct AVBufferRef *hw_device_ctx,
+                                 int imgfmt, int sw_imgfmt, int w, int h,
+                                 bool disable_multiplane)
+{   // Make *hw_frames_ctx an initialized frames context matching the arguments.
+    enum AVPixelFormat format = imgfmt2pixfmt(imgfmt);
+    enum AVPixelFormat sw_format = imgfmt2pixfmt(sw_imgfmt);
+    // Unusable arguments: any existing context is dropped and false returned.
+    if (format == AV_PIX_FMT_NONE || sw_format == AV_PIX_FMT_NONE ||
+        !hw_device_ctx || w < 1 || h < 1)
+    {
+        av_buffer_unref(hw_frames_ctx);
+        return false;
+    }
+    // An existing context is kept only if device, formats and size all match.
+    if (*hw_frames_ctx) {
+        AVHWFramesContext *hw_frames = (void *)(*hw_frames_ctx)->data;
+
+        if (hw_frames->device_ref->data != hw_device_ctx->data ||
+            hw_frames->format != format || hw_frames->sw_format != sw_format ||
+            hw_frames->width != w || hw_frames->height != h)
+            av_buffer_unref(hw_frames_ctx);
+    }
+    // No (matching) context: allocate, configure and initialize a new one.
+    if (!*hw_frames_ctx) {
+        *hw_frames_ctx = av_hwframe_ctx_alloc(hw_device_ctx);
+        if (!*hw_frames_ctx)
+            return false;
+
+        AVHWFramesContext *hw_frames = (void *)(*hw_frames_ctx)->data;
+        hw_frames->format = format;
+        hw_frames->sw_format = sw_format;
+        hw_frames->width = w;
+        hw_frames->height = h;
+        // Vulkan only: honor disable_multiplane for planar non-RGB sw formats.
+#if HAVE_VULKAN_INTEROP
+        if (format == AV_PIX_FMT_VULKAN && disable_multiplane) {
+            const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sw_format);
+            if ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) &&
+                !(desc->flags & AV_PIX_FMT_FLAG_RGB)) {
+                AVVulkanFramesContext *vk_frames = hw_frames->hwctx;
+                vk_frames->flags = AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE; // overwrites flags
+            }
+        }
+#endif
+        // The fields above are all set before av_hwframe_ctx_init() runs.
+        if (av_hwframe_ctx_init(*hw_frames_ctx) < 0) {
+            av_buffer_unref(hw_frames_ctx);   // leaves *hw_frames_ctx NULL
+            return false;
+        }
+    }
+
+    return true;
+}
+
+struct mp_image *mp_av_pool_image_hw_upload(struct AVBufferRef *hw_frames_ctx,
+                                            struct mp_image *src)
+{
+    // Take a surface from hw_frames_ctx and upload src into it.
+    // Returns the new hw image, or NULL on any failure.
+    AVFrame *frame = av_frame_alloc();
+    if (!frame)
+        return NULL;
+    struct mp_image *dst = NULL;
+    if (av_hwframe_get_buffer(hw_frames_ctx, frame, 0) >= 0)
+        dst = mp_image_from_av_frame(frame);
+    av_frame_free(&frame);
+    if (!dst)
+        return NULL;
+
+    // The surface must be at least as large as src; it is then set to
+    // src's dimensions before the upload.
+    bool fits = dst->w >= src->w && dst->h >= src->h;
+    if (fits)
+        mp_image_set_size(dst, src->w, src->h);
+
+    // Either problem discards the surface obtained above.
+    if (!fits || !mp_image_hw_upload(dst, src)) {
+        talloc_free(dst);
+        return NULL;
+    }
+
+    mp_image_copy_attributes(dst, src);
+    return dst;
+}
+
+struct mp_image *mp_av_pool_image_hw_map(struct AVBufferRef *hw_frames_ctx,
+                                         struct mp_image *src)
+{
+    // Map src (av_hwframe_map) to a frame tied to hw_frames_ctx; NULL on failure.
+    AVFrame *dst_frame = av_frame_alloc();
+    if (!dst_frame)
+        return NULL;
+
+    dst_frame->format = ((AVHWFramesContext*)hw_frames_ctx->data)->format;
+    dst_frame->hw_frames_ctx = av_buffer_ref(hw_frames_ctx);
+    // Both helper results may be NULL on OOM; never hand those to FFmpeg.
+    AVFrame *src_frame = mp_image_to_av_frame(src);
+    if (!src_frame || !dst_frame->hw_frames_ctx ||
+        av_hwframe_map(dst_frame, src_frame, 0) < 0) {
+        av_frame_free(&src_frame);   // safe when src_frame is NULL
+        av_frame_free(&dst_frame);
+        return NULL;
+    }
+    av_frame_free(&src_frame);
+    struct mp_image *dst = mp_image_from_av_frame(dst_frame);
+    av_frame_free(&dst_frame);
+    if (!dst)
+        return NULL;
+    mp_image_copy_attributes(dst, src);
+    return dst;
+}
diff --git a/video/mp_image_pool.h b/video/mp_image_pool.h
new file mode 100644
index 0000000..8cb2a5f
--- /dev/null
+++ b/video/mp_image_pool.h
@@ -0,0 +1,47 @@
+#ifndef MPV_MP_IMAGE_POOL_H
+#define MPV_MP_IMAGE_POOL_H
+
+#include <stdbool.h>
+
+struct mp_image_pool;
+
+struct mp_image_pool *mp_image_pool_new(void *tparent);
+struct mp_image *mp_image_pool_get(struct mp_image_pool *pool, int fmt,
+                                   int w, int h); // NULL on OOM; pool may be NULL
+// The reference to "new" is transferred to the pool.
+void mp_image_pool_add(struct mp_image_pool *pool, struct mp_image *new);
+void mp_image_pool_clear(struct mp_image_pool *pool);
+// Put the pool into LRU mode.
+void mp_image_pool_set_lru(struct mp_image_pool *pool);
+// Tried first by mp_image_pool_get(); a NULL result makes it allocate.
+struct mp_image *mp_image_pool_get_no_alloc(struct mp_image_pool *pool, int fmt,
+                                            int w, int h);
+// Custom allocation callback, invoked as cb(cb_data, fmt, w, h).
+typedef struct mp_image *(*mp_image_allocator)(void *data, int fmt, int w, int h);
+void mp_image_pool_set_allocator(struct mp_image_pool *pool,
+                                 mp_image_allocator cb, void  *cb_data);
+// Both accept pool==NULL; the copy is then made without a pool.
+struct mp_image *mp_image_pool_new_copy(struct mp_image_pool *pool,
+                                        struct mp_image *img);
+bool mp_image_pool_make_writeable(struct mp_image_pool *pool,
+                                  struct mp_image *img);
+// Copy a hw surface to system memory; returns NULL on failure.
+struct mp_image *mp_image_hw_download(struct mp_image *img,
+                                      struct mp_image_pool *swpool);
+// The sw format mp_image_hw_download() would use; 0 if there is none.
+int mp_image_hw_download_get_sw_format(struct mp_image *img);
+// Transfer src into hw_img (sizes must match); returns false on failure.
+bool mp_image_hw_upload(struct mp_image *hw_img, struct mp_image *src);
+
+struct AVBufferRef;
+bool mp_update_av_hw_frames_pool(struct AVBufferRef **hw_frames_ctx,
+                                 struct AVBufferRef *hw_device_ctx,
+                                 int imgfmt, int sw_imgfmt, int w, int h,
+                                 bool disable_multiplane);
+// Get a surface from hw_frames_ctx and upload src into it; NULL on failure.
+struct mp_image *mp_av_pool_image_hw_upload(struct AVBufferRef *hw_frames_ctx,
+                                            struct mp_image *src);
+// Map src to a frame tied to hw_frames_ctx; NULL on failure.
+struct mp_image *mp_av_pool_image_hw_map(struct AVBufferRef *hw_frames_ctx,
+                                         struct mp_image *src);
+#endif
diff --git a/video/out/android_common.c b/video/out/android_common.c
new file mode 100644
index 0000000..27e7b5b
--- /dev/null
+++ b/video/out/android_common.c
@@ -0,0 +1,99 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libavcodec/jni.h>
+#include <android/native_window_jni.h>
+
+#include "android_common.h"
+#include "common/msg.h"
+#include "misc/jni.h"
+#include "options/m_config.h"
+#include "vo.h"
+
+struct vo_android_state {
+    struct mp_log *log;            // created in vo_android_init(), prefix "android"
+    ANativeWindow *native_window;  // acquired in init, released in uninit
+};
+
+bool vo_android_init(struct vo *vo)
+{
+    // Set up the per-VO Android state: a logger plus the ANativeWindow
+    // obtained from the Java surface passed in through the WinID option.
+    struct vo_android_state *ctx = talloc_zero(vo, struct vo_android_state);
+    vo->android = ctx;
+
+    // talloc_zero() left native_window NULL; only the log needs filling in.
+    ctx->log = mp_log_new(ctx, vo->log, "android");
+
+    // A JNI environment is required to resolve the surface.
+    JNIEnv *env = MP_JNI_GET_ENV(ctx);
+    if (env) {
+        // WinID carries the Java surface reference as an integer.
+        assert(vo->opts->WinID != 0 && vo->opts->WinID != -1);
+        jobject surface = (jobject)(intptr_t)vo->opts->WinID;
+        ctx->native_window = ANativeWindow_fromSurface(env, surface);
+        if (ctx->native_window)
+            return true;
+        MP_FATAL(ctx, "Failed to create ANativeWindow\n");
+    } else {
+        MP_FATAL(ctx, "Could not attach java VM.\n");
+    }
+
+    // Failure: drop the state again so vo_android_uninit() has nothing to do.
+    talloc_free(ctx);
+    vo->android = NULL;
+    return false;
+}
+
+void vo_android_uninit(struct vo *vo)
+{
+    struct vo_android_state *state = vo->android;
+    if (state) {
+        // Release the window reference before the state holding it is
+        // freed, then clear the pointer so a repeated call does nothing.
+        if (state->native_window)
+            ANativeWindow_release(state->native_window);
+        talloc_free(state);
+        vo->android = NULL;
+    }
+}
+
+ANativeWindow *vo_android_native_window(struct vo *vo)
+{
+    struct vo_android_state *ctx = vo->android;   // not NULL-checked here
+    return ctx->native_window;   // window stored by vo_android_init()
+}
+
+bool vo_android_surface_size(struct vo *vo, int *out_w, int *out_h)
+{
+    struct vo_android_state *ctx = vo->android;
+    // A non-zero android_surface_size option overrides the value queried
+    // from the native window, independently for each dimension.
+    int width = vo->opts->android_surface_size.w;
+    int height = vo->opts->android_surface_size.h;
+    width = width ? width : ANativeWindow_getWidth(ctx->native_window);
+    height = height ? height : ANativeWindow_getHeight(ctx->native_window);
+
+    // The outputs are written only when both dimensions are positive.
+    if (width > 0 && height > 0) {
+        *out_w = width;
+        *out_h = height;
+        return true;
+    }
+    MP_ERR(ctx, "Failed to get height and width.\n");
+    return false;
+}
diff --git a/video/out/android_common.h b/video/out/android_common.h
new file mode 100644
index 0000000..7f075ea
--- /dev/null
+++ b/video/out/android_common.h
@@ -0,0 +1,29 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <android/native_window_jni.h>
+
+#include "common/common.h"
+
+struct vo;
+
+bool vo_android_init(struct vo *vo);     // false on failure (vo->android is NULL then)
+void vo_android_uninit(struct vo *vo);   // does nothing if vo->android is NULL
+ANativeWindow *vo_android_native_window(struct vo *vo);
+bool vo_android_surface_size(struct vo *vo, int *w, int *h); // false if a size is <= 0
diff --git a/video/out/aspect.c b/video/out/aspect.c
new file mode 100644
index 0000000..6e1cd63
--- /dev/null
+++ b/video/out/aspect.c
@@ -0,0 +1,216 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/* Stuff for correct aspect scaling. */
+#include "aspect.h"
+#include "math.h"
+#include "vo.h"
+#include "common/msg.h"
+#include "options/options.h"
+#include "video/mp_image.h"
+
+#include "vo.h"
+#include "sub/osd.h"
+
+static void aspect_calc_panscan(struct mp_vo_opts *opts,
+                                int w, int h, int d_w, int d_h, int unscaled,
+                                int window_w, int window_h, double monitor_par,
+                                int *out_w, int *out_h)
+{   // Compute the scaled video size (*out_w x *out_h); w is not used here.
+    int fwidth = window_w;   // first try: fill the window width
+    int fheight = (float)window_w / d_w * d_h / monitor_par;
+    if (fheight > window_h || fheight < h) {
+        int tmpw = (float)window_h / d_h * d_w * monitor_par;
+        if (tmpw <= window_w) {   // second try: fill the window height
+            fheight = window_h;
+            fwidth = tmpw;
+        }
+    }
+    // Unused window space along one axis; f_w/f_h spread it over both axes.
+    int vo_panscan_area = window_h - fheight;
+    double f_w = fwidth / (double)MPMAX(fheight, 1);
+    double f_h = 1;
+    if (vo_panscan_area == 0) {   // no vertical slack: use the horizontal one
+        vo_panscan_area = window_w - fwidth;
+        f_w = 1;
+        f_h = fheight / (double)MPMAX(fwidth, 1);
+    }
+    // Unscaled mode disables panscan; unscaled==2 applies only if video fits.
+    if (unscaled) {
+        vo_panscan_area = 0;
+        if (unscaled != 2 || (d_w <= window_w && d_h <= window_h)) {
+            fwidth = d_w * monitor_par;
+            fheight = d_h;
+        }
+    }
+    // Grow the size by the panscan fraction of the unused area.
+    *out_w = fwidth + vo_panscan_area * opts->panscan * f_w;
+    *out_h = fheight + vo_panscan_area * opts->panscan * f_h;
+}
+
+// Clamp *start to >= 0 and *end to <= size. If nothing is left after that
+// (start >= end), fall back to the one-element range [0, 1).
+static void clamp_size(int size, int *start, int *end)
+{
+    int lo = MPMAX(0, *start);
+    int hi = MPMIN(size, *end);
+    int empty = lo >= hi;
+    *start = empty ? 0 : lo;
+    *end = empty ? 1 : hi;
+}
+
+static void src_dst_split_scaling(int src_size, int dst_size,
+                                  int scaled_src_size,
+                                  float zoom, float align, float pan, float scale,
+                                  int *src_start, int *src_end,
+                                  int *dst_start, int *dst_end,
+                                  int *osd_margin_a, int *osd_margin_b)
+{   // Handles one axis: place the scaled video in [0, dst_size) and clip it.
+    scaled_src_size *= powf(2, zoom) * scale;   // zoom is a power-of-2 exponent
+    scaled_src_size = MPMAX(scaled_src_size, 1);
+    align = (align + 1) / 2;   // presumably maps [-1, 1] to [0, 1] - confirm
+    // Position: aligned share of the free space plus a pan offset.
+    *dst_start = (dst_size - scaled_src_size) * align + pan * scaled_src_size;
+    *dst_end = *dst_start + scaled_src_size;
+
+    // Distance of screen frame to video
+    *osd_margin_a = *dst_start;   // taken before clipping, so may be negative
+    *osd_margin_b = dst_size - *dst_end;
+
+    // Clip to screen
+    int s_src = *src_end - *src_start;
+    int s_dst = *dst_end - *dst_start;   // == scaled_src_size, hence >= 1
+    if (*dst_start < 0) {
+        int border = -(*dst_start) * s_src / s_dst;   // cut-off part in src units
+        *src_start += border;
+        *dst_start = 0;
+    }
+    if (*dst_end > dst_size) {
+        int border = (*dst_end - dst_size) * s_src / s_dst;
+        *src_end -= border;
+        *dst_end = dst_size;
+    }
+
+    // For sanity: avoid bothering VOs with corner cases
+    clamp_size(src_size, src_start, src_end);
+    clamp_size(dst_size, dst_start, dst_end);
+}
+
+static void calc_margin(float opts[2], int out[2], int size)
+{
+    for (int i = 0; i < 2; i++)
+        out[i] = MPCLAMP((int)(opts[i] * size), 0, size);
+
+    if (out[0] + out[1] < size)
+        return;
+    // This case is not really supported. Show an error by 1 pixel.
+    out[0] = 0;
+    out[1] = MPMAX(0, size - 1);
+}
+
+void mp_get_src_dst_rects(struct mp_log *log, struct mp_vo_opts *opts,
+                          int vo_caps, struct mp_image_params *video,
+                          int window_w, int window_h, double monitor_par,
+                          struct mp_rect *out_src,
+                          struct mp_rect *out_dst,
+                          struct mp_osd_res *out_osd)
+{   // Compute source crop, destination rect and OSD geometry for the window.
+    int src_w = video->w;
+    int src_h = video->h;
+    int src_dw, src_dh;
+
+    mp_image_params_get_dsize(video, &src_dw, &src_dh);
+    window_w = MPMAX(1, window_w);   // treat the window as at least 1x1
+    window_h = MPMAX(1, window_h);
+    // Margins are only applied when the aspect ratio is kept.
+    int margin_x[2] = {0};
+    int margin_y[2] = {0};
+    if (opts->keepaspect) {
+        calc_margin(opts->margin_x, margin_x, window_w);
+        calc_margin(opts->margin_y, margin_y, window_h);
+    }
+    // Window area left for the video after subtracting the margins.
+    int vid_window_w = window_w - margin_x[0] - margin_x[1];
+    int vid_window_h = window_h - margin_y[0] - margin_y[1];
+    // Defaults: whole window and whole video, or the video's crop if valid.
+    struct mp_rect dst = {0, 0, window_w, window_h};
+    struct mp_rect src = {0, 0, src_w,    src_h};
+    if (mp_image_crop_valid(video))
+        src = video->crop;
+    // If the VO rotates itself, work in rotated coordinates.
+    if (vo_caps & VO_CAP_ROTATE90) {
+        if (video->rotate % 180 == 90) {
+            MPSWAP(int, src_w, src_h);
+            MPSWAP(int, src_dw, src_dh);
+        }
+        mp_rect_rotate(&src, src_w, src_h, video->rotate);
+    }
+
+    struct mp_osd_res osd = {
+        .w = window_w,
+        .h = window_h,
+        .display_par = monitor_par,
+    };
+    // With keepaspect: scaled size first, then each axis is placed/clipped.
+    if (opts->keepaspect) {
+        int scaled_width, scaled_height;
+        aspect_calc_panscan(opts, src_w, src_h, src_dw, src_dh, opts->unscaled,
+                            vid_window_w, vid_window_h, monitor_par,
+                            &scaled_width, &scaled_height);
+        src_dst_split_scaling(src_w, vid_window_w, scaled_width,
+                              opts->zoom, opts->align_x, opts->pan_x, opts->scale_x,
+                              &src.x0, &src.x1, &dst.x0, &dst.x1,
+                              &osd.ml, &osd.mr);
+        src_dst_split_scaling(src_h, vid_window_h, scaled_height,
+                              opts->zoom, opts->align_y, opts->pan_y, opts->scale_y,
+                              &src.y0, &src.y1, &dst.y0, &dst.y1,
+                              &osd.mt, &osd.mb);
+    }
+    // dst was computed relative to the margin-cut area; move it into place.
+    dst.x0 += margin_x[0];
+    dst.y0 += margin_y[0];
+    dst.x1 += margin_x[0];
+    dst.y1 += margin_y[0];
+
+    // OSD really uses the full window, but was computed on the margin-cut
+    // video sub-window. Correct it to the full window.
+    osd.ml += margin_x[0];
+    osd.mr += margin_x[1];
+    osd.mt += margin_y[0];
+    osd.mb += margin_y[1];
+
+    *out_src = src;
+    *out_dst = dst;
+    *out_osd = osd;
+    // Everything below is verbose logging of the result only.
+    int sw = src.x1 - src.x0, sh = src.y1 - src.y0;
+    int dw = dst.x1 - dst.x0, dh = dst.y1 - dst.y0;
+
+    mp_verbose(log, "Window size: %dx%d (Borders: l=%d t=%d r=%d b=%d)\n",
+               window_w, window_h,
+               margin_x[0], margin_y[0], margin_x[1], margin_y[1]);
+    mp_verbose(log, "Video source: %dx%d (%d:%d)\n",
+               video->w, video->h, video->p_w, video->p_h);
+    mp_verbose(log, "Video display: (%d, %d) %dx%d -> (%d, %d) %dx%d\n",
+               src.x0, src.y0, sw, sh, dst.x0, dst.y0, dw, dh);
+    mp_verbose(log, "Video scale: %f/%f\n",
+               (double)dw / sw, (double)dh / sh);
+    mp_verbose(log, "OSD borders: l=%d t=%d r=%d b=%d\n",
+               osd.ml, osd.mt, osd.mr, osd.mb);
+    mp_verbose(log, "Video borders: l=%d t=%d r=%d b=%d\n",
+               dst.x0, dst.y0, window_w - dst.x1, window_h - dst.y1);
+}
diff --git a/video/out/aspect.h b/video/out/aspect.h
new file mode 100644
index 0000000..4123311
--- /dev/null
+++ b/video/out/aspect.h
@@ -0,0 +1,33 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_ASPECT_H
+#define MPLAYER_ASPECT_H
+
+struct mp_log;
+struct mp_vo_opts;
+struct mp_image_params;
+struct mp_rect;
+struct mp_osd_res;
+void mp_get_src_dst_rects(struct mp_log *log, struct mp_vo_opts *opts,
+                          int vo_caps, struct mp_image_params *video,
+                          int window_w, int window_h, double monitor_par,
+                          struct mp_rect *out_src,    // source rect of the video
+                          struct mp_rect *out_dst,    // target rect in the window
+                          struct mp_osd_res *out_osd); // OSD size and margins
+
+#endif /* MPLAYER_ASPECT_H */
diff --git a/video/out/bitmap_packer.c b/video/out/bitmap_packer.c
new file mode 100644
index 0000000..5ef090b
--- /dev/null
+++ b/video/out/bitmap_packer.c
@@ -0,0 +1,197 @@
+/*
+ * Calculate how to pack bitmap rectangles into a larger surface
+ *
+ * Copyright 2009, 2012 Uoti Urpala
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <stdio.h>
+#include <limits.h>
+
+#include "mpv_talloc.h"
+#include "bitmap_packer.h"
+#include "common/common.h"
+
+#define IS_POWER_OF_2(x) (((x) > 0) && !(((x) - 1) & (x))) // x is evaluated 3 times
+
+void packer_reset(struct bitmap_packer *packer)
+{
+    int keep_w_max = packer->w_max;   // the only fields that survive a reset
+    int keep_h_max = packer->h_max;
+    *packer = (struct bitmap_packer) {0};
+    packer->w_max = keep_w_max;
+    packer->h_max = keep_h_max;
+    talloc_free_children(packer);   // frees what is allocated under packer
+}
+
+void packer_get_bb(struct bitmap_packer *packer, struct pos out_bb[2])
+{
+    out_bb[0] = (struct pos) {.x = 0, .y = 0};
+    out_bb[1] = (struct pos) {.x = packer->used_width, .y = packer->used_height};
+}
+
+#define HEIGHT_SORT_BITS 4
+static int size_index(int s)
+{
+    int n = mp_log2(s), mask = (1 << HEIGHT_SORT_BITS) - 1;
+    int frac = (s << HEIGHT_SORT_BITS) >> n;
+    return (n << HEIGHT_SORT_BITS) + ((-1 - frac) & mask);
+}
+
+/* Pack the given rectangles into an area of size w * h.
+ * The size of each rectangle is read from in[i].x / in[i].y.
+ * The height of each rectangle must be less than 65536.
+ * 'scratch' must point to work memory for num_rects+16 ints.
+ * The packed position for rectangle number i is set in out[i].
+ * Return -1 if the rectangles did not fit in w*h, otherwise the final y.
+ *
+ * The rectangles are placed in rows in order approximately sorted by
+ * height (the approximate sorting is simpler than a full one would be,
+ * and allows the algorithm to work in linear time). Additionally, to
+ * reduce wasted space when there are a few tall rectangles, empty
+ * lower-right parts of rows are filled recursively when the size of
+ * rectangles in the row drops past a power-of-two threshold. So if a
+ * row starts with rectangles of size 3x50, 10x40 and 5x20 then the
+ * free rectangle with corners (13, 20)-(w, 50) is filled recursively.
+ */
+static int pack_rectangles(struct pos *in, struct pos *out, int num_rects,
+                           int w, int h, int *scratch, int *used_width)
+{
+    int bins[16 << HEIGHT_SORT_BITS];
+    int sizes[16 << HEIGHT_SORT_BITS] = { 0 };
+    for (int i = 0; i < num_rects; i++)
+        sizes[size_index(in[i].y)]++;   // histogram of height classes
+    int idx = 0;
+    for (int i = 0; i < 16 << HEIGHT_SORT_BITS; i += 1 << HEIGHT_SORT_BITS) {
+        for (int j = 0; j < 1 << HEIGHT_SORT_BITS; j++) {
+            bins[i + j] = idx;   // start offset of this class in scratch
+            idx += sizes[i + j];
+        }
+        scratch[idx++] = -1;   // terminator per group (the 16 extra ints)
+    }
+    for (int i = 0; i < num_rects; i++)
+        scratch[bins[size_index(in[i].y)]++] = i;   // rect indices, by class
+    for (int i = 0; i < 16; i++)   // bins[0..15]: read cursor at each group start
+        bins[i] = bins[i << HEIGHT_SORT_BITS] - sizes[i << HEIGHT_SORT_BITS];
+    struct {
+        int size, x, bottom;
+    } stack[16] = {{15, 0, h}}, s = {0};   // explicit stack replaces recursion
+    int stackpos = 1;
+    int y;
+    while (stackpos) {
+        y = s.bottom;   // continue below what the previous region used
+        s = stack[--stackpos];
+        s.size++;
+        while (s.size--) {   // walk the groups from large to small
+            int maxy = -1;
+            int obj;
+            while ((obj = scratch[bins[s.size]]) >= 0) {
+                int bottom = y + in[obj].y;
+                if (bottom > s.bottom)
+                    break;   // too tall for the current region
+                int right = s.x + in[obj].x;
+                if (right > w)
+                    break;   // row is full
+                bins[s.size]++;
+                out[obj] = (struct pos){s.x, y};
+                num_rects--;
+                if (maxy < 0)
+                    stack[stackpos++] = s;   // saved once per group that places
+                s.x = right;
+                maxy = MPMAX(maxy, bottom);
+            }
+            *used_width = MPMAX(*used_width, s.x);   // only ever raised
+            if (maxy > 0)
+                s.bottom = maxy;
+        }
+    }
+    return num_rects ? -1 : y;   // num_rects counts rectangles still unplaced
+}
+
+int packer_pack(struct bitmap_packer *packer)
+{   // Pack packer->in[] into packer->result[], growing packer->w/h as needed.
+    if (packer->count == 0)
+        return 0;
+    int w_orig = packer->w, h_orig = packer->h;
+    struct pos *in = packer->in;   // note: entries are modified in place below
+    int xmax = 0, ymax = 0;
+    for (int i = 0; i < packer->count; i++) {
+        if (in[i].x <= 0 || in[i].y <= 0) {
+            in[i] = (struct pos){0, 0};   // degenerate sizes become 0x0
+        } else {
+            in[i].x += packer->padding * 2;   // padding on both sides
+            in[i].y += packer->padding * 2;
+        }
+        if (in[i].x < 0 || in [i].x > 65535 || in[i].y < 0 || in[i].y > 65535) {
+            fprintf(stderr, "Invalid OSD / subtitle bitmap size\n");
+            abort();
+        }
+        xmax = MPMAX(xmax, in[i].x);
+        ymax = MPMAX(ymax, in[i].y);
+    }
+    if (xmax > packer->w)   // the surface must fit the largest rectangle
+        packer->w = 1 << (mp_log2(xmax - 1) + 1);
+    if (ymax > packer->h)
+        packer->h = 1 << (mp_log2(ymax - 1) + 1);
+    while (1) {   // retry with a larger surface until it fits or limits are hit
+        int used_width = 0;
+        int y = pack_rectangles(in, packer->result, packer->count,
+                                packer->w, packer->h,
+                                packer->scratch, &used_width);
+        if (y >= 0) {
+            packer->used_width = MPMIN(used_width, packer->w);
+            packer->used_height = MPMIN(y, packer->h);
+            assert(packer->w == 0 || IS_POWER_OF_2(packer->w));
+            assert(packer->h == 0 || IS_POWER_OF_2(packer->h));
+            if (packer->padding) {
+                for (int i = 0; i < packer->count; i++) {
+                    packer->result[i].x += packer->padding;   // skip leading padding
+                    packer->result[i].y += packer->padding;
+                }
+            }
+            return packer->w != w_orig || packer->h != h_orig;   // 1 if resized
+        }
+        int w_max = packer->w_max > 0 ? packer->w_max : INT_MAX;   // <= 0: no limit
+        int h_max = packer->h_max > 0 ? packer->h_max : INT_MAX;
+        if (packer->w <= packer->h && packer->w != w_max)
+            packer->w = MPMIN(packer->w * 2, w_max);   // double the smaller side
+        else if (packer->h != h_max)
+            packer->h = MPMIN(packer->h * 2, h_max);
+        else {
+            packer->w = w_orig;   // give up: restore the size from before the call
+            packer->h = h_orig;
+            return -1;
+        }
+    }
+}
+
+void packer_set_size(struct bitmap_packer *packer, int size)
+{
+    packer->count = size;
+    if (packer->asize >= size)
+        return;
+    int cap = MPMAX(packer->asize * 2, size);
+    packer->asize = cap;
+    // Only 'in' is resized in place; result and scratch are replaced.
+    talloc_free(packer->result);
+    talloc_free(packer->scratch);
+    packer->in = talloc_realloc(packer, packer->in, struct pos, cap);
+    packer->result = talloc_array_ptrtype(packer, packer->result, cap);
+    packer->scratch = talloc_array_ptrtype(packer, packer->scratch, cap + 16);
+}
diff --git a/video/out/bitmap_packer.h b/video/out/bitmap_packer.h
new file mode 100644
index 0000000..97bf88f
--- /dev/null
+++ b/video/out/bitmap_packer.h
@@ -0,0 +1,51 @@
+#ifndef MPLAYER_PACK_RECTANGLES_H
+#define MPLAYER_PACK_RECTANGLES_H
+
+struct pos {
+    int x;   // packer->in: rectangle width;  packer->result: x position
+    int y;   // packer->in: rectangle height; packer->result: y position
+};
+
+struct bitmap_packer {
+    int w;              // current surface size; grown by packer_pack() if needed
+    int h;
+    int w_max;          // upper limits for w/h; values <= 0 mean no limit
+    int h_max;
+    int padding;        // added around every non-empty rectangle
+    int count;          // number of valid entries in in[] / result[]
+    struct pos *in;     // input sizes (packer_pack() adds the padding in place)
+    struct pos *result; // output positions, written by packer_pack()
+    int used_width;     // extent actually used, set by packer_pack()
+    int used_height;
+
+    // internal
+    int *scratch;       // work memory, asize + 16 ints
+    int asize;          // allocated capacity of in[] / result[]
+};
+
+struct sub_bitmaps;
+
+// Clear all internal state. Leave the following fields: w_max, h_max
+void packer_reset(struct bitmap_packer *packer);
+
+// Get the bounding box used for bitmap data (including padding).
+// The bounding box doesn't exceed (0,0)-(packer->w,packer->h).
+void packer_get_bb(struct bitmap_packer *packer, struct pos out_bb[2]);
+
+/* Ensure packer->in and packer->result can hold at least 'size' items.
+ * Also sets packer->count to size.
+ */
+void packer_set_size(struct bitmap_packer *packer, int size);
+
+/* To use this, set packer->count to number of rectangles, w_max and h_max
+ * to maximum output rectangle size, and w and h to start size (may be 0).
+ * Write input sizes in packer->in.
+ * Resulting packing will be written in packer->result.
+ * w and h will be increased if necessary for successful packing.
+ * There is a strong guarantee that w and h will be powers of 2 (or set to 0).
+ * Return value is -1 if packing failed because w and h were set to max
+ * values but that wasn't enough, 1 if w or h was increased, and 0 otherwise.
+ */
+int packer_pack(struct bitmap_packer *packer);
+
+#endif
diff --git a/video/out/cocoa_cb_common.swift b/video/out/cocoa_cb_common.swift
new file mode 100644
index 0000000..9c0054a
--- /dev/null
+++ b/video/out/cocoa_cb_common.swift
@@ -0,0 +1,230 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+
+class CocoaCB: Common {
+    var libmpv: LibmpvHelper
+    var layer: GLLayer?
+
+    @objc var isShuttingDown: Bool = false
+
+    enum State {
+        case uninitialized
+        case needsInit
+        case initialized
+    }
+    var backendState: State = .uninitialized
+
+
+    @objc init(_ mpvHandle: OpaquePointer) {
+        let newlog = mp_log_new(UnsafeMutablePointer<MPContext>(mpvHandle), mp_client_get_log(mpvHandle), "cocoacb")
+        libmpv = LibmpvHelper(mpvHandle, newlog)
+        super.init(newlog)
+        layer = GLLayer(cocoaCB: self)
+    }
+
+    func preinit(_ vo: UnsafeMutablePointer<vo>) {
+        mpv = MPVHelper(vo, log)
+
+        if backendState == .uninitialized {
+            backendState = .needsInit
+
+            guard let layer = self.layer else {
+                log.sendError("Something went wrong, no GLLayer was initialized")
+                exit(1)
+            }
+
+            initView(vo, layer)
+            initMisc(vo)
+        }
+    }
+
+    func uninit() {
+        window?.orderOut(nil)
+        window?.close()
+        mpv = nil
+    }
+
+    func reconfig(_ vo: UnsafeMutablePointer<vo>) {
+        mpv?.vo = vo
+        if backendState == .needsInit {
+            DispatchQueue.main.sync { self.initBackend(vo) }
+        } else {
+            DispatchQueue.main.async {
+                self.updateWindowSize(vo)
+                self.layer?.update(force: true)
+            }
+        }
+    }
+
+    func initBackend(_ vo: UnsafeMutablePointer<vo>) {
+        let previousActiveApp = getActiveApp()
+        initApp()
+        initWindow(vo, previousActiveApp)
+        updateICCProfile()
+        initWindowState()
+
+        backendState = .initialized
+    }
+
+    func updateWindowSize(_ vo: UnsafeMutablePointer<vo>) {
+        guard let targetScreen = getTargetScreen(forFullscreen: false) ?? NSScreen.main else
+        {
+            log.sendWarning("Couldn't update Window size, no Screen available")
+            return
+        }
+
+        let wr = getWindowGeometry(forScreen: targetScreen, videoOut: vo)
+        if !(window?.isVisible ?? false) &&
+           !(window?.isMiniaturized ?? false) &&
+           !NSApp.isHidden
+        {
+            window?.makeKeyAndOrderFront(nil)
+        }
+        layer?.atomicDrawingStart()
+        window?.updateSize(wr.size)
+    }
+
+    override func displayLinkCallback(_ displayLink: CVDisplayLink,
+                                            _ inNow: UnsafePointer<CVTimeStamp>,
+                                     _ inOutputTime: UnsafePointer<CVTimeStamp>,
+                                          _ flagsIn: CVOptionFlags,
+                                         _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn
+    {
+        libmpv.reportRenderFlip()
+        return kCVReturnSuccess
+    }
+
+    override func lightSensorUpdate() {
+        libmpv.setRenderLux(lmuToLux(lastLmu))
+    }
+
+    override func updateICCProfile() {
+        guard let colorSpace = window?.screen?.colorSpace else {
+            log.sendWarning("Couldn't update ICC Profile, no color space available")
+            return
+        }
+
+        libmpv.setRenderICCProfile(colorSpace)
+        layer?.colorspace = colorSpace.cgColorSpace
+    }
+
+    override func windowDidEndAnimation() {
+        layer?.update()
+        checkShutdown()
+    }
+
+    override func windowSetToFullScreen() {
+        layer?.update(force: true)
+    }
+
+    override func windowSetToWindow() {
+        layer?.update(force: true)
+    }
+
+    override func windowDidUpdateFrame() {
+        layer?.update(force: true)
+    }
+
+    override func windowDidChangeScreen() {
+        layer?.update(force: true)
+    }
+
+    override func windowDidChangeScreenProfile() {
+        layer?.needsICCUpdate = true
+    }
+
+    override func windowDidChangeBackingProperties() {
+        layer?.contentsScale = window?.backingScaleFactor ?? 1
+    }
+
+    override func windowWillStartLiveResize() {
+        layer?.inLiveResize = true
+    }
+
+    override func windowDidEndLiveResize() {
+        layer?.inLiveResize = false
+    }
+
+    override func windowDidChangeOcclusionState() {
+        layer?.update(force: true)
+    }
+
+    var controlCallback: mp_render_cb_control_fn = { ( v, ctx, e, request, data ) -> Int32 in
+        let ccb = unsafeBitCast(ctx, to: CocoaCB.self)
+
+        guard let vo = v, let events = e else {
+            ccb.log.sendWarning("Unexpected nil value in Control Callback")
+            return VO_FALSE
+        }
+
+        return ccb.control(vo, events: events, request: request, data: data)
+    }
+
+    override func control(_ vo: UnsafeMutablePointer<vo>,
+                    events: UnsafeMutablePointer<Int32>,
+                    request: UInt32,
+                    data: UnsafeMutableRawPointer?) -> Int32
+    {
+        switch mp_voctrl(request) {
+        case VOCTRL_PREINIT:
+            DispatchQueue.main.sync { self.preinit(vo) }
+            return VO_TRUE
+        case VOCTRL_UNINIT:
+            DispatchQueue.main.async { self.uninit() }
+            return VO_TRUE
+        case VOCTRL_RECONFIG:
+            reconfig(vo)
+            return VO_TRUE
+        default:
+            break
+        }
+
+        return super.control(vo, events: events, request: request, data: data)
+    }
+
+    func shutdown(_ destroy: Bool = false) {
+        isShuttingDown = window?.isAnimating ?? false ||
+                         window?.isInFullscreen ?? false && mpv?.opts.native_fs ?? true
+        if window?.isInFullscreen ?? false && !(window?.isAnimating ?? false) {
+            window?.close()
+        }
+        if isShuttingDown { return }
+
+        uninit()
+        uninitCommon()
+
+        libmpv.deinitRender()
+        libmpv.deinitMPV(destroy)
+    }
+
+    func checkShutdown() {
+        if isShuttingDown {
+            shutdown(true)
+        }
+    }
+
+    @objc func processEvent(_ event: UnsafePointer<mpv_event>) {
+        switch event.pointee.event_id {
+        case MPV_EVENT_SHUTDOWN:
+            shutdown()
+        default:
+            break
+        }
+    }
+}
diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c
new file mode 100644
index 0000000..05f04fd
--- /dev/null
+++ b/video/out/d3d11/context.c
@@ -0,0 +1,566 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "osdep/timer.h"
+#include "osdep/windows_utils.h"
+
+#include "video/out/gpu/context.h"
+#include "video/out/gpu/d3d11_helpers.h"
+#include "video/out/gpu/spirv.h"
+#include "video/out/w32_common.h"
+#include "context.h"
+#include "ra_d3d11.h"
+
+static int d3d11_validate_adapter(struct mp_log *log,
+                                  const struct m_option *opt,
+                                  struct bstr name, const char **value);
+
+struct d3d11_opts {
+    int feature_level;
+    int warp;
+    bool flip;
+    int sync_interval;
+    char *adapter_name;
+    int output_format;
+    int color_space;
+    bool exclusive_fs;
+};
+
+#define OPT_BASE_STRUCT struct d3d11_opts
+const struct m_sub_options d3d11_conf = {
+    .opts = (const struct m_option[]) {
+        {"d3d11-warp", OPT_CHOICE(warp,
+            {"auto", -1},
+            {"no", 0},
+            {"yes", 1})},
+        {"d3d11-feature-level", OPT_CHOICE(feature_level,
+            {"12_1", D3D_FEATURE_LEVEL_12_1},
+            {"12_0", D3D_FEATURE_LEVEL_12_0},
+            {"11_1", D3D_FEATURE_LEVEL_11_1},
+            {"11_0", D3D_FEATURE_LEVEL_11_0},
+            {"10_1", D3D_FEATURE_LEVEL_10_1},
+            {"10_0", D3D_FEATURE_LEVEL_10_0},
+            {"9_3", D3D_FEATURE_LEVEL_9_3},
+            {"9_2", D3D_FEATURE_LEVEL_9_2},
+            {"9_1", D3D_FEATURE_LEVEL_9_1})},
+        {"d3d11-flip", OPT_BOOL(flip)},
+        {"d3d11-sync-interval", OPT_INT(sync_interval), M_RANGE(0, 4)},
+        {"d3d11-adapter", OPT_STRING_VALIDATE(adapter_name,
+                                              d3d11_validate_adapter)},
+        {"d3d11-output-format", OPT_CHOICE(output_format,
+            {"auto",     DXGI_FORMAT_UNKNOWN},
+            {"rgba8",    DXGI_FORMAT_R8G8B8A8_UNORM},
+            {"bgra8",    DXGI_FORMAT_B8G8R8A8_UNORM},
+            {"rgb10_a2", DXGI_FORMAT_R10G10B10A2_UNORM},
+            {"rgba16f",  DXGI_FORMAT_R16G16B16A16_FLOAT})},
+        {"d3d11-output-csp", OPT_CHOICE(color_space,
+            {"auto", -1},
+            {"srgb",    DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709},
+            {"linear",  DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709},
+            {"pq",      DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020},
+            {"bt.2020", DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P2020})},
+        {"d3d11-exclusive-fs", OPT_BOOL(exclusive_fs)},
+        {0}
+    },
+    .defaults = &(const struct d3d11_opts) {
+        .feature_level = D3D_FEATURE_LEVEL_12_1,
+        .warp = -1,
+        .flip = true,
+        .sync_interval = 1,
+        .adapter_name = NULL,
+        .output_format = DXGI_FORMAT_UNKNOWN,
+        .color_space = -1,
+    },
+    .size = sizeof(struct d3d11_opts)
+};
+
+struct priv {
+    struct d3d11_opts *opts;
+    struct m_config_cache *opts_cache;
+
+    struct mp_vo_opts *vo_opts;
+    struct m_config_cache *vo_opts_cache;
+
+    struct ra_tex *backbuffer;
+    ID3D11Device *device;
+    IDXGISwapChain *swapchain;
+    struct mp_colorspace swapchain_csp;
+
+    int64_t perf_freq;
+    unsigned sync_refresh_count;
+    int64_t sync_qpc_time;
+    int64_t vsync_duration_qpc;
+    int64_t last_submit_qpc;
+};
+
+// Validator for the d3d11-adapter option: "" is accepted as-is, "help" prints
+// the adapter listing and exits, anything else must be matched by the helper.
+static int d3d11_validate_adapter(struct mp_log *log,
+                                  const struct m_option *opt,
+                                  struct bstr name, const char **value)
+{
+    struct bstr requested = bstr0(*value);
+
+    // Nothing to verify for an empty value
+    if (bstr_equals0(requested, ""))
+        return 0;
+
+    if (bstr_equals0(requested, "help")) {
+        // Listing mode: pass no name and let the helper fill in the text
+        struct bstr names = { 0 };
+
+        mp_d3d11_list_or_verify_adapters(log, bstr0(NULL), &names);
+        mp_info(log, "Available D3D11 adapters:\n%.*s", BSTR_P(names));
+        talloc_free(names.start);
+        return M_OPT_EXIT;
+    }
+
+    // Verification mode: no listing buffer, only the match result matters
+    if (mp_d3d11_list_or_verify_adapters(log, requested, NULL))
+        return 0;
+
+    mp_err(log, "No adapter matching '%.*s'!\n", BSTR_P(requested));
+
+    return M_OPT_INVALID;
+}
+
+static struct ra_tex *get_backbuffer(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    ID3D11Texture2D *backbuffer = NULL;
+    struct ra_tex *tex = NULL;
+    HRESULT hr;
+
+    hr = IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D,
+                                  (void**)&backbuffer);
+    if (FAILED(hr)) {
+        MP_ERR(ctx, "Couldn't get swapchain image\n");
+        goto done;
+    }
+
+    tex = ra_d3d11_wrap_tex(ctx->ra, (ID3D11Resource *)backbuffer);
+done:
+    SAFE_RELEASE(backbuffer);
+    return tex;
+}
+
+static bool resize(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    HRESULT hr;
+
+    if (p->backbuffer) {
+        MP_ERR(ctx, "Attempt at resizing while a frame was in progress!\n");
+        return false;
+    }
+
+    hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, ctx->vo->dwidth,
+        ctx->vo->dheight, DXGI_FORMAT_UNKNOWN, 0);
+    if (FAILED(hr)) {
+        MP_FATAL(ctx, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr));
+        return false;
+    }
+
+    return true;
+}
+
+static bool d3d11_reconfig(struct ra_ctx *ctx)
+{
+    vo_w32_config(ctx->vo);
+    return resize(ctx);
+}
+
+static int d3d11_color_depth(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+    DXGI_SWAP_CHAIN_DESC desc;
+
+    HRESULT hr = IDXGISwapChain_GetDesc(p->swapchain, &desc);
+    if (FAILED(hr)) {
+        MP_ERR(sw->ctx, "Failed to query swap chain description: %s!\n",
+               mp_HRESULT_to_str(hr));
+        return 0;
+    }
+
+    const struct ra_format *ra_fmt =
+        ra_d3d11_get_ra_format(sw->ctx->ra, desc.BufferDesc.Format);
+    if (!ra_fmt)
+        return 0;
+
+    return ra_fmt->component_depth[0];
+}
+
+static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+    struct priv *p = sw->priv;
+
+    if (!out_fbo)
+        return true;
+
+    assert(!p->backbuffer);
+
+    p->backbuffer = get_backbuffer(sw->ctx);
+    if (!p->backbuffer)
+        return false;
+
+    *out_fbo = (struct ra_fbo) {
+        .tex = p->backbuffer,
+        .flip = false,
+        .color_space = p->swapchain_csp
+    };
+    return true;
+}
+
+static bool d3d11_submit_frame(struct ra_swapchain *sw,
+                               const struct vo_frame *frame)
+{
+    struct priv *p = sw->priv;
+
+    ra_d3d11_flush(sw->ctx->ra);
+    ra_tex_free(sw->ctx->ra, &p->backbuffer);
+    return true;
+}
+
+static int64_t qpc_to_ns(struct ra_swapchain *sw, int64_t qpc)
+{
+    struct priv *p = sw->priv;
+
+    // Convert QPC units (1/perf_freq seconds) to nanoseconds. This will work
+    // without overflow because the QPC value is guaranteed not to roll-over
+    // within 100 years, so perf_freq must be less than 2.9*10^9.
+    return qpc / p->perf_freq * INT64_C(1000000000) +
+        qpc % p->perf_freq * INT64_C(1000000000) / p->perf_freq;
+}
+
+static int64_t qpc_ns_now(struct ra_swapchain *sw)
+{
+    LARGE_INTEGER perf_count;
+    QueryPerformanceCounter(&perf_count);
+    return qpc_to_ns(sw, perf_count.QuadPart);
+}
+
+static void d3d11_swap_buffers(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+
+    m_config_cache_update(p->opts_cache);
+
+    LARGE_INTEGER perf_count;
+    QueryPerformanceCounter(&perf_count);
+    p->last_submit_qpc = perf_count.QuadPart;
+
+    IDXGISwapChain_Present(p->swapchain, p->opts->sync_interval, 0);
+}
+
+// Fill *info with vsync timing estimated from DXGI frame statistics.
+static void d3d11_get_vsync(struct ra_swapchain *sw, struct vo_vsync_info *info)
+{
+    struct priv *p = sw->priv;
+    HRESULT hr;
+
+    m_config_cache_update(p->opts_cache);
+
+    // The calculations below are only valid if mpv presents on every vsync
+    if (p->opts->sync_interval != 1)
+        return;
+
+    // They're also only valid for flip model swapchains
+    DXGI_SWAP_CHAIN_DESC desc;
+    hr = IDXGISwapChain_GetDesc(p->swapchain, &desc);
+    if (FAILED(hr) || (desc.SwapEffect != DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL &&
+                       desc.SwapEffect != DXGI_SWAP_EFFECT_FLIP_DISCARD))
+        return;
+
+    // GetLastPresentCount returns a sequential ID for the frame submitted by
+    // the last call to IDXGISwapChain::Present()
+    UINT submit_count;
+    hr = IDXGISwapChain_GetLastPresentCount(p->swapchain, &submit_count);
+    if (FAILED(hr))
+        return;
+
+    // GetFrameStatistics returns two pairs. The first is (PresentCount,
+    // PresentRefreshCount) which relates a present ID (on the same timeline as
+    // GetLastPresentCount) to the physical vsync it was displayed on. The
+    // second is (SyncRefreshCount, SyncQPCTime), which relates a physical vsync
+    // to a timestamp on the same clock as QueryPerformanceCounter.
+    DXGI_FRAME_STATISTICS stats;
+    hr = IDXGISwapChain_GetFrameStatistics(p->swapchain, &stats);
+    if (hr == DXGI_ERROR_FRAME_STATISTICS_DISJOINT) {
+        p->sync_refresh_count = 0;
+        p->sync_qpc_time = 0;
+    }
+    if (FAILED(hr))
+        return;
+
+    info->last_queue_display_time = 0;
+    info->vsync_duration = 0;
+    // Detecting skipped vsyncs is possible but not supported yet
+    info->skipped_vsyncs = -1;
+
+    // Get the number of physical vsyncs that have passed since the start of the
+    // playback or disjoint event.
+    // Check for 0 here, since sometimes GetFrameStatistics returns S_OK but
+    // with 0s in some (all?) members of DXGI_FRAME_STATISTICS.
+    unsigned src_passed = 0;
+    if (stats.SyncRefreshCount && p->sync_refresh_count)
+        src_passed = stats.SyncRefreshCount - p->sync_refresh_count;
+    if (p->sync_refresh_count == 0)
+        p->sync_refresh_count = stats.SyncRefreshCount;
+
+    // Get the elapsed time passed between the above vsyncs. This is measured
+    // from the first sample and keeps growing, so it needs a 64-bit type.
+    int64_t sqt_passed = 0;
+    if (stats.SyncQPCTime.QuadPart && p->sync_qpc_time)
+        sqt_passed = stats.SyncQPCTime.QuadPart - p->sync_qpc_time;
+    if (p->sync_qpc_time == 0)
+        p->sync_qpc_time = stats.SyncQPCTime.QuadPart;
+
+    // If any vsyncs have passed, estimate the physical frame rate
+    if (src_passed && sqt_passed)
+        p->vsync_duration_qpc = sqt_passed / src_passed;
+    if (p->vsync_duration_qpc)
+        info->vsync_duration = qpc_to_ns(sw, p->vsync_duration_qpc);
+
+    // If the physical frame rate is known and the other members of
+    // DXGI_FRAME_STATISTICS are non-0, estimate the timing of the next frame
+    if (p->vsync_duration_qpc && stats.PresentCount &&
+        stats.PresentRefreshCount && stats.SyncRefreshCount &&
+        stats.SyncQPCTime.QuadPart)
+    {
+        // It's not clear if PresentRefreshCount and SyncRefreshCount can refer
+        // to different frames, but in case they can, assuming mpv presents on
+        // every frame, guess the present count that relates to SyncRefreshCount.
+        unsigned expected_sync_pc = stats.PresentCount +
+            (stats.SyncRefreshCount - stats.PresentRefreshCount);
+
+        // Now guess the timestamp of the last submitted frame based on the
+        // timestamp of the frame at SyncRefreshCount and the frame rate
+        int queued_frames = submit_count - expected_sync_pc;
+        int64_t last_queue_display_time_qpc = stats.SyncQPCTime.QuadPart +
+            queued_frames * p->vsync_duration_qpc;
+
+        // Only set the estimated display time if it's after the last submission
+        // time. It could be before if mpv skips a lot of frames.
+        if (last_queue_display_time_qpc >= p->last_submit_qpc) {
+            info->last_queue_display_time = mp_time_ns() +
+                (qpc_to_ns(sw, last_queue_display_time_qpc) - qpc_ns_now(sw));
+        }
+    }
+}
+
+// Apply the wanted exclusive-fullscreen state to the swapchain and resize it.
+static bool d3d11_set_fullscreen(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    HRESULT hr;
+
+    m_config_cache_update(p->opts_cache);
+
+    if (!p->swapchain) {
+        MP_ERR(ctx, "Full screen configuration was requested before D3D11 "
+                    "swap chain was ready!\n");
+        return false;
+    }
+
+    // we only want exclusive FS if we are entering FS and
+    // exclusive FS is enabled. Otherwise disable exclusive FS.
+    bool enable_exclusive_fs = p->vo_opts->fullscreen &&
+                               p->opts->exclusive_fs;
+
+    MP_VERBOSE(ctx, "%s full-screen exclusive mode while %s fullscreen\n",
+               enable_exclusive_fs ? "Enabling" : "Disabling",
+               p->vo_opts->fullscreen ? "entering" : "leaving");
+
+    hr = IDXGISwapChain_SetFullscreenState(p->swapchain,
+                                           enable_exclusive_fs, NULL);
+    if (FAILED(hr)) {
+        MP_ERR(ctx, "SetFullscreenState failed: %s\n", mp_HRESULT_to_str(hr));
+        return false;
+    }
+
+    return resize(ctx);
+}
+
+static int d3d11_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    struct priv *p = ctx->priv;
+    int ret = -1;
+    bool fullscreen_switch_needed = false;
+
+    switch (request) {
+    case VOCTRL_VO_OPTS_CHANGED: {
+        void *changed_option;
+
+        while (m_config_cache_get_next_changed(p->vo_opts_cache,
+                                               &changed_option))
+        {
+            struct mp_vo_opts *vo_opts = p->vo_opts_cache->opts;
+
+            if (changed_option == &vo_opts->fullscreen) {
+                fullscreen_switch_needed = true;
+            }
+        }
+
+        break;
+    }
+    default:
+        break;
+    }
+
+    // if leaving full screen, handle d3d11 stuff first, then general
+    // windowing
+    if (fullscreen_switch_needed && !p->vo_opts->fullscreen) {
+        if (!d3d11_set_fullscreen(ctx))
+            return VO_FALSE;
+
+        fullscreen_switch_needed = false;
+    }
+
+    ret = vo_w32_control(ctx->vo, events, request, arg);
+
+    // if entering full screen, handle d3d11 after general windowing stuff
+    if (fullscreen_switch_needed && p->vo_opts->fullscreen) {
+        if (!d3d11_set_fullscreen(ctx))
+            return VO_FALSE;
+
+        fullscreen_switch_needed = false;
+    }
+
+    if (*events & VO_EVENT_RESIZE) {
+        if (!resize(ctx))
+            return VO_ERROR;
+    }
+    return ret;
+}
+
+static void d3d11_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    if (p->swapchain)
+        IDXGISwapChain_SetFullscreenState(p->swapchain, FALSE, NULL);
+
+    if (ctx->ra)
+        ra_tex_free(ctx->ra, &p->backbuffer);
+    SAFE_RELEASE(p->swapchain);
+    vo_w32_uninit(ctx->vo);
+    SAFE_RELEASE(p->device);
+
+    // Destroy the RA last to prevent objects we hold from showing up in D3D's
+    // leak checker
+    if (ctx->ra)
+        ctx->ra->fns->destroy(ctx->ra);
+}
+
+static const struct ra_swapchain_fns d3d11_swapchain = {
+    .color_depth  = d3d11_color_depth,
+    .start_frame  = d3d11_start_frame,
+    .submit_frame = d3d11_submit_frame,
+    .swap_buffers = d3d11_swap_buffers,
+    .get_vsync    = d3d11_get_vsync,
+};
+
+static bool d3d11_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    p->opts_cache = m_config_cache_alloc(ctx, ctx->global, &d3d11_conf);
+    p->opts = p->opts_cache->opts;
+
+    p->vo_opts_cache = m_config_cache_alloc(ctx, ctx->vo->global, &vo_sub_opts);
+    p->vo_opts = p->vo_opts_cache->opts;
+
+    LARGE_INTEGER perf_freq;
+    QueryPerformanceFrequency(&perf_freq);
+    p->perf_freq = perf_freq.QuadPart;
+
+    struct ra_swapchain *sw = ctx->swapchain = talloc_zero(ctx, struct ra_swapchain);
+    sw->priv = p;
+    sw->ctx = ctx;
+    sw->fns = &d3d11_swapchain;
+
+    struct d3d11_device_opts dopts = {
+        .debug = ctx->opts.debug,
+        .allow_warp = p->opts->warp != 0,
+        .force_warp = p->opts->warp == 1,
+        .max_feature_level = p->opts->feature_level,
+        .max_frame_latency = ctx->vo->opts->swapchain_depth,
+        .adapter_name = p->opts->adapter_name,
+    };
+    if (!mp_d3d11_create_present_device(ctx->log, &dopts, &p->device))
+        goto error;
+
+    if (!spirv_compiler_init(ctx))
+        goto error;
+    ctx->ra = ra_d3d11_create(p->device, ctx->log, ctx->spirv);
+    if (!ctx->ra)
+        goto error;
+
+    if (!vo_w32_init(ctx->vo))
+        goto error;
+
+    UINT usage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_SHADER_INPUT;
+    if (ID3D11Device_GetFeatureLevel(p->device) >= D3D_FEATURE_LEVEL_11_0 &&
+        p->opts->output_format != DXGI_FORMAT_B8G8R8A8_UNORM)
+    {
+        usage |= DXGI_USAGE_UNORDERED_ACCESS;
+    }
+
+    struct d3d11_swapchain_opts scopts = {
+        .window = vo_w32_hwnd(ctx->vo),
+        .width = ctx->vo->dwidth,
+        .height = ctx->vo->dheight,
+        .format = p->opts->output_format,
+        .color_space = p->opts->color_space,
+        .configured_csp = &p->swapchain_csp,
+        .flip = p->opts->flip,
+        // Add one frame for the backbuffer and one frame of "slack" to reduce
+        // contention with the window manager when acquiring the backbuffer
+        .length = ctx->vo->opts->swapchain_depth + 2,
+        .usage = usage,
+    };
+    if (!mp_d3d11_create_swapchain(p->device, ctx->log, &scopts, &p->swapchain))
+        goto error;
+
+    return true;
+
+error:
+    d3d11_uninit(ctx);
+    return false;
+}
+
+IDXGISwapChain *ra_d3d11_ctx_get_swapchain(struct ra_ctx *ra)
+{
+    if (ra->swapchain->fns != &d3d11_swapchain)
+        return NULL;
+    // The fns table matched, so this context was set up by d3d11_init()
+    struct priv *p = ra->priv;
+    // Take an extra reference before handing the swap chain out
+    IDXGISwapChain_AddRef(p->swapchain);
+
+    return p->swapchain;
+}
+
+const struct ra_ctx_fns ra_ctx_d3d11 = {
+    .type     = "d3d11",
+    .name     = "d3d11",
+    .reconfig = d3d11_reconfig,
+    .control  = d3d11_control,
+    .init     = d3d11_init,
+    .uninit   = d3d11_uninit,
+};
diff --git a/video/out/d3d11/context.h b/video/out/d3d11/context.h
new file mode 100644
index 0000000..8a9ef4c
--- /dev/null
+++ b/video/out/d3d11/context.h
@@ -0,0 +1,9 @@
+#pragma once
+
+#include <dxgi.h>
+
+#include "video/out/gpu/context.h"
+
+// Get the underlying D3D11 swap chain from an RA context. The returned swap chain is
+// refcounted and must be released by the caller.
+IDXGISwapChain *ra_d3d11_ctx_get_swapchain(struct ra_ctx *ra);
diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c
new file mode 100644
index 0000000..6aaa12b
--- /dev/null
+++ b/video/out/d3d11/hwdec_d3d11va.c
@@ -0,0 +1,258 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <d3d11.h>
+#include <d3d11_1.h>
+
+#include "common/common.h"
+#include "options/m_config.h"
+#include "osdep/windows_utils.h"
+#include "video/hwdec.h"
+#include "video/d3d.h"
+#include "video/out/d3d11/ra_d3d11.h"
+#include "video/out/gpu/hwdec.h"
+
+struct d3d11va_opts {
+    bool zero_copy;
+};
+
+#define OPT_BASE_STRUCT struct d3d11va_opts
+const struct m_sub_options d3d11va_conf = {
+    .opts = (const struct m_option[]) {
+        {"d3d11va-zero-copy", OPT_BOOL(zero_copy)},
+        {0}
+    },
+    .defaults = &(const struct d3d11va_opts) {0},
+    .size = sizeof(struct d3d11va_opts)
+};
+
+struct priv_owner {
+    struct d3d11va_opts *opts;
+
+    struct mp_hwdec_ctx hwctx;
+    ID3D11Device *device;
+    ID3D11Device1 *device1;
+};
+
+struct priv {
+    // 1-copy path
+    ID3D11DeviceContext1 *ctx;
+    ID3D11Texture2D *copy_tex;
+
+    // zero-copy path
+    int num_planes;
+    const struct ra_format *fmt[4];
+};
+
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    hwdec_devices_remove(hw->devs, &p->hwctx);
+    av_buffer_unref(&p->hwctx.av_device_ref);
+    SAFE_RELEASE(p->device);
+    SAFE_RELEASE(p->device1);
+}
+
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    HRESULT hr;
+
+    if (!ra_is_d3d11(hw->ra_ctx->ra))
+        return -1;
+    p->device = ra_d3d11_get_device(hw->ra_ctx->ra);
+    if (!p->device)
+        return -1;
+
+    p->opts = mp_get_config_group(hw->priv, hw->global, &d3d11va_conf);
+
+    // D3D11VA requires Direct3D 11.1, so this should always succeed
+    hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1,
+                                     (void**)&p->device1);
+    if (FAILED(hr)) {
+        MP_ERR(hw, "Failed to get D3D11.1 interface: %s\n",
+               mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    ID3D10Multithread *multithread;
+    hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread,
+                                     (void **)&multithread);
+    if (FAILED(hr)) {
+        MP_ERR(hw, "Failed to get Multithread interface: %s\n",
+               mp_HRESULT_to_str(hr));
+        return -1;
+    }
+    ID3D10Multithread_SetMultithreadProtected(multithread, TRUE);
+    ID3D10Multithread_Release(multithread);
+
+    static const int subfmts[] = {IMGFMT_NV12, IMGFMT_P010, 0};
+    p->hwctx = (struct mp_hwdec_ctx){
+        .driver_name = hw->driver->name,
+        .av_device_ref = d3d11_wrap_device_ref(p->device),
+        .supported_formats = subfmts,
+        .hw_imgfmt = IMGFMT_D3D11,
+    };
+
+    if (!p->hwctx.av_device_ref) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+        return -1;
+    }
+
+    hwdec_devices_add(hw->devs, &p->hwctx);
+    return 0;
+}
+
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    for (int i = 0; i < 4; i++)
+        ra_tex_free(mapper->ra, &mapper->tex[i]);
+    SAFE_RELEASE(p->copy_tex);
+    SAFE_RELEASE(p->ctx);
+}
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *o = mapper->owner->priv;
+    struct priv *p = mapper->priv;
+    HRESULT hr;
+
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+    mapper->dst_params.hw_subfmt = 0;
+
+    struct ra_imgfmt_desc desc = {0};
+
+    if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+        return -1;
+
+    if (o->opts->zero_copy) {
+        // In the zero-copy path, we create the ra_tex objects in the map
+        // operation, so we just need to store the format of each plane
+        p->num_planes = desc.num_planes;
+        for (int i = 0; i < desc.num_planes; i++)
+            p->fmt[i] = desc.planes[i];
+    } else {
+        struct mp_image layout = {0};
+        mp_image_set_params(&layout, &mapper->dst_params);
+
+        DXGI_FORMAT copy_fmt;
+        switch (mapper->dst_params.imgfmt) {
+        case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break;
+        case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break;
+        default: return -1;
+        }
+
+        D3D11_TEXTURE2D_DESC copy_desc = {
+            .Width = mapper->dst_params.w,
+            .Height = mapper->dst_params.h,
+            .MipLevels = 1,
+            .ArraySize = 1,
+            .SampleDesc.Count = 1,
+            .Format = copy_fmt,
+            .BindFlags = D3D11_BIND_SHADER_RESOURCE,
+        };
+        hr = ID3D11Device_CreateTexture2D(o->device, &copy_desc, NULL,
+                                          &p->copy_tex);
+        if (FAILED(hr)) {
+            MP_FATAL(mapper, "Could not create shader resource texture\n");
+            return -1;
+        }
+
+        for (int i = 0; i < desc.num_planes; i++) {
+            mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex,
+                mp_image_plane_w(&layout, i), mp_image_plane_h(&layout, i), 0,
+                desc.planes[i]);
+            if (!mapper->tex[i]) {
+                MP_FATAL(mapper, "Could not create RA texture view\n");
+                return -1;
+            }
+        }
+
+        // A ref to the immediate context is needed for CopySubresourceRegion
+        ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx);
+    }
+
+    return 0;
+}
+
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    ID3D11Texture2D *tex = (void *)mapper->src->planes[0];
+    int subresource = (intptr_t)mapper->src->planes[1];
+
+    if (p->copy_tex) {
+        ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx,
+            (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0,
+            (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) {
+                .left = 0,
+                .top = 0,
+                .front = 0,
+                .right = mapper->dst_params.w,
+                .bottom = mapper->dst_params.h,
+                .back = 1,
+            }), D3D11_COPY_DISCARD);
+
+        // We no longer need the original texture after copying it.
+        mp_image_unrefp(&mapper->src);
+    } else {
+        D3D11_TEXTURE2D_DESC desc2d;
+        ID3D11Texture2D_GetDesc(tex, &desc2d);
+
+        for (int i = 0; i < p->num_planes; i++) {
+            // The video decode texture may include padding, so the size of the
+            // ra_tex needs to be determined by the actual size of the Tex2D
+            bool chroma = i >= 1;
+            int w = desc2d.Width / (chroma ? 2 : 1);
+            int h = desc2d.Height / (chroma ? 2 : 1);
+
+            mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, tex,
+                w, h, subresource, p->fmt[i]);
+            if (!mapper->tex[i])
+                return -1;
+        }
+    }
+
+    return 0;
+}
+
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    if (p->copy_tex)
+        return;
+    for (int i = 0; i < 4; i++)
+        ra_tex_free(mapper->ra, &mapper->tex[i]);
+}
+
+const struct ra_hwdec_driver ra_hwdec_d3d11va = {
+    .name = "d3d11va",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_D3D11, 0},
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/d3d11/hwdec_dxva2dxgi.c b/video/out/d3d11/hwdec_dxva2dxgi.c
new file mode 100644
index 0000000..62158d4
--- /dev/null
+++ b/video/out/d3d11/hwdec_dxva2dxgi.c
@@ -0,0 +1,478 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <d3d9.h>
+#include <d3d11.h>
+#include <dxva2api.h>
+
+#include "common/common.h"
+#include "osdep/windows_utils.h"
+#include "video/hwdec.h"
+#include "video/d3d.h"
+#include "video/out/d3d11/ra_d3d11.h"
+#include "video/out/gpu/hwdec.h"
+
+struct priv_owner {            // driver-wide state: init() fills, uninit() frees
+    struct mp_hwdec_ctx hwctx; // registered through hwdec_devices_add()
+    ID3D11Device *dev11;       // obtained from ra_d3d11_get_device()
+    IDirect3DDevice9Ex *dev9;  // D3D9Ex device created in init()
+};
+
+struct queue_surf {            // one shared D3D9/D3D11 surface in the queue
+    ID3D11Texture2D *tex11;    // D3D11 texture created with MISC_SHARED
+    ID3D11Query *idle11;       // event query ended in surf_release()
+    ID3D11Texture2D *stage11;  // small CPU-readable target (surf_wait_idle11)
+    IDirect3DTexture9 *tex9;   // D3D9 texture opened from tex11's share handle
+    IDirect3DSurface9 *surf9;  // level 0 of tex9; StretchRect() destination
+    IDirect3DSurface9 *stage9; // small lockable target (surf_wait_idle9)
+    struct ra_tex *tex;        // ra_tex wrapping tex11
+
+    bool busy11; // The surface is currently being used by D3D11
+};
+
+struct priv {                   // per-mapper state
+    ID3D11Device *dev11;        // extra reference taken in mapper_init()
+    ID3D11DeviceContext *ctx11; // immediate context of dev11
+    IDirect3DDevice9Ex *dev9;   // extra reference taken in mapper_init()
+
+    // Surface queue stuff. Following Microsoft recommendations, a queue of
+    // surfaces is used to share images between D3D9 and D3D11. This allows
+    // multiple D3D11 frames to be in-flight at once.
+    struct queue_surf **queue;  // queue_len entries, grown by surf_acquire()
+    int queue_len;              // number of surfaces (at most 16)
+    int queue_pos;              // index of the surface used by the next map
+};
+
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *owner = hw->priv;
+    hwdec_devices_remove(hw->devs, &owner->hwctx); // unregister, then free
+    av_buffer_unref(&owner->hwctx.av_device_ref);
+    SAFE_RELEASE(owner->dev11);
+    SAFE_RELEASE(owner->dev9);
+}
+
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    IDirect3D9Ex *d3d9ex = NULL;
+    int ret = -1; // stays -1 on every early exit through "done"
+    HRESULT hr;
+    // Creates the D3D9Ex device and registers it in hw->devs; 0 on success.
+    if (!ra_is_d3d11(hw->ra_ctx->ra))
+        goto done;
+    p->dev11 = ra_d3d11_get_device(hw->ra_ctx->ra);
+    if (!p->dev11)
+        goto done;
+
+    d3d_load_dlls();
+    if (!d3d9_dll) {
+        MP_FATAL(hw, "Failed to load \"d3d9.dll\": %s\n", mp_LastError_to_str());
+        goto done;
+    }
+    if (!dxva2_dll) {
+        MP_FATAL(hw, "Failed to load \"dxva2.dll\": %s\n", mp_LastError_to_str());
+        goto done;
+    }
+    // Direct3DCreate9Ex is looked up at runtime in the loaded d3d9.dll
+    HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D);
+    Direct3DCreate9Ex = (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex");
+    if (!Direct3DCreate9Ex) {
+        MP_FATAL(hw, "Direct3D 9Ex not supported\n");
+        goto done;
+    }
+
+    hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9ex);
+    if (FAILED(hr)) {
+        MP_FATAL(hw, "Couldn't create Direct3D9Ex: %s\n", mp_HRESULT_to_str(hr));
+        goto done;
+    }
+    // Windowed device on the desktop window with a 16x16 back buffer
+    D3DPRESENT_PARAMETERS pparams = {
+        .BackBufferWidth = 16,
+        .BackBufferHeight = 16,
+        .BackBufferCount = 1,
+        .SwapEffect = D3DSWAPEFFECT_DISCARD,
+        .hDeviceWindow = GetDesktopWindow(),
+        .Windowed = TRUE,
+        .Flags = D3DPRESENTFLAG_VIDEO,
+    };
+    hr = IDirect3D9Ex_CreateDeviceEx(d3d9ex, D3DADAPTER_DEFAULT,
+        D3DDEVTYPE_HAL, GetDesktopWindow(), D3DCREATE_NOWINDOWCHANGES |
+        D3DCREATE_FPU_PRESERVE | D3DCREATE_HARDWARE_VERTEXPROCESSING |
+        D3DCREATE_DISABLE_PSGP_THREADING | D3DCREATE_MULTITHREADED, &pparams,
+        NULL, &p->dev9);
+    if (FAILED(hr)) {
+        MP_FATAL(hw, "Failed to create Direct3D9Ex device: %s\n",
+                 mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    // Check if it's possible to StretchRect() from NV12 to XRGB surfaces
+    hr = IDirect3D9Ex_CheckDeviceFormatConversion(d3d9ex, D3DADAPTER_DEFAULT,
+        D3DDEVTYPE_HAL, MAKEFOURCC('N', 'V', '1', '2'), D3DFMT_X8R8G8B8);
+    if (hr != S_OK) {
+        MP_FATAL(hw, "Can't StretchRect from NV12 to XRGB surfaces\n");
+        goto done;
+    }
+    // Wrap the D3D9 device in a hwdec context advertising IMGFMT_DXVA2
+    p->hwctx = (struct mp_hwdec_ctx){
+        .driver_name = hw->driver->name,
+        .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->dev9),
+        .hw_imgfmt = IMGFMT_DXVA2,
+    };
+
+    if (!p->hwctx.av_device_ref) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+        goto done;
+    }
+
+    hwdec_devices_add(hw->devs, &p->hwctx);
+
+    ret = 0;
+done:
+    SAFE_RELEASE(d3d9ex); // only needed during setup; p->dev9 is kept
+    return ret;
+}
+
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    struct priv *priv = mapper->priv;
+    // Hold our own references; mapper_uninit() releases all three objects
+    priv->dev11 = owner->dev11;
+    ID3D11Device_AddRef(priv->dev11);
+    priv->dev9 = owner->dev9;
+    IDirect3DDevice9Ex_AddRef(priv->dev9);
+    ID3D11Device_GetImmediateContext(priv->dev11, &priv->ctx11);
+    // The output keeps the source parameters but is presented as IMGFMT_RGB0
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = IMGFMT_RGB0;
+    mapper->dst_params.hw_subfmt = 0;
+    return 0;
+}
+
+static void surf_destroy(struct ra_hwdec_mapper *mapper,
+                         struct queue_surf *surf)
+{
+    if (surf) { // NULL is accepted and ignored
+        SAFE_RELEASE(surf->tex11);
+        SAFE_RELEASE(surf->idle11);
+        SAFE_RELEASE(surf->stage11);
+        SAFE_RELEASE(surf->tex9);
+        SAFE_RELEASE(surf->surf9);
+        SAFE_RELEASE(surf->stage9);
+        ra_tex_free(mapper->ra, &surf->tex);
+        talloc_free(surf);
+    }
+}
+
+static struct queue_surf *surf_create(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    IDXGIResource *res11 = NULL;
+    bool success = false;
+    HRESULT hr;
+    // Creates one queue entry (D3D11 texture shared with D3D9); NULL on failure.
+    struct queue_surf *surf = talloc_zero(p, struct queue_surf);
+    // surf is zeroed above so the error path can surf_destroy() it at any point
+    D3D11_TEXTURE2D_DESC desc11 = {
+        .Width = mapper->src->w,
+        .Height = mapper->src->h,
+        .MipLevels = 1,
+        .ArraySize = 1,
+        .Format = DXGI_FORMAT_B8G8R8X8_UNORM,
+        .SampleDesc.Count = 1,
+        .Usage = D3D11_USAGE_DEFAULT,
+        .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET,
+        .MiscFlags = D3D11_RESOURCE_MISC_SHARED,
+    };
+    hr = ID3D11Device_CreateTexture2D(p->dev11, &desc11, NULL, &surf->tex11);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to create D3D11 texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    // Try to use a 16x16 staging texture, unless the source surface is
+    // smaller. Ideally, a 1x1 texture would be sufficient, but Microsoft's
+    // D3D9ExDXGISharedSurf example uses 16x16 to avoid driver bugs.
+    D3D11_TEXTURE2D_DESC sdesc11 = {
+        .Width = MPMIN(16, desc11.Width),
+        .Height = MPMIN(16, desc11.Height),
+        .MipLevels = 1,
+        .ArraySize = 1,
+        .Format = DXGI_FORMAT_B8G8R8X8_UNORM,
+        .SampleDesc.Count = 1,
+        .Usage = D3D11_USAGE_STAGING,
+        .CPUAccessFlags = D3D11_CPU_ACCESS_READ,
+    };
+    hr = ID3D11Device_CreateTexture2D(p->dev11, &sdesc11, NULL, &surf->stage11);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to create D3D11 staging texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    hr = ID3D11Texture2D_QueryInterface(surf->tex11, &IID_IDXGIResource,
+                                        (void**)&res11);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to get share handle: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    HANDLE share_handle;
+    hr = IDXGIResource_GetSharedHandle(res11, &share_handle);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to get share handle: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    hr = ID3D11Device_CreateQuery(p->dev11,
+        &(D3D11_QUERY_DESC) { D3D11_QUERY_EVENT }, &surf->idle11);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to create D3D11 query: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    // Share the D3D11 texture with D3D9Ex
+    hr = IDirect3DDevice9Ex_CreateTexture(p->dev9, desc11.Width, desc11.Height,
+        1, D3DUSAGE_RENDERTARGET, D3DFMT_X8R8G8B8, D3DPOOL_DEFAULT,
+        &surf->tex9, &share_handle);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to create D3D9 texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    hr = IDirect3DTexture9_GetSurfaceLevel(surf->tex9, 0, &surf->surf9);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to get D3D9 surface: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    // As above, try to use a 16x16 staging texture to avoid driver bugs
+    hr = IDirect3DDevice9Ex_CreateRenderTarget(p->dev9,
+        MPMIN(16, desc11.Width), MPMIN(16, desc11.Height), D3DFMT_X8R8G8B8,
+        D3DMULTISAMPLE_NONE, 0, TRUE, &surf->stage9, NULL);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to create D3D9 staging surface: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    surf->tex = ra_d3d11_wrap_tex(mapper->ra, (ID3D11Resource *)surf->tex11);
+    if (!surf->tex)
+        goto done;
+
+    success = true;
+done:
+    if (!success)
+        surf_destroy(mapper, surf);
+    SAFE_RELEASE(res11);
+    return success ? surf : NULL;
+}
+
+// Non-blocking poll: returns true once D3D11 is no longer using the surface
+static bool surf_is_idle11(struct ra_hwdec_mapper *mapper,
+                           struct queue_surf *surf)
+{
+    struct priv *priv = mapper->priv;
+
+    if (surf->busy11) {
+        // The event query was ended in surf_release(); S_FALSE means the
+        // result is not available yet, which is treated as "still busy"
+        BOOL done;
+        HRESULT res = ID3D11DeviceContext_GetData(priv->ctx11,
+            (ID3D11Asynchronous *)surf->idle11, &done, sizeof(done),
+            D3D11_ASYNC_GETDATA_DONOTFLUSH);
+        if (FAILED(res) || res == S_FALSE || !done)
+            return false;
+        surf->busy11 = false;
+    }
+
+    return true;
+}
+
+// Blocking counterpart of surf_is_idle11(): stalls until D3D11 has finished
+// with the surface. Should only be needed in the queue-underflow case.
+static bool surf_wait_idle11(struct ra_hwdec_mapper *mapper,
+                             struct queue_surf *surf)
+{
+    struct priv *priv = mapper->priv;
+    ID3D11Resource *staging = (ID3D11Resource *)surf->stage11;
+
+    // Copy the top-left corner (at most 16x16) into the staging texture
+    D3D11_BOX corner = {
+        .right = MPMIN(16, mapper->src->w),
+        .bottom = MPMIN(16, mapper->src->h),
+        .back = 1,
+    };
+    ID3D11DeviceContext_CopySubresourceRegion(priv->ctx11, staging, 0, 0, 0, 0,
+        (ID3D11Resource *)surf->tex11, 0, &corner);
+
+    // Mapping for read blocks until the GPU is done (see surf_wait_idle9())
+    D3D11_MAPPED_SUBRESOURCE mapped = {0};
+    HRESULT hr = ID3D11DeviceContext_Map(priv->ctx11, staging, 0,
+                                         D3D11_MAP_READ, 0, &mapped);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Couldn't map D3D11 staging texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        return false;
+    }
+    ID3D11DeviceContext_Unmap(priv->ctx11, staging, 0);
+    surf->busy11 = false;
+    return true;
+}
+
+static bool surf_wait_idle9(struct ra_hwdec_mapper *mapper,
+                            struct queue_surf *surf)
+{
+    struct priv *p = mapper->priv;
+    HRESULT hr;
+    // Blocks until D3D9 is done with surf, then marks it as in use by D3D11.
+    // Rather than polling for the surface to become idle, copy part of the
+    // surface to a staging texture and map it. This should block until the
+    // surface becomes idle. Microsoft's ISurfaceQueue does this as well.
+    RECT rc = {0, 0, MPMIN(16, mapper->src->w), MPMIN(16, mapper->src->h)};
+    hr = IDirect3DDevice9Ex_StretchRect(p->dev9, surf->surf9, &rc, surf->stage9,
+                                        &rc, D3DTEXF_NONE);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Couldn't copy to D3D9 staging texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        return false;
+    }
+
+    D3DLOCKED_RECT lock;
+    hr = IDirect3DSurface9_LockRect(surf->stage9, &lock, NULL, D3DLOCK_READONLY);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Couldn't map D3D9 staging texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        return false;
+    }
+
+    IDirect3DSurface9_UnlockRect(surf->stage9);
+    surf->busy11 = true; // the surface just synced is now owned by D3D11
+    return true;
+}
+
+static struct queue_surf *surf_acquire(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    // Fast path: the surface at the queue position is not in use by D3D11
+    if (priv->queue_len && surf_is_idle11(mapper, priv->queue[priv->queue_pos]))
+        return priv->queue[priv->queue_pos];
+    if (priv->queue_len >= 16) {
+        // For sanity the queue is capped at 16 surfaces; block, don't poll
+        if (!surf_wait_idle11(mapper, priv->queue[priv->queue_pos]))
+            return NULL;
+        MP_WARN(mapper, "Queue underflow!\n");
+        return priv->queue[priv->queue_pos];
+    }
+
+    // Queue is empty or the next surface is busy: grow it at queue_pos
+    struct queue_surf *fresh = surf_create(mapper);
+    if (!fresh)
+        return NULL;
+    MP_TARRAY_INSERT_AT(priv, priv->queue, priv->queue_len, priv->queue_pos,
+                        fresh);
+    MP_DBG(mapper, "Queue grew to %d surfaces\n", priv->queue_len);
+    return priv->queue[priv->queue_pos];
+}
+
+static void surf_release(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    struct queue_surf *current = priv->queue[priv->queue_pos];
+
+    // End the event query that surf_is_idle11() polls, then move on to the
+    // next surface, wrapping around at the end of the queue
+    ID3D11DeviceContext_End(priv->ctx11, (ID3D11Asynchronous *)current->idle11);
+    if (++priv->queue_pos >= priv->queue_len)
+        priv->queue_pos = 0;
+}
+
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    // Tear down every queued surface before dropping the device references
+    for (int n = 0; n < priv->queue_len; n++)
+        surf_destroy(mapper, priv->queue[n]);
+    SAFE_RELEASE(priv->ctx11);
+    SAFE_RELEASE(priv->dev9);
+    SAFE_RELEASE(priv->dev11);
+}
+
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    struct queue_surf *dst = surf_acquire(mapper);
+    if (!dst)
+        return -1;
+
+    // The frame's IDirect3DSurface9 is carried in planes[3]; convert it into
+    // the shared XRGB surface with an unscaled, full-frame StretchRect()
+    IDirect3DSurface9 *src9 = (IDirect3DSurface9 *)mapper->src->planes[3];
+    RECT full = {0, 0, mapper->src->w, mapper->src->h};
+    HRESULT hr = IDirect3DDevice9Ex_StretchRect(priv->dev9, src9, &full,
+        dst->surf9, &full, D3DTEXF_NONE);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "StretchRect() failed: %s\n", mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    // Let D3D9 finish before the texture is exposed as mapper->tex[0]
+    if (!surf_wait_idle9(mapper, dst))
+        return -1;
+    mapper->tex[0] = dst->tex;
+    return 0;
+}
+
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    // Nothing to do unless the surface at queue_pos is the one that is mapped
+    if (priv->queue_pos >= priv->queue_len)
+        return;
+    if (priv->queue[priv->queue_pos]->tex != mapper->tex[0])
+        return;
+    surf_release(mapper);
+    mapper->tex[0] = NULL;
+}
+
+const struct ra_hwdec_driver ra_hwdec_dxva2dxgi = { // driver entry "dxva2-dxgi"
+    .name = "dxva2-dxgi",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_DXVA2, 0}, // trailing 0 presumably terminates the list
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){ // per-mapper callbacks
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c
new file mode 100644
index 0000000..84fd004
--- /dev/null
+++ b/video/out/d3d11/ra_d3d11.c
@@ -0,0 +1,2544 @@
+#include <windows.h>
+#include <versionhelpers.h>
+#include <d3d11_1.h>
+#include <d3d11sdklayers.h>
+#include <dxgi1_2.h>
+#include <d3dcompiler.h>
+#include <spirv_cross_c.h>
+
+#include "common/msg.h"
+#include "osdep/io.h"
+#include "osdep/subprocess.h"
+#include "osdep/timer.h"
+#include "osdep/windows_utils.h"
+#include "video/out/gpu/spirv.h"
+#include "video/out/gpu/utils.h"
+
+#include "ra_d3d11.h"
+
+#ifndef D3D11_1_UAV_SLOT_COUNT
+#define D3D11_1_UAV_SLOT_COUNT (64)
+#endif
+#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80)
+
+// D3D11.3 message IDs, not present in mingw-w64 v9
+#define D3D11_MESSAGE_ID_CREATE_FENCE  ((D3D11_MESSAGE_ID)0x300209)
+#define D3D11_MESSAGE_ID_DESTROY_FENCE ((D3D11_MESSAGE_ID)0x30020b)
+
+struct dll_version { // four-part version, compared by dll_version_equal()
+    uint16_t major;
+    uint16_t minor;
+    uint16_t build;
+    uint16_t revision;
+};
+
+struct ra_d3d11 { // backend state stored in ra->priv
+    struct spirv_compiler *spirv;
+
+    ID3D11Device *dev; // device used for format queries and resource creation
+    ID3D11Device1 *dev1;
+    ID3D11DeviceContext *ctx;
+    ID3D11DeviceContext1 *ctx1;
+    pD3DCompile D3DCompile;
+
+    struct dll_version d3d_compiler_ver;
+
+    // Debug interfaces (--gpu-debug)
+    ID3D11Debug *debug;
+    ID3D11InfoQueue *iqueue;
+
+    // Device capabilities
+    D3D_FEATURE_LEVEL fl; // storage images/UAVs are only used on 11_0 and up
+    bool has_clear_view;
+    bool has_timestamp_queries;
+    int max_uavs;
+
+    // Streaming dynamic vertex buffer, which is used for all renderpasses
+    ID3D11Buffer *vbuf;
+    size_t vbuf_size;
+    size_t vbuf_used;
+
+    // clear() renderpass resources (only used when has_clear_view is false)
+    ID3D11PixelShader *clear_ps;
+    ID3D11VertexShader *clear_vs;
+    ID3D11InputLayout *clear_layout;
+    ID3D11Buffer *clear_vbuf;
+    ID3D11Buffer *clear_cbuf;
+
+    // blit() renderpass resources
+    ID3D11PixelShader *blit_float_ps;
+    ID3D11VertexShader *blit_vs;
+    ID3D11InputLayout *blit_layout;
+    ID3D11Buffer *blit_vbuf;
+    ID3D11SamplerState *blit_sampler;
+};
+
+struct d3d_tex { // backend state stored in ra_tex->priv
+    // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not
+    // hold an additional reference to the texture object.
+    ID3D11Resource *res;
+
+    ID3D11Texture1D *tex1d;
+    ID3D11Texture2D *tex2d;
+    ID3D11Texture3D *tex3d;
+    int array_slice; // slice viewed by the SRV, or -1 for non-array textures
+
+    // Staging texture for tex_download(), 2D only
+    ID3D11Texture2D *staging;
+
+    ID3D11ShaderResourceView *srv;  // created for render_src or blit_src
+    ID3D11RenderTargetView *rtv;    // created for render_dst or blit_dst
+    ID3D11UnorderedAccessView *uav; // created for storage_dst (FL 11_0+)
+    ID3D11SamplerState *sampler;    // created for render_src
+};
+
+struct d3d_buf { // presumably the private state of an ra_buf - TODO confirm
+    ID3D11Buffer *buf;
+    ID3D11UnorderedAccessView *uav;
+    void *data; // System-memory mirror of the data in buf
+    bool dirty; // Is buf out of date?
+};
+
+struct d3d_rpass { // presumably the private state of a renderpass - confirm
+    ID3D11PixelShader *ps;
+    ID3D11VertexShader *vs;
+    ID3D11ComputeShader *cs;
+    ID3D11InputLayout *layout;
+    ID3D11BlendState *bstate;
+};
+
+struct d3d_timer { // three D3D11 queries plus the latched result
+    ID3D11Query *ts_start; // presumably a timestamp query - TODO confirm
+    ID3D11Query *ts_end;   // presumably a timestamp query - TODO confirm
+    ID3D11Query *disjoint; // presumably a timestamp-disjoint query - confirm
+    uint64_t result; // Latches the result from the previous use of the timer
+};
+
+struct d3d_fmt { // one row of the formats[] table
+    const char *name;     // becomes ra_format.name
+    int components;       // becomes ra_format.num_components
+    int bytes;            // becomes ra_format.pixel_size
+    int bits[4];          // per-component size/depth, first `components` used
+    DXGI_FORMAT fmt;      // DXGI format checked against the device
+    enum ra_ctype ctype;  // becomes ra_format.ctype
+    bool unordered;       // inverted into ra_format.ordered
+};
+
+static const char clear_vs[] = "\
+float4 main(float2 pos : POSITION) : SV_Position\n\
+{\n\
+    return float4(pos, 0.0, 1.0);\n\
+}\n\
+"; // HLSL source: vertex shader that forwards the 2D position unchanged
+
+static const char clear_ps[] = "\
+cbuffer ps_cbuf : register(b0) {\n\
+    float4 color : packoffset(c0);\n\
+}\n\
+\n\
+float4 main(float4 pos : SV_Position) : SV_Target\n\
+{\n\
+    return color;\n\
+}\n\
+"; // HLSL source: pixel shader that outputs the color held in cbuffer b0
+
+struct blit_vert { // presumably one blit vertex: position x,y and texcoord u,v
+    float x, y, u, v;
+};
+
+static const char blit_vs[] = "\
+void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\
+          out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\
+{\n\
+    out_pos = float4(pos, 0.0, 1.0);\n\
+    out_coord = coord;\n\
+}\n\
+"; // HLSL source: vertex shader passing position and texcoord through
+
+static const char blit_float_ps[] = "\
+Texture2D<float4> tex : register(t0);\n\
+SamplerState samp : register(s0);\n\
+\n\
+float4 main(float4 pos : SV_Position, float2 coord : TEXCOORD0) : SV_Target\n\
+{\n\
+    return tex.Sample(samp, coord);\n\
+}\n\
+"; // HLSL source: pixel shader sampling tex (t0) through samp (s0)
+
+#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t
+static struct d3d_fmt formats[] = { // name, components, bytes, bits, fmt/ctype
+    { "r8",       1,  1, { 8},             DXFMT(R8, UNORM)           },
+    { "rg8",      2,  2, { 8,  8},         DXFMT(R8G8, UNORM)         },
+    { "rgba8",    4,  4, { 8,  8,  8,  8}, DXFMT(R8G8B8A8, UNORM)     },
+    { "r16",      1,  2, {16},             DXFMT(R16, UNORM)          },
+    { "rg16",     2,  4, {16, 16},         DXFMT(R16G16, UNORM)       },
+    { "rgba16",   4,  8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) },
+    // 32-bit unsigned integer formats
+    { "r32ui",    1,  4, {32},             DXFMT(R32, UINT)           },
+    { "rg32ui",   2,  8, {32, 32},         DXFMT(R32G32, UINT)        },
+    { "rgb32ui",  3, 12, {32, 32, 32},     DXFMT(R32G32B32, UINT)     },
+    { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT)  },
+    // 16-bit and 32-bit float formats
+    { "r16hf",    1,  2, {16},             DXFMT(R16, FLOAT)          },
+    { "rg16hf",   2,  4, {16, 16},         DXFMT(R16G16, FLOAT)       },
+    { "rgba16hf", 4,  8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) },
+    { "r32f",     1,  4, {32},             DXFMT(R32, FLOAT)          },
+    { "rg32f",    2,  8, {32, 32},         DXFMT(R32G32, FLOAT)       },
+    { "rgb32f",   3, 12, {32, 32, 32},     DXFMT(R32G32B32, FLOAT)    },
+    { "rgba32f",  4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) },
+    // Packed 10/10/10/2 format and the formats flagged as unordered
+    { "rgb10_a2", 4,  4, {10, 10, 10,  2}, DXFMT(R10G10B10A2, UNORM)  },
+    { "bgra8",    4,  4, { 8,  8,  8,  8}, DXFMT(B8G8R8A8, UNORM), .unordered = true },
+    { "bgrx8",    3,  4, { 8,  8,  8},     DXFMT(B8G8R8X8, UNORM), .unordered = true },
+};
+
+static bool dll_version_equal(struct dll_version a, struct dll_version b)
+{
+    // Versions match only if all four parts are identical
+    if (a.major != b.major || a.minor != b.minor)
+        return false;
+    return a.build == b.build && a.revision == b.revision;
+}
+
+DXGI_FORMAT ra_d3d11_get_format(const struct ra_format *fmt)
+{
+    // fmt->priv is the d3d_fmt table entry stored by setup_formats()
+    return ((const struct d3d_fmt *)fmt->priv)->fmt;
+}
+
+const struct ra_format *ra_d3d11_get_ra_format(struct ra *ra, DXGI_FORMAT fmt)
+{
+    // Linear search over the formats registered with this ra; NULL when no
+    // registered format maps to the requested DXGI format
+    const struct ra_format *match = NULL;
+    for (int n = 0; !match && n < ra->num_formats; n++) {
+        if (ra_d3d11_get_format(ra->formats[n]) == fmt)
+            match = ra->formats[n];
+    }
+    return match;
+}
+
+static void setup_formats(struct ra *ra)
+{
+    // All formats must be usable as a 2D texture
+    static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D;
+    // SHADER_SAMPLE indicates support for linear sampling, point always works
+    static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE;
+    // RA requires renderable surfaces to be blendable as well
+    static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET |
+                                   D3D11_FORMAT_SUPPORT_BLENDABLE;
+    // Typed UAVs are equivalent to images. RA only cares if they're storable.
+    static const UINT sup_store = D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW;
+    static const UINT sup2_store = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE;
+
+    struct ra_d3d11 *p = ra->priv;
+    HRESULT hr;
+    // Append every formats[] entry the device supports to ra->formats
+    for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) {
+        struct d3d_fmt *d3dfmt = &formats[i];
+        UINT support = 0;
+        hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support);
+        if (FAILED(hr))
+            continue;
+        if ((support & sup_basic) != sup_basic)
+            continue;
+        // Second query for the typed-UAV store bit; its result is not checked
+        D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3dfmt->fmt };
+        ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2,
+                                         &sup2, sizeof(sup2));
+        UINT support2 = sup2.OutFormatSupport2;
+        // Translate the table entry and the support bits into an ra_format
+        struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+        *fmt = (struct ra_format) {
+            .name           = d3dfmt->name,
+            .priv           = d3dfmt,
+            .ctype          = d3dfmt->ctype,
+            .ordered        = !d3dfmt->unordered,
+            .num_components = d3dfmt->components,
+            .pixel_size     = d3dfmt->bytes,
+            .linear_filter  = (support & sup_filter) == sup_filter,
+            .renderable     = (support & sup_render) == sup_render,
+            .storable       = p->fl >= D3D_FEATURE_LEVEL_11_0 &&
+                              (support & sup_store) == sup_store &&
+                              (support2 & sup2_store) == sup2_store,
+        };
+        // A single format usable as Texture1D is enough to set RA_CAP_TEX_1D
+        if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D)
+            ra->caps |= RA_CAP_TEX_1D;
+
+        for (int j = 0; j < d3dfmt->components; j++)
+            fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j];
+
+        fmt->glsl_format = ra_fmt_glsl_format(fmt);
+
+        MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+    }
+}
+
+static bool tex_init(struct ra *ra, struct ra_tex *tex)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_tex *tex_p = tex->priv;
+    struct ra_tex_params *params = &tex->params;
+    HRESULT hr;
+    // Creates the views and sampler requested by tex->params (false on error).
+    // A SRV is required for renderpasses and blitting, since blitting can use
+    // a renderpass internally
+    if (params->render_src || params->blit_src) {
+        // Always specify the SRV format for simplicity. This will match the
+        // texture format for textures created with tex_create, but it can be
+        // different for wrapped planar video textures.
+        D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = {
+            .Format = ra_d3d11_get_format(params->format),
+        };
+        switch (params->dimensions) {
+        case 1:
+            if (tex_p->array_slice >= 0) {
+                srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY;
+                srvdesc.Texture1DArray.MipLevels = 1;
+                srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice;
+                srvdesc.Texture1DArray.ArraySize = 1;
+            } else {
+                srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D;
+                srvdesc.Texture1D.MipLevels = 1;
+            }
+            break;
+        case 2:
+            if (tex_p->array_slice >= 0) {
+                srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY;
+                srvdesc.Texture2DArray.MipLevels = 1;
+                srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice;
+                srvdesc.Texture2DArray.ArraySize = 1;
+            } else {
+                srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D;
+                srvdesc.Texture2D.MipLevels = 1;
+            }
+            break;
+        case 3:
+            // D3D11 does not have Texture3D arrays
+            srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D;
+            srvdesc.Texture3D.MipLevels = 1;
+            break;
+        }
+        hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc,
+                                                   &tex_p->srv);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr));
+            goto error;
+        }
+    }
+
+    // Samplers are required for renderpasses, but not blitting, since the blit
+    // code uses its own point sampler
+    if (params->render_src) {
+        D3D11_SAMPLER_DESC sdesc = {
+            .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
+            .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
+            .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
+            .ComparisonFunc = D3D11_COMPARISON_NEVER,
+            .MinLOD = 0,
+            .MaxLOD = D3D11_FLOAT32_MAX,
+            .MaxAnisotropy = 1,
+        };
+        if (params->src_linear)
+            sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR;
+        if (params->src_repeat) {
+            sdesc.AddressU = sdesc.AddressV = sdesc.AddressW =
+                D3D11_TEXTURE_ADDRESS_WRAP;
+        }
+        // The runtime pools sampler state objects internally, so we don't have
+        // to worry about resource usage when creating one for every ra_tex
+        hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr));
+            goto error;
+        }
+    }
+
+    // Like SRVs, an RTV is required for renderpass output and blitting
+    if (params->render_dst || params->blit_dst) {
+        hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL,
+                                                 &tex_p->rtv);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr));
+            goto error;
+        }
+    }
+    // A UAV is only created for storage_dst on feature level 11_0 and above
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) {
+        hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL,
+                                                    &tex_p->uav);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
+            goto error;
+        }
+    }
+
+    return true;
+error:
+    return false; // views created so far stay in tex_p; nothing is released here
+}
+
+static void tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+    if (tex) { // NULL is accepted and ignored
+        struct d3d_tex *d3d = tex->priv;
+        // Drop the views and sampler, then the texture reference held in res
+        SAFE_RELEASE(d3d->srv);
+        SAFE_RELEASE(d3d->rtv);
+        SAFE_RELEASE(d3d->uav);
+        SAFE_RELEASE(d3d->sampler);
+        SAFE_RELEASE(d3d->res);
+        SAFE_RELEASE(d3d->staging);
+        talloc_free(tex); // d3d was allocated with tex as its talloc parent
+    }
+}
+
+static struct ra_tex *tex_create(struct ra *ra,
+                                 const struct ra_tex_params *params)
+{
+    // Only 2D textures may be downloaded for now
+    if (params->downloadable && params->dimensions != 2)
+        return NULL;
+
+    struct ra_d3d11 *p = ra->priv;
+    HRESULT hr;
+    // Creates a 1D/2D/3D texture and its views; returns NULL on failure.
+    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+    tex->params = *params;
+    tex->params.initial_data = NULL;
+
+    struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+    DXGI_FORMAT fmt = ra_d3d11_get_format(params->format);
+    // Optional initial contents, described as rows of w * pixel_size bytes
+    D3D11_SUBRESOURCE_DATA data;
+    D3D11_SUBRESOURCE_DATA *pdata = NULL;
+    if (params->initial_data) {
+        data = (D3D11_SUBRESOURCE_DATA) {
+            .pSysMem = params->initial_data,
+            .SysMemPitch = params->w * params->format->pixel_size,
+        };
+        if (params->dimensions >= 3)
+            data.SysMemSlicePitch = data.SysMemPitch * params->h;
+        pdata = &data;
+    }
+
+    D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+    D3D11_BIND_FLAG bind_flags = 0;
+    // Bind flags mirror the view types that tex_init() creates later
+    if (params->render_src || params->blit_src)
+        bind_flags |= D3D11_BIND_SHADER_RESOURCE;
+    if (params->render_dst || params->blit_dst)
+        bind_flags |= D3D11_BIND_RENDER_TARGET;
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst)
+        bind_flags |= D3D11_BIND_UNORDERED_ACCESS;
+
+    // Apparently IMMUTABLE textures are efficient, so try to infer whether we
+    // can use one
+    if (params->initial_data && !params->render_dst && !params->storage_dst &&
+        !params->blit_dst && !params->host_mutable)
+        usage = D3D11_USAGE_IMMUTABLE;
+
+    switch (params->dimensions) {
+    case 1:;
+        D3D11_TEXTURE1D_DESC desc1d = {
+            .Width = params->w,
+            .MipLevels = 1,
+            .ArraySize = 1,
+            .Format = fmt,
+            .Usage = usage,
+            .BindFlags = bind_flags,
+        };
+        hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create Texture1D: %s\n",
+                   mp_HRESULT_to_str(hr));
+            goto error;
+        }
+        tex_p->res = (ID3D11Resource *)tex_p->tex1d;
+        break;
+    case 2:;
+        D3D11_TEXTURE2D_DESC desc2d = {
+            .Width = params->w,
+            .Height = params->h,
+            .MipLevels = 1,
+            .ArraySize = 1,
+            .SampleDesc.Count = 1,
+            .Format = fmt,
+            .Usage = usage,
+            .BindFlags = bind_flags,
+        };
+        hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create Texture2D: %s\n",
+                   mp_HRESULT_to_str(hr));
+            goto error;
+        }
+        tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+
+        // Create a staging texture with CPU access for tex_download()
+        if (params->downloadable) {
+            desc2d.BindFlags = 0;
+            desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ;
+            desc2d.Usage = D3D11_USAGE_STAGING;
+
+            hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL,
+                                              &tex_p->staging);
+            if (FAILED(hr)) {
+                MP_ERR(ra, "Failed to create staging texture: %s\n",
+                       mp_HRESULT_to_str(hr));
+                goto error;
+            }
+        }
+        break;
+    case 3:;
+        D3D11_TEXTURE3D_DESC desc3d = {
+            .Width = params->w,
+            .Height = params->h,
+            .Depth = params->d,
+            .MipLevels = 1,
+            .Format = fmt,
+            .Usage = usage,
+            .BindFlags = bind_flags,
+        };
+        hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create Texture3D: %s\n",
+                   mp_HRESULT_to_str(hr));
+            goto error;
+        }
+        tex_p->res = (ID3D11Resource *)tex_p->tex3d;
+        break;
+    default:
+        MP_ASSERT_UNREACHABLE();
+    }
+    // -1 makes tex_init() create non-array SRVs for this texture
+    tex_p->array_slice = -1;
+
+    if (!tex_init(ra, tex))
+        goto error;
+
+    return tex;
+
+error:
+    tex_destroy(ra, tex);
+    return NULL;
+}
+
+// Wrap an existing D3D11 resource in a new ra_tex instead of creating a
+// texture. Only Texture2D resources with exactly one mip level, one array
+// element, one sample and D3D11_USAGE_DEFAULT are accepted. The ra_tex_params
+// (size, format, render/blit/storage capabilities) are derived from the
+// resource's own description. Returns NULL after logging the reason if the
+// resource can't be wrapped.
+struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res)
+{
+    HRESULT hr;
+
+    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+    struct ra_tex_params *params = &tex->params;
+    struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex);
+
+    // These are filled in from the resource description in the switch below
+    DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN;
+    D3D11_USAGE usage = D3D11_USAGE_DEFAULT;
+    D3D11_BIND_FLAG bind_flags = 0;
+
+    D3D11_RESOURCE_DIMENSION type;
+    ID3D11Resource_GetType(res, &type);
+    switch (type) {
+    case D3D11_RESOURCE_DIMENSION_TEXTURE2D:
+        // The wrapper keeps the interface pointer obtained here, not `res`
+        hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D,
+                                           (void**)&tex_p->tex2d);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Resource is not a ID3D11Texture2D\n");
+            goto error;
+        }
+        tex_p->res = (ID3D11Resource *)tex_p->tex2d;
+
+        // Reject anything that isn't a plain single-subresource texture
+        D3D11_TEXTURE2D_DESC desc2d;
+        ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d);
+        if (desc2d.MipLevels != 1) {
+            MP_ERR(ra, "Mipmapped textures not supported for wrapping\n");
+            goto error;
+        }
+        if (desc2d.ArraySize != 1) {
+            MP_ERR(ra, "Texture arrays not supported for wrapping\n");
+            goto error;
+        }
+        if (desc2d.SampleDesc.Count != 1) {
+            MP_ERR(ra, "Multisampled textures not supported for wrapping\n");
+            goto error;
+        }
+
+        params->dimensions = 2;
+        params->w = desc2d.Width;
+        params->h = desc2d.Height;
+        params->d = 1;
+        usage = desc2d.Usage;
+        bind_flags = desc2d.BindFlags;
+        fmt = desc2d.Format;
+        break;
+    default:
+        // We could wrap Texture1D/3D as well, but keep it simple, since this
+        // function is only used for swapchain backbuffers at the moment
+        MP_ERR(ra, "Resource is not suitable to wrap\n");
+        goto error;
+    }
+
+    // Map the DXGI format back to the first RA format that uses it
+    for (int i = 0; i < ra->num_formats; i++) {
+        DXGI_FORMAT target_fmt = ra_d3d11_get_format(ra->formats[i]);
+        if (fmt == target_fmt) {
+            params->format = ra->formats[i];
+            break;
+        }
+    }
+    if (!params->format) {
+        MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n");
+        goto error;
+    }
+
+    // Translate the D3D11 bind flags into RA capabilities
+    if (bind_flags & D3D11_BIND_SHADER_RESOURCE) {
+        params->render_src = params->blit_src = true;
+        params->src_linear = params->format->linear_filter;
+    }
+    if (bind_flags & D3D11_BIND_RENDER_TARGET)
+        params->render_dst = params->blit_dst = true;
+    if (bind_flags & D3D11_BIND_UNORDERED_ACCESS)
+        params->storage_dst = true;
+
+    if (usage != D3D11_USAGE_DEFAULT) {
+        MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n");
+        goto error;
+    }
+
+    // -1 means the texture is not a slice of a texture array
+    tex_p->array_slice = -1;
+
+    if (!tex_init(ra, tex))
+        goto error;
+
+    return tex;
+error:
+    // Shared failure path: hand the partially set up texture to tex_destroy()
+    tex_destroy(ra, tex);
+    return NULL;
+}
+
+// Wrap a video texture (optionally one slice of a texture array) as a
+// sampleable ra_tex of size w x h. The wrapper takes its own reference on
+// `res`. `fmt` is the RA format to expose; it may differ from the texture's
+// native format for planar video textures. Returns NULL if the texture can't
+// be bound as a shader resource or if tex_init() fails.
+struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
+                                       int w, int h, int array_slice,
+                                       const struct ra_format *fmt)
+{
+    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+    struct d3d_tex *priv = talloc_zero(tex, struct d3d_tex);
+    tex->priv = priv;
+
+    // Keep the underlying texture alive for as long as the wrapper exists
+    priv->tex2d = res;
+    priv->res = (ID3D11Resource *)priv->tex2d;
+    ID3D11Texture2D_AddRef(res);
+
+    D3D11_TEXTURE2D_DESC desc;
+    ID3D11Texture2D_GetDesc(priv->tex2d, &desc);
+    if ((desc.BindFlags & D3D11_BIND_SHADER_RESOURCE) == 0) {
+        MP_ERR(ra, "Video resource is not bindable\n");
+        tex_destroy(ra, tex);
+        return NULL;
+    }
+
+    tex->params.dimensions = 2;
+    tex->params.w = w;
+    tex->params.h = h;
+    tex->params.d = 1;
+    tex->params.render_src = true;
+    tex->params.src_linear = true;
+    // The caller-supplied format wins over the texture's own format
+    tex->params.format = fmt;
+
+    // Only texture arrays have a meaningful slice index; -1 marks "no slice"
+    priv->array_slice = desc.ArraySize > 1 ? array_slice : -1;
+
+    if (tex_init(ra, tex))
+        return tex;
+
+    tex_destroy(ra, tex);
+    return NULL;
+}
+
+// Return the D3D11 resource behind `tex` with an extra reference added for
+// the caller. If `array_slice` is non-NULL it receives the texture's array
+// slice index as stored in the wrapper.
+ID3D11Resource *ra_d3d11_get_raw_tex(struct ra *ra, struct ra_tex *tex,
+                                     int *array_slice)
+{
+    struct d3d_tex *priv = tex->priv;
+    ID3D11Resource *raw = priv->res;
+
+    ID3D11Resource_AddRef(raw);
+    if (array_slice != NULL)
+        *array_slice = priv->array_slice;
+
+    return raw;
+}
+
+// Upload pixel data from system memory (params->src) into a texture. For 2D
+// textures the caller's stride is used and a sub-rectangle (params->rc) may
+// be given; in every other case the whole texture is replaced. Returns false
+// if no source pointer was provided.
+static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct ra_tex *tex = params->tex;
+    struct d3d_tex *tex_p = tex->priv;
+    const struct ra_tex_params *tp = &tex->params;
+
+    if (!params->src) {
+        MP_ERR(ra, "Pixel buffers are not supported\n");
+        return false;
+    }
+
+    // Default pitches, derived from the texture dimensions
+    ptrdiff_t row_pitch = 0;
+    ptrdiff_t depth_pitch = 0;
+    if (tp->dimensions >= 2)
+        row_pitch = tp->w;
+    if (tp->dimensions >= 3)
+        depth_pitch = row_pitch * tp->h;
+
+    D3D11_BOX box;
+    D3D11_BOX *region = NULL;
+    bool discard = true;
+
+    if (tp->dimensions == 2) {
+        row_pitch = params->stride;
+
+        struct mp_rect *r = params->rc;
+        bool covers_all = !r || (r->x0 == 0 && r->y0 == 0 &&
+                                 r->x1 == tp->w && r->y1 == tp->h);
+        if (!covers_all) {
+            // Partial update: only discard old contents if the caller says so
+            box = (D3D11_BOX) {
+                .left = r->x0,
+                .top = r->y0,
+                .front = 0,
+                .right = r->x1,
+                .bottom = r->y1,
+                .back = 1,
+            };
+            region = &box;
+            discard = params->invalidate;
+        }
+    }
+
+    int subresource = 0;
+    if (tex_p->array_slice >= 0)
+        subresource = tex_p->array_slice;
+
+    if (!p->ctx1) {
+        ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource,
+            region, params->src, row_pitch, depth_pitch);
+        return true;
+    }
+
+    // The newer context interface lets us pass the discard hint
+    ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res,
+        subresource, region, params->src, row_pitch, depth_pitch,
+        discard ? D3D11_COPY_DISCARD : 0);
+    return true;
+}
+
+// Read back a texture into params->dst. The texture is first copied into its
+// CPU-readable staging texture, which is then mapped and copied row by row.
+// Returns false if the texture has no staging texture or if mapping fails.
+static bool tex_download(struct ra *ra, struct ra_tex_download_params *params)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct ra_tex *tex = params->tex;
+    struct d3d_tex *tex_p = tex->priv;
+    ID3D11Resource *staging = (ID3D11Resource*)tex_p->staging;
+
+    if (!tex_p->staging)
+        return false;
+
+    ID3D11DeviceContext_CopyResource(p->ctx, staging, tex_p->res);
+
+    D3D11_MAPPED_SUBRESOURCE mapped;
+    HRESULT hr = ID3D11DeviceContext_Map(p->ctx, staging, 0, D3D11_MAP_READ, 0,
+                                         &mapped);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to map staging texture: %s\n", mp_HRESULT_to_str(hr));
+        return false;
+    }
+
+    // Destination and mapped rows may have different pitches; copy whichever
+    // is shorter for each row
+    char *dst_base = params->dst;
+    char *src_base = mapped.pData;
+    size_t row_bytes = MPMIN(params->stride, mapped.RowPitch);
+    int rows = tex->params.h;
+    for (int row = 0; row < rows; row++) {
+        char *dst_row = dst_base + row * params->stride;
+        char *src_row = src_base + row * mapped.RowPitch;
+        memcpy(dst_row, src_row, row_bytes);
+    }
+
+    ID3D11DeviceContext_Unmap(p->ctx, staging, 0);
+
+    return true;
+}
+
+// Release the D3D11 objects owned by a buffer and free it. NULL is accepted
+// and ignored.
+static void buf_destroy(struct ra *ra, struct ra_buf *buf)
+{
+    if (buf) {
+        struct d3d_buf *priv = buf->priv;
+        SAFE_RELEASE(priv->buf);
+        SAFE_RELEASE(priv->uav);
+        talloc_free(buf);
+    }
+}
+
+// Create a D3D11 buffer for shader storage or uniform use.
+//
+// Host-mapped buffers and texture-upload buffers are not supported and yield
+// NULL. The buffer size is rounded up to the alignment the bind type needs
+// (4 bytes for raw UAV buffers, 16 bytes for constant buffers). For
+// host_mutable buffers a system-memory mirror of the contents is kept, which
+// buf_update() writes to and buf_resolve() uploads. Shader storage buffers
+// additionally get a raw unordered access view.
+//
+// Returns the new buffer, or NULL on failure (after logging the error).
+static struct ra_buf *buf_create(struct ra *ra,
+                                 const struct ra_buf_params *params)
+{
+    // D3D11 does not support permanent mapping or pixel buffers
+    if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD)
+        return NULL;
+
+    struct ra_d3d11 *p = ra->priv;
+    HRESULT hr;
+
+    struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+    buf->params = *params;
+    // The caller's initial data is not retained beyond this call
+    buf->params.initial_data = NULL;
+
+    struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf);
+
+    D3D11_BUFFER_DESC desc = { .ByteWidth = params->size };
+    switch (params->type) {
+    case RA_BUF_TYPE_SHADER_STORAGE:
+        desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS;
+        desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float));
+        desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS;
+        break;
+    case RA_BUF_TYPE_UNIFORM:
+        desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER;
+        desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4]));
+        break;
+    }
+
+    // D3D11 doesn't allow constant buffer updates that aren't aligned to a
+    // full constant boundary (vec4,) and some drivers don't allow partial
+    // constant buffer updates at all. To support partial buffer updates, keep
+    // a mirror of the buffer data in system memory and upload the whole thing
+    // before the buffer is used.
+    if (params->host_mutable)
+        buf_p->data = talloc_zero_size(buf, desc.ByteWidth);
+
+    D3D11_SUBRESOURCE_DATA data;
+    D3D11_SUBRESOURCE_DATA *pdata = NULL;
+    void *padded = NULL;
+    if (params->initial_data) {
+        const void *init = params->initial_data;
+        if (buf_p->data) {
+            // Seed the mirror with the initial contents. Otherwise the first
+            // buf_resolve() after a partial update would overwrite the rest
+            // of the GPU buffer with zeros.
+            memcpy(buf_p->data, init, params->size);
+            init = buf_p->data;
+        } else if (desc.ByteWidth > params->size) {
+            // The runtime reads ByteWidth bytes of initial data, which may be
+            // more than the caller provided after rounding up. Stage it in a
+            // zero-padded copy so nothing is read past the caller's buffer.
+            padded = talloc_zero_size(NULL, desc.ByteWidth);
+            memcpy(padded, init, params->size);
+            init = padded;
+        }
+        data = (D3D11_SUBRESOURCE_DATA) { .pSysMem = init };
+        pdata = &data;
+    }
+
+    hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf);
+    // The staging copy (if any) is only needed during buffer creation
+    talloc_free(padded);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    if (params->type == RA_BUF_TYPE_SHADER_STORAGE) {
+        // Expose the buffer as a raw (byte address) view of 32-bit elements
+        D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = {
+            .Format = DXGI_FORMAT_R32_TYPELESS,
+            .ViewDimension = D3D11_UAV_DIMENSION_BUFFER,
+            .Buffer = {
+                .NumElements = desc.ByteWidth / sizeof(float),
+                .Flags = D3D11_BUFFER_UAV_FLAG_RAW,
+            },
+        };
+        hr = ID3D11Device_CreateUnorderedAccessView(p->dev,
+            (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr));
+            goto error;
+        }
+    }
+
+    return buf;
+error:
+    buf_destroy(ra, buf);
+    return NULL;
+}
+
+// Upload the system-memory mirror of a host_mutable buffer to the GPU if it
+// has been modified since the last upload. Does nothing for buffers that are
+// not host_mutable or not dirty.
+static void buf_resolve(struct ra *ra, struct ra_buf *buf)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_buf *priv = buf->priv;
+
+    bool needs_upload = buf->params.host_mutable && priv->dirty;
+    if (needs_upload) {
+        // Replace the whole GPU buffer with the mirrored contents
+        ID3D11Resource *gpu_buf = (ID3D11Resource *)priv->buf;
+        ID3D11DeviceContext_UpdateSubresource(p->ctx, gpu_buf, 0, NULL,
+                                              priv->data, 0, 0);
+        priv->dirty = false;
+    }
+}
+
+// Write `size` bytes from `data` into the buffer's system-memory mirror at
+// `offset` and mark the buffer dirty. Nothing is sent to the GPU here; the
+// mirror is uploaded by buf_resolve().
+//
+// The buffer must have been created with host_mutable set (only then does a
+// mirror exist) and [offset, offset + size) must lie within the buffer.
+// Both conditions are asserted.
+static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+                       const void *data, size_t size)
+{
+    struct d3d_buf *buf_p = buf->priv;
+
+    // Without host_mutable there is no mirror and buf_p->data is NULL
+    assert(buf->params.host_mutable && buf_p->data);
+    // Written so that the range check itself can't overflow
+    assert(offset >= 0 && (size_t)offset <= buf->params.size &&
+           size <= buf->params.size - (size_t)offset);
+
+    char *cdata = buf_p->data;
+    memcpy(cdata + offset, data, size);
+    buf_p->dirty = true;
+}
+
+// Pick the D3DCompile target profile string for a shader stage based on the
+// device's feature level. Feature levels without a dedicated case use the
+// 5_0 profiles. Returns NULL when no profile exists for the combination
+// (compute shaders on the 9_x levels, or an unknown shader type).
+static const char *get_shader_target(struct ra *ra, enum glsl_shader type)
+{
+    struct ra_d3d11 *p = ra->priv;
+
+    switch (type) {
+    case GLSL_SHADER_VERTEX:
+        switch (p->fl) {
+        case D3D_FEATURE_LEVEL_10_1: return "vs_4_1";
+        case D3D_FEATURE_LEVEL_10_0: return "vs_4_0";
+        case D3D_FEATURE_LEVEL_9_3:  return "vs_4_0_level_9_3";
+        case D3D_FEATURE_LEVEL_9_2:
+        case D3D_FEATURE_LEVEL_9_1:  return "vs_4_0_level_9_1";
+        default:                     return "vs_5_0";
+        }
+    case GLSL_SHADER_FRAGMENT:
+        switch (p->fl) {
+        case D3D_FEATURE_LEVEL_10_1: return "ps_4_1";
+        case D3D_FEATURE_LEVEL_10_0: return "ps_4_0";
+        case D3D_FEATURE_LEVEL_9_3:  return "ps_4_0_level_9_3";
+        case D3D_FEATURE_LEVEL_9_2:
+        case D3D_FEATURE_LEVEL_9_1:  return "ps_4_0_level_9_1";
+        default:                     return "ps_5_0";
+        }
+    case GLSL_SHADER_COMPUTE:
+        switch (p->fl) {
+        case D3D_FEATURE_LEVEL_10_1: return "cs_4_1";
+        case D3D_FEATURE_LEVEL_10_0: return "cs_4_0";
+        // No compute profiles on the 9_x feature levels
+        case D3D_FEATURE_LEVEL_9_3:
+        case D3D_FEATURE_LEVEL_9_2:
+        case D3D_FEATURE_LEVEL_9_1:  return NULL;
+        default:                     return "cs_5_0";
+        }
+    }
+    return NULL;
+}
+
+// Human-readable name of a shader stage for log messages; "unknown" for any
+// value that isn't one of the three known stages.
+static const char *shader_type_name(enum glsl_shader type)
+{
+    if (type == GLSL_SHADER_VERTEX)
+        return "vertex";
+    if (type == GLSL_SHADER_FRAGMENT)
+        return "fragment";
+    if (type == GLSL_SHADER_COMPUTE)
+        return "compute";
+    return "unknown";
+}
+
+// Create the D3D11 objects that clear_rpass() draws with and store them in
+// the ra_d3d11 state: the vertex and pixel shaders compiled from clear_vs and
+// clear_ps, the matching input layout, an immutable vertex buffer holding a
+// quad (two triangles), and a constant buffer of one float[4].
+// Returns true on success. On failure an error is logged and false is
+// returned; only the temporary shader blobs are released here, objects
+// already stored in `p` are left in place (presumably released together with
+// the rest of the ra state - confirm against the destroy path).
+static bool setup_clear_rpass(struct ra *ra)
+{
+    struct ra_d3d11 *p = ra->priv;
+    ID3DBlob *vs_blob = NULL;
+    ID3DBlob *ps_blob = NULL;
+    HRESULT hr;
+
+    // Vertex shader: compile to bytecode, then create the shader object
+    hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main",
+        get_shader_target(ra, GLSL_SHADER_VERTEX),
+        D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    hr = ID3D11Device_CreateVertexShader(p->dev,
+        ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
+        NULL, &p->clear_vs);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create clear() vertex shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // Pixel shader: same two steps
+    hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main",
+        get_shader_target(ra, GLSL_SHADER_FRAGMENT),
+        D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    hr = ID3D11Device_CreatePixelShader(p->dev,
+        ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob),
+        NULL, &p->clear_ps);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create clear() pixel shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // The only vertex attribute is a 2-component float position. The layout
+    // is created against the vertex shader bytecode, so vs_blob must still
+    // be alive here.
+    D3D11_INPUT_ELEMENT_DESC in_descs[] = {
+        { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
+    };
+    hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+        MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
+        ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create clear() IA layout: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // clear() always draws to a quad covering the whole viewport
+    // (six vertices: two triangles sharing the diagonal)
+    static const float verts[] = {
+        -1, -1,
+         1, -1,
+         1,  1,
+        -1,  1,
+        -1, -1,
+         1,  1,
+    };
+    D3D11_BUFFER_DESC vdesc = {
+        .ByteWidth = sizeof(verts),
+        .Usage = D3D11_USAGE_IMMUTABLE,
+        .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+    };
+    D3D11_SUBRESOURCE_DATA vdata = {
+        .pSysMem = verts,
+    };
+    hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // Constant buffer sized for one float[4]; clear_rpass() writes the clear
+    // colour into it before each draw
+    D3D11_BUFFER_DESC cdesc = {
+        .ByteWidth = sizeof(float[4]),
+        .BindFlags = D3D11_BIND_CONSTANT_BUFFER,
+    };
+    hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create clear() constant buffer: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // The bytecode blobs are no longer needed once the objects exist
+    SAFE_RELEASE(vs_blob);
+    SAFE_RELEASE(ps_blob);
+    return true;
+error:
+    SAFE_RELEASE(vs_blob);
+    SAFE_RELEASE(ps_blob);
+    return false;
+}
+
+// Clear the rectangle `rc` of `tex` to `color` by drawing the quad created in
+// setup_clear_rpass() with the scissor rectangle set to `rc`. clear() uses
+// this for partial clears when ClearView is not available.
+// NOTE(review): no rasterizer state is set here, so the scissor rectangle
+// only restricts the draw if the currently bound state has scissor testing
+// enabled - confirm where that state is set.
+static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4],
+                        struct mp_rect *rc)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_tex *tex_p = tex->priv;
+    struct ra_tex_params *params = &tex->params;
+
+    // Pass the clear colour to the pixel shader through the constant buffer
+    ID3D11DeviceContext_UpdateSubresource(p->ctx,
+        (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0);
+
+    // Input assembler: quad vertices, two floats per vertex
+    ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout);
+    ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf,
+        &(UINT) { sizeof(float[2]) }, &(UINT) { 0 });
+    ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
+        D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+    ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0);
+
+    // The viewport spans the whole texture; the scissor rect selects `rc`
+    ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
+        .Width = params->w,
+        .Height = params->h,
+        .MinDepth = 0,
+        .MaxDepth = 1,
+    }));
+    ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
+        .left = rc->x0,
+        .top = rc->y0,
+        .right = rc->x1,
+        .bottom = rc->y1,
+    }));
+    ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0);
+    ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf);
+
+    // Render into the texture with the default (NULL) blend state
+    ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL);
+    ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
+                                        D3D11_DEFAULT_SAMPLE_MASK);
+
+    ID3D11DeviceContext_Draw(p->ctx, 6, 0);
+
+    // Unbind the constant buffer and the render target again
+    ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1,
+        &(ID3D11Buffer *){ NULL });
+    ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
+}
+
+// Clear the rectangle `rc` of `tex` to `color`. Textures without a render
+// target view are silently ignored. A rectangle covering the whole texture
+// uses ClearRenderTargetView; partial rectangles use ClearView when it is
+// available and fall back to drawing a quad via clear_rpass() otherwise.
+static void clear(struct ra *ra, struct ra_tex *tex, float color[4],
+                  struct mp_rect *rc)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_tex *tex_p = tex->priv;
+    struct ra_tex_params *params = &tex->params;
+
+    if (!tex_p->rtv)
+        return;
+
+    bool whole_tex = !rc->x0 && !rc->y0 &&
+                     rc->x1 == params->w && rc->y1 == params->h;
+    if (whole_tex) {
+        ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color);
+        return;
+    }
+
+    if (!p->has_clear_view) {
+        clear_rpass(ra, tex, color, rc);
+        return;
+    }
+
+    D3D11_RECT area = {
+        .left = rc->x0,
+        .top = rc->y0,
+        .right = rc->x1,
+        .bottom = rc->y1,
+    };
+    ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv, color,
+                                   &area, 1);
+}
+
+// Create the D3D11 objects that blit_rpass() draws with and store them in the
+// ra_d3d11 state: the vertex shader (blit_vs) and pixel shader
+// (blit_float_ps), the input layout for position + texture coordinate, a
+// vertex buffer large enough for six blit_vert entries, and a clamping
+// sampler. Returns true on success. On failure an error is logged and false
+// is returned; only the temporary shader blobs are released here.
+static bool setup_blit_rpass(struct ra *ra)
+{
+    struct ra_d3d11 *p = ra->priv;
+    ID3DBlob *vs_blob = NULL;
+    ID3DBlob *float_ps_blob = NULL;
+    HRESULT hr;
+
+    // Vertex shader: compile to bytecode, then create the shader object
+    hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main",
+        get_shader_target(ra, GLSL_SHADER_VERTEX),
+        D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    hr = ID3D11Device_CreateVertexShader(p->dev,
+        ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob),
+        NULL, &p->blit_vs);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create blit() vertex shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // Pixel shader: same two steps
+    hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, NULL, NULL,
+        "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT),
+        D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    hr = ID3D11Device_CreatePixelShader(p->dev,
+        ID3D10Blob_GetBufferPointer(float_ps_blob),
+        ID3D10Blob_GetBufferSize(float_ps_blob),
+        NULL, &p->blit_float_ps);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create blit() pixel shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // Two float2 attributes per vertex: position at byte offset 0 and
+    // texture coordinate at byte offset 8
+    D3D11_INPUT_ELEMENT_DESC in_descs[] = {
+        { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 },
+        { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 },
+    };
+    hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+        MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob),
+        ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create blit() IA layout: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // Created without initial data; blit_rpass() rewrites the vertices on
+    // every call
+    D3D11_BUFFER_DESC vdesc = {
+        .ByteWidth = sizeof(struct blit_vert[6]),
+        .Usage = D3D11_USAGE_DEFAULT,
+        .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+    };
+    hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // Blit always uses point sampling, regardless of the source texture
+    // (.Filter is left zero-initialized; presumably that value is the point
+    // filter - confirm against the D3D11_FILTER enum)
+    D3D11_SAMPLER_DESC sdesc = {
+        .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP,
+        .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP,
+        .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP,
+        .ComparisonFunc = D3D11_COMPARISON_NEVER,
+        .MinLOD = 0,
+        .MaxLOD = D3D11_FLOAT32_MAX,
+        .MaxAnisotropy = 1,
+    };
+    hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->blit_sampler);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create blit() sampler: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // The bytecode blobs are no longer needed once the objects exist
+    SAFE_RELEASE(vs_blob);
+    SAFE_RELEASE(float_ps_blob);
+    return true;
+error:
+    SAFE_RELEASE(vs_blob);
+    SAFE_RELEASE(float_ps_blob);
+    return false;
+}
+
+// Copy `src_rc` of `src` into `dst_rc` of `dst` by drawing a textured quad
+// with the objects created in setup_blit_rpass(). blit() uses this path when
+// a plain resource copy isn't possible (format change, scaling or flipping).
+// `dst_rc` is used directly as viewport and scissor rectangle, so blit()
+// un-flips it before calling; `src_rc` may be flipped, which simply mirrors
+// the texture coordinates.
+static void blit_rpass(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+                       struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_tex *dst_p = dst->priv;
+    struct d3d_tex *src_p = src->priv;
+
+    // Source rectangle as texture coordinates relative to the texture size
+    float u_min = (double)src_rc->x0 / src->params.w;
+    float u_max = (double)src_rc->x1 / src->params.w;
+    float v_min = (double)src_rc->y0 / src->params.h;
+    float v_max = (double)src_rc->y1 / src->params.h;
+
+    // Quad corners; y = -1 is paired with v_max and y = 1 with v_min.
+    // The last two vertices repeat corners 0 and 2 to form the second
+    // triangle.
+    struct blit_vert verts[6] = {
+        { .x = -1, .y = -1, .u = u_min, .v = v_max },
+        { .x =  1, .y = -1, .u = u_max, .v = v_max },
+        { .x =  1, .y =  1, .u = u_max, .v = v_min },
+        { .x = -1, .y =  1, .u = u_min, .v = v_min },
+    };
+    verts[4] = verts[0];
+    verts[5] = verts[2];
+    ID3D11DeviceContext_UpdateSubresource(p->ctx,
+        (ID3D11Resource *)p->blit_vbuf, 0, NULL, verts, 0, 0);
+
+    ID3D11DeviceContext_IASetInputLayout(p->ctx, p->blit_layout);
+    ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->blit_vbuf,
+        &(UINT) { sizeof(verts[0]) }, &(UINT) { 0 });
+    ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
+        D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+    ID3D11DeviceContext_VSSetShader(p->ctx, p->blit_vs, NULL, 0);
+
+    // The viewport maps the quad exactly onto the destination rectangle
+    ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
+        .TopLeftX = dst_rc->x0,
+        .TopLeftY = dst_rc->y0,
+        .Width = mp_rect_w(*dst_rc),
+        .Height = mp_rect_h(*dst_rc),
+        .MinDepth = 0,
+        .MaxDepth = 1,
+    }));
+    ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
+        .left = dst_rc->x0,
+        .top = dst_rc->y0,
+        .right = dst_rc->x1,
+        .bottom = dst_rc->y1,
+    }));
+
+    // Sample the source texture through the blit sampler
+    ID3D11DeviceContext_PSSetShader(p->ctx, p->blit_float_ps, NULL, 0);
+    ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, &src_p->srv);
+    ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, &p->blit_sampler);
+
+    // Render into the destination with the default (NULL) blend state
+    ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &dst_p->rtv, NULL);
+    ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL,
+                                        D3D11_DEFAULT_SAMPLE_MASK);
+
+    ID3D11DeviceContext_Draw(p->ctx, 6, 0);
+
+    // Unbind the source texture, the sampler and the render target again
+    ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1,
+        &(ID3D11ShaderResourceView *) { NULL });
+    ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1,
+        &(ID3D11SamplerState *) { NULL });
+    ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL);
+}
+
+// Copy a rectangle of one 2D texture into a rectangle of another. Identical
+// formats and rectangle sizes are handled with a direct subresource copy;
+// anything that needs format conversion, scaling or flipping is drawn with
+// blit_rpass(). An empty destination rectangle does nothing.
+static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+                 struct mp_rect *dst_rc_ptr, struct mp_rect *src_rc_ptr)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_tex *dst_p = dst->priv;
+    struct d3d_tex *src_p = src->priv;
+    // Work on copies, the rectangles may get normalized below
+    struct mp_rect to = *dst_rc_ptr;
+    struct mp_rect from = *src_rc_ptr;
+
+    assert(dst->params.dimensions == 2);
+    assert(src->params.dimensions == 2);
+
+    // Nothing to do for a zero-sized target rectangle
+    if (mp_rect_w(to) == 0 || mp_rect_h(to) == 0)
+        return;
+
+    // ra.h seems to allow either rectangle to be flipped. blit_rpass() only
+    // copes with a flipped source, so move any flip from the destination
+    // over to the source.
+    if (to.x1 < to.x0) {
+        MPSWAP(int, to.x0, to.x1);
+        MPSWAP(int, from.x0, from.x1);
+    }
+    if (to.y1 < to.y0) {
+        MPSWAP(int, to.y0, to.y1);
+        MPSWAP(int, from.y0, from.y1);
+    }
+
+    // A raw copy is only possible without conversion, scaling or flipping
+    bool plain_copy = dst->params.format == src->params.format &&
+                      mp_rect_w(to) == mp_rect_w(from) &&
+                      mp_rect_h(to) == mp_rect_h(from);
+    if (!plain_copy) {
+        blit_rpass(ra, dst, src, &to, &from);
+        return;
+    }
+
+    int dst_sr = 0;
+    if (dst_p->array_slice >= 0)
+        dst_sr = dst_p->array_slice;
+    int src_sr = 0;
+    if (src_p->array_slice >= 0)
+        src_sr = src_p->array_slice;
+
+    D3D11_BOX src_box = {
+        .left = from.x0,
+        .top = from.y0,
+        .front = 0,
+        .right = from.x1,
+        .bottom = from.y1,
+        .back = 1,
+    };
+    ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr,
+        to.x0, to.y0, 0, src_p->res, src_sr, &src_box);
+}
+
+// Return the binding namespace for a variable type. Writable images share
+// the namespace of read/write buffers because both are bound as UAVs; every
+// other type is its own namespace.
+static int desc_namespace(struct ra *ra, enum ra_vartype type)
+{
+    return type == RA_VARTYPE_IMG_W ? RA_VARTYPE_BUF_RW : type;
+}
+
+// Compile a GLSL shader to D3D bytecode in three steps: GLSL -> SPIR-V (via
+// the configured spirv compiler), SPIR-V -> HLSL (via SPIRV-Cross), and
+// HLSL -> bytecode (via D3DCompile). The HLSL shader model follows the
+// device feature level.
+//
+// On success, true is returned and *out receives the bytecode blob; the
+// GLSL and HLSL sources are logged at debug level. On failure, false is
+// returned and the error plus the sources generated so far are logged at
+// error level.
+static bool compile_glsl(struct ra *ra, enum glsl_shader type,
+                         const char *glsl, ID3DBlob **out)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct spirv_compiler *spirv = p->spirv;
+    void *ta_ctx = talloc_new(NULL);
+    spvc_result sc_res = SPVC_SUCCESS;
+    spvc_context sc_ctx = NULL;
+    spvc_parsed_ir sc_ir = NULL;
+    spvc_compiler sc_compiler = NULL;
+    spvc_compiler_options sc_opts = NULL;
+    const char *hlsl = NULL;
+    ID3DBlob *errors = NULL;
+    bool success = false;
+    HRESULT hr;
+
+    int sc_shader_model;
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+        sc_shader_model = 50;
+    } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) {
+        sc_shader_model = 41;
+    } else {
+        sc_shader_model = 40;
+    }
+
+    int64_t start_ns = mp_time_ns();
+
+    // Step 1: GLSL -> SPIR-V. The module is allocated under ta_ctx.
+    bstr spv_module;
+    if (!spirv->fns->compile_glsl(spirv, ta_ctx, type, glsl, &spv_module))
+        goto done;
+
+    int64_t shaderc_ns = mp_time_ns();
+
+    // Step 2: SPIR-V -> HLSL. Every object created through sc_ctx is
+    // released when the context is destroyed at the end.
+    sc_res = spvc_context_create(&sc_ctx);
+    if (sc_res != SPVC_SUCCESS)
+        goto done;
+
+    sc_res = spvc_context_parse_spirv(sc_ctx, (SpvId *)spv_module.start,
+                                      spv_module.len / sizeof(SpvId), &sc_ir);
+    if (sc_res != SPVC_SUCCESS)
+        goto done;
+
+    sc_res = spvc_context_create_compiler(sc_ctx, SPVC_BACKEND_HLSL, sc_ir,
+                                          SPVC_CAPTURE_MODE_TAKE_OWNERSHIP,
+                                          &sc_compiler);
+    if (sc_res != SPVC_SUCCESS)
+        goto done;
+
+    sc_res = spvc_compiler_create_compiler_options(sc_compiler, &sc_opts);
+    if (sc_res != SPVC_SUCCESS)
+        goto done;
+    sc_res = spvc_compiler_options_set_uint(sc_opts,
+        SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model);
+    if (sc_res != SPVC_SUCCESS)
+        goto done;
+    if (type == GLSL_SHADER_VERTEX) {
+        // FLIP_VERTEX_Y is only valid for vertex shaders
+        sc_res = spvc_compiler_options_set_bool(sc_opts,
+            SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE);
+        if (sc_res != SPVC_SUCCESS)
+            goto done;
+    }
+    sc_res = spvc_compiler_install_compiler_options(sc_compiler, sc_opts);
+    if (sc_res != SPVC_SUCCESS)
+        goto done;
+
+    sc_res = spvc_compiler_compile(sc_compiler, &hlsl);
+    if (sc_res != SPVC_SUCCESS)
+        goto done;
+
+    int64_t cross_ns = mp_time_ns();
+
+    // Step 3: HLSL -> bytecode
+    hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main",
+        get_shader_target(ra, type), D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out,
+        &errors);
+    if (FAILED(hr)) {
+        // D3DCompile doesn't necessarily return an error blob (for example
+        // when it rejects its arguments), so don't dereference it blindly
+        if (errors) {
+            MP_ERR(ra, "D3DCompile failed: %s\n%.*s", mp_HRESULT_to_str(hr),
+                   (int)ID3D10Blob_GetBufferSize(errors),
+                   (char*)ID3D10Blob_GetBufferPointer(errors));
+        } else {
+            MP_ERR(ra, "D3DCompile failed: %s\n", mp_HRESULT_to_str(hr));
+        }
+        goto done;
+    }
+
+    int64_t d3dcompile_ns = mp_time_ns();
+
+    // Cast explicitly so the arguments always match %lld
+    MP_VERBOSE(ra, "Compiled a %s shader in %lldns\n", shader_type_name(type),
+               (long long)(d3dcompile_ns - start_ns));
+    MP_VERBOSE(ra, "shaderc: %lldns, SPIRV-Cross: %lldns, D3DCompile: %lldns\n",
+               (long long)(shaderc_ns - start_ns),
+               (long long)(cross_ns - shaderc_ns),
+               (long long)(d3dcompile_ns - cross_ns));
+
+    success = true;
+done:
+    if (sc_res != SPVC_SUCCESS) {
+        // If creating the context itself failed there is no context to
+        // query for an error string
+        MP_MSG(ra, MSGL_ERR, "SPIRV-Cross failed: %s\n",
+               sc_ctx ? spvc_context_get_last_error_string(sc_ctx)
+                      : "could not create context");
+    }
+    // Dump the sources: quietly on success, loudly on failure
+    int level = success ? MSGL_DEBUG : MSGL_ERR;
+    MP_MSG(ra, level, "GLSL source:\n");
+    mp_log_source(ra->log, level, glsl);
+    if (hlsl) {
+        MP_MSG(ra, level, "HLSL source:\n");
+        mp_log_source(ra->log, level, hlsl);
+    }
+    SAFE_RELEASE(errors);
+    // `hlsl` is only used above, before the context is destroyed
+    if (sc_ctx)
+        spvc_context_destroy(sc_ctx);
+    talloc_free(ta_ctx);
+    return success;
+}
+
+// Release every D3D11 object owned by a renderpass and free it. NULL is
+// accepted and ignored.
+static void renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+    if (pass) {
+        struct d3d_rpass *priv = pass->priv;
+
+        // Shader objects first, then the fixed-function state objects
+        SAFE_RELEASE(priv->vs);
+        SAFE_RELEASE(priv->ps);
+        SAFE_RELEASE(priv->cs);
+        SAFE_RELEASE(priv->layout);
+        SAFE_RELEASE(priv->bstate);
+
+        talloc_free(pass);
+    }
+}
+
+// Translate an RA blend factor into the matching D3D11 blend factor. Values
+// without a dedicated case map to D3D11_BLEND_ZERO.
+static D3D11_BLEND map_ra_blend(enum ra_blend blend)
+{
+    D3D11_BLEND mapped = D3D11_BLEND_ZERO;
+
+    switch (blend) {
+    case RA_BLEND_ONE:
+        mapped = D3D11_BLEND_ONE;
+        break;
+    case RA_BLEND_SRC_ALPHA:
+        mapped = D3D11_BLEND_SRC_ALPHA;
+        break;
+    case RA_BLEND_ONE_MINUS_SRC_ALPHA:
+        mapped = D3D11_BLEND_INV_SRC_ALPHA;
+        break;
+    case RA_BLEND_ZERO:
+    default:
+        break;
+    }
+
+    return mapped;
+}
+
+static size_t vbuf_upload(struct ra *ra, void *data, size_t size)
+{
+    // Copies `size` bytes of vertex data into the shared dynamic vertex buffer
+    // and returns the byte offset they were written at, or (size_t)-1 on error.
+    struct ra_d3d11 *p = ra->priv;
+    HRESULT hr;
+
+    // Arbitrary size limit in case there is an insane number of vertices
+    if (size > 1e9) {
+        MP_ERR(ra, "Vertex buffer is too large\n");
+        return -1;
+    }
+
+    // Grow the buffer when the data cannot fit: start from the current size
+    // (or an arbitrary 64 KiB base) and double until it is large enough.
+    if (p->vbuf_size < size) {
+        size_t grown = p->vbuf_size ? p->vbuf_size : 64 * 1024;
+        while (grown < size)
+            grown *= 2;
+
+        D3D11_BUFFER_DESC desc = {
+            .ByteWidth = grown,
+            .Usage = D3D11_USAGE_DYNAMIC,
+            .BindFlags = D3D11_BIND_VERTEX_BUFFER,
+            .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE,
+        };
+        ID3D11Buffer *replacement;
+        hr = ID3D11Device_CreateBuffer(p->dev, &desc, NULL, &replacement);
+        if (FAILED(hr)) {
+            MP_ERR(ra, "Failed to create vertex buffer: %s\n",
+                   mp_HRESULT_to_str(hr));
+            return -1;
+        }
+
+        // The old buffer is dropped, so the new one starts out empty
+        SAFE_RELEASE(p->vbuf);
+        p->vbuf = replacement;
+        p->vbuf_size = grown;
+        p->vbuf_used = 0;
+    }
+
+    // Append after the data already in the buffer. When that would run past
+    // the end, discard the old contents and wrap around to the start.
+    size_t offset = p->vbuf_used;
+    D3D11_MAP map_type = D3D11_MAP_WRITE_NO_OVERWRITE;
+    if (offset + size > p->vbuf_size) {
+        map_type = D3D11_MAP_WRITE_DISCARD;
+        offset = 0;
+    }
+
+    D3D11_MAPPED_SUBRESOURCE map = { 0 };
+    ID3D11Resource *res = (ID3D11Resource *)p->vbuf;
+    hr = ID3D11DeviceContext_Map(p->ctx, res, 0, map_type, 0, &map);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to map vertex buffer: %s\n", mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    memcpy((char *)map.pData + offset, data, size);
+
+    ID3D11DeviceContext_Unmap(p->ctx, res, 0);
+
+    p->vbuf_used = offset + size;
+    return offset;
+}
+
+static const char cache_magic[4] = "RD11"; // exactly 4 bytes, no terminator
+static const int cache_version = 3; // caches with another version are ignored
+
+struct cache_header { // written in front of the bytecode by save_cached_program
+    char magic[sizeof(cache_magic)];
+    int cache_version;
+    char compiler[SPIRV_NAME_MAX_LEN]; // spirv->name, copied with strncpy
+    int spv_compiler_version; // spirv->compiler_version
+    unsigned spvc_compiler_major; // major/minor/patch from spvc_get_version()
+    unsigned spvc_compiler_minor;
+    unsigned spvc_compiler_patch;
+    struct dll_version d3d_compiler_version; // p->d3d_compiler_ver
+    int feature_level; // p->fl
+    size_t vert_bytecode_len; // byte lengths of the three bytecode sections
+    size_t frag_bytecode_len; // that follow the header, in this order;
+    size_t comp_bytecode_len; // 0 when a section is absent
+};
+
+static void load_cached_program(struct ra *ra,
+                                const struct ra_renderpass_params *params,
+                                bstr *vert_bc, bstr *frag_bc, bstr *comp_bc)
+{
+    // Point the non-NULL outputs at the bytecode stored in the pass's cached
+    // program. They are left untouched unless the cache header is accepted.
+    struct ra_d3d11 *p = ra->priv;
+    struct spirv_compiler *spirv = p->spirv;
+    bstr cache = params->cached_program;
+
+    if (cache.len < sizeof(struct cache_header))
+        return;
+    struct cache_header *header = (struct cache_header *)cache.start;
+    cache = bstr_cut(cache, sizeof(*header));
+    unsigned spvc_major, spvc_minor, spvc_patch;
+    spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch);
+
+    if (strncmp(header->magic, cache_magic, sizeof(cache_magic)) != 0)
+        return;
+    if (header->cache_version != cache_version)
+        return;
+    if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0)
+        return;
+    if (header->spv_compiler_version != spirv->compiler_version)
+        return;
+    if (header->spvc_compiler_major != spvc_major ||
+        header->spvc_compiler_minor != spvc_minor ||
+        header->spvc_compiler_patch != spvc_patch)
+        return;
+    if (!dll_version_equal(header->d3d_compiler_version, p->d3d_compiler_ver))
+        return;
+    if (header->feature_level != p->fl)
+        return;
+    // The lengths come from the cache blob itself, so reject a truncated or
+    // corrupt cache. The tests are ordered so that no subtraction can wrap.
+    size_t vert_len = header->vert_bytecode_len;
+    size_t frag_len = header->frag_bytecode_len;
+    size_t comp_len = header->comp_bytecode_len;
+    if (vert_len > cache.len || frag_len > cache.len - vert_len ||
+        comp_len > cache.len - vert_len - frag_len)
+        return;
+    if (vert_len && vert_bc) {
+        *vert_bc = bstr_splice(cache, 0, vert_len);
+        MP_VERBOSE(ra, "Using cached vertex shader\n");
+    }
+    cache = bstr_cut(cache, vert_len);
+    if (frag_len && frag_bc) {
+        *frag_bc = bstr_splice(cache, 0, frag_len);
+        MP_VERBOSE(ra, "Using cached fragment shader\n");
+    }
+    cache = bstr_cut(cache, frag_len);
+    if (comp_len && comp_bc) {
+        *comp_bc = bstr_splice(cache, 0, comp_len);
+        MP_VERBOSE(ra, "Using cached compute shader\n");
+    }
+}
+
+static void save_cached_program(struct ra *ra, struct ra_renderpass *pass,
+                                bstr vert_bc, bstr frag_bc, bstr comp_bc)
+{
+    // Appends a cache_header followed by the vertex, fragment and compute
+    // bytecode (in that order) to pass->params.cached_program.
+    struct ra_d3d11 *p = ra->priv;
+    struct spirv_compiler *spirv = p->spirv;
+
+    unsigned v_major, v_minor, v_patch;
+    spvc_get_version(&v_major, &v_minor, &v_patch);
+
+    struct cache_header hdr = {
+        .cache_version = cache_version,
+        .spv_compiler_version = spirv->compiler_version,
+        .spvc_compiler_major = v_major,
+        .spvc_compiler_minor = v_minor,
+        .spvc_compiler_patch = v_patch,
+        .d3d_compiler_version = p->d3d_compiler_ver,
+        .feature_level = p->fl,
+        .vert_bytecode_len = vert_bc.len,
+        .frag_bytecode_len = frag_bc.len,
+        .comp_bytecode_len = comp_bc.len,
+    };
+    memcpy(hdr.magic, cache_magic, sizeof(hdr.magic));
+    strncpy(hdr.compiler, spirv->name, sizeof(hdr.compiler));
+
+    struct bstr *out = &pass->params.cached_program;
+    bstr_xappend(pass, out, (bstr){ (char *) &hdr, sizeof(hdr) });
+    bstr_xappend(pass, out, vert_bc);
+    bstr_xappend(pass, out, frag_bc);
+    bstr_xappend(pass, out, comp_bc);
+}
+
+static struct ra_renderpass *renderpass_create_raster(struct ra *ra,
+    struct ra_renderpass *pass, const struct ra_renderpass_params *params)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_rpass *pass_p = pass->priv;
+    ID3DBlob *vs_blob = NULL;
+    ID3DBlob *ps_blob = NULL;
+    HRESULT hr;
+    // Returns `pass` on success; on any failure the pass is destroyed -> NULL.
+    // load_cached_program will load compiled shader bytecode into vert_bc and
+    // frag_bc if the cache is valid. If not, vert_bc/frag_bc will remain NULL.
+    bstr vert_bc = {0};
+    bstr frag_bc = {0};
+    load_cached_program(ra, params, &vert_bc, &frag_bc, NULL);
+
+    if (!vert_bc.start) { // no cached vertex shader: compile it from GLSL
+        if (!compile_glsl(ra, GLSL_SHADER_VERTEX, params->vertex_shader,
+                          &vs_blob))
+            goto error;
+        vert_bc = (bstr){ // borrows the blob's buffer; vs_blob must stay alive
+            ID3D10Blob_GetBufferPointer(vs_blob),
+            ID3D10Blob_GetBufferSize(vs_blob),
+        };
+    }
+
+    hr = ID3D11Device_CreateVertexShader(p->dev, vert_bc.start, vert_bc.len,
+                                         NULL, &pass_p->vs);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create vertex shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    if (!frag_bc.start) { // no cached fragment shader: compile it from GLSL
+        if (!compile_glsl(ra, GLSL_SHADER_FRAGMENT, params->frag_shader,
+                          &ps_blob))
+            goto error;
+        frag_bc = (bstr){ // borrows the blob's buffer; ps_blob must stay alive
+            ID3D10Blob_GetBufferPointer(ps_blob),
+            ID3D10Blob_GetBufferSize(ps_blob),
+        };
+    }
+
+    hr = ID3D11Device_CreatePixelShader(p->dev, frag_bc.start, frag_bc.len,
+                                        NULL, &pass_p->ps);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create pixel shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+    // Describe every vertex attribute of the pass for the input layout
+    D3D11_INPUT_ELEMENT_DESC *in_descs = talloc_array(pass,
+        D3D11_INPUT_ELEMENT_DESC, params->num_vertex_attribs);
+    for (int i = 0; i < params->num_vertex_attribs; i++) {
+        struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+
+        DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; // stays UNKNOWN if unsupported
+        switch (inp->type) {
+        case RA_VARTYPE_FLOAT:
+            switch (inp->dim_v) {
+            case 1: fmt = DXGI_FORMAT_R32_FLOAT;          break;
+            case 2: fmt = DXGI_FORMAT_R32G32_FLOAT;       break;
+            case 3: fmt = DXGI_FORMAT_R32G32B32_FLOAT;    break;
+            case 4: fmt = DXGI_FORMAT_R32G32B32A32_FLOAT; break;
+            }
+            break;
+        case RA_VARTYPE_BYTE_UNORM:
+            switch (inp->dim_v) {
+            case 1: fmt = DXGI_FORMAT_R8_UNORM;       break;
+            case 2: fmt = DXGI_FORMAT_R8G8_UNORM;     break;
+            // There is no 3-component 8-bit DXGI format
+            case 4: fmt = DXGI_FORMAT_R8G8B8A8_UNORM; break;
+            }
+            break;
+        }
+        if (fmt == DXGI_FORMAT_UNKNOWN) {
+            MP_ERR(ra, "Could not find suitable vertex input format\n");
+            goto error; // in_descs is a talloc child of pass, freed with it
+        }
+
+        in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) {
+            // The semantic name doesn't mean much and is just used to verify
+            // the input description matches the shader. SPIRV-Cross always
+            // uses TEXCOORD, so we should too.
+            .SemanticName = "TEXCOORD",
+            .SemanticIndex = i,
+            .AlignedByteOffset = inp->offset,
+            .Format = fmt,
+        };
+    }
+
+    hr = ID3D11Device_CreateInputLayout(p->dev, in_descs,
+        params->num_vertex_attribs, vert_bc.start, vert_bc.len,
+        &pass_p->layout);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create IA layout: %s\n", mp_HRESULT_to_str(hr));
+        goto error;
+    }
+    talloc_free(in_descs); // not used after the layout has been created
+    in_descs = NULL;
+    // Both colour and alpha use the ADD blend op with the pass's factors
+    D3D11_BLEND_DESC bdesc = {
+        .RenderTarget[0] = {
+            .BlendEnable = params->enable_blend,
+            .SrcBlend = map_ra_blend(params->blend_src_rgb),
+            .DestBlend = map_ra_blend(params->blend_dst_rgb),
+            .BlendOp = D3D11_BLEND_OP_ADD,
+            .SrcBlendAlpha = map_ra_blend(params->blend_src_alpha),
+            .DestBlendAlpha = map_ra_blend(params->blend_dst_alpha),
+            .BlendOpAlpha = D3D11_BLEND_OP_ADD,
+            .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL,
+        },
+    };
+    hr = ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create blend state: %s\n", mp_HRESULT_to_str(hr));
+        goto error;
+    }
+    // Serialize the bytecode used above into pass->params.cached_program
+    save_cached_program(ra, pass, vert_bc, frag_bc, (bstr){0});
+
+    SAFE_RELEASE(vs_blob);
+    SAFE_RELEASE(ps_blob);
+    return pass;
+
+error:
+    renderpass_destroy(ra, pass);
+    SAFE_RELEASE(vs_blob);
+    SAFE_RELEASE(ps_blob);
+    return NULL;
+}
+
+static struct ra_renderpass *renderpass_create_compute(struct ra *ra,
+    struct ra_renderpass *pass, const struct ra_renderpass_params *params)
+{
+    // Builds the compute shader for `pass`, preferring cached bytecode over a
+    // fresh GLSL compile. Returns `pass`, or NULL after destroying it on error.
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_rpass *rp = pass->priv;
+    ID3DBlob *blob = NULL;
+    bstr bytecode = {0};
+    load_cached_program(ra, params, NULL, NULL, &bytecode);
+
+    // Nothing usable came out of the cache, so compile the shader now
+    if (!bytecode.start) {
+        if (!compile_glsl(ra, GLSL_SHADER_COMPUTE, params->compute_shader,
+                          &blob))
+            goto error;
+        bytecode.start = ID3D10Blob_GetBufferPointer(blob);
+        bytecode.len = ID3D10Blob_GetBufferSize(blob);
+    }
+
+    HRESULT hr = ID3D11Device_CreateComputeShader(p->dev, bytecode.start,
+                                                  bytecode.len, NULL, &rp->cs);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create compute shader: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // bytecode may point into the blob, so save it before releasing the blob
+    save_cached_program(ra, pass, (bstr){0}, (bstr){0}, bytecode);
+    SAFE_RELEASE(blob);
+    return pass;
+error:
+    renderpass_destroy(ra, pass);
+    SAFE_RELEASE(blob);
+    return NULL;
+}
+
+static struct ra_renderpass *renderpass_create(struct ra *ra,
+    const struct ra_renderpass_params *params)
+{
+    // Allocates the pass with a private copy of `params`, then hands it to the
+    // compute or raster builder. The copy's cached_program starts out empty.
+    struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+    pass->params = *ra_renderpass_params_copy(pass, params);
+    pass->params.cached_program = (bstr){0};
+    pass->priv = talloc_zero(pass, struct d3d_rpass);
+
+    bool is_compute = params->type == RA_RENDERPASS_TYPE_COMPUTE;
+    return is_compute ? renderpass_create_compute(ra, pass, params)
+                      : renderpass_create_raster(ra, pass, params);
+}
+
+static void renderpass_run_raster(struct ra *ra,
+                                  const struct ra_renderpass_run_params *params,
+                                  ID3D11Buffer *ubos[], int ubos_len,
+                                  ID3D11SamplerState *samplers[],
+                                  ID3D11ShaderResourceView *srvs[],
+                                  int samplers_len,
+                                  ID3D11UnorderedAccessView *uavs[],
+                                  int uavs_len)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct ra_renderpass *pass = params->pass;
+    struct d3d_rpass *pass_p = pass->priv;
+    // Upload the vertices, bind the pass state, draw, then unbind everything
+    UINT vbuf_offset = vbuf_upload(ra, params->vertex_data,
+        pass->params.vertex_stride * params->vertex_count);
+    if (vbuf_offset == (UINT)-1) // vbuf_upload returns -1 on failure
+        return;
+
+    ID3D11DeviceContext_IASetInputLayout(p->ctx, pass_p->layout);
+    ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->vbuf,
+        &pass->params.vertex_stride, &vbuf_offset);
+    ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx,
+        D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST);
+
+    ID3D11DeviceContext_VSSetShader(p->ctx, pass_p->vs, NULL, 0);
+
+    ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) {
+        .TopLeftX = params->viewport.x0,
+        .TopLeftY = params->viewport.y0,
+        .Width = mp_rect_w(params->viewport),
+        .Height = mp_rect_h(params->viewport),
+        .MinDepth = 0,
+        .MaxDepth = 1,
+    }));
+    ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) {
+        .left = params->scissors.x0,
+        .top = params->scissors.y0,
+        .right = params->scissors.x1,
+        .bottom = params->scissors.y1,
+    }));
+    ID3D11DeviceContext_PSSetShader(p->ctx, pass_p->ps, NULL, 0);
+    ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+    ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+    ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers);
+    // One render target (the pass target); the UAVs are bound from slot 1 on
+    struct ra_tex *target = params->target;
+    struct d3d_tex *target_p = target->priv;
+    ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 1,
+        &target_p->rtv, NULL, 1, uavs_len, uavs, NULL);
+    ID3D11DeviceContext_OMSetBlendState(p->ctx, pass_p->bstate, NULL,
+                                        D3D11_DEFAULT_SAMPLE_MASK);
+
+    ID3D11DeviceContext_Draw(p->ctx, params->vertex_count, 0);
+
+    // Unbind everything. It's easier to do this than to actually track state,
+    // and if we leave the RTV bound, it could trip up D3D's conflict checker.
+    for (int i = 0; i < ubos_len; i++) // note: clears the caller's arrays
+        ubos[i] = NULL;
+    for (int i = 0; i < samplers_len; i++) {
+        samplers[i] = NULL;
+        srvs[i] = NULL;
+    }
+    for (int i = 0; i < uavs_len; i++)
+        uavs[i] = NULL;
+    ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+    ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+    ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers);
+    ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 0,
+        NULL, NULL, 1, uavs_len, uavs, NULL);
+}
+
+static void renderpass_run_compute(struct ra *ra,
+                                   const struct ra_renderpass_run_params *params,
+                                   ID3D11Buffer *ubos[], int ubos_len,
+                                   ID3D11SamplerState *samplers[],
+                                   ID3D11ShaderResourceView *srvs[],
+                                   int samplers_len,
+                                   ID3D11UnorderedAccessView *uavs[],
+                                   int uavs_len)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct ra_renderpass *pass = params->pass;
+    struct d3d_rpass *pass_p = pass->priv;
+    // Bind the compute shader and its inputs, dispatch, then unbind the inputs
+    ID3D11DeviceContext_CSSetShader(p->ctx, pass_p->cs, NULL, 0);
+    ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+    ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+    ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers);
+    ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs,
+                                                  NULL);
+
+    ID3D11DeviceContext_Dispatch(p->ctx, params->compute_groups[0],
+                                         params->compute_groups[1],
+                                         params->compute_groups[2]);
+    // Unbind by re-issuing the same calls with the (caller's) arrays cleared
+    for (int i = 0; i < ubos_len; i++)
+        ubos[i] = NULL;
+    for (int i = 0; i < samplers_len; i++) {
+        samplers[i] = NULL;
+        srvs[i] = NULL;
+    }
+    for (int i = 0; i < uavs_len; i++)
+        uavs[i] = NULL;
+    ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos);
+    ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs);
+    ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers);
+    ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs,
+                                                  NULL);
+}
+
+static void renderpass_run(struct ra *ra,
+                           const struct ra_renderpass_run_params *params)
+{
+    struct ra_d3d11 *p = ra->priv;
+    struct ra_renderpass *pass = params->pass;
+    enum ra_renderpass_type type = pass->params.type;
+    // Sort the pass inputs into per-type slot arrays, indexed by binding
+    ID3D11Buffer *ubos[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = {0};
+    int ubos_len = 0;
+
+    ID3D11SamplerState *samplers[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
+    ID3D11ShaderResourceView *srvs[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0};
+    int samplers_len = 0;
+
+    ID3D11UnorderedAccessView *uavs[D3D11_1_UAV_SLOT_COUNT] = {0};
+    int uavs_len = 0;
+    // uavs_max is a slot count, so the valid UAV bindings are 0..uavs_max-1.
+    // In a raster pass, one of the UAV slots is used by the runtime for the RTV
+    int uavs_max = type == RA_RENDERPASS_TYPE_COMPUTE ? p->max_uavs
+                                                      : p->max_uavs - 1;
+
+    // Gather the input variables used in this pass. These will be mapped to
+    // HLSL registers.
+    for (int i = 0; i < params->num_values; i++) {
+        struct ra_renderpass_input_val *val = &params->values[i];
+        int binding = pass->params.inputs[val->index].binding;
+        switch (pass->params.inputs[val->index].type) {
+        case RA_VARTYPE_BUF_RO:
+            if (binding >= MP_ARRAY_SIZE(ubos)) {
+                MP_ERR(ra, "Too many constant buffers in pass\n");
+                return;
+            }
+            struct ra_buf *buf_ro = *(struct ra_buf **)val->data;
+            buf_resolve(ra, buf_ro);
+            struct d3d_buf *buf_ro_p = buf_ro->priv;
+            ubos[binding] = buf_ro_p->buf;
+            ubos_len = MPMAX(ubos_len, binding + 1);
+            break;
+        case RA_VARTYPE_BUF_RW:
+            if (binding < 0 || binding >= uavs_max) {
+                MP_ERR(ra, "Too many UAVs in pass\n");
+                return;
+            }
+            struct ra_buf *buf_rw = *(struct ra_buf **)val->data;
+            buf_resolve(ra, buf_rw);
+            struct d3d_buf *buf_rw_p = buf_rw->priv;
+            uavs[binding] = buf_rw_p->uav;
+            uavs_len = MPMAX(uavs_len, binding + 1);
+            break;
+        case RA_VARTYPE_TEX:
+            if (binding >= MP_ARRAY_SIZE(samplers)) {
+                MP_ERR(ra, "Too many textures in pass\n");
+                return;
+            }
+            struct ra_tex *tex = *(struct ra_tex **)val->data;
+            struct d3d_tex *tex_p = tex->priv;
+            samplers[binding] = tex_p->sampler;
+            srvs[binding] = tex_p->srv;
+            samplers_len = MPMAX(samplers_len, binding + 1);
+            break;
+        case RA_VARTYPE_IMG_W:
+            if (binding < 0 || binding >= uavs_max) {
+                MP_ERR(ra, "Too many UAVs in pass\n");
+                return;
+            }
+            struct ra_tex *img = *(struct ra_tex **)val->data;
+            struct d3d_tex *img_p = img->priv;
+            uavs[binding] = img_p->uav;
+            uavs_len = MPMAX(uavs_len, binding + 1);
+            break;
+        }
+    }
+    // Each *_len is the highest used binding + 1, i.e. the slot count to bind
+    if (type == RA_RENDERPASS_TYPE_COMPUTE) {
+        renderpass_run_compute(ra, params, ubos, ubos_len, samplers, srvs,
+                               samplers_len, uavs, uavs_len);
+    } else {
+        renderpass_run_raster(ra, params, ubos, ubos_len, samplers, srvs,
+                              samplers_len, uavs, uavs_len);
+    }
+}
+
+static void timer_destroy(struct ra *ra, ra_timer *ratimer)
+{
+    // Release the timer's three queries and free it. NULL is a no-op.
+    struct d3d_timer *t = ratimer;
+    if (t) {
+        SAFE_RELEASE(t->ts_start);
+        SAFE_RELEASE(t->ts_end);
+        SAFE_RELEASE(t->disjoint);
+        talloc_free(t);
+    }
+}
+
+static ra_timer *timer_create(struct ra *ra)
+{
+    // Returns a new timer, or NULL when timestamp queries are unavailable or
+    // any of the three queries could not be created.
+    struct ra_d3d11 *p = ra->priv;
+    if (!p->has_timestamp_queries)
+        return NULL;
+
+    D3D11_QUERY_DESC stamp_desc = { D3D11_QUERY_TIMESTAMP };
+    D3D11_QUERY_DESC disjoint_desc = { D3D11_QUERY_TIMESTAMP_DISJOINT };
+    struct d3d_timer *t = talloc_zero(NULL, struct d3d_timer);
+
+    HRESULT hr = ID3D11Device_CreateQuery(p->dev, &stamp_desc, &t->ts_start);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create start query: %s\n", mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    hr = ID3D11Device_CreateQuery(p->dev, &stamp_desc, &t->ts_end);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create end query: %s\n", mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    // A duration needs three queries in D3D11: the start and end timestamps
+    // plus a disjoint query. The disjoint query carries a flag telling whether
+    // the timestamps are usable or a discontinuity (like a change in power
+    // state or clock speed) happened between them, and it also holds the
+    // timer frequency, without which the timestamps are useless.
+    hr = ID3D11Device_CreateQuery(p->dev, &disjoint_desc, &t->disjoint);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create timer query: %s\n", mp_HRESULT_to_str(hr));
+        goto error;
+    }
+
+    return t;
+error:
+    timer_destroy(ra, t);
+    return NULL;
+}
+
+static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq)
+{
+    const uint64_t ns = 1000000000llu, rem = timestamp % freq; // freq != 0
+    return (timestamp / freq) * ns + (rem * ns) / freq;
+}
+
+static uint64_t timer_get_result(struct ra *ra, ra_timer *ratimer)
+{
+    // Returns end - start converted to ns. Returns 0 unless all three GetData
+    // calls succeed with something other than S_FALSE and the disjoint result
+    // has Disjoint unset and a nonzero Frequency.
+    struct ra_d3d11 *p = ra->priv;
+    struct d3d_timer *t = ratimer;
+    const UINT flags = D3D11_ASYNC_GETDATA_DONOTFLUSH;
+    D3D11_QUERY_DATA_TIMESTAMP_DISJOINT dj;
+    UINT64 first, last;
+    HRESULT hr;
+
+    hr = ID3D11DeviceContext_GetData(p->ctx,
+        (ID3D11Asynchronous *)t->ts_end, &last, sizeof(last), flags);
+    if (FAILED(hr) || hr == S_FALSE)
+        return 0;
+    hr = ID3D11DeviceContext_GetData(p->ctx,
+        (ID3D11Asynchronous *)t->ts_start, &first, sizeof(first), flags);
+    if (FAILED(hr) || hr == S_FALSE)
+        return 0;
+    hr = ID3D11DeviceContext_GetData(p->ctx,
+        (ID3D11Asynchronous *)t->disjoint, &dj, sizeof(dj), flags);
+    if (FAILED(hr) || hr == S_FALSE || dj.Disjoint || !dj.Frequency)
+        return 0;
+
+    return timestamp_to_ns(last - first, dj.Frequency);
+}
+
+static void timer_start(struct ra *ra, ra_timer *ratimer)
+{
+    struct d3d_timer *t = ratimer;
+    struct ra_d3d11 *p = ra->priv;
+    // Read back the previous measurement before the queries are reused;
+    // timer_stop() hands this latched value to the caller.
+    t->result = timer_get_result(ra, ratimer);
+    // Open the disjoint query, then record the start timestamp
+    ID3D11DeviceContext_Begin(p->ctx, (ID3D11Asynchronous *)t->disjoint);
+    ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)t->ts_start);
+}
+
+static uint64_t timer_stop(struct ra *ra, ra_timer *ratimer)
+{
+    struct d3d_timer *t = ratimer;
+    struct ra_d3d11 *p = ra->priv;
+
+    // Record the end timestamp, then close the disjoint query around it
+    ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)t->ts_end);
+    ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)t->disjoint);
+    return t->result; // the value latched by the last timer_start()
+}
+
+static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev)
+{
+    // Map a D3D11 debug-layer severity to an mpv log level. INFO, MESSAGE and
+    // any unrecognized severity are logged at debug level.
+    int level = MSGL_DEBUG;
+    if (sev == D3D11_MESSAGE_SEVERITY_CORRUPTION) {
+        level = MSGL_FATAL;
+    } else if (sev == D3D11_MESSAGE_SEVERITY_ERROR) {
+        level = MSGL_ERR;
+    } else if (sev == D3D11_MESSAGE_SEVERITY_WARNING) {
+        level = MSGL_WARN;
+    }
+
+    return level;
+}
+
+static int map_msg_severity_by_id(D3D11_MESSAGE_ID id,
+                                  D3D11_MESSAGE_SEVERITY sev)
+{
+    switch (id) { // per-ID overrides; other IDs use the severity mapping
+    // These are normal. The RA timer queue habitually reuses timer objects
+    // without retrieving the results.
+    case D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS:
+    case D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS:
+        return MSGL_TRACE;
+
+    // D3D11 writes log messages every time an object is created or
+    // destroyed. That results in a lot of log spam, so force MSGL_TRACE.
+#define OBJ_LIFETIME_MESSAGES(obj)          \
+    case D3D11_MESSAGE_ID_CREATE_ ## obj:   \
+    case D3D11_MESSAGE_ID_DESTROY_ ## obj
+    // Each use expands to the CREATE_ and DESTROY_ case labels of one object
+    OBJ_LIFETIME_MESSAGES(CONTEXT):
+    OBJ_LIFETIME_MESSAGES(BUFFER):
+    OBJ_LIFETIME_MESSAGES(TEXTURE1D):
+    OBJ_LIFETIME_MESSAGES(TEXTURE2D):
+    OBJ_LIFETIME_MESSAGES(TEXTURE3D):
+    OBJ_LIFETIME_MESSAGES(SHADERRESOURCEVIEW):
+    OBJ_LIFETIME_MESSAGES(RENDERTARGETVIEW):
+    OBJ_LIFETIME_MESSAGES(DEPTHSTENCILVIEW):
+    OBJ_LIFETIME_MESSAGES(VERTEXSHADER):
+    OBJ_LIFETIME_MESSAGES(HULLSHADER):
+    OBJ_LIFETIME_MESSAGES(DOMAINSHADER):
+    OBJ_LIFETIME_MESSAGES(GEOMETRYSHADER):
+    OBJ_LIFETIME_MESSAGES(PIXELSHADER):
+    OBJ_LIFETIME_MESSAGES(INPUTLAYOUT):
+    OBJ_LIFETIME_MESSAGES(SAMPLER):
+    OBJ_LIFETIME_MESSAGES(BLENDSTATE):
+    OBJ_LIFETIME_MESSAGES(DEPTHSTENCILSTATE):
+    OBJ_LIFETIME_MESSAGES(RASTERIZERSTATE):
+    OBJ_LIFETIME_MESSAGES(QUERY):
+    OBJ_LIFETIME_MESSAGES(PREDICATE):
+    OBJ_LIFETIME_MESSAGES(COUNTER):
+    OBJ_LIFETIME_MESSAGES(COMMANDLIST):
+    OBJ_LIFETIME_MESSAGES(CLASSINSTANCE):
+    OBJ_LIFETIME_MESSAGES(CLASSLINKAGE):
+    OBJ_LIFETIME_MESSAGES(COMPUTESHADER):
+    OBJ_LIFETIME_MESSAGES(UNORDEREDACCESSVIEW):
+    OBJ_LIFETIME_MESSAGES(VIDEODECODER):
+    OBJ_LIFETIME_MESSAGES(VIDEOPROCESSORENUM):
+    OBJ_LIFETIME_MESSAGES(VIDEOPROCESSOR):
+    OBJ_LIFETIME_MESSAGES(DECODEROUTPUTVIEW):
+    OBJ_LIFETIME_MESSAGES(PROCESSORINPUTVIEW):
+    OBJ_LIFETIME_MESSAGES(PROCESSOROUTPUTVIEW):
+    OBJ_LIFETIME_MESSAGES(DEVICECONTEXTSTATE):
+    OBJ_LIFETIME_MESSAGES(FENCE):
+        return MSGL_TRACE;
+
+#undef OBJ_LIFETIME_MESSAGES
+
+    default: // no override for this ID: fall back to the severity mapping
+        return map_msg_severity(sev);
+    }
+}
+
+static void debug_marker(struct ra *ra, const char *msg)
+{
+    struct ra_d3d11 *p = ra->priv;
+    void *talloc_ctx = talloc_new(NULL);
+    HRESULT hr;
+    // Without an info queue (see init_debug_layer) there is nothing to copy
+    if (!p->iqueue)
+        goto done;
+
+    // Copy debug-layer messages to mpv's log output, under a "<msg>:" header
+    bool printed_header = false;
+    uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue);
+    for (uint64_t i = 0; i < messages; i++) {
+        SIZE_T len; // first call only asks for the size of message i
+        hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len);
+        if (FAILED(hr) || !len)
+            goto done;
+
+        D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len);
+        hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len);
+        if (FAILED(hr))
+            goto done;
+
+        int msgl = map_msg_severity_by_id(d3dmsg->ID, d3dmsg->Severity);
+        if (mp_msg_test(ra->log, msgl)) {
+            if (!printed_header)
+                MP_INFO(ra, "%s:\n", msg);
+            printed_header = true;
+
+            MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID,
+                (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription);
+        }
+        talloc_free(d3dmsg); // free every message, not only the printed ones
+    }
+
+    ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
+done:
+    talloc_free(talloc_ctx);
+}
+
+static void destroy(struct ra *ra)
+{
+    struct ra_d3d11 *p = ra->priv;
+    // Tears down the whole RA; `ra` itself is freed at the end.
+    // Release everything except the interfaces needed to perform leak checking
+    SAFE_RELEASE(p->clear_ps);
+    SAFE_RELEASE(p->clear_vs);
+    SAFE_RELEASE(p->clear_layout);
+    SAFE_RELEASE(p->clear_vbuf);
+    SAFE_RELEASE(p->clear_cbuf);
+    SAFE_RELEASE(p->blit_float_ps);
+    SAFE_RELEASE(p->blit_vs);
+    SAFE_RELEASE(p->blit_layout);
+    SAFE_RELEASE(p->blit_vbuf);
+    SAFE_RELEASE(p->blit_sampler);
+    SAFE_RELEASE(p->vbuf);
+    SAFE_RELEASE(p->ctx1);
+    SAFE_RELEASE(p->dev1);
+    SAFE_RELEASE(p->dev);
+
+    if (p->ctx) {
+        // Destroy the device context synchronously so referenced objects don't
+        // show up in the leak check
+        ID3D11DeviceContext_ClearState(p->ctx);
+        ID3D11DeviceContext_Flush(p->ctx);
+    }
+    SAFE_RELEASE(p->ctx);
+
+    if (p->debug) {
+        // Report any leaked objects
+        debug_marker(ra, "after destroy"); // log what is queued so far
+        ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL);
+        debug_marker(ra, "after leak check"); // log the detailed report
+        ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY);
+        debug_marker(ra, "after leak summary"); // log the summary report
+    }
+    SAFE_RELEASE(p->debug); // kept until here for the leak check above
+    SAFE_RELEASE(p->iqueue); // kept until here: debug_marker reads from it
+
+    talloc_free(ra);
+}
+
+static struct ra_fns ra_fns_d3d11 = { // ra_fns entry points -> D3D11 functions
+    .destroy            = destroy,
+    .tex_create         = tex_create,
+    .tex_destroy        = tex_destroy,
+    .tex_upload         = tex_upload,
+    .tex_download       = tex_download,
+    .buf_create         = buf_create,
+    .buf_destroy        = buf_destroy,
+    .buf_update         = buf_update,
+    .clear              = clear,
+    .blit               = blit,
+    .uniform_layout     = std140_layout,
+    .desc_namespace     = desc_namespace,
+    .renderpass_create  = renderpass_create,
+    .renderpass_destroy = renderpass_destroy,
+    .renderpass_run     = renderpass_run,
+    .timer_create       = timer_create, // returns NULL without timestamp queries
+    .timer_destroy      = timer_destroy,
+    .timer_start        = timer_start,
+    .timer_stop         = timer_stop,
+    .debug_marker       = debug_marker,
+};
+
+void ra_d3d11_flush(struct ra *ra)
+{
+    // Flush the D3D11 device context owned by this RA
+    ID3D11DeviceContext_Flush(((struct ra_d3d11 *)ra->priv)->ctx);
+}
+
+static void init_debug_layer(struct ra *ra)
+{
+    // Fetch the device's debug and info-queue interfaces into p->debug and
+    // p->iqueue, giving up at the first one that cannot be obtained.
+    struct ra_d3d11 *p = ra->priv;
+    void **debug_out = (void**)&p->debug;
+    void **iqueue_out = (void**)&p->iqueue;
+    HRESULT hr;
+
+    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug, debug_out);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr));
+        return;
+    }
+    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue, iqueue_out);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to get info queue: %s\n", mp_HRESULT_to_str(hr));
+        return;
+    }
+
+    // Let the queue hold an unlimited number of messages. That is fine
+    // because debug_marker() clears the stored messages regularly.
+    ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1);
+    // An empty storage filter lets every message through
+    D3D11_INFO_QUEUE_FILTER everything = {0};
+    ID3D11InfoQueue_PushStorageFilter(p->iqueue, &everything);
+}
+
+static struct dll_version get_dll_version(HMODULE dll)
+{
+    void *ctx = talloc_new(NULL);
+    struct dll_version ret = { 0 };
+    // Read the DLL's version resource; `ret` stays all-zero on any failure
+    HRSRC rsrc = FindResourceW(dll, MAKEINTRESOURCEW(VS_VERSION_INFO),
+                               VS_FILE_INFO);
+    if (!rsrc)
+        goto done;
+    DWORD size = SizeofResource(dll, rsrc);
+    HGLOBAL res = LoadResource(dll, rsrc);
+    if (!res)
+        goto done;
+    void *ptr = LockResource(res);
+    if (!ptr)
+        goto done;
+    void *copy = talloc_memdup(ctx, ptr, size); // query a private copy
+
+    VS_FIXEDFILEINFO *ffi;
+    UINT ffi_len;
+    if (!VerQueryValueW(copy, L"\\", (void**)&ffi, &ffi_len))
+        goto done;
+    if (ffi_len < sizeof(*ffi)) // too short to be a VS_FIXEDFILEINFO
+        goto done;
+    // Each DWORD of the file version packs two 16-bit components
+    ret.major = HIWORD(ffi->dwFileVersionMS);
+    ret.minor = LOWORD(ffi->dwFileVersionMS);
+    ret.build = HIWORD(ffi->dwFileVersionLS);
+    ret.revision = LOWORD(ffi->dwFileVersionLS);
+
+done:
+    talloc_free(ctx); // frees `copy` as well
+    return ret;
+}
+// Load a D3DCompiler DLL, record its version and resolve D3DCompile. Returns false if unusable.
+static bool load_d3d_compiler(struct ra *ra)
+{
+    struct ra_d3d11 *p = ra->priv;
+    HMODULE d3dcompiler = NULL;
+
+    // Try the inbox D3DCompiler first (Windows 8.1 and up)
+    if (IsWindows8Point1OrGreater()) {
+        d3dcompiler = LoadLibraryExW(L"d3dcompiler_47.dll", NULL,
+                                     LOAD_LIBRARY_SEARCH_SYSTEM32);
+    }
+    // Check for a packaged version of d3dcompiler_47.dll
+    if (!d3dcompiler)
+        d3dcompiler = LoadLibraryW(L"d3dcompiler_47.dll");
+    // Try d3dcompiler_46.dll from the Windows 8 SDK
+    if (!d3dcompiler)
+        d3dcompiler = LoadLibraryW(L"d3dcompiler_46.dll");
+    // Try d3dcompiler_43.dll from the June 2010 DirectX SDK
+    if (!d3dcompiler)
+        d3dcompiler = LoadLibraryW(L"d3dcompiler_43.dll");
+    // Can't find any compiler DLL, so give up
+    if (!d3dcompiler)
+        return false;
+
+    p->d3d_compiler_ver = get_dll_version(d3dcompiler);
+
+    p->D3DCompile = (pD3DCompile)GetProcAddress(d3dcompiler, "D3DCompile");
+    if (!p->D3DCompile)
+        FreeLibrary(d3dcompiler); // unusable DLL: drop our reference instead of leaking it
+    return p->D3DCompile != NULL;
+}
+// Raise ra->max_texture_wh by doubling it for as long as the device accepts the texture size.
+static void find_max_texture_dimension(struct ra *ra)
+{
+    struct ra_d3d11 *p = ra->priv;
+    // Probe description; only Width/Height change between attempts.
+    D3D11_TEXTURE2D_DESC desc = {
+        .Width = ra->max_texture_wh,
+        .Height = ra->max_texture_wh,
+        .MipLevels = 1,
+        .ArraySize = 1,
+        .SampleDesc.Count = 1,
+        .Format = DXGI_FORMAT_R8_UNORM,
+        .BindFlags = D3D11_BIND_SHADER_RESOURCE,
+    };
+    while (true) {
+        desc.Height = desc.Width *= 2;
+        if (desc.Width >= 0x8000000u) // hard upper bound for the doubling
+            return;
+        if (FAILED(ID3D11Device_CreateTexture2D(p->dev, &desc, NULL, NULL))) // NULL out pointer: only the HRESULT is used
+            return;
+        ra->max_texture_wh = desc.Width; // this size was accepted
+    }
+}
+// Create an RA instance for dev: takes a device reference, detects capabilities, sets fixed state.
+struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log,
+                           struct spirv_compiler *spirv)
+{
+    HRESULT hr;
+
+    struct ra *ra = talloc_zero(NULL, struct ra);
+    ra->log = log;
+    ra->fns = &ra_fns_d3d11;
+
+    // Even Direct3D 10level9 supports 3D textures
+    ra->caps = RA_CAP_TEX_3D | RA_CAP_DIRECT_UPLOAD | RA_CAP_BUF_RO |
+               RA_CAP_BLIT | spirv->ra_caps;
+
+    ra->glsl_version = spirv->glsl_version;
+    ra->glsl_vulkan = true;
+
+    struct ra_d3d11 *p = ra->priv = talloc_zero(ra, struct ra_d3d11);
+    p->spirv = spirv;
+    // minor becomes 1 if the device also exposes ID3D11Device1.
+    int minor = 0;
+    ID3D11Device_AddRef(dev);
+    p->dev = dev;
+    ID3D11Device_GetImmediateContext(p->dev, &p->ctx);
+    hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1,
+                                     (void**)&p->dev1);
+    if (SUCCEEDED(hr)) {
+        minor = 1;
+        ID3D11Device1_GetImmediateContext1(p->dev1, &p->ctx1);
+
+        D3D11_FEATURE_DATA_D3D11_OPTIONS fopts = { 0 };
+        hr = ID3D11Device_CheckFeatureSupport(p->dev,
+            D3D11_FEATURE_D3D11_OPTIONS, &fopts, sizeof(fopts));
+        if (SUCCEEDED(hr)) {
+            p->has_clear_view = fopts.ClearView;
+        }
+    }
+
+    MP_VERBOSE(ra, "Using Direct3D 11.%d runtime\n", minor);
+    // Initial texture size limit, chosen by feature level.
+    p->fl = ID3D11Device_GetFeatureLevel(p->dev);
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+        ra->max_texture_wh = D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+    } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) {
+        ra->max_texture_wh = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+    } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) {
+        ra->max_texture_wh = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+    } else {
+        ra->max_texture_wh = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION;
+    }
+
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0)
+        ra->caps |= RA_CAP_GATHER;
+    if (p->fl >= D3D_FEATURE_LEVEL_10_0)
+        ra->caps |= RA_CAP_FRAGCOORD;
+
+    // Some 10_0 hardware has compute shaders, but only 11_0 has image load/store
+    if (p->fl >= D3D_FEATURE_LEVEL_11_0) {
+        ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW;
+        ra->max_shmem = 32 * 1024;
+        ra->max_compute_group_threads =
+            D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP;
+    }
+    // The UAV slot count also depends on the feature level.
+    if (p->fl >= D3D_FEATURE_LEVEL_11_1) {
+        p->max_uavs = D3D11_1_UAV_SLOT_COUNT;
+    } else {
+        p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT;
+    }
+    // Debug interfaces are only requested when the device was created with the debug flag.
+    if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG)
+        init_debug_layer(ra);
+
+    // Some level 9_x devices don't have timestamp queries
+    hr = ID3D11Device_CreateQuery(p->dev,
+        &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL);
+    p->has_timestamp_queries = SUCCEEDED(hr);
+
+    debug_marker(ra, "before maximum Texture2D size lookup");
+
+    // According to MSDN, the above texture sizes are just minimums and drivers
+    // may support larger textures. See:
+    // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx
+    find_max_texture_dimension(ra);
+
+    // Ignore any messages during find_max_texture_dimension
+    if (p->iqueue)
+        ID3D11InfoQueue_ClearStoredMessages(p->iqueue);
+
+    MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh,
+               ra->max_texture_wh);
+
+    if (!load_d3d_compiler(ra)) {
+        MP_FATAL(ra, "Could not find D3DCompiler DLL\n");
+        goto error;
+    }
+
+    MP_VERBOSE(ra, "D3DCompiler version: %u.%u.%u.%u\n",
+               p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor,
+               p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision);
+
+    setup_formats(ra);
+
+    // The rasterizer state never changes, so set it up here
+    ID3D11RasterizerState *rstate;
+    D3D11_RASTERIZER_DESC rdesc = {
+        .FillMode = D3D11_FILL_SOLID,
+        .CullMode = D3D11_CULL_NONE,
+        .FrontCounterClockwise = FALSE,
+        .DepthClipEnable = TRUE, // Required for 10level9
+        .ScissorEnable = TRUE,
+    };
+    hr = ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &rstate);
+    if (FAILED(hr)) {
+        MP_ERR(ra, "Failed to create rasterizer state: %s\n", mp_HRESULT_to_str(hr));
+        goto error;
+    }
+    ID3D11DeviceContext_RSSetState(p->ctx, rstate);
+    SAFE_RELEASE(rstate);
+
+    // If the device doesn't support ClearView, we have to set up a
+    // shader-based clear() implementation
+    if (!p->has_clear_view && !setup_clear_rpass(ra))
+        goto error;
+
+    if (!setup_blit_rpass(ra))
+        goto error;
+
+    return ra;
+    // Every failure above hands the partially built instance to destroy().
+error:
+    destroy(ra);
+    return NULL;
+}
+// Return the wrapped device after adding a reference to it; the caller must release it.
+ID3D11Device *ra_d3d11_get_device(struct ra *ra)
+{
+    ID3D11Device *dev = ((struct ra_d3d11 *)ra->priv)->dev;
+    ID3D11Device_AddRef(dev);
+    return dev;
+}
+// True when ra uses the D3D11 function table defined in this file.
+bool ra_is_d3d11(struct ra *ra)
+{
+    return &ra_fns_d3d11 == ra->fns;
+}
diff --git a/video/out/d3d11/ra_d3d11.h b/video/out/d3d11/ra_d3d11.h
new file mode 100644
index 0000000..6f62a7f
--- /dev/null
+++ b/video/out/d3d11/ra_d3d11.h
@@ -0,0 +1,47 @@
+#pragma once
+
+#include <stdbool.h>
+#include <windows.h>
+#include <d3d11.h>
+#include <dxgi1_2.h>
+
+#include "video/out/gpu/ra.h"
+#include "video/out/gpu/spirv.h"
+
+// Get the underlying DXGI format from an RA format
+DXGI_FORMAT ra_d3d11_get_format(const struct ra_format *fmt);
+
+// Gets the matching ra_format for a given DXGI format.
+// Returns NULL if there is no known match.
+const struct ra_format *ra_d3d11_get_ra_format(struct ra *ra, DXGI_FORMAT fmt);
+
+// Create an RA instance from a D3D11 device. This takes a reference to the
+// device, which is released when the RA instance is destroyed.
+struct ra *ra_d3d11_create(ID3D11Device *device, struct mp_log *log,
+                           struct spirv_compiler *spirv);
+
+// Flush the immediate context of the wrapped D3D11 device
+void ra_d3d11_flush(struct ra *ra);
+
+// Create an RA texture from a D3D11 resource. This takes a reference to the
+// texture, which is released when the RA texture is destroyed.
+struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res);
+
+// As above, but for a D3D11VA video resource. The fmt parameter selects which
+// plane of a planar format will be mapped when the RA texture is used.
+// array_slice should be set for texture arrays and is ignored for non-arrays.
+struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res,
+                                       int w, int h, int array_slice,
+                                       const struct ra_format *fmt);
+
+// Get the underlying D3D11 resource from an RA texture. The returned resource
+// is refcounted and must be released by the caller.
+ID3D11Resource *ra_d3d11_get_raw_tex(struct ra *ra, struct ra_tex *tex,
+                                     int *array_slice);
+
+// Get the underlying D3D11 device from an RA instance. The returned device is
+// refcounted and must be released by the caller.
+ID3D11Device *ra_d3d11_get_device(struct ra *ra);
+
+// True if the RA instance was created with ra_d3d11_create()
+bool ra_is_d3d11(struct ra *ra);
diff --git a/video/out/dither.c b/video/out/dither.c
new file mode 100644
index 0000000..44558ba
--- /dev/null
+++ b/video/out/dither.c
@@ -0,0 +1,175 @@
+/*
+ * Generate a dithering matrix for downsampling images.
+ *
+ * Copyright © 2013  Wessel Dankers <wsl@fruit.je>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <inttypes.h>
+#include <string.h>
+#include <assert.h>
+#include <math.h>
+
+#include <libavutil/lfg.h>
+
+#include "mpv_talloc.h"
+#include "dither.h"
+
+#define MAX_SIZEB 8
+#define MAX_SIZE (1 << MAX_SIZEB)
+#define MAX_SIZE2 (MAX_SIZE * MAX_SIZE)
+
+#define WRAP_SIZE2(k, x) ((unsigned int)((unsigned int)(x) & ((k)->size2 - 1)))
+#define XY(k, x, y) ((unsigned int)(((x) | ((y) << (k)->sizeb))))
+// Working state used while generating the dither matrix (see makegauss/makeuniform).
+struct ctx {
+    unsigned int sizeb, size, size2; // size = 1 << sizeb, size2 = size * size
+    unsigned int gauss_radius; // size / 2 - 1
+    unsigned int gauss_middle; // XY index of (gauss_radius, gauss_radius)
+    uint64_t gauss[MAX_SIZE2]; // kernel weights, filled in by makegauss()
+    unsigned int randomat[MAX_SIZE2]; // scratch list of tied candidates in getmin()
+    bool calcmat[MAX_SIZE2]; // true once a cell has been assigned by setbit()
+    uint64_t gaussmat[MAX_SIZE2]; // kernel sums accumulated by setbit()
+    unsigned int unimat[MAX_SIZE2]; // step index stored per cell by makeuniform()
+    AVLFG avlfg; // PRNG used for tie-breaking, seeded in makegauss()
+};
+// Set up the sizes for a (1 << sizeb)-wide matrix, seed the PRNG and fill k->gauss with the kernel.
+static void makegauss(struct ctx *k, unsigned int sizeb)
+{
+    assert(sizeb >= 1 && sizeb <= MAX_SIZEB);
+
+    av_lfg_init(&k->avlfg, 123);
+
+    k->sizeb = sizeb;
+    k->size = 1 << k->sizeb;
+    k->size2 = k->size * k->size;
+
+    k->gauss_radius = k->size / 2 - 1;
+    k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius);
+
+    unsigned int gauss_size = k->gauss_radius * 2 + 1;
+    unsigned int gauss_size2 = gauss_size * gauss_size;
+    // Clear the whole table; only a gauss_size x gauss_size region is written below.
+    for (unsigned int c = 0; c < k->size2; c++)
+        k->gauss[c] = 0;
+
+    double sigma = -log(1.5 / (double) UINT64_MAX * gauss_size2) / k->gauss_radius;
+    // Compute the entries with gx <= gy and mirror each one into its symmetric positions.
+    for (unsigned int gy = 0; gy <= k->gauss_radius; gy++) {
+        for (unsigned int gx = 0; gx <= gy; gx++) {
+            int cx = (int)gx - k->gauss_radius;
+            int cy = (int)gy - k->gauss_radius;
+            int sq = cx * cx + cy * cy;
+            double e = exp(-sqrt(sq) * sigma);
+            uint64_t v = e / gauss_size2 * (double) UINT64_MAX;
+            k->gauss[XY(k, gx, gy)] =
+                k->gauss[XY(k, gy, gx)] =
+                k->gauss[XY(k, gx, gauss_size - 1 - gy)] =
+                k->gauss[XY(k, gy, gauss_size - 1 - gx)] =
+                k->gauss[XY(k, gauss_size - 1 - gx, gy)] =
+                k->gauss[XY(k, gauss_size - 1 - gy, gx)] =
+                k->gauss[XY(k, gauss_size - 1 - gx, gauss_size - 1 - gy)] =
+                k->gauss[XY(k, gauss_size - 1 - gy, gauss_size - 1 - gx)] = v;
+        }
+    }
+    uint64_t total = 0; // sanity check: the sum of all weights must not wrap around
+    for (unsigned int c = 0; c < k->size2; c++) {
+        uint64_t oldtotal = total;
+        total += k->gauss[c];
+        assert(total >= oldtotal);
+    }
+}
+// Mark cell c as assigned and add the kernel to gaussmat, shifted so its middle lands on c.
+static void setbit(struct ctx *k, unsigned int c)
+{
+    if (k->calcmat[c]) // already assigned: nothing to add
+        return;
+    k->calcmat[c] = true;
+    uint64_t *m = k->gaussmat;
+    uint64_t *me = k->gaussmat + k->size2;
+    uint64_t *g = k->gauss + WRAP_SIZE2(k, k->gauss_middle + k->size2 - c);
+    uint64_t *ge = k->gauss + k->size2;
+    while (g < ge) // first part: from the shifted start up to the end of gauss
+        *m++ += *g++;
+    g = k->gauss; // wrap around to the beginning of gauss
+    while (m < me)
+        *m++ += *g++;
+}
+// Pick an unassigned cell with the smallest gaussmat value; ties are broken with the PRNG.
+static unsigned int getmin(struct ctx *k)
+{
+    const unsigned int cells = k->size2;
+    uint64_t best = UINT64_MAX;
+    unsigned int ncand = 0;
+    for (unsigned int idx = 0; idx < cells; idx++) {
+        if (k->calcmat[idx])
+            continue;
+        const uint64_t sum = k->gaussmat[idx];
+        if (sum > best)
+            continue;
+        if (sum < best) { // strictly better: restart the candidate list
+            best = sum;
+            ncand = 0;
+        }
+        k->randomat[ncand++] = idx;
+    }
+    if (ncand == 1)
+        return k->randomat[0];
+    if (ncand == cells)
+        return cells / 2;
+    return k->randomat[av_lfg_get(&k->avlfg) % ncand];
+}
+// Assign every cell a distinct step index in the order in which getmin() selects the cells.
+static void makeuniform(struct ctx *k)
+{
+    for (unsigned int step = 0, total = k->size2; step < total; step++) {
+        // Select the next cell, mark it, then record when it was chosen.
+        const unsigned int cell = getmin(k);
+        setbit(k, cell);
+        k->unimat[cell] = step;
+    }
+}
+// Generate the dither matrix and store each cell's step index divided by the cell count.
+// out_matrix is a rectangular tsize * tsize array, where tsize = (1 << size).
+void mp_make_fruit_dither_matrix(float *out_matrix, int size)
+{
+    struct ctx *state = talloc_zero(NULL, struct ctx);
+    makegauss(state, size);
+    makeuniform(state);
+    float invscale = state->size2;
+    for (unsigned int row = 0; row < state->size; row++) {
+        for (unsigned int col = 0; col < state->size; col++)
+            out_matrix[col + row * state->size] = state->unimat[XY(state, col, row)] / invscale;
+    }
+    talloc_free(state);
+}
+// Fill the size x size matrix m in place, growing a filled sz x sz block (sz = 1, 2, 4, ...).
+void mp_make_ordered_dither_matrix(unsigned char *m, int size)
+{
+    m[0] = 0;
+    for (int sz = 1; sz < size; sz *= 2) {
+        int offset[] = {sz*size, sz, sz * (size+1), 0}; // offset 0 rewrites the source block, so it comes last
+        for (int i = 0; i < 4; i++)
+            for (int y = 0; y < sz * size; y += size)
+                for (int x = 0; x < sz; x++)
+                    m[x+y+offset[i]] = m[x+y] * 4 + (3-i) * 256/size/size;
+    }
+}
diff --git a/video/out/dither.h b/video/out/dither.h
new file mode 100644
index 0000000..ca804e3
--- /dev/null
+++ b/video/out/dither.h
@@ -0,0 +1,2 @@
+void mp_make_fruit_dither_matrix(float *out_matrix, int size);
+void mp_make_ordered_dither_matrix(unsigned char *m, int size);
diff --git a/video/out/dr_helper.c b/video/out/dr_helper.c
new file mode 100644
index 0000000..ac440a7
--- /dev/null
+++ b/video/out/dr_helper.c
@@ -0,0 +1,162 @@
+#include <assert.h>
+#include <stdatomic.h>
+#include <stdlib.h>
+
+#include <libavutil/buffer.h>
+
+#include "misc/dispatch.h"
+#include "mpv_talloc.h"
+#include "osdep/threads.h"
+#include "video/mp_image.h"
+
+#include "dr_helper.h"
+// State of one DR helper: the registered DR thread, the dispatch queue and the allocator callback.
+struct dr_helper {
+    mp_mutex thread_lock; // held while reading or writing thread_id/thread_valid
+    mp_thread_id thread_id;
+    bool thread_valid; // (POSIX defines no "unset" mp_thread value yet)
+    // Queue used to run get_image and deferred frees; count of images not yet freed.
+    struct mp_dispatch_queue *dispatch;
+    atomic_ullong dr_in_flight;
+    // Allocation callback and its opaque argument, as passed to dr_helper_create().
+    struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h,
+                                  int stride_align, int flags);
+    void *get_image_ctx;
+};
+// talloc destructor installed by dr_helper_create().
+static void dr_helper_destroy(void *ptr)
+{
+    struct dr_helper *helper = ptr;
+
+    // Every DR buffer must have been released by now; a buffer that is still
+    // alive would be left holding a dangling pointer to this helper.
+    assert(atomic_load(&helper->dr_in_flight) == 0);
+
+    mp_mutex_destroy(&helper->thread_lock);
+}
+// Allocate a helper that will run get_image(get_image_ctx, ...) through the given dispatch queue.
+struct dr_helper *dr_helper_create(struct mp_dispatch_queue *dispatch,
+            struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h,
+                                          int stride_align, int flags),
+            void *get_image_ctx)
+{
+    struct dr_helper *helper = talloc_ptrtype(NULL, helper);
+    talloc_set_destructor(helper, dr_helper_destroy);
+    *helper = (struct dr_helper){
+        .get_image_ctx = get_image_ctx,
+        .get_image = get_image,
+        .dr_in_flight = 0,
+        .dispatch = dispatch,
+    };
+    mp_mutex_init(&helper->thread_lock);
+    return helper;
+}
+// Register the calling thread as the DR thread; asserts that no thread is registered yet.
+void dr_helper_acquire_thread(struct dr_helper *dr)
+{
+    mp_mutex_lock(&dr->thread_lock);
+    assert(!dr->thread_valid); // fails on API user errors
+    dr->thread_id = mp_thread_current_id();
+    dr->thread_valid = true;
+    mp_mutex_unlock(&dr->thread_lock);
+}
+// Unregister the DR thread; asserts that the caller is the thread that is currently registered.
+void dr_helper_release_thread(struct dr_helper *dr)
+{
+    mp_mutex_lock(&dr->thread_lock);
+    // Both assertions fail only on API user errors.
+    assert(dr->thread_valid);
+    assert(mp_thread_id_equal(dr->thread_id, mp_thread_current_id()));
+    dr->thread_valid = false;
+    mp_mutex_unlock(&dr->thread_lock);
+}
+// Argument of dr_thread_free(): the helper to account against and the buffer ref to release.
+struct free_dr_context {
+    struct dr_helper *dr;
+    AVBufferRef *ref; // the image's original bufs[0], stored by sync_get_image()
+};
+// Drop one in-flight count, release the original buffer ref and free the context itself.
+static void dr_thread_free(void *ptr)
+{
+    struct free_dr_context *fctx = ptr;
+
+    unsigned long long before = atomic_fetch_sub(&fctx->dr->dr_in_flight, 1);
+    assert(before); // value before sub is 0 - unexpected underflow.
+
+    av_buffer_unref(&fctx->ref);
+    talloc_free(fctx);
+}
+// AVBuffer free callback: free directly when called on the DR thread, otherwise enqueue the free.
+static void free_dr_buffer_on_dr_thread(void *opaque, uint8_t *data)
+{
+    struct free_dr_context *fctx = opaque;
+    struct dr_helper *helper = fctx->dr;
+
+    mp_mutex_lock(&helper->thread_lock);
+    bool is_dr_thread = helper->thread_valid &&
+        mp_thread_id_equal(helper->thread_id, mp_thread_current_id());
+    mp_mutex_unlock(&helper->thread_lock);
+
+    // Unrefs may also happen on the DR thread itself, which in practice
+    // matters most on DR destruction; those are freed synchronously.
+    if (!is_dr_thread) {
+        mp_dispatch_enqueue(helper->dispatch, dr_thread_free, fctx);
+        return;
+    }
+    dr_thread_free(fctx);
+}
+// Request passed from dr_helper_get_image() to sync_get_image() through the dispatch queue.
+struct get_image_cmd {
+    struct dr_helper *dr;
+    int imgfmt, w, h, stride_align, flags; // forwarded unchanged to dr->get_image()
+    struct mp_image *res; // set by sync_get_image(); NULL if get_image returned NULL
+};
+// Dispatch callback: allocate the image, then replace its buffer ref with an aliasing dummy ref.
+static void sync_get_image(void *ptr)
+{
+    struct get_image_cmd *cmd = ptr;
+    struct dr_helper *dr = cmd->dr;
+
+    cmd->res = dr->get_image(dr->get_image_ctx, cmd->imgfmt, cmd->w, cmd->h,
+                             cmd->stride_align, cmd->flags);
+    if (!cmd->res)
+        return;
+
+    // We require exactly 1 AVBufferRef.
+    assert(cmd->res->bufs[0]);
+    assert(!cmd->res->bufs[1]);
+
+    // Apply some magic to get it free'd on the DR thread as well. For this to
+    // work, we create a dummy-ref that aliases the original ref, which is why
+    // the original ref must be writable in the first place. (A newly allocated
+    // image should be always writable of course.)
+    assert(mp_image_is_writeable(cmd->res));
+    // ctx holds the original ref until dr_thread_free() unrefs it.
+    struct free_dr_context *ctx = talloc_zero(NULL, struct free_dr_context);
+    *ctx = (struct free_dr_context){
+        .dr = dr,
+        .ref = cmd->res->bufs[0],
+    };
+
+    AVBufferRef *new_ref = av_buffer_create(ctx->ref->data, ctx->ref->size,
+                                            free_dr_buffer_on_dr_thread, ctx, 0);
+    MP_HANDLE_OOM(new_ref);
+
+    cmd->res->bufs[0] = new_ref;
+    // Counted here; the matching decrement is in dr_thread_free().
+    atomic_fetch_add(&dr->dr_in_flight, 1);
+}
+// Run sync_get_image() through the dispatch queue and return the image it stored (may be NULL).
+struct mp_image *dr_helper_get_image(struct dr_helper *dr, int imgfmt,
+                                     int w, int h, int stride_align, int flags)
+{
+    struct get_image_cmd request = {
+        .flags = flags,
+        .stride_align = stride_align,
+        .h = h, .w = w,
+        .imgfmt = imgfmt,
+        .dr = dr,
+    };
+    mp_dispatch_run(dr->dispatch, sync_get_image, &request);
+    return request.res;
+}
diff --git a/video/out/dr_helper.h b/video/out/dr_helper.h
new file mode 100644
index 0000000..cf2ed14
--- /dev/null
+++ b/video/out/dr_helper.h
@@ -0,0 +1,37 @@
+#pragma once
+
+// This is a helper for implementing thread-safety for DR callbacks. These need
+// to allocate GPU buffers on the GPU thread (e.g. OpenGL with its forced TLS),
+// and the buffers also need to be freed on the GPU thread.
+// This is not a helpful "Dr.", rather it represents Satan in form of C code.
+struct dr_helper;
+
+struct mp_image;
+struct mp_dispatch_queue;
+
+// dr_helper_get_image() calls will use the dispatch queue to run get_image on
+// a target thread, which processes the dispatch queue.
+// Note: the dispatch queue must process outstanding async. work before the
+//       dr_helper instance can be destroyed.
+struct dr_helper *dr_helper_create(struct mp_dispatch_queue *dispatch,
+            struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h,
+                                          int stride_align, int flags),
+            void *get_image_ctx);
+
+// Make DR release calls (freeing images) reentrant if they are called on current
+// thread. That means any free call will directly release the image as allocated
+// with get_image().
+// Only 1 thread can use this at a time. Note that it would make no sense to
+// call this on more than 1 thread, as get_image is assumed not thread-safe.
+void dr_helper_acquire_thread(struct dr_helper *dr);
+
+// This _must_ be called on the same thread as dr_helper_acquire_thread() was
+// called. Every release call must be paired with an acquire call.
+void dr_helper_release_thread(struct dr_helper *dr);
+
+// Allocate an image by running the get_image callback on the target thread.
+// Always blocks on dispatch queue processing. This implies there is no way to
+// allocate a DR'ed image on the render thread (at least not in a way which
+// actually works if you want foreign threads to be able to free them).
+struct mp_image *dr_helper_get_image(struct dr_helper *dr, int imgfmt,
+                                     int w, int h, int stride_align, int flags);
diff --git a/video/out/drm_atomic.c b/video/out/drm_atomic.c
new file mode 100644
index 0000000..5754504
--- /dev/null
+++ b/video/out/drm_atomic.c
@@ -0,0 +1,458 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <inttypes.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "drm_atomic.h"
+// Query the object's properties and per-property metadata. Returns 0, or -1 with nothing allocated.
+int drm_object_create_properties(struct mp_log *log, int fd,
+                                 struct drm_object *object)
+{
+    object->props = drmModeObjectGetProperties(fd, object->id, object->type);
+    if (!object->props) {
+        mp_err(log, "Failed to retrieve properties for object id %d\n", object->id);
+        return -1;
+    }
+
+    // One metadata pointer per property. An entry stays NULL if
+    // drmModeGetProperty() fails for that property.
+    object->props_info = talloc_zero_size(NULL, object->props->count_props
+                                          * sizeof(*object->props_info));
+    if (!object->props_info) {
+        mp_err(log, "Out of memory\n");
+        drmModeFreeObjectProperties(object->props);
+        object->props = NULL;
+        return -1;
+    }
+
+    for (int i = 0; i < object->props->count_props; i++)
+        object->props_info[i] = drmModeGetProperty(fd, object->props->props[i]);
+
+    return 0;
+}
+// Release the property data of an object; safe when props and/or props_info are NULL.
+void drm_object_free_properties(struct drm_object *object)
+{
+    if (object->props && object->props_info) {
+        for (int i = 0; i < object->props->count_props; i++) {
+            if (object->props_info[i]) {
+                drmModeFreeProperty(object->props_info[i]);
+                object->props_info[i] = NULL;
+            }
+        }
+    }
+    talloc_free(object->props_info);
+    object->props_info = NULL;
+
+    if (object->props)
+        drmModeFreeObjectProperties(object->props);
+    object->props = NULL;
+}
+// Find a property by case-insensitive name; stores its value and returns 0, else -EINVAL.
+int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value)
+{
+   for (int i = 0; i < object->props->count_props; i++) {
+       drmModePropertyPtr info = object->props_info[i]; // NULL if its lookup failed
+       if (info && strcasecmp(name, info->name) == 0) {
+           *value = object->props->prop_values[i];
+           return 0;
+       }
+   }
+   return -EINVAL;
+}
+// Fetch the blob whose ID is the value of the named property; NULL if the property is missing.
+drmModePropertyBlobPtr drm_object_get_property_blob(struct drm_object *object, char *name)
+{
+   uint64_t id;
+
+   if (drm_object_get_property(object, name, &id) != 0)
+       return NULL;
+
+   // The property value is used as the blob ID.
+   return drmModeGetPropertyBlob(object->fd, id);
+}
+// Add "name = value" for this object to an atomic request; -EINVAL if unknown or immutable.
+int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object,
+                            char *name, uint64_t value)
+{
+   for (int i = 0; i < object->props->count_props; i++) {
+       drmModePropertyPtr info = object->props_info[i]; // NULL if its lookup failed
+       if (info && strcasecmp(name, info->name) == 0) {
+           if (info->flags & DRM_MODE_PROP_IMMUTABLE) {
+               /* Do not try to set immutable values, as this might cause the
+                * atomic commit operation to fail. */
+               return -EINVAL;
+           }
+           return drmModeAtomicAddProperty(request, object->id,
+                                           info->prop_id, value);
+       }
+   }
+   return -EINVAL;
+}
+// Allocate a drm_object for the given ID/type and load its properties; NULL if that fails.
+struct drm_object *drm_object_create(struct mp_log *log, int fd,
+                                     uint32_t object_id, uint32_t type)
+{
+    struct drm_object *object = talloc_zero(NULL, struct drm_object);
+    // Identify the object before its properties are queried.
+    object->fd = fd;
+    object->id = object_id;
+    object->type = type;
+
+    const int err = drm_object_create_properties(log, fd, object);
+    if (err != 0) {
+        talloc_free(object);
+        object = NULL;
+    }
+    return object;
+}
+// Free an object together with its property data; a NULL object is ignored.
+void drm_object_free(struct drm_object *object)
+{
+    if (!object)
+        return;
+    drm_object_free_properties(object);
+    talloc_free(object);
+}
+// Log the object's ID, type and every property name/value pair (emitted through mp_err).
+void drm_object_print_info(struct mp_log *log, struct drm_object *object)
+{
+    mp_err(log, "Object ID = %d (type = %x) has %d properties\n",
+           object->id, object->type, object->props->count_props);
+    for (int i = 0; i < object->props->count_props; i++)
+        if (object->props_info[i]) // skip properties whose metadata lookup failed
+            mp_err(log, "    Property '%s' = %lld\n", object->props_info[i]->name,
+                   (long long)object->props->prop_values[i]);
+}
+// Create the atomic context for crtc_id/connector_id and select the draw and drmprime planes.
+struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id,
+                                                     int connector_id,
+                                                     int draw_plane_idx, int drmprime_video_plane_idx)
+{
+    drmModePlaneRes *plane_res = NULL;
+    drmModeRes *res = NULL;
+    struct drm_object *plane = NULL; // plane being examined; NULL once ctx owns it
+    struct drm_atomic_context *ctx = NULL; // must start NULL: the fail path tests it
+    int crtc_index = -1;
+    int layercount = -1;
+    int primary_id = 0;
+    int overlay_id = 0;
+    uint64_t value;
+
+    res = drmModeGetResources(fd);
+    if (!res) {
+        mp_err(log, "Cannot retrieve DRM resources: %s\n", mp_strerror(errno));
+        goto fail;
+    }
+
+    plane_res = drmModeGetPlaneResources(fd);
+    if (!plane_res) {
+        mp_err(log, "Cannot retrieve plane resources: %s\n", mp_strerror(errno));
+        goto fail;
+    }
+
+    ctx = talloc_zero(NULL, struct drm_atomic_context);
+    if (!ctx) {
+        mp_err(log, "Out of memory\n");
+        goto fail;
+    }
+
+    ctx->fd = fd;
+    ctx->crtc = drm_object_create(log, ctx->fd, crtc_id, DRM_MODE_OBJECT_CRTC);
+    if (!ctx->crtc) {
+        mp_err(log, "Failed to create CRTC object\n");
+        goto fail;
+    }
+
+    for (int i = 0; i < res->count_crtcs; i++) {
+        if (res->crtcs[i] == crtc_id) {
+            crtc_index = i;
+            break;
+        }
+    }
+
+    for (int i = 0; i < res->count_connectors; i++) {
+        drmModeConnector *connector = drmModeGetConnector(fd, res->connectors[i]);
+        if (connector) {
+            if (connector->connector_id == connector_id)
+                ctx->connector =  drm_object_create(log, ctx->fd, connector->connector_id,
+                                                    DRM_MODE_OBJECT_CONNECTOR);
+            drmModeFreeConnector(connector);
+            if (ctx->connector)
+                break;
+        }
+    }
+
+    for (unsigned int j = 0; j < plane_res->count_planes; j++) {
+        drmModePlane *drmplane = drmModeGetPlane(ctx->fd, plane_res->planes[j]);
+        if (!drmplane) // the plane could not be queried; ignore it
+            continue;
+        const uint32_t possible_crtcs = drmplane->possible_crtcs;
+        const uint32_t plane_id = drmplane->plane_id;
+        drmModeFreePlane(drmplane);
+
+        if (crtc_index >= 0 && (possible_crtcs & (1u << crtc_index))) {
+            plane = drm_object_create(log, ctx->fd, plane_id, DRM_MODE_OBJECT_PLANE);
+            if (!plane) {
+                mp_err(log, "Failed to create Plane object from plane ID %d\n",
+                       plane_id);
+                goto fail;
+            }
+            if (drm_object_get_property(plane, "TYPE", &value) == -EINVAL) {
+                mp_err(log, "Unable to retrieve type property from plane %d\n", j);
+                goto fail;
+            }
+
+            if (value != DRM_PLANE_TYPE_CURSOR) { // Skip cursor planes
+                layercount++;
+                if ((!primary_id) && (value == DRM_PLANE_TYPE_PRIMARY))
+                    primary_id = plane_id;
+                if ((!overlay_id) && (value == DRM_PLANE_TYPE_OVERLAY))
+                    overlay_id = plane_id;
+                if (layercount == draw_plane_idx) {
+                    ctx->draw_plane = plane;
+                    plane = NULL; // ownership moved to ctx
+                    continue;
+                }
+                if (layercount == drmprime_video_plane_idx) {
+                    ctx->drmprime_video_plane = plane;
+                    plane = NULL; // ownership moved to ctx
+                    continue;
+                }
+            }
+
+            drm_object_free(plane);
+            plane = NULL;
+        }
+    }
+
+    // draw plane was specified as either of the special options: any primary plane or any overlay plane
+    if (!ctx->draw_plane) {
+        const int draw_plane_id = (draw_plane_idx == DRM_OPTS_OVERLAY_PLANE) ? overlay_id : primary_id;
+        const char *plane_type = (draw_plane_idx == DRM_OPTS_OVERLAY_PLANE) ? "overlay" : "primary";
+        if (draw_plane_id) {
+            mp_verbose(log, "Using %s plane %d as draw plane\n", plane_type, draw_plane_id);
+            ctx->draw_plane = drm_object_create(log, ctx->fd, draw_plane_id, DRM_MODE_OBJECT_PLANE);
+        } else {
+            mp_err(log, "Failed to find draw plane with idx=%d\n", draw_plane_idx);
+            goto fail;
+        }
+    } else {
+        mp_verbose(log, "Found draw plane with ID %d\n", ctx->draw_plane->id);
+    }
+
+    // drmprime plane was specified as either of the special options: any primary plane or any overlay plane
+    if (!ctx->drmprime_video_plane) {
+        const int drmprime_video_plane_id = (drmprime_video_plane_idx == DRM_OPTS_PRIMARY_PLANE) ? primary_id : overlay_id;
+        const char *plane_type = (drmprime_video_plane_idx == DRM_OPTS_PRIMARY_PLANE) ? "primary" : "overlay";
+        if (drmprime_video_plane_id) {
+            mp_verbose(log, "Using %s plane %d as drmprime plane\n", plane_type, drmprime_video_plane_id);
+            ctx->drmprime_video_plane = drm_object_create(log, ctx->fd, drmprime_video_plane_id, DRM_MODE_OBJECT_PLANE);
+        } else {
+            mp_verbose(log, "Failed to find drmprime plane with idx=%d. drmprime-overlay hwdec interop will not work\n", drmprime_video_plane_idx);
+        }
+    } else {
+        mp_verbose(log, "Found drmprime plane with ID %d\n", ctx->drmprime_video_plane->id);
+    }
+
+    drmModeFreePlaneResources(plane_res);
+    drmModeFreeResources(res);
+    return ctx;
+
+fail:
+    if (res)
+        drmModeFreeResources(res);
+    if (plane_res)
+        drmModeFreePlaneResources(plane_res);
+    drm_object_free(plane); // NULL-safe; never aliases an object owned by ctx
+    if (ctx) {
+        drm_object_free(ctx->crtc);
+        drm_object_free(ctx->connector);
+        drm_object_free(ctx->draw_plane);
+        drm_object_free(ctx->drmprime_video_plane);
+        talloc_free(ctx);
+    }
+    return NULL;
+}
+
+// Destroys the saved-mode blob, frees the four DRM objects, then the context.
+void drm_atomic_destroy_context(struct drm_atomic_context *ctx)
+{
+    struct drm_object *objs[] = {ctx->crtc, ctx->connector, ctx->draw_plane, ctx->drmprime_video_plane};
+    drm_mode_destroy_blob(ctx->fd, &ctx->old_state.crtc.mode);
+    for (size_t i = 0; i < sizeof(objs) / sizeof(objs[0]); i++)
+        drm_object_free(objs[i]);
+    talloc_free(ctx);
+}
+
+// Snapshot the current property values of `plane` into `plane_state`.
+// A NULL plane has nothing to save and yields true. Returns false if any
+// mandatory property could not be read; the others are still attempted.
+static bool drm_atomic_save_plane_state(struct drm_object *plane,
+                                        struct drm_atomic_plane_state *plane_state)
+{
+    if (plane == NULL)
+        return true;
+
+    // Mandatory properties, read in this order.
+    const struct {
+        char *name;
+        uint64_t *dst;
+    } required[] = {
+        {"FB_ID",   &plane_state->fb_id},
+        {"CRTC_ID", &plane_state->crtc_id},
+        {"SRC_X",   &plane_state->src_x},
+        {"SRC_Y",   &plane_state->src_y},
+        {"SRC_W",   &plane_state->src_w},
+        {"SRC_H",   &plane_state->src_h},
+        {"CRTC_X",  &plane_state->crtc_x},
+        {"CRTC_Y",  &plane_state->crtc_y},
+        {"CRTC_W",  &plane_state->crtc_w},
+        {"CRTC_H",  &plane_state->crtc_h},
+    };
+    bool all_ok = true;
+    for (size_t i = 0; i < sizeof(required) / sizeof(required[0]); i++)
+        all_ok &= drm_object_get_property(plane, required[i].name, required[i].dst) >= 0;
+
+    // ZPOS might not exist, so ignore whether or not this succeeds
+    drm_object_get_property(plane, "ZPOS", &plane_state->zpos);
+    return all_ok;
+}
+
+// Hand every value saved in `plane_state` to drm_object_set_property() for
+// `plane` on `request`. A NULL plane has nothing to restore and yields true.
+// Returns false if any mandatory property fails; the rest are still attempted.
+static bool drm_atomic_restore_plane_state(drmModeAtomicReq *request,
+                                           struct drm_object *plane,
+                                           const struct drm_atomic_plane_state *plane_state)
+{
+    if (plane == NULL)
+        return true;
+
+    // Mandatory properties, set in this order.
+    const struct {
+        char *name;
+        uint64_t value;
+    } required[] = {
+        {"FB_ID",   plane_state->fb_id},
+        {"CRTC_ID", plane_state->crtc_id},
+        {"SRC_X",   plane_state->src_x},
+        {"SRC_Y",   plane_state->src_y},
+        {"SRC_W",   plane_state->src_w},
+        {"SRC_H",   plane_state->src_h},
+        {"CRTC_X",  plane_state->crtc_x},
+        {"CRTC_Y",  plane_state->crtc_y},
+        {"CRTC_W",  plane_state->crtc_w},
+        {"CRTC_H",  plane_state->crtc_h},
+    };
+    bool all_ok = true;
+    for (size_t i = 0; i < sizeof(required) / sizeof(required[0]); i++)
+        all_ok &= drm_object_set_property(request, plane, required[i].name, required[i].value) >= 0;
+
+    // ZPOS might not exist, or be immutable, so ignore whether or not this succeeds
+    drm_object_set_property(request, plane, "ZPOS", plane_state->zpos);
+    return all_ok;
+}
+
+// Record the CRTC mode, the CRTC/connector properties and both plane states in
+// ctx->old_state so they can be restored later. Returns false if a state is
+// already saved, if the CRTC cannot be queried, or if a mandatory property
+// read fails (old_state.saved is still set to true in that last case).
+bool drm_atomic_save_old_state(struct drm_atomic_context *ctx)
+{
+    struct drm_atomic_state *old = &ctx->old_state;
+    if (old->saved)
+        return false;
+
+    // The mode is copied by value, so the libdrm CRTC can be freed right away.
+    drmModeCrtc *cur_crtc = drmModeGetCrtc(ctx->fd, ctx->crtc->id);
+    if (!cur_crtc)
+        return false;
+    old->crtc.mode.mode = cur_crtc->mode;
+    drmModeFreeCrtc(cur_crtc);
+
+    bool ok = drm_object_get_property(ctx->crtc, "ACTIVE", &old->crtc.active) >= 0;
+
+    // This property was added in kernel 5.0. We will just ignore any errors.
+    drm_object_get_property(ctx->crtc, "VRR_ENABLED", &old->crtc.vrr_enabled);
+
+    // Every remaining read is attempted even if an earlier one failed.
+    ok &= drm_object_get_property(ctx->connector, "CRTC_ID", &old->connector.crtc_id) >= 0;
+    ok &= drm_atomic_save_plane_state(ctx->draw_plane, &old->draw_plane);
+    ok &= drm_atomic_save_plane_state(ctx->drmprime_video_plane,
+                                      &old->drmprime_video_plane);
+
+    old->saved = true;
+    return ok;
+}
+
+// Add the state recorded by drm_atomic_save_old_state() to `request` and clear
+// the saved flag. Returns false if nothing was saved or if any mandatory step
+// fails; the remaining steps are still attempted. No commit is issued here.
+bool drm_atomic_restore_old_state(drmModeAtomicReqPtr request, struct drm_atomic_context *ctx)
+{
+    struct drm_atomic_state *old = &ctx->old_state;
+    if (!old->saved)
+        return false;
+
+    bool ok = drm_object_set_property(request, ctx->connector, "CRTC_ID",
+                                      old->connector.crtc_id) >= 0;
+
+    // This property was added in kernel 5.0. We will just ignore any errors.
+    drm_object_set_property(request, ctx->crtc, "VRR_ENABLED", old->crtc.vrr_enabled);
+
+    // MODE_ID takes a blob ID, so make sure the saved mode has a blob first.
+    ok &= drm_mode_ensure_blob(ctx->fd, &old->crtc.mode);
+    ok &= drm_object_set_property(request, ctx->crtc, "MODE_ID", old->crtc.mode.blob_id) >= 0;
+    ok &= drm_object_set_property(request, ctx->crtc, "ACTIVE", old->crtc.active) >= 0;
+
+    // The plane helper treats a NULL plane as success.
+    ok &= drm_atomic_restore_plane_state(request, ctx->draw_plane, &old->draw_plane);
+    ok &= drm_atomic_restore_plane_state(request, ctx->drmprime_video_plane,
+                                         &old->drmprime_video_plane);
+
+    old->saved = false;
+    return ok;
+}
+
+// Make sure `mode` has a property blob: if mode->blob_id is still 0, create
+// one from mode->mode. Returns true if a blob already existed or was created.
+bool drm_mode_ensure_blob(int fd, struct drm_mode *mode)
+{
+    if (mode->blob_id)
+        return true;
+
+    const int err = drmModeCreatePropertyBlob(fd, &mode->mode, sizeof(drmModeModeInfo),
+                                              &mode->blob_id);
+    return err == 0;
+}
+
+// Destroy the property blob of `mode`, if any, and reset mode->blob_id to 0.
+// Returns true when there was no blob or the destroy call reported success.
+bool drm_mode_destroy_blob(int fd, struct drm_mode *mode)
+{
+    if (!mode->blob_id)
+        return true;
+
+    const int err = drmModeDestroyPropertyBlob(fd, mode->blob_id);
+    mode->blob_id = 0; // cleared regardless of the destroy result
+    return err == 0;
+}
diff --git a/video/out/drm_atomic.h b/video/out/drm_atomic.h
new file mode 100644
index 0000000..499aa33
--- /dev/null
+++ b/video/out/drm_atomic.h
@@ -0,0 +1,100 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_DRMATOMIC_H
+#define MP_DRMATOMIC_H
+
+#include <stdlib.h>
+#include <stdbool.h>
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+
+#include "common/msg.h"
+#include "drm_common.h"
+
+#define DRM_OPTS_PRIMARY_PLANE -1
+#define DRM_OPTS_OVERLAY_PLANE -2
+
+struct drm_atomic_plane_state { // saved values of one plane's properties, one field per property
+    uint64_t fb_id;   // "FB_ID"
+    uint64_t crtc_id; // "CRTC_ID"
+    uint64_t src_x;   // "SRC_X"
+    uint64_t src_y;   // "SRC_Y"
+    uint64_t src_w;   // "SRC_W"
+    uint64_t src_h;   // "SRC_H"
+    uint64_t crtc_x;  // "CRTC_X"
+    uint64_t crtc_y;  // "CRTC_Y"
+    uint64_t crtc_w;  // "CRTC_W"
+    uint64_t crtc_h;  // "CRTC_H"
+    uint64_t zpos;    // "ZPOS"; optional, failures to read or restore it are ignored
+};
+
+// Used to store the restore state for VT switching and uninit
+struct drm_atomic_state {
+    bool saved; // set by drm_atomic_save_old_state(), cleared by drm_atomic_restore_old_state()
+    struct {
+        uint64_t crtc_id; // connector "CRTC_ID" property
+    } connector;
+    struct {
+        struct drm_mode mode; // copy of the CRTC's mode; its blob is created on restore if missing
+        uint64_t active; // CRTC "ACTIVE" property
+        uint64_t vrr_enabled; // CRTC "VRR_ENABLED" property; optional, errors are ignored
+    } crtc;
+    struct drm_atomic_plane_state draw_plane; // saved state of the context's draw_plane
+    struct drm_atomic_plane_state drmprime_video_plane; // saved state of the context's drmprime_video_plane
+};
+
+struct drm_object { // handle for one DRM object; created by drm_object_create(), released by drm_object_free()
+    int fd; // NOTE(review): presumably the DRM device fd the object was created on — confirm in drm_object_create()
+    uint32_t id; // DRM object ID (e.g. passed to drmModeGetCrtc() for the CRTC object)
+    uint32_t type; // NOTE(review): presumably a DRM_MODE_OBJECT_* constant — confirm in drm_object_create()
+    drmModeObjectProperties *props; // NOTE(review): presumably the object's property list — confirm
+    drmModePropertyRes **props_info; // NOTE(review): presumably per-property metadata matching props — confirm
+};
+
+struct drm_atomic_context { // released with drm_atomic_destroy_context()
+    int fd; // DRM device fd used for the libdrm calls made on behalf of this context
+
+    struct drm_object *crtc; // freed by drm_atomic_destroy_context()
+    struct drm_object *connector; // freed by drm_atomic_destroy_context()
+    struct drm_object *draw_plane; // freed by drm_atomic_destroy_context()
+    struct drm_object *drmprime_video_plane; // may be NULL when no matching plane was found
+
+    drmModeAtomicReq *request; // NOTE(review): not used in the code visible here — confirm who owns it
+
+    struct drm_atomic_state old_state; // filled by drm_atomic_save_old_state()
+};
+
+int drm_object_create_properties(struct mp_log *log, int fd, struct drm_object *object);
+void drm_object_free_properties(struct drm_object *object);
+int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value); // callers treat < 0 as failure
+int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object, char *name, uint64_t value); // callers treat < 0 as failure
+drmModePropertyBlobPtr drm_object_get_property_blob(struct drm_object *object, char *name);
+struct drm_object *drm_object_create(struct mp_log *log, int fd, uint32_t object_id, uint32_t type);
+void drm_object_free(struct drm_object *object);
+void drm_object_print_info(struct mp_log *log, struct drm_object *object);
+struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id, int connector_id,
+                                                     int draw_plane_idx, int drmprime_video_plane_idx); // idx may be DRM_OPTS_PRIMARY_PLANE / DRM_OPTS_OVERLAY_PLANE
+void drm_atomic_destroy_context(struct drm_atomic_context *ctx);
+// Save the current CRTC/connector/plane state; later add it back to an atomic request.
+bool drm_atomic_save_old_state(struct drm_atomic_context *ctx);
+bool drm_atomic_restore_old_state(drmModeAtomicReq *request, struct drm_atomic_context *ctx);
+// Create (if missing) / destroy the property blob that belongs to a struct drm_mode.
+bool drm_mode_ensure_blob(int fd, struct drm_mode *mode);
+bool drm_mode_destroy_blob(int fd, struct drm_mode *mode);
+
+#endif // MP_DRMATOMIC_H
diff --git a/video/out/drm_common.c b/video/out/drm_common.c
new file mode 100644
index 0000000..da45ca2
--- /dev/null
+++ b/video/out/drm_common.c
@@ -0,0 +1,1289 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <string.h>
+#include <signal.h>
+#include <sys/ioctl.h>
+#include <poll.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <limits.h>
+#include <math.h>
+#include <time.h>
+#include <drm_fourcc.h>
+
+#include "config.h"
+
+#if HAVE_CONSIO_H
+#include <sys/consio.h>
+#else
+#include <sys/vt.h>
+#endif
+
+#include "drm_atomic.h"
+#include "drm_common.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "misc/ctype.h"
+#include "options/m_config.h"
+#include "osdep/io.h"
+#include "osdep/poll_wrapper.h"
+#include "osdep/timer.h"
+#include "present_sync.h"
+#include "video/out/vo.h"
+
+#define EVT_RELEASE 1 // pipe event byte: the VT release signal was received
+#define EVT_ACQUIRE 2 // pipe event byte: the VT acquire signal was received
+#define EVT_INTERRUPT 255 // pipe event byte: wake up vt_switcher_poll() without a VT event
+#define HANDLER_ACQUIRE 0 // index into the vt_switcher handlers[] / handler_data[] arrays
+#define HANDLER_RELEASE 1 // index into the vt_switcher handlers[] / handler_data[] arrays
+#define RELEASE_SIGNAL SIGUSR1 // registered as vt_mode.relsig
+#define ACQUIRE_SIGNAL SIGUSR2 // registered as vt_mode.acqsig
+#define MAX_CONNECTOR_NAME_LEN 20 // size of connector name buffers, including the NUL
+
+static int vt_switcher_pipe[2]; // [0] is polled and read, [1] is written by the signal handler
+
+static int drm_connector_opt_help(struct mp_log *log, const struct m_option *opt,
+                                  struct bstr name);
+
+static int drm_mode_opt_help(struct mp_log *log, const struct m_option *opt,
+                             struct bstr name);
+
+static int drm_validate_mode_opt(struct mp_log *log, const struct m_option *opt,
+                                 struct bstr name, const char **value);
+
+static void drm_show_available_modes(struct mp_log *log, const drmModeConnector *connector);
+
+static void drm_show_available_connectors(struct mp_log *log, int card_no,
+                                          const char *card_path);
+static double mode_get_Hz(const drmModeModeInfo *mode);
+
+#define OPT_BASE_STRUCT struct drm_opts // NOTE(review): presumably the struct the OPT_* field names below refer to — confirm
+const struct m_sub_options drm_conf = {
+    .opts = (const struct m_option[]) {
+        {"drm-device", OPT_STRING(device_path), .flags = M_OPT_FILE},
+        {"drm-connector", OPT_STRING(connector_spec),
+            .help = drm_connector_opt_help},
+        {"drm-mode", OPT_STRING_VALIDATE(mode_spec, drm_validate_mode_opt),
+            .help = drm_mode_opt_help},
+        {"drm-atomic", OPT_CHOICE(drm_atomic, {"no", 0}, {"auto", 1}),
+            .deprecation_message = "this option is deprecated: DRM Atomic is required"},
+        {"drm-draw-plane", OPT_CHOICE(draw_plane, // a named choice, or a number within M_RANGE below
+            {"primary", DRM_OPTS_PRIMARY_PLANE},
+            {"overlay", DRM_OPTS_OVERLAY_PLANE}),
+            M_RANGE(0, INT_MAX)},
+        {"drm-drmprime-video-plane", OPT_CHOICE(drmprime_video_plane, // a named choice, or a number within M_RANGE below
+            {"primary", DRM_OPTS_PRIMARY_PLANE},
+            {"overlay", DRM_OPTS_OVERLAY_PLANE}),
+            M_RANGE(0, INT_MAX)},
+        {"drm-format", OPT_CHOICE(drm_format,
+            {"xrgb8888",    DRM_OPTS_FORMAT_XRGB8888},
+            {"xrgb2101010", DRM_OPTS_FORMAT_XRGB2101010},
+            {"xbgr8888",    DRM_OPTS_FORMAT_XBGR8888},
+            {"xbgr2101010", DRM_OPTS_FORMAT_XBGR2101010})},
+        {"drm-draw-surface-size", OPT_SIZE_BOX(draw_surface_size)},
+        {"drm-vrr-enabled", OPT_CHOICE(vrr_enabled, // -1 ("auto"): VRR is enabled only if the connector reports VRR_CAPABLE
+            {"no", 0}, {"yes", 1}, {"auto", -1})},
+        {0},
+    },
+    .defaults = &(const struct drm_opts) { // members not listed here are zero-initialized
+        .mode_spec = "preferred",
+        .drm_atomic = 1,
+        .draw_plane = DRM_OPTS_PRIMARY_PLANE,
+        .drmprime_video_plane = DRM_OPTS_OVERLAY_PLANE,
+    },
+    .size = sizeof(struct drm_opts),
+};
+
+static const char *connector_names[] = { // indexed by drmModeConnector.connector_type (see get_connector_name)
+    "Unknown",   // DRM_MODE_CONNECTOR_Unknown
+    "VGA",       // DRM_MODE_CONNECTOR_VGA
+    "DVI-I",     // DRM_MODE_CONNECTOR_DVII
+    "DVI-D",     // DRM_MODE_CONNECTOR_DVID
+    "DVI-A",     // DRM_MODE_CONNECTOR_DVIA
+    "Composite", // DRM_MODE_CONNECTOR_Composite
+    "SVIDEO",    // DRM_MODE_CONNECTOR_SVIDEO
+    "LVDS",      // DRM_MODE_CONNECTOR_LVDS
+    "Component", // DRM_MODE_CONNECTOR_Component
+    "DIN",       // DRM_MODE_CONNECTOR_9PinDIN
+    "DP",        // DRM_MODE_CONNECTOR_DisplayPort
+    "HDMI-A",    // DRM_MODE_CONNECTOR_HDMIA
+    "HDMI-B",    // DRM_MODE_CONNECTOR_HDMIB
+    "TV",        // DRM_MODE_CONNECTOR_TV
+    "eDP",       // DRM_MODE_CONNECTOR_eDP
+    "Virtual",   // DRM_MODE_CONNECTOR_VIRTUAL
+    "DSI",       // DRM_MODE_CONNECTOR_DSI
+    "DPI",       // DRM_MODE_CONNECTOR_DPI
+    "Writeback", // DRM_MODE_CONNECTOR_WRITEBACK
+    "SPI",       // DRM_MODE_CONNECTOR_SPI
+    "USB",       // DRM_MODE_CONNECTOR_USB
+};
+
+struct drm_mode_spec { // parsed form of a mode specification string (see parse_mode_spec)
+    enum {
+        DRM_MODE_SPEC_BY_IDX,     // Specified by idx
+        DRM_MODE_SPEC_BY_NUMBERS, // Specified by width, height and opt. refresh
+        DRM_MODE_SPEC_PREFERRED,  // Select the preferred mode of the display
+        DRM_MODE_SPEC_HIGHEST,    // Select the mode with the highest resolution
+    } type;
+    unsigned int idx;    // mode index; set for DRM_MODE_SPEC_BY_IDX, 0 otherwise
+    unsigned int width;  // set for DRM_MODE_SPEC_BY_NUMBERS, 0 otherwise
+    unsigned int height; // set for DRM_MODE_SPEC_BY_NUMBERS, 0 otherwise
+    double refresh;      // DRM_MODE_SPEC_BY_NUMBERS: requested refresh, NaN if none was given
+};
+
+/* VT Switcher */
+static void vt_switcher_sighandler(int sig)
+{
+    const int errno_backup = errno; // write() below may change errno
+    const unsigned char evt = (sig != RELEASE_SIGNAL) ? EVT_ACQUIRE : EVT_RELEASE;
+    (void)write(vt_switcher_pipe[1], &evt, sizeof(evt)); // one byte per VT signal
+    errno = errno_backup;
+}
+
+static bool has_signal_installed(int signo)
+{
+    struct sigaction current = { 0 };
+    sigaction(signo, NULL, &current); // NULL new action: only query the current one
+    return current.sa_handler != NULL;
+}
+
+// Install `handler` for `signo`; returns the result of sigaction().
+static int install_signal(int signo, void (*handler)(int))
+{
+    // SA_RESTART: system calls interrupted by the handler are restarted.
+    struct sigaction action = { .sa_handler = handler, .sa_flags = SA_RESTART };
+    sigemptyset(&action.sa_mask);
+    return sigaction(signo, &action, NULL);
+}
+
+static void release_vt(void *data)
+{
+    struct vo_drm_state *const state = data; // `data` is expected to be the vo_drm_state
+    MP_VERBOSE(state, "Releasing VT\n");
+    vo_drm_release_crtc(state);
+}
+
+static void acquire_vt(void *data)
+{
+    struct vo_drm_state *const state = data; // `data` is expected to be the vo_drm_state
+    MP_VERBOSE(state, "Acquiring VT\n");
+    vo_drm_acquire_crtc(state); // the bool result is not checked here
+}
+
+// Register the callback (and its argument) that vt_switcher_poll() runs on EVT_ACQUIRE.
+static void vt_switcher_acquire(struct vt_switcher *s, void (*handler)(void*), void *user_data)
+{
+    s->handler_data[HANDLER_ACQUIRE] = user_data;
+    s->handlers[HANDLER_ACQUIRE] = handler;
+}
+
+// Register the callback (and its argument) that vt_switcher_poll() runs on EVT_RELEASE.
+static void vt_switcher_release(struct vt_switcher *s, void (*handler)(void*), void *user_data)
+{
+    s->handler_data[HANDLER_RELEASE] = user_data;
+    s->handlers[HANDLER_RELEASE] = handler;
+}
+
+static bool vt_switcher_init(struct vt_switcher *s, struct mp_log *log)
+{   // Prepares VT switching; returns false (after logging the reason) if any step fails.
+    s->tty_fd = -1;
+    s->log = log;
+    vt_switcher_pipe[0] = -1;
+    vt_switcher_pipe[1] = -1;
+    // The pipe carries event bytes from vt_switcher_sighandler() to vt_switcher_poll().
+    if (mp_make_cloexec_pipe(vt_switcher_pipe)) {
+        mp_err(log, "Creating pipe failed: %s\n", mp_strerror(errno));
+        return false;
+    }
+    // NOTE(review): the failure paths from here on return without closing the fds opened so far.
+    s->tty_fd = open("/dev/tty", O_RDWR | O_CLOEXEC);
+    if (s->tty_fd < 0) {
+        mp_err(log, "Can't open TTY for VT control: %s\n", mp_strerror(errno));
+        return false;
+    }
+    // Only take over the two VT signals if neither of them already has a handler.
+    if (has_signal_installed(RELEASE_SIGNAL)) {
+        mp_err(log, "Can't handle VT release - signal already used\n");
+        return false;
+    }
+    if (has_signal_installed(ACQUIRE_SIGNAL)) {
+        mp_err(log, "Can't handle VT acquire - signal already used\n");
+        return false;
+    }
+
+    if (install_signal(RELEASE_SIGNAL, vt_switcher_sighandler)) {
+        mp_err(log, "Failed to install release signal: %s\n", mp_strerror(errno));
+        return false;
+    }
+    if (install_signal(ACQUIRE_SIGNAL, vt_switcher_sighandler)) {
+        mp_err(log, "Failed to install acquire signal: %s\n", mp_strerror(errno));
+        return false;
+    }
+    // Read the current VT mode first, then override only the fields set below.
+    struct vt_mode vt_mode = { 0 };
+    if (ioctl(s->tty_fd, VT_GETMODE, &vt_mode) < 0) {
+        mp_err(log, "VT_GETMODE failed: %s\n", mp_strerror(errno));
+        return false;
+    }
+
+    vt_mode.mode = VT_PROCESS;
+    vt_mode.relsig = RELEASE_SIGNAL;
+    vt_mode.acqsig = ACQUIRE_SIGNAL;
+    // frsig is a signal for forced release. Not implemented on Linux,
+    // Solaris, BSDs but must be set to a valid signal on some of those.
+    vt_mode.frsig = SIGIO; // unused
+    if (ioctl(s->tty_fd, VT_SETMODE, &vt_mode) < 0) {
+        mp_err(log, "VT_SETMODE failed: %s\n", mp_strerror(errno));
+        return false;
+    }
+
+    // Block the VT switching signals from interrupting the VO thread (they will
+    // still be picked up by other threads, which will fill vt_switcher_pipe for us)
+    sigset_t set;
+    sigemptyset(&set);
+    sigaddset(&set, RELEASE_SIGNAL);
+    sigaddset(&set, ACQUIRE_SIGNAL);
+    pthread_sigmask(SIG_BLOCK, &set, NULL);
+
+    return true;
+}
+
+static void vt_switcher_interrupt_poll(struct vt_switcher *s)
+{
+    // EVT_INTERRUPT only makes the pipe readable; vt_switcher_poll() takes no action on it.
+    (void)write(vt_switcher_pipe[1], &(unsigned char){EVT_INTERRUPT}, sizeof(unsigned char));
+}
+
+// Undo vt_switcher_init(): VT back to VT_AUTO, default signals, fds closed.
+static void vt_switcher_destroy(struct vt_switcher *s)
+{
+    struct vt_mode vt_mode = { .mode = VT_AUTO };
+    // A failed VT_SETMODE is only reported. Returning early here would leak
+    // the TTY and pipe descriptors and leave the signal handlers installed.
+    if (ioctl(s->tty_fd, VT_SETMODE, &vt_mode) < 0) {
+        MP_ERR(s, "VT_SETMODE failed: %s\n", mp_strerror(errno));
+    }
+    install_signal(RELEASE_SIGNAL, SIG_DFL);
+    install_signal(ACQUIRE_SIGNAL, SIG_DFL);
+    close(s->tty_fd);
+    close(vt_switcher_pipe[0]);
+    close(vt_switcher_pipe[1]);
+}
+
+// Poll the VT pipe (timeout_ns is passed on to mp_poll()) and dispatch one
+// event byte: run the registered release/acquire handler, then answer the
+// kernel via VT_RELDISP. Any other byte (e.g. EVT_INTERRUPT) only ends the wait.
+static void vt_switcher_poll(struct vt_switcher *s, int timeout_ns)
+{
+    struct pollfd pfd = { .fd = vt_switcher_pipe[0], .events = POLLIN };
+    mp_poll(&pfd, 1, timeout_ns);
+    if (pfd.revents == 0)
+        return;
+
+    // Events are single bytes, as written by the signal handler.
+    unsigned char evt;
+    if (read(pfd.fd, &evt, sizeof(evt)) != sizeof(evt))
+        return;
+
+    // The handler runs before the switch is acknowledged to the kernel.
+    if (evt == EVT_RELEASE) {
+        s->handlers[HANDLER_RELEASE](s->handler_data[HANDLER_RELEASE]);
+        if (ioctl(s->tty_fd, VT_RELDISP, 1) < 0) {
+            MP_ERR(s, "Failed to release virtual terminal\n");
+        }
+    } else if (evt == EVT_ACQUIRE) {
+        s->handlers[HANDLER_ACQUIRE](s->handler_data[HANDLER_ACQUIRE]);
+        if (ioctl(s->tty_fd, VT_RELDISP, VT_ACKACQ) < 0) {
+            MP_ERR(s, "Failed to acquire virtual terminal\n");
+        }
+    }
+
+    // EVT_INTERRUPT needs no action: waking up the poll was its only purpose.
+}
+
+// Take over the display: become DRM master, save the current state, then commit
+// a modeset that drives the connector from our CRTC and shows drm->fb on the draw plane.
+bool vo_drm_acquire_crtc(struct vo_drm_state *drm)
+{
+    if (drm->active)
+        return true;
+    drm->active = true; // NOTE(review): stays true even if the modeset below fails
+
+    if (drmSetMaster(drm->fd))
+        MP_WARN(drm, "Failed to acquire DRM master: %s\n", mp_strerror(errno));
+
+    struct drm_atomic_context *atomic_ctx = drm->atomic_context;
+    if (!drm_atomic_save_old_state(atomic_ctx))
+        MP_WARN(drm, "Failed to save old DRM atomic state\n");
+
+    drmModeAtomicReqPtr request = drmModeAtomicAlloc();
+    if (!request) {
+        MP_ERR(drm, "Failed to allocate drm atomic request\n");
+        goto err;
+    }
+
+    if (drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", drm->crtc_id) < 0) {
+        MP_ERR(drm, "Could not set CRTC_ID on connector\n");
+        goto err;
+    }
+
+    if (!drm_mode_ensure_blob(drm->fd, &drm->mode)) {
+        MP_ERR(drm, "Failed to create DRM mode blob\n");
+        goto err;
+    }
+    if (drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", drm->mode.blob_id) < 0) {
+        MP_ERR(drm, "Could not set MODE_ID on crtc\n");
+        goto err;
+    }
+    if (drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1) < 0) {
+        MP_ERR(drm, "Could not set ACTIVE on crtc\n");
+        goto err;
+    }
+
+    /*
+     * VRR related properties were added in kernel 5.0. We will not fail if we
+     * cannot query or set the value, but we will log as appropriate.
+     */
+    uint64_t vrr_capable = 0;
+    drm_object_get_property(atomic_ctx->connector, "VRR_CAPABLE", &vrr_capable);
+    MP_VERBOSE(drm, "crtc is%s VRR capable\n", vrr_capable ? "" : " not");
+
+    const int vrr_requested = drm->opts->vrr_enabled; // 0 = no, 1 = yes, -1 = auto (only if capable)
+    if (vrr_requested == 1 || (vrr_capable && vrr_requested == -1)) {
+        if (drm_object_set_property(request, atomic_ctx->crtc, "VRR_ENABLED", 1) < 0) {
+            MP_WARN(drm, "Could not enable VRR on crtc\n");
+        } else {
+            MP_VERBOSE(drm, "Enabled VRR on crtc\n");
+        }
+    }
+
+    // Whole framebuffer as source (SRC_W/SRC_H are passed shifted left by 16), whole mode as destination.
+    drm_object_set_property(request, atomic_ctx->draw_plane, "FB_ID",   drm->fb->id);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_ID", drm->crtc_id);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_X",   0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_Y",   0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_W",   drm->width << 16);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_H",   drm->height << 16);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_X",  0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_Y",  0);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_W",  drm->mode.mode.hdisplay);
+    drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_H",  drm->mode.mode.vdisplay);
+
+    if (drmModeAtomicCommit(drm->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL)) {
+        MP_ERR(drm, "Failed to commit ModeSetting atomic request: %s\n", mp_strerror(errno));
+        goto err;
+    }
+
+    drmModeAtomicFree(request);
+    return true;
+
+err:
+    drmModeAtomicFree(request);
+    return false;
+}
+
+
+// Give the display back: add the state saved by vo_drm_acquire_crtc() to one
+// atomic request, commit it and drop DRM master. Does nothing when the CRTC is
+// not held; only clears the active flag when no old state was saved.
+void vo_drm_release_crtc(struct vo_drm_state *drm)
+{
+    if (!drm->active)
+        return;
+    drm->active = false;
+
+    struct drm_atomic_context *atomic_ctx = drm->atomic_context;
+    if (!atomic_ctx->old_state.saved)
+        return;
+
+    bool restored = true;
+    drmModeAtomicReqPtr request = drmModeAtomicAlloc();
+    if (request) {
+        if (!drm_atomic_restore_old_state(request, atomic_ctx)) {
+            MP_WARN(drm, "Got error while restoring old state\n");
+            restored = false;
+        }
+        // The commit is attempted even if part of the old state could not be added.
+        if (drmModeAtomicCommit(drm->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL)) {
+            MP_WARN(drm, "Failed to commit ModeSetting atomic request: %s\n",
+                    mp_strerror(errno));
+            restored = false;
+        }
+        drmModeAtomicFree(request);
+    } else {
+        MP_ERR(drm, "Failed to allocate drm atomic request\n");
+        restored = false;
+    }
+
+    if (!restored)
+        MP_ERR(drm, "Failed to restore previous mode\n");
+
+    // Master is dropped whether or not the restore worked.
+    if (drmDropMaster(drm->fd)) {
+        MP_WARN(drm, "Failed to drop DRM master: %s\n",
+                mp_strerror(errno));
+    }
+}
+
+/* libdrm */
+// Write the connector's name, "<type name>-<type id>", into ret. Types beyond
+// the connector_names table are named "UNKNOWN"; snprintf() truncates the
+// result to MAX_CONNECTOR_NAME_LEN bytes including the terminating NUL.
+static void get_connector_name(const drmModeConnector *connector,
+                               char ret[MAX_CONNECTOR_NAME_LEN])
+{
+    const char *type_name = "UNKNOWN";
+    if (connector->connector_type < MP_ARRAY_SIZE(connector_names))
+        type_name = connector_names[connector->connector_type];
+    // connector_type_id is an unsigned 32-bit value; print it as such rather
+    // than through the signed %d conversion.
+    snprintf(ret, MAX_CONNECTOR_NAME_LEN, "%s-%u", type_name,
+             (unsigned int)connector->connector_type_id);
+}
+
+// Returns the first connector whose name (as built by get_connector_name())
+// equals connector_name, or NULL when none matches.
+// The returned connector may be disconnected.
+// Result must be freed with drmModeFreeConnector.
+static drmModeConnector *get_connector_by_name(const drmModeRes *res,
+                                               const char *connector_name,
+                                               int fd)
+{
+    char candidate_name[MAX_CONNECTOR_NAME_LEN];
+    for (int idx = 0; idx < res->count_connectors; idx++) {
+        drmModeConnector *candidate = drmModeGetConnector(fd, res->connectors[idx]);
+        if (candidate == NULL)
+            continue; // connectors that cannot be retrieved are skipped
+        get_connector_name(candidate, candidate_name);
+        if (strcmp(connector_name, candidate_name) == 0)
+            return candidate;
+        drmModeFreeConnector(candidate);
+    }
+    return NULL;
+}
+
+// Returns the first connector that is connected and offers at least one mode,
+// or NULL if there is none. Result must be freed with drmModeFreeConnector.
+static drmModeConnector *get_first_connected_connector(const drmModeRes *res,
+                                                       int fd)
+{
+    for (int idx = 0; idx < res->count_connectors; idx++) {
+        drmModeConnector *candidate = drmModeGetConnector(fd, res->connectors[idx]);
+        if (candidate == NULL)
+            continue;
+        const bool usable = candidate->connection == DRM_MODE_CONNECTED && candidate->count_modes > 0;
+        if (usable)
+            return candidate;
+        drmModeFreeConnector(candidate);
+    }
+    return NULL;
+}
+
+// Choose the connector to drive and store it in drm->connector. A non-empty
+// connector_name other than "auto" selects by name, otherwise the first
+// connected connector is taken. Disconnected or mode-less choices are rejected.
+static bool setup_connector(struct vo_drm_state *drm, const drmModeRes *res,
+                            const char *connector_name)
+{
+    const bool by_name = connector_name && connector_name[0] != '\0' &&
+                         strcmp(connector_name, "auto") != 0;
+    drmModeConnector *chosen = by_name
+        ? get_connector_by_name(res, connector_name, drm->fd)
+        : get_first_connected_connector(res, drm->fd);
+
+    if (!chosen && by_name) {
+        MP_ERR(drm, "No connector with name %s found\n", connector_name);
+        drm_show_available_connectors(drm->log, drm->card_no, drm->card_path);
+        return false;
+    }
+    if (!chosen) {
+        MP_ERR(drm, "No connected connectors found\n");
+        return false;
+    }
+    // A connector found by name may still be disconnected or have no modes.
+    if (chosen->connection != DRM_MODE_CONNECTED) {
+        drmModeFreeConnector(chosen);
+        MP_ERR(drm, "Chosen connector is disconnected\n");
+        return false;
+    }
+    if (chosen->count_modes == 0) {
+        drmModeFreeConnector(chosen);
+        MP_ERR(drm, "Chosen connector has no valid modes\n");
+        return false;
+    }
+    drm->connector = chosen;
+    return true;
+}
+
+// Choose the encoder and CRTC for drm->connector and store them in
+// drm->encoder / drm->crtc_id (the encoder is not freed on success). The
+// connector's current encoder+CRTC is preferred over the first legal combo.
+static bool setup_crtc(struct vo_drm_state *drm, const drmModeRes *res)
+{
+    // First try to find currently connected encoder and its current CRTC
+    for (unsigned int i = 0; i < res->count_encoders; i++) {
+        drmModeEncoder *encoder = drmModeGetEncoder(drm->fd, res->encoders[i]);
+        if (!encoder) {
+            MP_WARN(drm, "Cannot retrieve encoder %u:%u: %s\n",
+                    i, res->encoders[i], mp_strerror(errno));
+            continue;
+        }
+        if (encoder->encoder_id == drm->connector->encoder_id && encoder->crtc_id != 0) {
+            MP_VERBOSE(drm, "Connector %u currently connected to encoder %u\n",
+                       drm->connector->connector_id, drm->connector->encoder_id);
+            drm->encoder = encoder;
+            drm->crtc_id = encoder->crtc_id;
+            goto success;
+        }
+
+        drmModeFreeEncoder(encoder);
+    }
+
+    // Otherwise pick first legal encoder and CRTC combo for the connector
+    for (unsigned int i = 0; i < drm->connector->count_encoders; ++i) {
+        drmModeEncoder *encoder = drmModeGetEncoder(drm->fd, drm->connector->encoders[i]);
+        if (!encoder) {
+            MP_WARN(drm, "Cannot retrieve encoder %u:%u: %s\n",
+                    i, drm->connector->encoders[i], mp_strerror(errno));
+            continue;
+        }
+        // possible_crtcs is a 32-bit mask indexed by position in res->crtcs. Use an
+        // unsigned 1 and stop at bit 31: a signed shift that far is undefined.
+        for (unsigned int j = 0; j < res->count_crtcs && j < 32; ++j) {
+            if (!(encoder->possible_crtcs & (1u << j)))
+                continue;
+
+            drm->encoder = encoder;
+            drm->crtc_id = res->crtcs[j];
+            goto success;
+        }
+
+        drmModeFreeEncoder(encoder);
+    }
+
+    MP_ERR(drm, "Connector %u has no suitable CRTC\n",
+           drm->connector->connector_id);
+    return false;
+
+  success:
+    MP_VERBOSE(drm, "Selected Encoder %u with CRTC %u\n",
+               drm->encoder->encoder_id, drm->crtc_id);
+    return true;
+}
+
+// True if str is non-NULL, non-empty and consists solely of digit characters.
+static bool all_digits(const char *str)
+{
+    if (!str || !*str)
+        return false;
+
+    size_t pos = 0;
+    while (str[pos] != '\0' && mp_isdigit(str[pos]))
+        pos++;
+    // Only digits were skipped, so the string is all digits iff we reached the NUL.
+    return str[pos] == '\0';
+}
+
+// Parse a decimal number that must start with a digit and fit an unsigned
+// int. *end receives the position after the number. Returns false if there
+// is no leading digit or the value is out of range.
+static bool parse_mode_uint(const char *str, char **end, unsigned int *out)
+{
+    // Requiring a leading digit keeps strtoul() from accepting whitespace or
+    // a sign.
+    if (!mp_isdigit(str[0]))
+        return false;
+
+    // ERANGE: the value overflowed unsigned long. The second test covers
+    // platforms where unsigned long is wider than unsigned int.
+    errno = 0;
+    const unsigned long val = strtoul(str, end, 10);
+    if (errno == ERANGE || val > UINT_MAX)
+        return false;
+
+    *out = (unsigned int)val;
+    return true;
+}
+
+// Parse a mode specification string: "preferred" (also NULL or empty),
+// "highest", a mode index made of digits only ("3"), or "WIDTHxHEIGHT" with an
+// optional "@REFRESH" ("1920x1080@59.94"). Returns false on malformed input,
+// including numbers that do not fit an unsigned int. On success the result is
+// stored in *parse_result unless that pointer is NULL.
+static bool parse_mode_spec(const char *spec, struct drm_mode_spec *parse_result)
+{
+    struct drm_mode_spec result = {0};
+    char *height_part, *refresh_part, *rest = NULL;
+
+    if (spec == NULL || spec[0] == '\0' || strcmp(spec, "preferred") == 0) {
+        result.type = DRM_MODE_SPEC_PREFERRED;
+    } else if (strcmp(spec, "highest") == 0) {
+        result.type = DRM_MODE_SPEC_HIGHEST;
+    } else if (all_digits(spec)) {
+        // If the string is made up of only digits, it means that it is an index number
+        result.type = DRM_MODE_SPEC_BY_IDX;
+        if (!parse_mode_uint(spec, &rest, &result.idx))
+            return false;
+    } else {
+        // WIDTHxHEIGHT[@REFRESH]
+        result.type = DRM_MODE_SPEC_BY_NUMBERS;
+        if (!parse_mode_uint(spec, &height_part, &result.width) || height_part[0] != 'x')
+            return false;
+        if (!parse_mode_uint(height_part + 1, &refresh_part, &result.height))
+            return false;
+
+        switch (refresh_part[0]) {
+        case '\0':
+            // No refresh given: NaN makes mode_match() compare the size only.
+            result.refresh = nan("");
+            break;
+        case '@':
+            refresh_part += 1;
+            if (!(mp_isdigit(refresh_part[0]) || refresh_part[0] == '.'))
+                return false;
+            // The rate must be consumed completely and must not be negative.
+            result.refresh = strtod(refresh_part, &rest);
+            if (refresh_part == rest || rest[0] != '\0' || result.refresh < 0.0)
+                return false;
+            break;
+        default:
+            return false;
+        }
+    }
+
+    // Written on success only, so a failed parse leaves *parse_result untouched.
+    if (parse_result)
+        *parse_result = result;
+    return true;
+}
+
+static bool setup_mode_by_idx(struct vo_drm_state *drm, unsigned int mode_idx)
+{
+    if (mode_idx >= drm->connector->count_modes) {
+        MP_ERR(drm, "Bad mode index (max = %d).\n",
+               drm->connector->count_modes - 1);
+        return false;
+    }
+
+    drm->mode.mode = drm->connector->modes[mode_idx];
+    return true;
+}
+
+// Check whether mode has the requested display size and, unless refresh is
+// NaN, the requested refresh rate. Rates are compared after rounding to two
+// decimal places.
+static bool mode_match(const drmModeModeInfo *mode,
+                       unsigned int width,
+                       unsigned int height,
+                       double refresh)
+{
+    if (mode->hdisplay != width || mode->vdisplay != height)
+        return false;
+
+    // NaN refresh: the size alone decides.
+    if (isnan(refresh))
+        return true;
+
+    const int wanted_centihz = (int)round(refresh*100);
+    const int actual_centihz = (int)round(mode_get_Hz(mode)*100);
+    return wanted_centihz == actual_centihz;
+}
+
+// Select the first connector mode accepted by mode_match() for the given
+// size and refresh rate. Logs an error and returns false if none matches.
+static bool setup_mode_by_numbers(struct vo_drm_state *drm,
+                                  unsigned int width,
+                                  unsigned int height,
+                                  double refresh)
+{
+    const drmModeConnector *conn = drm->connector;
+
+    for (unsigned int n = 0; n < conn->count_modes; n++) {
+        if (!mode_match(&conn->modes[n], width, height, refresh))
+            continue;
+        drm->mode.mode = conn->modes[n];
+        return true;
+    }
+
+    MP_ERR(drm, "Could not find mode matching %s\n", drm->opts->mode_spec);
+    return false;
+}
+
+// Select the first mode flagged DRM_MODE_TYPE_PREFERRED. If no mode carries
+// the flag, warn and use modes[0]. Always returns true.
+static bool setup_mode_preferred(struct vo_drm_state *drm)
+{
+    const drmModeConnector *conn = drm->connector;
+    unsigned int n = 0;
+
+    while (n < conn->count_modes) {
+        if (conn->modes[n].type & DRM_MODE_TYPE_PREFERRED) {
+            drm->mode.mode = conn->modes[n];
+            return true;
+        }
+        n++;
+    }
+
+    // Nothing is marked as preferred: fall back to the first mode.
+    MP_WARN(drm, "Could not find any preferred mode. Picking the first mode.\n");
+    drm->mode.mode = conn->modes[0];
+    return true;
+}
+
+// Select the mode with the largest pixel area (hdisplay * vdisplay). On a
+// tie the earliest such mode wins; modes[0] is used if every mode has zero
+// area. Always returns true. The connector must have at least one mode
+// (setup_mode() checks this before dispatching here).
+static bool setup_mode_highest(struct vo_drm_state *drm)
+{
+    unsigned int area = 0;
+    drmModeModeInfo *highest_resolution_mode = &drm->connector->modes[0];
+    for (unsigned int i = 0; i < drm->connector->count_modes; ++i) {
+        drmModeModeInfo *current_mode = &drm->connector->modes[i];
+
+        // Multiply as unsigned. libdrm declares hdisplay/vdisplay narrower
+        // than int, so without the cast both operands are promoted to signed
+        // int and a large enough product would overflow.
+        const unsigned int current_area =
+            (unsigned int)current_mode->hdisplay * current_mode->vdisplay;
+        if (current_area > area) {
+            highest_resolution_mode = current_mode;
+            area = current_area;
+        }
+    }
+
+    drm->mode.mode = *highest_resolution_mode;
+    return true;
+}
+
+// Choose the display mode according to the mode_spec option and store it in
+// drm->mode.mode. On a parse or selection failure the available modes are
+// listed and false is returned. A connector without any modes fails right
+// away, without the listing.
+static bool setup_mode(struct vo_drm_state *drm)
+{
+    if (drm->connector->count_modes <= 0) {
+        MP_ERR(drm, "No available modes\n");
+        return false;
+    }
+
+    struct drm_mode_spec parsed;
+    bool ok = parse_mode_spec(drm->opts->mode_spec, &parsed);
+
+    if (!ok) {
+        MP_ERR(drm, "Parse error\n");
+    } else {
+        switch (parsed.type) {
+        case DRM_MODE_SPEC_BY_IDX:
+            ok = setup_mode_by_idx(drm, parsed.idx);
+            break;
+        case DRM_MODE_SPEC_BY_NUMBERS:
+            ok = setup_mode_by_numbers(drm, parsed.width, parsed.height,
+                                       parsed.refresh);
+            break;
+        case DRM_MODE_SPEC_PREFERRED:
+            ok = setup_mode_preferred(drm);
+            break;
+        case DRM_MODE_SPEC_HIGHEST:
+            ok = setup_mode_highest(drm);
+            break;
+        default:
+            MP_ERR(drm, "setup_mode: Internal error\n");
+            ok = false;
+            break;
+        }
+    }
+
+    if (!ok) {
+        // Show the user what could have been selected.
+        MP_INFO(drm, "Available modes:\n");
+        drm_show_available_modes(drm->log, drm->connector);
+        return false;
+    }
+
+    drmModeModeInfo *mode = &drm->mode.mode;
+    MP_VERBOSE(drm, "Selected mode: %s (%dx%d@%.2fHz)\n",
+        mode->name, mode->hdisplay, mode->vdisplay, mode_get_Hz(mode));
+    return true;
+}
+
+// Open a DRM device node read/write with close-on-exec set. Returns the
+// result of open() unchanged.
+static int open_card_path(const char *path)
+{
+    const int flags = O_RDWR | O_CLOEXEC;
+    return open(path, flags);
+}
+
+// Return true if the device node at path can be opened and drmIsKMS()
+// reports a non-zero value for it. The node is closed again before returning.
+static bool card_supports_kms(const char *path)
+{
+    const int fd = open_card_path(path);
+    if (fd == -1)
+        return false;
+
+    const bool is_kms = drmIsKMS(fd);
+    close(fd);
+    return is_kms;
+}
+
+// Return true if the device node at path can be opened, its mode resources
+// can be queried, and get_first_connected_connector() finds a connector.
+// Everything acquired here is released before returning.
+static bool card_has_connection(const char *path)
+{
+    const int fd = open_card_path(path);
+    if (fd == -1)
+        return false;
+
+    bool found = false;
+    drmModeRes *res = drmModeGetResources(fd);
+    if (res) {
+        drmModeConnector *conn = get_first_connected_connector(res, fd);
+        found = conn != NULL;
+        drmModeFreeConnector(conn);
+        drmModeFreeResources(res);
+    }
+
+    close(fd);
+    return found;
+}
+
+// Determine which DRM primary node to use and store a copy of its path in
+// drm->card_path (left unset on failure).
+//
+// If the device_path option is set, it is used as-is. Otherwise the devices
+// reported by drmGetDevices2() are scanned. With drm->card_no >= 0 only that
+// card is considered and any shortcoming is an error. Without it, the first
+// card that has a primary node, supports KMS and has a connected output is
+// picked, and drm->card_no is updated to its index.
+static void get_primary_device_path(struct vo_drm_state *drm)
+{
+    if (drm->opts->device_path) {
+        drm->card_path = talloc_strdup(drm, drm->opts->device_path);
+        return;
+    }
+
+    drmDevice *devices[DRM_MAX_MINOR] = { 0 };
+    int card_count = drmGetDevices2(0, devices, MP_ARRAY_SIZE(devices));
+    bool card_no_given = drm->card_no >= 0;
+
+    if (card_count < 0) {
+        // drmGetDevices2() reports failure as a negative error code in its
+        // return value; errno is not guaranteed to describe this failure.
+        MP_ERR(drm, "Listing DRM devices with drmGetDevices failed! (%s)\n",
+               mp_strerror(-card_count));
+        goto err;
+    }
+
+    if (card_no_given && drm->card_no > (card_count - 1)) {
+        MP_ERR(drm, "Card number %d given too high! %d devices located.\n",
+               drm->card_no, card_count);
+        goto err;
+    }
+
+    // With an explicit card number the loop body runs for that card only:
+    // every path through it for a given card ends in break.
+    for (int i = card_no_given ? drm->card_no : 0; i < card_count; i++) {
+        drmDevice *dev = devices[i];
+
+        if (!(dev->available_nodes & (1 << DRM_NODE_PRIMARY))) {
+            if (card_no_given) {
+                MP_ERR(drm, "DRM card number %d given, but it does not have "
+                       "a primary node!\n", i);
+                break;
+            }
+
+            continue;
+        }
+
+        const char *card_path = dev->nodes[DRM_NODE_PRIMARY];
+
+        if (!card_supports_kms(card_path)) {
+            if (card_no_given) {
+                MP_ERR(drm,
+                       "DRM card number %d given, but it does not support "
+                       "KMS!\n", i);
+                break;
+            }
+
+            continue;
+        }
+
+        if (!card_has_connection(card_path)) {
+            if (card_no_given) {
+                MP_ERR(drm,
+                        "DRM card number %d given, but it does not have any "
+                        "connected outputs.\n", i);
+                break;
+            }
+
+            continue;
+        }
+
+        MP_VERBOSE(drm, "Picked DRM card %d, primary node %s%s.\n",
+                   i, card_path,
+                   card_no_given ? "" : " as the default");
+
+        // Copy the path: the device list is freed below.
+        drm->card_path = talloc_strdup(drm, card_path);
+        drm->card_no = i;
+        break;
+    }
+
+    if (!drm->card_path)
+        MP_ERR(drm, "No primary DRM device could be picked!\n");
+
+err:
+    drmFreeDevices(devices, card_count);
+}
+
+// Page-flip completion handler (installed as drm->ev.page_flip_handler).
+// Feeds the flip timestamp and counter to the present-sync state and clears
+// the waiting_for_flip flag. data is the struct vo_drm_state.
+static void drm_pflip_cb(int fd, unsigned int msc, unsigned int sec,
+                         unsigned int usec, void *data)
+{
+    struct vo_drm_state *state = data;
+
+    // Combine the seconds/microseconds pair into one nanosecond timestamp.
+    int64_t flip_time_ns = MP_TIME_S_TO_NS(sec) + MP_TIME_US_TO_NS(usec);
+
+    present_sync_update_values(state->present, flip_time_ns, msc);
+    present_sync_swap(state->present);
+    state->waiting_for_flip = false;
+}
+
+// Handle the VOCTRL requests that the shared DRM state can answer.
+//
+//   VOCTRL_GET_DISPLAY_FPS: writes the mode's refresh rate to *(double *)arg,
+//                           unless the rate is <= 0.
+//   VOCTRL_GET_DISPLAY_RES: writes hdisplay/vdisplay to ((int *)arg)[0..1].
+//   VOCTRL_PAUSE/RESUME:    track the paused flag; pausing also requests a
+//                           redraw.
+//
+// Returns VO_TRUE if the request was handled, VO_NOTIMPL otherwise.
+int vo_drm_control(struct vo *vo, int *events, int request, void *arg)
+{
+    struct vo_drm_state *drm = vo->drm;
+
+    if (request == VOCTRL_GET_DISPLAY_FPS) {
+        double fps = vo_drm_get_display_fps(drm);
+        if (fps <= 0)
+            return VO_NOTIMPL;
+        *(double*)arg = fps;
+        return VO_TRUE;
+    }
+
+    if (request == VOCTRL_GET_DISPLAY_RES) {
+        int *res = arg;
+        res[0] = drm->mode.mode.hdisplay;
+        res[1] = drm->mode.mode.vdisplay;
+        return VO_TRUE;
+    }
+
+    if (request == VOCTRL_PAUSE) {
+        vo->want_redraw = true;
+        drm->paused = true;
+        return VO_TRUE;
+    }
+
+    if (request == VOCTRL_RESUME) {
+        drm->paused = false;
+        return VO_TRUE;
+    }
+
+    return VO_NOTIMPL;
+}
+
+// Allocate vo->drm and bring up the DRM/KMS state: VT switcher, device node,
+// connector, CRTC, mode and atomic context.
+//
+// Returns true on success. On any failure everything acquired so far is torn
+// down through vo_drm_uninit() (which also resets vo->drm to NULL) and false
+// is returned. Atomic modesetting support is mandatory; a failure to enable
+// universal planes is only logged.
+bool vo_drm_init(struct vo *vo)
+{
+    vo->drm = talloc_zero(NULL, struct vo_drm_state);
+    struct vo_drm_state *drm = vo->drm;
+
+    *drm = (struct vo_drm_state) {
+        .vo = vo,
+        .log = mp_log_new(drm, vo->log, "drm"),
+        .mode = {{0}},
+        .crtc_id = -1,
+        .card_no = -1,
+        // Mark the device as "not opened". vo_drm_uninit() closes drm->fd
+        // unconditionally, so leaving this at 0 would close stdin whenever
+        // initialization fails before the device node is opened.
+        .fd = -1,
+    };
+
+    drm->vt_switcher_active = vt_switcher_init(&drm->vt_switcher, drm->log);
+    if (drm->vt_switcher_active) {
+        vt_switcher_acquire(&drm->vt_switcher, acquire_vt, drm);
+        vt_switcher_release(&drm->vt_switcher, release_vt, drm);
+    } else {
+        MP_WARN(drm, "Failed to set up VT switcher. Terminal switching will be unavailable.\n");
+    }
+
+    drm->opts = mp_get_config_group(drm, drm->vo->global, &drm_conf);
+
+    drmModeRes *res = NULL;
+    get_primary_device_path(drm);
+
+    if (!drm->card_path) {
+        MP_ERR(drm, "Failed to find a usable DRM primary node!\n");
+        goto err;
+    }
+
+    drm->fd = open_card_path(drm->card_path);
+    if (drm->fd < 0) {
+        MP_ERR(drm, "Cannot open card \"%d\": %s.\n", drm->card_no, mp_strerror(errno));
+        goto err;
+    }
+
+    // Purely informational; a missing version is not an error.
+    drmVersionPtr ver = drmGetVersion(drm->fd);
+    if (ver) {
+        MP_VERBOSE(drm, "Driver: %s %d.%d.%d (%s)\n", ver->name, ver->version_major,
+                   ver->version_minor, ver->version_patchlevel, ver->date);
+        drmFreeVersion(ver);
+    }
+
+    res = drmModeGetResources(drm->fd);
+    if (!res) {
+        MP_ERR(drm, "Cannot retrieve DRM resources: %s\n", mp_strerror(errno));
+        goto err;
+    }
+
+    if (!setup_connector(drm, res, drm->opts->connector_spec))
+        goto err;
+    if (!setup_crtc(drm, res))
+        goto err;
+    if (!setup_mode(drm))
+        goto err;
+
+    // Universal planes allows accessing all the planes (including primary)
+    if (drmSetClientCap(drm->fd, DRM_CLIENT_CAP_UNIVERSAL_PLANES, 1)) {
+        MP_ERR(drm, "Failed to set Universal planes capability\n");
+    }
+
+    if (drmSetClientCap(drm->fd, DRM_CLIENT_CAP_ATOMIC, 1)) {
+        MP_ERR(drm, "Failed to create DRM atomic context, no DRM Atomic support\n");
+        goto err;
+    } else {
+        MP_VERBOSE(drm, "DRM Atomic support found\n");
+        drm->atomic_context = drm_atomic_create_context(drm->log, drm->fd, drm->crtc_id,
+                                                        drm->connector->connector_id,
+                                                        drm->opts->draw_plane,
+                                                        drm->opts->drmprime_video_plane);
+        if (!drm->atomic_context) {
+            MP_ERR(drm, "Failed to create DRM atomic context\n");
+            goto err;
+        }
+    }
+
+    drmModeFreeResources(res);
+
+    drm->ev.version = DRM_EVENT_CONTEXT_VERSION;
+    drm->ev.page_flip_handler = &drm_pflip_cb;
+    drm->present = mp_present_initialize(drm, drm->vo->opts, VO_MAX_SWAPCHAIN_DEPTH);
+
+    return true;
+
+err:
+    if (res)
+        drmModeFreeResources(res);
+
+    vo_drm_uninit(vo);
+    return false;
+}
+
+// Tear down everything vo_drm_init() set up and free vo->drm. Does nothing
+// if vo->drm is NULL; vo->drm is NULL afterwards.
+void vo_drm_uninit(struct vo *vo)
+{
+    struct vo_drm_state *state = vo->drm;
+    if (state == NULL)
+        return;
+
+    vo_drm_release_crtc(state);
+
+    if (state->vt_switcher_active)
+        vt_switcher_destroy(&state->vt_switcher);
+
+    drm_mode_destroy_blob(state->fd, &state->mode);
+
+    if (state->connector != NULL) {
+        drmModeFreeConnector(state->connector);
+        state->connector = NULL;
+    }
+
+    if (state->encoder != NULL) {
+        drmModeFreeEncoder(state->encoder);
+        state->encoder = NULL;
+    }
+
+    if (state->atomic_context != NULL)
+        drm_atomic_destroy_context(state->atomic_context);
+
+    close(state->fd);
+
+    // The state is its own talloc root (allocated with a NULL parent).
+    talloc_free(state);
+    vo->drm = NULL;
+}
+
+// Compute the refresh rate of a mode from its clock and total timings:
+// clock * 1000 / htotal / vtotal, doubled for interlaced modes.
+static double mode_get_Hz(const drmModeModeInfo *mode)
+{
+    const bool interlaced = mode->flags & DRM_MODE_FLAG_INTERLACE;
+    const double rate = mode->clock * 1000.0 / mode->htotal / mode->vtotal;
+
+    return interlaced ? rate * 2.0 : rate;
+}
+
+// Log one info line per mode of the connector: index, name, size and
+// refresh rate.
+static void drm_show_available_modes(struct mp_log *log,
+                                     const drmModeConnector *connector)
+{
+    unsigned int i = 0;
+
+    while (i < connector->count_modes) {
+        const drmModeModeInfo *m = &connector->modes[i];
+        mp_info(log, "  Mode %d: %s (%dx%d@%.2fHz)\n", i,
+                m->name, m->hdisplay, m->vdisplay, mode_get_Hz(m));
+        i++;
+    }
+}
+
+// Open the card at card_path and call show_fn(log, card_no, connector) for
+// every connector that can be retrieved; connectors that cannot be fetched
+// are skipped. Failing to open the card or to query its resources is logged.
+static void drm_show_foreach_connector(struct mp_log *log, int card_no,
+                                       const char *card_path,
+                                       void (*show_fn)(struct mp_log*, int,
+                                                       const drmModeConnector*))
+{
+    const int fd = open_card_path(card_path);
+    if (fd < 0) {
+        mp_err(log, "Failed to open card %d (%s)\n", card_no, card_path);
+        return;
+    }
+
+    drmModeRes *res = drmModeGetResources(fd);
+    if (res) {
+        for (int n = 0; n < res->count_connectors; n++) {
+            drmModeConnector *conn = drmModeGetConnector(fd, res->connectors[n]);
+            if (conn) {
+                show_fn(log, card_no, conn);
+                drmModeFreeConnector(conn);
+            }
+        }
+        drmModeFreeResources(res);
+    } else {
+        mp_err(log, "Cannot retrieve DRM resources: %s\n", mp_strerror(errno));
+    }
+
+    close(fd);
+}
+
+// Per-connector callback: log the connector's name together with
+// "connected" or "disconnected". card_no is unused.
+static void drm_show_connector_name_and_state_callback(struct mp_log *log, int card_no,
+                                                       const drmModeConnector *connector)
+{
+    char name[MAX_CONNECTOR_NAME_LEN];
+    get_connector_name(connector, name);
+
+    const bool is_connected = connector->connection == DRM_MODE_CONNECTED;
+    mp_info(log, "  %s (%s)\n", name,
+            is_connected ? "connected" : "disconnected");
+}
+
+// Per-card callback: log a heading for the card, then the name and state of
+// each of its connectors, then an empty line.
+static void drm_show_available_connectors(struct mp_log *log, int card_no,
+                                          const char *card_path)
+{
+    mp_info(log, "Available connectors for card %d (%s):\n",
+            card_no, card_path);
+
+    drm_show_foreach_connector(log, card_no, card_path,
+                               drm_show_connector_name_and_state_callback);
+
+    mp_info(log, "\n");
+}
+
+// Per-connector callback: for a connected connector, log a heading in
+// "drm-connector=<card>.<name>" form followed by its modes and an empty
+// line. Connectors that are not connected produce no output.
+static void drm_show_connector_modes_callback(struct mp_log *log, int card_no,
+                                              const drmModeConnector *connector)
+{
+    if (connector->connection == DRM_MODE_CONNECTED) {
+        char name[MAX_CONNECTOR_NAME_LEN];
+        get_connector_name(connector, name);
+
+        mp_info(log, "Available modes for drm-connector=%d.%s\n",
+                card_no, name);
+        drm_show_available_modes(log, connector);
+        mp_info(log, "\n");
+    }
+}
+
+// Per-card callback: list the modes of every connected connector of the card.
+static void drm_show_available_connectors_and_modes(struct mp_log *log,
+                                                    int card_no,
+                                                    const char *card_path)
+{
+    drm_show_foreach_connector(log, card_no, card_path,
+                               drm_show_connector_modes_callback);
+}
+
+// Enumerate DRM devices and call show_fn(log, index, primary_node_path) for
+// each device that has a primary node which can be opened. Devices without a
+// primary node are skipped silently; nodes that fail to open are logged and
+// skipped.
+static void drm_show_foreach_card(struct mp_log *log,
+                                  void (*show_fn)(struct mp_log *, int,
+                                                  const char *))
+{
+    drmDevice *devices[DRM_MAX_MINOR] = { 0 };
+    int card_count = drmGetDevices2(0, devices, MP_ARRAY_SIZE(devices));
+    if (card_count < 0) {
+        // drmGetDevices2() reports failure as a negative error code in its
+        // return value; errno is not guaranteed to describe this failure.
+        mp_err(log, "Listing DRM devices with drmGetDevices failed! (%s)\n",
+               mp_strerror(-card_count));
+        return;
+    }
+
+    for (int i = 0; i < card_count; i++) {
+        drmDevice *dev = devices[i];
+
+        if (!(dev->available_nodes & (1 << DRM_NODE_PRIMARY)))
+            continue;
+
+        const char *card_path = dev->nodes[DRM_NODE_PRIMARY];
+
+        // Probe that the node can be opened before handing it to show_fn,
+        // which receives the path rather than the descriptor.
+        int fd = open_card_path(card_path);
+        if (fd < 0) {
+            mp_err(log, "Failed to open primary DRM node path %s!\n",
+                   card_path);
+            continue;
+        }
+
+        close(fd);
+        show_fn(log, i, card_path);
+    }
+
+    drmFreeDevices(devices, card_count);
+}
+
+// Log the connectors (name and connection state) of every usable card.
+static void drm_show_available_cards_and_connectors(struct mp_log *log)
+{
+    void (*per_card)(struct mp_log *, int, const char *) =
+        drm_show_available_connectors;
+
+    drm_show_foreach_card(log, per_card);
+}
+
+// Log the modes of every connected connector of every usable card.
+static void drm_show_available_cards_connectors_and_modes(struct mp_log *log)
+{
+    void (*per_card)(struct mp_log *, int, const char *) =
+        drm_show_available_connectors_and_modes;
+
+    drm_show_foreach_card(log, per_card);
+}
+
+// Option help callback: list all cards with their connectors, then return
+// M_OPT_EXIT. opt and name are unused.
+static int drm_connector_opt_help(struct mp_log *log, const struct m_option *opt,
+                                  struct bstr name)
+{
+    const int result = M_OPT_EXIT;
+
+    drm_show_available_cards_and_connectors(log);
+    return result;
+}
+
+// Option help callback: list the modes of all connected connectors on all
+// cards, then return M_OPT_EXIT. opt and name are unused.
+static int drm_mode_opt_help(struct mp_log *log, const struct m_option *opt,
+                             struct bstr name)
+{
+    const int result = M_OPT_EXIT;
+
+    drm_show_available_cards_connectors_and_modes(log);
+    return result;
+}
+
+// Option validator: accept *value if parse_mode_spec() can parse it.
+// Returns 1 on success; otherwise logs a fatal message and returns
+// M_OPT_INVALID. opt and name are unused.
+static int drm_validate_mode_opt(struct mp_log *log, const struct m_option *opt,
+                                 struct bstr name, const char **value)
+{
+    if (parse_mode_spec(*value, NULL))
+        return 1;
+
+    mp_fatal(log, "Invalid value for option drm-mode. Must be a positive number, a string of the format WxH[@R] or 'help'\n");
+    return M_OPT_INVALID;
+}
+
+/* Helpers */
+// Return the refresh rate (as computed by mode_get_Hz()) of the currently
+// selected mode.
+double vo_drm_get_display_fps(struct vo_drm_state *drm)
+{
+    const drmModeModeInfo *current = &drm->mode.mode;
+
+    return mode_get_Hz(current);
+}
+
+// Set vo->monitor_par. With a non-zero force_monitor_aspect option it is the
+// framebuffer's width/height ratio divided by that aspect; otherwise it is
+// the inverse of the monitor_pixel_aspect option. Reads drm->fb only in the
+// former case.
+void vo_drm_set_monitor_par(struct vo *vo)
+{
+    struct vo_drm_state *drm = vo->drm;
+    const double forced_aspect = vo->opts->force_monitor_aspect;
+
+    if (forced_aspect == 0.0) {
+        vo->monitor_par = 1 / vo->opts->monitor_pixel_aspect;
+    } else {
+        const double fb_ratio = drm->fb->width / (double) drm->fb->height;
+        vo->monitor_par = fb_ratio / forced_aspect;
+    }
+
+    MP_VERBOSE(drm, "Monitor pixel aspect: %g\n", vo->monitor_par);
+}
+
+// Wait for events until until_time_ns. With an active VT switcher this polls
+// the switcher, with the wait clamped to the range 0..10 seconds; otherwise
+// it defers to vo_wait_default().
+void vo_drm_wait_events(struct vo *vo, int64_t until_time_ns)
+{
+    struct vo_drm_state *drm = vo->drm;
+
+    if (!drm->vt_switcher_active) {
+        vo_wait_default(vo, until_time_ns);
+        return;
+    }
+
+    int64_t remaining_ns = until_time_ns - mp_time_ns();
+    int64_t timeout_ns = MPCLAMP(remaining_ns, 0, MP_TIME_S_TO_NS(10));
+    vt_switcher_poll(&drm->vt_switcher, timeout_ns);
+}
+
+// Block until the pending page flip has completed, i.e. until
+// drm->waiting_for_flip is cleared (drm_pflip_cb() does that when invoked
+// via drmHandleEvent()). Polls the DRM fd in 3 second slices and keeps
+// retrying while no input is reported. Returns early, with an error logged,
+// if drmHandleEvent() fails.
+void vo_drm_wait_on_flip(struct vo_drm_state *drm)
+{
+    while (drm->waiting_for_flip) {
+        struct pollfd pfd = { .fd = drm->fd, .events = POLLIN };
+        poll(&pfd, 1, 3000);
+
+        // Nothing readable (timeout or other condition): try again.
+        if (!(pfd.revents & POLLIN))
+            continue;
+
+        const int err = drmHandleEvent(drm->fd, &drm->ev);
+        if (err != 0) {
+            MP_ERR(drm, "drmHandleEvent failed: %i\n", err);
+            return;
+        }
+    }
+}
+
+// Interrupt a wait in vo_drm_wait_events(). Only has an effect while the VT
+// switcher is active.
+void vo_drm_wakeup(struct vo *vo)
+{
+    struct vo_drm_state *drm = vo->drm;
+
+    if (!drm->vt_switcher_active)
+        return;
+
+    vt_switcher_interrupt_poll(&drm->vt_switcher);
+}
diff --git a/video/out/drm_common.h b/video/out/drm_common.h
new file mode 100644
index 0000000..581151f
--- /dev/null
+++ b/video/out/drm_common.h
@@ -0,0 +1,108 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_VT_SWITCHER_H
+#define MP_VT_SWITCHER_H
+
+#include <stdbool.h>
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+#include "vo.h"
+
+#define DRM_OPTS_FORMAT_XRGB8888    0
+#define DRM_OPTS_FORMAT_XRGB2101010 1
+#define DRM_OPTS_FORMAT_XBGR8888    2
+#define DRM_OPTS_FORMAT_XBGR2101010 3
+
+struct framebuffer { // Description of one framebuffer; reached through vo_drm_state.fb
+    int fd; // NOTE(review): presumably the DRM device fd the buffer belongs to - confirm
+    uint32_t width; // width; vo_drm_set_monitor_par() uses width/height as the aspect ratio
+    uint32_t height; // height, see width
+    uint32_t stride; // NOTE(review): presumably bytes per row - confirm against the allocating code
+    uint32_t size; // NOTE(review): presumably total size of the mapping in bytes - confirm
+    uint32_t handle; // NOTE(review): presumably the buffer object handle - confirm
+    uint8_t *map; // NOTE(review): presumably the CPU mapping of the buffer - confirm
+    uint32_t id; // NOTE(review): presumably the KMS framebuffer ID - confirm
+};
+
+struct drm_mode { // The selected display mode plus an associated blob ID
+    drmModeModeInfo mode; // copy of the connector mode chosen by setup_mode()
+    uint32_t blob_id; // passed (via the struct) to drm_mode_destroy_blob() on uninit; presumably a property blob ID - confirm
+};
+
+struct drm_opts { // User options of the DRM backend (obtained via mp_get_config_group() in vo_drm_init())
+    char *device_path; // if set, used verbatim as the card path instead of probing devices
+    char *connector_spec; // handed to setup_connector()
+    char *mode_spec; // parsed by parse_mode_spec(): "preferred", "highest", an index, or WxH[@R]
+    int drm_atomic; // NOTE(review): not referenced in the code visible here - confirm whether still used
+    int draw_plane; // passed to drm_atomic_create_context()
+    int drmprime_video_plane; // passed to drm_atomic_create_context()
+    int drm_format; // NOTE(review): presumably one of the DRM_OPTS_FORMAT_* values - confirm
+    struct m_geometry draw_surface_size;
+    int vrr_enabled;
+};
+
+struct vt_switcher { // State of the VT switching helper embedded in vo_drm_state
+    int tty_fd; // NOTE(review): presumably the controlling tty's fd - confirm
+    struct mp_log *log;
+    void (*handlers[2])(void*); // NOTE(review): presumably the release/acquire callbacks registered via vt_switcher_release()/vt_switcher_acquire() - confirm the index order
+    void *handler_data[2]; // NOTE(review): presumably the user pointer for the handler at the same index - confirm
+};
+
+struct vo_drm_state { // Shared DRM/KMS state; allocated by vo_drm_init(), freed by vo_drm_uninit()
+    drmModeConnector *connector; // selected connector; freed with drmModeFreeConnector() on uninit
+    drmModeEncoder *encoder; // encoder chosen in setup_crtc(); freed with drmModeFreeEncoder() on uninit
+    drmEventContext ev; // event context given to drmHandleEvent(); its page-flip handler is drm_pflip_cb()
+
+    struct drm_atomic_context *atomic_context; // created by drm_atomic_create_context(); destroyed on uninit
+    struct drm_mode mode; // currently selected mode
+    struct drm_opts *opts; // backend options
+    struct framebuffer *fb; // read by vo_drm_set_monitor_par(); not assigned in drm_common.c code visible here
+    struct mp_log *log; // log instance named "drm"
+    struct mp_present *present; // presentation timing state, updated from drm_pflip_cb()
+    struct vo *vo; // owning VO
+    struct vt_switcher vt_switcher; // only valid while vt_switcher_active is true
+
+    bool active;
+    bool paused; // set on VOCTRL_PAUSE, cleared on VOCTRL_RESUME
+    bool still;
+    bool vt_switcher_active; // result of vt_switcher_init()
+    bool waiting_for_flip; // cleared by drm_pflip_cb(); vo_drm_wait_on_flip() loops while it is set
+
+    char *card_path; // path of the DRM primary node in use
+    int card_no; // index of the picked card, -1 if none
+    int fd; // fd of the opened card_path
+
+    uint32_t crtc_id; // CRTC chosen in setup_crtc()
+    uint32_t height;
+    uint32_t width;
+};
+
+bool vo_drm_init(struct vo *vo); // set up vo->drm; returns false (with everything torn down) on failure
+int vo_drm_control(struct vo *vo, int *events, int request, void *arg); // handles display FPS/resolution queries and pause/resume; VO_NOTIMPL otherwise
+
+double vo_drm_get_display_fps(struct vo_drm_state *drm); // refresh rate of the selected mode
+void vo_drm_set_monitor_par(struct vo *vo); // compute vo->monitor_par from the aspect options
+void vo_drm_uninit(struct vo *vo); // release everything and set vo->drm to NULL; no-op if vo->drm is NULL
+void vo_drm_wait_events(struct vo *vo, int64_t until_time_ns); // wait for events, polling the VT switcher if it is active
+void vo_drm_wait_on_flip(struct vo_drm_state *drm); // block until the pending page flip completes or drmHandleEvent() fails
+void vo_drm_wakeup(struct vo *vo); // interrupt vo_drm_wait_events() when the VT switcher is active
+
+bool vo_drm_acquire_crtc(struct vo_drm_state *drm); // defined outside the code visible here
+void vo_drm_release_crtc(struct vo_drm_state *drm); // called first thing by vo_drm_uninit(); defined outside the code visible here
+
+#endif
diff --git a/video/out/drm_prime.c b/video/out/drm_prime.c
new file mode 100644
index 0000000..9335fa8
--- /dev/null
+++ b/video/out/drm_prime.c
@@ -0,0 +1,160 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+#include <xf86drm.h>
+#include <xf86drmMode.h>
+#include <drm_mode.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "drm_common.h"
+#include "drm_prime.h"
+
+// Import the DMA-BUF objects of an AVDRMFrameDescriptor as GEM handles and
+// create a KMS framebuffer from the descriptor's first layer.
+//
+// log:         log for error messages
+// fd:          DRM device fd
+// descriptor:  frame to import; if NULL or without layers, nothing is done
+//              and 0 is returned (framebuffer is left untouched)
+// width/height: dimensions passed to the framebuffer creation call
+// framebuffer: receives the framebuffer ID and the imported GEM handles
+// handle_refs: reference counts; every imported handle gains one reference
+//
+// Returns 0 on success and -1 on failure (framebuffer is zeroed then).
+// Creation is attempted with format modifiers first and falls back to
+// drmModeAddFB2() without modifiers if that fails.
+int drm_prime_create_framebuffer(struct mp_log *log, int fd,
+                                 AVDRMFrameDescriptor *descriptor, int width,
+                                 int height, struct drm_prime_framebuffer *framebuffer,
+                                 struct drm_prime_handle_refs *handle_refs)
+{
+    AVDRMLayerDescriptor *layer = NULL;
+    uint32_t pitches[4] = { 0 };
+    uint32_t offsets[4] = { 0 };
+    uint32_t handles[4] = { 0 };
+    uint64_t modifiers[4] = { 0 };
+    int ret;
+
+    if (!descriptor || !descriptor->nb_layers)
+        return 0;
+
+    *framebuffer = (struct drm_prime_framebuffer){0};
+
+    for (int object = 0; object < descriptor->nb_objects; object++) {
+        ret = drmPrimeFDToHandle(fd, descriptor->objects[object].fd,
+                                 &framebuffer->gem_handles[object]);
+        if (ret < 0) {
+            mp_err(log, "Failed to retrieve the Prime Handle from handle %d (%d).\n",
+                   object, descriptor->objects[object].fd);
+            goto fail;
+        }
+    }
+
+    layer = &descriptor->layers[0];
+
+    // The arrays handed to the kernel are indexed by plane, while GEM
+    // handles and format modifiers belong to objects. Map each plane to its
+    // object; planes that are absent or unusable stay zeroed.
+    for (int plane = 0; plane < AV_DRM_MAX_PLANES; plane++) {
+        if (plane >= layer->nb_planes)
+            continue;
+
+        const int object_index = layer->planes[plane].object_index;
+        if (object_index < 0 || object_index >= descriptor->nb_objects)
+            continue;
+
+        const uint32_t gem_handle = framebuffer->gem_handles[object_index];
+        if (!gem_handle || !layer->planes[plane].pitch)
+            continue;
+
+        pitches[plane] = layer->planes[plane].pitch;
+        offsets[plane] = layer->planes[plane].offset;
+        handles[plane] = gem_handle;
+        // Several planes may share one object (and thus its modifier).
+        modifiers[plane] = descriptor->objects[object_index].format_modifier;
+    }
+
+    ret = drmModeAddFB2WithModifiers(fd, width, height, layer->format,
+                                     handles, pitches, offsets,
+                                     modifiers, &framebuffer->fb_id,
+                                     DRM_MODE_FB_MODIFIERS);
+    if (ret < 0) {
+        ret = drmModeAddFB2(fd, width, height, layer->format,
+                            handles, pitches, offsets,
+                            &framebuffer->fb_id, 0);
+        if (ret < 0) {
+            mp_err(log, "Failed to create framebuffer with drmModeAddFB2 on layer %d: %s\n",
+                    0, mp_strerror(errno));
+            goto fail;
+        }
+    }
+
+    // Unused slots hold handle 0, which drm_prime_add_handle_ref() ignores.
+    for (int object = 0; object < AV_DRM_MAX_PLANES; object++)
+        drm_prime_add_handle_ref(handle_refs, framebuffer->gem_handles[object]);
+
+    return 0;
+
+fail:
+    // NOTE(review): GEM handles imported above are forgotten here without
+    // being closed. Whether closing them is safe depends on other
+    // framebuffers possibly sharing the same handle - confirm.
+    memset(framebuffer, 0, sizeof(*framebuffer));
+    return -1;
+}
+
+// Remove the KMS framebuffer (if any) and drop one reference on each of its
+// GEM handles. A handle whose reference count reaches zero is closed.
+// The framebuffer struct is zeroed afterwards. log is unused.
+void drm_prime_destroy_framebuffer(struct mp_log *log, int fd,
+                                   struct drm_prime_framebuffer *framebuffer,
+                                   struct drm_prime_handle_refs *handle_refs)
+{
+    if (framebuffer->fb_id)
+        drmModeRmFB(fd, framebuffer->fb_id);
+
+    for (int i = 0; i < AV_DRM_MAX_PLANES; i++) {
+        const uint32_t handle = framebuffer->gem_handles[i];
+        if (!handle)
+            continue;
+
+        drm_prime_remove_handle_ref(handle_refs, handle);
+        if (drm_prime_get_handle_ref_count(handle_refs, handle))
+            continue;
+
+        // DRM_IOCTL_GEM_CLOSE takes a struct drm_gem_close, which is larger
+        // than a bare handle; pass a properly sized, zero-padded argument.
+        struct drm_gem_close close_args = { .handle = handle };
+        drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close_args);
+    }
+
+    memset(framebuffer, 0, sizeof(*framebuffer));
+}
+
+// Initialize handle_refs with a single zeroed counter allocated under
+// talloc_parent. talloc_parent is also remembered for later array growth.
+void drm_prime_init_handle_ref_count(void *talloc_parent,
+    struct drm_prime_handle_refs *handle_refs)
+{
+    handle_refs->ctx = talloc_parent;
+    handle_refs->size = 1;
+    handle_refs->handle_ref_count = talloc_zero(talloc_parent, uint32_t);
+}
+
+// Add one reference to a GEM handle. Handle 0 is ignored. The counter for
+// handle h lives at index h - 1; the array is grown on demand.
+void drm_prime_add_handle_ref(struct drm_prime_handle_refs *handle_refs,
+                              uint32_t handle)
+{
+    if (!handle)
+        return;
+
+    if (handle > handle_refs->size) {
+        const size_t old_size = handle_refs->size;
+
+        MP_TARRAY_GROW(handle_refs->ctx, handle_refs->handle_ref_count,
+                       handle);
+
+        // Start every newly exposed counter at zero. MP_TARRAY_GROW is
+        // defined elsewhere; presumably it does not zero-fill the added
+        // space, and the counters are incremented and read without any
+        // other initialization.
+        for (size_t i = old_size; i < handle; i++)
+            handle_refs->handle_ref_count[i] = 0;
+
+        handle_refs->size = handle;
+    }
+
+    handle_refs->handle_ref_count[handle - 1]++;
+}
+
+// Drop one reference from a GEM handle. Handle 0, handles beyond the tracked
+// range and handles whose count is already zero are left alone.
+void drm_prime_remove_handle_ref(struct drm_prime_handle_refs *handle_refs,
+                                 uint32_t handle)
+{
+    if (!handle || handle > handle_refs->size)
+        return;
+
+    uint32_t *count = &handle_refs->handle_ref_count[handle - 1];
+    if (*count)
+        (*count)--;
+}
+
+// Return the reference count of a GEM handle, or 0 for handle 0 and for
+// handles beyond the tracked range.
+uint32_t drm_prime_get_handle_ref_count(struct drm_prime_handle_refs *handle_refs,
+                                        uint32_t handle)
+{
+    const bool tracked = handle && handle <= handle_refs->size;
+
+    return tracked ? handle_refs->handle_ref_count[handle - 1] : 0;
+}
diff --git a/video/out/drm_prime.h b/video/out/drm_prime.h
new file mode 100644
index 0000000..69acba6
--- /dev/null
+++ b/video/out/drm_prime.h
@@ -0,0 +1,45 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef DRM_PRIME_H
+#define DRM_PRIME_H
+
+#include <libavutil/hwcontext_drm.h>
+
+#include "common/msg.h"
+
+struct drm_prime_framebuffer { // A KMS framebuffer created from an imported DRM PRIME frame
+    uint32_t fb_id; // framebuffer ID from drmModeAddFB2*(); 0 means none
+    uint32_t gem_handles[AV_DRM_MAX_PLANES]; // GEM handle per descriptor object (indexed by object); 0 marks an unused slot
+};
+
+struct drm_prime_handle_refs { // Reference counts for GEM handles
+    uint32_t *handle_ref_count; // counter for handle h is stored at index h - 1
+    size_t size; // number of tracked entries; handles above this are treated as having no references
+    void *ctx; // talloc parent used for the array
+};
+
+int drm_prime_create_framebuffer(struct mp_log *log, int fd, AVDRMFrameDescriptor *descriptor, int width, int height,
+                                 struct  drm_prime_framebuffer *framebuffers,
+                                 struct drm_prime_handle_refs *handle_refs); // import descriptor and create a framebuffer; 0 on success, -1 on failure
+void drm_prime_destroy_framebuffer(struct mp_log *log, int fd, struct  drm_prime_framebuffer *framebuffers,
+                                   struct  drm_prime_handle_refs *handle_refs); // remove the framebuffer, unref its handles and close those no longer referenced
+void drm_prime_init_handle_ref_count(void *talloc_parent, struct drm_prime_handle_refs *handle_refs); // set up an empty reference table
+void drm_prime_add_handle_ref(struct drm_prime_handle_refs *handle_refs, uint32_t handle); // increment the handle's count; handle 0 is ignored
+void drm_prime_remove_handle_ref(struct drm_prime_handle_refs *handle_refs, uint32_t handle); // decrement the handle's count if it is tracked and non-zero
+uint32_t drm_prime_get_handle_ref_count(struct drm_prime_handle_refs *handle_refs, uint32_t handle); // current count, 0 if the handle is not tracked
+#endif // DRM_PRIME_H
diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c
new file mode 100644
index 0000000..95d99ff
--- /dev/null
+++ b/video/out/filter_kernels.c
@@ -0,0 +1,411 @@
+/*
+ * Some of the filter code was taken from Glumpy:
+ * # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved.
+ * # Distributed under the (new) BSD License.
+ * (https://github.com/glumpy/glumpy/blob/master/glumpy/library/build-spatial-filters.py)
+ *
+ * Also see:
+ * - http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h
+ * - Vapoursynth plugin fmtconv (WTFPL Licensed), which is based on
+ *   dither plugin for avisynth from the same author:
+ *   https://github.com/vapoursynth/fmtconv/tree/master/src/fmtc
+ * - Paul Heckbert's "zoom"
+ * - XBMC: ConvolutionKernels.cpp etc.
+ *
+ * This file is part of mpv.
+ *
+ * This file can be distributed under the 3-clause license ("New BSD License").
+ *
+ * You can alternatively redistribute the non-Glumpy parts of this file and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <math.h>
+#include <assert.h>
+
+#include "filter_kernels.h"
+#include "common/common.h"
+
+// NOTE: all filters are designed for discrete convolution
+
+// Look up a window by name in mp_filter_windows. Returns NULL when `name` is
+// NULL or no entry matches.
+const struct filter_window *mp_find_filter_window(const char *name)
+{
+    // The table ends at the first entry whose name is NULL.
+    const struct filter_window *cur = name ? mp_filter_windows : NULL;
+    for (; cur && cur->name; cur++) {
+        if (!strcmp(cur->name, name))
+            return cur;
+    }
+    return NULL;
+}
+
+// Look up a kernel by name in mp_filter_kernels. Returns NULL when `name` is
+// NULL or no entry matches.
+const struct filter_kernel *mp_find_filter_kernel(const char *name)
+{
+    if (name == NULL)
+        return NULL;
+
+    // The table ends at the first entry whose f.name is NULL.
+    const struct filter_kernel *cur = mp_filter_kernels;
+    while (cur->f.name) {
+        if (strcmp(cur->f.name, name) == 0)
+            return cur;
+        cur++;
+    }
+    return NULL;
+}
+
+// Derive the runtime fields of `filter` (radius, filter_scale, size).
+// sizes: ascending list of the available filter sizes; a 0 entry ends the list.
+// inv_scale: source_size / dest_size.
+// Returns false if the filter had to be cut down to fit, true otherwise.
+bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
+                    double inv_scale)
+{
+    assert(filter->f.radius > 0);
+    // A non-positive blur counts as "not set" and is treated as 1.0.
+    double blur_factor = 1.0;
+    if (filter->f.blur > 0.0)
+        blur_factor = filter->f.blur;
+    filter->radius = blur_factor * filter->f.radius;
+
+    // The filter only ever gets wider (downscaling), never narrower.
+    filter->filter_scale = MPMAX(1.0, inv_scale);
+    double scaled_radius = filter->radius * filter->filter_scale;
+
+    if (filter->polar) {
+        // Polar (EWA) filters depend on the radius alone; size is a dummy.
+        filter->size = 1;
+        // Cap the radius at 16 so the generated shader stays manageable.
+        bool fits = !(scaled_radius > 16.0);
+        if (!fits)
+            filter->filter_scale = 16.0 / filter->radius;
+        return fits;
+    }
+
+    // Choose the first listed size that covers the whole support, but never
+    // go below the smallest listed size.
+    int wanted = ceil(2.0 * scaled_radius);
+    wanted = wanted < sizes[0] ? sizes[0] : wanted;
+    const int *it = sizes;
+    while (*it && wanted > *it)
+        it++;
+
+    if (*it) {
+        filter->size = *it;
+        return true;
+    }
+
+    // Nothing listed is large enough. Rather than failing outright, take the
+    // largest size and shrink filter_scale to match it. This is incorrect,
+    // but better than refusing to do anything.
+    filter->size = it[-1];
+    filter->filter_scale = (filter->size / 2.0) / filter->radius;
+    return false;
+}
+
+// Evaluate a window function at x, honoring its blur and taper settings.
+static double sample_window(struct filter_window *kernel, double x)
+{
+    // A window without a weight function is constant 1.
+    if (!kernel->weight)
+        return 1.0;
+
+    // Windows are symmetric, so only |x| matters. A positive blur stretches
+    // the window.
+    double pos = fabs(x);
+    if (kernel->blur > 0.0)
+        pos = pos / kernel->blur;
+
+    // Inside the taper the position collapses to 0 (flat center); outside it
+    // is rescaled to the remaining range.
+    if (pos <= kernel->taper)
+        pos = 0.0;
+    else
+        pos = (pos - kernel->taper) / (1 - kernel->taper);
+
+    return pos < kernel->radius ? kernel->weight(kernel, pos) : 0.0;
+}
+
+// Windowed kernel value at absolute position x. Negative results are scaled
+// by (1 - filter->clamp).
+static double sample_filter(struct filter_kernel *filter, double x)
+{
+    // Rescale x so that the window spans exactly the kernel's radius
+    double win = sample_window(&filter->w, x / filter->radius * filter->w.radius);
+    double val = win * sample_window(&filter->f, x);
+    if (val < 0)
+        return (1 - filter->clamp) * val;
+    return val;
+}
+
+// Build the normalized 1D weights for a single subpixel offset.
+// The number of taps is filter->size; results go to out_w[0] .. out_w[size - 1].
+// f is the subpixel position, in the range [0,1) or [0,1].
+static void mp_compute_weights(struct filter_kernel *filter, double f,
+                               float *out_w)
+{
+    assert(filter->size > 0);
+    const int taps = filter->size;
+    double total = 0;
+    for (int i = 0; i < taps; i++) {
+        double offset = f - (i - taps / 2 + 1);
+        double weight = sample_filter(filter, offset / filter->filter_scale);
+        out_w[i] = weight;
+        total += weight;
+    }
+    // Divide by the sum so the weights add up to 1 (energy is preserved)
+    for (int i = 0; i < taps; i++)
+        out_w[i] /= total;
+}
+
+// Fill the given array with weights for the range [0.0, 1.0]. The array is
+// interpreted as a rectangular array of count * filter->size items, with a
+// stride of `stride` floats in between each array element. (For polar filters,
+// the `count` indicates the row size and filter->size/stride are ignored)
+// For polar filters this also updates filter->radius_cutoff.
+//
+// There will be slight sampling error if these weights are used in an OpenGL
+// texture as LUT directly. The sampling point of a texel is located at its
+// center, so out_array[0] will end up at 0.5 / count instead of 0.0.
+// Correct lookup requires a linear coordinate mapping from [0.0, 1.0] to
+// [0.5 / count, 1.0 - 0.5 / count].
+void mp_compute_lut(struct filter_kernel *filter, int count, int stride,
+                    float *out_array)
+{
+    // Number of intervals between LUT entries. Clamped to 1 so that a
+    // single-entry LUT (count == 1) samples position 0.0 instead of dividing
+    // by zero and producing NaN weights.
+    int steps = count > 1 ? count - 1 : 1;
+
+    if (filter->polar) {
+        filter->radius_cutoff = 0.0;
+        // Compute a 1D array indexed by radius
+        for (int x = 0; x < count; x++) {
+            double r = x * filter->radius / steps;
+            out_array[x] = sample_filter(filter, r);
+
+            // Remember the largest radius whose weight is still significant
+            if (fabs(out_array[x]) > 1e-3f)
+                filter->radius_cutoff = r;
+        }
+    } else {
+        // Compute a 2D array indexed by subpixel position
+        for (int n = 0; n < count; n++) {
+            mp_compute_weights(filter, n / (double)steps,
+                               out_array + stride * n);
+        }
+    }
+}
+
+typedef struct filter_window params;
+
+static double box(params *p, double x)
+{
+    // Constant 1.0 for every x. The cutoff at the radius is not done here;
+    // sample_window() returns 0 for positions at or beyond the radius.
+    return 1.0;
+}
+
+static double triangle(params *p, double x) // linear falloff: 1 at x = 0, 0 from |x| = p->radius on
+{
+    return fmax(0.0, 1.0 - fabs(x / p->radius));
+}
+
+static double cosine(params *p, double x) // plain cos(x); the window table uses radius pi/2, where it reaches 0
+{
+    return cos(x);
+}
+
+static double hanning(params *p, double x) // raised cosine: 1 at x = 0, 0 at x = 1
+{
+    return 0.5 + 0.5 * cos(M_PI * x);
+}
+
+static double hamming(params *p, double x) // raised cosine on a pedestal: 1 at x = 0, 0.08 at x = 1
+{
+    return 0.54 + 0.46 * cos(M_PI * x);
+}
+
+// Piecewise quadratic: one parabola below 0.5, another up to 1.5, 0 beyond.
+static double quadric(params *p, double x)
+{
+    if (x < 0.5)
+        return 0.75 - x * x;
+    if (!(x < 1.5))
+        return 0.0;
+
+    double d = x - 1.5;
+    return 0.5 * d * d;
+}
+
+// Power series 1 + q + q^2/(2!)^2 + q^3/(3!)^2 + ... with q = x^2 / 4, i.e.
+// the series of the modified Bessel function I0. Summation stops once the
+// next term is no larger than 1e-12.
+static double bessel_i0(double x)
+{
+    const double q = x * x / 4.0;
+    double acc = 1.0;
+    double term = q;
+    for (int k = 2; term > 1e-12; k++) {
+        acc += term;
+        // term_k = term_(k-1) * q / k^2
+        term *= q / (k * k);
+    }
+    return acc;
+}
+
+// Kaiser window with shape parameter params[0], normalized to 1 at x = 0.
+static double kaiser(params *p, double x)
+{
+    const double alpha = p->params[0];
+    // Beyond x = 1 the sqrt argument below would be negative
+    if (x > 1)
+        return 0;
+    return bessel_i0(alpha * sqrt(1.0 - x * x)) * (1.0 / bessel_i0(alpha));
+}
+
+// Blackman window with parameter alpha = params[0]:
+// a0 + a1 * cos(pi x) + a2 * cos(2 pi x), a0 = (1 - alpha) / 2, a1 = 1 / 2,
+// a2 = alpha / 2.
+static double blackman(params *p, double x)
+{
+    const double alpha = p->params[0];
+    const double phase = M_PI * x;
+    return (1 - alpha) / 2.0 + 0.5 * cos(phase) + alpha / 2.0 * cos(2 * phase);
+}
+
+static double welch(params *p, double x) // parabola: 1 at x = 0, 0 at x = 1
+{
+    return 1.0 - x*x;
+}
+
+// Cubic spline family parametrized by B (params[0]) and C (params[1]).
+// Defined piecewise on [0,1) and [1,2); 0 from 2 on.
+static double cubic_bc(params *p, double x)
+{
+    const double b = p->params[0];
+    const double c = p->params[1];
+
+    if (x < 1.0) {
+        // Inner segment: p0 + p2 * x^2 + p3 * x^3 (there is no linear term)
+        double p0 = (6.0 - 2.0 * b) / 6.0;
+        double p2 = (-18.0 + 12.0 * b + 6.0 * c) / 6.0;
+        double p3 = (12.0 - 9.0 * b - 6.0 * c) / 6.0;
+        return p0 + x * x * (p2 + x * p3);
+    }
+    if (!(x < 2.0))
+        return 0.0;
+
+    // Outer segment: q0 + q1 * x + q2 * x^2 + q3 * x^3, in Horner form
+    double q0 = (8.0 * b + 24.0 * c) / 6.0;
+    double q1 = (-12.0 * b - 48.0 * c) / 6.0;
+    double q2 = (6.0 * b + 30.0 * c) / 6.0;
+    double q3 = (-b - 6.0 * c) / 6.0;
+    return q0 + x * (q1 + x * (q2 + x * q3));
+}
+
+// Two cubic segments (below and from 1.0 on), each evaluated in Horner form.
+static double spline16(params *p, double x)
+{
+    if (x < 1.0)
+        return ((x - 9.0/5.0) * x - 1.0/5.0) * x + 1.0;
+
+    double t = x - 1;
+    return ((-1.0/3.0 * t + 4.0/5.0) * t - 7.0/15.0) * t;
+}
+
+// Three cubic segments; beyond the first, t is the offset into the segment.
+static double spline36(params *p, double x)
+{
+    if (x < 1.0)
+        return ((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0;
+
+    if (x < 2.0) {
+        double t = x - 1;
+        return ((-6.0/11.0 * t + 270.0/209.0) * t - 156.0/ 209.0) * t;
+    }
+
+    double t = x - 2;
+    return ((1.0/11.0 * t - 45.0/209.0) * t +  26.0/209.0) * t;
+}
+
+// Four cubic segments, one per unit interval; beyond the first, t is the
+// offset into the segment.
+static double spline64(params *p, double x)
+{
+    if (x < 1.0)
+        return ((49.0/41.0 * x - 6387.0/2911.0) * x - 3.0/2911.0) * x + 1.0;
+
+    if (x < 2.0) {
+        double t = x - 1;
+        return ((-24.0/41.0 * t + 4032.0/2911.0) * t - 2328.0/2911.0) * t;
+    }
+
+    if (x < 3.0) {
+        double t = x - 2;
+        return ((6.0/41.0 * t - 1008.0/2911.0) * t + 582.0/2911.0) * t;
+    }
+
+    double t = x - 3;
+    return ((-1.0/41.0 * t + 168.0/2911.0) * t - 97.0/2911.0) * t;
+}
+
+static double gaussian(params *p, double x) // exp(-2 x^2 / params[0]); params[0] scales the width
+{
+    return exp(-2.0 * x * x / p->params[0]);
+}
+
+// Normalized sinc: sin(pi x) / (pi x).
+static double sinc(params *p, double x)
+{
+    const double arg = M_PI * x;
+    // The quotient is 0/0 at the origin, so return its limit (1) near zero
+    if (fabs(x) < 1e-8)
+        return 1.0;
+    return sin(arg) / arg;
+}
+
+// Jinc: 2 * J1(pi x) / (pi x), with J1 the Bessel function of the first kind.
+static double jinc(params *p, double x)
+{
+    const double arg = M_PI * x;
+    // The quotient is 0/0 at the origin, so return its limit (1) near zero
+    if (fabs(x) < 1e-8)
+        return 1.0;
+    return 2.0 * j1(arg) / arg;
+}
+
+// 3 * (sin(t) - t * cos(t)) / t^3 with t = pi x.
+static double sphinx(params *p, double x)
+{
+    const double arg = M_PI * x;
+    // The expression is 0/0 at the origin, so return its limit (1) near zero
+    if (fabs(x) < 1e-8)
+        return 1.0;
+    return 3.0 * (sin(arg) - arg * cos(arg)) / (arg * arg * arg);
+}
+
+const struct filter_window mp_filter_windows[] = { // positional fields: {name, radius, weight function}
+    {"box",            1,   box},
+    {"triangle",       1,   triangle},
+    {"bartlett",       1,   triangle}, // alias: same function and radius as "triangle"
+    {"cosine",         M_PI_2, cosine}, // cos(x) reaches 0 at x = pi/2
+    {"hanning",        1,   hanning},
+    {"tukey",          1,   hanning, .taper = 0.5}, // hanning with a flat center (see taper in sample_window)
+    {"hamming",        1,   hamming},
+    {"quadric",        1.5, quadric},
+    {"welch",          1,   welch},
+    {"kaiser",         1,   kaiser,   .params = {6.33, NAN} },
+    {"blackman",       1,   blackman, .params = {0.16, NAN} },
+    {"gaussian",       2,   gaussian, .params = {1.00, NAN} },
+    {"sinc",           1,   sinc},
+    {"jinc",           1.2196698912665045, jinc}, // radius = first zero of jinc()
+    {"sphinx",         1.4302966531242027, sphinx}, // radius = first zero of sphinx()
+    {0} // terminator: mp_find_filter_window() stops at name == NULL
+};
+
+#define JINC_R3 3.2383154841662362 // third zero of jinc()
+#define JINC_R4 4.2410628637960699 // fourth zero of jinc()
+
+const struct filter_kernel mp_filter_kernels[] = { // inner braces initialize .f: {name, radius, weight function}
+    // Spline filters
+    {{"spline16",       2,   spline16}},
+    {{"spline36",       3,   spline36}},
+    {{"spline64",       4,   spline64}},
+    // Sinc filters
+    {{"sinc",           2,  sinc, .resizable = true}},
+    {{"lanczos",        3,  sinc, .resizable = true}, .window = "sinc"},
+    {{"ginseng",        3,  sinc, .resizable = true}, .window = "jinc"},
+    // Jinc filters
+    {{"jinc",           JINC_R3, jinc, .resizable = true}, .polar = true},
+    {{"ewa_lanczos",    JINC_R3, jinc, .resizable = true}, .polar = true, .window = "jinc"},
+    {{"ewa_hanning",    JINC_R3, jinc, .resizable = true}, .polar = true, .window = "hanning" },
+    {{"ewa_ginseng",    JINC_R3, jinc, .resizable = true}, .polar = true, .window = "sinc"},
+    // Slightly sharpened to minimize the 1D step response error (to better
+    // preserve horizontal/vertical lines)
+    {{"ewa_lanczossharp", JINC_R3, jinc, .blur = 0.9812505837223707, .resizable = true},
+        .polar = true, .window = "jinc"},
+    // Similar to the above, but sharpened substantially to the point of
+    // minimizing the total impulse response error on an integer grid. Tends
+    // to preserve hash patterns well. Very sharp but rings a lot.
+    {{"ewa_lanczos4sharpest", JINC_R4, jinc, .blur = 0.8845120932605005, .resizable = true},
+        .polar = true, .window = "jinc"},
+    // Similar to the above, but softened instead, to make even/odd integer
+    // contributions exactly symmetrical. Designed to smooth out hash patterns.
+    {{"ewa_lanczossoft", JINC_R3, jinc, .blur = 1.0164667662867047, .resizable = true},
+        .polar = true, .window = "jinc"},
+    // Very soft (blurred) hanning-windowed jinc; removes almost all aliasing.
+    // Blur parameter picked to match orthogonal and diagonal contributions
+    {{"haasnsoft", JINC_R3, jinc, .blur = 1.11, .resizable = true},
+        .polar = true, .window = "hanning"},
+    // Cubic filters (params are the B and C coefficients read by cubic_bc)
+    {{"bicubic",        2,   cubic_bc, .params = {1.0, 0.0} }},
+    {{"hermite",        1,   cubic_bc, .params = {0.0, 0.0} }},
+    {{"catmull_rom",    2,   cubic_bc, .params = {0.0, 0.5} }},
+    {{"mitchell",       2,   cubic_bc, .params = {1.0/3.0, 1.0/3.0} }},
+    {{"robidoux",       2,   cubic_bc, .params = {12 / (19 + 9 * M_SQRT2),
+                                                  113 / (58 + 216 * M_SQRT2)} }},
+    {{"robidouxsharp",  2,   cubic_bc, .params = {6 / (13 + 7 * M_SQRT2),
+                                                  7 / (2 + 12 * M_SQRT2)} }},
+    {{"ewa_robidoux",   2,   cubic_bc, .params = {12 / (19 + 9 * M_SQRT2),
+                                                  113 / (58 + 216 * M_SQRT2)}},
+            .polar = true},
+    {{"ewa_robidouxsharp", 2,cubic_bc, .params = {6 / (13 + 7 * M_SQRT2),
+                                                  7 / (2 + 12 * M_SQRT2)}},
+            .polar = true},
+    // Miscellaneous filters
+    {{"box",            1,   box, .resizable = true}},
+    {{"nearest",        0.5, box}},
+    {{"triangle",       1,   triangle, .resizable = true}},
+    {{"gaussian",       2,   gaussian, .params = {1.0, NAN}, .resizable = true}},
+    {{0}} // terminator: mp_find_filter_kernel() stops at f.name == NULL
+};
diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h
new file mode 100644
index 0000000..b8b2f67
--- /dev/null
+++ b/video/out/filter_kernels.h
@@ -0,0 +1,56 @@
+/*
+ * This file is part of mpv.
+ *
+ * This file can be distributed under the 3-clause license ("New BSD License").
+ *
+ * You can alternatively redistribute the non-Glumpy parts of this file and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ */
+
+#ifndef MPLAYER_FILTER_KERNELS_H
+#define MPLAYER_FILTER_KERNELS_H
+
+#include <stdbool.h>
+
+struct filter_window {
+    const char *name; // key compared by mp_find_filter_window(); NULL ends a table
+    double radius; // Preferred radius, should only be changed if resizable
+    double (*weight)(struct filter_window *k, double x); // weight at position x; NULL means constant 1.0
+    bool resizable; // Filter supports any given radius
+    double params[2]; // User-defined custom filter parameters. Not used by
+                      // all filters
+    double blur; // Blur coefficient (sharpens or widens the filter); values <= 0 mean "unset"
+    double taper; // Taper coefficient (flattens the filter's center)
+};
+
+struct filter_kernel {
+    struct filter_window f; // the kernel itself
+    struct filter_window w; // window storage
+    double clamp; // clamping factor: negative weights are multiplied by (1 - clamp)
+    // Constant values
+    const char *window; // default window (a name from mp_filter_windows)
+    bool polar;         // whether or not the filter uses polar coordinates
+    // The following values are set by mp_init_filter() at runtime.
+    int size;           // number of coefficients (may depend on radius)
+    double radius;        // true filter radius, derived from f.radius and f.blur
+    double filter_scale;  // Factor to convert the mathematical filter
+                          // function radius to the possibly wider
+                          // (in the case of downsampling) filter sample
+                          // radius.
+    double radius_cutoff; // the radius at which we can cut off the filter; set by mp_compute_lut() for polar filters
+};
+
+extern const struct filter_window mp_filter_windows[];
+extern const struct filter_kernel mp_filter_kernels[];
+
+const struct filter_window *mp_find_filter_window(const char *name);
+const struct filter_kernel *mp_find_filter_kernel(const char *name);
+
+// sizes = sorted list of available filter sizes, terminated with size 0
+// inv_scale = source_size / dest_size (same parameter name as the definition)
+bool mp_init_filter(struct filter_kernel *filter, const int *sizes,
+                    double inv_scale);
+void mp_compute_lut(struct filter_kernel *filter, int count, int stride,
+                    float *out_array);
+
+#endif /* MPLAYER_FILTER_KERNELS_H */
diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c
new file mode 100644
index 0000000..5ce18af
--- /dev/null
+++ b/video/out/gpu/context.c
@@ -0,0 +1,277 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <stdbool.h>
+#include <math.h>
+#include <assert.h>
+
+#include "config.h"
+#include "common/common.h"
+#include "common/msg.h"
+#include "options/options.h"
+#include "options/m_option.h"
+#include "video/out/vo.h"
+
+#include "context.h"
+#include "spirv.h"
+
+/* OpenGL */
+extern const struct ra_ctx_fns ra_ctx_glx;
+extern const struct ra_ctx_fns ra_ctx_x11_egl;
+extern const struct ra_ctx_fns ra_ctx_drm_egl;
+extern const struct ra_ctx_fns ra_ctx_wayland_egl;
+extern const struct ra_ctx_fns ra_ctx_wgl;
+extern const struct ra_ctx_fns ra_ctx_angle;
+extern const struct ra_ctx_fns ra_ctx_dxgl;
+extern const struct ra_ctx_fns ra_ctx_rpi;
+extern const struct ra_ctx_fns ra_ctx_android;
+
+/* Vulkan */
+extern const struct ra_ctx_fns ra_ctx_vulkan_wayland;
+extern const struct ra_ctx_fns ra_ctx_vulkan_win;
+extern const struct ra_ctx_fns ra_ctx_vulkan_xlib;
+extern const struct ra_ctx_fns ra_ctx_vulkan_android;
+extern const struct ra_ctx_fns ra_ctx_vulkan_display;
+extern const struct ra_ctx_fns ra_ctx_vulkan_mac;
+
+/* Direct3D 11 */
+extern const struct ra_ctx_fns ra_ctx_d3d11;
+
+/* No API */
+extern const struct ra_ctx_fns ra_ctx_wldmabuf;
+
+static const struct ra_ctx_fns *contexts[] = { // ra_ctx_create() tries these in array order
+#if HAVE_D3D11
+    &ra_ctx_d3d11,
+#endif
+
+// OpenGL contexts:
+#if HAVE_EGL_ANDROID
+    &ra_ctx_android,
+#endif
+#if HAVE_RPI
+    &ra_ctx_rpi,
+#endif
+#if HAVE_EGL_ANGLE_WIN32
+    &ra_ctx_angle,
+#endif
+#if HAVE_GL_WIN32
+    &ra_ctx_wgl,
+#endif
+#if HAVE_GL_DXINTEROP
+    &ra_ctx_dxgl,
+#endif
+#if HAVE_EGL_WAYLAND
+    &ra_ctx_wayland_egl,
+#endif
+#if HAVE_EGL_X11
+    &ra_ctx_x11_egl,
+#endif
+#if HAVE_GL_X11
+    &ra_ctx_glx,
+#endif
+#if HAVE_EGL_DRM
+    &ra_ctx_drm_egl,
+#endif
+
+// Vulkan contexts:
+#if HAVE_VULKAN
+
+#if HAVE_ANDROID
+    &ra_ctx_vulkan_android,
+#endif
+#if HAVE_WIN32_DESKTOP
+    &ra_ctx_vulkan_win,
+#endif
+#if HAVE_WAYLAND
+    &ra_ctx_vulkan_wayland,
+#endif
+#if HAVE_X11
+    &ra_ctx_vulkan_xlib,
+#endif
+#if HAVE_VK_KHR_DISPLAY
+    &ra_ctx_vulkan_display,
+#endif
+#if HAVE_COCOA && HAVE_SWIFT
+    &ra_ctx_vulkan_mac,
+#endif
+#endif
+
+/* No API contexts: */
+#if HAVE_DMABUF_WAYLAND
+    &ra_ctx_wldmabuf,
+#endif
+};
+
+// Help callback for the "gpu-api" option: prints one "type (name)" line for
+// every non-hidden context, then asks the option parser to exit.
+static int ra_ctx_api_help(struct mp_log *log, const struct m_option *opt,
+                           struct bstr name)
+{
+    mp_info(log, "GPU APIs (contexts):\n");
+    mp_info(log, "    auto (autodetect)\n");
+    for (int idx = 0; idx < MP_ARRAY_SIZE(contexts); idx++) {
+        const struct ra_ctx_fns *fns = contexts[idx];
+        if (fns->hidden)
+            continue;
+        mp_info(log, "    %s (%s)\n", fns->type, fns->name);
+    }
+    return M_OPT_EXIT;
+}
+
+// Validator for the "gpu-api" option: accepts "auto" or the type of any
+// non-hidden entry in contexts[]; everything else is M_OPT_INVALID.
+static int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt,
+                               struct bstr name, const char **value)
+{
+    struct bstr wanted = bstr0(*value);
+    bool ok = bstr_equals0(wanted, "auto");
+    for (int idx = 0; !ok && idx < MP_ARRAY_SIZE(contexts); idx++) {
+        const struct ra_ctx_fns *fns = contexts[idx];
+        ok = !fns->hidden && bstr_equals0(wanted, fns->type);
+    }
+    return ok ? 1 : M_OPT_INVALID;
+}
+
+// Help callback for the "gpu-context" option: prints one "name (type)" line
+// for every non-hidden context, then asks the option parser to exit.
+static int ra_ctx_context_help(struct mp_log *log, const struct m_option *opt,
+                               struct bstr name)
+{
+    mp_info(log, "GPU contexts (APIs):\n");
+    mp_info(log, "    auto (autodetect)\n");
+
+    const int total = MP_ARRAY_SIZE(contexts);
+    for (int idx = 0; idx < total; idx++) {
+        const struct ra_ctx_fns *fns = contexts[idx];
+        if (!fns->hidden)
+            mp_info(log, "    %s (%s)\n", fns->name, fns->type);
+    }
+    return M_OPT_EXIT;
+}
+
+// Validator for the "gpu-context" option: accepts "auto" or the name of any
+// non-hidden entry in contexts[]; everything else is M_OPT_INVALID.
+static int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt,
+                                   struct bstr name, const char **value)
+{
+    struct bstr wanted = bstr0(*value);
+    if (bstr_equals0(wanted, "auto"))
+        return 1;
+
+    const int total = MP_ARRAY_SIZE(contexts);
+    int idx = 0;
+    while (idx < total) {
+        const struct ra_ctx_fns *fns = contexts[idx++];
+        if (!fns->hidden && bstr_equals0(wanted, fns->name))
+            return 1;
+    }
+    return M_OPT_INVALID;
+}
+
+// Create a VO window and create a RA context on it.
+// Candidates are taken from contexts[] in order; the first one that passes the
+// name/API filters in `opts` and whose init() succeeds is returned. A missing
+// or "auto" context name switches to probing mode, in which the name filter is
+// not applied. Returns NULL if no context could be initialized.
+struct ra_ctx *ra_ctx_create(struct vo *vo, struct ra_ctx_opts opts)
+{
+    bool any_api = !opts.context_type || strcmp(opts.context_type, "auto") == 0;
+    bool any_ctx = !opts.context_name || strcmp(opts.context_name, "auto") == 0;
+
+    if (any_ctx) {
+        MP_VERBOSE(vo, "Probing for best GPU context.\n");
+        opts.probing = true;
+    }
+
+    // Hack to silence backend (X11/Wayland/etc.) errors. Kill it once backends
+    // are separate from `struct vo`
+    const bool saved_probing = vo->probing;
+    vo->probing = opts.probing;
+
+    struct ra_ctx *result = NULL;
+    for (int idx = 0; !result && idx < MP_ARRAY_SIZE(contexts); idx++) {
+        const struct ra_ctx_fns *fns = contexts[idx];
+
+        // Filter out hidden entries and anything the user did not ask for
+        if (fns->hidden)
+            continue;
+        if (!opts.probing && strcmp(fns->name, opts.context_name) != 0)
+            continue;
+        if (!any_api && strcmp(fns->type, opts.context_type) != 0)
+            continue;
+
+        struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx);
+        *ctx = (struct ra_ctx) {
+            .vo = vo,
+            .global = vo->global,
+            .log = mp_log_new(ctx, vo->log, fns->type),
+            .opts = opts,
+            .fns = fns,
+        };
+
+        MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name);
+        if (fns->init(ctx)) {
+            result = ctx;
+        } else {
+            talloc_free(ctx);
+        }
+    }
+
+    vo->probing = saved_probing;
+
+    // Without a result, either no context matched the requested name, or
+    // backend creation failed for every candidate.
+    if (!result && !vo->probing)
+        MP_ERR(vo, "Failed initializing any suitable GPU context!\n");
+    return result;
+}
+
+// Create a ra_ctx from the contexts[] entry called `name`. Unlike
+// ra_ctx_create(), the `hidden` flag is not checked and `opts` is left
+// zero-initialized. Returns NULL if no entry with that name initializes.
+struct ra_ctx *ra_ctx_create_by_name(struct vo *vo, const char *name)
+{
+    for (int idx = 0; idx < MP_ARRAY_SIZE(contexts); idx++) {
+        const struct ra_ctx_fns *fns = contexts[idx];
+        if (strcmp(name, fns->name) == 0) {
+            struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx);
+            *ctx = (struct ra_ctx) {
+                .vo = vo,
+                .global = vo->global,
+                .log = mp_log_new(ctx, vo->log, fns->type),
+                .fns = fns,
+            };
+
+            MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name);
+            if (fns->init(ctx))
+                return ctx;
+            talloc_free(ctx);
+        }
+    }
+    return NULL;
+}
+
+// Uninitialize and free *ctx_ptr, then reset the caller's pointer to NULL.
+// Does nothing if *ctx_ptr is already NULL.
+void ra_ctx_destroy(struct ra_ctx **ctx_ptr)
+{
+    struct ra_ctx *ctx = *ctx_ptr;
+    if (ctx) {
+        // The SPIR-V compiler is optional, and so is its uninit callback
+        struct spirv_compiler *spirv = ctx->spirv;
+        if (spirv && spirv->fns->uninit)
+            spirv->fns->uninit(ctx);
+
+        ctx->fns->uninit(ctx);
+        talloc_free(ctx);
+
+        *ctx_ptr = NULL;
+    }
+}
+
+#define OPT_BASE_STRUCT struct ra_ctx_opts
+const struct m_sub_options ra_ctx_conf = { // option table for the fields of struct ra_ctx_opts
+    .opts = (const m_option_t[]) {
+        {"gpu-context", // stored in context_name
+            OPT_STRING_VALIDATE(context_name, ra_ctx_validate_context),
+            .help = ra_ctx_context_help},
+        {"gpu-api", // stored in context_type
+            OPT_STRING_VALIDATE(context_type, ra_ctx_validate_api),
+            .help = ra_ctx_api_help},
+        {"gpu-debug", OPT_BOOL(debug)},
+        {"gpu-sw", OPT_BOOL(allow_sw)},
+        {0}
+    },
+    .size = sizeof(struct ra_ctx_opts),
+};
diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h
new file mode 100644
index 0000000..6788e6f
--- /dev/null
+++ b/video/out/gpu/context.h
@@ -0,0 +1,107 @@
+#pragma once
+
+#include "video/out/vo.h"
+#include "video/csputils.h"
+
+#include "ra.h"
+
+struct ra_ctx_opts {
+    bool allow_sw;        // allow software renderers (option "gpu-sw")
+    bool want_alpha;      // create an alpha framebuffer if possible
+    bool debug;           // enable debugging layers/callbacks etc. (option "gpu-debug")
+    bool probing;        // the backend was auto-probed
+    char *context_name;  // filter by `ra_ctx_fns.name`; NULL or "auto" means probe
+    char *context_type;  // filter by `ra_ctx_fns.type`; NULL or "auto" accepts any
+};
+
+extern const struct m_sub_options ra_ctx_conf;
+
+struct ra_ctx { // one GPU context instance, as created by ra_ctx_create*()
+    struct vo *vo; // the VO this context was created for
+    struct ra *ra; // NOTE(review): presumably set up by fns->init() — confirm in the backends
+    struct mpv_global *global; // copied from vo->global on creation
+    struct mp_log *log; // created with mp_log_new(ctx, vo->log, fns->type)
+
+    struct ra_ctx_opts opts; // copy of the options given to ra_ctx_create()
+    const struct ra_ctx_fns *fns; // the backend implementing this context
+    struct ra_swapchain *swapchain;
+    struct spirv_compiler *spirv; // optional; ra_ctx_destroy() calls its uninit hook if present
+
+    void *priv; // NOTE(review): presumably backend-private data — confirm
+};
+
+// The functions that make up a ra_ctx.
+struct ra_ctx_fns {
+    const char *type; // API type (for --gpu-api)
+    const char *name; // name (for --gpu-context)
+
+    bool hidden; // hide the ra_ctx from users (still reachable via ra_ctx_create_by_name)
+
+    // Resize the window, or create a new window if there isn't one yet.
+    // Currently, there is an unfortunate interaction with ctx->vo, and
+    // display size etc. are determined by it.
+    bool (*reconfig)(struct ra_ctx *ctx);
+
+    // This behaves exactly like vo_driver.control().
+    int (*control)(struct ra_ctx *ctx, int *events, int request, void *arg);
+
+    // These behave exactly like vo_driver.wakeup/wait_events. They are
+    // optional.
+    void (*wakeup)(struct ra_ctx *ctx);
+    void (*wait_events)(struct ra_ctx *ctx, int64_t until_time_ns);
+    void (*update_render_opts)(struct ra_ctx *ctx);
+
+    // Initialize/destroy the 'struct ra' and possibly the underlying VO backend.
+    // Not normally called by the user of the ra_ctx.
+    bool (*init)(struct ra_ctx *ctx); // true on success; on false the caller frees ctx
+    void (*uninit)(struct ra_ctx *ctx);
+};
+
+// Extra struct for the swapchain-related functions so they can be easily
+// inherited from helpers.
+struct ra_swapchain {
+    struct ra_ctx *ctx; // NOTE(review): presumably the owning context — confirm
+    struct priv *priv;
+    const struct ra_swapchain_fns *fns; // the swapchain callbacks (see struct ra_swapchain_fns)
+};
+
+// Represents a framebuffer / render target
+struct ra_fbo {
+    struct ra_tex *tex; // the texture to render into
+    bool flip; // rendering needs to be inverted
+
+    // Host system's colorspace that it will be interpreting
+    // the frame buffer as.
+    struct mp_colorspace color_space;
+};
+
+struct ra_swapchain_fns {
+    // Gets the current framebuffer depth in bits (0 if unknown). Optional.
+    int (*color_depth)(struct ra_swapchain *sw);
+
+    // Called when rendering starts. Returns false on failure. This must be
+    // followed by submit_frame, to submit the rendered frame. This function
+    // can also fail sporadically, and such errors should be ignored unless
+    // they persist.
+    bool (*start_frame)(struct ra_swapchain *sw, struct ra_fbo *out_fbo);
+
+    // Present the frame. Issued in lockstep with start_frame, with rendering
+    // commands in between. The `frame` is just there for timing data, for
+    // swapchains smart enough to do something with it.
+    bool (*submit_frame)(struct ra_swapchain *sw, const struct vo_frame *frame);
+
+    // Performs a buffer swap. This blocks for as long as necessary to meet
+    // params.swapchain_depth, or until the next vblank (for vsynced contexts)
+    void (*swap_buffers)(struct ra_swapchain *sw);
+
+    // See vo. Usually called after swap_buffers().
+    void (*get_vsync)(struct ra_swapchain *sw, struct vo_vsync_info *info);
+};
+
+// Create and destroy a ra_ctx. This also takes care of creating and destroying
+// the underlying `struct ra`, and perhaps the underlying VO backend.
+struct ra_ctx *ra_ctx_create(struct vo *vo, struct ra_ctx_opts opts);
+void ra_ctx_destroy(struct ra_ctx **ctx);
+
+// Special case of creating a ra_ctx while specifying a specific context by name.
+struct ra_ctx *ra_ctx_create_by_name(struct vo *vo, const char *name);
diff --git a/video/out/gpu/d3d11_helpers.c b/video/out/gpu/d3d11_helpers.c
new file mode 100644
index 0000000..30d9eae
--- /dev/null
+++ b/video/out/gpu/d3d11_helpers.c
@@ -0,0 +1,966 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <d3d11.h>
+#include <dxgi1_6.h>
+#include <versionhelpers.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "misc/bstr.h"
+#include "osdep/io.h"
+#include "osdep/threads.h"
+#include "osdep/windows_utils.h"
+
+#include "d3d11_helpers.h"
+
+// Windows 8 enum value, not present in mingw-w64 headers
+#define DXGI_ADAPTER_FLAG_SOFTWARE (2)
+typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory);
+
+static mp_once d3d11_once = MP_STATIC_ONCE_INITIALIZER;
+static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL;
+static PFN_CREATE_DXGI_FACTORY pCreateDXGIFactory1 = NULL;
+static void d3d11_load(void)
+{
+    HMODULE d3d11   = LoadLibraryW(L"d3d11.dll");
+    HMODULE dxgilib = LoadLibraryW(L"dxgi.dll");
+    if (!d3d11 || !dxgilib)
+        return;
+
+    pD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE)
+        GetProcAddress(d3d11, "D3D11CreateDevice");
+    pCreateDXGIFactory1 = (PFN_CREATE_DXGI_FACTORY)
+        GetProcAddress(dxgilib, "CreateDXGIFactory1");
+}
+
+static bool load_d3d11_functions(struct mp_log *log)
+{
+    mp_exec_once(&d3d11_once, d3d11_load);
+    if (!pD3D11CreateDevice || !pCreateDXGIFactory1) {
+        mp_fatal(log, "Failed to load base d3d11 functionality: "
+                      "CreateDevice: %s, CreateDXGIFactory1: %s\n",
+                 pD3D11CreateDevice ? "success" : "failure",
+                 pCreateDXGIFactory1 ? "success": "failure");
+        return false;
+    }
+
+    return true;
+}
+
+#define D3D11_DXGI_ENUM(prefix, define) { case prefix ## define: return #define; }
+
+static const char *d3d11_get_format_name(DXGI_FORMAT fmt)
+{
+    switch (fmt) {
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, UNKNOWN);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G8X24_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT_S8X24_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT_X8X24_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, X32_TYPELESS_G8X24_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R11G11B10_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM_SRGB);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R24G8_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, D24_UNORM_S8_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R24_UNORM_X8_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, X24_TYPELESS_G8_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_FLOAT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, D16_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SINT);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, A8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R1_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R9G9B9E5_SHAREDEXP);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_B8G8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, G8R8_G8B8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM_SRGB);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM_SRGB);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM_SRGB);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_SNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G6R5_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G5R5A1_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10_XR_BIAS_A2_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM_SRGB);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM_SRGB);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_UF16);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_SF16);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_TYPELESS);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM_SRGB);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, AYUV);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, Y410);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, Y416);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, NV12);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, P010);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, P016);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, 420_OPAQUE);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, YUY2);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, Y210);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, Y216);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, NV11);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, AI44);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, IA44);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, P8);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, A8P8);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, B4G4R4A4_UNORM);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, P208);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, V208);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, V408);
+    D3D11_DXGI_ENUM(DXGI_FORMAT_, FORCE_UINT);
+    default:
+        return "<Unknown>";
+    }
+}
+
+static const char *d3d11_get_csp_name(DXGI_COLOR_SPACE_TYPE csp)
+{
+    switch (csp) {
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P709);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G10_NONE_P709);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P709);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RESERVED);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_NONE_P709_X601);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P601);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P601);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P709);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P709);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G2084_NONE_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_LEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G2084_NONE_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_TOPLEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_TOPLEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_GHLG_TOPLEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_GHLG_TOPLEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P709);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P709);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_TOPLEFT_P2020);
+    D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, CUSTOM);
+    default:
+        return "<Unknown>";
+    }
+}
+
+static bool d3d11_get_mp_csp(DXGI_COLOR_SPACE_TYPE csp,
+                             struct mp_colorspace *mp_csp)
+{
+    if (!mp_csp)
+        return false;
+
+    // Colorspaces utilizing gamma 2.2 (G22) are set to
+    // AUTO as that keeps the current default flow regarding
+    // SDR transfer function handling.
+    // (no adjustment is done unless the user has a CMS LUT).
+    //
+    // Additionally, only set primary information with colorspaces
+    // utilizing non-709 primaries to keep the current behavior
+    // regarding not doing conversion from BT.601 to BT.709.
+    switch (csp) {
+    case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709:
+        *mp_csp = (struct mp_colorspace){
+            .gamma     = MP_CSP_TRC_AUTO,
+            .primaries = MP_CSP_PRIM_AUTO,
+        };
+        break;
+    case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709:
+        *mp_csp = (struct mp_colorspace) {
+            .gamma     = MP_CSP_TRC_LINEAR,
+            .primaries = MP_CSP_PRIM_AUTO,
+        };
+        break;
+    case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020:
+        *mp_csp = (struct mp_colorspace) {
+            .gamma     = MP_CSP_TRC_PQ,
+            .primaries = MP_CSP_PRIM_BT_2020,
+        };
+        break;
+    case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P2020:
+        *mp_csp = (struct mp_colorspace) {
+            .gamma     = MP_CSP_TRC_AUTO,
+            .primaries = MP_CSP_PRIM_BT_2020,
+        };
+        break;
+    default:
+        return false;
+    }
+
+    return true;
+}
+
+static bool query_output_format_and_colorspace(struct mp_log *log,
+                                               IDXGISwapChain *swapchain,
+                                               DXGI_FORMAT *out_fmt,
+                                               DXGI_COLOR_SPACE_TYPE *out_cspace)
+{
+    IDXGIOutput *output = NULL;
+    IDXGIOutput6 *output6 = NULL;
+    DXGI_OUTPUT_DESC1 desc = { 0 };
+    char *monitor_name = NULL;
+    bool success = false;
+
+    if (!out_fmt || !out_cspace)
+        return false;
+
+    HRESULT hr = IDXGISwapChain_GetContainingOutput(swapchain, &output);
+    if (FAILED(hr)) {
+        mp_err(log, "Failed to get swap chain's containing output: %s!\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    hr = IDXGIOutput_QueryInterface(output, &IID_IDXGIOutput6,
+                                    (void**)&output6);
+    if (FAILED(hr)) {
+        // point where systems older than Windows 10 would fail,
+        // thus utilizing error log level only with windows 10+
+        mp_msg(log, IsWindows10OrGreater() ? MSGL_ERR : MSGL_V,
+               "Failed to create a DXGI 1.6 output interface: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    hr = IDXGIOutput6_GetDesc1(output6, &desc);
+    if (FAILED(hr)) {
+        mp_err(log, "Failed to query swap chain's output information: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    monitor_name = mp_to_utf8(NULL, desc.DeviceName);
+
+    mp_verbose(log, "Queried output: %s, %ldx%ld @ %d bits, colorspace: %s (%d)\n",
+               monitor_name,
+               desc.DesktopCoordinates.right - desc.DesktopCoordinates.left,
+               desc.DesktopCoordinates.bottom - desc.DesktopCoordinates.top,
+               desc.BitsPerColor,
+               d3d11_get_csp_name(desc.ColorSpace),
+               desc.ColorSpace);
+
+    *out_cspace = desc.ColorSpace;
+
+    // limit ourselves to the 8bit and 10bit formats for now.
+    // while the 16bit float format would be preferable as something
+    // to default to, it seems to be hard-coded to linear transfer
+    // in windowed mode, and follows configured colorspace in full screen.
+    *out_fmt = desc.BitsPerColor > 8 ?
+               DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM;
+
+    success = true;
+
+done:
+    talloc_free(monitor_name);
+    SAFE_RELEASE(output6);
+    SAFE_RELEASE(output);
+    return success;
+}
+
+// Get a const array of D3D_FEATURE_LEVELs from max_fl to min_fl (inclusive)
+static int get_feature_levels(int max_fl, int min_fl,
+                              const D3D_FEATURE_LEVEL **out)
+{
+    static const D3D_FEATURE_LEVEL levels[] = {
+        D3D_FEATURE_LEVEL_12_1,
+        D3D_FEATURE_LEVEL_12_0,
+        D3D_FEATURE_LEVEL_11_1,
+        D3D_FEATURE_LEVEL_11_0,
+        D3D_FEATURE_LEVEL_10_1,
+        D3D_FEATURE_LEVEL_10_0,
+        D3D_FEATURE_LEVEL_9_3,
+        D3D_FEATURE_LEVEL_9_2,
+        D3D_FEATURE_LEVEL_9_1,
+    };
+    static const int levels_len = MP_ARRAY_SIZE(levels);
+
+    int start = 0;
+    for (; start < levels_len; start++) {
+        if (levels[start] <= max_fl)
+            break;
+    }
+    int len = 0;
+    for (; start + len < levels_len; len++) {
+        if (levels[start + len] < min_fl)
+            break;
+    }
+    *out = &levels[start];
+    return len;
+}
+
+// Enumerates DXGI adapters; if `listing` is set, a line per adapter is appended
+// to it. Returns the first adapter whose description matches (via
+// bstr_case_startswith) a non-empty requested name, else NULL. Caller releases.
+static IDXGIAdapter1 *get_d3d11_adapter(struct mp_log *log,
+                                        struct bstr requested_adapter_name,
+                                        struct bstr *listing)
+{
+    HRESULT hr = S_OK;
+    IDXGIFactory1 *factory;
+    IDXGIAdapter1 *picked_adapter = NULL;
+
+    hr = pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **)&factory);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to create a DXGI factory: %s\n",
+                 mp_HRESULT_to_str(hr));
+        return NULL;
+    }
+
+    for (unsigned int adapter_num = 0; hr != DXGI_ERROR_NOT_FOUND; adapter_num++)
+    {
+        IDXGIAdapter1 *adapter = NULL;
+        DXGI_ADAPTER_DESC1 desc = { 0 };
+        char *adapter_description = NULL;
+
+        hr = IDXGIFactory1_EnumAdapters1(factory, adapter_num, &adapter);
+        if (FAILED(hr)) {
+            if (hr != DXGI_ERROR_NOT_FOUND) {
+                mp_fatal(log, "Failed to enumerate at adapter %u\n",
+                         adapter_num);
+            }
+            continue;
+        }
+
+        if (FAILED(IDXGIAdapter1_GetDesc1(adapter, &desc))) {
+            mp_fatal(log, "Failed to get adapter description when listing at adapter %u\n",
+                     adapter_num);
+            SAFE_RELEASE(adapter); // don't leak the enumerated adapter
+            continue;
+        }
+
+        adapter_description = mp_to_utf8(NULL, desc.Description);
+
+        if (listing) {
+            bstr_xappend_asprintf(NULL, listing,
+                                  "Adapter %u: vendor: %u, description: %s\n",
+                                  adapter_num, desc.VendorId,
+                                  adapter_description);
+        }
+
+        if (requested_adapter_name.len &&
+            bstr_case_startswith(bstr0(adapter_description),
+                                 requested_adapter_name))
+        {
+            picked_adapter = adapter;
+        }
+        talloc_free(adapter_description);
+
+        if (picked_adapter)
+            break;
+        SAFE_RELEASE(adapter);
+    }
+
+    SAFE_RELEASE(factory);
+    return picked_adapter;
+}
+
+static HRESULT create_device(struct mp_log *log, IDXGIAdapter1 *adapter,
+                             bool warp, bool debug, int max_fl, int min_fl,
+                             ID3D11Device **dev)
+{
+    const D3D_FEATURE_LEVEL *levels;
+    int levels_len = get_feature_levels(max_fl, min_fl, &levels);
+    if (!levels_len) {
+        mp_fatal(log, "No suitable Direct3D feature level found\n");
+        return E_FAIL;
+    }
+
+    D3D_DRIVER_TYPE type = warp ? D3D_DRIVER_TYPE_WARP
+                                : D3D_DRIVER_TYPE_HARDWARE;
+    UINT flags = debug ? D3D11_CREATE_DEVICE_DEBUG : 0;
+    return pD3D11CreateDevice((IDXGIAdapter *)adapter, adapter ? D3D_DRIVER_TYPE_UNKNOWN : type,
+                              NULL, flags, levels, levels_len, D3D11_SDK_VERSION, dev, NULL, NULL);
+}
+
+bool mp_d3d11_list_or_verify_adapters(struct mp_log *log,
+                                      bstr adapter_name,
+                                      bstr *listing)
+{
+    IDXGIAdapter1 *picked_adapter = NULL;
+
+    if (!load_d3d11_functions(log)) {
+        return false;
+    }
+
+    if ((picked_adapter = get_d3d11_adapter(log, adapter_name, listing))) {
+        SAFE_RELEASE(picked_adapter);
+        return true;
+    }
+
+    return false;
+}
+
+// Create a Direct3D 11 device for rendering and presentation. This is meant to
+// reduce boilerplate in backends that use D3D11, while also making sure they
+// share the same device creation logic and log the same information.
+bool mp_d3d11_create_present_device(struct mp_log *log,
+                                    struct d3d11_device_opts *opts,
+                                    ID3D11Device **dev_out)
+{
+    bool debug = opts->debug;
+    bool warp = opts->force_warp;
+    int max_fl = opts->max_feature_level;
+    int min_fl = opts->min_feature_level;
+    // Normalize nullptr and an empty string to nullptr to simplify handling.
+    char *adapter_name = (opts->adapter_name && *(opts->adapter_name)) ?
+                         opts->adapter_name : NULL;
+    ID3D11Device *dev = NULL;
+    IDXGIDevice1 *dxgi_dev = NULL;
+    IDXGIAdapter1 *adapter = NULL;
+    bool success = false;
+    HRESULT hr;
+
+    if (!load_d3d11_functions(log)) {
+        goto done;
+    }
+
+    adapter = get_d3d11_adapter(log, bstr0(adapter_name), NULL);
+
+    if (adapter_name && !adapter) {
+        mp_warn(log, "Adapter matching '%s' was not found in the system! "
+                     "Will fall back to the default adapter.\n",
+                 adapter_name);
+    }
+
+    // Return here to retry creating the device
+    do {
+        // Use these default feature levels if they are not set
+        max_fl = max_fl ? max_fl : D3D_FEATURE_LEVEL_11_0;
+        min_fl = min_fl ? min_fl : D3D_FEATURE_LEVEL_9_1;
+
+        hr = create_device(log, adapter, warp, debug, max_fl, min_fl, &dev);
+
+        // Retry without debug, if SDK is not available
+        if (debug && hr == DXGI_ERROR_SDK_COMPONENT_MISSING) {
+            mp_warn(log, "gpu-debug disabled due to error: %s\n", mp_HRESULT_to_str(hr));
+            debug = false;
+            continue;
+        }
+
+        if (SUCCEEDED(hr))
+            break;
+
+        // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or
+        // below will not succeed. Try an 11_1 device.
+        if (max_fl >= D3D_FEATURE_LEVEL_12_0 &&
+            min_fl <= D3D_FEATURE_LEVEL_11_1)
+        {
+            mp_dbg(log, "Failed to create 12_0+ device, trying 11_1\n");
+            max_fl = D3D_FEATURE_LEVEL_11_1;
+            continue;
+        }
+
+        // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7
+        // without the platform update will not succeed. Try an 11_0 device.
+        if (max_fl >= D3D_FEATURE_LEVEL_11_1 &&
+            min_fl <= D3D_FEATURE_LEVEL_11_0)
+        {
+            mp_dbg(log, "Failed to create 11_1+ device, trying 11_0\n");
+            max_fl = D3D_FEATURE_LEVEL_11_0;
+            continue;
+        }
+
+        // Retry with WARP if allowed
+        if (!warp && opts->allow_warp) {
+            mp_dbg(log, "Failed to create hardware device, trying WARP\n");
+            warp = true;
+            max_fl = opts->max_feature_level;
+            min_fl = opts->min_feature_level;
+            continue;
+        }
+
+        mp_fatal(log, "Failed to create Direct3D 11 device: %s\n",
+                 mp_HRESULT_to_str(hr));
+        goto done;
+    } while (true);
+
+    // if we picked an adapter, release it here - we're taking another
+    // from the device.
+    SAFE_RELEASE(adapter);
+
+    hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to get DXGI device\n");
+        goto done;
+    }
+    hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to get DXGI adapter\n");
+        goto done;
+    }
+
+    IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, opts->max_frame_latency);
+
+    DXGI_ADAPTER_DESC1 desc;
+    hr = IDXGIAdapter1_GetDesc1(adapter, &desc);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to get adapter description\n");
+        goto done;
+    }
+
+    D3D_FEATURE_LEVEL selected_level = ID3D11Device_GetFeatureLevel(dev);
+    mp_verbose(log, "Using Direct3D 11 feature level %u_%u\n",
+               ((unsigned)selected_level) >> 12,
+               (((unsigned)selected_level) >> 8) & 0xf);
+
+    char *dev_name = mp_to_utf8(NULL, desc.Description);
+    mp_verbose(log, "Device Name: %s\n"
+                    "Device ID: %04x:%04x (rev %02x)\n"
+                    "Subsystem ID: %04x:%04x\n"
+                    "LUID: %08lx%08lx\n",
+               dev_name,
+               desc.VendorId, desc.DeviceId, desc.Revision,
+               LOWORD(desc.SubSysId), HIWORD(desc.SubSysId),
+               desc.AdapterLuid.HighPart, desc.AdapterLuid.LowPart);
+    talloc_free(dev_name);
+
+    if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE)
+        warp = true;
+    // If the primary display adapter is a software adapter, the
+    // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should
+    // still match the Microsoft Basic Render Driver
+    if (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c)
+        warp = true;
+    if (warp) {
+        mp_msg(log, opts->force_warp ? MSGL_V : MSGL_WARN,
+               "Using a software adapter\n");
+    }
+
+    *dev_out = dev;
+    dev = NULL;
+    success = true;
+
+done:
+    SAFE_RELEASE(adapter);
+    SAFE_RELEASE(dxgi_dev);
+    SAFE_RELEASE(dev);
+    return success;
+}
+
+static HRESULT create_swapchain_1_2(ID3D11Device *dev, IDXGIFactory2 *factory,
+                                    struct mp_log *log,
+                                    struct d3d11_swapchain_opts *opts,
+                                    bool flip, DXGI_FORMAT format,
+                                    IDXGISwapChain **swapchain_out)
+{
+    IDXGISwapChain *swapchain = NULL;
+    IDXGISwapChain1 *swapchain1 = NULL;
+    HRESULT hr;
+
+    DXGI_SWAP_CHAIN_DESC1 desc = {
+        .Width = opts->width ? opts->width : 1,
+        .Height = opts->height ? opts->height : 1,
+        .Format = format,
+        .SampleDesc = { .Count = 1 },
+        .BufferUsage = opts->usage,
+    };
+
+    if (flip) {
+        // UNORDERED_ACCESS with FLIP_SEQUENTIAL seems to be buggy with
+        // Windows 7 drivers
+        if ((desc.BufferUsage & DXGI_USAGE_UNORDERED_ACCESS) &&
+            !IsWindows8OrGreater())
+        {
+            mp_verbose(log, "Disabling UNORDERED_ACCESS for flip-model "
+                            "swapchain backbuffers in Windows 7\n");
+            desc.BufferUsage &= ~DXGI_USAGE_UNORDERED_ACCESS;
+        }
+
+        if (IsWindows10OrGreater()) {
+            desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD;
+        } else {
+            desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL;
+        }
+        desc.BufferCount = opts->length;
+    } else {
+        desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD;
+        desc.BufferCount = 1;
+    }
+
+    hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown*)dev,
+        opts->window, &desc, NULL, NULL, &swapchain1);
+    if (FAILED(hr))
+        goto done;
+    hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain,
+                                        (void**)&swapchain);
+    if (FAILED(hr))
+        goto done;
+
+    *swapchain_out = swapchain;
+    swapchain = NULL;
+
+done:
+    SAFE_RELEASE(swapchain1);
+    SAFE_RELEASE(swapchain);
+    return hr;
+}
+
+static HRESULT create_swapchain_1_1(ID3D11Device *dev, IDXGIFactory1 *factory,
+                                    struct mp_log *log,
+                                    struct d3d11_swapchain_opts *opts,
+                                    DXGI_FORMAT format,
+                                    IDXGISwapChain **swapchain_out)
+{
+    DXGI_SWAP_CHAIN_DESC desc = {
+        .BufferDesc = {
+            .Width = opts->width ? opts->width : 1,
+            .Height = opts->height ? opts->height : 1,
+            .Format = format,
+        },
+        .SampleDesc = { .Count = 1 },
+        .BufferUsage = opts->usage,
+        .BufferCount = 1,
+        .OutputWindow = opts->window,
+        .Windowed = TRUE,
+        .SwapEffect = DXGI_SWAP_EFFECT_DISCARD,
+    };
+
+    return IDXGIFactory1_CreateSwapChain(factory, (IUnknown*)dev, &desc,
+                                         swapchain_out);
+}
+
+static bool update_swapchain_format(struct mp_log *log,
+                                    IDXGISwapChain *swapchain,
+                                    DXGI_FORMAT format)
+{
+    DXGI_SWAP_CHAIN_DESC desc;
+
+    HRESULT hr = IDXGISwapChain_GetDesc(swapchain, &desc);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to query swap chain's current state: %s\n",
+                 mp_HRESULT_to_str(hr));
+        return false;
+    }
+
+    hr = IDXGISwapChain_ResizeBuffers(swapchain, 0, desc.BufferDesc.Width,
+                                      desc.BufferDesc.Height,
+                                      format, 0);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Couldn't update swapchain format: %s\n",
+                 mp_HRESULT_to_str(hr));
+        return false;
+    }
+
+    return true;
+}
+
+static bool update_swapchain_color_space(struct mp_log *log,
+                                         IDXGISwapChain *swapchain,
+                                         DXGI_COLOR_SPACE_TYPE color_space)
+{
+    IDXGISwapChain4 *swapchain4 = NULL;
+    const char *csp_name = d3d11_get_csp_name(color_space);
+    bool success = false;
+    HRESULT hr = E_FAIL;
+    unsigned int csp_support_flags;
+
+    hr = IDXGISwapChain_QueryInterface(swapchain, &IID_IDXGISwapChain4,
+                                       (void *)&(swapchain4));
+    if (FAILED(hr)) {
+        mp_err(log, "Failed to create v4 swapchain for color space "
+                    "configuration (%s)!\n",
+               mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    hr = IDXGISwapChain4_CheckColorSpaceSupport(swapchain4,
+                                                color_space,
+                                                &csp_support_flags);
+    if (FAILED(hr)) {
+        mp_err(log, "Failed to check color space support for color space "
+                    "%s (%d): %s!\n",
+               csp_name, color_space, mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    mp_verbose(log,
+               "Swapchain capabilities for color space %s (%d): "
+               "normal: %s, overlay: %s\n",
+               csp_name, color_space,
+               (csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT) ?
+               "yes" : "no",
+               (csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_OVERLAY_PRESENT) ?
+               "yes" : "no");
+
+    if (!(csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT)) {
+        mp_err(log, "Color space %s (%d) is not supported by this swapchain!\n",
+               csp_name, color_space);
+        goto done;
+    }
+
+    hr = IDXGISwapChain4_SetColorSpace1(swapchain4, color_space);
+    if (FAILED(hr)) {
+        mp_err(log, "Failed to set color space %s (%d) for this swapchain "
+                    "(%s)!\n",
+               csp_name, color_space, mp_HRESULT_to_str(hr));
+        goto done;
+    }
+
+    mp_verbose(log, "Swapchain successfully configured to color space %s (%d)!\n",
+               csp_name, color_space);
+
+    success = true;
+
+done:
+    SAFE_RELEASE(swapchain4);
+    return success;
+}
+
+// Applies the requested (or, if unset, probed) format and color space to the
+// swapchain; *configured_csp is only written on the Windows 10+ path.
+static bool configure_created_swapchain(struct mp_log *log,
+                                        IDXGISwapChain *swapchain,
+                                        DXGI_FORMAT requested_format,
+                                        DXGI_COLOR_SPACE_TYPE requested_csp,
+                                        struct mp_colorspace *configured_csp)
+{
+    DXGI_FORMAT probed_format = DXGI_FORMAT_UNKNOWN;
+    DXGI_FORMAT selected_format = DXGI_FORMAT_UNKNOWN;
+    DXGI_COLOR_SPACE_TYPE probed_colorspace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709;
+    DXGI_COLOR_SPACE_TYPE selected_colorspace;
+    const char *format_name = NULL;
+    const char *csp_name = NULL;
+    struct mp_colorspace mp_csp = { 0 };
+    bool mp_csp_mapped = false;
+
+    query_output_format_and_colorspace(log, swapchain,
+                                       &probed_format,
+                                       &probed_colorspace);
+
+    selected_format = requested_format != DXGI_FORMAT_UNKNOWN ?
+                      requested_format :
+                      (probed_format != DXGI_FORMAT_UNKNOWN ?
+                       probed_format : DXGI_FORMAT_R8G8B8A8_UNORM);
+    selected_colorspace = requested_csp != -1 ?
+                          requested_csp : probed_colorspace;
+    format_name   = d3d11_get_format_name(selected_format);
+    csp_name      = d3d11_get_csp_name(selected_colorspace);
+    mp_csp_mapped = d3d11_get_mp_csp(selected_colorspace, &mp_csp);
+
+    mp_verbose(log, "Selected swapchain format %s (%d), attempting "
+                    "to utilize it.\n",
+               format_name, selected_format);
+
+    if (!update_swapchain_format(log, swapchain, selected_format))
+        return false;
+
+    if (!IsWindows10OrGreater()) {
+        // On older than Windows 10, query_output_format_and_colorspace
+        // will not change probed_colorspace, and even if a user sets
+        // a colorspace it will not get applied. Thus warn user in case a
+        // value was specifically set and finish.
+        if (requested_csp != -1) {
+            mp_warn(log, "User selected a D3D11 color space %s (%d), "
+                         "but configuration of color spaces is only supported "
+                         "from Windows 10! The default configuration has been "
+                         "left as-is.\n",
+                    csp_name, selected_colorspace);
+        }
+
+        return true;
+    }
+
+    if (!mp_csp_mapped) {
+        mp_warn(log, "Color space %s (%d) does not have an mpv color space "
+                     "mapping! Overriding to standard sRGB!\n",
+                csp_name, selected_colorspace);
+        selected_colorspace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709;
+        // Refresh the name so the log below matches the overridden value.
+        csp_name = d3d11_get_csp_name(selected_colorspace);
+        d3d11_get_mp_csp(selected_colorspace, &mp_csp);
+    }
+
+    mp_verbose(log, "Selected swapchain color space %s (%d), attempting to "
+                    "utilize it.\n",
+               csp_name, selected_colorspace);
+
+    if (!update_swapchain_color_space(log, swapchain, selected_colorspace))
+        return false;
+
+    if (configured_csp)
+        *configured_csp = mp_csp;
+
+    return true;
+}
+
+// Create a Direct3D 11 swapchain using the settings in opts.
+//
+// A DXGI 1.2+ swapchain is created when IDXGIFactory2 can be obtained,
+// otherwise a DXGI 1.1 one. If creating a flip-model swapchain fails, creation
+// is retried with the bitblt model. On success the swapchain is handed over
+// through *swapchain_out and true is returned; on failure false is returned
+// and *swapchain_out is not written.
+bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log,
+                               struct d3d11_swapchain_opts *opts,
+                               IDXGISwapChain **swapchain_out)
+{
+    IDXGIDevice1 *dxgi_dev = NULL;
+    IDXGIAdapter1 *adapter = NULL;
+    IDXGIFactory1 *factory = NULL;
+    IDXGIFactory2 *factory2 = NULL;
+    IDXGISwapChain *swapchain = NULL;
+    bool success = false;
+    HRESULT hr;
+
+    // Walk up from the device to the factory that created it
+    hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to get DXGI device\n");
+        goto done;
+    }
+    hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to get DXGI adapter\n");
+        goto done;
+    }
+    hr = IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void**)&factory);
+    if (FAILED(hr)) {
+        mp_fatal(log, "Failed to get DXGI factory\n");
+        goto done;
+    }
+
+    // IDXGIFactory2 is optional; without it the DXGI 1.1 path is used
+    hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2,
+                                      (void**)&factory2);
+    if (FAILED(hr))
+        factory2 = NULL;
+
+    bool flip = factory2 && opts->flip;
+
+    // Loop so that creation can be retried without the flip model
+    for (;;) {
+        if (factory2) {
+            // Create a DXGI 1.2+ (Windows 8+) swap chain if possible
+            hr = create_swapchain_1_2(dev, factory2, log, opts, flip,
+                                      DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain);
+        } else {
+            // Fall back to DXGI 1.1 (Windows 7)
+            hr = create_swapchain_1_1(dev, factory, log, opts,
+                                      DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain);
+        }
+        if (SUCCEEDED(hr))
+            break;
+
+        if (!flip) {
+            mp_fatal(log, "Failed to create swapchain: %s\n",
+                     mp_HRESULT_to_str(hr));
+            goto done;
+        }
+
+        mp_dbg(log, "Failed to create flip-model swapchain, trying bitblt\n");
+        flip = false;
+    }
+
+    // Prevent DXGI from making changes to the VO window, otherwise it will
+    // hook the Alt+Enter keystroke and make it trigger an ugly transition to
+    // exclusive fullscreen mode instead of running the user-set command.
+    IDXGIFactory_MakeWindowAssociation(factory, opts->window,
+        DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER |
+        DXGI_MWA_NO_PRINT_SCREEN);
+
+    if (!factory2) {
+        mp_verbose(log, "Using DXGI 1.1\n");
+    } else {
+        mp_verbose(log, "Using DXGI 1.2+\n");
+    }
+
+    // The result is not checked here; a failed configuration does not fail
+    // swapchain creation.
+    configure_created_swapchain(log, swapchain, opts->format,
+                                opts->color_space,
+                                opts->configured_csp);
+
+    DXGI_SWAP_CHAIN_DESC scd = {0};
+    IDXGISwapChain_GetDesc(swapchain, &scd);
+    bool flip_model = scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL ||
+                      scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_DISCARD;
+    if (flip_model) {
+        mp_verbose(log, "Using flip-model presentation\n");
+    } else {
+        mp_verbose(log, "Using bitblt-model presentation\n");
+    }
+
+    // Hand the reference over to the caller so that it is not released below
+    *swapchain_out = swapchain;
+    swapchain = NULL;
+    success = true;
+
+done:
+    SAFE_RELEASE(swapchain);
+    SAFE_RELEASE(factory2);
+    SAFE_RELEASE(factory);
+    SAFE_RELEASE(adapter);
+    SAFE_RELEASE(dxgi_dev);
+    return success;
+}
diff --git a/video/out/gpu/d3d11_helpers.h b/video/out/gpu/d3d11_helpers.h
new file mode 100644
index 0000000..c115d33
--- /dev/null
+++ b/video/out/gpu/d3d11_helpers.h
@@ -0,0 +1,103 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_D3D11_HELPERS_H_
+#define MP_D3D11_HELPERS_H_
+
+#include <stdbool.h>
+#include <windows.h>
+#include <d3d11.h>
+#include <dxgi1_2.h>
+
+#include "video/mp_image.h"
+
+#define D3D_FEATURE_LEVEL_12_0 (0xc000)
+#define D3D_FEATURE_LEVEL_12_1 (0xc100)
+
+#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709       ((DXGI_COLOR_SPACE_TYPE)20)
+#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020      ((DXGI_COLOR_SPACE_TYPE)21)
+#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P709     ((DXGI_COLOR_SPACE_TYPE)22)
+#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020    ((DXGI_COLOR_SPACE_TYPE)23)
+#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)24)
+
+// Device creation options, passed to mp_d3d11_create_present_device()
+struct d3d11_device_opts {
+    // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG)
+    bool debug;
+
+    // Allow a software (WARP) adapter. Note, sometimes a software adapter will
+    // be used even when allow_warp is false. This is because, on Windows 8 and
+    // up, if there are no hardware adapters, Windows will pretend the WARP
+    // adapter is the primary hardware adapter.
+    bool allow_warp;
+
+    // Always use a WARP adapter. This is mainly for testing purposes.
+    bool force_warp;
+
+    // The maximum number of pending frames allowed to be queued to a swapchain
+    int max_frame_latency;
+
+    // The maximum Direct3D 11 feature level to attempt to create
+    // If unset, defaults to D3D_FEATURE_LEVEL_11_0
+    int max_feature_level;
+
+    // The minimum Direct3D 11 feature level to attempt to create. If this is
+    // not supported, device creation will fail.
+    // If unset, defaults to D3D_FEATURE_LEVEL_9_1
+    int min_feature_level;
+
+    // The adapter name to utilize if a specific adapter is required
+    // If unset, the default adapter will be utilized when creating
+    // a device.
+    char *adapter_name;
+};
+
+bool mp_d3d11_list_or_verify_adapters(struct mp_log *log,
+                                      bstr adapter_name,
+                                      bstr *listing);
+
+bool mp_d3d11_create_present_device(struct mp_log *log,
+                                    struct d3d11_device_opts *opts,
+                                    ID3D11Device **dev_out);
+
+// Swapchain creation options, passed to mp_d3d11_create_swapchain()
+struct d3d11_swapchain_opts {
+    // Window the swapchain is created for. DXGI's own handling of window
+    // changes, Alt+Enter and Print Screen is disabled for this window.
+    HWND window;
+    // NOTE(review): presumably the initial backbuffer size in pixels; these
+    // are consumed by the swapchain creation helpers - confirm there.
+    int width;
+    int height;
+    // Requested swapchain format. With DXGI_FORMAT_UNKNOWN the probed output
+    // format is used, or DXGI_FORMAT_R8G8B8A8_UNORM if that is unknown too.
+    DXGI_FORMAT format;
+    // Requested swapchain color space. With -1 the probed one is used.
+    DXGI_COLOR_SPACE_TYPE color_space;
+
+    // mp_colorspace mapping of the configured swapchain colorspace
+    // shall be written into this memory location if configuration
+    // succeeds. Will be ignored if NULL.
+    struct mp_colorspace *configured_csp;
+
+    // Use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL if possible
+    bool flip;
+
+    // Number of surfaces in the swapchain
+    int length;
+
+    // The BufferUsage value for swapchain surfaces. This should probably
+    // contain DXGI_USAGE_RENDER_TARGET_OUTPUT.
+    DXGI_USAGE usage;
+};
+
+// Create a swapchain for dev as described by opts. Returns true and stores
+// the new swapchain in *swapchain_out on success; returns false otherwise.
+bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log,
+                               struct d3d11_swapchain_opts *opts,
+                               IDXGISwapChain **swapchain_out);
+
+#endif
diff --git a/video/out/gpu/error_diffusion.c b/video/out/gpu/error_diffusion.c
new file mode 100644
index 0000000..c1ea542
--- /dev/null
+++ b/video/out/gpu/error_diffusion.c
@@ -0,0 +1,316 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+
+#include "error_diffusion.h"
+
+#include "common/common.h"
+
+#define GLSL(...) gl_sc_addf(sc, __VA_ARGS__)
+#define GLSLH(...) gl_sc_haddf(sc, __VA_ARGS__)
+
+// Returns the largest column offset, measured after the
+// (y, x) -> (y, x + y * shift) mapping, that receives error from the pixel
+// being processed. Returns 0 if the kernel pattern is all zero.
+static int compute_rightmost_shifted_column(const struct error_diffusion_kernel *k)
+{
+    int rightmost = 0;
+    for (int dy = 0; dy <= EF_MAX_DELTA_Y; dy++) {
+        for (int dx = EF_MIN_DELTA_X; dx <= EF_MAX_DELTA_X; dx++) {
+            // Only cells with a non-zero weight receive error
+            if (k->pattern[dy][dx - EF_MIN_DELTA_X] == 0)
+                continue;
+
+            int shifted_dx = dx + dy * k->shift;
+
+            // The shift mapping guarantees that every target lies strictly
+            // to the right of the current column.
+            assert(shifted_dx > 0);
+
+            if (shifted_dx > rightmost)
+                rightmost = shifted_dx;
+        }
+    }
+    return rightmost;
+}
+
+// Looks up a kernel in mp_error_diffusion_kernels by exact name.
+// Returns NULL if name is NULL or no kernel has that name.
+const struct error_diffusion_kernel *mp_find_error_diffusion_kernel(const char *name)
+{
+    if (name == NULL)
+        return NULL;
+
+    // The table ends with an entry whose name is NULL
+    const struct error_diffusion_kernel *entry = mp_error_diffusion_kernels;
+    while (entry->name) {
+        if (!strcmp(entry->name, name))
+            return entry;
+        entry++;
+    }
+    return NULL;
+}
+
+// Returns the size in bytes of the error buffer used by kernel k for an image
+// of the given height: (height + EF_MAX_DELTA_Y) rows times the number of
+// affected shifted columns plus one, at 4 bytes per element.
+int mp_ef_compute_shared_memory_size(const struct error_diffusion_kernel *k,
+                                     int height)
+{
+    // One uint per element, holding all three RGB components.
+    const int bytes_per_element = 4;
+
+    // EF_MAX_DELTA_Y extra rows at the bottom absorb the errors that are
+    // propagated out of the bottom side.
+    int buffer_rows = height + EF_MAX_DELTA_Y;
+    int buffer_columns = compute_rightmost_shifted_column(k) + 1;
+
+    return buffer_rows * buffer_columns * bytes_per_element;
+}
+
+// Appends GLSL code performing error diffusion dithering with kernel k to the
+// shader cache sc.
+//
+// tex is the index N of the "textureN" sampler the pixels are fetched from,
+// width and height give the size of the processed area, depth is the target
+// bit depth (pixels are quantized to multiples of 1 / (2^depth - 1)), and
+// block_size is the stride by which each invocation advances per block; it
+// must not exceed height. The dithered result is written with imageStore()
+// to "out_image".
+void pass_error_diffusion(struct gl_shader_cache *sc,
+                          const struct error_diffusion_kernel *k,
+                          int tex, int width, int height, int depth, int block_size)
+{
+    assert(block_size <= height);
+
+    // The parallel error diffusion works by applying the shift mapping first.
+    // Taking the Floyd and Steinberg algorithm for example. After applying
+    // the (y, x) -> (y, x + y * shift) mapping (with shift=2), all errors are
+    // propagated into the next few columns, which makes parallel processing on
+    // the same column possible.
+    //
+    //           X    7/16                X    7/16
+    //    3/16  5/16  1/16   ==>    0     0    3/16  5/16  1/16
+
+    // Figuring out the size of rectangle containing all shifted pixels.
+    // The rectangle height is not changed.
+    int shifted_width = width + (height - 1) * k->shift;
+
+    // We process all pixels from the shifted rectangles column by column, with
+    // a single global work group of size |block_size|.
+    // Figuring out how many block are required to process all pixels. We need
+    // this explicitly to make the number of barrier() calls match.
+    int blocks = (height * shifted_width + block_size - 1) / block_size;
+
+    // Once we know how many of the next columns are affected while the
+    // current column is being processed, we only need to store the errors of
+    // those few columns in the shared memory. Using a ring buffer further
+    // saves the cost of moving on to the next column.
+    int ring_buffer_rows = height + EF_MAX_DELTA_Y;
+    int ring_buffer_columns = compute_rightmost_shifted_column(k) + 1;
+    int ring_buffer_size = ring_buffer_rows * ring_buffer_columns;
+
+    // Defines the ring buffer in shared memory.
+    GLSLH("shared uint err_rgb8[%d];\n", ring_buffer_size);
+
+    // Initialize the ring buffer.
+    GLSL("for (int i = int(gl_LocalInvocationIndex); i < %d; i += %d) ",
+         ring_buffer_size, block_size);
+    GLSL("err_rgb8[i] = 0u;\n");
+
+    GLSL("for (int block_id = 0; block_id < %d; ++block_id) {\n", blocks);
+
+    // Add barrier here to have previous block all processed before starting
+    // the processing of the next.
+    GLSL("groupMemoryBarrier();\n");
+    GLSL("barrier();\n");
+
+    // Compute the coordinate of the pixel we are currently processing, both
+    // before and after the shift mapping.
+    GLSL("int id = int(gl_LocalInvocationIndex) + block_id * %d;\n", block_size);
+    GLSL("int y = id %% %d, x_shifted = id / %d;\n", height, height);
+    GLSL("int x = x_shifted - y * %d;\n", k->shift);
+
+    // Proceed only if we are processing a valid pixel.
+    GLSL("if (0 <= x && x < %d) {\n", width);
+
+    // The index of the current pixel in the ring buffer.
+    GLSL("int idx = (x_shifted * %d + y) %% %d;\n", ring_buffer_rows, ring_buffer_size);
+
+    // Fetch the current pixel.
+    GLSL("vec3 pix = texelFetch(texture%d, ivec2(x, y), 0).rgb;\n", tex);
+
+    // The dithering will quantize pixel value into multiples of 1/dither_quant.
+    int dither_quant = (1 << depth) - 1;
+
+    // We encode errors in RGB components into a single 32-bit unsigned integer.
+    // The error we propagate from the current pixel is in range of
+    // [-0.5 / dither_quant, 0.5 / dither_quant]. While not quite obvious, the
+    // sum of all errors been propagated into a pixel is also in the same range.
+    // It's possible to map errors in this range into [-127, 127], and use an
+    // unsigned 8-bit integer to store it (using standard two's complement).
+    // The three 8-bit unsigned integers can then be encoded into a single
+    // 32-bit unsigned integer, with two 4-bit padding to prevent addition
+    // operation overflows affecting other component. There are at most 12
+    // addition operations on each pixel, so 4-bit padding should be enough.
+    // The overflow from R component will be discarded.
+    //
+    // The following figure is how the encoding looks like.
+    //
+    //     +------------------------------------+
+    //     |RRRRRRRR|0000|GGGGGGGG|0000|BBBBBBBB|
+    //     +------------------------------------+
+    //
+
+    // The bitshift position for R and G component.
+    int bitshift_r = 24, bitshift_g = 12;
+    // The multiplier we use to map [-0.5, 0.5] to [-127, 127].
+    int uint8_mul = 127 * 2;
+
+    // Adding the error previously propagated into current pixel, and clear it
+    // in the buffer. The 128 added to each component is subtracted again
+    // after the component has been extracted below.
+    GLSL("uint err_u32 = err_rgb8[idx] + %uu;\n",
+         (128u << bitshift_r) | (128u << bitshift_g) | 128u);
+    GLSL("pix = pix * %d.0 + vec3("
+         "int((err_u32 >> %d) & 255u) - 128,"
+         "int((err_u32 >> %d) & 255u) - 128,"
+         "int( err_u32        & 255u) - 128"
+         ") / %d.0;\n", dither_quant, bitshift_r, bitshift_g, uint8_mul);
+    GLSL("err_rgb8[idx] = 0u;\n");
+
+    // Write the dithered pixel.
+    GLSL("vec3 dithered = round(pix);\n");
+    GLSL("imageStore(out_image, ivec2(x, y), vec4(dithered / %d.0, 0.0));\n",
+         dither_quant);
+
+    // The quantization error, scaled to the 8-bit encoding and pre-divided by
+    // the kernel divisor, so that it only needs to be multiplied by each
+    // pattern weight below.
+    GLSL("vec3 err_divided = (pix - dithered) * %d.0 / %d.0;\n",
+         uint8_mul, k->divisor);
+    GLSL("ivec3 tmp;\n");
+
+    // Group error propagation with same weight factor together, in order to
+    // reduce the number of error encoding operations.
+    for (int dividend = 1; dividend <= k->divisor; dividend++) {
+        bool err_assigned = false;
+
+        for (int y = 0; y <= EF_MAX_DELTA_Y; y++) {
+            for (int x = EF_MIN_DELTA_X; x <= EF_MAX_DELTA_X; x++) {
+                if (k->pattern[y][x - EF_MIN_DELTA_X] != dividend)
+                    continue;
+
+                // Emit the encoded error for this weight only once, before
+                // its first use.
+                if (!err_assigned) {
+                    err_assigned = true;
+
+                    GLSL("tmp = ivec3(round(err_divided * %d.0));\n", dividend);
+
+                    GLSL("err_u32 = "
+                         "(uint(tmp.r & 255) << %d)|"
+                         "(uint(tmp.g & 255) << %d)|"
+                         " uint(tmp.b & 255);\n",
+                         bitshift_r, bitshift_g);
+                }
+
+                int shifted_x = x + y * k->shift;
+
+                // Unlike the right border, errors propagated out from left
+                // border will remain in the ring buffer. This will produce
+                // visible artifacts near the left border, especially for
+                // shift=3 kernels.
+                if (x < 0)
+                    GLSL("if (x >= %d) ", -x);
+
+                // Calculate the new position in the ring buffer to propagate
+                // the error into.
+                int ring_buffer_delta = shifted_x * ring_buffer_rows + y;
+                GLSL("atomicAdd(err_rgb8[(idx + %d) %% %d], err_u32);\n",
+                     ring_buffer_delta, ring_buffer_size);
+            }
+        }
+    }
+
+    GLSL("}\n"); // if (0 <= x && x < width)
+
+    GLSL("}\n"); // block_id
+}
+
+// Different kernels for error diffusion.
+// Patterns are from http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT
+//
+// Each pattern row corresponds to one y offset (0 to EF_MAX_DELTA_Y) and each
+// column to one x offset (EF_MIN_DELTA_X to EF_MAX_DELTA_X), so the middle
+// column of the first row is the current pixel. The weight of a cell is its
+// value divided by .divisor. The list is terminated by a zeroed entry, whose
+// NULL name is what lookups stop at.
+const struct error_diffusion_kernel mp_error_diffusion_kernels[] = {
+    {
+        .name = "simple",
+        .shift = 1,
+        .pattern = {{0, 0, 0, 1, 0},
+                    {0, 0, 1, 0, 0},
+                    {0, 0, 0, 0, 0}},
+        .divisor = 2
+    },
+    {
+        // The "false" Floyd-Steinberg kernel
+        .name = "false-fs",
+        .shift = 1,
+        .pattern = {{0, 0, 0, 3, 0},
+                    {0, 0, 3, 2, 0},
+                    {0, 0, 0, 0, 0}},
+        .divisor = 8
+    },
+    {
+        .name = "sierra-lite",
+        .shift = 2,
+        .pattern = {{0, 0, 0, 2, 0},
+                    {0, 1, 1, 0, 0},
+                    {0, 0, 0, 0, 0}},
+        .divisor = 4
+    },
+    {
+        .name = "floyd-steinberg",
+        .shift = 2,
+        .pattern = {{0, 0, 0, 7, 0},
+                    {0, 3, 5, 1, 0},
+                    {0, 0, 0, 0, 0}},
+        .divisor = 16
+    },
+    {
+        .name = "atkinson",
+        .shift = 2,
+        .pattern = {{0, 0, 0, 1, 1},
+                    {0, 1, 1, 1, 0},
+                    {0, 0, 1, 0, 0}},
+        .divisor = 8
+    },
+    // All kernels below have shift value of 3, and probably are too heavy for
+    // low end GPU.
+    {
+        .name = "jarvis-judice-ninke",
+        .shift = 3,
+        .pattern = {{0, 0, 0, 7, 5},
+                    {3, 5, 7, 5, 3},
+                    {1, 3, 5, 3, 1}},
+        .divisor = 48
+    },
+    {
+        .name = "stucki",
+        .shift = 3,
+        .pattern = {{0, 0, 0, 8, 4},
+                    {2, 4, 8, 4, 2},
+                    {1, 2, 4, 2, 1}},
+        .divisor = 42
+    },
+    {
+        .name = "burkes",
+        .shift = 3,
+        .pattern = {{0, 0, 0, 8, 4},
+                    {2, 4, 8, 4, 2},
+                    {0, 0, 0, 0, 0}},
+        .divisor = 32
+    },
+    {
+        .name = "sierra-3",
+        .shift = 3,
+        .pattern = {{0, 0, 0, 5, 3},
+                    {2, 4, 5, 4, 2},
+                    {0, 2, 3, 2, 0}},
+        .divisor = 32
+    },
+    {
+        .name = "sierra-2",
+        .shift = 3,
+        .pattern = {{0, 0, 0, 4, 3},
+                    {1, 2, 3, 2, 1},
+                    {0, 0, 0, 0, 0}},
+        .divisor = 16
+    },
+    {0}
+};
diff --git a/video/out/gpu/error_diffusion.h b/video/out/gpu/error_diffusion.h
new file mode 100644
index 0000000..6bdcea1
--- /dev/null
+++ b/video/out/gpu/error_diffusion.h
@@ -0,0 +1,48 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_ERROR_DIFFUSION
+#define MP_GL_ERROR_DIFFUSION
+
+#include "shader_cache.h"
+
+// defines the border of all error diffusion kernels
+#define EF_MIN_DELTA_X (-2)
+#define EF_MAX_DELTA_X  (2)
+#define EF_MAX_DELTA_Y  (2)
+
+// Description of one error diffusion kernel
+struct error_diffusion_kernel {
+    // Name the kernel is looked up by. NULL marks the end of the kernel list.
+    const char *name;
+
+    // The minimum value such that a (y, x) -> (y, x + y * shift) mapping will
+    // make all error pushing operations affect next column (and after it) only.
+    int shift;
+
+    // The diffusion factor for (y, x) is pattern[y][x - EF_MIN_DELTA_X] / divisor.
+    int pattern[EF_MAX_DELTA_Y + 1][EF_MAX_DELTA_X - EF_MIN_DELTA_X + 1];
+    int divisor;
+};
+
+// All available kernels, terminated by an entry whose name is NULL
+extern const struct error_diffusion_kernel mp_error_diffusion_kernels[];
+
+// Returns the kernel with the given name, or NULL if name is NULL or unknown
+const struct error_diffusion_kernel *mp_find_error_diffusion_kernel(const char *name);
+// Returns the size in bytes of the error buffer needed by kernel k for an
+// image of the given height
+int mp_ef_compute_shared_memory_size(const struct error_diffusion_kernel *k, int height);
+// Appends the GLSL code that dithers texture number tex with kernel k to sc.
+// block_size must not exceed height.
+void pass_error_diffusion(struct gl_shader_cache *sc,
+                          const struct error_diffusion_kernel *k,
+                          int tex, int width, int height, int depth, int block_size);
+
+#endif /* MP_GL_ERROR_DIFFUSION */
diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c
new file mode 100644
index 0000000..c8098f3
--- /dev/null
+++ b/video/out/gpu/hwdec.c
@@ -0,0 +1,358 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <string.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "hwdec.h"
+
+extern const struct ra_hwdec_driver ra_hwdec_vaapi;
+extern const struct ra_hwdec_driver ra_hwdec_videotoolbox;
+extern const struct ra_hwdec_driver ra_hwdec_vdpau;
+extern const struct ra_hwdec_driver ra_hwdec_dxva2egl;
+extern const struct ra_hwdec_driver ra_hwdec_d3d11egl;
+extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx;
+extern const struct ra_hwdec_driver ra_hwdec_d3d11va;
+extern const struct ra_hwdec_driver ra_hwdec_dxva2dxgi;
+extern const struct ra_hwdec_driver ra_hwdec_cuda;
+extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay;
+extern const struct ra_hwdec_driver ra_hwdec_drmprime;
+extern const struct ra_hwdec_driver ra_hwdec_drmprime_overlay;
+extern const struct ra_hwdec_driver ra_hwdec_aimagereader;
+extern const struct ra_hwdec_driver ra_hwdec_vulkan;
+
+// NULL-terminated list of the hwdec drivers enabled at build time. The code
+// in this file walks the list front to back, so the order here is also the
+// order in which drivers are tried and loaded.
+const struct ra_hwdec_driver *const ra_hwdec_drivers[] = {
+#if HAVE_VAAPI
+    &ra_hwdec_vaapi,
+#endif
+#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL || HAVE_VIDEOTOOLBOX_PL
+    &ra_hwdec_videotoolbox,
+#endif
+#if HAVE_D3D_HWACCEL
+ #if HAVE_EGL_ANGLE
+    &ra_hwdec_d3d11egl,
+  #if HAVE_D3D9_HWACCEL
+    &ra_hwdec_dxva2egl,
+  #endif
+ #endif
+ #if HAVE_D3D11
+    &ra_hwdec_d3d11va,
+  #if HAVE_D3D9_HWACCEL
+    &ra_hwdec_dxva2dxgi,
+  #endif
+ #endif
+#endif
+#if HAVE_GL_DXINTEROP_D3D9
+    &ra_hwdec_dxva2gldx,
+#endif
+#if HAVE_CUDA_INTEROP
+    &ra_hwdec_cuda,
+#endif
+#if HAVE_VDPAU_GL_X11
+    &ra_hwdec_vdpau,
+#endif
+#if HAVE_RPI_MMAL
+    &ra_hwdec_rpi_overlay,
+#endif
+#if HAVE_DRM
+    &ra_hwdec_drmprime,
+    &ra_hwdec_drmprime_overlay,
+#endif
+#if HAVE_ANDROID_MEDIA_NDK
+    &ra_hwdec_aimagereader,
+#endif
+#if HAVE_VULKAN_INTEROP
+    &ra_hwdec_vulkan,
+#endif
+
+    NULL
+};
+
+// Allocates a ra_hwdec for drv and runs the driver's init callback on it.
+// is_auto is stored in the "probing" field. Returns the new hwdec, or NULL if
+// init failed, in which case the hwdec has already been uninitialized and
+// freed again.
+struct ra_hwdec *ra_hwdec_load_driver(struct ra_ctx *ra_ctx,
+                                      struct mp_log *log,
+                                      struct mpv_global *global,
+                                      struct mp_hwdec_devices *devs,
+                                      const struct ra_hwdec_driver *drv,
+                                      bool is_auto)
+{
+    struct ra_hwdec *hw = talloc(NULL, struct ra_hwdec);
+    // The log and the private data are allocated as children of the hwdec
+    *hw = (struct ra_hwdec) {
+        .driver = drv,
+        .log = mp_log_new(hw, log, drv->name),
+        .global = global,
+        .ra_ctx = ra_ctx,
+        .devs = devs,
+        .probing = is_auto,
+        .priv = talloc_zero_size(hw, drv->priv_size),
+    };
+
+    mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name);
+    if (hw->driver->init(hw) >= 0)
+        return hw;
+
+    ra_hwdec_uninit(hw);
+    mp_verbose(log, "Loading failed.\n");
+    return NULL;
+}
+
+// Runs the driver's uninit callback on hwdec and frees it. NULL is accepted.
+void ra_hwdec_uninit(struct ra_hwdec *hwdec)
+{
+    if (hwdec != NULL) {
+        const struct ra_hwdec_driver *drv = hwdec->driver;
+        drv->uninit(hwdec);
+    }
+    talloc_free(hwdec);
+}
+
+// Returns whether imgfmt is listed in the zero-terminated imgfmts array of
+// the hwdec's driver.
+bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt)
+{
+    int idx = 0;
+    while (hwdec->driver->imgfmts[idx]) {
+        if (hwdec->driver->imgfmts[idx] == imgfmt)
+            return true;
+        idx++;
+    }
+    return false;
+}
+
+// Creates a mapper for hwdec and runs the mapper driver's init callback on
+// it. params->imgfmt must be a format supported by hwdec. Both src_params and
+// dst_params start out as a copy of *params. Returns NULL if init fails.
+struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec,
+                                               const struct mp_image_params *params)
+{
+    assert(ra_hwdec_test_format(hwdec, params->imgfmt));
+
+    const struct ra_hwdec_driver *drv = hwdec->driver;
+
+    struct ra_hwdec_mapper *mapper = talloc_ptrtype(NULL, mapper);
+    *mapper = (struct ra_hwdec_mapper){
+        .owner = hwdec,
+        .driver = drv->mapper,
+        .log = hwdec->log,
+        .ra = hwdec->ra_ctx->ra,
+        .priv = talloc_zero_size(mapper, drv->mapper->priv_size),
+        .src_params = *params,
+        .dst_params = *params,
+    };
+
+    if (mapper->driver->init(mapper) >= 0)
+        return mapper;
+
+    // Tears the mapper down and resets the pointer to NULL
+    ra_hwdec_mapper_free(&mapper);
+    return mapper;
+}
+
+// Unmaps, uninitializes and frees *mapper (if it is not NULL), then sets
+// *mapper to NULL.
+void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper)
+{
+    struct ra_hwdec_mapper *cur = *mapper;
+    if (cur != NULL) {
+        // Drop whatever is still mapped before the driver shuts down
+        ra_hwdec_mapper_unmap(cur);
+        cur->driver->uninit(cur);
+        talloc_free(cur);
+    }
+    *mapper = NULL;
+}
+
+// Calls the driver's unmap callback, if it has one, and drops the mapper's
+// reference to the source image.
+void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    const struct ra_hwdec_mapper_driver *drv = mapper->driver;
+    if (drv->unmap)
+        drv->unmap(mapper);
+
+    // Release the source image in case the driver has not done so itself
+    mp_image_unrefp(&mapper->src);
+}
+
+// Unmaps the current image, sets img as the new source image and calls the
+// driver's map callback. Returns 0 on success. If the callback fails, the
+// mapper is unmapped again and -1 is returned.
+int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img)
+{
+    ra_hwdec_mapper_unmap(mapper);
+    mp_image_setrefp(&mapper->src, img);
+
+    if (mapper->driver->map(mapper) >= 0)
+        return 0;
+
+    ra_hwdec_mapper_unmap(mapper);
+    return -1;
+}
+
+// Validates a hwdec option value.
+//
+// "help" prints the names of all drivers (and, if include_modes is set, the
+// special modes) and returns M_OPT_EXIT. A driver name, the empty string
+// and, if include_modes is set, "all", "auto" and "no" are accepted with a
+// return value of 1. Anything else is reported and yields M_OPT_INVALID.
+// opt and name are not used.
+static int ra_hwdec_validate_opt_full(struct mp_log *log, bool include_modes,
+                                      const m_option_t *opt,
+                                      struct bstr name, const char **value)
+{
+    struct bstr param = bstr0(*value);
+
+    if (bstr_equals0(param, "help")) {
+        mp_info(log, "Available hwdecs:\n");
+        for (int n = 0; ra_hwdec_drivers[n]; n++)
+            mp_info(log, "    %s\n", ra_hwdec_drivers[n]->name);
+        if (include_modes) {
+            mp_info(log, "    auto (behavior depends on context)\n"
+                        "    all (load all hwdecs)\n"
+                        "    no (do not load any and block loading on demand)\n");
+        }
+        return M_OPT_EXIT;
+    }
+
+    for (int n = 0; ra_hwdec_drivers[n]; n++) {
+        if (bstr_equals0(param, ra_hwdec_drivers[n]->name))
+            return 1;
+    }
+
+    if (!param.len)
+        return 1; // "" is treated specially
+
+    if (include_modes) {
+        bool is_mode = bstr_equals0(param, "all") ||
+                       bstr_equals0(param, "auto") ||
+                       bstr_equals0(param, "no");
+        if (is_mode)
+            return 1;
+    }
+
+    mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param));
+    return M_OPT_INVALID;
+}
+
+// Option validator accepting driver names as well as the special modes
+// "all", "auto" and "no".
+int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt,
+                          struct bstr name, const char **value)
+{
+    const bool include_modes = true;
+    return ra_hwdec_validate_opt_full(log, include_modes, opt, name, value);
+}
+
+// Option validator accepting driver names only; the special modes "all",
+// "auto" and "no" are rejected.
+int ra_hwdec_validate_drivers_only_opt(struct mp_log *log,
+                                       const m_option_t *opt,
+                                       struct bstr name, const char **value)
+{
+    const bool include_modes = false;
+    return ra_hwdec_validate_opt_full(log, include_modes, opt, name, value);
+}
+
+// Loads drv and appends the resulting hwdec to ctx->hwdecs. Does nothing if a
+// hwdec using the same driver is already in the list, or if loading fails.
+static void load_add_hwdec(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
+                           const struct ra_hwdec_driver *drv, bool is_auto)
+{
+    int idx = 0;
+    while (idx < ctx->num_hwdecs) {
+        if (ctx->hwdecs[idx]->driver == drv)
+            return; // already loaded
+        idx++;
+    }
+
+    struct ra_hwdec *loaded =
+        ra_hwdec_load_driver(ctx->ra_ctx, ctx->log, ctx->global, devs, drv, is_auto);
+    if (!loaded)
+        return;
+
+    MP_TARRAY_APPEND(NULL, ctx->hwdecs, ctx->num_hwdecs, loaded);
+}
+
+// Tries to load every driver in ra_hwdec_drivers (with is_auto set) and marks
+// loading as done. Does nothing if loading was already marked as done.
+static void load_hwdecs_all(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs)
+{
+    if (ctx->loading_done)
+        return;
+
+    int n = 0;
+    while (ra_hwdec_drivers[n]) {
+        load_add_hwdec(ctx, devs, ra_hwdec_drivers[n], true);
+        n++;
+    }
+    ctx->loading_done = true;
+}
+
+// Performs the initial hwdec loading for ctx according to the option value
+// type. ctx->ra_ctx must be set beforehand.
+void ra_hwdec_ctx_init(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
+                       const char *type, bool load_all_by_default)
+{
+    assert(ctx->ra_ctx);
+
+    /*
+     * NULL, "" and "auto": nothing is loaded now and loading stays possible
+     * on demand, unless load_all_by_default is set, in which case the value
+     * is handled like "all".
+     *
+     * "no": nothing is loaded now, and loading is marked as done so that
+     * nothing gets loaded later either.
+     *
+     * "all": every interop is loaded now.
+     *
+     * Any other value: the interop with that name (if there is one) is loaded
+     * now.
+     *
+     * NOTE(review): loading_done is also set after a specific interop was
+     * requested, which makes ra_hwdec_ctx_load_fmt() a no-op afterwards. An
+     * earlier version of this comment said that other interops could still
+     * be loaded on request in that case - confirm which one is intended.
+     */
+    bool is_auto = !type || !type[0] || strcmp(type, "auto") == 0;
+    if (is_auto) {
+        if (!load_all_by_default)
+            return;
+        type = "all";
+    }
+
+    if (strcmp(type, "all") == 0) {
+        load_hwdecs_all(ctx, devs);
+    } else if (strcmp(type, "no") != 0) {
+        // A specific interop was named; load the first one matching it
+        int n = 0;
+        while (ra_hwdec_drivers[n]) {
+            const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n];
+            if (strcmp(type, drv->name) == 0) {
+                load_add_hwdec(ctx, devs, drv, false);
+                break;
+            }
+            n++;
+        }
+    }
+    // For "no" nothing is loaded; only further loading gets blocked
+
+    ctx->loading_done = true;
+}
+
+// Uninitializes all loaded hwdecs, frees the list and zeroes *ctx.
+void ra_hwdec_ctx_uninit(struct ra_hwdec_ctx *ctx)
+{
+    int idx = 0;
+    while (idx < ctx->num_hwdecs) {
+        ra_hwdec_uninit(ctx->hwdecs[idx]);
+        idx++;
+    }
+
+    talloc_free(ctx->hwdecs);
+    memset(ctx, 0, sizeof(*ctx));
+}
+
+// Loads the hwdec drivers that list params->imgfmt among their formats, or
+// all drivers if the format is IMGFMT_NONE. Does nothing once loading has
+// been marked as done.
+void ra_hwdec_ctx_load_fmt(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
+                           struct hwdec_imgfmt_request *params)
+{
+    int imgfmt = params->imgfmt;
+
+    // Once interop loading has been marked as done, nothing more is loaded,
+    // whatever the format is.
+    if (ctx->loading_done)
+        return;
+
+    if (imgfmt == IMGFMT_NONE) {
+        MP_VERBOSE(ctx, "Loading hwdec drivers for all formats\n");
+        load_hwdecs_all(ctx, devs);
+        return;
+    }
+
+    MP_VERBOSE(ctx, "Loading hwdec drivers for format: '%s'\n",
+               mp_imgfmt_to_name(imgfmt));
+    for (int i = 0; ra_hwdec_drivers[i]; i++) {
+        const struct ra_hwdec_driver *drv = ra_hwdec_drivers[i];
+
+        // Stop at the requested format or at the terminating zero
+        int j = 0;
+        while (drv->imgfmts[j] && drv->imgfmts[j] != imgfmt)
+            j++;
+
+        if (drv->imgfmts[j])
+            load_add_hwdec(ctx, devs, drv, params->probing);
+    }
+}
+
+// Returns the first loaded hwdec that supports imgfmt, or NULL if none does.
+struct ra_hwdec *ra_hwdec_get(struct ra_hwdec_ctx *ctx, int imgfmt)
+{
+    for (int n = 0; n < ctx->num_hwdecs; n++) {
+        struct ra_hwdec *candidate = ctx->hwdecs[n];
+        if (!ra_hwdec_test_format(candidate, imgfmt))
+            continue;
+        return candidate;
+    }
+
+    return NULL;
+}
+
+// Returns the first image format of the driver with the given name, or
+// IMGFMT_NONE if there is no driver with that name.
+int ra_hwdec_driver_get_imgfmt_for_name(const char *name)
+{
+    for (int i = 0; ra_hwdec_drivers[i]; i++) {
+        const struct ra_hwdec_driver *drv = ra_hwdec_drivers[i];
+        if (strcmp(drv->name, name) == 0)
+            return drv->imgfmts[0];
+    }
+    return IMGFMT_NONE;
+}
diff --git a/video/out/gpu/hwdec.h b/video/out/gpu/hwdec.h
new file mode 100644
index 0000000..7766073
--- /dev/null
+++ b/video/out/gpu/hwdec.h
@@ -0,0 +1,156 @@
+#ifndef MPGL_HWDEC_H_
+#define MPGL_HWDEC_H_
+
+#include "video/mp_image.h"
+#include "context.h"
+#include "ra.h"
+#include "video/hwdec.h"
+
+// Helper to organize/load hwdecs dynamically
+struct ra_hwdec_ctx {
+    // Set these before calling `ra_hwdec_ctx_init`
+    struct mp_log *log;
+    struct mpv_global *global;
+    struct ra_ctx *ra_ctx;
+
+    bool loading_done;          // once set, ra_hwdec_ctx_load_fmt() is a no-op
+    struct ra_hwdec **hwdecs;   // loaded hwdecs; num_hwdecs valid entries
+    int num_hwdecs;
+};
+
+int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt,
+                          struct bstr name, const char **value);
+
+int ra_hwdec_validate_drivers_only_opt(struct mp_log *log,
+                                       const m_option_t *opt,
+                                       struct bstr name, const char **value);
+
+void ra_hwdec_ctx_init(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
+                       const char *opt, bool load_all_by_default);
+void ra_hwdec_ctx_uninit(struct ra_hwdec_ctx *ctx);
+
+void ra_hwdec_ctx_load_fmt(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs,
+                           struct hwdec_imgfmt_request *params);
+
+// Gets the right `ra_hwdec` for a format, if any
+struct ra_hwdec *ra_hwdec_get(struct ra_hwdec_ctx *ctx, int imgfmt);
+
+struct ra_hwdec {
+    const struct ra_hwdec_driver *driver;
+    struct mp_log *log;
+    struct mpv_global *global;
+    struct ra_ctx *ra_ctx;
+    struct mp_hwdec_devices *devs; // driver->init() adds its device here, if any
+    // GLSL extensions required to sample textures from this.
+    const char **glsl_extensions;
+    // For free use by the hwdec driver (created using driver->priv_size).
+    void *priv;
+    // For working around the vdpau vs. vaapi mess.
+    bool probing;
+    // Used in overlay mode only.
+    float overlay_colorkey[4];
+};
+
+struct ra_hwdec_mapper {
+    const struct ra_hwdec_mapper_driver *driver;
+    struct mp_log *log;
+    struct ra *ra;
+    void *priv; // created using driver->priv_size
+    struct ra_hwdec *owner;
+    // Input frame parameters. (Set before init(), immutable.)
+    struct mp_image_params src_params;
+    // Output frame parameters (represents the format the textures return). Must
+    // be set by init(), immutable afterwards.
+    struct mp_image_params dst_params;
+
+    // The currently mapped source image (or the image about to be mapped in
+    // ->map()). NULL if unmapped. The mapper can also clear this reference if
+    // the mapped textures contain a full copy.
+    struct mp_image *src;
+
+    // The mapped textures and metadata about them. These fields change if a
+    // new frame is mapped (or unmapped), but otherwise remain constant.
+    // The common code won't mess with these, so you can e.g. set them in the
+    // .init() callback.
+    struct ra_tex *tex[4];
+};
+
+// This can be used to map frames of a specific hw format as GL textures.
+struct ra_hwdec_mapper_driver {
+    // Used to create ra_hwdec_mapper.priv.
+    size_t priv_size;
+
+    // Init the mapper implementation. At this point, src_params and priv are
+    // expected to be initialized. (NOTE(review): confirm against the creator.)
+    int (*init)(struct ra_hwdec_mapper *mapper);
+    // Destroy the mapper. unmap is called before this.
+    void (*uninit)(struct ra_hwdec_mapper *mapper);
+
+    // Map mapper->src as texture, and set mapper->tex to textures using it.
+    // It is expected that the textures remain valid until the next unmap
+    // or uninit call.
+    // The function is allowed to unref mapper->src if it's not needed (i.e.
+    // this function creates a copy).
+    // The underlying format can change, so you might need to do some form
+    // of change detection. You also must reject unsupported formats with an
+    // error.
+    // On error, returns a negative value and remains unmapped.
+    int (*map)(struct ra_hwdec_mapper *mapper);
+    // Unmap the frame. Does nothing if already unmapped. Optional.
+    void (*unmap)(struct ra_hwdec_mapper *mapper);
+};
+
+struct ra_hwdec_driver {
+    // Name of the interop backend. This is used for informational purposes and
+    // for use with debugging options.
+    const char *name;
+    // Used to create ra_hwdec.priv.
+    size_t priv_size;
+    // Hardware surface IMGFMT_ values; one of them must be passed to map_image
+    // later. Terminated with a 0 entry. (Extend the array size as needed.)
+    const int imgfmts[3];
+
+    // Create the hwdec device. It must add it to hw->devs, if applicable.
+    int (*init)(struct ra_hwdec *hw);
+    void (*uninit)(struct ra_hwdec *hw);
+
+    // This will be used to create a ra_hwdec_mapper from ra_hwdec.
+    const struct ra_hwdec_mapper_driver *mapper;
+
+    // The following function provides an alternative API. Each ra_hwdec_driver
+    // must provide either a mapper or overlay_frame (not both or none), and
+    // if overlay_frame is set, it operates in overlay mode. In this mode,
+    // OSD etc. is rendered via OpenGL, but the video is rendered as a separate
+    // layer below it.
+    // Non-overlay mode is strictly preferred, so try not to use overlay mode.
+    // Set the given frame as overlay, replacing the previous one. This can also
+    // just change the position of the overlay.
+    // hw_image==src==dst==NULL is passed to clear the overlay.
+    int (*overlay_frame)(struct ra_hwdec *hw, struct mp_image *hw_image,
+                         struct mp_rect *src, struct mp_rect *dst, bool newframe);
+};
+
+extern const struct ra_hwdec_driver *const ra_hwdec_drivers[];
+
+struct ra_hwdec *ra_hwdec_load_driver(struct ra_ctx *ra_ctx,
+                                      struct mp_log *log,
+                                      struct mpv_global *global,
+                                      struct mp_hwdec_devices *devs,
+                                      const struct ra_hwdec_driver *drv,
+                                      bool is_auto);
+
+void ra_hwdec_uninit(struct ra_hwdec *hwdec);
+
+bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt);
+
+struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec,
+                                               const struct mp_image_params *params);
+void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper);
+void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper);
+int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img);
+
+// Get the primary image format for the given driver name.
+// Returns IMGFMT_NONE if the name doesn't get matched.
+int ra_hwdec_driver_get_imgfmt_for_name(const char *name);
+
+#endif
diff --git a/video/out/gpu/lcms.c b/video/out/gpu/lcms.c
new file mode 100644
index 0000000..7006a96
--- /dev/null
+++ b/video/out/gpu/lcms.c
@@ -0,0 +1,526 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <string.h>
+#include <math.h>
+
+#include "mpv_talloc.h"
+
+#include "config.h"
+
+#include "stream/stream.h"
+#include "common/common.h"
+#include "misc/bstr.h"
+#include "common/msg.h"
+#include "options/m_option.h"
+#include "options/path.h"
+#include "video/csputils.h"
+#include "lcms.h"
+
+#include "osdep/io.h"
+
+#if HAVE_LCMS2
+
+#include <lcms2.h>
+#include <libavutil/sha.h>
+#include <libavutil/mem.h>
+
+struct gl_lcms {
+    void *icc_data;                 // ICC blob, loaded from file or set from memory
+    size_t icc_size;                // size of icc_data in bytes; 0 means no profile
+    struct AVBufferRef *vid_profile; // own ref to the video's ICC profile, or NULL
+    char *current_profile;          // copy of opts->profile as of the last file load
+    bool using_memory_profile;      // icc_data came from gl_lcms_set_memory_profile()
+    bool changed;                   // cleared by gl_lcms_get_lut3d()
+    enum mp_csp_prim current_prim;  // prim passed to the last gl_lcms_get_lut3d()
+    enum mp_csp_trc current_trc;    // trc passed to the last gl_lcms_get_lut3d()
+
+    struct mp_log *log;
+    struct mpv_global *global;
+    struct mp_icc_opts *opts;
+};
+
+static void lcms2_error_handler(cmsContext ctx, cmsUInt32Number code,
+                                const char *msg)
+{
+    struct gl_lcms *lcms = cmsGetContextUserData(ctx); // set at context creation
+    MP_ERR(lcms, "lcms2: %s\n", msg);
+}
+
+static void load_profile(struct gl_lcms *p)
+{
+    // Drop whatever profile is currently held, file- or memory-based.
+    talloc_free(p->current_profile);
+    talloc_free(p->icc_data);
+    p->current_profile = NULL;
+    p->icc_data = NULL;
+    p->icc_size = 0;
+    p->using_memory_profile = false;
+
+    const char *opt_path = p->opts->profile;
+    if (!opt_path || !opt_path[0])
+        return;
+
+    // Resolve the configured path and read the whole file (capped at 100 MB).
+    char *path = mp_get_user_path(NULL, p->global, opt_path);
+    MP_VERBOSE(p, "Opening ICC profile '%s'\n", path);
+    struct bstr blob = stream_read_file(path, p, p->global, 100000000);
+    talloc_free(path);
+    if (!blob.len)
+        return; // nothing was read: stay without a profile
+
+    p->icc_data = blob.start;
+    p->icc_size = blob.len;
+    p->current_profile = talloc_strdup(p, opt_path);
+}
+
+static void gl_lcms_destructor(void *ptr)
+{
+    // talloc destructor: release our reference to the video's ICC profile.
+    av_buffer_unref(&((struct gl_lcms *)ptr)->vid_profile);
+}
+
+struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log,
+                             struct mpv_global *global,
+                             struct mp_icc_opts *opts)
+{
+    // Allocate under talloc_ctx; every field not listed starts out zeroed.
+    struct gl_lcms *lcms = talloc_ptrtype(talloc_ctx, lcms);
+    *lcms = (struct gl_lcms) { .log = log, .global = global, .opts = opts };
+    talloc_set_destructor(lcms, gl_lcms_destructor);
+
+    // Applies the options once, which loads a configured profile file.
+    gl_lcms_update_options(lcms);
+
+    return lcms;
+}
+
+void gl_lcms_update_options(struct gl_lcms *p)
+{
+    // Reload if auto mode was switched off or the profile path changed.
+    bool stale_memory = p->using_memory_profile && !p->opts->profile_auto;
+    bool path_differs = !bstr_equals(bstr0(p->opts->profile),
+                                     bstr0(p->current_profile));
+    if (stale_memory || path_differs)
+        load_profile(p);
+    p->changed = true; // probably
+}
+
+// Install an ICC profile supplied as an in-memory blob.
+// Warning: profile.start must point to a ta allocation, and the function
+//          takes over ownership (it is freed if the blob is not adopted).
+// Returns whether the internal profile was changed.
+bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile)
+{
+    // Memory profiles only apply in auto mode and never override an
+    // explicitly configured profile file.
+    bool has_file_opt = p->opts->profile && p->opts->profile[0];
+    bool usable = p->opts->profile_auto && !has_file_opt;
+
+    // Byte-identical to the memory profile already in use?
+    bool same = usable && p->using_memory_profile &&
+                p->icc_data && profile.start &&
+                profile.len == p->icc_size &&
+                memcmp(profile.start, p->icc_data, p->icc_size) == 0;
+
+    if (!usable || same) {
+        talloc_free(profile.start);
+        return false;
+    }
+
+    talloc_free(p->icc_data);
+    p->icc_data = talloc_steal(p, profile.start);
+    p->icc_size = profile.len;
+    p->using_memory_profile = true;
+    p->changed = true;
+    return true;
+}
+
+// NULL-safe comparison of two profile buffers via bstr_equals.
+static bool vid_profile_eq(struct AVBufferRef *a, struct AVBufferRef *b)
+{
+    if (a && b) {
+        struct bstr lhs = { a->data, a->size }, rhs = { b->data, b->size };
+        return bstr_equals(lhs, rhs);
+    }
+    return a == b; // at least one is NULL: equal only if both are
+}
+
+// Reports whether the profile, config, prim/trc or the video's ICC profile
+// differ from the last gl_lcms_get_lut3d() call; if so, call it again.
+bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim,
+                         enum mp_csp_trc trc, struct AVBufferRef *vid_profile)
+{
+    bool params_same = !p->changed && p->current_prim == prim &&
+                       p->current_trc == trc;
+
+    return !(params_same && vid_profile_eq(p->vid_profile, vid_profile));
+}
+
+// True if ICC data is loaded. (gl_lcms_get_lut3d() is then expected to return
+// a lut, but it can still fail at runtime, e.g. because of invalid icc data.)
+bool gl_lcms_has_profile(struct gl_lcms *p)
+{
+    return p->icc_size != 0;
+}
+
+// Create the source ("video") profile for the transform: the embedded ICC
+// profile if enabled and usable, else one synthesized from prim and trc.
+// disp_profile is only consulted for BT.1886 black point detection.
+// Returns NULL on failure; on success the caller closes the returned handle.
+static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms,
+                                   cmsHPROFILE disp_profile,
+                                   enum mp_csp_prim prim, enum mp_csp_trc trc)
+{
+    if (p->opts->use_embedded && p->vid_profile) {
+        // Try using the embedded ICC profile
+        cmsHPROFILE prof = cmsOpenProfileFromMemTHR(cms, p->vid_profile->data,
+                                                    p->vid_profile->size);
+        if (prof) {
+            MP_VERBOSE(p, "Successfully opened embedded ICC profile\n");
+            return prof;
+        }
+        // Otherwise, warn the user and generate the profile as usual
+        MP_WARN(p, "Video contained an invalid ICC profile! Ignoring...\n");
+    }
+
+    // The input profile for the transformation is dependent on the video
+    // primaries and transfer characteristics
+    struct mp_csp_primaries csp = mp_get_csp_primaries(prim);
+    cmsCIExyY wp_xyY = {csp.white.x, csp.white.y, 1.0};
+    cmsCIExyYTRIPLE prim_xyY = {
+        .Red   = {csp.red.x,   csp.red.y,   1.0},
+        .Green = {csp.green.x, csp.green.y, 1.0},
+        .Blue  = {csp.blue.x,  csp.blue.y,  1.0},
+    };
+
+    cmsToneCurve *tonecurve[3] = {0};
+    switch (trc) {
+    case MP_CSP_TRC_LINEAR:  tonecurve[0] = cmsBuildGamma(cms, 1.0); break;
+    case MP_CSP_TRC_GAMMA18: tonecurve[0] = cmsBuildGamma(cms, 1.8); break;
+    case MP_CSP_TRC_GAMMA20: tonecurve[0] = cmsBuildGamma(cms, 2.0); break;
+    case MP_CSP_TRC_GAMMA22: tonecurve[0] = cmsBuildGamma(cms, 2.2); break;
+    case MP_CSP_TRC_GAMMA24: tonecurve[0] = cmsBuildGamma(cms, 2.4); break;
+    case MP_CSP_TRC_GAMMA26: tonecurve[0] = cmsBuildGamma(cms, 2.6); break;
+    case MP_CSP_TRC_GAMMA28: tonecurve[0] = cmsBuildGamma(cms, 2.8); break;
+
+    case MP_CSP_TRC_SRGB:
+        // Values copied from Little-CMS
+        tonecurve[0] = cmsBuildParametricToneCurve(cms, 4,
+                (double[5]){2.40, 1/1.055, 0.055/1.055, 1/12.92, 0.04045});
+        break;
+
+    case MP_CSP_TRC_PRO_PHOTO:
+        tonecurve[0] = cmsBuildParametricToneCurve(cms, 4,
+                (double[5]){1.8, 1.0, 0.0, 1/16.0, 0.03125});
+        break;
+
+    case MP_CSP_TRC_BT_1886: {
+        double src_black[3];
+        if (p->opts->contrast < 0) {
+            // User requested infinite contrast, return 2.4 profile
+            tonecurve[0] = cmsBuildGamma(cms, 2.4);
+            break;
+        } else if (p->opts->contrast > 0) {
+            MP_VERBOSE(p, "Using specified contrast: %d\n", p->opts->contrast);
+            for (int i = 0; i < 3; i++)
+                src_black[i] = 1.0 / p->opts->contrast;
+        } else {
+            // To build an appropriate BT.1886 transformation we need access to
+            // the display's black point, so we use LittleCMS' detection
+            // function. Relative colorimetric is used since we want to match
+            // the target device's actual black point even in perceptual mode
+            const int intent = MP_INTENT_RELATIVE_COLORIMETRIC;
+            cmsCIEXYZ bp_XYZ;
+            if (!cmsDetectBlackPoint(&bp_XYZ, disp_profile, intent, 0))
+                return NULL;
+
+            // Map this XYZ value back into the (linear) source space
+            cmsToneCurve *linear = cmsBuildGamma(cms, 1.0);
+            cmsHPROFILE rev_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY,
+                    &prim_xyY, (cmsToneCurve*[3]){linear, linear, linear});
+            cmsHPROFILE xyz_profile = cmsCreateXYZProfileTHR(cms);
+            cmsHTRANSFORM xyz2src = cmsCreateTransformTHR(cms,
+                    xyz_profile, TYPE_XYZ_DBL, rev_profile, TYPE_RGB_DBL,
+                    intent, cmsFLAGS_NOCACHE | cmsFLAGS_NOOPTIMIZE);
+            cmsFreeToneCurve(linear);
+            cmsCloseProfile(rev_profile);
+            cmsCloseProfile(xyz_profile);
+            if (!xyz2src)
+                return NULL;
+
+            cmsDoTransform(xyz2src, &bp_XYZ, src_black, 1);
+            cmsDeleteTransform(xyz2src);
+            double contrast = 3.0 / (src_black[0] + src_black[1] + src_black[2]);
+            MP_VERBOSE(p, "Detected ICC profile contrast: %f\n", contrast);
+        }
+
+        // Build the parametric BT.1886 transfer curve, one per channel
+        for (int i = 0; i < 3; i++) {
+            const double gamma = 2.40;
+            double binv = pow(src_black[i], 1.0/gamma);
+            tonecurve[i] = cmsBuildParametricToneCurve(cms, 6,
+                    (double[4]){gamma, 1.0 - binv, binv, 0.0});
+        }
+        break;
+    }
+    default:
+        abort();
+    }
+
+    cmsHPROFILE vid_profile = NULL;
+    if (tonecurve[0]) {
+        // Channels without a curve of their own share tonecurve[0].
+        if (!tonecurve[1]) tonecurve[1] = tonecurve[0];
+        if (!tonecurve[2]) tonecurve[2] = tonecurve[0];
+        vid_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY,
+                                             tonecurve);
+    }
+
+    // Free each distinct curve once, also when tonecurve[0] could not be built.
+    if (tonecurve[2] != tonecurve[0]) cmsFreeToneCurve(tonecurve[2]);
+    if (tonecurve[1] != tonecurve[0]) cmsFreeToneCurve(tonecurve[1]);
+    if (tonecurve[0]) cmsFreeToneCurve(tonecurve[0]);
+
+    return vid_profile;
+}
+
+// Compute (or load from the cache) the 3D LUT from the video profile to the
+// loaded ICC profile. On success, returns true and sets *result_lut3d.
+bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d,
+                       enum mp_csp_prim prim, enum mp_csp_trc trc,
+                       struct AVBufferRef *vid_profile)
+{
+    int s_r, s_g, s_b;
+    bool result = false;
+
+    p->changed = false;
+    p->current_prim = prim;
+    p->current_trc = trc;
+
+    // Keep our own reference to the video's ICC profile for as long as
+    // equality checking (gl_lcms_has_changed()) may still need it.
+    av_buffer_unref(&p->vid_profile);
+    if (vid_profile) {
+        MP_VERBOSE(p, "Got an embedded ICC profile.\n");
+        p->vid_profile = av_buffer_ref(vid_profile);
+        MP_HANDLE_OOM(p->vid_profile);
+    }
+
+    if (!gl_parse_3dlut_size(p->opts->size_str, &s_r, &s_g, &s_b))
+        return false;
+
+    if (!gl_lcms_has_profile(p))
+        return false;
+
+    // For simplicity, default to 65x65x65, which is large enough to cover
+    // typical profiles with good accuracy while not being too wasteful
+    s_r = s_r ? s_r : 65; // a parsed size of 0 means "auto"
+    s_g = s_g ? s_g : 65;
+    s_b = s_b ? s_b : 65;
+
+    void *tmp = talloc_new(NULL); // parent of all temporaries; freed on exit
+    uint16_t *output = talloc_array(tmp, uint16_t, s_r * s_g * s_b * 4);
+    struct lut3d *lut = NULL;
+    cmsContext cms = NULL;
+
+    char *cache_file = NULL;
+    if (p->opts->cache) {
+        // Gamma and primaries are part of the header to uniquely identify it,
+        // because we may change these parameters or make them customizable.
+        char *cache_info = talloc_asprintf(tmp,
+                "ver=1.4, intent=%d, size=%dx%dx%d, prim=%d, trc=%d, "
+                "contrast=%d\n",
+                p->opts->intent, s_r, s_g, s_b, prim, trc, p->opts->contrast);
+
+        // The cache file name is the SHA-256 over header and both profiles.
+        uint8_t hash[32];
+        struct AVSHA *sha = av_sha_alloc();
+        MP_HANDLE_OOM(sha);
+        av_sha_init(sha, 256);
+        av_sha_update(sha, cache_info, strlen(cache_info));
+        if (vid_profile)
+            av_sha_update(sha, vid_profile->data, vid_profile->size);
+        av_sha_update(sha, p->icc_data, p->icc_size);
+        av_sha_final(sha, hash);
+        av_free(sha);
+
+        char *cache_dir = p->opts->cache_dir;
+        if (cache_dir && cache_dir[0]) {
+            cache_dir = mp_get_user_path(tmp, p->global, cache_dir);
+        } else {
+            cache_dir = mp_find_user_file(tmp, p->global, "cache", "");
+        }
+
+        if (cache_dir && cache_dir[0]) {
+            cache_file = talloc_strdup(tmp, "");
+            for (int i = 0; i < sizeof(hash); i++)
+                cache_file = talloc_asprintf_append(cache_file, "%02X", hash[i]);
+            cache_file = mp_path_join(tmp, cache_dir, cache_file);
+            mp_mkdirp(cache_dir);
+        }
+    }
+
+    // check cache
+    if (cache_file && stat(cache_file, &(struct stat){0}) == 0) {
+        MP_VERBOSE(p, "Opening 3D LUT cache in file '%s'.\n", cache_file);
+        struct bstr cachedata = stream_read_file(cache_file, tmp, p->global,
+                                                 1000000000); // 1 GB
+        if (cachedata.len == talloc_get_size(output)) {
+            memcpy(output, cachedata.start, cachedata.len);
+            goto done;
+        } else {
+            MP_WARN(p, "3D LUT cache invalid!\n");
+        }
+    }
+
+    cms = cmsCreateContext(NULL, p); // p is the user data of lcms2_error_handler
+    if (!cms)
+        goto error_exit;
+    cmsSetLogErrorHandlerTHR(cms, lcms2_error_handler);
+
+    cmsHPROFILE profile =
+        cmsOpenProfileFromMemTHR(cms, p->icc_data, p->icc_size);
+    if (!profile)
+        goto error_exit;
+
+    cmsHPROFILE vid_hprofile = get_vid_profile(p, cms, profile, prim, trc);
+    if (!vid_hprofile) {
+        cmsCloseProfile(profile);
+        goto error_exit;
+    }
+
+    cmsHTRANSFORM trafo = cmsCreateTransformTHR(cms, vid_hprofile, TYPE_RGB_16,
+                                                profile, TYPE_RGBA_16,
+                                                p->opts->intent,
+                                                cmsFLAGS_NOCACHE |
+                                                cmsFLAGS_NOOPTIMIZE |
+                                                cmsFLAGS_BLACKPOINTCOMPENSATION);
+    cmsCloseProfile(profile);
+    cmsCloseProfile(vid_hprofile);
+
+    if (!trafo)
+        goto error_exit;
+
+    // transform a (s_r)x(s_g)x(s_b) cube, with 3 components per channel
+    uint16_t *input = talloc_array(tmp, uint16_t, s_r * 3); // one row of RGB input
+    for (int b = 0; b < s_b; b++) {
+        for (int g = 0; g < s_g; g++) {
+            for (int r = 0; r < s_r; r++) {
+                input[r * 3 + 0] = r * 65535 / (s_r - 1);
+                input[r * 3 + 1] = g * 65535 / (s_g - 1);
+                input[r * 3 + 2] = b * 65535 / (s_b - 1);
+            }
+            size_t base = (b * s_r * s_g + g * s_r) * 4; // 4 output values per entry
+            cmsDoTransform(trafo, input, output + base, s_r);
+        }
+    }
+
+    cmsDeleteTransform(trafo);
+
+    if (cache_file) {
+        FILE *out = fopen(cache_file, "wb");
+        if (out) {
+            fwrite(output, talloc_get_size(output), 1, out);
+            fclose(out);
+        }
+    }
+
+done: ; // output now holds the LUT data, freshly computed or read from cache
+
+    lut = talloc_ptrtype(NULL, lut);
+    *lut = (struct lut3d) {
+        .data = talloc_steal(lut, output), // reparent so it outlives tmp
+        .size = {s_r, s_g, s_b},
+    };
+
+    *result_lut3d = lut;
+    result = true;
+
+error_exit:
+
+    if (cms)
+        cmsDeleteContext(cms);
+
+    if (!lut)
+        MP_FATAL(p, "Error loading ICC profile.\n");
+
+    talloc_free(tmp);
+    return result;
+}
+
+#else /* HAVE_LCMS2 */
+
+struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log,
+                             struct mpv_global *global,
+                             struct mp_icc_opts *opts)
+{
+    return (struct gl_lcms *) talloc_new(talloc_ctx);
+}
+
+void gl_lcms_update_options(struct gl_lcms *p) { }
+bool gl_lcms_has_profile(struct gl_lcms *p) { return false; }
+bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile)
+{
+    talloc_free(profile.start); // ownership is taken, as in the lcms2 build
+    return false;
+}
+
+bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim,
+                         enum mp_csp_trc trc, struct AVBufferRef *vid_profile)
+{
+    return false;
+}
+
+bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d,
+                       enum mp_csp_prim prim, enum mp_csp_trc trc,
+                       struct AVBufferRef *vid_profile)
+{
+    return false;
+}
+
+#endif
+
+static int validate_3dlut_size_opt(struct mp_log *log, const m_option_t *opt,
+                                   struct bstr name, const char **value)
+{
+    int r, g, b; // parsed sizes are discarded; only validity matters here
+    return !gl_parse_3dlut_size(*value, &r, &g, &b) ? M_OPT_INVALID : 0;
+}
+
+#define OPT_BASE_STRUCT struct mp_icc_opts
+const struct m_sub_options mp_icc_conf = {
+    .opts = (const m_option_t[]) {
+        {"use-embedded-icc-profile", OPT_BOOL(use_embedded)},
+        {"icc-profile", OPT_STRING(profile), .flags = M_OPT_FILE},
+        {"icc-profile-auto", OPT_BOOL(profile_auto)},
+        {"icc-cache", OPT_BOOL(cache)},
+        {"icc-cache-dir", OPT_STRING(cache_dir), .flags = M_OPT_FILE},
+        {"icc-intent", OPT_INT(intent)},
+        {"icc-force-contrast", OPT_CHOICE(contrast, {"no", 0}, {"inf", -1}),
+            M_RANGE(0, 1000000)}, // BT.1886: 0 = detect black point, <0 = gamma 2.4
+        {"icc-3dlut-size", OPT_STRING_VALIDATE(size_str, validate_3dlut_size_opt)},
+        {"icc-use-luma", OPT_BOOL(icc_use_luma)},
+        {0}
+    },
+    .size = sizeof(struct mp_icc_opts),
+    .defaults = &(const struct mp_icc_opts) {
+        .size_str = "auto", // parsed as 0x0x0; gl_lcms_get_lut3d() then uses 65
+        .intent = MP_INTENT_RELATIVE_COLORIMETRIC,
+        .use_embedded = true,
+        .cache = true,
+    },
+};
diff --git a/video/out/gpu/lcms.h b/video/out/gpu/lcms.h
new file mode 100644
index 0000000..607353a
--- /dev/null
+++ b/video/out/gpu/lcms.h
@@ -0,0 +1,61 @@
+#ifndef MP_GL_LCMS_H
+#define MP_GL_LCMS_H
+
+#include <stddef.h>
+#include <stdbool.h>
+#include "misc/bstr.h"
+#include "video/csputils.h"
+#include <libavutil/buffer.h>
+
+extern const struct m_sub_options mp_icc_conf;
+
+struct mp_icc_opts {
+    bool use_embedded;      // "use-embedded-icc-profile"
+    char *profile;          // "icc-profile"
+    bool profile_auto;      // "icc-profile-auto"
+    bool cache;             // "icc-cache"
+    char *cache_dir;        // "icc-cache-dir"
+    char *size_str;         // "icc-3dlut-size": "auto" or "RxGxB"
+    int intent;             // "icc-intent"
+    int contrast;           // "icc-force-contrast": 0 = "no", -1 = "inf"
+    bool icc_use_luma;      // "icc-use-luma"
+};
+
+struct lut3d {
+    uint16_t *data; // size[0] * size[1] * size[2] entries, 4 values per entry
+    int size[3];    // LUT dimensions in r, g, b order
+};
+
+struct mp_log;
+struct mpv_global;
+struct gl_lcms;
+
+struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log,
+                             struct mpv_global *global,
+                             struct mp_icc_opts *opts);
+void gl_lcms_update_options(struct gl_lcms *p);
+bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile);
+bool gl_lcms_has_profile(struct gl_lcms *p);
+bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **,
+                       enum mp_csp_prim prim, enum mp_csp_trc trc,
+                       struct AVBufferRef *vid_profile);
+bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim,
+                         enum mp_csp_trc trc, struct AVBufferRef *vid_profile);
+
+static inline bool gl_parse_3dlut_size(const char *arg, int *p1, int *p2, int *p3)
+{
+    // Parses "auto" (all sizes set to 0) or "RxGxB" with each size in [2, 512].
+    if (strcmp(arg, "auto") == 0) {
+        *p1 = 0;
+        *p2 = 0;
+        *p3 = 0;
+        return true;
+    }
+    if (sscanf(arg, "%dx%dx%d", p1, p2, p3) != 3)
+        return false;
+
+    return *p1 >= 2 && *p1 <= 512 && *p2 >= 2 && *p2 <= 512 &&
+           *p3 >= 2 && *p3 <= 512;
+}
+
+#endif
diff --git a/video/out/gpu/libmpv_gpu.c b/video/out/gpu/libmpv_gpu.c
new file mode 100644
index 0000000..aae1d18
--- /dev/null
+++ b/video/out/gpu/libmpv_gpu.c
@@ -0,0 +1,248 @@
+#include "config.h"
+#include "hwdec.h"
+#include "libmpv_gpu.h"
+#include "libmpv/render_gl.h"
+#include "video.h"
+#include "video/out/libmpv.h"
+
+static const struct libmpv_gpu_context_fns *context_backends[] = {
+#if HAVE_GL
+    &libmpv_gpu_context_gl,
+#endif
+    NULL
+};
+
+struct priv {
+    struct libmpv_gpu_context *context;
+
+    struct gl_video *renderer;
+};
+
+struct native_resource_entry {
+    const char *name;   // ra_add_native_resource() internal name argument
+    size_t size;        // size of struct pointed to (0 for no copy)
+};
+
+static const struct native_resource_entry native_resource_map[] = {
+    [MPV_RENDER_PARAM_X11_DISPLAY] = {
+        .name = "x11",
+        .size = 0, // init() forwards the param's data pointer without copying
+    },
+    [MPV_RENDER_PARAM_WL_DISPLAY] = {
+        .name = "wl",
+        .size = 0, // init() forwards the param's data pointer without copying
+    },
+    [MPV_RENDER_PARAM_DRM_DRAW_SURFACE_SIZE] = {
+        .name = "drm_draw_surface_size",
+        .size = sizeof (mpv_opengl_drm_draw_surface_size), // copied by init()
+    },
+    [MPV_RENDER_PARAM_DRM_DISPLAY_V2] = {
+        .name = "drm_params_v2",
+        .size = sizeof (mpv_opengl_drm_params_v2), // copied by init()
+    },
+};
+
+static int init(struct render_backend *ctx, mpv_render_param *params)
+{
+    struct priv *p = talloc_zero(NULL, struct priv);
+    ctx->priv = p;
+
+    // The API type selects which context backend drives the renderer.
+    char *api = get_mpv_render_param(params, MPV_RENDER_PARAM_API_TYPE, NULL);
+    if (!api)
+        return MPV_ERROR_INVALID_PARAMETER;
+
+    const struct libmpv_gpu_context_fns *backend = NULL;
+    for (int n = 0; !backend && context_backends[n]; n++)
+        if (strcmp(context_backends[n]->api_name, api) == 0)
+            backend = context_backends[n];
+    if (!backend)
+        return MPV_ERROR_NOT_IMPLEMENTED;
+
+    p->context = talloc_zero(NULL, struct libmpv_gpu_context);
+    *p->context = (struct libmpv_gpu_context){
+        .global = ctx->global,
+        .log = ctx->log,
+        .fns = backend,
+    };
+
+    int err = backend->init(p->context, params);
+    if (err < 0)
+        return err;
+
+    // Forward known native-resource params; sized entries are copied under p.
+    struct ra *ra = p->context->ra_ctx->ra;
+    for (int n = 0; params && params[n].type; n++) {
+        if (params[n].type <= 0 ||
+            params[n].type >= MP_ARRAY_SIZE(native_resource_map))
+            continue;
+        const struct native_resource_entry *entry =
+            &native_resource_map[params[n].type];
+        if (!entry->name)
+            continue;
+        void *data = params[n].data;
+        if (entry->size)
+            data = talloc_memdup(p, data, entry->size);
+        ra_add_native_resource(ra, entry->name, data);
+    }
+
+    p->renderer = gl_video_init(ra, ctx->log, ctx->global);
+    ctx->hwdec_devs = hwdec_devices_create();
+    gl_video_init_hwdecs(p->renderer, p->context->ra_ctx, ctx->hwdec_devs, true);
+    ctx->driver_caps = VO_CAP_ROTATE90;
+    return 0;
+}
+
+static bool check_format(struct render_backend *ctx, int imgfmt)
+{
+    // Format support is answered by the renderer.
+    struct priv *priv = ctx->priv;
+    return gl_video_check_format(priv->renderer, imgfmt);
+}
+
+static int set_parameter(struct render_backend *ctx, mpv_render_param param)
+{
+    struct priv *p = ctx->priv;
+    switch (param.type) {
+    case MPV_RENDER_PARAM_ICC_PROFILE: {
+        // The profile ends up in gl_lcms_set_memory_profile(), which takes
+        // ownership of a ta allocation; never hand it the API user's buffer.
+        mpv_byte_array *data = param.data;
+        void *copy = talloc_memdup(NULL, data->data, data->size);
+        gl_video_set_icc_profile(p->renderer, (bstr){copy, data->size});
+        return 0;
+    }
+    case MPV_RENDER_PARAM_AMBIENT_LIGHT:
+        gl_video_set_ambient_lux(p->renderer, *(int *)param.data);
+        return 0;
+    default:
+        return MPV_ERROR_NOT_IMPLEMENTED;
+    }
+}
+
+static void reconfig(struct render_backend *ctx, struct mp_image_params *params)
+{
+    // Pass the new video parameters straight on to the renderer.
+    struct priv *priv = ctx->priv;
+    gl_video_config(priv->renderer, params);
+}
+
+static void reset(struct render_backend *ctx)
+{
+    // Delegates to the renderer's own reset.
+    struct priv *priv = ctx->priv;
+    gl_video_reset(priv->renderer);
+}
+
+static void update_external(struct render_backend *ctx, struct vo *vo)
+{
+    // Without a VO, only the OSD source is cleared.
+    struct priv *priv = ctx->priv;
+    gl_video_set_osd_source(priv->renderer, vo ? vo->osd : NULL);
+    if (vo)
+        gl_video_configure_queue(priv->renderer, vo);
+}
+
+static void resize(struct render_backend *ctx, struct mp_rect *src,
+                   struct mp_rect *dst, struct mp_osd_res *osd)
+{
+    // Forward the source/destination rectangles and OSD parameters.
+    struct priv *priv = ctx->priv;
+    gl_video_resize(priv->renderer, src, dst, osd);
+}
+
+static int get_target_size(struct render_backend *ctx, mpv_render_param *params,
+                           int *out_w, int *out_h)
+{
+    struct libmpv_gpu_context *gpu = ((struct priv *)ctx->priv)->context;
+    // Wrapping the target surface is cheap, so read its size off the wrapped
+    // texture rather than adding a dedicated backend entrypoint.
+    struct ra_tex *wrapped;
+    int status = gpu->fns->wrap_fbo(gpu, params, &wrapped);
+    if (status >= 0) {
+        *out_w = wrapped->params.w;
+        *out_h = wrapped->params.h;
+    }
+    return status < 0 ? status : 0;
+}
+
+static int render(struct render_backend *ctx, mpv_render_param *params,
+                  struct vo_frame *frame)
+{
+    struct priv *p = ctx->priv;
+    struct libmpv_gpu_context *gpu = p->context;
+
+    // Wrapping the surface is cheap, better than adding new backend entrypoints.
+    struct ra_tex *wrapped;
+    int status = gpu->fns->wrap_fbo(gpu, params, &wrapped);
+    if (status < 0)
+        return status;
+
+    // Depth and flip are looked up with a pointer to 0 as the default argument.
+    int zero = 0;
+    int *depth = get_mpv_render_param(params, MPV_RENDER_PARAM_DEPTH, &zero);
+    gl_video_set_fb_depth(p->renderer, *depth);
+    int *flip_y = get_mpv_render_param(params, MPV_RENDER_PARAM_FLIP_Y, &zero);
+    bool flip = *flip_y;
+
+    struct ra_fbo target = {.tex = wrapped, .flip = flip};
+    gl_video_render_frame(p->renderer, frame, target, RENDER_FRAME_DEF);
+    gpu->fns->done_frame(gpu, frame->display_synced);
+    return 0;
+}
+
+static struct mp_image *get_image(struct render_backend *ctx, int imgfmt,
+                                  int w, int h, int stride_align, int flags)
+{
+    // Image allocation is delegated to the renderer.
+    struct priv *priv = ctx->priv;
+    return gl_video_get_image(priv->renderer, imgfmt, w, h, stride_align, flags);
+}
+
+static void screenshot(struct render_backend *ctx, struct vo_frame *frame,
+                       struct voctrl_screenshot *args)
+{
+    // Screenshots are taken by the renderer.
+    struct priv *priv = ctx->priv;
+    gl_video_screenshot(priv->renderer, frame, args);
+}
+
+static void perfdata(struct render_backend *ctx,
+                     struct voctrl_performance_data *out)
+{
+    // The renderer fills in the performance data.
+    struct priv *priv = ctx->priv;
+    gl_video_perfdata(priv->renderer, out);
+}
+
+static void destroy(struct render_backend *ctx)
+{
+    // Teardown order: renderer first, then hwdec devices, then the context.
+    struct priv *p = ctx->priv;
+    struct libmpv_gpu_context *gpu = p->context;
+    if (p->renderer)
+        gl_video_uninit(p->renderer);
+    hwdec_devices_destroy(ctx->hwdec_devs);
+    if (!gpu)
+        return;
+
+    gpu->fns->destroy(gpu);
+    talloc_free(gpu->priv);
+    talloc_free(gpu);
+}
+
+const struct render_backend_fns render_backend_gpu = {
+    .init = init,
+    .check_format = check_format,
+    .set_parameter = set_parameter,
+    .reconfig = reconfig,
+    .reset = reset,
+    .update_external = update_external,
+    .resize = resize,
+    .get_target_size = get_target_size,
+    .render = render,
+    .get_image = get_image,
+    .screenshot = screenshot,
+    .perfdata = perfdata,
+    .destroy = destroy,
+};
diff --git a/video/out/gpu/libmpv_gpu.h b/video/out/gpu/libmpv_gpu.h
new file mode 100644
index 0000000..497dcc3
--- /dev/null
+++ b/video/out/gpu/libmpv_gpu.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include "video/out/libmpv.h"
+
+struct ra_tex;
+// State of one API specific GPU context, driven through the callbacks in fns.
+struct libmpv_gpu_context {
+    struct mpv_global *global;
+    struct mp_log *log;
+    const struct libmpv_gpu_context_fns *fns;
+    // NOTE(review): presumably the field fns->init must set on success.
+    struct ra_ctx *ra_ctx;
+    void *priv;     // backend data; fns->destroy frees what is stored in it
+};
+
+// Callbacks for the backend specific interaction between libmpv and a ra
+// backend that ra itself cannot handle (initialization and passing FBOs).
+struct libmpv_gpu_context_fns {
+    // The libmpv API type name, see MPV_RENDER_PARAM_API_TYPE.
+    const char *api_name;
+    // Pretty much works like render_backend_fns.init, except that the
+    // API type is already checked by the caller.
+    // Successful init must set ctx->ra_ctx.
+    int (*init)(struct libmpv_gpu_context *ctx, mpv_render_param *params);
+    // Wrap the surface passed to mpv_render_context_render() (via the params
+    // array) into a ra_tex and return it. Returns a libmpv error code, and sets
+    // *out to a temporary object on success. The returned object is valid until
+    // another wrap_fbo() or done_frame() is called.
+    // This does not need to care about generic attributes, like flipping.
+    int (*wrap_fbo)(struct libmpv_gpu_context *ctx, mpv_render_param *params,
+                    struct ra_tex **out);
+    // Signal that the ra_tex object obtained with wrap_fbo is no longer used.
+    // Certain backends may also treat this as the end of rendering (OpenGL).
+    // ds: the GPU render backend passes frame->display_synced here.
+    void (*done_frame)(struct libmpv_gpu_context *ctx, bool ds);
+    // Free all data in ctx->priv.
+    void (*destroy)(struct libmpv_gpu_context *ctx);
+};
+// Defined elsewhere; judging by the name the OpenGL backend (TODO confirm).
+extern const struct libmpv_gpu_context_fns libmpv_gpu_context_gl;
diff --git a/video/out/gpu/osd.c b/video/out/gpu/osd.c
new file mode 100644
index 0000000..91505a9
--- /dev/null
+++ b/video/out/gpu/osd.c
@@ -0,0 +1,363 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "video/csputils.h"
+#include "video/mp_image.h"
+#include "osd.h"
+// Append the stringified GLSL source x plus a newline to the shader cache `sc`.
+#define GLSL(x) gl_sc_add(sc, #x "\n");
+
+// gl_sc_blend() arguments per bitmap format, in glBlendFuncSeparate() order
+static const int blend_factors[SUBBITMAP_COUNT][4] = {
+    [SUBBITMAP_LIBASS] = {RA_BLEND_SRC_ALPHA, RA_BLEND_ONE_MINUS_SRC_ALPHA,
+                          RA_BLEND_ONE,       RA_BLEND_ONE_MINUS_SRC_ALPHA},
+    [SUBBITMAP_BGRA] =   {RA_BLEND_ONE,       RA_BLEND_ONE_MINUS_SRC_ALPHA,
+                          RA_BLEND_ONE,       RA_BLEND_ONE_MINUS_SRC_ALPHA},
+};
+// Vertex layout of the OSD quads, as passed to gl_sc_dispatch_draw().
+struct vertex {
+    float position[2];      // x, y after gl_transform_vec()
+    float texcoord[2];      // source position divided by the texture size
+    uint8_t ass_color[4];   // r, g, b, a (read by the SUBBITMAP_LIBASS shader)
+};
+// Shader inputs for struct vertex: one entry per field, located by offsetof().
+static const struct ra_renderpass_input vertex_vao[] = {
+    {"position",  RA_VARTYPE_FLOAT,      2, 1, offsetof(struct vertex, position)},
+    {"texcoord" , RA_VARTYPE_FLOAT,      2, 1, offsetof(struct vertex, texcoord)},
+    {"ass_color", RA_VARTYPE_BYTE_UNORM, 4, 1, offsetof(struct vertex, ass_color)},
+};
+// One OSD part (indexed by sub_bitmaps.render_index): texture plus geometry.
+struct mpgl_osd_part {
+    enum sub_bitmap_format format;  // format `texture` was allocated for
+    int change_id;                  // last imgs->change_id seen by gen_osd_cb()
+    struct ra_tex *texture;
+    int w, h;                       // allocated size of `texture`
+    int num_subparts;               // entries of `subparts` valid for this frame
+    int prev_num_subparts;          // num_subparts of the previous generate call
+    struct sub_bitmap *subparts;    // copy of imgs->parts made in gen_osd_cb()
+    int num_vertices;               // reset and refilled by mpgl_osd_draw_finish()
+    struct vertex *vertices;        // grown on demand by generate_verts()
+};
+// OSD renderer state: one mpgl_osd_part per OSD part plus format lookup data.
+struct mpgl_osd {
+    struct mp_log *log;
+    struct osd_state *osd;
+    struct ra *ra;
+    struct mpgl_osd_part *parts[MAX_OSD_PARTS]; // allocated in mpgl_osd_init()
+    const struct ra_format *fmt_table[SUBBITMAP_COUNT]; // NULL: no texture format
+    bool formats[SUBBITMAP_COUNT]; // fmt_table[n] != NULL; handed to osd_draw()
+    bool change_flag; // for reporting to API user only
+    // Temporary: written by mpgl_osd_generate(), read by mpgl_osd_draw_finish().
+    int stereo_mode;
+    struct mp_osd_res osd_res;
+    void *scratch; // 1 byte allocation from mpgl_osd_init(); unused in osd.c
+};
+
+// Create the OSD renderer state for ra. Free it with mpgl_osd_destroy().
+struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log,
+                               struct osd_state *osd)
+{
+    struct mpgl_osd *ctx = talloc_ptrtype(NULL, ctx);
+    *ctx = (struct mpgl_osd) {
+        .log = log,
+        .osd = osd,
+        .ra = ra,
+        .change_flag = true,
+        .scratch = talloc_zero_size(ctx, 1),
+    };
+
+    // 1 byte x 1 component for libass bitmaps, 1 byte x 4 components for BGRA.
+    ctx->fmt_table[SUBBITMAP_LIBASS] = ra_find_unorm_format(ra, 1, 1);
+    ctx->fmt_table[SUBBITMAP_BGRA] = ra_find_unorm_format(ra, 1, 4);
+    for (int i = 0; i < MAX_OSD_PARTS; i++)
+        ctx->parts[i] = talloc_zero(ctx, struct mpgl_osd_part);
+    // A sub-bitmap format is only offered if a texture format exists for it.
+    for (int fmt = 0; fmt < SUBBITMAP_COUNT; fmt++)
+        ctx->formats[fmt] = ctx->fmt_table[fmt] != NULL;
+    return ctx;
+}
+
+// Free ctx and the texture of every part. Does nothing if ctx is NULL.
+void mpgl_osd_destroy(struct mpgl_osd *ctx)
+{
+    if (ctx == NULL)
+        return;
+
+    // Textures are released via ra_tex_free(); the rest hangs off ctx in talloc.
+    for (int i = 0; i < MAX_OSD_PARTS; i++)
+        ra_tex_free(ctx->ra, &ctx->parts[i]->texture);
+    talloc_free(ctx);
+}
+// Round v up to a power of two (1 if v <= 1); INT_MAX if none fits in an int.
+static int next_pow2(int v)
+{
+    for (int x = 0; x <= 30; x++) { // 1 << 30: largest power of two in an int
+        if ((1 << x) >= v)
+            return 1 << x;
+    }
+    return INT_MAX;
+}
+
+// Upload the packed bitmap of imgs to the texture of osd. The texture is
+// (re)created first if it is missing, too small, or has another format.
+// Returns false if the texture cannot be created or the upload fails.
+static bool upload_osd(struct mpgl_osd *ctx, struct mpgl_osd_part *osd,
+                       struct sub_bitmaps *imgs)
+{
+    struct ra *ra = ctx->ra;
+
+    assert(imgs->packed);
+    const struct ra_format *fmt = ctx->fmt_table[imgs->format];
+    assert(fmt);
+
+    // Texture sizes are rounded up to powers of two (and to at least 32).
+    int req_w = next_pow2(imgs->packed_w);
+    int req_h = next_pow2(imgs->packed_h);
+    bool reusable = osd->texture && osd->format == imgs->format &&
+                    req_w <= osd->w && req_h <= osd->h;
+
+    if (!reusable) {
+        ra_tex_free(ra, &osd->texture);
+
+        osd->format = imgs->format;
+        osd->w = MPMAX(32, req_w);
+        osd->h = MPMAX(32, req_h);
+        MP_VERBOSE(ctx, "Reallocating OSD texture to %dx%d.\n", osd->w, osd->h);
+
+        int limit = ra->max_texture_wh;
+        if (osd->w > limit || osd->h > limit) {
+            MP_ERR(ctx, "OSD bitmaps do not fit on a surface with the maximum "
+                   "supported size %dx%d.\n", limit, limit);
+            return false;
+        }
+
+        struct ra_tex_params tex_params = {
+            .dimensions = 2,
+            .w = osd->w,
+            .h = osd->h,
+            .d = 1,
+            .format = fmt,
+            .render_src = true,
+            .src_linear = true,
+            .host_mutable = true,
+        };
+        osd->texture = ra_tex_create(ra, &tex_params);
+        if (!osd->texture)
+            return false;
+    }
+
+    // Only the packed_w x packed_h region starting at (0, 0) is written.
+    struct mp_rect used = {0, 0, imgs->packed_w, imgs->packed_h};
+    struct ra_tex_upload_params upload = {
+        .tex = osd->texture,
+        .src = imgs->packed->planes[0],
+        .invalidate = true,
+        .rc = &used,
+        .stride = imgs->packed->stride[0],
+    };
+
+    return ra->fns->tex_upload(ra, &upload);
+}
+
+// osd_draw() callback: re-upload the texture of the part imgs belongs to if its
+// change_id differs, then copy the sub-bitmap list into the part for drawing.
+static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs)
+{
+    struct mpgl_osd *ctx = pctx;
+    if (!imgs->num_parts || !ctx->formats[imgs->format])
+        return;
+
+    struct mpgl_osd_part *osd = ctx->parts[imgs->render_index];
+    int count = imgs->num_parts;
+    if (osd->change_id != imgs->change_id) {
+        // After a failed upload the part is left without sub-bitmaps.
+        if (!upload_osd(ctx, osd, imgs))
+            count = 0;
+        osd->change_id = imgs->change_id;
+        ctx->change_flag = true;
+    }
+
+    osd->num_subparts = count;
+    MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts);
+    memcpy(osd->subparts, imgs->parts,
+           osd->num_subparts * sizeof(osd->subparts[0]));
+}
+
+// Bind the texture of OSD part `index` to sc and add the shader code that
+// computes `color` for the part's bitmap format. Returns false, without
+// touching sc, if the part has no format, no sub-bitmaps, or no texture.
+bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index,
+                           struct gl_shader_cache *sc)
+{
+    assert(index >= 0 && index < MAX_OSD_PARTS);
+    struct mpgl_osd_part *part = ctx->parts[index];
+
+    bool drawable = part->format && part->num_subparts && part->texture;
+    if (!drawable)
+        return false;
+
+    gl_sc_uniform_texture(sc, "osdtex", part->texture);
+    if (part->format == SUBBITMAP_BGRA) {
+        // The sampled texel is swizzled with .bgra.
+        GLSL(color = texture(osdtex, texcoord).bgra;)
+    } else if (part->format == SUBBITMAP_LIBASS) {
+        // The red channel of the texture scales the alpha of ass_color.
+        GLSL(color =
+            vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);)
+    } else {
+        MP_ASSERT_UNREACHABLE();
+    }
+
+    return true;
+}
+// Write 6 vertices for one quad to va[0..5] (4 corners, 2 of them repeated).
+static void write_quad(struct vertex *va, struct gl_transform t,
+                       float x0, float y0, float x1, float y1,
+                       float tx0, float ty0, float tx1, float ty1,
+                       float tex_w, float tex_h, const uint8_t color[4])
+{
+    gl_transform_vec(t, &x0, &y0);
+    gl_transform_vec(t, &x1, &y1);
+    // Texture coordinates are divided by tex_w/tex_h; all corners share color.
+#define COLOR_INIT {color[0], color[1], color[2], color[3]}
+    va[0] = (struct vertex){ {x0, y0}, {tx0 / tex_w, ty0 / tex_h}, COLOR_INIT };
+    va[1] = (struct vertex){ {x0, y1}, {tx0 / tex_w, ty1 / tex_h}, COLOR_INIT };
+    va[2] = (struct vertex){ {x1, y0}, {tx1 / tex_w, ty0 / tex_h}, COLOR_INIT };
+    va[3] = (struct vertex){ {x1, y1}, {tx1 / tex_w, ty1 / tex_h}, COLOR_INIT };
+    va[4] = va[2];  // (x1, y0) again
+    va[5] = va[1];  // (x0, y1) again
+#undef COLOR_INIT
+}
+
+// Append one quad (6 vertices) per sub-bitmap of part to part->vertices, with
+// positions transformed by t. part->num_vertices is advanced accordingly.
+static void generate_verts(struct mpgl_osd_part *part, struct gl_transform t)
+{
+    MP_TARRAY_GROW(part, part->vertices,
+                   part->num_vertices + part->num_subparts * 6);
+    for (int i = 0; i < part->num_subparts; i++) {
+        const struct sub_bitmap *sb = &part->subparts[i];
+        struct vertex *quad = part->vertices + part->num_vertices;
+        part->num_vertices += 6;
+
+        // Unpack libass.color (0xRRGGBBTT, TT inverted into alpha). Only the
+        // SUBBITMAP_LIBASS shader reads it; other formats get a garbage value.
+        uint32_t packed = sb->libass.color;
+        uint8_t color[4] = { packed >> 24, (packed >> 16) & 0xff,
+                             (packed >> 8) & 0xff, 255 - (packed & 0xff) };
+
+        write_quad(quad, t,
+                   sb->x, sb->y, sb->x + sb->dw, sb->y + sb->dh,
+                   sb->src_x, sb->src_y, sb->src_x + sb->w, sb->src_y + sb->h,
+                   part->w, part->h, color);
+    }
+}
+
+// Number of screen divisions per axis for the given 3D mode: div[0] is the
+// count along x, div[1] the count along y. Each is either 1 or 2.
+static void get_3d_side_by_side(int stereo_mode, int div[2])
+{
+    bool split_x = stereo_mode == MP_STEREO3D_SBS2L ||
+                   stereo_mode == MP_STEREO3D_SBS2R;
+    bool split_y = stereo_mode == MP_STEREO3D_AB2R ||
+                   stereo_mode == MP_STEREO3D_AB2L;
+    div[0] = split_x ? 2 : 1;
+    div[1] = split_y ? 2 : 1;
+}
+
+// Build the vertices of OSD part `index` (once per screen division of the
+// stereo mode) and draw them into fbo with sc; see mpgl_osd_draw_prepare().
+void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index,
+                          struct gl_shader_cache *sc, struct ra_fbo fbo)
+{
+    struct mpgl_osd_part *part = ctx->parts[index];
+    const int *blend = blend_factors[part->format];
+    int div[2];
+    get_3d_side_by_side(ctx->stereo_mode, div);
+    // The vertex array is rebuilt from scratch on every draw.
+    part->num_vertices = 0;
+    for (int x = 0; x < div[0]; x++) {
+        for (int y = 0; y < div[1]; y++) {
+            struct gl_transform t;
+            gl_transform_ortho_fbo(&t, fbo);
+
+            // Move the origin by x/y times the OSD size, mapped through t.m.
+            float off_x = ctx->osd_res.w * x;
+            float off_y = ctx->osd_res.h * y;
+            t.t[0] += off_x * t.m[0][0] + off_y * t.m[1][0];
+            t.t[1] += off_x * t.m[0][1] + off_y * t.m[1][1];
+
+            generate_verts(part, t);
+        }
+    }
+
+    gl_sc_blend(sc, blend[0], blend[1], blend[2], blend[3]);
+    gl_sc_dispatch_draw(sc, fbo.tex, false, vertex_vao, MP_ARRAY_SIZE(vertex_vao),
+                        sizeof(struct vertex), part->vertices, part->num_vertices);
+}
+
+// Store res in ctx->osd_res with w/h divided by the stereo mode's divisions.
+static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode)
+{
+    int div[2];
+    get_3d_side_by_side(stereo_mode, div);
+    ctx->osd_res = res;
+    ctx->osd_res.w /= div[0];
+    ctx->osd_res.h /= div[1];
+}
+
+// Run osd_draw() for pts with gen_osd_cb() as callback, refreshing the parts.
+// ctx->change_flag gets set if a part was re-uploaded or its count changed.
+void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts,
+                       int stereo_mode, int draw_flags)
+{
+    for (int i = 0; i < MAX_OSD_PARTS; i++)
+        ctx->parts[i]->num_subparts = 0;
+
+    set_res(ctx, res, stereo_mode);
+    osd_draw(ctx->osd, ctx->osd_res, pts, draw_flags, ctx->formats, gen_osd_cb, ctx);
+    ctx->stereo_mode = stereo_mode;
+
+    // gen_osd_cb() is not necessarily invoked for a part that went away (not
+    // even with num_parts==0), so compare the counts explicitly.
+    for (int i = 0; i < MAX_OSD_PARTS; i++) {
+        struct mpgl_osd_part *part = ctx->parts[i];
+        ctx->change_flag |= part->num_subparts != part->prev_num_subparts;
+        part->prev_num_subparts = part->num_subparts;
+    }
+}
+
+// Optional optimization, see osd_resize() for remarks. Updates ctx->osd_res.
+void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode)
+{
+    set_res(ctx, res, stereo_mode);
+    osd_resize(ctx->osd, ctx->osd_res);
+}
+// Regenerate with stereo mode 0 and no draw flags; returns ctx->change_flag.
+bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res,
+                           double pts)
+{
+    ctx->change_flag = false;
+    mpgl_osd_generate(ctx, *res, pts, 0, 0);
+    return ctx->change_flag;
+}
diff --git a/video/out/gpu/osd.h b/video/out/gpu/osd.h
new file mode 100644
index 0000000..00fbc49
--- /dev/null
+++ b/video/out/gpu/osd.h
@@ -0,0 +1,25 @@
+#ifndef MPLAYER_GL_OSD_H
+#define MPLAYER_GL_OSD_H
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include "utils.h"
+#include "shader_cache.h"
+#include "sub/osd.h"
+// Create with mpgl_osd_init(); mpgl_osd_destroy() also accepts NULL.
+struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log,
+                               struct osd_state *osd);
+void mpgl_osd_destroy(struct mpgl_osd *ctx);
+// Generating, resizing, per-part drawing and change checking (see osd.c).
+void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts,
+                       int stereo_mode, int draw_flags);
+void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode);
+bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index,
+                           struct gl_shader_cache *sc);
+void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index,
+                          struct gl_shader_cache *sc, struct ra_fbo fbo);
+bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res,
+                           double pts);
+
+#endif
diff --git a/video/out/gpu/ra.c b/video/out/gpu/ra.c
new file mode 100644
index 0000000..855f9b6
--- /dev/null
+++ b/video/out/gpu/ra.c
@@ -0,0 +1,424 @@
+#include "common/common.h"
+#include "common/msg.h"
+#include "video/img_format.h"
+
+#include "ra.h"
+
+// Register a named native resource with ra. Only the two pointers are stored;
+// neither the name string nor the data is copied.
+void ra_add_native_resource(struct ra *ra, const char *name, void *data)
+{
+    struct ra_native_resource res = {.name = name, .data = data};
+
+    MP_TARRAY_APPEND(ra, ra->native_resources, ra->num_native_resources, res);
+}
+
+// Return the data of the first native resource called name, or NULL if no
+// resource with that name was added.
+void *ra_get_native_resource(struct ra *ra, const char *name)
+{
+    for (int i = 0; i < ra->num_native_resources; i++) {
+        if (!strcmp(ra->native_resources[i].name, name))
+            return ra->native_resources[i].data;
+    }
+    return NULL;
+}
+
+// Create a texture through the backend. The asserts require that the sizes of
+// unused dimensions are 1 and that params->dimensions is within 1-3.
+struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params)
+{
+    if (params->dimensions == 1) {
+        assert(params->h == 1 && params->d == 1);
+    } else if (params->dimensions == 2) {
+        assert(params->d == 1);
+    } else {
+        assert(params->dimensions >= 1 && params->dimensions <= 3);
+    }
+
+    return ra->fns->tex_create(ra, params);
+}
+// Destroy *tex via the backend if it is non-NULL; *tex is NULL afterwards.
+void ra_tex_free(struct ra *ra, struct ra_tex **tex)
+{
+    if (*tex)
+        ra->fns->tex_destroy(ra, *tex);
+    *tex = NULL;
+}
+// Create a buffer via the backend; returns the backend's buf_create() result.
+struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params)
+{
+    return ra->fns->buf_create(ra, params);
+}
+// Destroy *buf via the backend if it is non-NULL; *buf is NULL afterwards.
+void ra_buf_free(struct ra *ra, struct ra_buf **buf)
+{
+    if (*buf)
+        ra->fns->buf_destroy(ra, *buf);
+    *buf = NULL;
+}
+// Destroy *ra via its own destroy callback, talloc_free() it, and NULL *ra.
+void ra_free(struct ra **ra)
+{
+    if (*ra)
+        (*ra)->fns->destroy(*ra);
+    talloc_free(*ra);
+    *ra = NULL;
+}
+
+// Size in bytes of a single element of the given variable type.
+// Returns 0 for types not handled here.
+size_t ra_vartype_size(enum ra_vartype type)
+{
+    return type == RA_VARTYPE_INT        ? sizeof(int)
+         : type == RA_VARTYPE_FLOAT      ? sizeof(float)
+         : type == RA_VARTYPE_BYTE_UNORM ? 1
+         : 0;
+}
+
+// Host memory layout of `input`; all-zero if its type has no known size.
+struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input)
+{
+    struct ra_layout layout = {0};
+    size_t el_size = ra_vartype_size(input->type);
+    if (el_size) {
+        // Host data is always tightly packed.
+        layout.align = 1;
+        layout.stride = el_size * input->dim_v;
+        layout.size = el_size * input->dim_v * input->dim_m;
+    }
+    return layout;
+}
+// talloc copy of inputs[] under ta_parent; names are duplicated onto the copy.
+static struct ra_renderpass_input *dup_inputs(void *ta_parent,
+            const struct ra_renderpass_input *inputs, int num_inputs)
+{
+    struct ra_renderpass_input *res =
+        talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0]));
+    for (int n = 0; n < num_inputs; n++)
+        res[n].name = talloc_strdup(res, res[n].name);
+    return res;
+}
+
+// Return a newly allocated deep copy of params; the copies hang off the result.
+struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent,
+        const struct ra_renderpass_params *params)
+{
+    struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res);
+    *res = *params; // shallow copy first; pointer members are replaced below
+    res->inputs = dup_inputs(res, res->inputs, res->num_inputs);
+    res->vertex_attribs =
+        dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs);
+    res->cached_program = bstrdup(res, res->cached_program);
+    res->vertex_shader = talloc_strdup(res, res->vertex_shader);
+    res->frag_shader = talloc_strdup(res, res->frag_shader);
+    res->compute_shader = talloc_strdup(res, res->compute_shader);
+    return res;
+}
+// One row of ra_glsl_fmts: a component layout and its GLSL format name.
+struct glsl_fmt {
+    enum ra_ctype ctype;
+    int num_components;
+    int component_depth[4];     // bits per component; unused entries are 0
+    const char *glsl_format;
+};
+
+// From the GLSL specification, sans snorm/sint; see ra_fmt_glsl_format().
+static const struct glsl_fmt ra_glsl_fmts[] = {
+    {RA_CTYPE_FLOAT, 1, {16},             "r16f"},
+    {RA_CTYPE_FLOAT, 1, {32},             "r32f"},
+    {RA_CTYPE_FLOAT, 2, {16, 16},         "rg16f"},
+    {RA_CTYPE_FLOAT, 2, {32, 32},         "rg32f"},
+    {RA_CTYPE_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"},
+    {RA_CTYPE_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"},
+    {RA_CTYPE_FLOAT, 3, {11, 11, 10},     "r11f_g11f_b10f"},
+    // Normalized unsigned integer formats
+    {RA_CTYPE_UNORM, 1, {8},              "r8"},
+    {RA_CTYPE_UNORM, 1, {16},             "r16"},
+    {RA_CTYPE_UNORM, 2, {8,  8},          "rg8"},
+    {RA_CTYPE_UNORM, 2, {16, 16},         "rg16"},
+    {RA_CTYPE_UNORM, 4, {8,  8,  8,  8},  "rgba8"},
+    {RA_CTYPE_UNORM, 4, {16, 16, 16, 16}, "rgba16"},
+    {RA_CTYPE_UNORM, 4, {10, 10, 10,  2}, "rgb10_a2"},
+    // Unsigned integer formats
+    {RA_CTYPE_UINT,  1, {8},              "r8ui"},
+    {RA_CTYPE_UINT,  1, {16},             "r16ui"},
+    {RA_CTYPE_UINT,  1, {32},             "r32ui"},
+    {RA_CTYPE_UINT,  2, {8,  8},          "rg8ui"},
+    {RA_CTYPE_UINT,  2, {16, 16},         "rg16ui"},
+    {RA_CTYPE_UINT,  2, {32, 32},         "rg32ui"},
+    {RA_CTYPE_UINT,  4, {8,  8,  8,  8},  "rgba8ui"},
+    {RA_CTYPE_UINT,  4, {16, 16, 16, 16}, "rgba16ui"},
+    {RA_CTYPE_UINT,  4, {32, 32, 32, 32}, "rgba32ui"},
+    {RA_CTYPE_UINT,  4, {10, 10, 10,  2}, "rgb10_a2ui"},
+};
+
+// Return the GLSL format name (e.g. "rgba16f") of the table entry matching
+// fmt in component type, component count and bit depths; NULL if none does.
+const char *ra_fmt_glsl_format(const struct ra_format *fmt)
+{
+    for (int n = 0; n < MP_ARRAY_SIZE(ra_glsl_fmts); n++) {
+        const struct glsl_fmt *cand = &ra_glsl_fmts[n];
+
+        if (cand->ctype != fmt->ctype ||
+            cand->num_components != fmt->num_components)
+            continue;
+
+        // Only the first num_components depths are compared.
+        bool same_depths = true;
+        for (int i = 0; i < fmt->num_components && same_depths; i++)
+            same_depths = cand->component_depth[i] == fmt->component_depth[i];
+
+        if (same_depths)
+            return cand->glsl_format;
+    }
+
+    return NULL;
+}
+
+// A format is "regular" if it is tightly packed without external padding, all
+// components have the same bit size and depth, and the shader returns the
+// components in the same order as they are stored in memory.
+static bool ra_format_is_regular(const struct ra_format *fmt)
+{
+    if (!(fmt->pixel_size && fmt->num_components && fmt->ordered))
+        return false;
+    int size0 = fmt->component_size[0];
+    int depth0 = fmt->component_depth[0];
+    for (int i = 1; i < fmt->num_components; i++) {
+        if (fmt->component_size[i] != size0 ||
+            fmt->component_depth[i] != depth0)
+            return false;
+    }
+    return size0 * fmt->num_components == fmt->pixel_size * 8;
+}
+
+// First regular, linear-filterable UNORM format of that size; NULL if none.
+const struct ra_format *ra_find_unorm_format(struct ra *ra,
+        int bytes_per_component, int n_components)
+{
+    for (int i = 0; i < ra->num_formats; i++) {
+        const struct ra_format *fmt = ra->formats[i];
+        bool match = fmt->ctype == RA_CTYPE_UNORM && fmt->linear_filter &&
+                     fmt->num_components == n_components &&
+                     fmt->pixel_size == bytes_per_component * n_components &&
+                     fmt->component_depth[0] == bytes_per_component * 8;
+        if (match && ra_format_is_regular(fmt))
+            return fmt;
+    }
+    return NULL;
+}
+
+// First regular RA_CTYPE_UINT format of that component size; NULL if none.
+const struct ra_format *ra_find_uint_format(struct ra *ra,
+        int bytes_per_component, int n_components)
+{
+    for (int i = 0; i < ra->num_formats; i++) {
+        const struct ra_format *fmt = ra->formats[i];
+        bool match = fmt->ctype == RA_CTYPE_UINT &&
+                     fmt->num_components == n_components &&
+                     fmt->pixel_size == bytes_per_component * n_components &&
+                     fmt->component_depth[0] == bytes_per_component * 8;
+        if (match && ra_format_is_regular(fmt))
+            return fmt;
+    }
+    return NULL;
+}
+
+// Find a filterable regular float format whose pixel size matches
+// n_components C values of bytes_per_component bytes each, for upload. The
+// format may drop mantissa bits (for example a 16 bit float format can be
+// returned for bytes_per_component == 4). Possibly faster formats win.
+static const struct ra_format *ra_find_float_format(struct ra *ra,
+        int bytes_per_component, int n_components)
+{
+    // ra->formats is assumed to be sorted by performance, so take the first
+    // hit. Requiring a depth of at least 16 bits skips fringe formats.
+    for (int i = 0; i < ra->num_formats; i++) {
+        const struct ra_format *fmt = ra->formats[i];
+        bool match = fmt->ctype == RA_CTYPE_FLOAT && fmt->linear_filter &&
+                     fmt->num_components == n_components &&
+                     fmt->pixel_size == bytes_per_component * n_components &&
+                     fmt->component_depth[0] >= 16;
+        if (match && ra_format_is_regular(fmt))
+            return fmt;
+    }
+    return NULL;
+}
+
+// Return a filterable regular format with n_components components that uses at
+// least float16 internally but is transferred as normal C floats on the CPU
+// side (so no 32->16 bit conversion is needed on the CPU). NULL if none exists.
+const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components)
+{
+    return ra_find_float_format(ra, sizeof(float), n_components);
+}
+
+// Return the first format in ra->formats whose name equals name, or NULL.
+const struct ra_format *ra_find_named_format(struct ra *ra, const char *name)
+{
+    for (int i = 0; i < ra->num_formats; i++) {
+        if (!strcmp(ra->formats[i]->name, name))
+            return ra->formats[i];
+    }
+    return NULL;
+}
+
+// Pick a texture format for a plane with n_channels components of `bytes`
+// bytes each. Integer planes prefer a normalized (unorm) format and fall back
+// to an unsigned integer one; float planes use a float format. NULL if none.
+static const struct ra_format *find_plane_format(struct ra *ra, int bytes,
+                                                 int n_channels,
+                                                 enum mp_component_type ctype)
+{
+    if (ctype == MP_COMPONENT_TYPE_FLOAT)
+        return ra_find_float_format(ra, bytes, n_channels);
+    if (ctype != MP_COMPONENT_TYPE_UINT)
+        return NULL;
+
+    // Fixed point first; a plain integer format is only the fallback.
+    const struct ra_format *unorm = ra_find_unorm_format(ra, bytes, n_channels);
+    if (unorm)
+        return unorm;
+    return ra_find_uint_format(ra, bytes, n_channels);
+}
+
+// Put a mapping of imgfmt to texture formats into *out. Basically it selects
+// the correct texture formats needed to represent an imgfmt in a shader, with
+// textures using the same memory organization as on the CPU.
+// Each plane is represented by a texture, and each texture has a RGBA
+// component order. out->components describes the meaning of them.
+// May return integer formats for >8 bit formats, if the driver has no
+// normalized 16 bit formats.
+// Returns false (and *out is not touched) if no format is found.
+bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out)
+{
+    struct ra_imgfmt_desc res = {.component_type = RA_CTYPE_UNKNOWN};
+    // Regular formats: one texture per plane, chosen by find_plane_format().
+    struct mp_regular_imgfmt regfmt;
+    if (mp_get_regular_imgfmt(&regfmt, imgfmt)) {
+        res.num_planes = regfmt.num_planes;
+        res.component_bits = regfmt.component_size * 8;
+        res.component_pad = regfmt.component_pad;
+        for (int n = 0; n < regfmt.num_planes; n++) {
+            struct mp_regular_imgfmt_plane *plane = &regfmt.planes[n];
+            res.planes[n] = find_plane_format(ra, regfmt.component_size,
+                                              plane->num_components,
+                                              regfmt.component_type);
+            if (!res.planes[n])
+                return false;
+            for (int i = 0; i < plane->num_components; i++)
+                res.components[n][i] = plane->components[i];
+            // Dropping LSBs when shifting will lead to dropped MSBs.
+            if (res.component_bits > res.planes[n]->component_depth[0] &&
+                res.component_pad < 0)
+                return false;
+            // Renderer restriction, but actually an unwanted corner case.
+            if (res.component_type != RA_CTYPE_UNKNOWN &&
+                res.component_type != res.planes[n]->ctype)
+                return false; // planes would mix component types
+            res.component_type = res.planes[n]->ctype;
+        }
+        res.chroma_w = 1 << regfmt.chroma_xs;
+        res.chroma_h = 1 << regfmt.chroma_ys;
+        goto supported;
+    }
+    // Otherwise use a backend format that lists imgfmt as its special_imgfmt.
+    for (int n = 0; n < ra->num_formats; n++) {
+        if (imgfmt && ra->formats[n]->special_imgfmt == imgfmt) {
+            res = *ra->formats[n]->special_imgfmt_desc;
+            goto supported;
+        }
+    }
+
+    // Unsupported format
+    return false;
+
+supported:
+
+    *out = res;
+    return true;
+}
+
+// Printable name of a component type for the dump functions. Types without
+// a name of their own are reported as "unknown".
+static const char *ctype_to_str(enum ra_ctype ctype)
+{
+    return ctype == RA_CTYPE_UNORM ? "unorm"
+         : ctype == RA_CTYPE_UINT  ? "uint "
+         : ctype == RA_CTYPE_FLOAT ? "float"
+         : "unknown";
+}
+// Log a table of ra's texture formats at msgl; no-op if msgl is not enabled.
+void ra_dump_tex_formats(struct ra *ra, int msgl)
+{
+    if (!mp_msg_test(ra->log, msgl))
+        return;
+    MP_MSG(ra, msgl, "Texture formats:\n");
+    MP_MSG(ra, msgl, "  NAME       COMP*TYPE SIZE           DEPTH PER COMP.\n");
+    for (int n = 0; n < ra->num_formats; n++) {
+        const struct ra_format *fmt = ra->formats[n];
+        const char *ctype = ctype_to_str(fmt->ctype);
+        char cl[40] = ""; // per component: size, plus "/depth" where it differs
+        for (int i = 0; i < fmt->num_components; i++) {
+            mp_snprintf_cat(cl, sizeof(cl), "%s%d", i ? " " : "",
+                            fmt->component_size[i]);
+            if (fmt->component_size[i] != fmt->component_depth[i])
+                mp_snprintf_cat(cl, sizeof(cl), "/%d", fmt->component_depth[i]);
+        }
+        MP_MSG(ra, msgl, "  %-10s %d*%s %3dB %s %s %s %s {%s}\n", fmt->name,
+               fmt->num_components, ctype, fmt->pixel_size,
+               fmt->luminance_alpha ? "LA" : "  ",
+               fmt->linear_filter ? "LF" : "  ",
+               fmt->renderable ? "CR" : "  ",
+               fmt->storable ? "ST" : "  ", cl);
+    }
+    MP_MSG(ra, msgl, " LA = LUMINANCE_ALPHA hack format\n"); // flag legend
+    MP_MSG(ra, msgl, " LF = linear filterable\n");
+    MP_MSG(ra, msgl, " CR = can be used for render targets\n");
+    MP_MSG(ra, msgl, " ST = can be used for storable images\n");
+}
+// Log a one line summary of desc at msgl (planes, chroma, bits, formats).
+void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc,
+                         int msgl)
+{
+    char pl[80] = "";   // component letters per plane, '/' separated
+    char pf[80] = "";   // texture format names per plane, '/' separated
+    for (int n = 0; n < desc->num_planes; n++) {
+        if (n > 0) {
+            mp_snprintf_cat(pl, sizeof(pl), "/");
+            mp_snprintf_cat(pf, sizeof(pf), "/");
+        }
+        char t[5] = {0};
+        for (int i = 0; i < 4; i++)
+            t[i] = "_rgba"[desc->components[n][i]];   // value 0 maps to '_'
+        for (int i = 3; i > 0 && t[i] == '_'; i--)    // trim trailing '_'
+            t[i] = '\0';
+        mp_snprintf_cat(pl, sizeof(pl), "%s", t);
+        mp_snprintf_cat(pf, sizeof(pf), "%s", desc->planes[n]->name);
+    }
+    MP_MSG(ra, msgl, "%d planes %dx%d %d/%d [%s] (%s) [%s]\n",
+           desc->num_planes, desc->chroma_w, desc->chroma_h,
+           desc->component_bits, desc->component_pad, pf, pl,
+           ctype_to_str(desc->component_type));
+}
+// Log each named image format and, where ra supports it, its texture mapping.
+void ra_dump_img_formats(struct ra *ra, int msgl)
+{
+    if (!mp_msg_test(ra->log, msgl))
+        return;
+    MP_MSG(ra, msgl, "Image formats:\n");
+    for (int imgfmt = IMGFMT_START; imgfmt < IMGFMT_END; imgfmt++) {
+        const char *name = mp_imgfmt_to_name(imgfmt);
+        if (strcmp(name, "unknown") == 0)   // skip IDs without a name
+            continue;
+        MP_MSG(ra, msgl, "  %s", name);
+        struct ra_imgfmt_desc desc;
+        if (ra_get_imgfmt_desc(ra, imgfmt, &desc)) {
+            MP_MSG(ra, msgl, " => ");
+            ra_dump_imgfmt_desc(ra, &desc, msgl);
+        } else {
+            MP_MSG(ra, msgl, "\n");   // unsupported: only terminate the line
+        }
+    }
+}
diff --git a/video/out/gpu/ra.h b/video/out/gpu/ra.h
new file mode 100644
index 0000000..5f229f8
--- /dev/null
+++ b/video/out/gpu/ra.h
@@ -0,0 +1,559 @@
+#pragma once
+
+#include "common/common.h"
+#include "misc/bstr.h"
+
+// Handle for a rendering API backend.
+struct ra {
+    struct ra_fns *fns;
+    void *priv;
+
+    int glsl_version;       // GLSL version (e.g. 300 => 3.0)
+    bool glsl_es;           // use ES dialect
+    bool glsl_vulkan;       // use vulkan dialect
+
+    struct mp_log *log;
+
+    // RA_CAP_* bit field. The RA backend must set supported features at init
+    // time.
+    uint64_t caps;
+
+    // Maximum supported width and height of a 2D texture. Set by the RA backend
+    // at init time.
+    int max_texture_wh;
+
+    // Maximum shared memory for compute shaders. Set by the RA backend at init
+    // time.
+    size_t max_shmem;
+
+    // Maximum number of threads in a compute work group. Set by the RA backend
+    // at init time.
+    size_t max_compute_group_threads;
+
+    // Maximum push constant size. Set by the RA backend at init time.
+    size_t max_pushc_size;
+
+    // Set of supported texture formats. Must be added by RA backend at init time.
+    // If there are equivalent formats with different caveats, the preferred
+    // formats should have a lower index. (E.g. GLES3 should put rg8 before la.)
+    struct ra_format **formats;
+    int num_formats;
+
+    // Accelerate texture uploads via an extra PBO even when
+    // RA_CAP_DIRECT_UPLOAD is supported. This is basically only relevant for
+    // OpenGL. Set by the RA user.
+    bool use_pbo;
+
+    // Array of native resources. For the most part an "escape" mechanism, and
+    // usually does not contain parameters required for basic functionality.
+    struct ra_native_resource *native_resources;
+    int num_native_resources;
+};
+
+// For passing through windowing system specific parameters and such. The
+// names are always internal (the libmpv render API uses mpv_render_param_type
+// and maps them to names internally).
+// For example, a name="x11" entry has a X11 display as (Display*)data.
+struct ra_native_resource {
+    const char *name;
+    void *data;
+};
+
+// Add a ra_native_resource entry. Both name and data pointers must stay valid
+// until ra termination.
+void ra_add_native_resource(struct ra *ra, const char *name, void *data);
+
+// Search ra->native_resources, returns NULL on failure.
+void *ra_get_native_resource(struct ra *ra, const char *name);
+
+enum {
+    RA_CAP_TEX_1D         = 1 << 0, // supports 1D textures (as shader inputs)
+    RA_CAP_TEX_3D         = 1 << 1, // supports 3D textures (as shader inputs)
+    RA_CAP_BLIT           = 1 << 2, // supports ra_fns.blit
+    RA_CAP_COMPUTE        = 1 << 3, // supports compute shaders
+    RA_CAP_DIRECT_UPLOAD  = 1 << 4, // supports tex_upload without ra_buf
+    RA_CAP_BUF_RO         = 1 << 5, // supports RA_VARTYPE_BUF_RO
+    RA_CAP_BUF_RW         = 1 << 6, // supports RA_VARTYPE_BUF_RW
+    RA_CAP_NESTED_ARRAY   = 1 << 7, // supports nested arrays
+    RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO)
+    RA_CAP_GATHER         = 1 << 9, // supports textureGather in GLSL
+    RA_CAP_FRAGCOORD      = 1 << 10, // supports reading from gl_FragCoord
+    RA_CAP_PARALLEL_COMPUTE  = 1 << 11, // supports parallel compute shaders
+    RA_CAP_NUM_GROUPS     = 1 << 12, // supports gl_NumWorkGroups
+    RA_CAP_SLOW_DR        = 1 << 13, // direct rendering is assumed to be slow
+};
+
+enum ra_ctype {
+    RA_CTYPE_UNKNOWN = 0,   // also used for inconsistent multi-component formats
+    RA_CTYPE_UNORM,         // unsigned normalized integer (fixed point) formats
+    RA_CTYPE_UINT,          // full integer formats
+    RA_CTYPE_FLOAT,         // float formats (signed, any bit size)
+};
+
+// All formats must be useable as texture formats. All formats must be byte
+// aligned (all pixels start and end on a byte boundary), at least as far as
+// CPU transfers are concerned.
+struct ra_format {
+    // All fields are read-only after creation.
+    const char *name;       // symbolic name for user interaction/debugging
+    void *priv;
+    enum ra_ctype ctype;    // data type of each component
+    bool ordered;           // components are sequential in memory, and returned
+                            // by the shader in memory order (the shader can
+                            // return arbitrary values for unused components)
+    int num_components;     // component count, 0 if not applicable, max. 4
+    int component_size[4];  // in bits, all entries 0 if not applicable
+    int component_depth[4]; // bits in use for each component, 0 if not applicable
+                            // (_must_ be set if component_size[] includes padding,
+                            //  and the real precision as seen by shader is lower)
+    int pixel_size;         // in bytes, total pixel size (0 if opaque)
+    bool luminance_alpha;   // pre-GL_ARB_texture_rg hack for 2 component textures
+                            // if this is set, shader must use .ra instead of .rg
+                            // only applies to 2-component textures
+    bool linear_filter;     // linear filtering available from shader
+    bool renderable;        // can be used for render targets
+    bool storable;          // can be used for storage images
+    bool dummy_format;      // is not a real ra_format but a fake one (e.g. FBO).
+                            // dummy formats cannot be used to create textures
+
+    // If not 0, the format represents some sort of packed fringe format, whose
+    // shader representation is given by the special_imgfmt_desc pointer.
+    int special_imgfmt;
+    const struct ra_imgfmt_desc *special_imgfmt_desc;
+
+    // This gives the GLSL image format corresponding to the format, if any.
+    // (e.g. rgba16ui)
+    const char *glsl_format;
+};
+
+struct ra_tex_params {
+    int dimensions;         // 1-3 for 1D-3D textures
+    // Size of the texture. 1D textures require h=d=1, 2D textures require d=1.
+    int w, h, d;
+    const struct ra_format *format;
+    bool render_src;        // must be useable as source texture in a shader
+    bool render_dst;        // must be useable as target texture in a shader
+    bool storage_dst;       // must be usable as a storage image (RA_VARTYPE_IMG_W)
+    bool blit_src;          // must be usable as a blit source
+    bool blit_dst;          // must be usable as a blit destination
+    bool host_mutable;      // texture may be updated with tex_upload
+    bool downloadable;      // texture can be read with tex_download
+    // When used as render source texture.
+    bool src_linear;        // if false, use nearest sampling (whether this can
+                            // be true depends on ra_format.linear_filter)
+    bool src_repeat;        // if false, clamp texture coordinates to edge
+                            // if true, repeat texture coordinates
+    bool non_normalized;    // hack for GL_TEXTURE_RECTANGLE OSX idiocy
+                            // always set to false, except in OSX code
+    bool external_oes;      // hack for GL_TEXTURE_EXTERNAL_OES idiocy
+    // If non-NULL, the texture will be created with these contents. Using
+    // this does *not* require setting host_mutable. Otherwise, the initial
+    // data is undefined.
+    void *initial_data;
+};
+
+// Conflates the following typical GPU API concepts:
+// - texture itself
+// - sampler state
+// - staging buffers for texture upload
+// - framebuffer objects
+// - wrappers for swapchain framebuffers
+// - synchronization needed for upload/rendering/etc.
+struct ra_tex {
+    // All fields are read-only after creation.
+    struct ra_tex_params params;
+    void *priv;
+};
+
+struct ra_tex_upload_params {
+    struct ra_tex *tex; // Texture to upload to
+    bool invalidate;    // Discard pre-existing data not in the region uploaded
+    // Uploading from buffer:
+    struct ra_buf *buf; // Buffer to upload from (mutually exclusive with `src`)
+    size_t buf_offset;  // Start of data within buffer (bytes)
+    // Uploading directly: (Note: If RA_CAP_DIRECT_UPLOAD is not set, then this
+    // will be internally translated to a tex_upload buffer by the RA)
+    const void *src;    // Address of data
+    // For 2D textures only:
+    struct mp_rect *rc; // Region to upload. NULL means entire image
+    ptrdiff_t stride;   // The size of a horizontal line in bytes (*not* texels!)
+};
+
+struct ra_tex_download_params {
+    struct ra_tex *tex; // Texture to download from
+    // Downloading directly (set by caller, data written to by callee):
+    void *dst;          // Address of data (packed with no alignment)
+    ptrdiff_t stride;   // The size of a horizontal line in bytes (*not* texels!)
+};
+
+// Buffer usage type. This restricts what types of operations may be performed
+// on a buffer.
+enum ra_buf_type {
+    RA_BUF_TYPE_INVALID,
+    RA_BUF_TYPE_TEX_UPLOAD,     // texture upload buffer (pixel buffer object)
+    RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW
+    RA_BUF_TYPE_UNIFORM,        // uniform buffer (UBO), for RA_VARTYPE_BUF_RO
+    RA_BUF_TYPE_VERTEX,         // not publicly usable (RA-internal usage)
+    RA_BUF_TYPE_SHARED_MEMORY,  // device memory for sharing with external API
+};
+
+struct ra_buf_params {
+    enum ra_buf_type type;
+    size_t size;
+    bool host_mapped;  // create a read-writable persistent mapping (ra_buf.data)
+    bool host_mutable; // contents may be updated via buf_update()
+    // If non-NULL, the buffer will be created with these contents. Otherwise,
+    // the initial data is undefined.
+    void *initial_data;
+};
+
+// A generic buffer, which can be used for many purposes (texture upload,
+// storage buffer, uniform buffer, etc.)
+struct ra_buf {
+    // All fields are read-only after creation.
+    struct ra_buf_params params;
+    void *data; // for persistently mapped buffers, points to the first byte
+    void *priv;
+};
+
+// Type of a shader uniform variable, or a vertex attribute. In all cases,
+// vectors and matrices are done by having more than 1 value.
+enum ra_vartype {
+    RA_VARTYPE_INVALID,
+    RA_VARTYPE_INT,             // C: int, GLSL: int, ivec*
+    RA_VARTYPE_FLOAT,           // C: float, GLSL: float, vec*, mat*
+    RA_VARTYPE_TEX,             // C: ra_tex*, GLSL: various sampler types
+                                // ra_tex.params.render_src must be true
+    RA_VARTYPE_IMG_W,           // C: ra_tex*, GLSL: various image types
+                                // write-only (W) image for compute shaders
+                                // ra_tex.params.storage_dst must be true
+    RA_VARTYPE_BYTE_UNORM,      // C: uint8_t, GLSL: int, vec* (vertex data only)
+    RA_VARTYPE_BUF_RO,          // C: ra_buf*, GLSL: uniform buffer block
+                                // buf type must be RA_BUF_TYPE_UNIFORM
+    RA_VARTYPE_BUF_RW,          // C: ra_buf*, GLSL: shader storage buffer block
+                                // buf type must be RA_BUF_TYPE_SHADER_STORAGE
+    RA_VARTYPE_COUNT
+};
+
+// Returns the host size of a ra_vartype, or 0 for abstract vartypes (e.g. tex)
+size_t ra_vartype_size(enum ra_vartype type);
+
+// Represents a uniform, texture input parameter, and similar things.
+struct ra_renderpass_input {
+    const char *name;       // name as used in the shader
+    enum ra_vartype type;
+    // The total number of values is given by dim_v * dim_m.
+    int dim_v;              // vector dimension (1 for non-vector and non-matrix)
+    int dim_m;              // additional matrix dimension (dim_v x dim_m)
+    // Vertex data: byte offset of the attribute into the vertex struct
+    size_t offset;
+    // RA_VARTYPE_TEX: texture unit
+    // RA_VARTYPE_IMG_W: image unit
+    // RA_VARTYPE_BUF_* buffer binding point
+    // Other uniforms: unused
+    // Bindings must be unique within each namespace, as specified by
+    // desc_namespace()
+    int binding;
+};
+
+// Represents the layout requirements of an input value
+struct ra_layout {
+    size_t align;  // the alignment requirements (always a power of two)
+    size_t stride; // the delta between two rows of an array/matrix
+    size_t size;   // the total size of the input
+};
+
+// Returns the host layout of a render pass input. Returns {0} for renderpass
+// inputs without a corresponding host representation (e.g. textures/buffers)
+struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input);
+
+enum ra_blend {
+    RA_BLEND_ZERO,
+    RA_BLEND_ONE,
+    RA_BLEND_SRC_ALPHA,
+    RA_BLEND_ONE_MINUS_SRC_ALPHA,
+};
+
+enum ra_renderpass_type {
+    RA_RENDERPASS_TYPE_INVALID,
+    RA_RENDERPASS_TYPE_RASTER,  // vertex+fragment shader
+    RA_RENDERPASS_TYPE_COMPUTE, // compute shader
+};
+
+// Static part of a rendering pass. It conflates the following:
+//  - compiled shader and its list of uniforms
+//  - vertex attributes and its shader mappings
+//  - blending parameters
+// (For Vulkan, this would be shader module + pipeline state.)
+// Upon creation, the values of dynamic values such as uniform contents (whose
+// initial values are not provided here) are required to be 0.
+struct ra_renderpass_params {
+    enum ra_renderpass_type type;
+
+    // Uniforms, including texture/sampler inputs.
+    struct ra_renderpass_input *inputs;
+    int num_inputs;
+    size_t push_constants_size; // must be <= ra.max_pushc_size and a multiple of 4
+
+    // Highly implementation-specific byte array storing a compiled version
+    // of the program. Can be used to speed up shader compilation. A backend
+    // can read this in renderpass_create, or set this on the newly created
+    // ra_renderpass params field.
+    bstr cached_program;
+
+    // --- type==RA_RENDERPASS_TYPE_RASTER only
+
+    // Describes the format of the vertex data. When using ra.glsl_vulkan,
+    // the order of this array must match the vertex attribute locations.
+    struct ra_renderpass_input *vertex_attribs;
+    int num_vertex_attribs;
+    int vertex_stride;
+
+    // Format of the target texture
+    const struct ra_format *target_format;
+
+    // Shader text, in GLSL. (Yes, you need a GLSL compiler.)
+    // These are complete shaders, including prelude and declarations.
+    const char *vertex_shader;
+    const char *frag_shader;
+
+    // Target blending mode. If enable_blend is false, the blend_ fields can
+    // be ignored.
+    bool enable_blend;
+    enum ra_blend blend_src_rgb;
+    enum ra_blend blend_dst_rgb;
+    enum ra_blend blend_src_alpha;
+    enum ra_blend blend_dst_alpha;
+
+    // If true, the contents of `target` not written to will become undefined
+    bool invalidate_target;
+
+    // --- type==RA_RENDERPASS_TYPE_COMPUTE only
+
+    // Shader text, like vertex_shader/frag_shader.
+    const char *compute_shader;
+};
+
+struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent,
+        const struct ra_renderpass_params *params);
+
+// Conflates the following typical GPU API concepts:
+// - various kinds of shaders
+// - rendering pipelines
+// - descriptor sets, uniforms, other bindings
+// - all synchronization necessary
+// - the current values of all uniforms (this one makes it relatively stateful
+//   from an API perspective)
+struct ra_renderpass {
+    // All fields are read-only after creation.
+    struct ra_renderpass_params params;
+    void *priv;
+};
+
+// An input value (see ra_renderpass_input).
+struct ra_renderpass_input_val {
+    int index;  // index into ra_renderpass_params.inputs[]
+    void *data; // pointer to data according to ra_renderpass_input
+                // (e.g. type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9])
+};
+
+// Parameters for performing a rendering pass (basically the dynamic params).
+// These change potentially every time.
+struct ra_renderpass_run_params {
+    struct ra_renderpass *pass;
+
+    // Generally this lists parameters only which changed since the last
+    // invocation and need to be updated. The ra_renderpass instance is
+    // supposed to keep unchanged values from the previous run.
+    // For non-primitive types like textures, these entries are always added,
+    // even if they do not change.
+    struct ra_renderpass_input_val *values;
+    int num_values;
+    void *push_constants; // must be set if params.push_constants_size > 0
+
+    // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only
+
+    // target->params.render_dst must be true, and target->params.format must
+    // match pass->params.target_format.
+    struct ra_tex *target;
+    struct mp_rect viewport;
+    struct mp_rect scissors;
+
+    // (The primitive type is always a triangle list.)
+    void *vertex_data;
+    int vertex_count;   // number of vertex elements, not bytes
+
+    // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only
+
+    // Number of work groups to be run in X/Y/Z dimensions.
+    int compute_groups[3];
+};
+
+// This is an opaque type provided by the implementation, but we want to at
+// least give it a saner name than void* for code readability purposes.
+typedef void ra_timer;
+
+// Rendering API entrypoints. (Note: there are some additional hidden features
+// you need to take care of. For example, hwdec mapping will be provided
+// separately from ra, but might need to call into ra private code.)
+struct ra_fns {
+    void (*destroy)(struct ra *ra);
+
+    // Create a texture (with undefined contents). Return NULL on failure.
+    // This is a rare operation, and normally textures and even FBOs for
+    // temporary rendering intermediate data are cached.
+    struct ra_tex *(*tex_create)(struct ra *ra,
+                                 const struct ra_tex_params *params);
+
+    void (*tex_destroy)(struct ra *ra, struct ra_tex *tex);
+
+    // Upload data to a texture. This is an extremely common operation. When
+    // using a buffer, the contents of the buffer must exactly match the image
+    // - conversions between bit depth etc. are not supported. The buffer *may*
+    // be marked as "in use" while this operation is going on, and the contents
+    // must not be touched again by the API user until buf_poll returns true.
+    // Returns whether successful.
+    bool (*tex_upload)(struct ra *ra, const struct ra_tex_upload_params *params);
+
+    // Copy data from the texture to memory. ra_tex_params.downloadable must
+    // have been set to true on texture creation.
+    bool (*tex_download)(struct ra *ra, struct ra_tex_download_params *params);
+
+    // Create a buffer. This can be used as a persistently mapped buffer,
+    // a uniform buffer, a shader storage buffer or possibly others.
+    // Not all usage types must be supported; may return NULL if unavailable.
+    struct ra_buf *(*buf_create)(struct ra *ra,
+                                 const struct ra_buf_params *params);
+
+    void (*buf_destroy)(struct ra *ra, struct ra_buf *buf);
+
+    // Update the contents of a buffer, starting at a given offset (*must* be a
+    // multiple of 4) and up to a given size, with the contents of *data. This
+    // is an extremely common operation. Calling this while the buffer is
+    // considered "in use" is an error. (See: buf_poll)
+    void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+                       const void *data, size_t size);
+
+    // Returns if a buffer is currently "in use" or not. Updating the contents
+    // of a buffer (via buf_update or writing to buf->data) while it is still
+    // in use is an error and may result in graphical corruption. Optional, if
+    // NULL then all buffers are always usable.
+    bool (*buf_poll)(struct ra *ra, struct ra_buf *buf);
+
+    // Returns the layout requirements of a uniform buffer element. Optional,
+    // but must be implemented if RA_CAP_BUF_RO is supported.
+    struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp);
+
+    // Returns the layout requirements of a push constant element. Optional,
+    // but must be implemented if ra.max_pushc_size > 0.
+    struct ra_layout (*push_constant_layout)(struct ra_renderpass_input *inp);
+
+    // Returns an abstract namespace index for a given renderpass input type.
+    // This will always be a value >= 0 and < RA_VARTYPE_COUNT. This is used to
+    // figure out which inputs may share the same value of `binding`.
+    int (*desc_namespace)(struct ra *ra, enum ra_vartype type);
+
+    // Clear the dst with the given color (rgba) and within the given scissor.
+    // dst must have dst->params.render_dst==true. Content outside of the
+    // scissor is preserved.
+    void (*clear)(struct ra *ra, struct ra_tex *dst, float color[4],
+                  struct mp_rect *scissor);
+
+    // Copy a sub-rectangle from one texture to another. The source/dest region
+    // is always within the texture bounds. Areas outside the dest region are
+    // preserved. The formats of the textures must be loosely compatible. The
+    // dst texture can be a swapchain framebuffer, but src can not. Only 2D
+    // textures are supported.
+    // The textures must have blit_src and blit_dst set, respectively.
+    // Rectangles with negative width/height lead to flipping, different src/dst
+    // sizes lead to point scaling. Coordinates are always in pixels.
+    // Optional. Only available if RA_CAP_BLIT is set (if it's not set, it must
+    // not be called, even if it's non-NULL).
+    void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+                 struct mp_rect *dst_rc, struct mp_rect *src_rc);
+
+    // Compile a shader and create a pipeline. This is a rare operation.
+    // The params pointer and anything it points to must stay valid until
+    // renderpass_destroy.
+    struct ra_renderpass *(*renderpass_create)(struct ra *ra,
+                                    const struct ra_renderpass_params *params);
+
+    void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass);
+
+    // Perform a render pass, basically drawing a list of triangles to a FBO.
+    // This is an extremely common operation.
+    void (*renderpass_run)(struct ra *ra,
+                           const struct ra_renderpass_run_params *params);
+
+    // Create a timer object. Returns NULL on failure, or if timers are
+    // unavailable for some reason. Optional.
+    ra_timer *(*timer_create)(struct ra *ra);
+
+    void (*timer_destroy)(struct ra *ra, ra_timer *timer);
+
+    // Start recording a timer. Note that valid usage requires you to pair
+    // every start with a stop. Trying to start a timer twice, or trying to
+    // stop a timer before having started it, constitutes invalid usage.
+    void (*timer_start)(struct ra *ra, ra_timer *timer);
+
+    // Stop recording a timer. This also returns any results that have been
+    // measured since the last usage of this ra_timer. It's important to note
+    // that GPU timer measurements are asynchronous, so this function does not
+    // always produce a value - and the values it does produce are typically
+    // delayed by a few frames. When no value is available, this returns 0.
+    uint64_t (*timer_stop)(struct ra *ra, ra_timer *timer);
+
+    // Associates a marker with any past error messages, for debugging
+    // purposes. Optional.
+    void (*debug_marker)(struct ra *ra, const char *msg);
+};
+
+struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params);
+void ra_tex_free(struct ra *ra, struct ra_tex **tex);
+
+struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params);
+void ra_buf_free(struct ra *ra, struct ra_buf **buf);
+
+void ra_free(struct ra **ra);
+
+const struct ra_format *ra_find_unorm_format(struct ra *ra,
+                                             int bytes_per_component,
+                                             int n_components);
+const struct ra_format *ra_find_uint_format(struct ra *ra,
+                                            int bytes_per_component,
+                                            int n_components);
+const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components);
+const struct ra_format *ra_find_named_format(struct ra *ra, const char *name);
+
+struct ra_imgfmt_desc {
+    int num_planes;
+    const struct ra_format *planes[4];
+    // Chroma pixel size (1x1 is 4:4:4)
+    uint8_t chroma_w, chroma_h;
+    // Component storage size in bits (possibly padded). For formats with
+    // different sizes per component, this is arbitrary. For padded formats
+    // like P010 or YUV420P10, padding is included.
+    int component_bits;
+    // Like mp_regular_imgfmt.component_pad.
+    int component_pad;
+    // == planes[n]->ctype (RA_CTYPE_UNKNOWN if not applicable)
+    enum ra_ctype component_type;
+    // For each texture and each texture output (rgba order) describe what
+    // component it returns.
+    // The values are like the values in mp_regular_imgfmt_plane.components[].
+    // Access as components[plane_nr][component_index]. Set unused items to 0.
+    // For ra_format.luminance_alpha, this returns 1/2 ("rg") instead of 1/4
+    // ("ra"). The logic is that the texture format has 2 channels, thus the
+    // data must be returned in the first two components. The renderer fixes
+    // this later.
+    uint8_t components[4][4];
+};
+
+const char *ra_fmt_glsl_format(const struct ra_format *fmt);
+
+bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out);
+
+void ra_dump_tex_formats(struct ra *ra, int msgl);
+void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc,
+                         int msgl);
+void ra_dump_img_formats(struct ra *ra, int msgl);
diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c
new file mode 100644
index 0000000..3e05173
--- /dev/null
+++ b/video/out/gpu/shader_cache.c
@@ -0,0 +1,1056 @@
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <libavutil/sha.h>
+#include <libavutil/mem.h>
+
+#include "osdep/io.h"
+
+#include "common/common.h"
+#include "options/path.h"
+#include "stream/stream.h"
+#include "shader_cache.h"
+#include "utils.h"
+
+// Force cache flush if more than this number of shaders is created.
+#define SC_MAX_ENTRIES 256
+
+union uniform_val {
+    float f[9];         // RA_VARTYPE_FLOAT
+    int i[4];           // RA_VARTYPE_INT
+    struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_*
+    struct ra_buf *buf; // RA_VARTYPE_BUF_*
+};
+
+enum sc_uniform_type {
+    SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM)
+    SC_UNIFORM_TYPE_UBO = 1,    // uniform buffer (RA_CAP_BUF_RO)
+    SC_UNIFORM_TYPE_PUSHC = 2,  // push constant (ra.max_pushc_size)
+};
+
+struct sc_uniform {
+    enum sc_uniform_type type;
+    struct ra_renderpass_input input;
+    const char *glsl_type;
+    union uniform_val v;
+    char *buffer_format;
+    // for SC_UNIFORM_TYPE_UBO/PUSHC:
+    struct ra_layout layout;
+    size_t offset; // byte offset within the buffer
+};
+
+struct sc_cached_uniform {
+    union uniform_val v;
+    int index; // for ra_renderpass_input_val
+    bool set; // whether the uniform has ever been set
+};
+
+struct sc_entry {
+    struct ra_renderpass *pass; // may be NULL; destroyed in sc_flush_cache()
+    struct sc_cached_uniform *cached_uniforms;
+    int num_cached_uniforms;
+    bstr total; // NOTE(review): presumably the shader text/cache key - confirm
+    struct timer_pool *timer; // destroyed in sc_flush_cache()
+    struct ra_buf *ubo; // released via ra_buf_free() in sc_flush_cache()
+    int ubo_index; // for ra_renderpass_input_val.index
+    void *pushc;
+};
+
+struct gl_shader_cache {
+    struct ra *ra;
+    struct mp_log *log;
+
+    // permanent
+    char **exts;
+    int num_exts;
+
+    // this is modified during use (gl_sc_add() etc.) and reset for each shader
+    bstr prelude_text;
+    bstr header_text;
+    bstr text;
+
+    // Next binding point (texture unit, image unit, buffer binding, etc.)
+    // In OpenGL these are separate for each input type
+    int next_binding[RA_VARTYPE_COUNT];
+    bool next_uniform_dynamic;
+
+    struct ra_renderpass_params params;
+
+    struct sc_entry **entries;
+    int num_entries;
+
+    struct sc_entry *current_shader; // set by gl_sc_generate()
+
+    struct sc_uniform *uniforms;
+    int num_uniforms;
+
+    int ubo_binding;
+    size_t ubo_size;
+    size_t pushc_size;
+
+    struct ra_renderpass_input_val *values;
+    int num_values;
+
+    // For checking that the user is calling gl_sc_reset() properly.
+    bool needs_reset;
+
+    bool error_state; // true if an error occurred
+
+    // temporary buffers (avoids frequent reallocations)
+    bstr tmp[6];
+
+    // For the disk-cache.
+    char *cache_dir;
+    struct mpv_global *global; // can be NULL
+};
+
+// Create a shader cache, allocated with a NULL talloc parent. Only the ra,
+// global and log fields are set explicitly; all other fields start zeroed.
+struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global,
+                                     struct mp_log *log)
+{
+    struct gl_shader_cache *cache = talloc_ptrtype(NULL, cache);
+    *cache = (struct gl_shader_cache){.ra = ra, .global = global, .log = log};
+
+    // Run the regular per-shader reset once on the fresh object.
+    gl_sc_reset(cache);
+    return cache;
+}
+
+// Drop all state accumulated for the previous shader. Must be called after
+// gl_sc_generate and before building a new shader; allowed on errors too.
+void gl_sc_reset(struct gl_shader_cache *sc)
+{
+    sc->text.len = sc->header_text.len = sc->prelude_text.len = 0;
+
+    // The name strings are individually allocated (see find_uniform).
+    for (int u = 0; u < sc->num_uniforms; u++)
+        talloc_free((void *)sc->uniforms[u].input.name);
+    sc->num_uniforms = 0;
+
+    sc->ubo_binding = 0;
+    sc->ubo_size = sc->pushc_size = 0;
+    for (int t = 0; t < RA_VARTYPE_COUNT; t++)
+        sc->next_binding[t] = 0;
+    sc->next_uniform_dynamic = false;
+    sc->needs_reset = false;
+    sc->current_shader = NULL;
+    sc->params = (struct ra_renderpass_params){0};
+}
+
+// Destroy every cached entry together with its UBO, render pass and timer.
+static void sc_flush_cache(struct gl_shader_cache *sc)
+{
+    MP_DBG(sc, "flushing shader cache\n");
+    for (int i = 0; i < sc->num_entries; i++) {
+        struct sc_entry *entry = sc->entries[i];
+        ra_buf_free(sc->ra, &entry->ubo);
+        if (entry->pass)
+            sc->ra->fns->renderpass_destroy(sc->ra, entry->pass);
+        timer_pool_destroy(entry->timer);
+        talloc_free(entry);
+    }
+    sc->num_entries = 0; // only the count is reset; sc->entries is not freed
+}
+
+void gl_sc_destroy(struct gl_shader_cache *sc)
+{
+    if (sc) { // a NULL cache is accepted and ignored
+        gl_sc_reset(sc);    // frees the uniform name strings
+        sc_flush_cache(sc); // destroys all cached entries
+        talloc_free(sc);
+    }
+}
+
+bool gl_sc_error_state(struct gl_shader_cache *sc)
+{
+    return sc->error_state; // true if an error occurred
+}
+
+void gl_sc_reset_error(struct gl_shader_cache *sc)
+{
+    sc->error_state = false; // clear the flag read by gl_sc_error_state()
+}
+
+// Add name to the permanent extension list unless it is already present.
+void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name)
+{
+    for (int i = 0; i < sc->num_exts; i++)
+        if (!strcmp(sc->exts[i], name))
+            return; // already listed
+    MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name));
+}
+
+#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s))
+
+void gl_sc_add(struct gl_shader_cache *sc, const char *text)
+{
+    bstr_xappend(sc, &sc->text, bstr0(text)); // append to sc->text
+}
+
+void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list args;
+    va_start(args, textf);
+    bstr_xappend_vasprintf(sc, &sc->text, textf, args); // printf-style append
+    va_end(args);
+}
+
+void gl_sc_hadd(struct gl_shader_cache *sc, const char *text)
+{
+    bstr_xappend(sc, &sc->header_text, bstr0(text)); // append to header_text
+}
+
+void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list args;
+    va_start(args, textf);
+    bstr_xappend_vasprintf(sc, &sc->header_text, textf, args); // to header_text
+    va_end(args);
+}
+
+void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text)
+{
+    bstr_xappend(sc, &sc->header_text, text); // appended to header_text
+}
+
+void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
+{
+    va_list args;
+    va_start(args, textf);
+    bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, args); // to prelude
+    va_end(args);
+}
+
+// Return a blank uniform slot for name. An existing slot with that name is
+// wiped and reused (keeping its name allocation); otherwise a new slot with
+// a freshly duplicated name is appended. input.dim_v/dim_m default to 1.
+static struct sc_uniform *find_uniform(struct gl_shader_cache *sc,
+                                       const char *name)
+{
+    struct sc_uniform blank = {.input = {.dim_v = 1, .dim_m = 1}};
+
+    struct sc_uniform *found = NULL;
+    for (int i = 0; i < sc->num_uniforms && !found; i++) {
+        if (!strcmp(sc->uniforms[i].input.name, name))
+            found = &sc->uniforms[i];
+    }
+
+    if (found) {
+        // Reset the slot, but hold on to the already allocated name string.
+        blank.input.name = found->input.name;
+        *found = blank;
+        return found;
+    }
+
+    blank.input.name = talloc_strdup(NULL, name);
+    MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, blank);
+    return &sc->uniforms[sc->num_uniforms - 1];
+}
+
+// Hands out the next free binding index for the given variable type.
+// Bindings are counted per descriptor namespace, as reported by the RA
+// backend's desc_namespace() callback.
+static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type)
+{
+    int ns = sc->ra->fns->desc_namespace(sc->ra, type);
+    return sc->next_binding[ns]++;
+}
+
+// Marks the next uniform passed through update_uniform_params() as
+// "dynamic". The flag is one-shot: update_uniform_params() reads and then
+// clears it.
+void gl_sc_uniform_dynamic(struct gl_shader_cache *sc)
+{
+    sc->next_uniform_dynamic = true;
+}
+
+// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input
+// and glsl_type/buffer_format are already set.
+//
+// Decides where the uniform's value will live, trying in order: push
+// constant, uniform buffer (UBO), global uniform. On return u->type is set;
+// for push constants and UBO members u->layout and u->offset are set as well
+// and the running sc->pushc_size / sc->ubo_size total is advanced.
+// Also consumes the one-shot hint set by gl_sc_uniform_dynamic().
+static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u)
+{
+    // Read and clear the hint so it only applies to this one uniform.
+    bool dynamic = sc->next_uniform_dynamic;
+    sc->next_uniform_dynamic = false;
+
+    // Try not using push constants for "large" values like matrices, since
+    // this is likely to both exceed the VGPR budget as well as the pushc size
+    // budget
+    bool try_pushc = u->input.dim_m == 1 || dynamic;
+
+    // Attempt using push constants first
+    if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) {
+        struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input);
+        size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align);
+        // Push constants have limited size, so make sure we don't exceed this
+        size_t new_size = offset + layout.size;
+        if (new_size <= sc->ra->max_pushc_size) {
+            u->type = SC_UNIFORM_TYPE_PUSHC;
+            u->layout = layout;
+            u->offset = offset;
+            sc->pushc_size = new_size;
+            return;
+        }
+        // Does not fit: fall through to the UBO / global paths below.
+    }
+
+    // Attempt using uniform buffer next. The GLSL version 440 check is due
+    // to explicit offsets on UBO entries. In theory we could leave away
+    // the offsets and support UBOs for older GL as well, but this is a nice
+    // safety net for driver bugs (and also rules out potentially buggy drivers)
+    // Also avoid UBOs for highly dynamic stuff since that requires synchronizing
+    // the UBO writes every frame
+    // (Dynamic uniforms still go to the UBO when global uniforms are not
+    // available at all.)
+    bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic;
+    if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) {
+        u->type = SC_UNIFORM_TYPE_UBO;
+        u->layout = sc->ra->fns->uniform_layout(&u->input);
+        u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align);
+        sc->ubo_size = u->offset + u->layout.size;
+        return;
+    }
+
+    // If all else fails, use global uniforms
+    assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
+    u->type = SC_UNIFORM_TYPE_GLOBAL;
+}
+
+// Registers a texture sampler uniform under the given name. The GLSL sampler
+// type is derived from the texture parameters, checked in this order:
+// 1D, 3D, non-normalized (rect), external OES, unsigned-integer format;
+// anything else is a plain sampler2D.
+void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
+                           struct ra_tex *tex)
+{
+    const char *sampler;
+    if (tex->params.dimensions == 1) {
+        sampler = "sampler1D";
+    } else if (tex->params.dimensions == 3) {
+        sampler = "sampler3D";
+    } else if (tex->params.non_normalized) {
+        sampler = "sampler2DRect";
+    } else if (tex->params.external_oes) {
+        sampler = "samplerExternalOES";
+    } else if (tex->params.format->ctype == RA_CTYPE_UINT) {
+        sampler = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D";
+    } else {
+        sampler = "sampler2D";
+    }
+
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->v.tex = tex;
+    uni->glsl_type = sampler;
+    uni->input.type = RA_VARTYPE_TEX;
+    uni->input.binding = gl_sc_next_binding(sc, RA_VARTYPE_TEX);
+}
+
+// Registers a write-only image2D uniform backed by tex, and enables the
+// GL_ARB_shader_image_load_store extension for the generated shader.
+void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name,
+                              struct ra_tex *tex)
+{
+    gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store");
+
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->v.tex = tex;
+    uni->input.type = RA_VARTYPE_IMG_W;
+    if (sc->ra->glsl_es) {
+        uni->glsl_type = "writeonly highp image2D";
+    } else {
+        uni->glsl_type = "writeonly image2D";
+    }
+    uni->input.binding = gl_sc_next_binding(sc, RA_VARTYPE_IMG_W);
+}
+
+// Registers a read/write shader storage buffer under the given name. The
+// printf-style format (plus arguments) yields the buffer's member
+// declarations, which add_uniforms() pastes between the braces of the buffer
+// block. Requires RA_CAP_BUF_RW and enables
+// GL_ARB_shader_storage_buffer_object.
+void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf,
+                char *format, ...)
+{
+    assert(sc->ra->caps & RA_CAP_BUF_RW);
+    gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object");
+
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->v.buf = buf;
+    uni->glsl_type = "";
+    uni->input.type = RA_VARTYPE_BUF_RW;
+    uni->input.binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RW);
+
+    va_list args;
+    va_start(args, format);
+    uni->buffer_format = ta_vasprintf(sc, format, args);
+    va_end(args);
+}
+
+// Sets (or creates) a scalar "float" uniform with the given value.
+void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f)
+{
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->glsl_type = "float";
+    uni->input.type = RA_VARTYPE_FLOAT;
+    update_uniform_params(sc, uni);
+    uni->v.f[0] = f;
+}
+
+// Sets (or creates) a scalar "int" uniform with the given value.
+void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i)
+{
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->glsl_type = "int";
+    uni->input.type = RA_VARTYPE_INT;
+    update_uniform_params(sc, uni);
+    uni->v.i[0] = i;
+}
+
+// Sets (or creates) a "vec2" uniform from the two floats in f.
+void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2])
+{
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->glsl_type = "vec2";
+    uni->input.type = RA_VARTYPE_FLOAT;
+    uni->input.dim_v = 2;
+    update_uniform_params(sc, uni);
+    for (int k = 0; k < 2; k++)
+        uni->v.f[k] = f[k];
+}
+
+// Sets (or creates) a "vec3" uniform from the three floats in f.
+void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3])
+{
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->glsl_type = "vec3";
+    uni->input.type = RA_VARTYPE_FLOAT;
+    uni->input.dim_v = 3;
+    update_uniform_params(sc, uni);
+    for (int k = 0; k < 3; k++)
+        uni->v.f[k] = f[k];
+}
+
+// Transposes a 2x2 matrix stored as 4 consecutive floats, in place. Only the
+// two off-diagonal elements (indices 1 and 2) need to trade places.
+static void transpose2x2(float r[2 * 2])
+{
+    float tmp = r[1];
+    r[1] = r[2];
+    r[2] = tmp;
+}
+
+// Sets (or creates) a "mat2" uniform from the 4 floats at v. If transpose is
+// set, the stored copy is transposed; v itself is left untouched.
+void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
+                        bool transpose, float *v)
+{
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->glsl_type = "mat2";
+    uni->input.type = RA_VARTYPE_FLOAT;
+    uni->input.dim_v = 2;
+    uni->input.dim_m = 2;
+    update_uniform_params(sc, uni);
+
+    float *dst = &uni->v.f[0];
+    for (int k = 0; k < 2 * 2; k++)
+        dst[k] = v[k];
+    if (transpose)
+        transpose2x2(dst);
+}
+
+// Transposes a 3x3 matrix stored as 9 consecutive floats, in place, by
+// swapping each element above the diagonal with its mirror below it.
+static void transpose3x3(float r[3 * 3])
+{
+    for (int a = 0; a < 3; a++) {
+        for (int b = a + 1; b < 3; b++) {
+            float tmp = r[a + 3 * b];
+            r[a + 3 * b] = r[b + 3 * a];
+            r[b + 3 * a] = tmp;
+        }
+    }
+}
+
+// Sets (or creates) a "mat3" uniform from the 9 floats at v. If transpose is
+// set, the stored copy is transposed; v itself is left untouched.
+void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
+                        bool transpose, float *v)
+{
+    struct sc_uniform *uni = find_uniform(sc, name);
+    uni->glsl_type = "mat3";
+    uni->input.type = RA_VARTYPE_FLOAT;
+    uni->input.dim_v = 3;
+    uni->input.dim_m = 3;
+    update_uniform_params(sc, uni);
+
+    float *dst = &uni->v.f[0];
+    for (int k = 0; k < 3 * 3; k++)
+        dst[k] = v[k];
+    if (transpose)
+        transpose3x3(dst);
+}
+
+// Enables blending for the render pass being built and records the four
+// blend factors (source/destination, for RGB and for alpha) in sc->params.
+void gl_sc_blend(struct gl_shader_cache *sc,
+                 enum ra_blend blend_src_rgb,
+                 enum ra_blend blend_dst_rgb,
+                 enum ra_blend blend_src_alpha,
+                 enum ra_blend blend_dst_alpha)
+{
+    struct ra_renderpass_params *p = &sc->params;
+
+    p->enable_blend = true;
+    p->blend_src_rgb = blend_src_rgb;
+    p->blend_src_alpha = blend_src_alpha;
+    p->blend_dst_rgb = blend_dst_rgb;
+    p->blend_dst_alpha = blend_dst_alpha;
+}
+
+// Returns the GLSL type name to use for a dims-component boolean vector:
+// "bool"/"bvecN" when the GLSL version is at least 130, otherwise the
+// float-based "float"/"vecN" equivalent. dims must be in 1..4.
+const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims)
+{
+    static const char *bvecs[] = {
+        [1] = "bool",
+        [2] = "bvec2",
+        [3] = "bvec3",
+        [4] = "bvec4",
+    };
+
+    static const char *vecs[] = {
+        [1] = "float",
+        [2] = "vec2",
+        [3] = "vec3",
+        [4] = "vec4",
+    };
+
+    assert(dims > 0 && dims < MP_ARRAY_SIZE(bvecs));
+
+    const char *const *names = sc->ra->glsl_version >= 130 ? bvecs : vecs;
+    return names[dims];
+}
+
+// Maps a vertex attribute to a GLSL type name based purely on its component
+// count (dim_v): 1 -> "float", 2..4 -> "vecN". Any other count is treated as
+// unreachable.
+static const char *vao_glsl_type(const struct ra_renderpass_input *e)
+{
+    // pretty dumb... too dumb, but works for us
+    const char *type = NULL;
+    switch (e->dim_v) {
+    case 1: type = "float"; break;
+    case 2: type = "vec2"; break;
+    case 3: type = "vec3"; break;
+    case 4: type = "vec4"; break;
+    default: MP_ASSERT_UNREACHABLE();
+    }
+    return type;
+}
+
+// Uploads the current value of u into the uniform buffer. The value is
+// written in dim_m chunks of the source stride each, because the source
+// layout and the layout inside the buffer may use different strides.
+static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u)
+{
+    struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
+    struct ra_layout dst_layout = u->layout;
+    char *src = (char *)&u->v;
+
+    for (int col = 0; col < u->input.dim_m; col++) {
+        size_t dst = u->offset + col * dst_layout.stride;
+        ra->fns->buf_update(ra, ubo, dst, src + col * src_layout.stride,
+                            src_layout.stride);
+    }
+}
+
+// Copies the current value of u into the push constant staging memory at
+// u->offset. As in update_ubo(), the copy is done in dim_m chunks because the
+// source and destination strides may differ.
+static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u)
+{
+    struct ra_layout src_layout = ra_renderpass_input_layout(&u->input);
+    struct ra_layout dst_layout = u->layout;
+    char *src = (char *)&u->v;
+    char *dst = (char *)pushc + u->offset;
+
+    for (int col = 0; col < u->input.dim_m; col++) {
+        memcpy(dst + col * dst_layout.stride, src + col * src_layout.stride,
+               src_layout.stride);
+    }
+}
+
+// Pushes the value of uniform u (index n in sc->uniforms) to cache entry e,
+// unless the entry already holds an identical value from an earlier call.
+// How the value is delivered depends on u->type: global uniforms are queued
+// in sc->values, UBO members are written into e->ubo, and push constants are
+// copied into e->pushc.
+static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e,
+                           struct sc_uniform *u, int n)
+{
+    struct sc_cached_uniform *un = &e->cached_uniforms[n];
+    struct ra_layout layout = ra_renderpass_input_layout(&u->input);
+    // Skip the update if the cached copy is byte-identical. Inputs with a
+    // zero-sized layout are never skipped.
+    if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0)
+        return;
+
+    un->v = u->v;
+    un->set = true;
+
+    static const char *desc[] = {
+        [SC_UNIFORM_TYPE_UBO]    = "UBO",
+        [SC_UNIFORM_TYPE_PUSHC]  = "PC",
+        [SC_UNIFORM_TYPE_GLOBAL] = "global",
+    };
+    MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name);
+
+    switch (u->type) {
+    case SC_UNIFORM_TYPE_GLOBAL: {
+        // Point at the cached copy (un->v) rather than at u->v.
+        struct ra_renderpass_input_val value = {
+            .index = un->index,
+            .data = &un->v,
+        };
+        MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value);
+        break;
+    }
+    case SC_UNIFORM_TYPE_UBO:
+        assert(e->ubo);
+        update_ubo(sc->ra, e->ubo, u);
+        break;
+    case SC_UNIFORM_TYPE_PUSHC:
+        assert(e->pushc);
+        update_pushc(sc->ra, e->pushc, u);
+        break;
+    default: MP_ASSERT_UNREACHABLE();
+    }
+}
+
+// Sets the directory used for the on-disk shader cache, replacing any
+// previous setting. A non-empty dir is resolved with mp_get_user_path(); a
+// NULL or empty dir selects the location returned by
+// mp_find_user_file(..., "cache", ""). The cache keeps its own copy of the
+// resulting string.
+void gl_sc_set_cache_dir(struct gl_shader_cache *sc, char *dir)
+{
+    talloc_free(sc->cache_dir);
+
+    char *resolved;
+    if (!dir || !dir[0]) {
+        resolved = mp_find_user_file(NULL, sc->global, "cache", "");
+    } else {
+        resolved = mp_get_user_path(NULL, sc->global, dir);
+    }
+
+    sc->cache_dir = talloc_strdup(sc, resolved);
+    talloc_free(resolved);
+}
+
+// Creates the render pass for a new cache entry from a copy of sc->params,
+// together with the entry's UBO and push constant staging memory if any
+// uniforms were assigned to them.
+//
+// If a cache directory is configured, a previously saved program is looked up
+// under a file named after the SHA-256 of entry->total and handed to the
+// backend as params.cached_program. After a successful creation, a program
+// blob reported by the backend that differs from the loaded one is written
+// back to that file.
+//
+// Returns true on success. Returns false if creating the UBO or the render
+// pass failed; entry->pass is not set to a new pass in that case.
+static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry)
+{
+    bool ret = false;
+
+    // Scratch allocations; released on every exit path.
+    void *tmp = talloc_new(NULL);
+    struct ra_renderpass_params params = sc->params;
+
+    const char *cache_header = "mpv shader cache v1\n";
+    char *cache_filename = NULL;
+    char *cache_dir = NULL;
+
+    if (sc->cache_dir && sc->cache_dir[0]) {
+        // Try to load it from a disk cache.
+        cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir);
+
+        struct AVSHA *sha = av_sha_alloc();
+        MP_HANDLE_OOM(sha);
+        av_sha_init(sha, 256);
+        av_sha_update(sha, entry->total.start, entry->total.len);
+
+        uint8_t hash[256 / 8];
+        av_sha_final(sha, hash);
+        av_free(sha);
+
+        // Upper-case hex digest, used as the cache file name.
+        char hashstr[256 / 8 * 2 + 1];
+        for (int n = 0; n < 256 / 8; n++)
+            snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]);
+
+        cache_filename = mp_path_join(tmp, cache_dir, hashstr);
+        if (stat(cache_filename, &(struct stat){0}) == 0) {
+            MP_DBG(sc, "Trying to load shader from disk...\n");
+            struct bstr cachedata =
+                stream_read_file(cache_filename, tmp, sc->global, 1000000000);
+            // Only accept files that start with the expected header.
+            if (bstr_eatstart0(&cachedata, cache_header))
+                params.cached_program = cachedata;
+        }
+    }
+
+    // If using a UBO, also make sure to add it as an input value so the RA
+    // can see it
+    if (sc->ubo_size) {
+        entry->ubo_index = sc->params.num_inputs;
+        struct ra_renderpass_input ubo_input = {
+            .name = "UBO",
+            .type = RA_VARTYPE_BUF_RO,
+            .dim_v = 1,
+            .dim_m = 1,
+            .binding = sc->ubo_binding,
+        };
+        MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input);
+    }
+
+    if (sc->pushc_size) {
+        params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4);
+        entry->pushc = talloc_zero_size(entry, params.push_constants_size);
+    }
+
+    if (sc->ubo_size) {
+        struct ra_buf_params ubo_params = {
+            .type = RA_BUF_TYPE_UNIFORM,
+            .size = sc->ubo_size,
+            .host_mutable = true,
+        };
+
+        entry->ubo = ra_buf_create(sc->ra, &ubo_params);
+        if (!entry->ubo) {
+            MP_ERR(sc, "Failed creating uniform buffer!\n");
+            goto error;
+        }
+    }
+
+    entry->pass = sc->ra->fns->renderpass_create(sc->ra, &params);
+    if (!entry->pass)
+        goto error;
+
+    // entry->pass is known to be valid from here on.
+    if (cache_filename) {
+        bstr nc = entry->pass->params.cached_program;
+        if (nc.len && !bstr_equals(params.cached_program, nc)) {
+            mp_mkdirp(cache_dir);
+
+            MP_DBG(sc, "Writing shader cache file: %s\n", cache_filename);
+            FILE *out = fopen(cache_filename, "wb");
+            if (out) {
+                bool ok =
+                    fwrite(cache_header, strlen(cache_header), 1, out) == 1 &&
+                    fwrite(nc.start, nc.len, 1, out) == 1;
+                // fclose() flushes buffered data, so its result counts too.
+                if (fclose(out) != 0)
+                    ok = false;
+                if (!ok) {
+                    // Don't leave a truncated file behind: its header would
+                    // still match and the partial blob would be loaded as a
+                    // cached program next time. Removal is best-effort.
+                    MP_WARN(sc, "Failed writing shader cache file: %s\n",
+                            cache_filename);
+                    remove(cache_filename);
+                }
+            }
+        }
+    }
+
+    ret = true;
+
+error:
+    talloc_free(tmp);
+    return ret;
+}
+
+// Shader text helpers. Both append to the bstr pointed to by x and expect a
+// variable named `sc` to be in scope at the point of use.
+// ADD: printf-style append. ADD_BSTR: append an existing bstr.
+#define ADD(x, ...) bstr_xappend_asprintf(sc, (x), __VA_ARGS__)
+#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s))
+
+// Appends the GLSL declarations for all registered uniforms to dst:
+// first one std140 block holding every UBO-typed uniform, then one
+// push_constant block holding every push-constant uniform (each only if
+// non-empty), and finally one declaration per remaining "global" uniform
+// (plain values, textures, buffers and images).
+static void add_uniforms(struct gl_shader_cache *sc, bstr *dst)
+{
+    // Add all of the UBO entries separately as members of their own buffer
+    if (sc->ubo_size > 0) {
+        ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding);
+        for (int n = 0; n < sc->num_uniforms; n++) {
+            struct sc_uniform *u = &sc->uniforms[n];
+            if (u->type != SC_UNIFORM_TYPE_UBO)
+                continue;
+            ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
+                u->input.name);
+        }
+        ADD(dst, "};\n");
+    }
+
+    // Ditto for push constants
+    if (sc->pushc_size > 0) {
+        ADD(dst, "layout(std430, push_constant) uniform PushC {\n");
+        for (int n = 0; n < sc->num_uniforms; n++) {
+            struct sc_uniform *u = &sc->uniforms[n];
+            if (u->type != SC_UNIFORM_TYPE_PUSHC)
+                continue;
+            ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type,
+                u->input.name);
+        }
+        ADD(dst, "};\n");
+    }
+
+    // Everything else is declared individually, by input type.
+    for (int n = 0; n < sc->num_uniforms; n++) {
+        struct sc_uniform *u = &sc->uniforms[n];
+        if (u->type != SC_UNIFORM_TYPE_GLOBAL)
+            continue;
+        switch (u->input.type) {
+        case RA_VARTYPE_INT:
+        case RA_VARTYPE_FLOAT:
+            // Plain values can only be globals if the backend supports them.
+            assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM);
+            MP_FALLTHROUGH;
+        case RA_VARTYPE_TEX:
+            // Vulkan requires explicitly assigning the bindings in the shader
+            // source. For OpenGL it's optional, but requires higher GL version
+            // so we don't do it (and instead have ra_gl update the bindings
+            // after program creation).
+            if (sc->ra->glsl_vulkan)
+                ADD(dst, "layout(binding=%d) ", u->input.binding);
+            ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name);
+            break;
+        case RA_VARTYPE_BUF_RO:
+            ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n",
+                u->input.binding, u->input.name, u->buffer_format);
+            break;
+        case RA_VARTYPE_BUF_RW:
+            ADD(dst, "layout(std430, binding=%d) restrict coherent buffer %s { %s };\n",
+                u->input.binding, u->input.name, u->buffer_format);
+            break;
+        case RA_VARTYPE_IMG_W: {
+            // For better compatibility, we have to explicitly label the
+            // type of data we will be reading/writing to this image.
+            const char *fmt = u->v.tex->params.format->glsl_format;
+
+            // The layout qualifier carries the binding (Vulkan only) and/or
+            // the format label, whichever of the two are available.
+            if (sc->ra->glsl_vulkan) {
+                if (fmt) {
+                    ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt);
+                } else {
+                    ADD(dst, "layout(binding=%d) ", u->input.binding);
+                }
+            } else if (fmt) {
+                ADD(dst, "layout(%s) ", fmt);
+            }
+            ADD(dst, "uniform restrict %s %s;\n", u->glsl_type, u->input.name);
+        }
+        }
+    }
+}
+
+// 1. Generate vertex and fragment shaders (or a compute shader) from the
+//    shader text added with gl_sc_add(). The resulting render pass is cached
+//    (keyed on the generated text plus the pass type and, when set, the blend
+//    mode and target format), so actual compilation happens only the first
+//    time.
+// 2. Update the uniforms and textures set with gl_sc_uniform_*.
+// 3. Make the matching cache entry current (sc->current_shader), or set it to
+//    NULL if no render pass could be created.
+// After that, you render, and then you call gl_sc_reset(), which does:
+// 1. Unbind the program and all textures.
+// 2. Reset the sc state and prepare for a new shader program. (All uniforms
+//    and fragment operations needed for the next program have to be re-added.)
+//
+// target_format must be non-NULL for raster passes. vao/vao_len describe the
+// vertex attributes; an attribute named "position" is routed to gl_Position
+// instead of being passed through to the fragment shader.
+static void gl_sc_generate(struct gl_shader_cache *sc,
+                           enum ra_renderpass_type type,
+                           const struct ra_format *target_format,
+                           const struct ra_renderpass_input *vao,
+                           int vao_len, size_t vertex_stride)
+{
+    int glsl_version = sc->ra->glsl_version;
+    // 0 for desktop GLSL, otherwise the GLSL ES version.
+    int glsl_es = sc->ra->glsl_es ? glsl_version : 0;
+
+    sc->params.type = type;
+
+    // gl_sc_reset() must be called after ending the previous render process,
+    // and before starting a new one.
+    assert(!sc->needs_reset);
+    sc->needs_reset = true;
+
+    // If using a UBO, pick a binding (needed for shader generation)
+    if (sc->ubo_size)
+        sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO);
+
+    // The scratch buffers are reused across calls; only their length is reset.
+    for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++)
+        sc->tmp[n].len = 0;
+
+    // set up shader text (header + uniforms + body)
+    bstr *header = &sc->tmp[0];
+    ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : "");
+    if (type == RA_RENDERPASS_TYPE_COMPUTE) {
+        // This extension cannot be enabled in fragment shader. Enable it as
+        // an exception for compute shader.
+        ADD(header, "#extension GL_ARB_compute_shader : enable\n");
+    }
+    for (int n = 0; n < sc->num_exts; n++)
+        ADD(header, "#extension %s : enable\n", sc->exts[n]);
+    if (glsl_es) {
+        ADD(header, "#ifdef GL_FRAGMENT_PRECISION_HIGH\n");
+        ADD(header, "precision highp float;\n");
+        ADD(header, "#else\n");
+        ADD(header, "precision mediump float;\n");
+        ADD(header, "#endif\n");
+
+        ADD(header, "precision mediump sampler2D;\n");
+        if (sc->ra->caps & RA_CAP_TEX_3D)
+            ADD(header, "precision mediump sampler3D;\n");
+    }
+
+    // Map the version-independent names used by shader snippets onto the
+    // texture lookup functions of the GLSL version in use.
+    if (glsl_version >= 130) {
+        ADD(header, "#define tex1D texture\n");
+        ADD(header, "#define tex3D texture\n");
+    } else {
+        ADD(header, "#define tex1D texture1D\n");
+        ADD(header, "#define tex3D texture3D\n");
+        ADD(header, "#define texture texture2D\n");
+    }
+
+    // Additional helpers.
+    ADD(header, "#define LUT_POS(x, lut_size)"
+                " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n");
+
+    // Storage qualifier keywords differ between old and new GLSL.
+    char *vert_in = glsl_version >= 130 ? "in" : "attribute";
+    char *vert_out = glsl_version >= 130 ? "out" : "varying";
+    char *frag_in = glsl_version >= 130 ? "in" : "varying";
+
+    struct bstr *vert = NULL, *frag = NULL, *comp = NULL;
+
+    if (type == RA_RENDERPASS_TYPE_RASTER) {
+        // vertex shader: we don't use the vertex shader, so just setup a
+        // dummy, which passes through the vertex array attributes.
+        bstr *vert_head = &sc->tmp[1];
+        ADD_BSTR(vert_head, *header);
+        bstr *vert_body = &sc->tmp[2];
+        ADD(vert_body, "void main() {\n");
+        bstr *frag_vaos = &sc->tmp[3];
+        for (int n = 0; n < vao_len; n++) {
+            const struct ra_renderpass_input *e = &vao[n];
+            const char *glsl_type = vao_glsl_type(e);
+            // Explicit locations are only emitted for Vulkan GLSL.
+            char loc[32] = {0};
+            if (sc->ra->glsl_vulkan)
+                snprintf(loc, sizeof(loc), "layout(location=%d) ", n);
+            if (strcmp(e->name, "position") == 0) {
+                // setting raster pos. requires setting gl_Position magic variable
+                assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT);
+                ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in);
+                ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n");
+            } else {
+                // Pass-through attribute: input "vertex_<name>" is copied to
+                // output "<name>", which the fragment shader reads.
+                ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name);
+                ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name);
+                ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name);
+                ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name);
+            }
+        }
+        ADD(vert_body, "}\n");
+        vert = vert_head;
+        ADD_BSTR(vert, *vert_body);
+
+        // fragment shader; still requires adding used uniforms and VAO elements
+        frag = &sc->tmp[4];
+        ADD_BSTR(frag, *header);
+        if (glsl_version >= 130) {
+            ADD(frag, "%sout vec4 out_color;\n",
+                sc->ra->glsl_vulkan ? "layout(location=0) " : "");
+        }
+        ADD_BSTR(frag, *frag_vaos);
+        add_uniforms(sc, frag);
+
+        ADD_BSTR(frag, sc->prelude_text);
+        ADD_BSTR(frag, sc->header_text);
+
+        ADD(frag, "void main() {\n");
+        // we require _all_ frag shaders to write to a "vec4 color"
+        ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n");
+        ADD_BSTR(frag, sc->text);
+        if (glsl_version >= 130) {
+            ADD(frag, "out_color = color;\n");
+        } else {
+            ADD(frag, "gl_FragColor = color;\n");
+        }
+        ADD(frag, "}\n");
+
+        // We need to fix the format of the render dst at renderpass creation
+        // time
+        assert(target_format);
+        sc->params.target_format = target_format;
+    }
+
+    if (type == RA_RENDERPASS_TYPE_COMPUTE) {
+        // Shares scratch slot 4 with the fragment shader; a pass is either
+        // raster or compute, never both.
+        comp = &sc->tmp[4];
+        ADD_BSTR(comp, *header);
+
+        add_uniforms(sc, comp);
+
+        ADD_BSTR(comp, sc->prelude_text);
+        ADD_BSTR(comp, sc->header_text);
+
+        ADD(comp, "void main() {\n");
+        ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience
+        ADD_BSTR(comp, sc->text);
+        ADD(comp, "}\n");
+    }
+
+    // Build the cache key: everything that influences the compiled pass.
+    bstr *hash_total = &sc->tmp[5];
+
+    ADD(hash_total, "type %d\n", sc->params.type);
+
+    if (frag) {
+        ADD_BSTR(hash_total, *frag);
+        sc->params.frag_shader = frag->start;
+    }
+    ADD(hash_total, "\n");
+    if (vert) {
+        ADD_BSTR(hash_total, *vert);
+        sc->params.vertex_shader = vert->start;
+    }
+    ADD(hash_total, "\n");
+    if (comp) {
+        ADD_BSTR(hash_total, *comp);
+        sc->params.compute_shader = comp->start;
+    }
+    ADD(hash_total, "\n");
+
+    if (sc->params.enable_blend) {
+        ADD(hash_total, "blend %d %d %d %d\n",
+            sc->params.blend_src_rgb, sc->params.blend_dst_rgb,
+            sc->params.blend_src_alpha, sc->params.blend_dst_alpha);
+    }
+
+    if (sc->params.target_format)
+        ADD(hash_total, "format %s\n", sc->params.target_format->name);
+
+    // Linear search for an existing entry with an identical key.
+    struct sc_entry *entry = NULL;
+    for (int n = 0; n < sc->num_entries; n++) {
+        struct sc_entry *cur = sc->entries[n];
+        if (bstr_equals(cur->total, *hash_total)) {
+            entry = cur;
+            break;
+        }
+    }
+    if (!entry) {
+        // Cache full: drop all entries rather than evicting a single one.
+        if (sc->num_entries == SC_MAX_ENTRIES)
+            sc_flush_cache(sc);
+        entry = talloc_ptrtype(NULL, entry);
+        *entry = (struct sc_entry){
+            .total = bstrdup(entry, *hash_total),
+            .timer = timer_pool_create(sc->ra),
+        };
+
+        // The vertex shader uses mangled names for the vertex attributes, so
+        // that the fragment shader can use the "real" names. But the shader is
+        // expecting the vertex attribute names (at least with older GLSL
+        // targets for GL).
+        sc->params.vertex_stride = vertex_stride;
+        for (int n = 0; n < vao_len; n++) {
+            struct ra_renderpass_input attrib = vao[n];
+            attrib.name = talloc_asprintf(entry, "vertex_%s", attrib.name);
+            MP_TARRAY_APPEND(sc, sc->params.vertex_attribs,
+                             sc->params.num_vertex_attribs, attrib);
+        }
+
+        // One cached_uniforms slot per uniform, in the same order, so that
+        // update_uniform() can index both arrays with the same n.
+        for (int n = 0; n < sc->num_uniforms; n++) {
+            struct sc_cached_uniform u = {0};
+            if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) {
+                // global uniforms need to be made visible to the ra_renderpass
+                u.index = sc->params.num_inputs;
+                MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs,
+                                 sc->uniforms[n].input);
+            }
+            MP_TARRAY_APPEND(entry, entry->cached_uniforms,
+                             entry->num_cached_uniforms, u);
+        }
+        if (!create_pass(sc, entry))
+            sc->error_state = true;
+        // The entry is stored even if pass creation failed, so the same
+        // failing shader text is found again instead of being recreated.
+        MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry);
+    }
+
+    if (!entry->pass) {
+        sc->current_shader = NULL;
+        return;
+    }
+
+    assert(sc->num_uniforms == entry->num_cached_uniforms);
+
+    sc->num_values = 0;
+    for (int n = 0; n < sc->num_uniforms; n++)
+        update_uniform(sc, entry, &sc->uniforms[n], n);
+
+    // If we're using a UBO, make sure to bind it as well
+    if (sc->ubo_size) {
+        struct ra_renderpass_input_val ubo_val = {
+            .index = entry->ubo_index,
+            .data = &entry->ubo,
+        };
+        MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val);
+    }
+
+    sc->current_shader = entry;
+}
+
+// Builds (or fetches from the cache) the raster pass described by the current
+// shader state and runs it on the given vertices, rendering to the whole of
+// target. `discard` is forwarded as the pass's invalidate_target flag.
+// The shader state is reset afterwards in every case, including when no
+// usable pass could be produced. Returns the measurement of the entry's
+// timer pool (or of a NULL timer if nothing was run).
+struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
+                                        struct ra_tex *target, bool discard,
+                                        const struct ra_renderpass_input *vao,
+                                        int vao_len, size_t vertex_stride,
+                                        void *vertices, size_t num_vertices)
+{
+    struct timer_pool *timer = NULL;
+
+    sc->params.invalidate_target = discard;
+    gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format,
+                   vao, vao_len, vertex_stride);
+
+    struct sc_entry *shader = sc->current_shader;
+    if (shader) {
+        timer = shader->timer;
+
+        // Viewport and scissor both cover the full target.
+        struct mp_rect full_rc = {0, 0, target->params.w, target->params.h};
+
+        struct ra_renderpass_run_params run = {
+            .pass = shader->pass,
+            .values = sc->values,
+            .num_values = sc->num_values,
+            .push_constants = shader->pushc,
+            .target = target,
+            .vertex_data = vertices,
+            .vertex_count = num_vertices,
+            .viewport = full_rc,
+            .scissors = full_rc,
+        };
+
+        timer_pool_start(timer);
+        sc->ra->fns->renderpass_run(sc->ra, &run);
+        timer_pool_stop(timer);
+    }
+
+    gl_sc_reset(sc);
+    return timer_pool_measure(timer);
+}
+
+// Builds (or fetches from the cache) the compute pass described by the
+// current shader state and dispatches it with w x h x d work groups.
+// The shader state is reset afterwards in every case, including when no
+// usable pass could be produced. Returns the measurement of the entry's
+// timer pool (or of a NULL timer if nothing was run).
+struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
+                                           int w, int h, int d)
+{
+    struct timer_pool *timer = NULL;
+
+    gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL, NULL, 0, 0);
+
+    struct sc_entry *shader = sc->current_shader;
+    if (shader) {
+        timer = shader->timer;
+
+        struct ra_renderpass_run_params run = {
+            .pass = shader->pass,
+            .values = sc->values,
+            .num_values = sc->num_values,
+            .push_constants = shader->pushc,
+            .compute_groups = {w, h, d},
+        };
+
+        timer_pool_start(timer);
+        sc->ra->fns->renderpass_run(sc->ra, &run);
+        timer_pool_stop(timer);
+    }
+
+    gl_sc_reset(sc);
+    return timer_pool_measure(timer);
+}
diff --git a/video/out/gpu/shader_cache.h b/video/out/gpu/shader_cache.h
new file mode 100644
index 0000000..7c51c7a
--- /dev/null
+++ b/video/out/gpu/shader_cache.h
@@ -0,0 +1,66 @@
+#pragma once
+
+#include "common/common.h"
+#include "misc/bstr.h"
+#include "ra.h"
+
+// For mp_pass_perf
+#include "video/out/vo.h"
+
+struct mp_log;
+struct mpv_global;
+struct gl_shader_cache;
+
+struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global,
+                                     struct mp_log *log);
+void gl_sc_destroy(struct gl_shader_cache *sc);
+bool gl_sc_error_state(struct gl_shader_cache *sc);
+void gl_sc_reset_error(struct gl_shader_cache *sc);
+void gl_sc_add(struct gl_shader_cache *sc, const char *text);
+void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...)
+    PRINTF_ATTRIBUTE(2, 3);
+void gl_sc_hadd(struct gl_shader_cache *sc, const char *text);
+void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...)
+    PRINTF_ATTRIBUTE(2, 3);
+void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text);
+void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...)
+    PRINTF_ATTRIBUTE(2, 3);
+
+// A hint that the next data-type (i.e. non-binding) uniform is expected to
+// change frequently. This refers to the _f, _i, _vecN etc. uniform types.
+void gl_sc_uniform_dynamic(struct gl_shader_cache *sc);
+void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name,
+                           struct ra_tex *tex);
+void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name,
+                              struct ra_tex *tex);
+void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf,
+                char *format, ...) PRINTF_ATTRIBUTE(4, 5);
+void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f);
+void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int f);
+void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]);
+void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]);
+void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name,
+                        bool transpose, float *v);
+void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name,
+                        bool transpose, float *v);
+
+// Return the correct bvecN() variant for using mix() in this GLSL version
+const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims);
+
+void gl_sc_blend(struct gl_shader_cache *sc,
+                 enum ra_blend blend_src_rgb,
+                 enum ra_blend blend_dst_rgb,
+                 enum ra_blend blend_src_alpha,
+                 enum ra_blend blend_dst_alpha);
+void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name);
+struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc,
+                                        struct ra_tex *target, bool discard,
+                                        const struct ra_renderpass_input *vao,
+                                        int vao_len, size_t vertex_stride,
+                                        void *ptr, size_t num);
+struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc,
+                                           int w, int h, int d);
+// The application can call this on errors, to reset the current shader. This
+// is normally done implicitly by gl_sc_dispatch_*
+void gl_sc_reset(struct gl_shader_cache *sc);
+void gl_sc_set_cache_dir(struct gl_shader_cache *sc, char *dir);
diff --git a/video/out/gpu/spirv.c b/video/out/gpu/spirv.c
new file mode 100644
index 0000000..67088bc
--- /dev/null
+++ b/video/out/gpu/spirv.c
@@ -0,0 +1,70 @@
+#include "common/msg.h"
+#include "options/m_config.h"
+
+#include "spirv.h"
+#include "config.h"
+
+// Function table of the shaderc backend (defined in spirv_shaderc.c).
+extern const struct spirv_compiler_fns spirv_shaderc;
+
+// Values of the "spirv-compiler" option. They double as indices into
+// compilers[] and are listed in probe-order: spirv_compiler_init() walks them
+// in ascending order, starting after SPIRV_AUTO.
+enum {
+    SPIRV_AUTO = 0,
+    SPIRV_SHADERC, // generally preferred, but not packaged everywhere
+};
+
+// Backend table indexed by the enum above. Backends that were not compiled in
+// have no entry; spirv_compiler_init() skips missing (NULL) slots.
+static const struct spirv_compiler_fns *compilers[] = {
+#if HAVE_SHADERC
+    [SPIRV_SHADERC] = &spirv_shaderc,
+#endif
+};
+
+// User-visible names for the option values. Also used by
+// spirv_compiler_init() to derive the name stored in struct spirv_compiler.
+static const struct m_opt_choice_alternatives compiler_choices[] = {
+    {"auto",        SPIRV_AUTO},
+#if HAVE_SHADERC
+    {"shaderc",     SPIRV_SHADERC},
+#endif
+    {0}
+};
+
+// Storage for the option group below.
+struct spirv_opts {
+    int compiler; // one of the SPIRV_* values
+};
+
+// Option group holding "spirv-compiler"; read by spirv_compiler_init()
+// through mp_get_config_group().
+#define OPT_BASE_STRUCT struct spirv_opts
+const struct m_sub_options spirv_conf = {
+    .opts = (const struct m_option[]) {
+        {"spirv-compiler", OPT_CHOICE_C(compiler, compiler_choices)},
+        {0}
+    },
+    .size = sizeof(struct spirv_opts),
+};
+
+// Pick and initialize a SPIR-V compiler backend for `ctx`.
+//
+// The "spirv-compiler" option selects the backend. With SPIRV_AUTO every
+// compiled-in backend is probed in enum order, otherwise only the requested
+// one is tried. For each candidate a fresh, zeroed struct spirv_compiler is
+// attached to ctx->spirv and the backend's init() is called.
+//
+// Returns true as soon as one backend initializes; ctx->spirv then stays set.
+// If no backend could be initialized, ctx->spirv is NULL, an error is logged
+// and false is returned.
+bool spirv_compiler_init(struct ra_ctx *ctx)
+{
+    // Only the selected compiler index is needed, so the option group is
+    // fetched into a temporary talloc context and released right away.
+    void *tmp = talloc_new(NULL);
+    struct spirv_opts *opts = mp_get_config_group(tmp, ctx->global, &spirv_conf);
+    int compiler = opts->compiler;
+    talloc_free(tmp);
+
+    for (int i = SPIRV_AUTO+1; i < MP_ARRAY_SIZE(compilers); i++) {
+        if (compiler != SPIRV_AUTO && i != compiler)
+            continue;
+        if (!compilers[i])
+            continue; // backend not compiled in
+
+        ctx->spirv = talloc_zero(ctx, struct spirv_compiler);
+        ctx->spirv->log = ctx->log;
+        ctx->spirv->fns = compilers[i];
+
+        // The struct was zero-allocated and at most size-1 bytes are copied,
+        // so the stored name is always NUL-terminated.
+        const char *name = m_opt_choice_str(compiler_choices, i);
+        strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name) - 1);
+        MP_VERBOSE(ctx, "Initializing SPIR-V compiler '%s'\n", name);
+        if (ctx->spirv->fns->init(ctx))
+            return true;
+
+        // This backend failed: drop its state before trying the next one
+        talloc_free(ctx->spirv);
+        ctx->spirv = NULL;
+    }
+
+    MP_ERR(ctx, "Failed initializing SPIR-V compiler!\n");
+    return false;
+}
diff --git a/video/out/gpu/spirv.h b/video/out/gpu/spirv.h
new file mode 100644
index 0000000..e3dbd4f
--- /dev/null
+++ b/video/out/gpu/spirv.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "common/msg.h"
+#include "common/common.h"
+#include "context.h"
+
+// Shader stage of a GLSL source handed to spirv_compiler_fns.compile_glsl.
+enum glsl_shader {
+    GLSL_SHADER_VERTEX,
+    GLSL_SHADER_FRAGMENT,
+    GLSL_SHADER_COMPUTE,
+};
+
+// Size of spirv_compiler.name, including the terminating NUL byte.
+#define SPIRV_NAME_MAX_LEN 32
+
+// State of the active SPIR-V compiler, stored in ra_ctx.spirv. The struct
+// itself is allocated by spirv_compiler_init(); the backend's init() fills in
+// priv and the capability fields.
+struct spirv_compiler {
+    char name[SPIRV_NAME_MAX_LEN]; // backend name (option choice string)
+    const struct spirv_compiler_fns *fns; // backend function table
+    struct mp_log *log;
+    void *priv; // backend-private state
+
+    const char *required_ext; // or NULL
+    int glsl_version;         // GLSL version supported
+    int compiler_version;     // for cache invalidation, may be left as 0
+    int ra_caps;              // RA_CAP_* provided by this implementation, if any
+};
+
+// Entry points a SPIR-V compiler backend has to provide.
+struct spirv_compiler_fns {
+    // Compile GLSL to SPIR-V, under GL_KHR_vulkan_glsl semantics.
+    // Returns whether compilation succeeded; on success the generated binary
+    // is stored in *out_spirv. (The shaderc backend allocates that data with
+    // `tactx` as talloc parent.)
+    bool (*compile_glsl)(struct spirv_compiler *spirv, void *tactx,
+                         enum glsl_shader type, const char *glsl,
+                         struct bstr *out_spirv);
+
+    // Called by spirv_compiler_init / ra_ctx_destroy. These don't need to
+    // allocate/free ctx->spirv, that is done by the caller
+    bool (*init)(struct ra_ctx *ctx);
+    void (*uninit)(struct ra_ctx *ctx); // optional
+};
+
+// Initializes ctx->spirv to a valid SPIR-V compiler, or returns false on
+// failure. Cleanup will be handled by ra_ctx_destroy.
+bool spirv_compiler_init(struct ra_ctx *ctx);
diff --git a/video/out/gpu/spirv_shaderc.c b/video/out/gpu/spirv_shaderc.c
new file mode 100644
index 0000000..f285631
--- /dev/null
+++ b/video/out/gpu/spirv_shaderc.c
@@ -0,0 +1,125 @@
+#include "common/msg.h"
+
+#include "context.h"
+#include "spirv.h"
+
+#include <shaderc/shaderc.h>
+
+// Backend state kept in spirv_compiler.priv: the shaderc compiler handle and
+// the compile options applied to every compilation.
+struct priv {
+    shaderc_compiler_t compiler;
+    shaderc_compile_options_t opts;
+};
+
+// Release the shaderc handles held by this backend. Does nothing when the
+// private state was never allocated.
+static void shaderc_uninit(struct ra_ctx *ctx)
+{
+    struct priv *state = ctx->spirv->priv;
+
+    if (state) {
+        shaderc_compile_options_release(state->opts);
+        shaderc_compiler_release(state->compiler);
+    }
+}
+
+// Set up the shaderc backend for ctx->spirv.
+//
+// Allocates the private state (as a talloc child of ctx->spirv), creates the
+// compiler and its options, enables performance optimization (plus debug info
+// if ctx->opts.debug is set) and fills in compiler_version / glsl_version.
+// Returns false if either shaderc object could not be created; whatever was
+// created up to that point is released again.
+static bool shaderc_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->spirv->priv = talloc_zero(ctx->spirv, struct priv);
+
+    p->compiler = shaderc_compiler_initialize();
+    if (!p->compiler)
+        goto error;
+    p->opts = shaderc_compile_options_initialize();
+    if (!p->opts)
+        goto error;
+
+    shaderc_compile_options_set_optimization_level(p->opts,
+                                    shaderc_optimization_level_performance);
+    if (ctx->opts.debug)
+        shaderc_compile_options_set_generate_debug_info(p->opts);
+
+    // shaderc_get_spv_version() writes through unsigned int pointers, so the
+    // receiving variables must be unsigned as well.
+    unsigned int ver = 0, rev = 0;
+    shaderc_get_spv_version(&ver, &rev);
+    ctx->spirv->compiler_version = ver * 100 + rev; // forwards compatibility
+    ctx->spirv->glsl_version = 450; // impossible to query?
+    return true;
+
+error:
+    shaderc_uninit(ctx);
+    return false;
+}
+
+// Run shaderc on `glsl` for the given shader stage. With debug=true the
+// result contains SPIR-V assembly text, otherwise the SPIR-V binary. The
+// caller owns the returned result object.
+static shaderc_compilation_result_t compile(struct priv *p,
+                                            enum glsl_shader type,
+                                            const char *glsl, bool debug)
+{
+    static const shaderc_shader_kind kinds[] = {
+        [GLSL_SHADER_VERTEX]   = shaderc_glsl_vertex_shader,
+        [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader,
+        [GLSL_SHADER_COMPUTE]  = shaderc_glsl_compute_shader,
+    };
+
+    const shaderc_shader_kind kind = kinds[type];
+    const size_t glsl_len = strlen(glsl);
+
+    if (!debug) {
+        return shaderc_compile_into_spv(p->compiler, glsl, glsl_len,
+                                        kind, "input", "main", p->opts);
+    }
+
+    return shaderc_compile_into_spv_assembly(p->compiler, glsl, glsl_len,
+                                             kind, "input", "main", p->opts);
+}
+
+// compile_glsl implementation for shaderc.
+//
+// Compiles `glsl`, logs shaderc's messages and the final status (at error,
+// warning or verbose level, depending on what shaderc reported) and, on
+// success, stores a copy of the SPIR-V binary in *out_spirv, allocated with
+// `tactx` as talloc parent. Returns whether compilation succeeded.
+static bool shaderc_compile(struct spirv_compiler *spirv, void *tactx,
+                            enum glsl_shader type, const char *glsl,
+                            struct bstr *out_spirv)
+{
+    static const char *results[] = {
+        [shaderc_compilation_status_success]            = "success",
+        [shaderc_compilation_status_invalid_stage]      = "invalid stage",
+        [shaderc_compilation_status_compilation_error]  = "error",
+        [shaderc_compilation_status_internal_error]     = "internal error",
+        [shaderc_compilation_status_null_result_object] = "no result",
+        [shaderc_compilation_status_invalid_assembly]   = "invalid assembly",
+    };
+
+    struct priv *p = spirv->priv;
+    shaderc_compilation_result_t binary = compile(p, type, glsl, false);
+
+    // The log level follows the most severe kind of message reported
+    int errs = shaderc_result_get_num_errors(binary);
+    int warn = shaderc_result_get_num_warnings(binary);
+    int msgl = MSGL_V;
+    if (errs) {
+        msgl = MSGL_ERR;
+    } else if (warn) {
+        msgl = MSGL_WARN;
+    }
+
+    const char *text = shaderc_result_get_error_message(binary);
+    if (text[0])
+        MP_MSG(spirv, msgl, "shaderc output:\n%s", text);
+
+    int code = shaderc_result_get_compilation_status(binary);
+    bool success = code == shaderc_compilation_status_success;
+
+    const char *status = "unknown";
+    if (code < MP_ARRAY_SIZE(results))
+        status = results[code];
+    MP_MSG(spirv, msgl, "shaderc compile status '%s' (%d errors, %d warnings)\n",
+           status, errs, warn);
+
+    if (success) {
+        // Copy the binary out, since it is owned by the result object
+        void *bytes = (void *) shaderc_result_get_bytes(binary);
+        out_spirv->len = shaderc_result_get_length(binary);
+        out_spirv->start = talloc_memdup(tactx, bytes, out_spirv->len);
+    }
+
+    // For debugging, also dump the SPIR-V disassembly. There seems to be no
+    // way to obtain it other than compiling the shader a second time.
+    if (mp_msg_test(spirv->log, MSGL_TRACE)) {
+        shaderc_compilation_result_t listing = compile(p, type, glsl, true);
+        MP_TRACE(spirv, "Generated SPIR-V:\n%.*s",
+                 (int)shaderc_result_get_length(listing),
+                 shaderc_result_get_bytes(listing));
+        shaderc_result_release(listing);
+    }
+
+    shaderc_result_release(binary);
+    return success;
+}
+
+// Function table of the shaderc backend; referenced from the compilers[]
+// table in spirv.c.
+const struct spirv_compiler_fns spirv_shaderc = {
+    .compile_glsl = shaderc_compile,
+    .init = shaderc_init,
+    .uninit = shaderc_uninit,
+};
diff --git a/video/out/gpu/user_shaders.c b/video/out/gpu/user_shaders.c
new file mode 100644
index 0000000..708de87
--- /dev/null
+++ b/video/out/gpu/user_shaders.c
@@ -0,0 +1,463 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <math.h>
+
+#include "common/msg.h"
+#include "misc/ctype.h"
+#include "user_shaders.h"
+
+// Parse a space-separated RPN size expression into `out`.
+//
+// Each word becomes one element:
+//   NAME.w / NAME.width    width of the texture NAME
+//   NAME.h / NAME.height   height of the texture NAME
+//   + - * / % > < =        binary operator (only the first character counts)
+//   !                      unary "not"
+//   word starting with a digit: float constant
+//
+// Returns false if a word is not recognized or the expression has more than
+// MAX_SZEXP_SIZE elements; `out` may be partially overwritten in that case.
+// An empty line leaves `out` untouched. The varname fields point into `line`
+// and are not copied.
+static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE])
+{
+    int pos = 0;
+
+    while (line.len > 0) {
+        struct bstr word = bstr_strip(bstr_splitchar(line, &line, ' '));
+        if (word.len == 0)
+            continue;
+
+        if (pos >= MAX_SZEXP_SIZE)
+            return false;
+
+        struct szexp *exp = &out[pos++];
+
+        if (bstr_eatend0(&word, ".w") || bstr_eatend0(&word, ".width")) {
+            exp->tag = SZEXP_VAR_W;
+            exp->val.varname = word;
+            continue;
+        }
+
+        if (bstr_eatend0(&word, ".h") || bstr_eatend0(&word, ".height")) {
+            exp->tag = SZEXP_VAR_H;
+            exp->val.varname = word;
+            continue;
+        }
+
+        // Note: `continue` inside the switch advances the enclosing loop
+        switch (word.start[0]) {
+        case '+': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_ADD; continue;
+        case '-': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_SUB; continue;
+        case '*': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MUL; continue;
+        case '/': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_DIV; continue;
+        case '%': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MOD; continue;
+        case '!': exp->tag = SZEXP_OP1; exp->val.op = SZEXP_OP_NOT; continue;
+        case '>': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_GT;  continue;
+        case '<': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_LT;  continue;
+        case '=': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_EQ;  continue;
+        }
+
+        if (mp_isdigit(word.start[0])) {
+            exp->tag = SZEXP_CONST;
+            if (bstr_sscanf(word, "%f", &exp->val.cval) != 1)
+                return false;
+            continue;
+        }
+
+        // Some sort of illegal expression
+        return false;
+    }
+
+    // Terminate the expression. `out` may still hold a longer expression from
+    // an earlier header of the same kind; without this its tail would be
+    // evaluated as part of the new one.
+    if (pos > 0) {
+        for (int i = pos; i < MAX_SZEXP_SIZE; i++)
+            out[i] = (struct szexp){ .tag = SZEXP_END };
+    }
+
+    return true;
+}
+
+// Evaluate the RPN expression `expr` on a small float stack. Texture sizes
+// are resolved through `lookup`, which receives `priv` and the texture name
+// and fills in {width, height}.
+// Returns true and stores the value in *result on success. On any error (stack
+// underflow, unknown variable, non-finite result of a binary operation, or
+// anything other than exactly one value left at the end) a warning is logged,
+// false is returned and *result is left untouched.
+bool eval_szexpr(struct mp_log *log, void *priv,
+                 bool (*lookup)(void *priv, struct bstr var, float size[2]),
+                 struct szexp expr[MAX_SZEXP_SIZE], float *result)
+{
+    float stack[MAX_SZEXP_SIZE] = {0};
+    int depth = 0; // number of values currently on the stack
+
+    // The expression ends at the first SZEXP_END, or after MAX_SZEXP_SIZE
+    // elements if there is none
+    for (int i = 0; i < MAX_SZEXP_SIZE && expr[i].tag != SZEXP_END; i++) {
+        const struct szexp *e = &expr[i];
+
+        switch (e->tag) {
+        case SZEXP_CONST:
+            // Expressions are bound by MAX_SZEXP_SIZE, so the stack cannot
+            // overflow
+            assert(depth < MAX_SZEXP_SIZE);
+            stack[depth++] = e->val.cval;
+            break;
+
+        case SZEXP_OP1:
+            if (depth < 1) {
+                mp_warn(log, "Stack underflow in RPN expression!\n");
+                return false;
+            }
+
+            // Unary operators work in place on the top of the stack
+            switch (e->val.op) {
+            case SZEXP_OP_NOT: stack[depth-1] = !stack[depth-1]; break;
+            default: MP_ASSERT_UNREACHABLE();
+            }
+            break;
+
+        case SZEXP_OP2: {
+            if (depth < 2) {
+                mp_warn(log, "Stack underflow in RPN expression!\n");
+                return false;
+            }
+
+            // The right-hand operand was pushed last
+            float rhs = stack[--depth];
+            float lhs = stack[--depth];
+            float val = 0.0;
+            switch (e->val.op) {
+            case SZEXP_OP_ADD: val = lhs + rhs; break;
+            case SZEXP_OP_SUB: val = lhs - rhs; break;
+            case SZEXP_OP_MUL: val = lhs * rhs; break;
+            case SZEXP_OP_DIV: val = lhs / rhs; break;
+            case SZEXP_OP_MOD: val = fmodf(lhs, rhs); break;
+            case SZEXP_OP_GT:  val = lhs > rhs; break;
+            case SZEXP_OP_LT:  val = lhs < rhs; break;
+            case SZEXP_OP_EQ:  val = lhs == rhs; break;
+            default: MP_ASSERT_UNREACHABLE();
+            }
+
+            // Catches e.g. division by zero
+            if (!isfinite(val)) {
+                mp_warn(log, "Illegal operation in RPN expression!\n");
+                return false;
+            }
+
+            stack[depth++] = val;
+            break;
+        }
+
+        case SZEXP_VAR_W:
+        case SZEXP_VAR_H: {
+            struct bstr name = e->val.varname;
+            float size[2];
+
+            if (!lookup(priv, name, size)) {
+                mp_warn(log, "Variable %.*s not found in RPN expression!\n",
+                        BSTR_P(name));
+                return false;
+            }
+
+            if (e->tag == SZEXP_VAR_W) {
+                stack[depth++] = size[0];
+            } else {
+                stack[depth++] = size[1];
+            }
+            break;
+        }
+
+        default:
+            // Unknown tags are ignored
+            break;
+        }
+    }
+
+    // A well-formed expression leaves exactly one value behind
+    if (depth != 1) {
+        mp_warn(log, "Malformed stack after RPN expression!\n");
+        return false;
+    }
+
+    *result = stack[0];
+    return true;
+}
+
+// Parse one hook pass from the start of *body into *out.
+//
+// A pass consists of a run of "//!" header lines followed by the shader text,
+// which extends up to the next "//!" marker (or the end of the input). On
+// return, *body has been advanced past everything that was consumed.
+//
+// Header commands are matched by prefix, in the order they are tested below:
+// HOOK, BIND, SAVE, DESC, OFFSET, WIDTH, HEIGHT, WHEN, COMPONENTS, COMPUTE.
+// Returns false (after logging an error) on an unknown command, a malformed
+// argument, or too many HOOK/BIND lines. All bstr fields of *out point into
+// the caller's buffer; nothing is copied.
+static bool parse_hook(struct mp_log *log, struct bstr *body,
+                       struct gl_user_shader_hook *out)
+{
+    // Defaults: output size equals the size of HOOKED, always enabled
+    *out = (struct gl_user_shader_hook){
+        .pass_desc = bstr0("(unknown)"),
+        .offset = identity_trans,
+        .align_offset = false,
+        .width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}},
+        .height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}},
+        .cond = {{ SZEXP_CONST, { .cval = 1.0 }}},
+    };
+
+    int hook_idx = 0;
+    int bind_idx = 0;
+
+    // Parse all headers
+    while (true) {
+        struct bstr rest;
+        struct bstr line = bstr_strip(bstr_getline(*body, &rest));
+
+        // Check for the presence of the magic line beginning
+        if (!bstr_eatstart0(&line, "//!"))
+            break;
+
+        // Only header lines are consumed; the first non-header line stays in
+        // *body and becomes part of the pass body
+        *body = rest;
+
+        // Parse the supported commands
+        if (bstr_eatstart0(&line, "HOOK")) {
+            if (hook_idx == SHADER_MAX_HOOKS) {
+                mp_err(log, "Passes may only hook up to %d textures!\n",
+                       SHADER_MAX_HOOKS);
+                return false;
+            }
+            out->hook_tex[hook_idx++] = bstr_strip(line);
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "BIND")) {
+            if (bind_idx == SHADER_MAX_BINDS) {
+                mp_err(log, "Passes may only bind up to %d textures!\n",
+                       SHADER_MAX_BINDS);
+                return false;
+            }
+            out->bind_tex[bind_idx++] = bstr_strip(line);
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "SAVE")) {
+            out->save_tex = bstr_strip(line);
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "DESC")) {
+            out->pass_desc = bstr_strip(line);
+            continue;
+        }
+
+        // OFFSET is either the keyword ALIGN or an explicit "x y" pair
+        if (bstr_eatstart0(&line, "OFFSET")) {
+            line = bstr_strip(line);
+            if (bstr_equals0(line, "ALIGN")) {
+                out->align_offset = true;
+            } else {
+                float ox, oy;
+                if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) {
+                    mp_err(log, "Error while parsing OFFSET!\n");
+                    return false;
+                }
+                out->offset.t[0] = ox;
+                out->offset.t[1] = oy;
+            }
+            continue;
+        }
+
+        // WIDTH, HEIGHT and WHEN take RPN expressions
+        if (bstr_eatstart0(&line, "WIDTH")) {
+            if (!parse_rpn_szexpr(line, out->width)) {
+                mp_err(log, "Error while parsing WIDTH!\n");
+                return false;
+            }
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "HEIGHT")) {
+            if (!parse_rpn_szexpr(line, out->height)) {
+                mp_err(log, "Error while parsing HEIGHT!\n");
+                return false;
+            }
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "WHEN")) {
+            if (!parse_rpn_szexpr(line, out->cond)) {
+                mp_err(log, "Error while parsing WHEN!\n");
+                return false;
+            }
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "COMPONENTS")) {
+            if (bstr_sscanf(line, "%d", &out->components) != 1) {
+                mp_err(log, "Error while parsing COMPONENTS!\n");
+                return false;
+            }
+            continue;
+        }
+
+        // COMPUTE takes the block size, optionally followed by the thread
+        // counts; with only two values the thread counts stay 0
+        if (bstr_eatstart0(&line, "COMPUTE")) {
+            struct compute_info *ci = &out->compute;
+            int num = bstr_sscanf(line, "%d %d %d %d", &ci->block_w, &ci->block_h,
+                                  &ci->threads_w, &ci->threads_h);
+
+            if (num == 2 || num == 4) {
+                ci->active = true;
+                ci->directly_writes = true;
+            } else {
+                mp_err(log, "Error while parsing COMPUTE!\n");
+                return false;
+            }
+            continue;
+        }
+
+        // Unknown command type
+        mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line));
+        return false;
+    }
+
+    // The rest of the file up until the next magic line beginning (if any)
+    // shall be the shader body
+    if (bstr_split_tok(*body, "//!", &out->pass_body, body)) {
+        // Make sure the magic line is part of the rest
+        // (the split consumed the 3-byte "//!" token, so step back over it)
+        body->start -= 3;
+        body->len += 3;
+    }
+
+    // Sanity checking
+    // (a pass without hooks is only warned about, not rejected)
+    if (hook_idx == 0)
+        mp_warn(log, "Pass has no hooked textures (will be ignored)!\n");
+
+    return true;
+}
+
+// Parse one //!TEXTURE block from the start of *body into *out.
+//
+// The block consists of "//!" header lines (TEXTURE, SIZE, FORMAT, FILTER,
+// BORDER) followed by the texture contents as hexadecimal text, which extends
+// up to the next "//!" marker (or the end of the input). On return, *body has
+// been advanced past everything that was consumed.
+//
+// Returns false (after logging an error) on an unknown or malformed header, a
+// missing, unknown or opaque FORMAT, a format that cannot be filtered while
+// linear filtering is requested, invalid hex data, or data whose length does
+// not match the declared size. On success, out->params.initial_data holds the
+// decoded bytes. NOTE(review): the decoded buffer is requested from
+// bstr_decode_hex() with a NULL first argument (presumably the talloc
+// parent), so the consumer presumably has to free it - confirm.
+static bool parse_tex(struct mp_log *log, struct ra *ra, struct bstr *body,
+                      struct gl_user_shader_tex *out)
+{
+    // Defaults: a 1x1 2D texture with linear filtering; FORMAT is mandatory
+    *out = (struct gl_user_shader_tex){
+        .name = bstr0("USER_TEX"),
+        .params = {
+            .dimensions = 2,
+            .w = 1, .h = 1, .d = 1,
+            .render_src = true,
+            .src_linear = true,
+        },
+    };
+    struct ra_tex_params *p = &out->params;
+
+    while (true) {
+        struct bstr rest;
+        struct bstr line = bstr_strip(bstr_getline(*body, &rest));
+
+        if (!bstr_eatstart0(&line, "//!"))
+            break;
+
+        *body = rest;
+
+        if (bstr_eatstart0(&line, "TEXTURE")) {
+            out->name = bstr_strip(line);
+            continue;
+        }
+
+        // The number of values given determines the dimensionality
+        if (bstr_eatstart0(&line, "SIZE")) {
+            p->dimensions = bstr_sscanf(line, "%d %d %d", &p->w, &p->h, &p->d);
+            if (p->dimensions < 1 || p->dimensions > 3 ||
+                p->w < 1 || p->h < 1 || p->d < 1)
+            {
+                mp_err(log, "Error while parsing SIZE!\n");
+                return false;
+            }
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "FORMAT ")) {
+            // Tolerate extra whitespace before the name, like FILTER/BORDER
+            line = bstr_strip(line);
+            p->format = NULL;
+            for (int n = 0; n < ra->num_formats; n++) {
+                const struct ra_format *fmt = ra->formats[n];
+                if (bstr_equals0(line, fmt->name)) {
+                    p->format = fmt;
+                    break;
+                }
+            }
+            // (pixel_size==0 is for opaque formats)
+            if (!p->format || !p->format->pixel_size) {
+                mp_err(log, "Unrecognized/unavailable FORMAT name: '%.*s'!\n",
+                       BSTR_P(line));
+                return false;
+            }
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "FILTER")) {
+            line = bstr_strip(line);
+            if (bstr_equals0(line, "LINEAR")) {
+                p->src_linear = true;
+            } else if (bstr_equals0(line, "NEAREST")) {
+                p->src_linear = false;
+            } else {
+                mp_err(log, "Unrecognized FILTER: '%.*s'!\n", BSTR_P(line));
+                return false;
+            }
+            continue;
+        }
+
+        if (bstr_eatstart0(&line, "BORDER")) {
+            line = bstr_strip(line);
+            if (bstr_equals0(line, "CLAMP")) {
+                p->src_repeat = false;
+            } else if (bstr_equals0(line, "REPEAT")) {
+                p->src_repeat = true;
+            } else {
+                mp_err(log, "Unrecognized BORDER: '%.*s'!\n", BSTR_P(line));
+                return false;
+            }
+            continue;
+        }
+
+        mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line));
+        return false;
+    }
+
+    if (!p->format) {
+        mp_err(log, "No FORMAT specified.\n");
+        return false;
+    }
+
+    if (p->src_linear && !p->format->linear_filter) {
+        mp_err(log, "The specified texture format cannot be filtered!\n");
+        return false;
+    }
+
+    // Decode the rest of the section (up to the next //! marker) as raw hex
+    // data for the texture
+    struct bstr hexdata;
+    if (bstr_split_tok(*body, "//!", &hexdata, body)) {
+        // Make sure the magic line is part of the rest
+        body->start -= 3;
+        body->len += 3;
+    }
+
+    // The dimensions come straight from the (untrusted) shader file, so the
+    // expected byte count is computed with explicit overflow checks instead
+    // of a plain int multiplication.
+    size_t expected_len = p->format->pixel_size;
+    const int dims[3] = {p->w, p->h, p->d};
+    for (int n = 0; n < 3; n++) {
+        if (dims[n] < 1 || expected_len > (size_t)-1 / (size_t)dims[n]) {
+            mp_err(log, "Shader TEXTURE dimensions are out of range!\n");
+            return false;
+        }
+        expected_len *= (size_t)dims[n];
+    }
+
+    struct bstr tex;
+    if (!bstr_decode_hex(NULL, bstr_strip(hexdata), &tex)) {
+        mp_err(log, "Error while parsing TEXTURE body: must be a valid "
+                    "hexadecimal sequence, on a single line!\n");
+        return false;
+    }
+
+    if (tex.len != expected_len) {
+        mp_err(log, "Shader TEXTURE size mismatch: got %zu bytes, expected %zu!\n",
+               tex.len, expected_len);
+        talloc_free(tex.start);
+        return false;
+    }
+
+    p->initial_data = tex.start;
+    return true;
+}
+
+// Split a user shader file into its blocks and hand each one to a callback:
+// //!TEXTURE blocks go to `dotex`, everything else is parsed as a hook pass
+// and goes to `dohook`. Anything before the first "//!" header is skipped.
+// Parsing stops silently at the first block that fails to parse or whose
+// callback returns false. Nothing happens if a callback is missing or the
+// shader is empty.
+void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader,
+                       void *priv,
+                       bool (*dohook)(void *p, struct gl_user_shader_hook hook),
+                       bool (*dotex)(void *p, struct gl_user_shader_tex tex))
+{
+    if (!dohook || !dotex || !shader.len)
+        return;
+
+    // Drop any leading text (e.g. comments) in front of the first header
+    int first_header = bstr_find(shader, bstr0("//!"));
+    if (first_header < 0) {
+        mp_warn(log, "Shader appears to contain no headers!\n");
+        return;
+    }
+    shader = bstr_cut(shader, first_header);
+
+    // Each iteration consumes one block from the front of `shader`
+    while (shader.len > 0) {
+        // The first header line decides how the block is parsed
+        if (bstr_startswith0(shader, "//!TEXTURE")) {
+            struct gl_user_shader_tex tex_block;
+            if (!parse_tex(log, ra, &shader, &tex_block))
+                return;
+            if (!dotex(priv, tex_block))
+                return;
+        } else {
+            struct gl_user_shader_hook hook_block;
+            if (!parse_hook(log, &shader, &hook_block))
+                return;
+            if (!dohook(priv, hook_block))
+                return;
+        }
+    }
+}
diff --git a/video/out/gpu/user_shaders.h b/video/out/gpu/user_shaders.h
new file mode 100644
index 0000000..4bb7c22
--- /dev/null
+++ b/video/out/gpu/user_shaders.h
@@ -0,0 +1,99 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_USER_SHADERS_H
+#define MP_GL_USER_SHADERS_H
+
+#include "utils.h"
+#include "ra.h"
+
+// Upper limits for one hook pass: number of HOOK lines, number of BIND lines
+// and number of elements in a single size expression.
+#define SHADER_MAX_HOOKS 16
+#define SHADER_MAX_BINDS 16
+#define MAX_SZEXP_SIZE 32
+
+// Operators of the RPN size expressions. SZEXP_OP_NOT is the only unary
+// operator (used with SZEXP_OP1); all others are binary (SZEXP_OP2).
+enum szexp_op {
+    SZEXP_OP_ADD,
+    SZEXP_OP_SUB,
+    SZEXP_OP_MUL,
+    SZEXP_OP_DIV,
+    SZEXP_OP_MOD,
+    SZEXP_OP_NOT,
+    SZEXP_OP_GT,
+    SZEXP_OP_LT,
+    SZEXP_OP_EQ,
+};
+
+// Kind of a single expression element. SZEXP_END is 0, so zero-initialized
+// elements terminate an expression.
+enum szexp_tag {
+    SZEXP_END = 0, // End of an RPN expression
+    SZEXP_CONST, // Push a constant value onto the stack
+    SZEXP_VAR_W, // Get the width/height of a named texture (variable)
+    SZEXP_VAR_H,
+    SZEXP_OP2, // Pop two elements and push the result of a dyadic operation
+    SZEXP_OP1, // Pop one element and push the result of a monadic operation
+};
+
+// One element of an RPN size expression; `tag` selects the valid union member.
+struct szexp {
+    enum szexp_tag tag;
+    union {
+        float cval;          // SZEXP_CONST
+        struct bstr varname; // SZEXP_VAR_W / SZEXP_VAR_H: texture name
+        enum szexp_op op;    // SZEXP_OP1 / SZEXP_OP2
+    } val;
+};
+
+// Compute shader parameters of a pass, filled in from the //!COMPUTE header.
+// That header carries either two values (block size only, thread counts stay
+// 0) or four values (block size and thread counts).
+struct compute_info {
+    bool active;
+    int block_w, block_h;     // Block size (each block corresponds to one WG)
+    int threads_w, threads_h; // How many threads form a working group
+    bool directly_writes;     // If true, shader is assumed to imageStore(out_image)
+};
+
+// One parsed hook pass of a user shader. The bstr fields reference the shader
+// text that was passed to the parser; they are not copies.
+struct gl_user_shader_hook {
+    struct bstr pass_desc;                  // DESC, "(unknown)" if not given
+    struct bstr hook_tex[SHADER_MAX_HOOKS]; // HOOK lines, in file order
+    struct bstr bind_tex[SHADER_MAX_BINDS]; // BIND lines, in file order
+    struct bstr save_tex;                   // SAVE
+    struct bstr pass_body;                  // shader text following the headers
+    struct gl_transform offset;             // OFFSET x y (translation part)
+    bool align_offset;                      // OFFSET ALIGN was given
+    struct szexp width[MAX_SZEXP_SIZE];     // WIDTH, defaults to HOOKED.w
+    struct szexp height[MAX_SZEXP_SIZE];    // HEIGHT, defaults to HOOKED.h
+    struct szexp cond[MAX_SZEXP_SIZE];      // WHEN, defaults to the constant 1
+    int components;                         // COMPONENTS, 0 if not given
+    struct compute_info compute;            // COMPUTE
+};
+
+// One parsed //!TEXTURE block of a user shader.
+struct gl_user_shader_tex {
+    struct bstr name; // TEXTURE name, "USER_TEX" if none was given
+    // Creation parameters; params.initial_data holds the decoded contents
+    struct ra_tex_params params;
+    // for video.c
+    struct ra_tex *tex;
+};
+
+// Parse all blocks of the user shader `shader`. `dohook` is invoked for every
+// successfully parsed hook pass and `dotex` for every successfully parsed
+// //!TEXTURE block, each time with `priv` as first argument. Parsing stops at
+// the first block that fails to parse or whose callback returns false.
+// Nothing is parsed if either callback is NULL or the shader is empty.
+void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader,
+                       void *priv,
+                       bool (*dohook)(void *p, struct gl_user_shader_hook hook),
+                       bool (*dotex)(void *p, struct gl_user_shader_tex tex));
+
+// Evaluate a szexp, given a lookup function for named textures. `lookup`
+// fills in {width, height} for a texture name and returns whether it exists.
+// Returns whether evaluation succeeded; *result is only written on success.
+bool eval_szexpr(struct mp_log *log, void *priv,
+                 bool (*lookup)(void *priv, struct bstr var, float size[2]),
+                 struct szexp expr[MAX_SZEXP_SIZE], float *result);
+
+#endif
diff --git a/video/out/gpu/utils.c b/video/out/gpu/utils.c
new file mode 100644
index 0000000..8a1aacf
--- /dev/null
+++ b/video/out/gpu/utils.c
@@ -0,0 +1,349 @@
+#include "common/msg.h"
+#include "video/out/vo.h"
+#include "utils.h"
+
+// Standard parallel 2D projection onto [-1, 1] in both axes, except y1 < y0
+// means that the coordinate system is flipped, not the projection.
+void gl_transform_ortho(struct gl_transform *t, float x0, float x1,
+                        float y0, float y1)
+{
+    if (y1 < y0) {
+        // Flipped system: project the range [y0 - y1, y0] instead
+        const float top = y0;
+        y0 = top - y1;
+        y1 = top;
+    }
+    const float dx = x1 - x0, dy = y1 - y0;
+    t->m[0][0] = 2.0f / dx;
+    t->m[0][1] = t->m[1][0] = 0.0f;
+    t->m[1][1] = 2.0f / dy;
+    t->t[0] = -(x1 + x0) / dx;
+    t->t[1] = -(y1 + y0) / dy;
+}
+
+// Post-compose t onto *x in place: afterwards, applying *x is equivalent to
+// applying the previous *x first and then t.
+void gl_transform_trans(struct gl_transform t, struct gl_transform *x)
+{
+    const struct gl_transform old = *x;
+    for (int col = 0; col < 2; col++) { // matrix part: x->m = t.m * old.m
+        x->m[0][col] = t.m[0][0] * old.m[0][col] + t.m[0][1] * old.m[1][col];
+        x->m[1][col] = t.m[1][0] * old.m[0][col] + t.m[1][1] * old.m[1][col];
+    }
+    gl_transform_vec(t, &x->t[0], &x->t[1]); // translation: t applied to x->t
+}
+
+void gl_transform_ortho_fbo(struct gl_transform *t, struct ra_fbo fbo)
+{
+    const struct ra_tex_params *tp = &fbo.tex->params; // size of the target
+    gl_transform_ortho(t, 0, tp->w, 0, tp->h * (fbo.flip ? -1 : 1)); // flip: -h
+}
+
+float gl_video_scale_ambient_lux(float lmin, float lmax,
+                                 float rmin, float rmax, float lux)
+{
+    // Log-scale interpolation: lux == lmin maps to rmin, lux == lmax to rmax
+    assert(lmax > lmin);
+    float scaled = (rmax - rmin) * (log10(lux) - log10(lmin));
+    float span = log10(lmax) - log10(lmin);
+    float value = scaled / span + rmin;
+
+    // Clamp into the range spanned by rmin and rmax (given in either order)
+    float hi = MPMAX(rmax, rmin);
+    float lo = MPMIN(rmax, rmin);
+    return MPMAX(MPMIN(value, hi), lo);
+}
+
+void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool)
+{
+    // Release each pooled buffer, then the array, then reset all pool state
+    for (int idx = 0; idx < pool->num_buffers; idx++)
+        ra_buf_free(ra, pool->buffers + idx);
+    talloc_free(pool->buffers);
+    *pool = (struct ra_buf_pool){0};
+}
+
+static bool ra_buf_params_compatible(const struct ra_buf_params *new,
+                                     const struct ra_buf_params *old)
+{
+    // Reusable iff the request is no larger and type/host flags are identical
+    bool fits = new->type == old->type && new->size <= old->size;
+    return fits && new->host_mapped == old->host_mapped &&
+           new->host_mutable == old->host_mutable;
+}
+
+static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool)
+{
+    // Add one buffer (created with the current params) at position pool->index
+    struct ra_buf *fresh = ra_buf_create(ra, &pool->current_params);
+    if (fresh) {
+        MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, fresh);
+        MP_VERBOSE(ra, "Resized buffer pool of type %u to size %d\n",
+                   pool->current_params.type, pool->num_buffers);
+    }
+    return fresh != NULL;
+}
+
+struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool,
+                               const struct ra_buf_params *params)
+{
+    assert(!params->initial_data); // initial data is not supported here
+
+    // Incompatible request: drop all pooled buffers and adopt the new params
+    if (!ra_buf_params_compatible(params, &pool->current_params)) {
+        ra_buf_pool_uninit(ra, pool);
+        pool->current_params = *params;
+    }
+
+    // An empty pool needs its first buffer
+    if (!pool->buffers && !ra_buf_pool_grow(ra, pool))
+        return NULL;
+
+    // If the buffer at the current index is not available yet, add another
+    if (!ra->fns->buf_poll(ra, pool->buffers[pool->index])) {
+        if (!ra_buf_pool_grow(ra, pool))
+            return NULL;
+    }
+
+    // Hand out the buffer at the current index, then advance (wrapping around)
+    struct ra_buf *picked = pool->buffers[pool->index];
+    pool->index = (pool->index + 1) % pool->num_buffers;
+    return picked;
+}
+
+bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo,
+                       const struct ra_tex_upload_params *params)
+{
+    // The caller already supplied a buffer, so upload from it directly
+    if (params->buf)
+        return ra->fns->tex_upload(ra, params);
+
+    // Size of the source data: 2D textures use the caller's stride (and the
+    // height of params->rc, if set), anything else is tightly packed rows
+    const struct ra_tex_params *tp = &params->tex->params;
+    bool is_2d = tp->dimensions == 2;
+    size_t row_size = is_2d ? params->stride : tp->w * tp->format->pixel_size;
+    int height = (is_2d && params->rc) ? mp_rect_h(*params->rc) : tp->h;
+
+    struct ra_buf_params bufparams = {
+        .type = RA_BUF_TYPE_TEX_UPLOAD,
+        .size = row_size * height * tp->d,
+        .host_mutable = true,
+    };
+
+    // Copy the pixels into a pooled upload buffer...
+    struct ra_buf *staging = ra_buf_pool_get(ra, pbo, &bufparams);
+    if (!staging)
+        return false;
+    ra->fns->buf_update(ra, staging, 0, params->src, bufparams.size);
+
+    // ...and let the backend upload from that buffer instead of params->src
+    struct ra_tex_upload_params newparams = *params;
+    newparams.buf = staging;
+    newparams.src = NULL;
+    return ra->fns->tex_upload(ra, &newparams);
+}
+
+struct ra_layout std140_layout(struct ra_renderpass_input *inp)
+{
+    // std140 packing rules:
+    // 1. The alignment of generic values is their size in bytes
+    // 2. The alignment of vectors is the vector length * the base count, with
+    // the exception of vec3 which is always aligned like vec4
+    // 3. The alignment of arrays is that of the element size rounded up to
+    // the nearest multiple of vec4
+    // 4. Matrices are treated like arrays of vectors
+    // 5. Arrays/matrices are laid out with a stride equal to the alignment
+    const size_t elem = ra_vartype_size(inp->type);
+    const size_t vec_size = elem * inp->dim_v;
+
+    size_t align = inp->dim_v == 3 ? vec_size + elem : vec_size; // rule 2
+    size_t stride = vec_size;
+    if (inp->dim_m > 1) {
+        // rules 3-5: each vector is padded up to a multiple of vec4
+        stride = MP_ALIGN_UP(vec_size, sizeof(float[4]));
+        align = stride;
+    }
+
+    return (struct ra_layout) {
+        .align = align, .stride = stride, .size = stride * inp->dim_m,
+    };
+}
+
+struct ra_layout std430_layout(struct ra_renderpass_input *inp)
+{
+    // std430 packing rules: like std140, except arrays/matrices are always
+    // "tightly" packed, even arrays/matrices of vec3s
+    const size_t elem = ra_vartype_size(inp->type);
+    const size_t vec_size = elem * inp->dim_v;
+
+    // Only a lone vec3 (dim_m == 1) gets the extra element of alignment
+    const bool lone_vec3 = inp->dim_v == 3 && inp->dim_m == 1;
+    size_t align = lone_vec3 ? vec_size + elem : vec_size;
+
+    return (struct ra_layout) {
+        .align  = align,
+        .stride = vec_size,
+        .size   = vec_size * inp->dim_m,
+    };
+}
+
+// Make *tex a w x h texture of format fmt, recreating it only if necessary
+bool ra_tex_resize(struct ra *ra, struct mp_log *log, struct ra_tex **tex,
+                   int w, int h, const struct ra_format *fmt)
+{
+    // Nothing to do if the existing texture already matches the request
+    const struct ra_tex *cur = *tex;
+    if (cur && cur->params.w == w && cur->params.h == h &&
+        cur->params.format == fmt)
+    {
+        return true;
+    }
+
+    mp_dbg(log, "Resizing texture: %dx%d\n", w, h);
+
+    // The texture is created as a linearly sampled render source and target,
+    // so the format has to allow that. On failure *tex is left untouched.
+    bool usable = fmt && fmt->renderable && fmt->linear_filter;
+    if (!usable) {
+        mp_err(log, "Format %s not supported.\n", fmt ? fmt->name : "(unset)");
+        return false;
+    }
+
+    ra_tex_free(ra, tex);
+    struct ra_tex_params params = {
+        .dimensions = 2,
+        .w = w, .h = h, .d = 1,
+        .format = fmt,
+        .src_linear = true,
+        .render_src = true, .render_dst = true,
+        .storage_dst = fmt->storable,
+        .blit_src = true,
+    };
+    *tex = ra_tex_create(ra, &params);
+    if (!*tex)
+        mp_err(log, "Error: texture could not be created.\n");
+    return *tex;
+}
+
+struct timer_pool {
+    struct ra *ra;
+    ra_timer *timer;
+    bool running; // detect invalid usage
+
+    uint64_t samples[VO_PERF_SAMPLE_COUNT]; // ring buffer of non-zero results
+    int sample_idx;                         // next slot to be written
+    int sample_count;                       // number of valid samples
+
+    uint64_t sum;  // running sum of the values currently in samples
+    uint64_t peak; // largest value currently in samples
+};
+
+struct timer_pool *timer_pool_create(struct ra *ra)
+{
+    // Backends without timer support (or a failed create) yield no pool
+    ra_timer *timer = ra->fns->timer_create ? ra->fns->timer_create(ra) : NULL;
+    if (!timer)
+        return NULL;
+
+    struct timer_pool *pool = talloc(NULL, struct timer_pool);
+    if (pool) {
+        // Everything except the backend and timer handles starts out zeroed
+        *pool = (struct timer_pool){ .ra = ra, .timer = timer };
+    } else {
+        // Don't leak the timer if the pool itself can't be allocated
+        ra->fns->timer_destroy(ra, timer);
+    }
+
+    return pool;
+}
+
+void timer_pool_destroy(struct timer_pool *pool)
+{
+    // A NULL pool (e.g. from a failed timer_pool_create) is a no-op
+    if (pool) {
+        pool->ra->fns->timer_destroy(pool->ra, pool->timer);
+        talloc_free(pool);
+    }
+}
+
+void timer_pool_start(struct timer_pool *pool)
+{
+    if (pool) { // a NULL pool is silently ignored
+        // Starting an already running pool is a usage error
+        assert(!pool->running);
+        pool->ra->fns->timer_start(pool->ra, pool->timer);
+        pool->running = true;
+    }
+}
+
+void timer_pool_stop(struct timer_pool *pool)
+{
+    if (!pool)
+        return;
+
+    assert(pool->running);
+    const uint64_t sample = pool->ra->fns->timer_stop(pool->ra, pool->timer);
+    pool->running = false;
+    if (!sample)
+        return; // a zero result is not recorded
+
+    // Store the sample at the write position, remembering what it replaces
+    uint64_t *slot = &pool->samples[pool->sample_idx];
+    const uint64_t evicted = *slot;
+    *slot = sample;
+    pool->sample_idx = (pool->sample_idx + 1) % VO_PERF_SAMPLE_COUNT;
+    pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT);
+    pool->sum = pool->sum + sample - evicted;
+
+    if (sample >= pool->peak) {
+        pool->peak = sample;
+    } else if (pool->peak == evicted) {
+        // The replaced value may have been the peak itself, in which case
+        // the whole buffer has to be rescanned for the new maximum
+        uint64_t peak = sample;
+        for (int n = 0; n < VO_PERF_SAMPLE_COUNT; n++)
+            peak = MPMAX(peak, pool->samples[n]);
+        pool->peak = peak;
+    }
+}
+
+struct mp_pass_perf timer_pool_measure(struct timer_pool *pool)
+{
+    struct mp_pass_perf res = {0};
+    if (!pool)
+        return res; // no pool, no data: everything stays zero
+
+    res.peak = pool->peak;
+    res.count = pool->sample_count;
+
+    // Copy the samples out oldest-first: the oldest valid sample sits
+    // sample_count slots behind the write position (modulo the buffer size)
+    int first = pool->sample_idx - pool->sample_count + VO_PERF_SAMPLE_COUNT;
+    for (int i = 0; i < res.count; i++)
+        res.samples[i] = pool->samples[(first + i) % VO_PERF_SAMPLE_COUNT];
+
+    // last and avg are only filled in once there is at least one sample
+    if (res.count > 0) {
+        res.last = res.samples[res.count - 1];
+        res.avg = pool->sum / res.count;
+    }
+
+    return res;
+}
+
+void mp_log_source(struct mp_log *log, int lev, const char *src)
+{
+    if (!src)
+        return;
+    // Emit one message per line of src, prefixed with its 1-based line number
+    for (int line = 1; *src; line++) {
+        const char *end = strchr(src, '\n');
+        // A final line without '\n' ends at the terminator; never form NULL + 1
+        if (!end)
+            end = src + strlen(src);
+        const char *next = *end ? end + 1 : end;
+        mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src);
+        src = next;
+    }
+}
diff --git a/video/out/gpu/utils.h b/video/out/gpu/utils.h
new file mode 100644
index 0000000..215873e
--- /dev/null
+++ b/video/out/gpu/utils.h
@@ -0,0 +1,108 @@
+#pragma once
+
+#include <stdbool.h>
+#include <math.h>
+
+#include "ra.h"
+#include "context.h"
+
+// A 3x2 matrix, with the translation part separate.
+struct gl_transform {
+    // row-major, e.g. in mathematical notation:
+    //  | m[0][0] m[0][1] |
+    //  | m[1][0] m[1][1] |
+    float m[2][2];
+    float t[2]; // translation, added after multiplying by m
+};
+
+static const struct gl_transform identity_trans = { // leaves points unchanged
+    .m = {{1.0, 0.0}, {0.0, 1.0}},
+    .t = {0.0, 0.0},
+};
+
+void gl_transform_ortho(struct gl_transform *t, float x0, float x1,
+                        float y0, float y1);
+
+// Transform the point (*x, *y) in place. This treats t as an affine
+// transformation, in other words t.t[n] gets added to the output.
+static inline void gl_transform_vec(struct gl_transform t, float *x, float *y)
+{
+    float vx = *x, vy = *y;
+    *x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0];
+    *y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1];
+}
+
+struct mp_rect_f { // rectangle given by two corners, (x0, y0) and (x1, y1)
+    float x0, y0, x1, y1;
+};
+
+// Semantic equality (fuzzy comparison): each edge must differ by under 1e-6
+static inline bool mp_rect_f_seq(struct mp_rect_f a, struct mp_rect_f b)
+{
+    bool x_close = fabs(a.x0 - b.x0) < 1e-6 && fabs(a.x1 - b.x1) < 1e-6;
+    return x_close && fabs(a.y0 - b.y0) < 1e-6 && fabs(a.y1 - b.y1) < 1e-6;
+}
+
+static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r)
+{
+    gl_transform_vec(t, &r->x0, &r->y0); // transform the (x0, y0) corner in place
+    gl_transform_vec(t, &r->x1, &r->y1); // transform the (x1, y1) corner in place
+}
+
+static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b)
+{
+    // Exact (not fuzzy) comparison of every matrix and translation entry
+    for (int row = 0; row < 2; row++) {
+        if (a.m[row][0] != b.m[row][0] || a.m[row][1] != b.m[row][1])
+            return false;
+        if (a.t[row] != b.t[row])
+            return false;
+    }
+    return true;
+}
+
+void gl_transform_trans(struct gl_transform t, struct gl_transform *x);
+
+void gl_transform_ortho_fbo(struct gl_transform *t, struct ra_fbo fbo);
+
+float gl_video_scale_ambient_lux(float lmin, float lmax,
+                                 float rmin, float rmax, float lux);
+
+// A pool of buffers, which can grow as needed
+struct ra_buf_pool {
+    struct ra_buf_params current_params; // params used to create new buffers
+    struct ra_buf **buffers;             // array of num_buffers buffers
+    int num_buffers;
+    int index;                           // next buffer ra_buf_pool_get() returns
+};
+
+void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool);
+
+// Note: params->initial_data is *not* supported
+struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool,
+                               const struct ra_buf_params *params);
+
+// Helper that wraps ra_tex_upload using texture upload buffers to ensure that
+// params->buf is always set. This is intended for RA-internal usage.
+bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo,
+                       const struct ra_tex_upload_params *params);
+
+// Layout rules for GLSL's packing modes
+struct ra_layout std140_layout(struct ra_renderpass_input *inp);
+struct ra_layout std430_layout(struct ra_renderpass_input *inp);
+
+bool ra_tex_resize(struct ra *ra, struct mp_log *log, struct ra_tex **tex,
+                   int w, int h, const struct ra_format *fmt);
+
+// A wrapper around ra_timer that does result pooling, averaging etc.
+struct timer_pool;
+
+struct timer_pool *timer_pool_create(struct ra *ra);
+void timer_pool_destroy(struct timer_pool *pool);
+void timer_pool_start(struct timer_pool *pool);
+void timer_pool_stop(struct timer_pool *pool);
+struct mp_pass_perf timer_pool_measure(struct timer_pool *pool);
+
+// print a multi line string with line numbers (e.g. for shader sources)
+// log, lev: module and log level, as in mp_msg()
+void mp_log_source(struct mp_log *log, int lev, const char *src);
diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c
new file mode 100644
index 0000000..852ee78
--- /dev/null
+++ b/video/out/gpu/video.c
@@ -0,0 +1,4364 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <float.h>
+#include <math.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include <libavutil/common.h>
+#include <libavutil/lfg.h>
+
+#include "video.h"
+
+#include "misc/bstr.h"
+#include "options/m_config.h"
+#include "options/path.h"
+#include "common/global.h"
+#include "options/options.h"
+#include "utils.h"
+#include "hwdec.h"
+#include "osd.h"
+#include "ra.h"
+#include "stream/stream.h"
+#include "video_shaders.h"
+#include "user_shaders.h"
+#include "error_diffusion.h"
+#include "video/out/filter_kernels.h"
+#include "video/out/aspect.h"
+#include "video/out/dither.h"
+#include "video/out/vo.h"
+
+// scale/cscale arguments that map directly to shader filter routines.
+// Note that the convolution filters are not included in this list.
+static const char *const fixed_scale_filters[] = {
+    "bilinear",
+    "bicubic_fast",
+    "oversample",
+    NULL
+};
+static const char *const fixed_tscale_filters[] = {
+    "oversample",
+    "linear",
+    NULL
+};
+
+// must be sorted, and terminated with 0
+int filter_sizes[] =
+    {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0};
+int tscale_sizes[] = {2, 4, 6, 8, 0};
+
+struct vertex_pt {
+    float x, y;
+};
+
+struct texplane {
+    struct ra_tex *tex;
+    int w, h;
+    bool flipped;
+};
+
+struct video_image {
+    struct texplane planes[4];
+    struct mp_image *mpi;       // original input image
+    uint64_t id;                // unique ID identifying mpi contents
+    bool hwdec_mapped;
+};
+
+enum plane_type {
+    PLANE_NONE = 0,
+    PLANE_RGB,
+    PLANE_LUMA,
+    PLANE_CHROMA,
+    PLANE_ALPHA,
+    PLANE_XYZ,
+};
+
+static const char *plane_names[] = {
+    [PLANE_NONE] = "unknown",
+    [PLANE_RGB] = "rgb",
+    [PLANE_LUMA] = "luma",
+    [PLANE_CHROMA] = "chroma",
+    [PLANE_ALPHA] = "alpha",
+    [PLANE_XYZ] = "xyz",
+};
+
+// A self-contained description of a source image which can be bound to a
+// texture unit and sampled from. Contains metadata about how it's to be used
+struct image {
+    enum plane_type type; // must be set to something non-zero
+    int components; // number of relevant coordinates
+    float multiplier; // multiplier to be used when sampling
+    struct ra_tex *tex;
+    int w, h; // logical size (after transformation)
+    struct gl_transform transform; // rendering transformation
+    int padding; // number of leading padding components (e.g. 2 = rg is padding)
+};
+
+// A named image, for user scripting purposes
+struct saved_img {
+    const char *name;
+    struct image img;
+};
+
+// A texture hook. This is some operation that transforms a named texture as
+// soon as it's generated
+struct tex_hook {
+    const char *save_tex;
+    const char *hook_tex[SHADER_MAX_HOOKS];
+    const char *bind_tex[SHADER_MAX_BINDS];
+    int components; // how many components are relevant (0 = same as input)
+    bool align_offset; // whether to align hooked tex with reference.
+    void *priv; // this gets talloc_freed when the tex_hook is removed
+    void (*hook)(struct gl_video *p, struct image img, // generates GLSL
+                 struct gl_transform *trans, void *priv);
+    bool (*cond)(struct gl_video *p, struct image img, void *priv);
+};
+
+struct surface {
+    struct ra_tex *tex;
+    uint64_t id;
+    double pts;
+};
+
+#define SURFACES_MAX 10
+
+struct cached_file { // entry in gl_video.files, see load_cached_file()
+    char *path;       // path as passed to load_cached_file() (the lookup key)
+    struct bstr body; // what was read for that path
+};
+
+struct pass_info {
+    struct bstr desc;
+    struct mp_pass_perf perf;
+};
+
+struct dr_buffer {
+    struct ra_buf *buf;
+    // The mpi reference will keep the data from being recycled (or from other
+    // references gaining write access) while the GPU is accessing the buffer.
+    struct mp_image *mpi;
+};
+
+struct gl_video {
+    struct ra *ra;
+
+    struct mpv_global *global;
+    struct mp_log *log;
+    struct gl_video_opts opts;
+    struct m_config_cache *opts_cache;
+    struct gl_lcms *cms;
+
+    int fb_depth;               // actual bits available in GL main framebuffer
+    struct m_color clear_color;
+    bool force_clear_color;
+
+    struct gl_shader_cache *sc;
+
+    struct osd_state *osd_state;
+    struct mpgl_osd *osd;
+    double osd_pts;
+
+    struct ra_tex *lut_3d_texture;
+    bool use_lut_3d;
+    int lut_3d_size[3];
+
+    struct ra_tex *dither_texture;
+
+    struct mp_image_params real_image_params;   // configured format
+    struct mp_image_params image_params;        // texture format (mind hwdec case)
+    struct ra_imgfmt_desc ra_format;            // texture format
+    int plane_count;
+
+    bool is_gray;
+    bool has_alpha;
+    char color_swizzle[5];
+    bool use_integer_conversion;
+
+    struct video_image image;
+
+    struct dr_buffer *dr_buffers;
+    int num_dr_buffers;
+
+    bool using_dr_path;
+
+    bool dumb_mode;
+    bool forced_dumb_mode;
+
+    // Cached vertex array, to avoid re-allocation per frame. For simplicity,
+    // our vertex format is simply a list of `vertex_pt`s, since this greatly
+    // simplifies offset calculation at the cost of (unneeded) flexibility.
+    struct vertex_pt *tmp_vertex;
+    struct ra_renderpass_input *vao;
+    int vao_len;
+
+    const struct ra_format *fbo_format;
+    struct ra_tex *merge_tex[4];
+    struct ra_tex *scale_tex[4];
+    struct ra_tex *integer_tex[4];
+    struct ra_tex *indirect_tex;
+    struct ra_tex *blend_subs_tex;
+    struct ra_tex *error_diffusion_tex[2];
+    struct ra_tex *screen_tex;
+    struct ra_tex *output_tex;
+    struct ra_tex **hook_textures;
+    int num_hook_textures;
+    int idx_hook_textures;
+
+    struct ra_buf *hdr_peak_ssbo;
+    struct surface surfaces[SURFACES_MAX];
+
+    // user pass descriptions and textures
+    struct tex_hook *tex_hooks;
+    int num_tex_hooks;
+    struct gl_user_shader_tex *user_textures;
+    int num_user_textures;
+
+    int surface_idx;
+    int surface_now;
+    int frames_drawn;
+    bool is_interpolated;
+    bool output_tex_valid;
+
+    // state for configured scalers
+    struct scaler scaler[SCALER_COUNT];
+
+    struct mp_csp_equalizer_state *video_eq;
+
+    struct mp_rect src_rect;    // displayed part of the source video
+    struct mp_rect dst_rect;    // video rectangle on output window
+    struct mp_osd_res osd_rect; // OSD size/margins
+
+    // temporary during rendering
+    struct compute_info pass_compute; // compute shader metadata for this pass
+    struct image *pass_imgs;          // bound images for this pass
+    int num_pass_imgs;
+    struct saved_img *saved_imgs;     // saved (named) images for this frame
+    int num_saved_imgs;
+
+    // effective current texture metadata - this will essentially affect the
+    // next render pass target, as well as implicitly tracking what needs to
+    // be done with the image
+    int texture_w, texture_h;
+    struct gl_transform texture_offset; // texture transform without rotation
+    int components;
+    bool use_linear;
+    float user_gamma;
+
+    // pass info / metrics
+    struct pass_info pass_fresh[VO_PASS_PERF_MAX];
+    struct pass_info pass_redraw[VO_PASS_PERF_MAX];
+    struct pass_info *pass;
+    int pass_idx;
+    struct timer_pool *upload_timer;
+    struct timer_pool *blit_timer;
+    struct timer_pool *osd_timer;
+
+    int frames_uploaded;
+    int frames_rendered;
+    AVLFG lfg;
+
+    // Cached because computing it can take relatively long
+    int last_dither_matrix_size;
+    float *last_dither_matrix;
+
+    struct cached_file *files;
+    int num_files;
+
+    struct ra_hwdec_ctx hwdec_ctx;
+    struct ra_hwdec_mapper *hwdec_mapper;
+    struct ra_hwdec *hwdec_overlay;
+    bool hwdec_active;
+
+    bool dsi_warned;
+    bool broken_frame; // temporary error state
+
+    bool colorspace_override_warned;
+    bool correct_downscaling_warned;
+};
+
+static const struct gl_video_opts gl_video_opts_def = {
+    .dither_algo = DITHER_FRUIT,
+    .dither_size = 6,
+    .temporal_dither_period = 1,
+    .error_diffusion = "sierra-lite",
+    .fbo_format = "auto",
+    .sigmoid_center = 0.75,
+    .sigmoid_slope = 6.5,
+    .scaler = {
+        {{"lanczos", .params={NAN, NAN}}, {.params = {NAN, NAN}}},    // scale
+        {{"hermite", .params={NAN, NAN}}, {.params = {NAN, NAN}}},    // dscale
+        {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}},         // cscale
+        {{"oversample", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // tscale
+    },
+    .scaler_resizes_only = true,
+    .correct_downscaling = true,
+    .linear_downscaling = true,
+    .sigmoid_upscaling = true,
+    .interpolation_threshold = 0.01,
+    .alpha_mode = ALPHA_BLEND_TILES,
+    .background = {0, 0, 0, 255},
+    .gamma = 1.0f,
+    .tone_map = {
+        .curve = TONE_MAPPING_AUTO,
+        .curve_param = NAN,
+        .max_boost = 1.0,
+        .decay_rate = 20.0,
+        .scene_threshold_low = 1.0,
+        .scene_threshold_high = 3.0,
+        .contrast_smoothness = 3.5,
+    },
+    .early_flush = -1,
+    .shader_cache = true,
+    .hwdec_interop = "auto",
+};
+
+static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
+                               struct bstr name, const char **value);
+
+static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
+                               struct bstr name, const char **value);
+
+static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt,
+                                        struct bstr name, const char **value);
+
+#define OPT_BASE_STRUCT struct gl_video_opts
+
+// Use for options which use NAN for defaults.
+#define OPT_FLOATDEF(field) \
+    OPT_FLOAT(field), \
+    .flags = M_OPT_DEFAULT_NAN
+
+#define SCALER_OPTS(n, i) \
+    {n, OPT_STRING_VALIDATE(scaler[i].kernel.name, validate_scaler_opt)},  \
+    {n"-param1", OPT_FLOATDEF(scaler[i].kernel.params[0])},                \
+    {n"-param2", OPT_FLOATDEF(scaler[i].kernel.params[1])},                \
+    {n"-blur",   OPT_FLOAT(scaler[i].kernel.blur)},                        \
+    {n"-cutoff", OPT_REMOVED("Hard-coded as 0.001")},                      \
+    {n"-taper",  OPT_FLOAT(scaler[i].kernel.taper), M_RANGE(0.0, 1.0)},    \
+    {n"-wparam", OPT_FLOATDEF(scaler[i].window.params[0])},                \
+    {n"-wblur",  OPT_REMOVED("Just adjust filter radius directly")},       \
+    {n"-wtaper", OPT_FLOAT(scaler[i].window.taper), M_RANGE(0.0, 1.0)},    \
+    {n"-clamp",  OPT_FLOAT(scaler[i].clamp), M_RANGE(0.0, 1.0)},           \
+    {n"-radius", OPT_FLOAT(scaler[i].radius), M_RANGE(0.5, 16.0)},         \
+    {n"-antiring", OPT_FLOAT(scaler[i].antiring), M_RANGE(0.0, 1.0)},      \
+    {n"-window", OPT_STRING_VALIDATE(scaler[i].window.name, validate_window_opt)}
+
+const struct m_sub_options gl_video_conf = {
+    .opts = (const m_option_t[]) {
+        {"gpu-dumb-mode", OPT_CHOICE(dumb_mode,
+            {"auto", 0}, {"yes", 1}, {"no", -1})},
+        {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0),
+            .deprecation_message = "no replacement"},
+        {"gamma-auto", OPT_BOOL(gamma_auto),
+            .deprecation_message = "no replacement"},
+        {"target-prim", OPT_CHOICE_C(target_prim, mp_csp_prim_names)},
+        {"target-trc", OPT_CHOICE_C(target_trc, mp_csp_trc_names)},
+        {"target-peak", OPT_CHOICE(target_peak, {"auto", 0}),
+            M_RANGE(10, 10000)},
+        {"target-contrast", OPT_CHOICE(target_contrast, {"auto", 0}, {"inf", -1}),
+            M_RANGE(10, 1000000)},
+        {"target-gamut", OPT_CHOICE_C(target_gamut, mp_csp_prim_names)},
+        {"tone-mapping", OPT_CHOICE(tone_map.curve,
+            {"auto",     TONE_MAPPING_AUTO},
+            {"clip",     TONE_MAPPING_CLIP},
+            {"mobius",   TONE_MAPPING_MOBIUS},
+            {"reinhard", TONE_MAPPING_REINHARD},
+            {"hable",    TONE_MAPPING_HABLE},
+            {"gamma",    TONE_MAPPING_GAMMA},
+            {"linear",   TONE_MAPPING_LINEAR},
+            {"spline",   TONE_MAPPING_SPLINE},
+            {"bt.2390",  TONE_MAPPING_BT_2390},
+            {"bt.2446a", TONE_MAPPING_BT_2446A},
+            {"st2094-40", TONE_MAPPING_ST2094_40},
+            {"st2094-10", TONE_MAPPING_ST2094_10})},
+        {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)},
+        {"inverse-tone-mapping", OPT_BOOL(tone_map.inverse)},
+        {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost),
+            M_RANGE(1.0, 10.0)},
+        {"tone-mapping-visualize", OPT_BOOL(tone_map.visualize)},
+        {"gamut-mapping-mode", OPT_CHOICE(tone_map.gamut_mode,
+            {"auto",        GAMUT_AUTO},
+            {"clip",        GAMUT_CLIP},
+            {"perceptual",  GAMUT_PERCEPTUAL},
+            {"relative",    GAMUT_RELATIVE},
+            {"saturation",  GAMUT_SATURATION},
+            {"absolute",    GAMUT_ABSOLUTE},
+            {"desaturate",  GAMUT_DESATURATE},
+            {"darken",      GAMUT_DARKEN},
+            {"warn",        GAMUT_WARN},
+            {"linear",      GAMUT_LINEAR})},
+        {"hdr-compute-peak", OPT_CHOICE(tone_map.compute_peak,
+            {"auto", 0},
+            {"yes", 1},
+            {"no", -1})},
+        {"hdr-peak-percentile", OPT_FLOAT(tone_map.peak_percentile),
+            M_RANGE(0.0, 100.0)},
+        {"hdr-peak-decay-rate", OPT_FLOAT(tone_map.decay_rate),
+            M_RANGE(0.0, 1000.0)},
+        {"hdr-scene-threshold-low", OPT_FLOAT(tone_map.scene_threshold_low),
+            M_RANGE(0, 20.0)},
+        {"hdr-scene-threshold-high", OPT_FLOAT(tone_map.scene_threshold_high),
+            M_RANGE(0, 20.0)},
+        {"hdr-contrast-recovery", OPT_FLOAT(tone_map.contrast_recovery),
+            M_RANGE(0, 2.0)},
+        {"hdr-contrast-smoothness", OPT_FLOAT(tone_map.contrast_smoothness),
+            M_RANGE(1.0, 100.0)},
+        {"opengl-pbo", OPT_BOOL(pbo)},
+        SCALER_OPTS("scale",  SCALER_SCALE),
+        SCALER_OPTS("dscale", SCALER_DSCALE),
+        SCALER_OPTS("cscale", SCALER_CSCALE),
+        SCALER_OPTS("tscale", SCALER_TSCALE),
+        {"scaler-lut-size", OPT_REMOVED("hard-coded as 8")},
+        {"scaler-resizes-only", OPT_BOOL(scaler_resizes_only)},
+        {"correct-downscaling", OPT_BOOL(correct_downscaling)},
+        {"linear-downscaling", OPT_BOOL(linear_downscaling)},
+        {"linear-upscaling", OPT_BOOL(linear_upscaling)},
+        {"sigmoid-upscaling", OPT_BOOL(sigmoid_upscaling)},
+        {"sigmoid-center", OPT_FLOAT(sigmoid_center), M_RANGE(0.0, 1.0)},
+        {"sigmoid-slope", OPT_FLOAT(sigmoid_slope), M_RANGE(1.0, 20.0)},
+        {"fbo-format", OPT_STRING(fbo_format)},
+        {"dither-depth", OPT_CHOICE(dither_depth, {"no", -1}, {"auto", 0}),
+            M_RANGE(-1, 16)},
+        {"dither", OPT_CHOICE(dither_algo,
+            {"fruit", DITHER_FRUIT},
+            {"ordered", DITHER_ORDERED},
+            {"error-diffusion", DITHER_ERROR_DIFFUSION},
+            {"no", DITHER_NONE})},
+        {"dither-size-fruit", OPT_INT(dither_size), M_RANGE(2, 8)},
+        {"temporal-dither", OPT_BOOL(temporal_dither)},
+        {"temporal-dither-period", OPT_INT(temporal_dither_period),
+            M_RANGE(1, 128)},
+        {"error-diffusion",
+            OPT_STRING_VALIDATE(error_diffusion, validate_error_diffusion_opt)},
+        {"alpha", OPT_CHOICE(alpha_mode,
+            {"no", ALPHA_NO},
+            {"yes", ALPHA_YES},
+            {"blend", ALPHA_BLEND},
+            {"blend-tiles", ALPHA_BLEND_TILES})},
+        {"opengl-rectangle-textures", OPT_BOOL(use_rectangle)},
+        {"background", OPT_COLOR(background)},
+        {"interpolation", OPT_BOOL(interpolation)},
+        {"interpolation-threshold", OPT_FLOAT(interpolation_threshold)},
+        {"blend-subtitles", OPT_CHOICE(blend_subs,
+            {"no", BLEND_SUBS_NO},
+            {"yes", BLEND_SUBS_YES},
+            {"video", BLEND_SUBS_VIDEO})},
+        {"glsl-shaders", OPT_PATHLIST(user_shaders), .flags = M_OPT_FILE},
+        {"glsl-shader", OPT_CLI_ALIAS("glsl-shaders-append")},
+        {"glsl-shader-opts", OPT_KEYVALUELIST(user_shader_opts)},
+        {"deband", OPT_BOOL(deband)},
+        {"deband", OPT_SUBSTRUCT(deband_opts, deband_conf)},
+        {"sharpen", OPT_FLOAT(unsharp)},
+        {"gpu-tex-pad-x", OPT_INT(tex_pad_x), M_RANGE(0, 4096)},
+        {"gpu-tex-pad-y", OPT_INT(tex_pad_y), M_RANGE(0, 4096)},
+        {"", OPT_SUBSTRUCT(icc_opts, mp_icc_conf)},
+        {"gpu-shader-cache", OPT_BOOL(shader_cache)},
+        {"gpu-shader-cache-dir", OPT_STRING(shader_cache_dir), .flags = M_OPT_FILE},
+        {"gpu-hwdec-interop",
+            OPT_STRING_VALIDATE(hwdec_interop, ra_hwdec_validate_opt)},
+        {"gamut-warning", OPT_REMOVED("Replaced by --gamut-mapping-mode=warn")},
+        {"gamut-clipping", OPT_REMOVED("Replaced by --gamut-mapping-mode=desaturate")},
+        {"tone-mapping-desaturate", OPT_REMOVED("Replaced by --tone-mapping-mode")},
+        {"tone-mapping-desaturate-exponent", OPT_REMOVED("Replaced by --tone-mapping-mode")},
+        {"tone-mapping-crosstalk", OPT_REMOVED("Hard-coded as 0.04")},
+        {"tone-mapping-mode", OPT_REMOVED("no replacement")},
+        {0}
+    },
+    .size = sizeof(struct gl_video_opts),
+    .defaults = &gl_video_opts_def,
+};
+
+// Forward declarations for file-local functions that are referenced before
+// their definitions appear further down in this file.
+static void uninit_rendering(struct gl_video *p);
+static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
+static void check_gl_features(struct gl_video *p);
+static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id);
+static const char *handle_scaler_opt(const char *name, bool tscale);
+static void reinit_from_options(struct gl_video *p);
+static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]);
+static void gl_video_setup_hooks(struct gl_video *p);
+static void gl_video_update_options(struct gl_video *p);
+
+// Shader-building shorthands. All of them expect a `struct gl_video *p` to be
+// in scope at the point of use and forward to the shader cache p->sc.
+//
+// GLSL(x) stringifies x and appends it followed by a newline. The expansion
+// already ends in ';', so call sites do not add one.
+#define GLSL(x) gl_sc_add(p->sc, #x "\n");
+// printf-style variants. GLSLF forwards to gl_sc_addf, GLSLHF to gl_sc_haddf
+// and PRELUDE to gl_sc_paddf (presumably body, header and prelude sections of
+// the generated shader - confirm against the shader cache API).
+#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
+#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
+#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)
+
+// Return the contents of the file named by path, reading it from disk at most
+// once per gl_video instance as long as the read yields data.
+// A NULL or empty path, and a read that returns zero bytes, both produce an
+// empty bstr; zero-length results are not added to the cache.
+// The cached path copy is allocated with p as talloc parent.
+static struct bstr load_cached_file(struct gl_video *p, const char *path)
+{
+    struct bstr empty = {0};
+    if (!path || !path[0])
+        return empty;
+
+    // Serve repeated requests from the cache (keyed by the unexpanded path).
+    for (int i = 0; i < p->num_files; i++) {
+        if (!strcmp(p->files[i].path, path))
+            return p->files[i].body;
+    }
+
+    // Cache miss: expand the user path and read the file (limit: 1GB).
+    char *resolved = mp_get_user_path(NULL, p->global, path);
+    struct bstr contents = stream_read_file(resolved, p, p->global, 1000000000);
+    talloc_free(resolved);
+    if (!contents.len)
+        return empty;
+
+    struct cached_file item = {
+        .path = talloc_strdup(p, path),
+        .body = contents,
+    };
+    MP_TARRAY_APPEND(p, p->files, p->num_files, item);
+    return contents;
+}
+
+// Forward msg to the backend's debug_marker callback, if the backend
+// provides one; otherwise do nothing.
+static void debug_check_gl(struct gl_video *p, const char *msg)
+{
+    if (!p->ra->fns->debug_marker)
+        return;
+
+    p->ra->fns->debug_marker(p->ra, msg);
+}
+
+// Forget everything stored in the surface ring: every slot gets id 0 and no
+// timestamp, the ring indices and the drawn-frame counter restart at 0, and
+// the cached output texture is marked invalid.
+static void gl_video_reset_surfaces(struct gl_video *p)
+{
+    p->output_tex_valid = false;
+    p->frames_drawn = 0;
+    p->surface_now = 0;
+    p->surface_idx = 0;
+
+    for (int slot = 0; slot < SURFACES_MAX; slot++) {
+        p->surfaces[slot].pts = MP_NOPTS_VALUE;
+        p->surfaces[slot].id = 0;
+    }
+}
+
+// Free the private data of every texture hook and every user texture, then
+// mark both lists as empty. The arrays themselves are kept for reuse.
+static void gl_video_reset_hooks(struct gl_video *p)
+{
+    for (int h = 0; h < p->num_tex_hooks; h++) {
+        talloc_free(p->tex_hooks[h].priv);
+    }
+    p->num_tex_hooks = 0;
+
+    for (int u = 0; u < p->num_user_textures; u++) {
+        ra_tex_free(p->ra, &p->user_textures[u].tex);
+    }
+    p->num_user_textures = 0;
+}
+
+// Map an arbitrary (possibly negative) surface index into the range
+// [0, SURFACES_MAX).
+static inline int surface_wrap(int id)
+{
+    int rem = id % SURFACES_MAX;
+    // C's % keeps the sign of the dividend, so shift negative results up.
+    if (rem < 0)
+        rem += SURFACES_MAX;
+    return rem;
+}
+
+// Destroy the current OSD renderer and create a new one if an OSD state is
+// set; without an OSD state, p->osd is left NULL.
+static void reinit_osd(struct gl_video *p)
+{
+    mpgl_osd_destroy(p->osd);
+    p->osd = NULL;
+
+    if (!p->osd_state)
+        return;
+
+    p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state);
+}
+
+// Release all rendering state owned by p: scalers, the dither texture, the
+// intermediate/output textures, the interpolation surfaces and the hook
+// textures. Afterwards the surface ring and the hook lists are reset and the
+// shader cache's error state is cleared.
+static void uninit_rendering(struct gl_video *p)
+{
+    for (int n = 0; n < SCALER_COUNT; n++)
+        uninit_scaler(p, &p->scaler[n]);
+
+    ra_tex_free(p->ra, &p->dither_texture);
+
+    // Per-plane intermediate textures (up to 4 planes).
+    for (int n = 0; n < 4; n++) {
+        ra_tex_free(p->ra, &p->merge_tex[n]);
+        ra_tex_free(p->ra, &p->scale_tex[n]);
+        ra_tex_free(p->ra, &p->integer_tex[n]);
+    }
+
+    ra_tex_free(p->ra, &p->indirect_tex);
+    ra_tex_free(p->ra, &p->blend_subs_tex);
+    ra_tex_free(p->ra, &p->screen_tex);
+    ra_tex_free(p->ra, &p->output_tex);
+
+    for (int n = 0; n < 2; n++)
+        ra_tex_free(p->ra, &p->error_diffusion_tex[n]);
+
+    for (int n = 0; n < SURFACES_MAX; n++)
+        ra_tex_free(p->ra, &p->surfaces[n].tex);
+
+    for (int n = 0; n < p->num_hook_textures; n++)
+        ra_tex_free(p->ra, &p->hook_textures[n]);
+
+    // The surface textures are gone, so their bookkeeping must go too.
+    gl_video_reset_surfaces(p);
+    gl_video_reset_hooks(p);
+
+    gl_sc_reset_error(p->sc);
+}
+
+// Return the current value of the gamma_auto option.
+bool gl_video_gamma_auto_enabled(struct gl_video *p)
+{
+    return p->opts.gamma_auto;
+}
+
+// Describe the output colorspace as configured by the user: target primaries,
+// target transfer function and target peak. All other fields are zero.
+struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p)
+{
+    struct mp_colorspace out = {0};
+
+    out.primaries = p->opts.target_prim;
+    out.gamma = p->opts.target_trc;
+    out.hdr.max_luma = p->opts.target_peak;
+
+    return out;
+}
+
+// Hand a new in-memory ICC profile to the color management state. If
+// gl_lcms_set_memory_profile() reports success, the renderer is
+// reinitialized from the current options.
+// Warning: icc_data.start must point to a ta allocation, and the function
+//          takes over ownership.
+void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data)
+{
+    if (gl_lcms_set_memory_profile(p->cms, icc_data))
+        reinit_from_options(p);
+}
+
+// Return whether automatic ICC profile selection is requested. Without ICC
+// options the answer is always false.
+bool gl_video_icc_auto_enabled(struct gl_video *p)
+{
+    if (!p->opts.icc_opts)
+        return false;
+
+    return p->opts.icc_opts->profile_auto;
+}
+
+// Make sure p->lut_3d_texture holds a 3D LUT matching prim/trc and the ICC
+// profile attached to the current image (if any).
+// Returns true if a usable LUT texture is available. On any failure the
+// function returns false and turns p->use_lut_3d off.
+static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
+                               enum mp_csp_trc trc)
+{
+    if (!p->use_lut_3d)
+        return false;
+
+    struct AVBufferRef *icc = p->image.mpi ? p->image.mpi->icc_profile : NULL;
+
+    // Nothing to do if the existing texture is still up to date.
+    if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc))
+        return true;
+
+    // GLES3 doesn't provide filtered 16 bit integer textures
+    // GLES2 doesn't even provide 3D textures
+    const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
+    bool have_tex_3d = p->ra->caps & RA_CAP_TEX_3D;
+    if (!fmt || !have_tex_3d) {
+        p->use_lut_3d = false;
+        MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n");
+        return false;
+    }
+
+    // fmt is known to be non-NULL from here on.
+    struct lut3d *lut3d = NULL;
+    bool generated = gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc);
+    if (!generated || !lut3d) {
+        p->use_lut_3d = false;
+        return false;
+    }
+
+    // Replace the old texture with one initialized from the new LUT data.
+    ra_tex_free(p->ra, &p->lut_3d_texture);
+
+    struct ra_tex_params params = {
+        .dimensions = 3,
+        .w = lut3d->size[0],
+        .h = lut3d->size[1],
+        .d = lut3d->size[2],
+        .format = fmt,
+        .render_src = true,
+        .src_linear = true,
+        .initial_data = lut3d->data,
+    };
+    p->lut_3d_texture = ra_tex_create(p->ra, &params);
+
+    debug_check_gl(p, "after 3d lut creation");
+
+    for (int axis = 0; axis < 3; axis++)
+        p->lut_3d_size[axis] = lut3d->size[axis];
+
+    talloc_free(lut3d);
+
+    bool created = p->lut_3d_texture != NULL;
+    if (!created)
+        p->use_lut_3d = false;
+    return created;
+}
+
+// Build an image struct around a ra_tex plus some metadata. The multiplier is
+// 1.0 and the transform is the identity. The size is taken from the texture,
+// or is 1x1 if tex is NULL. type must not be PLANE_NONE.
+static struct image image_wrap(struct ra_tex *tex, enum plane_type type,
+                               int components)
+{
+    assert(type != PLANE_NONE);
+
+    struct image res = {
+        .type = type,
+        .tex = tex,
+        .multiplier = 1.0,
+        .w = 1,
+        .h = 1,
+        .transform = identity_trans,
+        .components = components,
+    };
+
+    if (tex) {
+        res.w = tex->params.w;
+        res.h = tex->params.h;
+    }
+
+    return res;
+}
+
+// Bind an image for the current pass by appending it to p->pass_imgs. The
+// returned ID is the image's index in that list.
+static int pass_bind(struct gl_video *p, struct image img)
+{
+    const int slot = p->num_pass_imgs;
+    MP_TARRAY_APPEND(p, p->pass_imgs, p->num_pass_imgs, img);
+    return slot;
+}
+
+// Rotation by multiples of 90° and flipping.
+// w/h is used for recentering, so that the transformed image stays in view.
+// rotate: angle in degrees. Values that are not a multiple of 90 are treated
+//         as no rotation; multiples of 90 outside 0..270 (e.g. 360 or -90)
+//         are wrapped to the equivalent quarter turn.
+// flip:   if true, a vertical flip (y -> h - y) is combined with the rotation.
+// out_tr: receives the resulting transform.
+static void get_transform(float w, float h, int rotate, bool flip,
+                          struct gl_transform *out_tr)
+{
+    // Exact sine/cosine per quarter turn, just to avoid rounding issues etc.
+    static const int sin90[4] = {0, 1, 0, -1};
+    static const int cos90[4] = {1, 0, -1, 0};
+
+    // Wrap the quarter-turn count into 0..3 so that the table lookups below
+    // can never go out of bounds, whatever angle the caller passes.
+    int a = rotate % 90 ? 0 : ((rotate / 90) % 4 + 4) % 4;
+    struct gl_transform tr = {{{ cos90[a], sin90[a]},
+                               {-sin90[a], cos90[a]}}};
+
+    // basically, recenter to keep the whole image in view
+    float b[2] = {1, 1};
+    gl_transform_vec(tr, &b[0], &b[1]);
+    tr.t[0] += b[0] < 0 ? w : 0;
+    tr.t[1] += b[1] < 0 ? h : 0;
+
+    if (flip) {
+        struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}};
+        gl_transform_trans(fliptr, &tr);
+    }
+
+    *out_tr = tr;
+}
+
+// Return the chroma plane upscaled to luma size, but with additional padding
+// for image sizes not aligned to subsampling. In other words: round size up
+// to the next multiple of pixel.
+static int chroma_upsize(int size, int pixel)
+{
+    int blocks = (size + pixel - 1) / pixel;
+    return blocks * pixel;
+}
+
+// If a and b are on the same plane, return what plane type should be used.
+// If a or b are none, the other type always wins.
+// Usually: LUMA/RGB/XYZ > CHROMA > ALPHA
+// When both are set, b replaces a only if b is LUMA/RGB/XYZ or a is ALPHA.
+static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b)
+{
+    bool b_dominant = b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ;
+
+    if (a == PLANE_NONE || b_dominant)
+        return b;
+
+    // Alpha is the weakest type: anything that is set replaces it.
+    return (a == PLANE_ALPHA && b != PLANE_NONE) ? b : a;
+}
+
+// Places a video_image's image textures + associated metadata into img[]. The
+// number of textures is equal to p->plane_count. Any necessary plane offsets
+// are stored in off. (e.g. chroma position)
+// vimg->mpi must be set. All 4 entries of img[] are zeroed first; only the
+// first p->plane_count entries of img[] and off[] are then filled in.
+static void pass_get_images(struct gl_video *p, struct video_image *vimg,
+                            struct image img[4], struct gl_transform off[4])
+{
+    assert(vimg->mpi);
+
+    int w = p->image_params.w;
+    int h = p->image_params.h;
+
+    // Determine the chroma offset
+    float ls_w = 1.0 / p->ra_format.chroma_w;
+    float ls_h = 1.0 / p->ra_format.chroma_h;
+
+    // Scales luma coordinates down to chroma coordinates.
+    struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};
+
+    if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
+        int cx, cy;
+        mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
+        // By default texture coordinates are such that chroma is centered with
+        // any chroma subsampling. If a specific direction is given, make it
+        // so that the luma and chroma sample line up exactly.
+        // For 4:4:4, setting chroma location should have no effect at all.
+        // luma sample size (in chroma coord. space)
+        chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
+        chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
+    }
+
+    memset(img, 0, 4 * sizeof(img[0]));
+    for (int n = 0; n < p->plane_count; n++) {
+        struct texplane *t = &vimg->planes[n];
+
+        // Derive the plane type from the components stored in this plane, and
+        // count how many leading component slots are unused (padding).
+        enum plane_type type = PLANE_NONE;
+        int padding = 0;
+        for (int i = 0; i < 4; i++) {
+            int c = p->ra_format.components[n][i];
+            enum plane_type ctype;
+            if (c == 0) {
+                ctype = PLANE_NONE;
+            } else if (c == 4) {
+                ctype = PLANE_ALPHA;
+            } else if (p->image_params.color.space == MP_CSP_RGB) {
+                ctype = PLANE_RGB;
+            } else if (p->image_params.color.space == MP_CSP_XYZ) {
+                ctype = PLANE_XYZ;
+            } else {
+                ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA;
+            }
+            type = merge_plane_types(type, ctype);
+            // Only an unbroken run of empty slots from index 0 counts.
+            if (!c && padding == i)
+                padding = i + 1;
+        }
+
+        // A negative component_pad reduces the number of valid bits; a
+        // positive one is ignored here.
+        int msb_valid_bits =
+            p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0);
+        // Alpha-only planes use the RGB multiplier regardless of the video
+        // colorspace.
+        int csp = type == PLANE_ALPHA ? MP_CSP_RGB : p->image_params.color.space;
+        float tex_mul =
+            1.0 / mp_get_csp_mul(csp, msb_valid_bits, p->ra_format.component_bits);
+        // Float textures are used as-is.
+        if (p->ra_format.component_type == RA_CTYPE_FLOAT)
+            tex_mul = 1.0;
+
+        img[n] = (struct image){
+            .type = type,
+            .tex = t->tex,
+            .multiplier = tex_mul,
+            .w = t->w,
+            .h = t->h,
+            .padding = padding,
+        };
+
+        // Number of component slots in this plane that are actually used.
+        for (int i = 0; i < 4; i++)
+            img[n].components += !!p->ra_format.components[n][i];
+
+        get_transform(t->w, t->h, p->image_params.rotate, t->flipped,
+                      &img[n].transform);
+        // A quarter turn swaps the logical width and height.
+        if (p->image_params.rotate % 180 == 90)
+            MPSWAP(int, img[n].w, img[n].h);
+
+        off[n] = identity_trans;
+
+        if (type == PLANE_CHROMA) {
+            // Rotate the chroma offset vector along with the image. This uses
+            // zero size (no recentering) and always requests the flip.
+            struct gl_transform rot;
+            get_transform(0, 0, p->image_params.rotate, true, &rot);
+
+            struct gl_transform tr = chroma;
+            gl_transform_vec(rot, &tr.t[0], &tr.t[1]);
+
+            // Size added by rounding up to the subsampling, in chroma pixels.
+            float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w;
+            float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h;
+
+            // Adjust the chroma offset if the real chroma size is fractional
+            // due image sizes not aligned to chroma subsampling.
+            struct gl_transform rot2;
+            get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2);
+            if (rot2.m[0][0] < 0)
+                tr.t[0] += dx;
+            if (rot2.m[1][0] < 0)
+                tr.t[0] += dy;
+            if (rot2.m[0][1] < 0)
+                tr.t[1] += dx;
+            if (rot2.m[1][1] < 0)
+                tr.t[1] += dy;
+
+            off[n] = tr;
+        }
+    }
+}
+
+// Return the index of the given component (assuming all non-padding components
+// of all planes are concatenated into a linear list), or -1 if no plane
+// contains it.
+static int find_comp(struct ra_imgfmt_desc *desc, int component)
+{
+    int index = 0;
+
+    for (int plane = 0; plane < desc->num_planes; plane++) {
+        for (int slot = 0; slot < 4; slot++) {
+            // Empty slots are padding and do not take up an index.
+            if (!desc->components[plane][slot])
+                continue;
+            if (desc->components[plane][slot] == component)
+                return index;
+            index++;
+        }
+    }
+
+    return -1;
+}
+
+// Set up per-video state for the format described by p->image_params: pick a
+// hardware decoding path if one is available for the image format, look up
+// the texture format description, derive alpha/gray flags and the color
+// swizzle, and (for software decoding) create one texture per plane.
+// Finishes by running the format-dependent feature checks and setting up the
+// shader hooks.
+static void init_video(struct gl_video *p)
+{
+    p->use_integer_conversion = false;
+
+    struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, p->image_params.imgfmt);
+    if (hwdec) {
+        if (hwdec->driver->overlay_frame) {
+            MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed "
+                       "on the video!\n");
+            p->hwdec_overlay = hwdec;
+        } else {
+            p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &p->image_params);
+            if (!p->hwdec_mapper)
+                MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
+        }
+        // From here on, work with the format the mapper outputs.
+        if (p->hwdec_mapper)
+            p->image_params = p->hwdec_mapper->dst_params;
+        const char **exts = hwdec->glsl_extensions;
+        for (int n = 0; exts && exts[n]; n++)
+            gl_sc_enable_extension(p->sc, (char *)exts[n]);
+        // Note: this is set even if creating the mapper failed above.
+        p->hwdec_active = true;
+    }
+
+    p->ra_format = (struct ra_imgfmt_desc){0};
+    ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format);
+
+    p->plane_count = p->ra_format.num_planes;
+
+    p->has_alpha = false;
+    p->is_gray = true;
+
+    // Component 4 is alpha; the format counts as gray if it has no component
+    // other than 1 and 4.
+    for (int n = 0; n < p->ra_format.num_planes; n++) {
+        for (int i = 0; i < 4; i++) {
+            if (p->ra_format.components[n][i]) {
+                p->has_alpha |= p->ra_format.components[n][i] == 4;
+                p->is_gray &= p->ra_format.components[n][i] == 1 ||
+                              p->ra_format.components[n][i] == 4;
+            }
+        }
+    }
+
+    // For each component 1..4, record which channel of the linear component
+    // list holds it. Missing or out-of-range components map to 'r'.
+    for (int c = 0; c < 4; c++) {
+        int loc = find_comp(&p->ra_format, c + 1);
+        p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? loc : 0];
+    }
+    p->color_swizzle[4] = '\0';
+
+    mp_image_params_guess_csp(&p->image_params);
+
+    // Fixed seed for the pseudo random number generator state.
+    av_lfg_init(&p->lfg, 1);
+
+    debug_check_gl(p, "before video texture creation");
+
+    // Without hardware decoding, the renderer owns the plane textures.
+    if (!p->hwdec_active) {
+        struct video_image *vimg = &p->image;
+
+        struct mp_image layout = {0};
+        mp_image_set_params(&layout, &p->image_params);
+
+        for (int n = 0; n < p->plane_count; n++) {
+            struct texplane *plane = &vimg->planes[n];
+            const struct ra_format *format = p->ra_format.planes[n];
+
+            plane->w = mp_image_plane_w(&layout, n);
+            plane->h = mp_image_plane_h(&layout, n);
+
+            // The texture is allocated larger than the plane by the
+            // configured padding; plane->w/h keep the unpadded size.
+            struct ra_tex_params params = {
+                .dimensions = 2,
+                .w = plane->w + p->opts.tex_pad_x,
+                .h = plane->h + p->opts.tex_pad_y,
+                .d = 1,
+                .format = format,
+                .render_src = true,
+                .src_linear = format->linear_filter,
+                .non_normalized = p->opts.use_rectangle,
+                .host_mutable = true,
+            };
+
+            MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n,
+                       params.w, params.h);
+
+            // NOTE(review): the result of ra_tex_create() is not checked here.
+            plane->tex = ra_tex_create(p->ra, &params);
+            p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT;
+        }
+    }
+
+    debug_check_gl(p, "after video texture creation");
+
+    // Format-dependent checks.
+    check_gl_features(p);
+
+    gl_video_setup_hooks(p);
+}
+
+// Find the direct rendering buffer whose memory range contains ptr.
+// Returns NULL if ptr does not point into any registered buffer.
+static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr)
+{
+    for (int n = 0; n < p->num_dr_buffers; n++) {
+        struct dr_buffer *cand = &p->dr_buffers[n];
+        uint8_t *start = cand->buf->data;
+        size_t len = cand->buf->params.size;
+
+        // Skip buffers whose range [start, start + len) excludes ptr.
+        if (ptr < start || ptr >= start + len)
+            continue;
+
+        return cand;
+    }
+
+    return NULL;
+}
+
+// Drop the image references held by direct rendering buffers whose backend
+// poll reports completion. With force set, all held references are dropped
+// regardless of the poll result.
+static void gc_pending_dr_fences(struct gl_video *p, bool force)
+{
+again:;
+    for (int n = 0; n < p->num_dr_buffers; n++) {
+        struct dr_buffer *buffer = &p->dr_buffers[n];
+        // Buffers without an image reference have nothing pending.
+        if (!buffer->mpi)
+            continue;
+
+        bool res = p->ra->fns->buf_poll(p->ra, buffer->buf);
+        if (res || force) {
+            // Unreferencing the image could cause gl_video_dr_free_buffer()
+            // to be called by the talloc destructor (if it was the last
+            // reference). This will implicitly invalidate the buffer pointer
+            // and change the p->dr_buffers array. To make it worse, it could
+            // free multiple dr_buffers due to weird theoretical corner cases.
+            // This is also why we use the goto to iterate again from the
+            // start: after the free, neither the loop index nor the array
+            // contents can be trusted anymore.
+            struct mp_image *ref = buffer->mpi;
+            // Clear the field first, so the restarted loop skips this entry
+            // if it still exists.
+            buffer->mpi = NULL;
+            talloc_free(ref);
+            goto again;
+        }
+    }
+}
+
+// Let go of the currently held video frame: unmap it if it was mapped through
+// the hwdec mapper, reset its id, and drop the image reference.
+static void unref_current_image(struct gl_video *p)
+{
+    struct video_image *cur = &p->image;
+
+    if (cur->hwdec_mapped) {
+        assert(p->hwdec_active && p->hwdec_mapper);
+        ra_hwdec_mapper_unmap(p->hwdec_mapper);
+        // The plane entries referred to the mapping that was just released.
+        memset(cur->planes, 0, sizeof(cur->planes));
+        cur->hwdec_mapped = false;
+    }
+
+    cur->id = 0;
+    mp_image_unrefp(&cur->mpi);
+
+    // Use the opportunity to also garbage collect pending fences (without
+    // forcing), to get that out of the way.
+    gc_pending_dr_fences(p, false);
+}
+
+// If overlay mode is used, make sure to remove the overlay. Does nothing
+// when no overlay hwdec is active.
+// Be careful with this. Removing the overlay and adding another one will
+// lead to flickering artifacts.
+static void unmap_overlay(struct gl_video *p)
+{
+    struct ra_hwdec *overlay = p->hwdec_overlay;
+    if (!overlay)
+        return;
+
+    // No image and no rectangles.
+    overlay->driver->overlay_frame(overlay, NULL, NULL, NULL, true);
+}
+
+// Undo init_video(): tear down rendering state, remove any overlay, release
+// the current frame and the plane textures, and reset the hwdec state.
+static void uninit_video(struct gl_video *p)
+{
+    uninit_rendering(p);
+    unmap_overlay(p);
+    unref_current_image(p);
+
+    struct video_image *cur = &p->image;
+    for (int n = 0; n < p->plane_count; n++)
+        ra_tex_free(p->ra, &cur->planes[n].tex);
+    *cur = (struct video_image){0};
+
+    // Invalidate image_params to ensure that gl_video_config() will call
+    // init_video() on uninitialized gl_video.
+    p->real_image_params = (struct mp_image_params){0};
+    p->image_params = p->real_image_params;
+
+    p->hwdec_active = false;
+    p->hwdec_overlay = NULL;
+    ra_hwdec_mapper_free(&p->hwdec_mapper);
+}
+
+// Store the timing of the pass that was just run in the current pass slot and
+// advance to the next slot. A slot that has no description yet is labelled
+// "(unknown)". Does nothing if no pass list is selected or the list is full.
+static void pass_record(struct gl_video *p, struct mp_pass_perf perf)
+{
+    if (!p->pass)
+        return;
+    if (p->pass_idx == VO_PASS_PERF_MAX)
+        return;
+
+    struct pass_info *slot = &p->pass[p->pass_idx++];
+    slot->perf = perf;
+
+    if (!slot->desc.len)
+        bstr_xappend(p, &slot->desc, bstr0("(unknown)"));
+}
+
+// Append a printf-formatted description to the current pass slot. If the slot
+// already has a description, the new text is joined to it with " + ".
+// Does nothing if no pass list is selected or the list is full.
+PRINTF_ATTRIBUTE(2, 3)
+static void pass_describe(struct gl_video *p, const char *textf, ...)
+{
+    bool recording = p->pass && p->pass_idx != VO_PASS_PERF_MAX;
+    if (!recording)
+        return;
+
+    struct pass_info *slot = &p->pass[p->pass_idx];
+    if (slot->desc.len > 0)
+        bstr_xappend(p, &slot->desc, bstr0(" + "));
+
+    va_list args;
+    va_start(args, textf);
+    bstr_xappend_vasprintf(p, &slot->desc, textf, args);
+    va_end(args);
+}
+
+// Select which pass list subsequent passes are recorded into (the redraw list
+// or the fresh-frame list), rewind it, and clear all of its entries.
+static void pass_info_reset(struct gl_video *p, bool is_redraw)
+{
+    struct pass_info *list = is_redraw ? p->pass_redraw : p->pass_fresh;
+
+    p->pass = list;
+    p->pass_idx = 0;
+
+    for (int n = 0; n < VO_PASS_PERF_MAX; n++) {
+        // Only the length is reset; the description buffer is kept.
+        list[n].desc.len = 0;
+        list[n].perf = (struct mp_pass_perf){0};
+    }
+}
+
+// Log, at trace level, the description and the last/average/peak timing of
+// every described pass in the currently selected pass list. The timing values
+// are divided by 1000 before printing and labelled as microseconds.
+// Does nothing if no pass list is selected.
+static void pass_report_performance(struct gl_video *p)
+{
+    if (!p->pass)
+        return;
+
+    for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
+        struct pass_info *pass = &p->pass[i];
+        if (!pass->desc.len)
+            continue;
+
+        // Divide first, then narrow to int. A cast binds tighter than '/',
+        // so "(int)x/1000" would convert the raw value to int before the
+        // division and mangle values that do not fit into an int.
+        MP_TRACE(p, "pass '%.*s': last %dus avg %dus peak %dus\n",
+                 BSTR_P(pass->desc),
+                 (int)(pass->perf.last / 1000),
+                 (int)(pass->perf.avg / 1000),
+                 (int)(pass->perf.peak / 1000));
+    }
+}
+
+// Set up the per-texture uniforms for every image bound to the current pass.
+// For bound image N with a texture this sets textureN, texture_sizeN,
+// texture_rotN, texture_offN and pixel_sizeN. Images without a texture are
+// skipped (their index is still consumed).
+static void pass_prepare_src_tex(struct gl_video *p)
+{
+    struct gl_shader_cache *sc = p->sc;
+
+    for (int n = 0; n < p->num_pass_imgs; n++) {
+        struct image *img = &p->pass_imgs[n];
+        if (!img->tex)
+            continue;
+
+        char *name_tex = mp_tprintf(32, "texture%d", n);
+        char *name_size = mp_tprintf(32, "texture_size%d", n);
+        char *name_rot = mp_tprintf(32, "texture_rot%d", n);
+        char *name_off = mp_tprintf(32, "texture_off%d", n);
+        char *name_pt = mp_tprintf(32, "pixel_size%d", n);
+
+        // Non-normalized textures report a size of 1x1, which also makes
+        // pixel_size 1.
+        bool rect = img->tex->params.non_normalized;
+        float size[2] = {
+            rect ? 1 : img->tex->params.w,
+            rect ? 1 : img->tex->params.h,
+        };
+        float texel[2] = {1.0f / size[0], 1.0f / size[1]};
+
+        gl_sc_uniform_texture(sc, name_tex, img->tex);
+        gl_sc_uniform_vec2(sc, name_size, size);
+        gl_sc_uniform_mat2(sc, name_rot, true, (float *)img->transform.m);
+        gl_sc_uniform_vec2(sc, name_off, (float *)img->transform.t);
+        gl_sc_uniform_vec2(sc, name_pt, texel);
+    }
+}
+
+// Unbind all images of the current pass by resetting the bound-image count.
+// The p->pass_imgs array itself is kept.
+static void cleanup_binds(struct gl_video *p)
+{
+    p->num_pass_imgs = 0;
+}
+
+// Sets the appropriate compute shader metadata for an implicit compute pass
+// bw/bh: block size
+// flexible: if a compute pass is already active, keep its block size instead
+//           of using bw/bh
+// All other fields of p->pass_compute are reset to zero.
+static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible)
+{
+    // Avoid overwriting existing block sizes when using a flexible pass
+    bool keep_existing = p->pass_compute.active && flexible;
+    int block_w = keep_existing ? p->pass_compute.block_w : bw;
+    int block_h = keep_existing ? p->pass_compute.block_h : bh;
+
+    p->pass_compute = (struct compute_info){
+        .active = true,
+        .block_w = block_w,
+        .block_h = block_h,
+    };
+}
+
+// Finish the current shader as a compute shader and dispatch it.
+// w/h: the width/height of the compute shader's operating domain (e.g. the
+// render target that needs to be written, or the source texture that needs to
+// be reduced)
+// info: block size and, optionally, an explicit thread count per work group.
+// The timing of the dispatch is recorded and all bound images are released.
+static void dispatch_compute(struct gl_video *p, int w, int h,
+                             struct compute_info info)
+{
+    // The work group size defaults to the block size unless an explicit
+    // thread count is given.
+    PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n",
+            info.threads_w > 0 ? info.threads_w : info.block_w,
+            info.threads_h > 0 ? info.threads_h : info.block_h);
+
+    pass_prepare_src_tex(p);
+
+    // Since we don't actually have vertices, we pretend for convenience
+    // reasons that we do and calculate the right texture coordinates based on
+    // the output sample ID
+    gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h });
+    PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n");
+
+    // Provide texmapN()/texcoordN for every bound image that has a texture.
+    for (int n = 0; n < p->num_pass_imgs; n++) {
+        struct image *s = &p->pass_imgs[n];
+        if (!s->tex)
+            continue;
+
+        PRELUDE("#define texmap%d(id) (texture_rot%d * outcoord(id) + "
+               "pixel_size%d * texture_off%d)\n", n, n, n, n);
+        PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n);
+    }
+
+    // always round up when dividing to make sure we don't leave off a part of
+    // the image
+    // (a non-positive block size results in a single work group on that axis)
+    int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1,
+        num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1;
+
+    // Without RA_CAP_NUM_GROUPS, supply gl_NumWorkGroups as a constant.
+    if (!(p->ra->caps & RA_CAP_NUM_GROUPS))
+        PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y);
+
+    pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1));
+    cleanup_binds(p);
+}
+
+// Draw the current shader as a quad covering the rectangle dst of fbo.
+// Each vertex carries its position plus one texture coordinate per bound
+// image. Returns the timing information reported by the draw dispatch.
+static struct mp_pass_perf render_pass_quad(struct gl_video *p,
+                                            struct ra_fbo fbo, bool discard,
+                                            const struct mp_rect *dst)
+{
+    // The first element is reserved for `vec2 position`
+    int num_vertex_attribs = 1 + p->num_pass_imgs;
+    size_t vertex_stride = num_vertex_attribs * sizeof(struct vertex_pt);
+
+    // Expand the VAO if necessary
+    // (attribute at index i is named texcoord<i-1>; this presumably relies on
+    // the position attribute at index 0 having been added elsewhere - verify)
+    while (p->vao_len < num_vertex_attribs) {
+        MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
+            .name = talloc_asprintf(p, "texcoord%d", p->vao_len - 1),
+            .type = RA_VARTYPE_FLOAT,
+            .dim_v = 2,
+            .dim_m = 1,
+            .offset = p->vao_len * sizeof(struct vertex_pt),
+        });
+    }
+
+    int num_vertices = 6; // quad as triangle list
+    int num_attribs_total = num_vertices * num_vertex_attribs;
+    MP_TARRAY_GROW(p, p->tmp_vertex, num_attribs_total);
+
+    struct gl_transform t;
+    gl_transform_ortho_fbo(&t, fbo);
+
+    // Transform the two opposite corners of dst with the FBO's transform.
+    float x[2] = {dst->x0, dst->x1};
+    float y[2] = {dst->y0, dst->y1};
+    gl_transform_vec(t, &x[0], &y[0]);
+    gl_transform_vec(t, &x[1], &y[1]);
+
+    // Generate the 4 corners; n / 2 selects the x side, n % 2 the y side.
+    for (int n = 0; n < 4; n++) {
+        struct vertex_pt *vs = &p->tmp_vertex[num_vertex_attribs * n];
+        // vec2 position in idx 0
+        vs[0].x = x[n / 2];
+        vs[0].y = y[n % 2];
+        for (int i = 0; i < p->num_pass_imgs; i++) {
+            struct image *s = &p->pass_imgs[i];
+            if (!s->tex)
+                continue;
+            struct gl_transform tr = s->transform;
+            float tx = (n / 2) * s->w;
+            float ty = (n % 2) * s->h;
+            gl_transform_vec(tr, &tx, &ty);
+            // Non-normalized textures are addressed in pixels, all others in
+            // coordinates divided by the texture size.
+            bool rect = s->tex->params.non_normalized;
+            // vec2 texcoordN in idx N+1
+            vs[i + 1].x = tx / (rect ? 1 : s->tex->params.w);
+            vs[i + 1].y = ty / (rect ? 1 : s->tex->params.h);
+        }
+    }
+
+    // Vertices 0..2 form the first triangle. Duplicate vertices 2 and 1 into
+    // slots 4 and 5, so that vertices 3..5 form the second one.
+    memmove(&p->tmp_vertex[num_vertex_attribs * 4],
+            &p->tmp_vertex[num_vertex_attribs * 2],
+            vertex_stride);
+
+    memmove(&p->tmp_vertex[num_vertex_attribs * 5],
+            &p->tmp_vertex[num_vertex_attribs * 1],
+            vertex_stride);
+
+    return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs,
+                               vertex_stride, p->tmp_vertex, num_vertices);
+}
+
+// Finish the current shader as a fragment shader pass: set up the source
+// texture uniforms, draw a quad covering dst into fbo, record the timing and
+// release all bound images.
+static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo,
+                            bool discard, const struct mp_rect *dst)
+{
+    pass_prepare_src_tex(p);
+
+    struct mp_pass_perf perf = render_pass_quad(p, fbo, discard, dst);
+    pass_record(p, perf);
+
+    debug_check_gl(p, "after rendering");
+    cleanup_binds(p);
+}
+
+// Finish the current shader pass by rendering it into a texture, either as a
+// compute shader or as a fragment shader.
+// dst_tex: this will be used for rendering; possibly reallocating the whole
+//          texture, if the required parameters have changed
+// w, h: required target dimension, and also defines the target rectangle
+//       used for rasterization
+// If the texture cannot be (re)allocated, the pass is dropped: bound images
+// are released and the shader is reset.
+static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex,
+                            int w, int h)
+{
+    if (!ra_tex_resize(p->ra, p->log, dst_tex, w, h, p->fbo_format)) {
+        cleanup_binds(p);
+        gl_sc_reset(p->sc);
+        return;
+    }
+
+    // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders
+    // over fragment shaders wherever possible.
+    if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE) &&
+        (*dst_tex)->params.storage_dst)
+    {
+        pass_is_compute(p, 16, 16, true);
+    }
+
+    if (p->pass_compute.active) {
+        gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex);
+        // Shaders that do not write their output themselves get a store of
+        // `color` appended.
+        if (!p->pass_compute.directly_writes)
+            GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);)
+
+        dispatch_compute(p, w, h, p->pass_compute);
+        // The compute state only applies to a single pass.
+        p->pass_compute = (struct compute_info){0};
+
+        debug_check_gl(p, "after dispatching compute shader");
+    } else {
+        struct ra_fbo fbo = { .tex = *dst_tex, };
+        finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h});
+    }
+}
+
+// Return the swizzle mask to use when sampling img: "raaa" for textures whose
+// format is flagged luminance_alpha, "rgba" otherwise (including images
+// without a texture).
+static const char *get_tex_swizzle(struct image *img)
+{
+    bool lum_alpha = img->tex && img->tex->params.format->luminance_alpha;
+    return lum_alpha ? "raaa" : "rgba";
+}
+
+// Copy a texture to the vec4 color, while increasing offset. Also applies
+// the texture multiplier to the sampled color
+// offset: index of the first channel of `color` to write; advanced by the
+//         number of components of img.
+// The image is bound as a source of the current pass as a side effect.
+static void copy_image(struct gl_video *p, unsigned int *offset, struct image img)
+{
+    const unsigned int count = img.components;
+    char src[5] = {0};
+    char dst[5] = {0};
+
+    // Both swizzle strings must fit into 4 channels plus the terminator.
+    assert(*offset + count < sizeof(dst));
+    assert(img.padding + count < sizeof(src));
+
+    int id = pass_bind(p, img);
+
+    // Source channels start after the texture's padding, destination
+    // channels at the current offset.
+    const char *tex_fmt = get_tex_swizzle(&img);
+    const char *dst_fmt = "rgba";
+    for (unsigned int i = 0; i < count; i++) {
+        src[i] = tex_fmt[img.padding + i];
+        dst[i] = dst_fmt[*offset + i];
+    }
+
+    // Unsigned integer textures additionally get scaled by
+    // 1 / (2^component_bits - 1).
+    if (img.tex && img.tex->params.format->ctype == RA_CTYPE_UINT) {
+        uint64_t tex_max = 1ull << p->ra_format.component_bits;
+        img.multiplier *= 1.0 / (tex_max - 1);
+    }
+
+    GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n",
+          dst, img.multiplier, id, id, src);
+
+    *offset += count;
+}
+
+// Emit shader code that fills the channels of `color` from index
+// num_components upwards with defaults: 0.0 for r/g/b and 1.0 for alpha.
+static void skip_unused(struct gl_video *p, int num_components)
+{
+    const char *channels = "rgba";
+
+    for (int c = num_components; c < 4; c++) {
+        double fill = c < 3 ? 0.0 : 1.0;
+        GLSLF("color.%c = %f;\n", channels[c], fill);
+    }
+}
+
+// Put a scaler back into the uninitialized state: free its intermediate
+// texture (sep_fbo) and its LUT texture, and clear the kernel pointer.
+static void uninit_scaler(struct gl_video *p, struct scaler *scaler)
+{
+    scaler->initialized = false;
+    scaler->kernel = NULL;
+
+    ra_tex_free(p->ra, &scaler->sep_fbo);
+    ra_tex_free(p->ra, &scaler->lut);
+}
+
+// Emit the header macros that expose a bound image to hook shaders under the
+// given name: NAME_raw, NAME_pos, NAME_size, NAME_rot, NAME_off, NAME_pt,
+// NAME_map and NAME_mul, plus the sampling helpers NAME_tex(), NAME_texOff()
+// and, if RA_CAP_GATHER is available, NAME_gather().
+// id:  bind index of the image, as returned by pass_bind()
+// img: the bound image; supplies multiplier, padding, swizzle and transform
+static void hook_prelude(struct gl_video *p, const char *name, int id,
+                         struct image img)
+{
+    GLSLHF("#define %s_raw texture%d\n", name, id);
+    GLSLHF("#define %s_pos texcoord%d\n", name, id);
+    GLSLHF("#define %s_size texture_size%d\n", name, id);
+    GLSLHF("#define %s_rot texture_rot%d\n", name, id);
+    GLSLHF("#define %s_off texture_off%d\n", name, id);
+    GLSLHF("#define %s_pt pixel_size%d\n", name, id);
+    GLSLHF("#define %s_map texmap%d\n", name, id);
+    GLSLHF("#define %s_mul %f\n", name, img.multiplier);
+
+    // Mutable copy of the texture's swizzle mask.
+    char crap[5] = "";
+    snprintf(crap, sizeof(crap), "%s", get_tex_swizzle(&img));
+
+    // Remove leading padding by rotating the swizzle mask.
+    // (each iteration moves the first character to the end)
+    int len = strlen(crap);
+    for (int n = 0; n < img.padding; n++) {
+        if (len) {
+            char f = crap[0];
+            memmove(crap, crap + 1, len - 1);
+            crap[len - 1] = f;
+        }
+    }
+
+    // Set up the sampling functions
+    GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n",
+           name, name, name, crap);
+
+    // Note that the gather variant does not apply the swizzle mask.
+    if (p->ra->caps & RA_CAP_GATHER) {
+        GLSLHF("#define %s_gather(pos, c) (%s_mul * vec4("
+               "textureGather(%s_raw, pos, c)))\n", name, name, name);
+    }
+
+    // Since the extra matrix multiplication impacts performance,
+    // skip it unless the texture was actually rotated
+    if (gl_transform_eq(img.transform, identity_trans)) {
+        GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n",
+               name, name, name, name);
+    } else {
+        GLSLHF("#define %s_texOff(off) "
+                   "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n",
+               name, name, name, name, name);
+    }
+}
+
+// Look up a saved image by name. On success the image is copied to *out and
+// true is returned. Returns false if name or out is NULL, or if no image was
+// saved under that name.
+static bool saved_img_find(struct gl_video *p, const char *name,
+                           struct image *out)
+{
+    if (!name || !out)
+        return false;
+
+    for (int n = 0; n < p->num_saved_imgs; n++) {
+        struct saved_img *entry = &p->saved_imgs[n];
+        if (strcmp(entry->name, name) != 0)
+            continue;
+
+        *out = entry->img;
+        return true;
+    }
+
+    return false;
+}
+
+// Store 'img' under 'name' (which must not be NULL). An existing entry with
+// the same name is overwritten; otherwise a new entry is appended. The name
+// pointer itself is stored, not a copy of the string.
+static void saved_img_store(struct gl_video *p, const char *name,
+                            struct image img)
+{
+    assert(name);
+
+    // Search for an existing entry with this name
+    int idx = 0;
+    while (idx < p->num_saved_imgs &&
+           strcmp(p->saved_imgs[idx].name, name) != 0)
+        idx++;
+
+    if (idx < p->num_saved_imgs) {
+        p->saved_imgs[idx].img = img;
+        return;
+    }
+
+    struct saved_img entry = {
+        .name = name,
+        .img = img
+    };
+    MP_TARRAY_APPEND(p, p->saved_imgs, p->num_saved_imgs, entry);
+}
+
+// Bind every texture listed in hook->bind_tex for the current pass and emit
+// the matching hook_prelude() macros.
+//
+// 'name' is the hook point being processed and 'img' the currently hooked
+// image; the special bind name "HOOKED" refers to that image and gets its
+// macros emitted under both "HOOKED" and 'name'. Bind names matching a user
+// texture are loaded as plain uniforms. Anything else must be a saved image.
+//
+// Returns true if all binds were set up. Returns false if a bind name is
+// neither a user texture nor a saved image; in that case p->num_pass_imgs is
+// reset to its value on entry so the caller can skip this hook.
+static bool pass_hook_setup_binds(struct gl_video *p, const char *name,
+                                  struct image img, struct tex_hook *hook)
+{
+    // Remember the bind count on entry. Not every bind slot results in
+    // exactly one pass_bind() call (empty slots and user textures bind
+    // nothing), so the loop index must not be used to undo the bindings.
+    // This assumes pass_bind() only ever appends to the pass image list.
+    const int num_bound_on_entry = p->num_pass_imgs;
+
+    for (int t = 0; t < SHADER_MAX_BINDS; t++) {
+        char *bind_name = (char *)hook->bind_tex[t];
+
+        if (!bind_name)
+            continue;
+
+        // This is a special name that means "currently hooked texture"
+        if (strcmp(bind_name, "HOOKED") == 0) {
+            int id = pass_bind(p, img);
+            hook_prelude(p, "HOOKED", id, img);
+            hook_prelude(p, name, id, img);
+            continue;
+        }
+
+        // BIND can also be used to load user-defined textures, in which
+        // case we will directly load them as a uniform instead of
+        // generating the hook_prelude boilerplate
+        for (int u = 0; u < p->num_user_textures; u++) {
+            struct gl_user_shader_tex *utex = &p->user_textures[u];
+            if (bstr_equals0(utex->name, bind_name)) {
+                gl_sc_uniform_texture(p->sc, bind_name, utex->tex);
+                goto next_bind;
+            }
+        }
+
+        struct image bind_img;
+        if (!saved_img_find(p, bind_name, &bind_img)) {
+            // Clean up texture bindings and move on to the next hook
+            MP_TRACE(p, "Skipping hook on %s due to no texture named %s.\n",
+                     name, bind_name);
+            p->num_pass_imgs = num_bound_on_entry;
+            return false;
+        }
+
+        hook_prelude(p, bind_name, pass_bind(p, bind_img), bind_img);
+
+next_bind: ;
+    }
+
+    return true;
+}
+
+// Hand out the next slot of p->hook_textures and advance idx_hook_textures.
+// If every existing slot is already in use, a NULL entry is appended first.
+static struct ra_tex **next_hook_tex(struct gl_video *p)
+{
+    bool exhausted = p->idx_hook_textures == p->num_hook_textures;
+    if (exhausted) {
+        MP_TARRAY_APPEND(p, p->hook_textures, p->num_hook_textures, NULL);
+    }
+
+    // Take the address only after the array may have been grown
+    struct ra_tex **slot = &p->hook_textures[p->idx_hook_textures];
+    p->idx_hook_textures++;
+    return slot;
+}
+
+// Process hooks for a plane, saving the result and returning a new image
+// If 'trans' is NULL, the shader is forbidden from transforming img
+//
+// 'name' is the hook point; with a NULL name, img is returned unchanged.
+// The incoming image is first stored under 'name'. Then every entry of
+// p->tex_hooks that lists 'name' in hook_tex is run in array order, unless
+// its condition fails or one of its binds can't be resolved. Each hook output
+// is rendered to a texture from next_hook_tex() and stored under the hook's
+// save_tex name (or 'name' if save_tex is unset). When the output overwrites
+// 'name', it becomes the image seen by later hooks and the returned image,
+// and the hook's transform is accumulated into *trans.
+static struct image pass_hook(struct gl_video *p, const char *name,
+                              struct image img, struct gl_transform *trans)
+{
+    if (!name)
+        return img;
+
+    // Make the unmodified input available to BIND directives
+    saved_img_store(p, name, img);
+
+    MP_TRACE(p, "Running hooks for %s\n", name);
+    for (int i = 0; i < p->num_tex_hooks; i++) {
+        struct tex_hook *hook = &p->tex_hooks[i];
+
+        // Figure out if this pass hooks this texture
+        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
+            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
+                goto found;
+        }
+
+        continue;
+
+found:
+        // Check the hook's condition
+        if (hook->cond && !hook->cond(p, img, hook->priv)) {
+            MP_TRACE(p, "Skipping hook on %s due to condition.\n", name);
+            continue;
+        }
+
+        // A hook without an explicit save target overwrites the hooked texture
+        const char *store_name = hook->save_tex ? hook->save_tex : name;
+        bool is_overwrite = strcmp(store_name, name) == 0;
+
+        // If user shader is set to align HOOKED with reference and fix its
+        // offset, it requires HOOKED to be resizable and overwritten.
+        if (is_overwrite && hook->align_offset) {
+            if (!trans) {
+                MP_ERR(p, "Hook tried to align unresizable texture %s!\n",
+                       name);
+                return img;
+            }
+
+            // Apply only the translation part of the accumulated transform
+            // to the image before it is sampled by the hook
+            struct gl_transform align_off = identity_trans;
+            align_off.t[0] = trans->t[0];
+            align_off.t[1] = trans->t[1];
+
+            gl_transform_trans(align_off, &img.transform);
+        }
+
+        // Skip the hook if one of its bound textures doesn't exist
+        if (!pass_hook_setup_binds(p, name, img, hook))
+            continue;
+
+        // Run the actual hook. This generates a series of GLSL shader
+        // instructions sufficient for drawing the hook's output
+        struct gl_transform hook_off = identity_trans;
+        hook->hook(p, img, &hook_off, hook->priv);
+
+        // A hook may declare its own component count; otherwise the input's
+        // component count is kept
+        int comps = hook->components ? hook->components : img.components;
+        skip_unused(p, comps);
+
+        // Compute the updated FBO dimensions and store the result
+        struct mp_rect_f sz = {0, 0, img.w, img.h};
+        gl_transform_rect(hook_off, &sz);
+        int w = lroundf(fabs(sz.x1 - sz.x0));
+        int h = lroundf(fabs(sz.y1 - sz.y0));
+
+        struct ra_tex **tex = next_hook_tex(p);
+        finish_pass_tex(p, tex, w, h);
+        struct image saved_img = image_wrap(*tex, img.type, comps);
+
+        // If the texture we're saving overwrites the "current" texture, also
+        // update the tex parameter so that the future loop cycles will use the
+        // updated values, and export the offset
+        if (is_overwrite) {
+            if (!trans && !gl_transform_eq(hook_off, identity_trans)) {
+                MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n",
+                       name);
+                return img;
+            }
+
+            img = saved_img;
+            if (trans) {
+                gl_transform_trans(hook_off, trans);
+
+                // If user shader is set to align HOOKED, the offset it produces
+                // is dynamic (with static resizing factor though).
+                // Align it with reference manually to get offset fixed.
+                if (hook->align_offset) {
+                    trans->t[0] = 0.0;
+                    trans->t[1] = 0.0;
+                }
+            }
+        }
+
+        saved_img_store(p, store_name, saved_img);
+    }
+
+    return img;
+}
+
+// This can be used at any time in the middle of rendering to specify an
+// optional hook point, which if triggered will render out to a new FBO and
+// load the result back into vec4 color. Offsets applied by the hooks are
+// accumulated in tex_trans, and the FBO is dimensioned according
+// to p->texture_w/h
+//
+// The hook point counts as triggered if any entry of p->tex_hooks names it in
+// either its hook_tex or its bind_tex list. A NULL name does nothing.
+static void pass_opt_hook_point(struct gl_video *p, const char *name,
+                                struct gl_transform *tex_trans)
+{
+    if (!name)
+        return;
+
+    for (int i = 0; i < p->num_tex_hooks; i++) {
+        struct tex_hook *hook = &p->tex_hooks[i];
+
+        // Does this hook run on the texture?
+        for (int h = 0; h < SHADER_MAX_HOOKS; h++) {
+            if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0)
+                goto found;
+        }
+
+        // Does this hook merely read the texture?
+        for (int b = 0; b < SHADER_MAX_BINDS; b++) {
+            if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0)
+                goto found;
+        }
+    }
+
+    // Nothing uses this texture, don't bother storing it
+    return;
+
+found: ;
+    // Flush the current pass into a texture, run the hooks on it, and read
+    // the (possibly replaced) image back so rendering can continue
+    struct ra_tex **tex = next_hook_tex(p);
+    finish_pass_tex(p, tex, p->texture_w, p->texture_h);
+    struct image img = image_wrap(*tex, PLANE_RGB, p->components);
+    img = pass_hook(p, name, img, tex_trans);
+    copy_image(p, &(int){0}, img);
+    // Hooks may have changed the size and component count
+    p->texture_w = img.w;
+    p->texture_h = img.h;
+    p->components = img.components;
+    pass_describe(p, "(remainder pass)");
+}
+
+// Append the shader text 'body' to the shader header and define the uniforms
+// "random", "frame", "input_size", "target_size" and "tex_offset". "random"
+// and "frame" are each preceded by a gl_sc_uniform_dynamic() call.
+static void load_shader(struct gl_video *p, struct bstr body)
+{
+    gl_sc_hadd_bstr(p->sc, body);
+
+    // Random value scaled by UINT32_MAX
+    double rnd = (double)av_lfg_get(&p->lfg) / UINT32_MAX;
+    gl_sc_uniform_dynamic(p->sc);
+    gl_sc_uniform_f(p->sc, "random", rnd);
+
+    gl_sc_uniform_dynamic(p->sc);
+    gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded);
+
+    // Source rectangle size, scaled by the texture offset's diagonal
+    float input_size[2] = {
+        (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0],
+        (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1],
+    };
+    gl_sc_uniform_vec2(p->sc, "input_size", input_size);
+
+    // Destination rectangle size
+    float target_size[2] = {
+        p->dst_rect.x1 - p->dst_rect.x0,
+        p->dst_rect.y1 - p->dst_rect.y0,
+    };
+    gl_sc_uniform_vec2(p->sc, "target_size", target_size);
+
+    // Source rectangle origin after applying the texture offset
+    float tex_offset[2] = {
+        p->src_rect.x0 * p->texture_offset.m[0][0] + p->texture_offset.t[0],
+        p->src_rect.y0 * p->texture_offset.m[1][1] + p->texture_offset.t[1],
+    };
+    gl_sc_uniform_vec2(p->sc, "tex_offset", tex_offset);
+}
+
+// Semantic equality: behaves like ==, except that two NaNs also compare equal
+static bool double_seq(double a, double b)
+{
+    if (a == b)
+        return true;
+
+    return isnan(a) && isnan(b);
+}
+
+// Compare two scaler functions: the names must both be NULL or be equal
+// strings, both params must match under double_seq() (so NaN equals NaN),
+// and blur and taper must be equal.
+static bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b)
+{
+    bool a_named = a.name != NULL;
+    bool b_named = b.name != NULL;
+
+    // Exactly one of the two has a name
+    if (a_named != b_named)
+        return false;
+
+    if (a_named && strcmp(a.name, b.name) != 0)
+        return false;
+
+    if (!double_seq(a.params[0], b.params[0]))
+        return false;
+    if (!double_seq(a.params[1], b.params[1]))
+        return false;
+
+    return a.blur == b.blur && a.taper == b.taper;
+}
+
+// Compare the parts of two scaler configs that matter for LUT generation:
+// kernel, window, radius and clamp. Antiring is deliberately left out because
+// it doesn't affect the LUT.
+static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b)
+{
+    if (!scaler_fun_eq(a.kernel, b.kernel))
+        return false;
+    if (!scaler_fun_eq(a.window, b.window))
+        return false;
+
+    return a.radius == b.radius && a.clamp == b.clamp;
+}
+
+// (Re)initialize 'scaler' for the configuration 'conf' and 'scale_factor'.
+//
+// Nothing happens if the scaler is already initialized with an equal config
+// (as judged by scaler_conf_eq) and the same scale factor. Otherwise the
+// scaler is torn down with uninit_scaler() and set up again: the kernel and
+// window are looked up by name, explicitly set options override the kernel's
+// defaults, and for filter kernels a LUT texture holding the weights is
+// created. 'sizes' is passed through to mp_init_filter(). 'conf' must not
+// be NULL.
+static void reinit_scaler(struct gl_video *p, struct scaler *scaler,
+                          const struct scaler_config *conf,
+                          double scale_factor,
+                          int sizes[])
+{
+    assert(conf);
+    if (scaler_conf_eq(scaler->conf, *conf) &&
+        scaler->scale_factor == scale_factor &&
+        scaler->initialized)
+        return;
+
+    uninit_scaler(p, scaler);
+
+    // An unset or empty dscale kernel name falls back to the scale config
+    if (scaler->index == SCALER_DSCALE && (!conf->kernel.name ||
+        !conf->kernel.name[0]))
+    {
+        conf = &p->opts.scaler[SCALER_SCALE];
+    }
+
+    // Likewise for cscale
+    if (scaler->index == SCALER_CSCALE && (!conf->kernel.name ||
+        !conf->kernel.name[0]))
+    {
+        conf = &p->opts.scaler[SCALER_SCALE];
+    }
+
+    struct filter_kernel bare_window;
+    const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name);
+    const struct filter_window *t_window = mp_find_filter_window(conf->window.name);
+    bool is_tscale = scaler->index == SCALER_TSCALE;
+    if (!t_kernel) {
+        // The name is not a known kernel; if it names a window, use that
+        // window function as the kernel function
+        const struct filter_window *window = mp_find_filter_window(conf->kernel.name);
+        if (window) {
+            bare_window = (struct filter_kernel) { .f = *window };
+            t_kernel = &bare_window;
+        }
+    }
+
+    // Record the effective configuration, with the names replaced by the
+    // results of the lookups above
+    scaler->conf = *conf;
+    scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale);
+    scaler->conf.window.name = t_window ? (char *)t_window->name : NULL;
+    scaler->scale_factor = scale_factor;
+    scaler->insufficient = false;
+    scaler->initialized = true;
+    // Scalers without a filter kernel need no LUT; scaler->kernel is not set
+    // in this function for them
+    if (!t_kernel)
+        return;
+
+    // Work on a private copy so the overrides below don't touch the template
+    scaler->kernel_storage = *t_kernel;
+    scaler->kernel = &scaler->kernel_storage;
+
+    if (!t_window) {
+        // fall back to the scaler's default window if available
+        t_window = mp_find_filter_window(t_kernel->window);
+    }
+    if (t_window)
+        scaler->kernel->w = *t_window;
+
+    // NaN params mean "not set"; only explicitly set params override the
+    // kernel/window defaults
+    for (int n = 0; n < 2; n++) {
+        if (!isnan(conf->kernel.params[n]))
+            scaler->kernel->f.params[n] = conf->kernel.params[n];
+        if (!isnan(conf->window.params[n]))
+            scaler->kernel->w.params[n] = conf->window.params[n];
+    }
+
+    // Blur, taper and radius only override the defaults when positive
+    if (conf->kernel.blur > 0.0)
+        scaler->kernel->f.blur = conf->kernel.blur;
+    if (conf->window.blur > 0.0)
+        scaler->kernel->w.blur = conf->window.blur;
+
+    if (conf->kernel.taper > 0.0)
+        scaler->kernel->f.taper = conf->kernel.taper;
+    if (conf->window.taper > 0.0)
+        scaler->kernel->w.taper = conf->window.taper;
+
+    if (scaler->kernel->f.resizable && conf->radius > 0.0)
+        scaler->kernel->f.radius = conf->radius;
+
+    scaler->kernel->clamp = conf->clamp;
+    scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor);
+
+    // Sizes 1 and 2 use that many components per texel; anything larger is
+    // packed into 4-component texels
+    int size = scaler->kernel->size;
+    int num_components = size > 2 ? 4 : size;
+    const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components);
+    assert(fmt);
+
+    int width = (size + num_components - 1) / num_components; // round up
+    int stride = width * num_components;
+    assert(size <= stride);
+
+    // Temporary CPU-side weight table, freed once the texture is created
+    static const int lut_size = 256;
+    float *weights = talloc_array(NULL, float, lut_size * stride);
+    mp_compute_lut(scaler->kernel, lut_size, stride, weights);
+
+    // Polar kernels use a 1D LUT when the backend supports 1D textures
+    bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D);
+
+    struct ra_tex_params lut_params = {
+        .dimensions = use_1d ? 1 : 2,
+        .w = use_1d ? lut_size : width,
+        .h = use_1d ? 1 : lut_size,
+        .d = 1,
+        .format = fmt,
+        .render_src = true,
+        .src_linear = true,
+        .initial_data = weights,
+    };
+    // NOTE(review): the result of ra_tex_create() is not checked here
+    scaler->lut = ra_tex_create(p->ra, &lut_params);
+
+    talloc_free(weights);
+
+    debug_check_gl(p, "after initializing scaler");
+}
+
+// Special helper for sampling from two separated stages
+//
+// The first stage samples 'src' in the y direction and renders into
+// scaler->sep_fbo at size src.w x h; the second stage samples that texture in
+// the x direction and is left open for the caller to finish. The image
+// multiplier is applied in the first stage. Note that 'w' is not used by
+// this function.
+static void pass_sample_separated(struct gl_video *p, struct image src,
+                                  struct scaler *scaler, int w, int h)
+{
+    // Separate the transformation into x and y components, per pass
+    struct gl_transform t_x = {
+        .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}},
+        .t = {src.transform.t[0], 0.0},
+    };
+    struct gl_transform t_y = {
+        .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}},
+        .t = {0.0, src.transform.t[1]},
+    };
+
+    // First pass (scale only in the y dir)
+    src.transform = t_y;
+    sampler_prelude(p->sc, pass_bind(p, src));
+    GLSLF("// first pass\n");
+    pass_sample_separated_gen(p->sc, scaler, 0, 1);
+    GLSLF("color *= %f;\n", src.multiplier);
+    finish_pass_tex(p, &scaler->sep_fbo, src.w, h);
+
+    // Second pass (scale only in the x dir)
+    // The intermediate texture is wrapped as a fresh image, then given the
+    // x-only transform
+    src = image_wrap(scaler->sep_fbo, src.type, src.components);
+    src.transform = t_x;
+    pass_describe(p, "%s second pass", scaler->conf.kernel.name);
+    sampler_prelude(p->sc, pass_bind(p, src));
+    pass_sample_separated_gen(p->sc, scaler, 1, 0);
+}
+
+// Picks either the compute shader version or the regular sampler version
+// depending on hardware support
+//
+// The compute version is used only if the backend reports RA_CAP_COMPUTE and
+// the shared memory needed for one work group fits into p->ra->max_shmem.
+// 'w' and 'h' are the output dimensions, used to derive the scaling ratios.
+static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler,
+                                       struct image img, int w, int h)
+{
+    uint64_t reqs = RA_CAP_COMPUTE;
+    if ((p->ra->caps & reqs) != reqs)
+        goto fallback;
+
+    int bound = ceil(scaler->kernel->radius_cutoff);
+    int offset = bound - 1; // padding top/left
+    int padding = offset + bound; // total padding
+
+    // Output pixels per input pixel in each direction
+    float ratiox = (float)w / img.w,
+          ratioy = (float)h / img.h;
+
+    // For performance we want to load at least as many pixels
+    // horizontally as there are threads in a warp (32 for nvidia), as
+    // well as enough to take advantage of shmem parallelism
+    const int warp_size = 32, threads = 256;
+    int bw = warp_size;
+    int bh = threads / bw;
+
+    // We need to sample everything from base_min to base_max, so make sure
+    // we have enough room in shmem
+    int iw = (int)ceil(bw / ratiox) + padding + 1,
+        ih = (int)ceil(bh / ratioy) + padding + 1;
+
+    // One float per component for every input pixel of the work group
+    int shmem_req = iw * ih * img.components * sizeof(float);
+    if (shmem_req > p->ra->max_shmem)
+        goto fallback;
+
+    pass_is_compute(p, bw, bh, false);
+    pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih);
+    return;
+
+fallback:
+    // Fall back to regular polar shader when compute shaders are unsupported
+    // or the kernel is too big for shmem
+    pass_sample_polar(p->sc, scaler, img.components,
+                      p->ra->caps & RA_CAP_GATHER);
+}
+
+// Sample from image, with the src rectangle given by it.
+// The dst rectangle is implicit by what the caller will do next, but w and h
+// must still be what is going to be used (to dimension FBOs correctly).
+// This will write the scaled contents to the vec4 "color".
+// The scaler unit is initialized by this function; in order to avoid cache
+// thrashing, the scaler unit should usually use the same parameters.
+static void pass_sample(struct gl_video *p, struct image img,
+                        struct scaler *scaler, const struct scaler_config *conf,
+                        double scale_factor, int w, int h)
+{
+    // No-op if the scaler is already set up for this config and scale factor
+    reinit_scaler(p, scaler, conf, scale_factor, filter_sizes);
+
+    // Describe scaler
+    const char *scaler_opt[] = {
+        [SCALER_SCALE] = "scale",
+        [SCALER_DSCALE] = "dscale",
+        [SCALER_CSCALE] = "cscale",
+        [SCALER_TSCALE] = "tscale",
+    };
+
+    pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index],
+                  scaler->conf.kernel.name, plane_names[img.type]);
+
+    // Separated scaling applies to filter kernels that are not polar
+    bool is_separated = scaler->kernel && !scaler->kernel->polar;
+
+    // Set up the transformation+prelude and bind the texture, for everything
+    // other than separated scaling (which does this in the subfunction)
+    if (!is_separated)
+        sampler_prelude(p->sc, pass_bind(p, img));
+
+    // Dispatch the scaler. They're all wildly different.
+    // The built-in scalers are matched by name first; anything else must
+    // have a filter kernel.
+    const char *name = scaler->conf.kernel.name;
+    if (strcmp(name, "bilinear") == 0) {
+        GLSL(color = texture(tex, pos);)
+    } else if (strcmp(name, "bicubic_fast") == 0) {
+        pass_sample_bicubic_fast(p->sc);
+    } else if (strcmp(name, "oversample") == 0) {
+        pass_sample_oversample(p->sc, scaler, w, h);
+    } else if (scaler->kernel && scaler->kernel->polar) {
+        pass_dispatch_sample_polar(p, scaler, img, w, h);
+    } else if (scaler->kernel) {
+        pass_sample_separated(p, img, scaler, w, h);
+    } else {
+        MP_ASSERT_UNREACHABLE(); // should never happen
+    }
+
+    // Apply any required multipliers. Separated scaling already does this in
+    // its first stage
+    if (!is_separated)
+        GLSLF("color *= %f;\n", img.multiplier);
+
+    // Micro-optimization: Avoid scaling unneeded channels
+    skip_unused(p, img.components);
+}
+
+// Returns true if two images are semantically equivalent (same metadata):
+// plane type, component count, multiplier, texture format and texture size,
+// logical size, and transform must all match.
+static bool image_equiv(struct image a, struct image b)
+{
+    // Basic image properties
+    if (a.type != b.type)
+        return false;
+    if (a.components != b.components)
+        return false;
+    if (!(a.multiplier == b.multiplier))
+        return false;
+
+    // Properties of the backing textures
+    if (a.tex->params.format != b.tex->params.format)
+        return false;
+    if (a.tex->params.w != b.tex->params.w ||
+        a.tex->params.h != b.tex->params.h)
+        return false;
+
+    // Logical dimensions and transform
+    if (a.w != b.w || a.h != b.h)
+        return false;
+
+    return gl_transform_eq(a.transform, b.transform);
+}
+
+// Hook callback that generates the debanding shader for 'img', using the
+// configured deband options, the renderer's random generator state and the
+// image's gamma. 'trans' and 'priv' are not used.
+static void deband_hook(struct gl_video *p, struct image img,
+                        struct gl_transform *trans, void *priv)
+{
+    pass_describe(p, "debanding (%s)", plane_names[img.type]);
+    pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg,
+                       p->image_params.color.gamma);
+}
+
+// Hook callback that generates the unsharp masking shader with the configured
+// strength (p->opts.unsharp). 'img', 'trans' and 'priv' are not used.
+static void unsharp_hook(struct gl_video *p, struct image img,
+                         struct gl_transform *trans, void *priv)
+{
+    pass_describe(p, "unsharp masking");
+    pass_sample_unsharp(p->sc, p->opts.unsharp);
+}
+
+// Context passed to szexp_lookup() as 'priv' when evaluating size expressions
+struct szexp_ctx {
+    struct gl_video *p; // renderer whose rects and saved images are queried
+    struct image img;   // image whose size is reported for "HOOKED"
+};
+
+// Variable lookup callback for size expressions. 'priv' is a struct szexp_ctx.
+// Resolves 'var' to a width/height pair written to size[0]/size[1] and
+// returns true, or returns false (leaving 'size' untouched) if the variable
+// is unknown. The special names are checked before the saved images.
+static bool szexp_lookup(void *priv, struct bstr var, float size[2])
+{
+    struct szexp_ctx *ctx = priv;
+    struct gl_video *p = ctx->p;
+
+    float w, h;
+
+    if (bstr_equals0(var, "NATIVE_CROPPED")) {
+        // Source rectangle scaled by the texture offset's diagonal
+        w = (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0];
+        h = (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1];
+    } else if (bstr_equals0(var, "OUTPUT")) {
+        // The size of OUTPUT is determined. It could be useful for certain
+        // user shaders to skip passes.
+        w = p->dst_rect.x1 - p->dst_rect.x0;
+        h = p->dst_rect.y1 - p->dst_rect.y0;
+    } else if (bstr_equals0(var, "HOOKED")) {
+        // HOOKED is a special case
+        w = ctx->img.w;
+        h = ctx->img.h;
+    } else {
+        // Anything else has to be the name of a saved image
+        int idx = 0;
+        while (idx < p->num_saved_imgs &&
+               !bstr_equals0(var, p->saved_imgs[idx].name))
+            idx++;
+
+        if (idx >= p->num_saved_imgs)
+            return false;
+
+        w = p->saved_imgs[idx].img.w;
+        h = p->saved_imgs[idx].img.h;
+    }
+
+    size[0] = w;
+    size[1] = h;
+    return true;
+}
+
+// Condition callback for user shader hooks. 'priv' is the hook's
+// struct gl_user_shader_hook. Evaluates its 'cond' size expression against
+// 'img' and returns true if the result is non-zero. The result starts out as
+// zero, so the hook is skipped if evaluation doesn't write a value.
+static bool user_hook_cond(struct gl_video *p, struct image img, void *priv)
+{
+    struct gl_user_shader_hook *shader = priv;
+    assert(shader);
+
+    struct szexp_ctx ctx = { .p = p, .img = img };
+    float result = 0.0f;
+    eval_szexpr(p->log, &ctx, szexp_lookup, shader->cond, &result);
+
+    return result != 0.0f;
+}
+
+// Hook callback for user shaders. 'priv' is the hook's
+// struct gl_user_shader_hook. Loads the shader body, emits the call to its
+// hook() function, and writes the output transform relative to 'img' (scale
+// from the evaluated width/height expressions, plus the shader's offset)
+// to *trans.
+static void user_hook(struct gl_video *p, struct image img,
+                      struct gl_transform *trans, void *priv)
+{
+    struct gl_user_shader_hook *shader = priv;
+    assert(shader);
+    load_shader(p, shader->pass_body);
+
+    pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc),
+                  plane_names[img.type]);
+
+    // Compute shaders get no assignment to 'color' here; fragment shaders
+    // return the color from hook()
+    if (shader->compute.active) {
+        p->pass_compute = shader->compute;
+        GLSLF("hook();\n");
+    } else {
+        GLSLF("color = hook();\n");
+    }
+
+    // Make sure we at least create a legal FBO on failure, since it's better
+    // to do this and display an error message than just crash OpenGL
+    float w = 1.0, h = 1.0;
+
+    eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->width, &w);
+    eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->height, &h);
+
+    // Scale factors relative to the hooked image; the translation starts out
+    // zeroed and then the shader's offset is applied
+    *trans = (struct gl_transform){{{w / img.w, 0}, {0, h / img.h}}};
+    gl_transform_trans(shader->offset, trans);
+}
+
+// Callback passed to parse_user_shader() for registering a user shader hook.
+// 'priv' is the struct gl_video. Stores a copy of 'hook', wraps it in a
+// tex_hook that uses user_hook/user_hook_cond as callbacks, and appends that
+// to p->tex_hooks. Always returns true.
+static bool add_user_hook(void *priv, struct gl_user_shader_hook hook)
+{
+    struct gl_video *p = priv;
+    // Copy of the hook, allocated with p as the talloc context
+    struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy);
+    *copy = hook;
+
+    struct tex_hook texhook = {
+        .save_tex = bstrdup0(copy, hook.save_tex),
+        .components = hook.components,
+        .align_offset = hook.align_offset,
+        .hook = user_hook,
+        .cond = user_hook_cond,
+        .priv = copy,
+    };
+
+    // Duplicate the texture names as 0-terminated strings, allocated with
+    // 'copy' as the talloc context
+    for (int h = 0; h < SHADER_MAX_HOOKS; h++)
+        texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]);
+    for (int h = 0; h < SHADER_MAX_BINDS; h++)
+        texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]);
+
+    MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook);
+    return true;
+}
+
+// Callback passed to parse_user_shader() for registering a user texture.
+// 'priv' is the struct gl_video. Creates the texture from tex.params and
+// appends 'tex' to p->user_textures. Returns false if texture creation
+// failed. The initial data is freed in either case.
+static bool add_user_tex(void *priv, struct gl_user_shader_tex tex)
+{
+    struct gl_video *p = priv;
+
+    tex.tex = ra_tex_create(p->ra, &tex.params);
+    // The initial data is no longer needed once creation was attempted
+    TA_FREEP(&tex.params.initial_data);
+
+    if (!tex.tex)
+        return false;
+
+    MP_TARRAY_APPEND(p, p->user_textures, p->num_user_textures, tex);
+    return true;
+}
+
+// Load and parse every file in the NULL-terminated list 'shaders', which may
+// itself be NULL. Hooks and textures found by the parser are registered via
+// add_user_hook() and add_user_tex().
+static void load_user_shaders(struct gl_video *p, char **shaders)
+{
+    if (shaders == NULL)
+        return;
+
+    for (char **entry = shaders; *entry != NULL; entry++) {
+        struct bstr contents = load_cached_file(p, *entry);
+        parse_user_shader(p->log, p->ra, contents, p, add_user_hook,
+                          add_user_tex);
+    }
+}
+
+// Rebuild the list of texture hooks from the current options: the list is
+// reset first, then the built-in debanding and unsharp hooks are added if
+// enabled, followed by the hooks of all user shaders. The order in which
+// hooks are appended is the order in which pass_hook() iterates over them.
+static void gl_video_setup_hooks(struct gl_video *p)
+{
+    gl_video_reset_hooks(p);
+
+    // Debanding hooks the LUMA, CHROMA, RGB and XYZ textures
+    if (p->opts.deband) {
+        MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
+            .hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"},
+            .bind_tex = {"HOOKED"},
+            .hook = deband_hook,
+        });
+    }
+
+    // Unsharp masking hooks the MAIN texture
+    if (p->opts.unsharp != 0.0) {
+        MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) {
+            .hook_tex = {"MAIN"},
+            .bind_tex = {"HOOKED"},
+            .hook = unsharp_hook,
+        });
+    }
+
+    load_user_shaders(p, p->opts.user_shaders);
+}
+
+// sample from video textures, set "color" variable to yuv value
+//
+// Steps, in order: fetch the plane images, merge equivalent planes into one
+// texture, convert integer textures, run the per-plane hooks (RGB, LUMA,
+// CHROMA, ALPHA, XYZ), scale/realign all planes that don't match the
+// reference plane, and finally combine all planes into "color".
+// Updates p->texture_w/h, p->texture_offset and p->components.
+static void pass_read_video(struct gl_video *p)
+{
+    struct image img[4];
+    struct gl_transform offsets[4];
+    pass_get_images(p, &p->image, img, offsets);
+
+    // To keep the code as simple as possible, we currently run all shader
+    // stages even if they would be unnecessary (e.g. no hooks for a texture).
+    // In the future, deferred image should optimize this away.
+
+    // Merge semantically identical textures. This loop is done from back
+    // to front so that merged textures end up in the right order while
+    // simultaneously allowing us to skip unnecessary merges
+    for (int n = 3; n >= 0; n--) {
+        if (img[n].type == PLANE_NONE)
+            continue;
+
+        int first = n; // lowest plane index taking part in the merge
+        int num = 0;   // component counter advanced by copy_image()
+
+        for (int i = 0; i < n; i++) {
+            if (image_equiv(img[n], img[i]) &&
+                gl_transform_eq(offsets[n], offsets[i]))
+            {
+                GLSLF("// merging plane %d ...\n", i);
+                copy_image(p, &num, img[i]);
+                first = MPMIN(first, i);
+                // Mark the merged plane as unused
+                img[i] = (struct image){0};
+            }
+        }
+
+        if (num > 0) {
+            GLSLF("// merging plane %d ... into %d\n", n, first);
+            copy_image(p, &num, img[n]);
+            pass_describe(p, "merging planes");
+            finish_pass_tex(p, &p->merge_tex[n], img[n].w, img[n].h);
+            img[first] = image_wrap(p->merge_tex[n], img[n].type, num);
+            img[n] = (struct image){0};
+        }
+    }
+
+    // If any textures are still in integer format by this point, we need
+    // to introduce an explicit conversion pass to avoid breaking hooks/scaling
+    for (int n = 0; n < 4; n++) {
+        if (img[n].tex && img[n].tex->params.format->ctype == RA_CTYPE_UINT) {
+            GLSLF("// use_integer fix for plane %d\n", n);
+            copy_image(p, &(int){0}, img[n]);
+            pass_describe(p, "use_integer fix");
+            finish_pass_tex(p, &p->integer_tex[n], img[n].w, img[n].h);
+            img[n] = image_wrap(p->integer_tex[n], img[n].type,
+                                img[n].components);
+        }
+    }
+
+    // The basic idea is we assume the rgb/luma texture is the "reference" and
+    // scale everything else to match, after all planes are finalized.
+    // We find the reference texture first, in order to maintain texture offset
+    // between hooks on different type of planes.
+    // If no plane qualifies, plane 0 remains the reference.
+    int reference_tex_num = 0;
+    for (int n = 0; n < 4; n++) {
+        switch (img[n].type) {
+        case PLANE_RGB:
+        case PLANE_XYZ:
+        case PLANE_LUMA: break;
+        default: continue;
+        }
+
+        reference_tex_num = n;
+        break;
+    }
+
+    // Dispatch the hooks for all of these textures, saving and perhaps
+    // modifying them in the process
+    for (int n = 0; n < 4; n++) {
+        const char *name;
+        switch (img[n].type) {
+        case PLANE_RGB:    name = "RGB";    break;
+        case PLANE_LUMA:   name = "LUMA";   break;
+        case PLANE_CHROMA: name = "CHROMA"; break;
+        case PLANE_ALPHA:  name = "ALPHA";  break;
+        case PLANE_XYZ:    name = "XYZ";    break;
+        default: continue;
+        }
+
+        img[n] = pass_hook(p, name, img[n], &offsets[n]);
+
+        if (reference_tex_num == n) {
+            // The reference texture is finalized now.
+            p->texture_w = img[n].w;
+            p->texture_h = img[n].h;
+            p->texture_offset = offsets[n];
+        }
+    }
+
+    // At this point all planes are finalized but they may not be at the
+    // required size yet. Furthermore, they may have texture offsets that
+    // require realignment.
+
+    // Compute the reference rect
+    struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h};
+    struct mp_rect_f ref = src;
+    gl_transform_rect(p->texture_offset, &ref);
+
+    // Explicitly scale all of the textures that don't match
+    for (int n = 0; n < 4; n++) {
+        if (img[n].type == PLANE_NONE)
+            continue;
+
+        // If the planes are aligned identically, we will end up with the
+        // exact same source rectangle.
+        struct mp_rect_f rect = src;
+        gl_transform_rect(offsets[n], &rect);
+        if (mp_rect_f_seq(ref, rect))
+            continue;
+
+        // If the rectangles differ, then our planes have a different
+        // alignment and/or size. First of all, we have to compute the
+        // corrections required to meet the target rectangle
+        struct gl_transform fix = {
+            .m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0},
+                  {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}},
+            .t = {ref.x0, ref.y0},
+        };
+
+        // Since the scale in texture space is different from the scale in
+        // absolute terms, we have to scale the coefficients down to be
+        // relative to the texture's physical dimensions and local offset
+        struct gl_transform scale = {
+            .m = {{(float)img[n].w / p->texture_w, 0.0},
+                  {0.0, (float)img[n].h / p->texture_h}},
+            .t = {-rect.x0, -rect.y0},
+        };
+        // For 90/270 degree rotation the two scale factors trade places
+        if (p->image_params.rotate % 180 == 90)
+            MPSWAP(double, scale.m[0][0], scale.m[1][1]);
+
+        gl_transform_trans(scale, &fix);
+
+        // Since the texture transform is a function of the texture coordinates
+        // to texture space, rather than the other way around, we have to
+        // actually apply the *inverse* of this. Fortunately, calculating
+        // the inverse is relatively easy here.
+        fix.m[0][0] = 1.0 / fix.m[0][0];
+        fix.m[1][1] = 1.0 / fix.m[1][1];
+        fix.t[0] = fix.m[0][0] * -fix.t[0];
+        fix.t[1] = fix.m[1][1] * -fix.t[1];
+        gl_transform_trans(fix, &img[n].transform);
+
+        // Pick the scaler and the post-scaling hook point for this plane
+        int scaler_id = -1;
+        const char *name = NULL;
+        switch (img[n].type) {
+        case PLANE_RGB:
+        case PLANE_LUMA:
+        case PLANE_XYZ:
+            scaler_id = SCALER_SCALE;
+            // these aren't worth hooking, fringe hypothetical cases only
+            break;
+        case PLANE_CHROMA:
+            scaler_id = SCALER_CSCALE;
+            name = "CHROMA_SCALED";
+            break;
+        case PLANE_ALPHA:
+            // alpha always uses bilinear
+            // NOTE(review): scaler_id stays -1 here, so the check below skips
+            // this plane before 'name' is used and the ALPHA_SCALED hook
+            // point is never run from here - confirm whether that's intended
+            name = "ALPHA_SCALED";
+        }
+
+        if (scaler_id < 0)
+            continue;
+
+        const struct scaler_config *conf = &p->opts.scaler[scaler_id];
+
+        // An unset or empty cscale kernel name falls back to the scale config
+        if (scaler_id == SCALER_CSCALE && (!conf->kernel.name ||
+            !conf->kernel.name[0]))
+        {
+            conf = &p->opts.scaler[SCALER_SCALE];
+        }
+
+        struct scaler *scaler = &p->scaler[scaler_id];
+
+        // bilinear scaling is a free no-op thanks to GPU sampling
+        if (strcmp(conf->kernel.name, "bilinear") != 0) {
+            GLSLF("// upscaling plane %d\n", n);
+            pass_sample(p, img[n], scaler, conf, 1.0, p->texture_w, p->texture_h);
+            finish_pass_tex(p, &p->scale_tex[n], p->texture_w, p->texture_h);
+            img[n] = image_wrap(p->scale_tex[n], img[n].type, img[n].components);
+        }
+
+        // Run any post-scaling hooks
+        // (a NULL name makes pass_hook() return the image unchanged; passing
+        // NULL as transform forbids the hooks from resizing it)
+        img[n] = pass_hook(p, name, img[n], NULL);
+    }
+
+    // All planes are of the same size and properly aligned at this point
+    pass_describe(p, "combining planes");
+    int coord = 0;
+    for (int i = 0; i < 4; i++) {
+        if (img[i].type != PLANE_NONE)
+            copy_image(p, &coord, img[i]);
+    }
+    // The final counter value is the total number of components read
+    p->components = coord;
+}
+
+// Utility function that simply binds a texture and reads from it, without any
+// transformations. The texture is treated as an RGB plane with p->components
+// components, and copying starts at component offset 0.
+static void pass_read_tex(struct gl_video *p, struct ra_tex *tex)
+{
+    int first_component = 0;
+    struct image wrapped = image_wrap(tex, PLANE_RGB, p->components);
+    copy_image(p, &first_component, wrapped);
+}
+
+// yuv conversion, and any other conversions before main up/down-scaling
+//
+// Appends the shader code that turns the sampled `color` into R'G'B' (plus
+// alpha handling) to the current pass. Besides emitting GLSL this function:
+//  - stores the exponent for the user gamma adjustment in p->user_gamma
+//  - rewrites p->image_params.color to RGB/DCI-P3 when the input was XYZ
+//  - sets p->components to 3 (no alpha) or 4 (premultiplied alpha kept)
+static void pass_convert_yuv(struct gl_video *p)
+{
+    struct gl_shader_cache *sc = p->sc;
+
+    // Collect the conversion parameters: image colorimetry plus the current
+    // state of the video equalizer.
+    struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS;
+    cparams.gray = p->is_gray;
+    cparams.is_float = p->ra_format.component_type == RA_CTYPE_FLOAT;
+    mp_csp_set_image_params(&cparams, &p->image_params);
+    mp_csp_equalizer_state_get(p->video_eq, &cparams);
+    // Stored as the reciprocal of the combined gamma; pass_draw_to_screen()
+    // later applies it as a pow() exponent.
+    p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma);
+
+    pass_describe(p, "color conversion");
+
+    // Reorder the channels first if a swizzle string is set
+    if (p->color_swizzle[0])
+        GLSLF("color = color.%s;\n", p->color_swizzle);
+
+    // Pre-colormatrix input gamma correction
+    if (cparams.color.space == MP_CSP_XYZ)
+        pass_linearize(p->sc, p->image_params.color.gamma);
+
+    // We always explicitly normalize the range in pass_read_video
+    cparams.input_bits = cparams.texture_bits = 0;
+
+    // Conversion to RGB. For RGB itself, this still applies e.g. brightness
+    // and contrast controls, or expansion of e.g. LSB-packed 10 bit data.
+    struct mp_cmat m = {{{0}}};
+    mp_get_csp_matrix(&cparams, &m);
+    gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]);
+    gl_sc_uniform_vec3(sc, "colormatrix_c", m.c);
+
+    GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;)
+
+    if (cparams.color.space == MP_CSP_XYZ) {
+        pass_delinearize(p->sc, p->image_params.color.gamma);
+        // mp_get_csp_matrix implicitly converts XYZ to DCI-P3
+        // NOTE: this overwrites p->image_params itself, so everything that
+        // reads it afterwards sees RGB with DCI-P3 primaries.
+        p->image_params.color.space = MP_CSP_RGB;
+        p->image_params.color.primaries = MP_CSP_PRIM_DCI_P3;
+    }
+
+    if (p->image_params.color.space == MP_CSP_BT_2020_C) {
+        // Conversion for C'rcY'cC'bc via the BT.2020 CL system:
+        // C'bc = (B'-Y'c) / 1.9404  | C'bc <= 0
+        //      = (B'-Y'c) / 1.5816  | C'bc >  0
+        //
+        // C'rc = (R'-Y'c) / 1.7184  | C'rc <= 0
+        //      = (R'-Y'c) / 0.9936  | C'rc >  0
+        //
+        // as per the BT.2020 specification, table 4. This is a non-linear
+        // transformation because (constant) luminance receives non-equal
+        // contributions from the three different channels.
+        //
+        // At this point color.g holds Y'c, so the statement below recovers
+        // B' and R' by undoing the division and adding Y'c back.
+        GLSLF("// constant luminance conversion \n"
+              "color.br = color.br * mix(vec2(1.5816, 0.9936),              \n"
+              "                         vec2(1.9404, 1.7184),               \n"
+              "                         %s(lessThanEqual(color.br, vec2(0))))\n"
+              "          + color.gg;                                        \n",
+              gl_sc_bvec(p->sc, 2));
+        // Expand channels to camera-linear light. This shader currently just
+        // assumes everything uses the BT.2020 12-bit gamma function, since the
+        // difference between 10 and 12-bit is negligible for anything other
+        // than 12-bit content.
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5),                       \n"
+              "                pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n"
+              "                    vec3(1.0/0.45)),                             \n"
+              "                %s(lessThanEqual(vec3(0.08145), color.rgb)));    \n",
+              gl_sc_bvec(p->sc, 3));
+        // Calculate the green channel from the expanded RYcB
+        // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B
+        GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;)
+        // Recompress to receive the R'G'B' result, same as other systems
+        GLSLF("color.rgb = mix(color.rgb * vec3(4.5),                       \n"
+              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n"
+              "                %s(lessThanEqual(vec3(0.0181), color.rgb))); \n",
+              gl_sc_bvec(p->sc, 3));
+    }
+
+    // Alpha: either force it opaque (3 components), keep it as-is when the
+    // source is already premultiplied, or premultiply straight alpha here.
+    p->components = 3;
+    if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) {
+        GLSL(color.a = 1.0;)
+    } else if (p->image_params.alpha == MP_ALPHA_PREMUL) {
+        p->components = 4;
+    } else {
+        p->components = 4;
+        GLSL(color = vec4(color.rgb * color.a, color.a);) // straight -> premul
+    }
+}
+
+// Writes the ratio of the destination rectangle size to the source rectangle
+// size into xy[0] (horizontal) and xy[1] (vertical). If transpose_rot is set
+// and the image rotation satisfies rotate % 180 == 90, the source width and
+// height trade places before dividing.
+static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2])
+{
+    bool swap_axes = transpose_rot && p->image_params.rotate % 180 == 90;
+
+    double src_w = p->src_rect.x1 - p->src_rect.x0;
+    double src_h = p->src_rect.y1 - p->src_rect.y0;
+    double div_x = swap_axes ? src_h : src_w;
+    double div_y = swap_axes ? src_w : src_h;
+
+    xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) / div_x;
+    xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) / div_y;
+}
+
+// Builds the transform that crops the texture to p->src_rect: the diagonal
+// scales by the source rectangle size relative to the texture size, the
+// translation is the rectangle's top-left corner. The result is combined with
+// p->texture_offset through gl_transform_trans() and stored in *tr.
+static void compute_src_transform(struct gl_video *p, struct gl_transform *tr)
+{
+    float crop_w = p->src_rect.x1 - p->src_rect.x0;
+    float crop_h = p->src_rect.y1 - p->src_rect.y0;
+
+    struct gl_transform crop = {
+        .m = {{crop_w / (float)p->texture_w, 0},
+              {0, crop_h / (float)p->texture_h}},
+        .t = {p->src_rect.x0, p->src_rect.y0},
+    };
+
+    gl_transform_trans(p->texture_offset, &crop);
+    *tr = crop;
+}
+
+// Takes care of the main scaling and pre/post-conversions
+//
+// Scales the current pass from p->texture_w x p->texture_h to the size of
+// p->dst_rect. Around the scaler it optionally emits linearization and a
+// sigmoidal transform (and the inverse of the latter afterwards).
+//
+// Side effects visible to later passes:
+//  - p->texture_w/h become the viewport size
+//  - p->use_linear is set if the color was linearized (it is NOT delinearized
+//    here)
+//  - p->texture_offset.t is rounded when scaler-resizes-only applies
+static void pass_scale_main(struct gl_video *p)
+{
+    // Figure out the main scaler.
+    double xy[2];
+    get_scale_factors(p, true, xy);
+
+    // actual scale factor should be divided by the scale factor of prescaling.
+    xy[0] /= p->texture_offset.m[0][0];
+    xy[1] /= p->texture_offset.m[1][1];
+
+    // The calculation of scale factor involves 32-bit float(from gl_transform),
+    // use non-strict equality test to tolerate precision loss.
+    bool downscaling = xy[0] < 1.0 - FLT_EPSILON || xy[1] < 1.0 - FLT_EPSILON;
+    bool upscaling = !downscaling && (xy[0] > 1.0 + FLT_EPSILON ||
+                                      xy[1] > 1.0 + FLT_EPSILON);
+    double scale_factor = 1.0;
+
+    struct scaler *scaler = &p->scaler[SCALER_SCALE];
+    struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE];
+    if (p->opts.scaler_resizes_only && !downscaling && !upscaling) {
+        scaler_conf.kernel.name = "bilinear";
+        // For scaler-resizes-only, we round the texture offset to
+        // the nearest round value in order to prevent ugly blurriness
+        // (in exchange for slightly shifting the image by up to half a
+        // subpixel)
+        p->texture_offset.t[0] = roundf(p->texture_offset.t[0]);
+        p->texture_offset.t[1] = roundf(p->texture_offset.t[1]);
+    }
+
+    // A dedicated downscaler only overrides the main scaler if one is really
+    // configured. An empty name counts as "unset" just like NULL, matching
+    // how the chroma scaler falls back to the main scaler; otherwise "" would
+    // be handed to the sampler as a kernel name.
+    const char *dscale_name = p->opts.scaler[SCALER_DSCALE].kernel.name;
+    if (downscaling && dscale_name && dscale_name[0]) {
+        scaler_conf = p->opts.scaler[SCALER_DSCALE];
+        scaler = &p->scaler[SCALER_DSCALE];
+    }
+
+    // When requesting correct-downscaling and the clip is anamorphic, and
+    // because only a single scale factor is used for both axes, enable it only
+    // when both axes are downscaled, and use the milder of the factors to not
+    // end up with too much blur on one axis (even if we end up with sub-optimal
+    // scale factor on the other axis). This is better than not respecting
+    // correct scaling at all for anamorphic clips.
+    double f = MPMAX(xy[0], xy[1]);
+    if (p->opts.correct_downscaling && f < 1.0)
+        scale_factor = 1.0 / f;
+
+    // Pre-conversion, like linear light/sigmoidization
+    GLSLF("// scaler pre-conversion\n");
+    bool use_linear = false;
+    if (downscaling) {
+        use_linear = p->opts.linear_downscaling;
+
+        // Linear light downscaling results in nasty artifacts for HDR curves
+        // due to the potentially extreme brightness differences severely
+        // compounding any ringing. So just scale in gamma light instead.
+        if (mp_trc_is_hdr(p->image_params.color.gamma))
+            use_linear = false;
+    } else if (upscaling) {
+        use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling;
+    }
+
+    if (use_linear) {
+        p->use_linear = true;
+        pass_linearize(p->sc, p->image_params.color.gamma);
+        pass_opt_hook_point(p, "LINEAR", NULL);
+    }
+
+    // The sigmoid coefficients are shared by the forward transform here and
+    // the inverse transform after the scaler; they are only meaningful when
+    // use_sigmoid is set, but are zero-initialized so they are never read
+    // indeterminate.
+    bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling;
+    float sig_center = 0, sig_slope = 0, sig_offset = 0, sig_scale = 0;
+    if (use_sigmoid) {
+        // Coefficients for the sigmoidal transform are taken from the
+        // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal
+        sig_center = p->opts.sigmoid_center;
+        sig_slope  = p->opts.sigmoid_slope;
+        // This function needs to go through (0,0) and (1,1) so we compute the
+        // values at 1 and 0, and then scale/shift them, respectively.
+        sig_offset = 1.0/(1+expf(sig_slope * sig_center));
+        sig_scale  = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset;
+        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
+        GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n",
+                sig_center, sig_scale, sig_offset, sig_slope);
+        pass_opt_hook_point(p, "SIGMOID", NULL);
+    }
+
+    pass_opt_hook_point(p, "PREKERNEL", NULL);
+
+    int vp_w = p->dst_rect.x1 - p->dst_rect.x0;
+    int vp_h = p->dst_rect.y1 - p->dst_rect.y0;
+    struct gl_transform transform;
+    compute_src_transform(p, &transform);
+
+    // Flush everything emitted so far into an intermediate texture at source
+    // size, then sample that texture with the chosen scaler at viewport size.
+    GLSLF("// main scaling\n");
+    finish_pass_tex(p, &p->indirect_tex, p->texture_w, p->texture_h);
+    struct image src = image_wrap(p->indirect_tex, PLANE_RGB, p->components);
+    gl_transform_trans(transform, &src.transform);
+    pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h);
+
+    // Changes the texture size to display size after main scaler.
+    p->texture_w = vp_w;
+    p->texture_h = vp_h;
+
+    pass_opt_hook_point(p, "POSTKERNEL", NULL);
+
+    GLSLF("// scaler post-conversion\n");
+    if (use_sigmoid) {
+        // Inverse of the transformation above
+        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
+        GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n",
+                sig_slope, sig_center, sig_offset, sig_scale);
+    }
+}
+
+// Adapts the colors to the right output color space. (Final pass during
+// rendering)
+//
+// src:     color space the current `color` value is in. This is a by-value
+//          copy; an unknown (zero) peak is filled in locally.
+// fbo_csp: color space reported for the target FBO, used wherever the user
+//          did not force a target transfer function/primaries/peak.
+// flags:   only RENDER_SCREEN_COLOR is inspected here; it gates 3DLUT usage.
+// osd:     If OSD is true, ignore any changes that may have been made to the
+//          video by previous passes (i.e. linear scaling)
+//
+// Side effects beyond emitting shader code: unsupported tone/gamut mapping
+// modes in p->opts.tone_map are reset to AUTO, p->hdr_peak_ssbo may be
+// created (and peak detection disabled in p->opts if that fails), and
+// p->colorspace_override_warned is set once the override warning was shown.
+static void pass_colormanage(struct gl_video *p, struct mp_colorspace src,
+                             struct mp_colorspace fbo_csp, int flags, bool osd)
+{
+    struct ra *ra = p->ra;
+
+    // Configure the destination according to the FBO color space,
+    // unless specific transfer function, primaries or target peak
+    // is set. If values are set to _AUTO, the most likely intended
+    // values are guesstimated later in this function.
+    struct mp_colorspace dst = {
+        .gamma = p->opts.target_trc == MP_CSP_TRC_AUTO ?
+                 fbo_csp.gamma : p->opts.target_trc,
+        .primaries = p->opts.target_prim == MP_CSP_PRIM_AUTO ?
+                     fbo_csp.primaries : p->opts.target_prim,
+        .light = MP_CSP_LIGHT_DISPLAY,
+        .hdr.max_luma = !p->opts.target_peak ?
+                        fbo_csp.hdr.max_luma : p->opts.target_peak,
+    };
+
+    // Warn (only once) if the user's explicit target overrides a value the
+    // FBO actually provides.
+    if (!p->colorspace_override_warned &&
+        ((fbo_csp.gamma && dst.gamma != fbo_csp.gamma) ||
+         (fbo_csp.primaries && dst.primaries != fbo_csp.primaries)))
+    {
+        MP_WARN(p, "One or more colorspace value is being overridden "
+                   "by user while the FBO provides colorspace information: "
+                   "transfer function: (dst: %s, fbo: %s), "
+                   "primaries: (dst: %s, fbo: %s). "
+                   "Rendering can lead to incorrect results!\n",
+                m_opt_choice_str(mp_csp_trc_names,  dst.gamma),
+                m_opt_choice_str(mp_csp_trc_names,  fbo_csp.gamma),
+                m_opt_choice_str(mp_csp_prim_names, dst.primaries),
+                m_opt_choice_str(mp_csp_prim_names, fbo_csp.primaries));
+        p->colorspace_override_warned = true;
+    }
+
+    if (dst.gamma == MP_CSP_TRC_HLG)
+        dst.light = MP_CSP_LIGHT_SCENE_HLG;
+
+    if (p->use_lut_3d && (flags & RENDER_SCREEN_COLOR)) {
+        // The 3DLUT is always generated against the video's original source
+        // space, *not* the reference space. (To avoid having to regenerate
+        // the 3DLUT for the OSD on every frame)
+        enum mp_csp_prim prim_orig = p->image_params.color.primaries;
+        enum mp_csp_trc trc_orig = p->image_params.color.gamma;
+
+        // One exception: HDR is not implemented by LittleCMS for technical
+        // limitation reasons, so we use a gamma 2.2 input curve here instead.
+        // We could pick any value we want here, the difference is just coding
+        // efficiency.
+        if (mp_trc_is_hdr(trc_orig))
+            trc_orig = MP_CSP_TRC_GAMMA22;
+
+        // If the LUT is available, the color mapping below targets the LUT's
+        // input space; the LUT lookup at the end of this function does the
+        // rest.
+        if (gl_video_get_lut3d(p, prim_orig, trc_orig)) {
+            dst.primaries = prim_orig;
+            dst.gamma = trc_orig;
+            assert(dst.primaries && dst.gamma);
+        }
+    }
+
+    if (dst.primaries == MP_CSP_PRIM_AUTO) {
+        // The vast majority of people are on sRGB or BT.709 displays, so pick
+        // this as the default output color space.
+        dst.primaries = MP_CSP_PRIM_BT_709;
+
+        if (src.primaries == MP_CSP_PRIM_BT_601_525 ||
+            src.primaries == MP_CSP_PRIM_BT_601_625)
+        {
+            // Since we auto-pick BT.601 and BT.709 based on the dimensions,
+            // combined with the fact that they're very similar to begin with,
+            // and to avoid confusing the average user, just don't adapt BT.601
+            // content automatically at all.
+            dst.primaries = src.primaries;
+        }
+    }
+
+    if (dst.gamma == MP_CSP_TRC_AUTO) {
+        // Most people seem to complain when the image is darker or brighter
+        // than what they're "used to", so just avoid changing the gamma
+        // altogether by default. The only exceptions to this rule apply to
+        // very unusual TRCs, which even hardcore technoluddites would probably
+        // not enjoy viewing unaltered.
+        dst.gamma = src.gamma;
+
+        // Avoid outputting linear light or HDR content "by default". For these
+        // just pick gamma 2.2 as a default, since it's a good estimate for
+        // the response of typical displays
+        if (dst.gamma == MP_CSP_TRC_LINEAR || mp_trc_is_hdr(dst.gamma))
+            dst.gamma = MP_CSP_TRC_GAMMA22;
+    }
+
+    // If there's no specific signal peak known for the output display, infer
+    // it from the chosen transfer function. Also normalize the src peak, in
+    // case it was unknown
+    if (!dst.hdr.max_luma)
+        dst.hdr.max_luma = mp_trc_nom_peak(dst.gamma) * MP_REF_WHITE;
+    if (!src.hdr.max_luma)
+        src.hdr.max_luma = mp_trc_nom_peak(src.gamma) * MP_REF_WHITE;
+
+    // Whitelist supported modes
+    // (anything else is written back to p->opts as AUTO, so the warning is
+    // not repeated on the next call)
+    switch (p->opts.tone_map.curve) {
+    case TONE_MAPPING_AUTO:
+    case TONE_MAPPING_CLIP:
+    case TONE_MAPPING_MOBIUS:
+    case TONE_MAPPING_REINHARD:
+    case TONE_MAPPING_HABLE:
+    case TONE_MAPPING_GAMMA:
+    case TONE_MAPPING_LINEAR:
+    case TONE_MAPPING_BT_2390:
+        break;
+    default:
+        MP_WARN(p, "Tone mapping curve unsupported by vo_gpu, falling back.\n");
+        p->opts.tone_map.curve = TONE_MAPPING_AUTO;
+        break;
+    }
+
+    switch (p->opts.tone_map.gamut_mode) {
+    case GAMUT_AUTO:
+    case GAMUT_WARN:
+    case GAMUT_CLIP:
+    case GAMUT_DESATURATE:
+        break;
+    default:
+        MP_WARN(p, "Gamut mapping mode unsupported by vo_gpu, falling back.\n");
+        p->opts.tone_map.gamut_mode = GAMUT_AUTO;
+        break;
+    }
+
+    // Work on a local copy from here on, so that disabling peak detection
+    // for this call does not touch the user's options.
+    struct gl_tone_map_opts tone_map = p->opts.tone_map;
+    // Peak detection only makes sense for HDR sources that are brighter than
+    // the target.
+    bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma)
+                       && src.hdr.max_luma > dst.hdr.max_luma;
+
+    // Lazily create the buffer holding the peak detection state
+    if (detect_peak && !p->hdr_peak_ssbo) {
+        // Layout must match the SSBO declaration passed to gl_sc_ssbo() below
+        struct {
+            float average[2];
+            int32_t frame_sum;
+            uint32_t frame_max;
+            uint32_t counter;
+        } peak_ssbo = {0};
+
+        struct ra_buf_params params = {
+            .type = RA_BUF_TYPE_SHADER_STORAGE,
+            .size = sizeof(peak_ssbo),
+            .initial_data = &peak_ssbo,
+        };
+
+        p->hdr_peak_ssbo = ra_buf_create(ra, &params);
+        if (!p->hdr_peak_ssbo) {
+            MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n");
+            // Disabled in p->opts too, so creation is not retried
+            tone_map.compute_peak = p->opts.tone_map.compute_peak = -1;
+            detect_peak = false;
+        }
+    }
+
+    if (detect_peak) {
+        pass_describe(p, "detect HDR peak");
+        pass_is_compute(p, 8, 8, true); // 8x8 is good for performance
+        gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo,
+            "vec2 average;"
+            "int frame_sum;"
+            "uint frame_max;"
+            "uint counter;"
+        );
+    } else {
+        tone_map.compute_peak = -1;
+    }
+
+    // Adapt from src to dst as necessary
+    pass_color_map(p->sc, p->use_linear && !osd, src, dst, &tone_map);
+
+    // Finally run the result through the 3DLUT, one lookup coordinate per
+    // color channel.
+    if (p->use_lut_3d && (flags & RENDER_SCREEN_COLOR)) {
+        gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture);
+        GLSL(vec3 cpos;)
+        for (int i = 0; i < 3; i++)
+            GLSLF("cpos[%d] = LUT_POS(color[%d], %d.0);\n", i, i, p->lut_3d_size[i]);
+        GLSL(color.rgb = tex3D(lut_3d, cpos).rgb;)
+    }
+}
+
+// Records the bit depth (per component) of the framebuffer being rendered to.
+// pass_dither() uses a positive value as the default dithering target depth
+// and assumes 8 bits for anything else.
+void gl_video_set_fb_depth(struct gl_video *p, int fb_depth)
+{
+    p->fb_depth = fb_depth;
+}
+
+// Appends the final dithering step to the current pass.
+//
+// The target depth is p->opts.dither_depth when positive, else p->fb_depth
+// when positive, else 8 bits. Nothing is emitted when dither_depth is
+// negative or the algorithm is DITHER_NONE.
+//
+// DITHER_ERROR_DIFFUSION runs as a compute pass through
+// p->error_diffusion_tex[0..1]. If its kernel cannot be found, or needs more
+// shared memory than the RA offers, p->opts.dither_algo is overwritten with
+// DITHER_FRUIT and the matrix path is used instead. The matrix based
+// algorithms create p->dither_texture on first use and reuse it afterwards;
+// if the texture cannot be created, no dithering is emitted.
+static void pass_dither(struct gl_video *p)
+{
+    // Assume 8 bits per component if unknown.
+    int dst_depth = p->fb_depth > 0 ? p->fb_depth : 8;
+    if (p->opts.dither_depth > 0)
+        dst_depth = p->opts.dither_depth;
+
+    if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE)
+        return;
+
+    if (p->opts.dither_algo == DITHER_ERROR_DIFFUSION) {
+        const struct error_diffusion_kernel *kernel =
+            mp_find_error_diffusion_kernel(p->opts.error_diffusion);
+        int o_w = p->dst_rect.x1 - p->dst_rect.x0,
+            o_h = p->dst_rect.y1 - p->dst_rect.y0;
+
+        // The lookup can fail; never touch the kernel before checking it.
+        int shmem_req = kernel ? mp_ef_compute_shared_memory_size(kernel, o_h)
+                               : 0;
+        if (!kernel) {
+            MP_WARN(p, "Fallback to dither=fruit because the error diffusion "
+                       "kernel could not be found.\n");
+            p->opts.dither_algo = DITHER_FRUIT;
+        } else if (shmem_req > p->ra->max_shmem) {
+            MP_WARN(p, "Fallback to dither=fruit because there is not enough "
+                       "shared memory (%d/%d).\n",
+                       shmem_req, (int)p->ra->max_shmem);
+            p->opts.dither_algo = DITHER_FRUIT;
+        } else {
+            // Error diffusion reads the finished image from a texture and
+            // writes the dithered result into a second one.
+            finish_pass_tex(p, &p->error_diffusion_tex[0], o_w, o_h);
+
+            struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components);
+
+            // Ensure the block size doesn't exceed the maximum of the
+            // implementation.
+            int block_size = MPMIN(p->ra->max_compute_group_threads, o_h);
+
+            pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)",
+                             kernel->name, dst_depth);
+
+            p->pass_compute = (struct compute_info) {
+                .active = true,
+                .threads_w = block_size,
+                .threads_h = 1,
+                .directly_writes = true
+            };
+
+            int tex_id = pass_bind(p, img);
+
+            pass_error_diffusion(p->sc, kernel, tex_id, o_w, o_h,
+                                 dst_depth, block_size);
+
+            finish_pass_tex(p, &p->error_diffusion_tex[1], o_w, o_h);
+
+            // Read the dithered result back so the caller can continue with
+            // a regular pass.
+            img = image_wrap(p->error_diffusion_tex[1], PLANE_RGB, p->components);
+            copy_image(p, &(int){0}, img);
+
+            return;
+        }
+    }
+
+    if (!p->dither_texture) {
+        MP_VERBOSE(p, "Dither to %d.\n", dst_depth);
+
+        int tex_size = 0;
+        void *tex_data = NULL;
+        const struct ra_format *fmt = NULL;
+        void *temp = NULL; // scratch upload buffer, freed after texture creation
+
+        if (p->opts.dither_algo == DITHER_FRUIT) {
+            int sizeb = p->opts.dither_size;
+            int size = 1 << sizeb;
+
+            // The float matrix is cached and only regenerated when the
+            // requested size changes.
+            if (p->last_dither_matrix_size != size) {
+                p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix,
+                                                       float, size * size);
+                mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb);
+                p->last_dither_matrix_size = size;
+            }
+
+            // Prefer R16 texture since they provide higher precision.
+            fmt = ra_find_unorm_format(p->ra, 2, 1);
+            if (!fmt)
+                fmt = ra_find_float16_format(p->ra, 1);
+            if (fmt) {
+                tex_size = size;
+                tex_data = p->last_dither_matrix;
+                if (fmt->ctype == RA_CTYPE_UNORM) {
+                    // Convert the float matrix to 16 bit integers for upload
+                    uint16_t *t = temp = talloc_array(NULL, uint16_t, size * size);
+                    for (int n = 0; n < size * size; n++)
+                        t[n] = p->last_dither_matrix[n] * UINT16_MAX;
+                    tex_data = t;
+                }
+            } else {
+                MP_VERBOSE(p, "GL too old. Falling back to ordered dither.\n");
+                p->opts.dither_algo = DITHER_ORDERED;
+            }
+        }
+
+        if (p->opts.dither_algo == DITHER_ORDERED) {
+            temp = talloc_array(NULL, char, 8 * 8);
+            mp_make_ordered_dither_matrix(temp, 8);
+
+            fmt = ra_find_unorm_format(p->ra, 1, 1);
+            tex_size = 8;
+            tex_data = temp;
+        }
+
+        // Without a usable format (or without any matrix at all) there is
+        // nothing to upload; skip dithering instead of handing a NULL format
+        // or a zero-sized texture to ra_tex_create().
+        if (!fmt || tex_size <= 0) {
+            talloc_free(temp);
+            return;
+        }
+
+        struct ra_tex_params params = {
+            .dimensions = 2,
+            .w = tex_size,
+            .h = tex_size,
+            .d = 1,
+            .format = fmt,
+            .render_src = true,
+            .src_repeat = true,
+            .initial_data = tex_data,
+        };
+        p->dither_texture = ra_tex_create(p->ra, &params);
+
+        debug_check_gl(p, "dither setup");
+
+        talloc_free(temp);
+
+        if (!p->dither_texture)
+            return;
+    }
+
+    GLSLF("// dithering\n");
+
+    // This defines how many bits are considered significant for output on
+    // screen. The superfluous bits will be used for rounding according to the
+    // dither matrix. The precision of the source implicitly decides how many
+    // dither patterns can be visible.
+    int dither_quantization = (1 << dst_depth) - 1;
+    int dither_size = p->dither_texture->params.w;
+
+    gl_sc_uniform_texture(p->sc, "dither", p->dither_texture);
+
+    GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", dither_size);
+
+    if (p->opts.temporal_dither) {
+        // The pattern orientation advances every temporal_dither_period
+        // frames through 8 phases: four rotations in steps of 90 degrees,
+        // then the same four rotations mirrored.
+        int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u;
+        float r = phase * (M_PI / 2); // rotate
+        float m = phase < 4 ? 1 : -1; // mirror
+
+        float matrix[2][2] = {{cos(r),     -sin(r)    },
+                              {sin(r) * m,  cos(r) * m}};
+        gl_sc_uniform_dynamic(p->sc);
+        gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]);
+
+        GLSL(dither_pos = dither_trafo * dither_pos;)
+    }
+
+    GLSL(float dither_value = texture(dither, dither_pos).r;)
+    GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n",
+          dither_quantization, dither_size * dither_size, dither_quantization);
+}
+
+// Generates the OSD for `rect` at time `pts` and draws every non-empty OSD
+// part into `fbo`. RENDER_FRAME_VF_SUBS in frame_flags adds
+// OSD_DRAW_SUB_FILTER to osd_flags; requesting both OSD_DRAW_SUB_ONLY and
+// OSD_DRAW_OSD_ONLY draws nothing at all. With cms set, each part is color
+// managed from an assumed sRGB source to the FBO's color space before it is
+// drawn. The time spent is recorded as the "drawing osd" pass.
+static void pass_draw_osd(struct gl_video *p, int osd_flags, int frame_flags,
+                          double pts, struct mp_osd_res rect, struct ra_fbo fbo,
+                          bool cms)
+{
+    // Subtitles/OSD carry no color space of their own, so sRGB is assumed
+    // (for lack of anything saner to do)
+    static const struct mp_colorspace csp_srgb = {
+        .primaries = MP_CSP_PRIM_BT_709,
+        .gamma = MP_CSP_TRC_SRGB,
+        .light = MP_CSP_LIGHT_DISPLAY,
+    };
+
+    if (frame_flags & RENDER_FRAME_VF_SUBS)
+        osd_flags |= OSD_DRAW_SUB_FILTER;
+
+    bool subs_only = osd_flags & OSD_DRAW_SUB_ONLY;
+    bool osd_only = osd_flags & OSD_DRAW_OSD_ONLY;
+    if (subs_only && osd_only)
+        return;
+
+    mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo3d, osd_flags);
+
+    timer_pool_start(p->osd_timer);
+    for (int part = 0; part < MAX_OSD_PARTS; part++) {
+        // (Preparation reports false for an empty part with nothing to draw.)
+        if (mpgl_osd_draw_prepare(p->osd, part, p->sc)) {
+            if (cms)
+                pass_colormanage(p, csp_srgb, fbo.color_space, frame_flags, true);
+            mpgl_osd_draw_finish(p->osd, part, p->sc, fbo);
+        }
+    }
+    timer_pool_stop(p->osd_timer);
+
+    pass_describe(p, "drawing osd");
+    pass_record(p, timer_pool_measure(p->osd_timer));
+}
+
+// Returns `size` divided by what chroma_upsize() yields for the same size and
+// subsampling factor `pixel`, as a float.
+static float chroma_realign(int size, int pixel)
+{
+    float upsized = chroma_upsize(size, pixel);
+    return size / upsized;
+}
+
+// Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs.
+// Every plane is sampled directly with the source-crop transform (adjusted
+// for chroma subsampling and the plane's own offset) and the result goes
+// straight into the color conversion.
+static void pass_render_frame_dumb(struct gl_video *p)
+{
+    struct image planes[4];
+    struct gl_transform plane_off[4];
+    pass_get_images(p, &p->image, planes, plane_off);
+
+    struct gl_transform crop;
+    compute_src_transform(p, &crop);
+
+    int component = 0;
+    for (int n = 0; n < p->plane_count; n++) {
+        struct image *plane = &planes[n];
+
+        // Subsampling factors; 1 for everything that is not chroma
+        int sub_w = 1, sub_h = 1;
+        if (plane->type == PLANE_CHROMA) {
+            sub_w = p->ra_format.chroma_w;
+            sub_h = p->ra_format.chroma_h;
+        }
+        if (p->image_params.rotate % 180 == 90)
+            MPSWAP(int, sub_w, sub_h);
+
+        // Start from the crop transform and adapt it to this plane
+        struct gl_transform plane_tr = crop;
+        plane_tr.m[0][0] *= chroma_realign(p->texture_w, sub_w);
+        plane_tr.m[1][1] *= chroma_realign(p->texture_h, sub_h);
+
+        plane_tr.t[0] /= sub_w;
+        plane_tr.t[0] += plane_off[n].t[0];
+        plane_tr.t[1] /= sub_h;
+        plane_tr.t[1] += plane_off[n].t[1];
+
+        gl_transform_trans(plane->transform, &plane_tr);
+        plane->transform = plane_tr;
+
+        copy_image(p, &component, *plane);
+    }
+
+    pass_convert_yuv(p);
+}
+
+// Main rendering entry point: uploads `mpi` and runs every stage up to and
+// including the main scaler, leaving the result in the current pass.
+// p->image is what gets rendered.
+//
+// flags: bit set of RENDER_FRAME_* flags
+// Returns false if the frame could not be uploaded. In dumb mode nothing is
+// rendered here and true is returned right after the upload.
+static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi,
+                              uint64_t id, int flags)
+{
+    // Reset all per-frame state
+    p->use_linear = false;
+    p->idx_hook_textures = 0;
+    p->num_saved_imgs = 0;
+    p->components = 0;
+    p->texture_offset = identity_trans;
+    p->texture_w = p->image_params.w;
+    p->texture_h = p->image_params.h;
+
+    if (!pass_upload_image(p, mpi, id))
+        return false; // upload failed
+
+    // A quarter-turn rotation exchanges width and height
+    if (p->image_params.rotate % 180 == 90)
+        MPSWAP(int, p->texture_w, p->texture_h);
+
+    if (p->dumb_mode)
+        return true;
+
+    pass_read_video(p);
+    pass_opt_hook_point(p, "NATIVE", &p->texture_offset);
+    pass_convert_yuv(p);
+    pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset);
+
+    // Timestamp used for subtitle rendering; fall back to the OSD timestamp
+    // if the frame has none
+    double sub_pts = p->image.mpi->pts;
+    if (sub_pts == MP_NOPTS_VALUE)
+        sub_pts = p->osd_pts;
+
+    // Subtitles blended at video resolution, i.e. before the main scaler
+    if (p->osd && (flags & RENDER_FRAME_SUBS) &&
+        p->opts.blend_subs == BLEND_SUBS_VIDEO)
+    {
+        double factors[2];
+        get_scale_factors(p, false, factors);
+        struct mp_osd_res res = {
+            .w = p->texture_w, .h = p->texture_h,
+            .display_par = factors[1] / factors[0], // counter compensate scaling
+        };
+        finish_pass_tex(p, &p->blend_subs_tex, res.w, res.h);
+        struct ra_fbo target = { p->blend_subs_tex };
+        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, sub_pts, res, target, false);
+        pass_read_tex(p, p->blend_subs_tex);
+        pass_describe(p, "blend subs video");
+    }
+    pass_opt_hook_point(p, "MAIN", &p->texture_offset);
+
+    pass_scale_main(p);
+
+    // Subtitles blended at display resolution, i.e. after the main scaler
+    int out_w = p->dst_rect.x1 - p->dst_rect.x0,
+        out_h = p->dst_rect.y1 - p->dst_rect.y0;
+    if (p->osd && (flags & RENDER_FRAME_SUBS) &&
+        p->opts.blend_subs == BLEND_SUBS_YES)
+    {
+        // Margins describe how far the source rectangle is from the edges
+        // of the full video frame
+        struct mp_osd_res res = {
+            .w = out_w, .h = out_h,
+            .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_params.w,
+            .mt = -p->src_rect.y0, .mb = p->src_rect.y1 - p->image_params.h,
+            .display_par = 1.0,
+        };
+        // Bring the margins to output scale
+        double factors[2];
+        get_scale_factors(p, true, factors);
+        res.ml *= factors[0];
+        res.mr *= factors[0];
+        res.mt *= factors[1];
+        res.mb *= factors[1];
+        // Subtitle blending must always happen in non-linear light
+        if (p->use_linear) {
+            pass_delinearize(p->sc, p->image_params.color.gamma);
+            p->use_linear = false;
+        }
+        finish_pass_tex(p, &p->blend_subs_tex, p->texture_w, p->texture_h);
+        struct ra_fbo target = { p->blend_subs_tex };
+        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, sub_pts, res, target, false);
+        pass_read_tex(p, p->blend_subs_tex);
+        pass_describe(p, "blend subs");
+    }
+
+    pass_opt_hook_point(p, "SCALED", NULL);
+
+    return true;
+}
+
+// Finishes the current pass into `fbo`: applies the user gamma, color
+// management, alpha blending against a checkerboard or the background color,
+// the "OUTPUT" hook and (only with RENDER_SCREEN_COLOR in flags) dithering.
+// In dumb mode the frame itself is rendered here first.
+static void pass_draw_to_screen(struct gl_video *p, struct ra_fbo fbo, int flags)
+{
+    if (p->dumb_mode)
+        pass_render_frame_dumb(p);
+
+    // Overall gamma adjustment, skipped when it would be a no-op
+    if (p->user_gamma != 1) {
+        gl_sc_uniform_f(p->sc, "user_gamma", p->user_gamma);
+        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
+        GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));)
+    }
+
+    pass_colormanage(p, p->image_params.color, fbo.color_space, flags, false);
+
+    // finish_pass_fbo cannot be used with compute shaders, and the
+    // checkerboard/dither code cannot either, so a pass that became a compute
+    // pass is first flushed into p->screen_tex and read back.
+    if (p->pass_compute.active) {
+        int out_w = p->dst_rect.x1 - p->dst_rect.x0;
+        int out_h = p->dst_rect.y1 - p->dst_rect.y0;
+        finish_pass_tex(p, &p->screen_tex, out_w, out_h);
+        struct image screen = image_wrap(p->screen_tex, PLANE_RGB, p->components);
+        int component_offset = 0;
+        copy_image(p, &component_offset, screen);
+    }
+
+    if (p->has_alpha) {
+        switch (p->opts.alpha_mode) {
+        case ALPHA_BLEND_TILES:
+            // Checkerboard pattern that makes transparency visible
+            GLSLF("// transparency checkerboard\n");
+            GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));)
+            GLSL(vec3 background = vec3(tile.x == tile.y ? 0.93 : 0.87);)
+            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
+            GLSL(color.a = 1.0;)
+            break;
+        case ALPHA_BLEND: {
+            // Blend onto the configured background color (usually black)
+            struct m_color bg = p->opts.background;
+            GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n",
+                  bg.r / 255.0, bg.g / 255.0, bg.b / 255.0, bg.a / 255.0);
+            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
+            GLSL(color.a = background.a;)
+            break;
+        }
+        default:
+            break; // other alpha modes need no extra shader code here
+        }
+    }
+
+    pass_opt_hook_point(p, "OUTPUT", NULL);
+
+    if (flags & RENDER_SCREEN_COLOR)
+        pass_dither(p);
+    pass_describe(p, "output to screen");
+    finish_pass_fbo(p, fbo, false, &p->dst_rect);
+}
+
+// Renders `mpi` through pass_render_frame() and stores the result, in linear
+// light and at viewport size, in surf->tex. On success surf->id and surf->pts
+// are updated to describe the stored frame.
+//
+// flags: bit set of RENDER_FRAME_* flags
+// Returns false (leaving surf untouched) if the frame could not be rendered.
+static bool update_surface(struct gl_video *p, struct mp_image *mpi,
+                           uint64_t id, struct surface *surf, int flags)
+{
+    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
+        vp_h = p->dst_rect.y1 - p->dst_rect.y0;
+
+    pass_info_reset(p, false);
+    if (!pass_render_frame(p, mpi, id, flags))
+        return false;
+
+    // Frame blending should always be done in linear light to preserve the
+    // overall brightness, otherwise this will result in flashing dark frames
+    // because mixing in compressed light artificially darkens the results
+    if (!p->use_linear) {
+        p->use_linear = true;
+        pass_linearize(p->sc, p->image_params.color.gamma);
+    }
+
+    finish_pass_tex(p, &surf->tex, vp_w, vp_h);
+    surf->id  = id;
+    // The caller does not guarantee a non-NULL frame (it checks for that
+    // itself after calling us), so never dereference mpi blindly. Fall back
+    // to the currently uploaded image, which is where pass_render_frame()
+    // takes its timestamp from, and to "no timestamp" as a last resort.
+    if (mpi) {
+        surf->pts = mpi->pts;
+    } else if (p->image.mpi) {
+        surf->pts = p->image.mpi->pts;
+    } else {
+        surf->pts = MP_NOPTS_VALUE;
+    }
+    return true;
+}
+
+// Draws an interpolated frame to fbo, based on the frame timing in t.
+// New frames from t are rendered into the circular surface queue
+// (p->surfaces); the output is then either a temporal blend of the queued
+// surfaces or, if the queue is not usable (or t->still is set), the current
+// surface drawn as-is. Sets p->is_interpolated accordingly and updates
+// p->osd_pts to the pts of the current surface.
+// flags: bit set of RENDER_FRAME_* flags
+static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t,
+                                       struct ra_fbo fbo, int flags)
+{
+    bool is_new = false;
+
+    // Reset the queue completely if this is a still image, to avoid any
+    // interpolation artifacts from surrounding frames when unpausing or
+    // framestepping
+    if (t->still)
+        gl_video_reset_surfaces(p);
+
+    // First of all, figure out if we have a frame available at all, and draw
+    // it manually + reset the queue if not
+    if (p->surfaces[p->surface_now].id == 0) {
+        struct surface *now = &p->surfaces[p->surface_now];
+        if (!update_surface(p, t->current, t->frame_id, now, flags))
+            return;
+        p->surface_idx = p->surface_now;
+        is_new = true;
+    }
+
+    // Find the right frame for this instant
+    if (t->current) {
+        int next = surface_wrap(p->surface_now + 1);
+        while (p->surfaces[next].id &&
+               p->surfaces[next].id > p->surfaces[p->surface_now].id &&
+               p->surfaces[p->surface_now].id < t->frame_id)
+        {
+            p->surface_now = next;
+            next = surface_wrap(next + 1);
+        }
+    }
+
+    // Figure out the queue size. For illustration, a filter radius of 2 would
+    // look like this: _ A [B] C D _
+    // A is surface_bse, B is surface_now, C is surface_now+1 and D is
+    // surface_end.
+    struct scaler *tscale = &p->scaler[SCALER_TSCALE];
+    reinit_scaler(p, tscale, &p->opts.scaler[SCALER_TSCALE], 1, tscale_sizes);
+    bool oversample = strcmp(tscale->conf.kernel.name, "oversample") == 0;
+    bool linear = strcmp(tscale->conf.kernel.name, "linear") == 0;
+    int size;
+
+    if (oversample || linear) {
+        // Both of these only ever mix two neighbouring frames.
+        size = 2;
+    } else {
+        assert(tscale->kernel && !tscale->kernel->polar);
+        size = ceil(tscale->kernel->size);
+    }
+
+    int radius = size/2;
+    int surface_now = p->surface_now;
+    int surface_bse = surface_wrap(surface_now - (radius-1));
+    int surface_end = surface_wrap(surface_now + radius);
+    assert(surface_wrap(surface_bse + size-1) == surface_end);
+
+    // Render new frames while there's room in the queue. Note that technically,
+    // this should be done before the step where we find the right frame, but
+    // it only barely matters at the very beginning of playback, and this way
+    // makes the code much more linear.
+    int surface_dst = surface_wrap(p->surface_idx + 1);
+    for (int i = 0; i < t->num_frames; i++) {
+        // Avoid overwriting data we might still need. The slot before
+        // surface_bse is read by the mix < 0 case below, so it must be
+        // compared as a wrapped index: surface_dst is always wrapped, and a
+        // raw "surface_bse - 1" would be -1 (never matching) when
+        // surface_bse is 0.
+        if (surface_dst == surface_wrap(surface_bse - 1))
+            break;
+
+        struct mp_image *f = t->frames[i];
+        uint64_t f_id = t->frame_id + i;
+        if (!mp_image_params_equal(&f->params, &p->real_image_params))
+            continue;
+
+        // Only render frames newer than the most recently queued one.
+        if (f_id > p->surfaces[p->surface_idx].id) {
+            struct surface *dst = &p->surfaces[surface_dst];
+            if (!update_surface(p, f, f_id, dst, flags))
+                return;
+            p->surface_idx = surface_dst;
+            surface_dst = surface_wrap(surface_dst + 1);
+            is_new = true;
+        }
+    }
+
+    // Figure out whether the queue is "valid". A queue is valid only if every
+    // surface in [surface_bse, surface_end] is filled (non-zero id) and the
+    // frame ids do not decrease from one surface to the next. Anything else
+    // is invalid, so avoid blending incorrect data and just draw the latest
+    // frame as-is.
+    // Possible causes for failure of this condition include seeks, pausing,
+    // end of playback or start of playback.
+    bool valid = true;
+    for (int i = surface_bse, ii; valid && i != surface_end; i = ii) {
+        ii = surface_wrap(i + 1);
+        if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) {
+            valid = false;
+        } else if (p->surfaces[ii].id < p->surfaces[i].id) {
+            valid = false;
+            MP_DBG(p, "interpolation queue underrun\n");
+        }
+    }
+
+    // Update OSD PTS to synchronize subtitles with the displayed frame
+    p->osd_pts = p->surfaces[surface_now].pts;
+
+    // Finally, draw the right mix of frames to the screen.
+    if (!is_new)
+        pass_info_reset(p, true);
+    pass_describe(p, "interpolation");
+    if (!valid || t->still) {
+        // surface_now is guaranteed to be valid, so we can safely use it.
+        pass_read_tex(p, p->surfaces[surface_now].tex);
+        p->is_interpolated = false;
+    } else {
+        double mix = t->vsync_offset / t->ideal_frame_duration;
+        // The scaler code always wants the fcoord to be between 0 and 1,
+        // so we try to adjust by using the previous set of N frames instead
+        // (which requires some extra checking to make sure it's valid)
+        if (mix < 0.0) {
+            int prev = surface_wrap(surface_bse - 1);
+            if (p->surfaces[prev].id != 0 &&
+                p->surfaces[prev].id < p->surfaces[surface_bse].id)
+            {
+                mix += 1.0;
+                surface_bse = prev;
+            } else {
+                mix = 0.0; // at least don't blow up, this should only
+                           // ever happen at the start of playback
+            }
+        }
+
+        if (oversample) {
+            // Oversample uses the frame area as mix ratio, not the vsync
+            // position itself
+            double vsync_dist = t->vsync_interval / t->ideal_frame_duration,
+                   threshold = tscale->conf.kernel.params[0];
+            // An unset (NaN) parameter means "no threshold".
+            threshold = isnan(threshold) ? 0.0 : threshold;
+            mix = (1 - mix) / vsync_dist;
+            // Snap to a pure frame when within the threshold of either end.
+            mix = mix <= 0 + threshold ? 0 : mix;
+            mix = mix >= 1 - threshold ? 1 : mix;
+            mix = 1 - mix;
+        }
+
+        // Blend the frames together
+        if (oversample || linear) {
+            gl_sc_uniform_dynamic(p->sc);
+            gl_sc_uniform_f(p->sc, "inter_coeff", mix);
+            GLSL(color = mix(texture(texture0, texcoord0),
+                             texture(texture1, texcoord1),
+                             inter_coeff);)
+        } else {
+            gl_sc_uniform_dynamic(p->sc);
+            gl_sc_uniform_f(p->sc, "fcoord", mix);
+            pass_sample_separated_gen(p->sc, tscale, 0, 0);
+        }
+
+        // Load all the required frames
+        for (int i = 0; i < size; i++) {
+            struct image img =
+                image_wrap(p->surfaces[surface_wrap(surface_bse+i)].tex,
+                           PLANE_RGB, p->components);
+            // Since the code in pass_sample_separated currently assumes
+            // the textures are bound in-order and starting at 0, we just
+            // assert to make sure this is the case (which it should always be)
+            int id = pass_bind(p, img);
+            assert(id == i);
+        }
+
+        MP_TRACE(p, "inter frame dur: %f vsync: %f, mix: %f\n",
+                 t->ideal_frame_duration, t->vsync_interval, mix);
+        p->is_interpolated = true;
+    }
+    pass_draw_to_screen(p, fbo, flags);
+
+    p->frames_drawn += 1;
+}
+
+// Renders one vo_frame into fbo: clears the target, draws the video (through
+// the hwdec overlay, the interpolation path, or the plain path with an
+// optional cached output texture), and finally draws the OSD on top.
+// flags: passed on to the render passes; RENDER_FRAME_SUBS and
+// RENDER_FRAME_OSD additionally select what the OSD pass draws.
+// If an error was recorded while rendering, the whole target is cleared to
+// solid blue.
+void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame,
+                           struct ra_fbo fbo, int flags)
+{
+    gl_video_update_options(p);
+
+    // The full area of the target texture.
+    struct mp_rect target_rc = {0, 0, fbo.tex->params.w, fbo.tex->params.h};
+
+    p->broken_frame = false;
+
+    bool has_frame = !!frame->current;
+
+    struct m_color c = p->clear_color;
+    float clear_color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0};
+    p->ra->fns->clear(p->ra, fbo.tex, clear_color, &target_rc);
+
+    // With a hwdec overlay, the frame is handed to the overlay driver and the
+    // video area is only filled with the overlay's colorkey; the regular
+    // rendering below is skipped.
+    if (p->hwdec_overlay) {
+        if (has_frame) {
+            float *color = p->hwdec_overlay->overlay_colorkey;
+            p->ra->fns->clear(p->ra, fbo.tex, color, &p->dst_rect);
+        }
+
+        p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, frame->current,
+                                                &p->src_rect, &p->dst_rect,
+                                                frame->frame_id != p->image.id);
+
+        if (frame->current)
+            p->osd_pts = frame->current->pts;
+
+        // Disable GL rendering
+        has_frame = false;
+    }
+
+    if (has_frame) {
+        // Interpolation needs the option enabled and display-synced timing;
+        // a still frame is only interpolated if something was drawn before.
+        bool interpolate = p->opts.interpolation && frame->display_synced &&
+                           (p->frames_drawn || !frame->still);
+        if (interpolate) {
+            // Skip interpolation when the frame duration and the vsync
+            // interval are within the configured threshold of each other.
+            double ratio = frame->ideal_frame_duration / frame->vsync_interval;
+            if (fabs(ratio - 1.0) < p->opts.interpolation_threshold)
+                interpolate = false;
+        }
+
+        if (interpolate) {
+            gl_video_interpolate_frame(p, frame, fbo, flags);
+        } else {
+            bool is_new = frame->frame_id != p->image.id;
+
+            // Redrawing a frame might update subtitles.
+            if (frame->still && p->opts.blend_subs)
+                is_new = true;
+
+            if (is_new || !p->output_tex_valid) {
+                p->output_tex_valid = false;
+
+                pass_info_reset(p, !is_new);
+                if (!pass_render_frame(p, frame->current, frame->frame_id, flags))
+                    goto done;
+
+                // For the non-interpolation case, we draw to a single "cache"
+                // texture to speed up subsequent re-draws (if any exist)
+                struct ra_fbo dest_fbo = fbo;
+                bool repeats = frame->num_vsyncs > 1 && frame->display_synced;
+                if ((repeats || frame->still) && !p->dumb_mode &&
+                    (p->ra->caps & RA_CAP_BLIT) && fbo.tex->params.blit_dst)
+                {
+                    // Attempt to use the same format as the destination FBO
+                    // if possible. Some RAs use a wrapped dummy format here,
+                    // so fall back to the fbo_format in that case.
+                    const struct ra_format *fmt = fbo.tex->params.format;
+                    if (fmt->dummy_format)
+                        fmt = p->fbo_format;
+
+                    bool r = ra_tex_resize(p->ra, p->log, &p->output_tex,
+                                           fbo.tex->params.w, fbo.tex->params.h,
+                                           fmt);
+                    // If the cache texture could not be (re)created, keep
+                    // drawing straight to the target.
+                    if (r) {
+                        dest_fbo = (struct ra_fbo) { p->output_tex };
+                        p->output_tex_valid = true;
+                    }
+                }
+                pass_draw_to_screen(p, dest_fbo, flags);
+            }
+
+            // "output tex valid" and "output tex needed" are equivalent
+            if (p->output_tex_valid && fbo.tex->params.blit_dst) {
+                pass_info_reset(p, true);
+                pass_describe(p, "redraw cached frame");
+                struct mp_rect src = p->dst_rect;
+                struct mp_rect dst = src;
+                // For a flipped target, mirror the destination rectangle
+                // vertically within the target's height.
+                if (fbo.flip) {
+                    dst.y0 = fbo.tex->params.h - src.y0;
+                    dst.y1 = fbo.tex->params.h - src.y1;
+                }
+                timer_pool_start(p->blit_timer);
+                p->ra->fns->blit(p->ra, fbo.tex, p->output_tex, &dst, &src);
+                timer_pool_stop(p->blit_timer);
+                pass_record(p, timer_pool_measure(p->blit_timer));
+            }
+        }
+    }
+
+done:
+
+    debug_check_gl(p, "after video rendering");
+
+    if (p->osd && (flags & (RENDER_FRAME_SUBS | RENDER_FRAME_OSD))) {
+        // If we haven't actually drawn anything so far, then we technically
+        // need to consider this the start of a new pass. Let's call it a
+        // redraw just because, since it's basically a blank frame anyway
+        if (!has_frame)
+            pass_info_reset(p, true);
+
+        // Restrict the OSD pass to what the caller asked for; with
+        // blend_subs enabled only the OSD part is drawn here.
+        int osd_flags = p->opts.blend_subs ? OSD_DRAW_OSD_ONLY : 0;
+        if (!(flags & RENDER_FRAME_SUBS))
+            osd_flags |= OSD_DRAW_OSD_ONLY;
+        if (!(flags & RENDER_FRAME_OSD))
+            osd_flags |= OSD_DRAW_SUB_ONLY;
+
+        pass_draw_osd(p, osd_flags, flags, p->osd_pts, p->osd_rect, fbo, true);
+        debug_check_gl(p, "after OSD rendering");
+    }
+
+    p->broken_frame |= gl_sc_error_state(p->sc);
+    if (p->broken_frame) {
+        // Make the screen solid blue to make it visually clear that an
+        // error has occurred
+        float color[4] = {0.0, 0.05, 0.5, 1.0};
+        p->ra->fns->clear(p->ra, fbo.tex, color, &target_rc);
+    }
+
+    p->frames_rendered++;
+    pass_report_performance(p);
+}
+
+// Renders `frame` into a temporary offscreen texture and downloads the
+// result into a newly allocated mp_image, returned through args->res
+// (NULL if rendering, allocation or download failed).
+// args->scaled: if set, render with the current src/dst/OSD geometry;
+//               otherwise the renderer is temporarily resized to the video's
+//               display size (honoring crop and rotation).
+// args->subs / args->osd: select whether subtitles / OSD are drawn.
+// args->high_bit_depth: use a 16 bit per component target if available.
+// The previous geometry is restored and the surface queue reset before
+// returning. If the RA cannot download textures, or the video has no valid
+// display size, the function returns early without touching args->res.
+void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame,
+                         struct voctrl_screenshot *args)
+{
+    if (!p->ra->fns->tex_download)
+        return;
+
+    bool ok = false;
+    struct mp_image *res = NULL;
+    struct ra_tex *target = NULL;
+    struct mp_rect old_src = p->src_rect;
+    struct mp_rect old_dst = p->dst_rect;
+    struct mp_osd_res old_osd = p->osd_rect;
+    struct vo_frame *nframe = vo_frame_ref(frame);
+
+    // Disable interpolation and such.
+    nframe->redraw = true;
+    nframe->repeat = false;
+    nframe->still = true;
+    nframe->pts = 0;
+    nframe->duration = -1;
+
+    if (!args->scaled) {
+        int w, h;
+        mp_image_params_get_dsize(&p->image_params, &w, &h);
+        if (w < 1 || h < 1) {
+            // Nothing has been resized or allocated yet except our frame
+            // reference; release it so this early exit does not leak.
+            talloc_free(nframe);
+            return;
+        }
+
+        int src_w = p->image_params.w;
+        int src_h = p->image_params.h;
+        struct mp_rect src = {0, 0, src_w, src_h};
+        struct mp_rect dst = {0, 0, w, h};
+
+        if (mp_image_crop_valid(&p->image_params))
+            src = p->image_params.crop;
+
+        // For 90/270 degree rotation, width and height trade places.
+        if (p->image_params.rotate % 180 == 90) {
+            MPSWAP(int, w, h);
+            MPSWAP(int, src_w, src_h);
+        }
+        mp_rect_rotate(&src, src_w, src_h, p->image_params.rotate);
+        mp_rect_rotate(&dst, w, h, p->image_params.rotate);
+
+        struct mp_osd_res osd = {
+            .display_par = 1.0,
+            .w = mp_rect_w(dst),
+            .h = mp_rect_h(dst),
+        };
+        gl_video_resize(p, &src, &dst, &osd);
+    }
+
+    gl_video_reset_surfaces(p);
+
+    // The render target covers the (possibly just updated) OSD area.
+    struct ra_tex_params params = {
+        .dimensions = 2,
+        .downloadable = true,
+        .w = p->osd_rect.w,
+        .h = p->osd_rect.h,
+        .d = 1,
+        .render_dst = true,
+    };
+
+    // Default to 8 bit RGB; switch to 16 bit RGBA if requested, the source
+    // has more than 8 bits per component, and a renderable format exists.
+    params.format = ra_find_unorm_format(p->ra, 1, 4);
+    int mpfmt = IMGFMT_RGB0;
+    if (args->high_bit_depth && p->ra_format.component_bits > 8) {
+        const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
+        if (fmt && fmt->renderable) {
+            params.format = fmt;
+            mpfmt = IMGFMT_RGBA64;
+        }
+    }
+
+    if (!params.format || !params.format->renderable)
+        goto done;
+    target = ra_tex_create(p->ra, &params);
+    if (!target)
+        goto done;
+
+    int flags = 0;
+    if (args->subs)
+        flags |= RENDER_FRAME_SUBS;
+    if (args->osd)
+        flags |= RENDER_FRAME_OSD;
+    if (args->scaled)
+        flags |= RENDER_SCREEN_COLOR;
+    gl_video_render_frame(p, nframe, (struct ra_fbo){target}, flags);
+
+    res = mp_image_alloc(mpfmt, params.w, params.h);
+    if (!res)
+        goto done;
+
+    struct ra_tex_download_params download_params = {
+        .tex = target,
+        .dst = res->planes[0],
+        .stride = res->stride[0],
+    };
+    if (!p->ra->fns->tex_download(p->ra, &download_params))
+        goto done;
+
+    // A frame that hit a rendering error is not a usable screenshot.
+    if (p->broken_frame)
+        goto done;
+
+    ok = true;
+done:
+    talloc_free(nframe);
+    ra_tex_free(p->ra, &target);
+    gl_video_resize(p, &old_src, &old_dst, &old_osd);
+    gl_video_reset_surfaces(p);
+    if (!ok)
+        TA_FREEP(&res);
+    args->res = res;
+}
+
+// Force `c` as the clear color, taking precedence over the global option.
+void gl_video_set_clear_color(struct gl_video *p, struct m_color c)
+{
+    p->clear_color = c;
+    p->force_clear_color = true;
+}
+
+// Sets p->osd_pts, the timestamp that gl_video_render_frame() hands to the
+// OSD drawing pass. Note that uploading or rendering a video frame
+// overwrites this value with that frame's pts.
+void gl_video_set_osd_pts(struct gl_video *p, double pts)
+{
+    p->osd_pts = pts;
+}
+
+// Forwards to mpgl_osd_check_change() for the given resolution and pts.
+// Always returns false when no OSD renderer exists.
+bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *res,
+                               double pts)
+{
+    if (!p->osd)
+        return false;
+
+    return mpgl_osd_check_change(p->osd, res, pts);
+}
+
+// Updates the source rectangle, destination rectangle and OSD resolution.
+// If all three already match the current state this is a no-op; otherwise
+// the surface queue is reset and the OSD renderer (if any) is resized.
+void gl_video_resize(struct gl_video *p,
+                     struct mp_rect *src, struct mp_rect *dst,
+                     struct mp_osd_res *osd)
+{
+    bool unchanged = mp_rect_equals(&p->src_rect, src) &&
+                     mp_rect_equals(&p->dst_rect, dst) &&
+                     osd_res_equals(p->osd_rect, *osd);
+    if (unchanged)
+        return;
+
+    p->osd_rect = *osd;
+    p->dst_rect = *dst;
+    p->src_rect = *src;
+
+    gl_video_reset_surfaces(p);
+
+    if (!p->osd)
+        return;
+    mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo3d);
+}
+
+// Appends the recorded passes from `pass` to `out`: each pass's perf data is
+// copied, and its description is copied as a truncated, NUL-terminated
+// string. Copying stops at the first pass with an empty description or
+// after VO_PASS_PERF_MAX entries.
+static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out)
+{
+    int idx = 0;
+    while (idx < VO_PASS_PERF_MAX && pass[idx].desc.len) {
+        // Index of the final byte in the destination description buffer.
+        const size_t last = sizeof(out->desc[out->count]) - 1;
+        char *name = out->desc[out->count];
+
+        out->perf[out->count] = pass[idx].perf;
+        strncpy(name, pass[idx].desc.start, last);
+        name[last] = '\0';
+
+        out->count++;
+        idx++;
+    }
+}
+
+// Fills `out` with the pass timings of the last fresh frame and the last
+// redraw. `out` is zeroed first, so anything it held before is discarded.
+void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out)
+{
+    const struct voctrl_performance_data cleared = {0};
+    *out = cleared;
+
+    frame_perf_data(p->pass_fresh, &out->fresh);
+    frame_perf_data(p->pass_redraw, &out->redraw);
+}
+
+// Makes `mpi` the current image (p->image) and uploads its planes to the
+// plane textures, or maps it through the hwdec mapper when hardware decoding
+// is active. Does nothing if the image with this `id` is already current.
+// On failure, the current image is unreferenced and p->broken_frame is set.
+// Returns false on failure.
+static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id)
+{
+    struct video_image *vimg = &p->image;
+
+    if (vimg->id == id)
+        return true;
+
+    unref_current_image(p);
+
+    // Hold our own reference to the image for as long as it is current.
+    mpi = mp_image_new_ref(mpi);
+    if (!mpi)
+        goto error;
+
+    vimg->mpi = mpi;
+    vimg->id = id;
+    p->osd_pts = mpi->pts;
+    p->frames_uploaded++;
+
+    if (p->hwdec_active) {
+        // Hardware decoding
+
+        if (!p->hwdec_mapper)
+            goto error;
+
+        pass_describe(p, "map frame (hwdec)");
+        timer_pool_start(p->upload_timer);
+        bool ok = ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi) >= 0;
+        timer_pool_stop(p->upload_timer);
+        pass_record(p, timer_pool_measure(p->upload_timer));
+
+        // Set regardless of the mapping result; the error path below goes
+        // through unref_current_image().
+        vimg->hwdec_mapped = true;
+        if (ok) {
+            // Use a dummy image with the current params to derive the
+            // per-plane dimensions for the mapper's textures.
+            struct mp_image layout = {0};
+            mp_image_set_params(&layout, &p->image_params);
+            struct ra_tex **tex = p->hwdec_mapper->tex;
+            for (int n = 0; n < p->plane_count; n++) {
+                vimg->planes[n] = (struct texplane){
+                    .w = mp_image_plane_w(&layout, n),
+                    .h = mp_image_plane_h(&layout, n),
+                    .tex = tex[n],
+                };
+            }
+        } else {
+            MP_FATAL(p, "Mapping hardware decoded surface failed.\n");
+            goto error;
+        }
+        return true;
+    }
+
+    // Software decoding
+    assert(mpi->num_planes == p->plane_count);
+
+    timer_pool_start(p->upload_timer);
+    for (int n = 0; n < p->plane_count; n++) {
+        struct texplane *plane = &vimg->planes[n];
+        if (!plane->tex) {
+            timer_pool_stop(p->upload_timer);
+            goto error;
+        }
+
+        struct ra_tex_upload_params params = {
+            .tex = plane->tex,
+            .src = mpi->planes[n],
+            .invalidate = true,
+            .stride = mpi->stride[n],
+        };
+
+        // A negative stride means the rows are stored bottom-up. Upload
+        // starting from the last row (the lowest address) with a positive
+        // stride, and remember that the plane is flipped.
+        plane->flipped = params.stride < 0;
+        if (plane->flipped) {
+            int h = mp_image_plane_h(mpi, n);
+            params.src = (char *)params.src + (h - 1) * params.stride;
+            params.stride = -params.stride;
+        }
+
+        // If the plane memory is found among the DR buffers, upload from
+        // that buffer at the matching offset instead of from a CPU pointer.
+        struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]);
+        if (mapped) {
+            params.buf = mapped->buf;
+            params.buf_offset = (uintptr_t)params.src -
+                                (uintptr_t)mapped->buf->data;
+            params.src = NULL;
+        }
+
+        // Log only when the upload path actually changes.
+        if (p->using_dr_path != !!mapped) {
+            p->using_dr_path = !!mapped;
+            MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no");
+        }
+
+        if (!p->ra->fns->tex_upload(p->ra, &params)) {
+            timer_pool_stop(p->upload_timer);
+            goto error;
+        }
+
+        // Attach an image reference to the DR buffer entry if it does not
+        // hold one yet.
+        if (mapped && !mapped->mpi)
+            mapped->mpi = mp_image_new_ref(mpi);
+    }
+    timer_pool_stop(p->upload_timer);
+
+    bool using_pbo = p->ra->use_pbo || !(p->ra->caps & RA_CAP_DIRECT_UPLOAD);
+    const char *mode = p->using_dr_path ? "DR" : using_pbo ? "PBO" : "naive";
+    pass_describe(p, "upload frame (%s)", mode);
+    pass_record(p, timer_pool_measure(p->upload_timer));
+
+    return true;
+
+error:
+    unref_current_image(p);
+    p->broken_frame = true;
+    return false;
+}
+
+// Probes whether a texture of format `fmt` can be created, by allocating a
+// throwaway 16x16 texture and freeing it again. Returns the allocation
+// result.
+static bool test_fbo(struct gl_video *p, const struct ra_format *fmt)
+{
+    struct ra_tex *probe = NULL;
+
+    MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name);
+    bool created = ra_tex_resize(p->ra, p->log, &probe, 16, 16, fmt);
+    ra_tex_free(p->ra, &probe);
+
+    return created;
+}
+
+// Decide whether dumb-mode can be used without losing any requested feature.
+// With mostly default vo_gpu settings this returns true. Integer conversion
+// always rules dumb-mode out; otherwise an explicit user setting wins, and
+// only then are the individual options inspected.
+static bool check_dumb_mode(struct gl_video *p)
+{
+    const struct gl_video_opts *o = &p->opts;
+
+    if (p->use_integer_conversion)
+        return false;
+
+    // Explicit user choice: > 0 requests dumb-mode, < 0 disables it.
+    if (o->dumb_mode != 0)
+        return o->dumb_mode > 0;
+
+    // Auto-detection: any of these options needs the full pipeline.
+    bool needs_pipeline =
+        o->correct_downscaling || o->linear_downscaling ||
+        o->linear_upscaling || o->sigmoid_upscaling || o->interpolation ||
+        o->blend_subs || o->deband || o->unsharp;
+    if (needs_pipeline)
+        return false;
+
+    // Every scaler other than tscale (covered by the interpolation check
+    // above) must be unset or plain bilinear.
+    for (int n = 0; n < SCALER_COUNT; n++) {
+        if (n == SCALER_TSCALE)
+            continue;
+        const char *kernel = o->scaler[n].kernel.name;
+        if (kernel && strcmp(kernel, "bilinear") != 0)
+            return false;
+    }
+
+    // User shaders also require the full pipeline.
+    bool has_user_shaders = o->user_shaders && o->user_shaders[0];
+    return !has_user_shaders;
+}
+
+// Disable features that are not supported with the current OpenGL version.
+// This probes the RA's capabilities, picks the FBO format (p->fbo_format),
+// and then edits p->opts in place to turn off anything that cannot work.
+// It also decides whether dumb mode is used (p->dumb_mode), in which case
+// p->opts is replaced by a whitelist of options and the function returns
+// early.
+static void check_gl_features(struct gl_video *p)
+{
+    struct ra *ra = p->ra;
+    bool have_float_tex = !!ra_find_float16_format(ra, 1);
+    bool have_mglsl = ra->glsl_version >= 130; // modern GLSL
+    const struct ra_format *rg_tex = ra_find_unorm_format(p->ra, 1, 2);
+    bool have_texrg = rg_tex && !rg_tex->luminance_alpha;
+    bool have_compute = ra->caps & RA_CAP_COMPUTE;
+    bool have_ssbo = ra->caps & RA_CAP_BUF_RW;
+    bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD;
+
+    // Candidate FBO formats: the user's choice if one is set and is not
+    // "auto", otherwise the built-in list, tried in order.
+    const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16",
+                                   "rgb10_a2", "rgba8", 0};
+    const char *user_fbo_fmts[] = {p->opts.fbo_format, 0};
+    const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto")
+                          ? user_fbo_fmts : auto_fbo_fmts;
+    bool user_specified_fbo_fmt = fbo_fmts == user_fbo_fmts;
+    bool fbo_test_result = false;
+    bool have_fbo = false;
+    p->fbo_format = NULL;
+    for (int n = 0; fbo_fmts[n]; n++) {
+        const char *fmt = fbo_fmts[n];
+        const struct ra_format *f = ra_find_named_format(p->ra, fmt);
+        if (!f && user_specified_fbo_fmt)
+            MP_WARN(p, "FBO format '%s' not found!\n", fmt);
+        // Take the first format that exists, is renderable, supports linear
+        // filtering, and passes the texture creation test.
+        if (f && f->renderable && f->linear_filter &&
+            (fbo_test_result = test_fbo(p, f))) {
+            MP_VERBOSE(p, "Using FBO format %s.\n", f->name);
+            have_fbo = true;
+            p->fbo_format = f;
+            break;
+        }
+
+        if (user_specified_fbo_fmt) {
+            MP_WARN(p, "User-specified FBO format '%s' failed to initialize! "
+                       "(exists=%d, renderable=%d, linear_filter=%d, "
+                       "fbo_test_result=%d)\n",
+                    fmt, !!f, f ? f->renderable : 0,  f ? f->linear_filter : 0,
+                    fbo_test_result);
+        }
+    }
+
+    if (!have_fragcoord && p->opts.dither_depth >= 0 &&
+        p->opts.dither_algo != DITHER_NONE)
+    {
+        p->opts.dither_algo = DITHER_NONE;
+        MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n");
+    }
+    if (!have_fragcoord && p->opts.alpha_mode == ALPHA_BLEND_TILES) {
+        p->opts.alpha_mode = ALPHA_BLEND;
+        // Verbose, since this is the default setting
+        MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n");
+    }
+    if (!have_fbo && have_compute) {
+        have_compute = false;
+        MP_WARN(p, "Force-disabling compute shaders as an FBO format was not "
+                   "available! See your FBO format configuration!\n");
+    }
+
+    if (have_compute && have_fbo && !p->fbo_format->storable) {
+        have_compute = false;
+        MP_WARN(p, "Force-disabling compute shaders as the chosen FBO format "
+                "is not storable! See your FBO format configuration!\n");
+    }
+
+    if (!have_compute && p->opts.dither_algo == DITHER_ERROR_DIFFUSION) {
+        MP_WARN(p, "Disabling error diffusion dithering because compute shader "
+                   "was not supported. Fallback to dither=fruit instead.\n");
+        p->opts.dither_algo = DITHER_FRUIT;
+    }
+
+    // Peak computation needs both compute shaders and SSBOs. Only warn
+    // loudly if the user explicitly enabled it (compute_peak == 1).
+    bool have_compute_peak = have_compute && have_ssbo;
+    if (!have_compute_peak && p->opts.tone_map.compute_peak >= 0) {
+        int msgl = p->opts.tone_map.compute_peak == 1 ? MSGL_WARN : MSGL_V;
+        MP_MSG(p, msgl, "Disabling HDR peak computation (one or more of the "
+                        "following is not supported: compute shaders=%d, "
+                        "SSBO=%d).\n", have_compute, have_ssbo);
+        p->opts.tone_map.compute_peak = -1;
+    }
+
+    // Dumb mode is forced by the user option or by missing FBO / RG texture
+    // support; it may also be chosen voluntarily when nothing needs more.
+    p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg;
+    bool voluntarily_dumb = check_dumb_mode(p);
+    if (p->forced_dumb_mode || voluntarily_dumb) {
+        if (voluntarily_dumb) {
+            MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n");
+        } else if (p->opts.dumb_mode <= 0) {
+            MP_WARN(p, "High bit depth FBOs unsupported. Enabling dumb mode.\n"
+                       "Most extended features will be disabled.\n");
+        }
+        p->dumb_mode = true;
+        static const struct scaler_config dumb_scaler_config = {
+            {"bilinear", .params = {NAN, NAN}},
+            {.params = {NAN, NAN}},
+        };
+        // Most things don't work, so whitelist all options that still work.
+        // Everything not listed here is reset to zero.
+        p->opts = (struct gl_video_opts){
+            .scaler = {
+                [SCALER_SCALE] = dumb_scaler_config,
+                [SCALER_DSCALE] = dumb_scaler_config,
+                [SCALER_CSCALE] = dumb_scaler_config,
+                [SCALER_TSCALE] = dumb_scaler_config,
+            },
+            .gamma = p->opts.gamma,
+            .gamma_auto = p->opts.gamma_auto,
+            .pbo = p->opts.pbo,
+            .fbo_format = p->opts.fbo_format,
+            .alpha_mode = p->opts.alpha_mode,
+            .use_rectangle = p->opts.use_rectangle,
+            .background = p->opts.background,
+            .dither_algo = p->opts.dither_algo,
+            .dither_depth = p->opts.dither_depth,
+            .dither_size = p->opts.dither_size,
+            .error_diffusion = p->opts.error_diffusion,
+            .temporal_dither = p->opts.temporal_dither,
+            .temporal_dither_period = p->opts.temporal_dither_period,
+            .tex_pad_x = p->opts.tex_pad_x,
+            .tex_pad_y = p->opts.tex_pad_y,
+            .tone_map = p->opts.tone_map,
+            .early_flush = p->opts.early_flush,
+            .icc_opts = p->opts.icc_opts,
+            .hwdec_interop = p->opts.hwdec_interop,
+            .target_trc = p->opts.target_trc,
+            .target_prim = p->opts.target_prim,
+            .target_peak = p->opts.target_peak,
+        };
+        if (!have_fbo)
+            p->use_lut_3d = false;
+        return;
+    }
+    p->dumb_mode = false;
+
+    // Normally, we want to disable them by default if FBOs are unavailable,
+    // because they will be slow (not critically slow, but still slower).
+    // Without FP textures, we must always disable them.
+    // I don't know if luminance alpha float textures exist, so disregard them.
+    for (int n = 0; n < SCALER_COUNT; n++) {
+        const struct filter_kernel *kernel =
+            mp_find_filter_kernel(p->opts.scaler[n].kernel.name);
+        if (kernel) {
+            // If both conditions apply, the GLSL version reason is the one
+            // that gets reported.
+            char *reason = NULL;
+            if (!have_float_tex)
+                reason = "(float tex. missing)";
+            if (!have_mglsl)
+                reason = "(GLSL version too old)";
+            if (reason) {
+                MP_WARN(p, "Disabling scaler #%d %s %s.\n", n,
+                        p->opts.scaler[n].kernel.name, reason);
+                // p->opts is a copy => we can just mess with it.
+                p->opts.scaler[n].kernel.name = "bilinear";
+                if (n == SCALER_TSCALE)
+                    p->opts.interpolation = false;
+            }
+        }
+    }
+
+    // Color management is in use if a target primaries/transfer override is
+    // set or a 3D LUT is active.
+    int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO ||
+                  p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d;
+
+    // mix() is needed for some gamma functions
+    if (!have_mglsl && (p->opts.linear_downscaling ||
+                        p->opts.linear_upscaling || p->opts.sigmoid_upscaling))
+    {
+        p->opts.linear_downscaling = false;
+        p->opts.linear_upscaling = false;
+        p->opts.sigmoid_upscaling = false;
+        MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n");
+    }
+    if (!have_mglsl && use_cms) {
+        p->opts.target_prim = MP_CSP_PRIM_AUTO;
+        p->opts.target_trc = MP_CSP_TRC_AUTO;
+        p->use_lut_3d = false;
+        MP_WARN(p, "Disabling color management (GLSL version too old).\n");
+    }
+    if (!have_mglsl && p->opts.deband) {
+        p->opts.deband = false;
+        MP_WARN(p, "Disabling debanding (GLSL version too old).\n");
+    }
+}
+
+// One-time setup for a new gl_video instance: creates the timer pools used
+// to time uploads, blits and OSD drawing, and dumps the RA's texture and
+// image formats at debug log level.
+static void init_gl(struct gl_video *p)
+{
+    debug_check_gl(p, "before init_gl");
+
+    p->upload_timer = timer_pool_create(p->ra);
+    p->blit_timer = timer_pool_create(p->ra);
+    p->osd_timer = timer_pool_create(p->ra);
+
+    debug_check_gl(p, "after init_gl");
+
+    ra_dump_tex_formats(p->ra, MSGL_DEBUG);
+    ra_dump_img_formats(p->ra, MSGL_DEBUG);
+}
+
+// Destroys a gl_video instance: tears down the video state, hwdec context,
+// shader cache, textures/buffers, timer pools, pass descriptions and the OSD
+// renderer, then frees `p` itself. Passing NULL is allowed and does nothing.
+void gl_video_uninit(struct gl_video *p)
+{
+    if (!p)
+        return;
+
+    uninit_video(p);
+    ra_hwdec_ctx_uninit(&p->hwdec_ctx);
+    gl_sc_destroy(p->sc);
+
+    ra_tex_free(p->ra, &p->lut_3d_texture);
+    ra_buf_free(p->ra, &p->hdr_peak_ssbo);
+
+    timer_pool_destroy(p->upload_timer);
+    timer_pool_destroy(p->blit_timer);
+    timer_pool_destroy(p->osd_timer);
+
+    // Release the description strings of both pass lists.
+    for (int i = 0; i < VO_PASS_PERF_MAX; i++) {
+        talloc_free(p->pass_fresh[i].desc.start);
+        talloc_free(p->pass_redraw[i].desc.start);
+    }
+
+    mpgl_osd_destroy(p->osd);
+
+    // Forcibly destroy possibly remaining image references. This should also
+    // cause gl_video_dr_free_buffer() to be called for the remaining buffers.
+    gc_pending_dr_fences(p, true);
+
+    // Should all have been unreffed already.
+    assert(!p->num_dr_buffers);
+
+    talloc_free(p);
+}
+
+// Non-static wrapper that resets the interpolation surface queue by calling
+// gl_video_reset_surfaces().
+void gl_video_reset(struct gl_video *p)
+{
+    gl_video_reset_surfaces(p);
+}
+
+// Returns p->is_interpolated. gl_video_interpolate_frame() sets this to true
+// when it blended several surfaces, and to false when it drew a single
+// surface as-is.
+bool gl_video_showing_interpolated_frame(struct gl_video *p)
+{
+    return p->is_interpolated;
+}
+
+// Returns whether the renderer can handle the image format described by
+// `desc`. Formats without planes are rejected, and so are formats whose
+// first plane has an unsigned integer component type while dumb mode is
+// forced.
+static bool is_imgfmt_desc_supported(struct gl_video *p,
+                                     const struct ra_imgfmt_desc *desc)
+{
+    if (desc->num_planes == 0)
+        return false;
+
+    bool is_uint = desc->planes[0]->ctype == RA_CTYPE_UINT;
+    return !(is_uint && p->forced_dumb_mode);
+}
+
+// Returns whether `mp_format` can be displayed: either the RA provides a
+// supported image format description for it, or ra_hwdec_get() finds an
+// entry for it in the hwdec context.
+bool gl_video_check_format(struct gl_video *p, int mp_format)
+{
+    struct ra_imgfmt_desc desc;
+    bool direct = ra_get_imgfmt_desc(p->ra, mp_format, &desc) &&
+                  is_imgfmt_desc_supported(p, &desc);
+    if (direct)
+        return true;
+
+    return ra_hwdec_get(&p->hwdec_ctx, mp_format) ? true : false;
+}
+
+void gl_video_config(struct gl_video *p, struct mp_image_params *params)
+{
+    unmap_overlay(p);
+    unref_current_image(p);
+    // Rebuild the video state only on a parameter change; imgfmt 0 skips init.
+    bool changed = !mp_image_params_equal(&p->real_image_params, params);
+    if (changed) {
+        uninit_video(p);
+        p->real_image_params = *params;
+        p->image_params = *params;
+        if (params->imgfmt != 0)
+            init_video(p);
+    }
+    gl_video_reset_surfaces(p);
+}
+
+void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd)
+{
+    mpgl_osd_destroy(p->osd); // destroy the current p->osd before switching
+    p->osd = NULL;
+    p->osd_state = osd;
+    reinit_osd(p); // runs with p->osd cleared and osd_state already replaced
+}
+
+struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log,
+                               struct mpv_global *g)
+{
+    struct gl_video *p = talloc_ptrtype(NULL, p); // released by gl_video_uninit()
+    *p = (struct gl_video) {
+        .ra = ra,
+        .global = g,
+        .log = log,
+        .sc = gl_sc_create(ra, g, log),
+        .video_eq = mp_csp_equalizer_create(p, g),
+        .opts_cache = m_config_cache_alloc(p, g, &gl_video_conf),
+    };
+    // make sure this variable is initialized to *something*
+    p->pass = p->pass_fresh;
+    struct gl_video_opts *opts = p->opts_cache->opts;
+    p->cms = gl_lcms_init(p, log, g, opts->icc_opts);
+    p->opts = *opts; // shallow copy of the cached option struct
+    for (int n = 0; n < SCALER_COUNT; n++)
+        p->scaler[n] = (struct scaler){.index = n};
+    // our VAO always has the vec2 position as the first element
+    MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) {
+        .name = "position",
+        .type = RA_VARTYPE_FLOAT,
+        .dim_v = 2,
+        .dim_m = 1,
+        .offset = 0,
+    });
+    init_gl(p);
+    reinit_from_options(p);
+    return p;
+}
+
+// Resolve a scaler option value to the matching static name string, or NULL
+// if the value is empty or unknown. With tscale set, polar kernels are refused.
+static const char *handle_scaler_opt(const char *name, bool tscale)
+{
+    if (!name || !name[0])
+        return NULL;
+    const struct filter_kernel *kernel = mp_find_filter_kernel(name);
+    if (kernel && !(tscale && kernel->polar))
+        return kernel->f.name;
+
+    const struct filter_window *window = mp_find_filter_window(name);
+    if (window)
+        return window->name;
+
+    const char *const *fixed = tscale ? fixed_tscale_filters
+                                      : fixed_scale_filters;
+    for (; *fixed; fixed++) {
+        if (!strcmp(*fixed, name))
+            return *fixed;
+    }
+    return NULL;
+}
+
+static void gl_video_update_options(struct gl_video *p)
+{
+    bool opts_changed = m_config_cache_update(p->opts_cache);
+    if (opts_changed) { // propagate the new values to lcms and to our own state
+        gl_lcms_update_options(p->cms);
+        reinit_from_options(p);
+    }
+    if (mp_csp_equalizer_state_changed(p->video_eq))
+        p->output_tex_valid = false; // equalizer changed: output_tex is stale
+}
+
+static void reinit_from_options(struct gl_video *p)
+{
+    p->use_lut_3d = gl_lcms_has_profile(p->cms);
+    // Work on a private snapshot of the options: check_gl_features() modifies
+    // the copy. The copy is shallow, so anything the fields point to is still
+    // shared with the option cache.
+    const struct gl_video_opts *cached = p->opts_cache->opts;
+    p->opts = *cached;
+    if (!p->force_clear_color)
+        p->clear_color = p->opts.background;
+
+    check_gl_features(p);
+    uninit_rendering(p);
+    if (p->opts.shader_cache)
+        gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir);
+    p->ra->use_pbo = p->opts.pbo;
+    gl_video_setup_hooks(p);
+    reinit_osd(p);
+
+    // Warn (once) when interpolation is on but video_sync is unset.
+    struct mp_vo_opts *vo = mp_get_config_group(p, p->global, &vo_sub_opts);
+    if (p->opts.interpolation && !vo->video_sync && !p->dsi_warned) {
+        MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n"
+                   "E.g.: --video-sync=display-resample\n");
+        p->dsi_warned = true;
+    }
+    talloc_free(vo);
+    if (!p->opts.correct_downscaling || p->correct_downscaling_warned)
+        return;
+    // dscale without a kernel name of its own falls back to the scale kernel.
+    const char *kname = p->opts.scaler[SCALER_DSCALE].kernel.name;
+    if (!kname)
+        kname = p->opts.scaler[SCALER_SCALE].kernel.name;
+    if (!kname || strcmp(kname, "bilinear") == 0) {
+        MP_WARN(p, "correct-downscaling requires non-bilinear scaler.\n");
+        p->correct_downscaling_warned = true;
+    }
+}
+
+void gl_video_configure_queue(struct gl_video *p, struct vo *vo)
+{
+    gl_video_update_options(p);
+
+    // The base queue size is 1. With interpolation it grows with the radius
+    // of the temporal scaler: the larger it is, the earlier frames are queued.
+    int queue_size = 1;
+    if (p->opts.interpolation) {
+        const struct scaler_config *tconf = &p->opts.scaler[SCALER_TSCALE];
+        const struct filter_kernel *tkernel =
+            mp_find_filter_kernel(tconf->kernel.name);
+        if (!tkernel) {
+            // Oversample/linear case
+            queue_size += 2;
+        } else {
+            // filter_scale is not initialized here, so use the plain radius
+            // (fine while we only upsample; beware if downsampling is added).
+            // A kernel radius that is not positive yields the configured one.
+            double radius = tkernel->f.radius;
+            if (!(radius > 0))
+                radius = tconf->radius;
+            queue_size += 1 + ceil(radius);
+        }
+    }
+    vo_set_queue_params(vo, 0, queue_size);
+}
+
+static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt,
+                               struct bstr name, const char **value)
+{
+    struct bstr param = bstr0(*value);
+    char s[32] = {0};
+    int r = 1;
+    bool tscale = bstr_equals0(name, "tscale");
+    bool inherits = bstr_equals0(name, "dscale") || bstr_equals0(name, "cscale");
+    if (bstr_equals0(param, "help")) {
+        r = M_OPT_EXIT;
+    } else if (inherits && !param.len) {
+        return r; // empty dscale/cscale means "use same as upscaler"
+    } else {
+        // NUL-terminated copy of the value, cut to fit s, for the lookup
+        snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
+        if (!handle_scaler_opt(s, tscale))
+            r = M_OPT_INVALID;
+    }
+    if (r >= 1)
+        return r;
+
+    // "help" or an unknown name: print the list of scaler names.
+    mp_info(log, "Available scalers:\n");
+    const char *const *fixed = tscale ? fixed_tscale_filters
+                                      : fixed_scale_filters;
+    for (; *fixed; fixed++)
+        mp_info(log, "    %s\n", *fixed);
+    for (int k = 0; mp_filter_kernels[k].f.name; k++) {
+        if (!(tscale && mp_filter_kernels[k].polar))
+            mp_info(log, "    %s\n", mp_filter_kernels[k].f.name);
+    }
+    for (int w = 0; mp_filter_windows[w].name; w++) {
+        bool dup = false; // don't log names that also exist as a kernel
+        for (int k = 0; !dup && mp_filter_kernels[k].f.name; k++)
+            dup = !strcmp(mp_filter_windows[w].name, mp_filter_kernels[k].f.name);
+        if (!dup)
+            mp_info(log, "    %s\n", mp_filter_windows[w].name);
+    }
+    if (s[0])
+        mp_fatal(log, "No scaler named '%s' found!\n", s);
+    return r;
+}
+
+static int validate_window_opt(struct mp_log *log, const m_option_t *opt,
+                               struct bstr name, const char **value)
+{
+    struct bstr param = bstr0(*value);
+    char s[32] = {0};
+    int r = 1;
+    if (!param.len)
+        return r; // empty string means "use preferred window"
+    if (bstr_equals0(param, "help")) {
+        r = M_OPT_EXIT;
+    } else {
+        snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
+        if (!mp_find_filter_window(s))
+            r = M_OPT_INVALID;
+    }
+    if (r >= 1)
+        return r;
+    // "help" or an unknown name: print the known window names.
+    mp_info(log, "Available windows:\n");
+    for (int i = 0; mp_filter_windows[i].name; i++)
+        mp_info(log, "    %s\n", mp_filter_windows[i].name);
+    if (s[0])
+        mp_fatal(log, "No window named '%s' found!\n", s);
+    return r;
+}
+
+static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt,
+                                        struct bstr name, const char **value)
+{
+    struct bstr param = bstr0(*value);
+    char s[32] = {0};
+    int r = 1;
+    if (bstr_equals0(param, "help")) {
+        r = M_OPT_EXIT;
+    } else {
+        snprintf(s, sizeof(s), "%.*s", BSTR_P(param));
+        if (!mp_find_error_diffusion_kernel(s))
+            r = M_OPT_INVALID;
+    }
+    if (r >= 1)
+        return r;
+    // "help" or an unknown name: print the known kernel names.
+    mp_info(log, "Available error diffusion kernels:\n");
+    for (int i = 0; mp_error_diffusion_kernels[i].name; i++)
+        mp_info(log, "    %s\n", mp_error_diffusion_kernels[i].name);
+    if (s[0])
+        mp_fatal(log, "No error diffusion kernel named '%s' found!\n", s);
+    return r;
+}
+
+void gl_video_set_ambient_lux(struct gl_video *p, int lux)
+{
+    if (!p->opts.gamma_auto)
+        return; // gamma only follows ambient light while gamma_auto is set
+    p->opts.gamma = gl_video_scale_ambient_lux(16.0, 256.0, 1.0, 1.2, lux);
+    MP_TRACE(p, "ambient light changed: %d lux (gamma: %f)\n", lux,
+             p->opts.gamma);
+}
+
+static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size)
+{
+    // Request a host-mapped RA_BUF_TYPE_TEX_UPLOAD buffer of `size` bytes.
+    struct ra_buf_params req = {
+        .size = size,
+        .host_mapped = true,
+        .type = RA_BUF_TYPE_TEX_UPLOAD,
+    };
+    struct ra_buf *buf = ra_buf_create(p->ra, &req);
+    if (!buf)
+        return NULL;
+    // Track it so gl_video_dr_free_buffer() can find it by its data pointer.
+    MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers);
+    struct dr_buffer *slot = &p->dr_buffers[p->num_dr_buffers++];
+    *slot = (struct dr_buffer){ .buf = buf };
+    return buf->data;
+}
+
+static void gl_video_dr_free_buffer(void *opaque, uint8_t *data)
+{
+    struct gl_video *p = opaque;
+    // Locate the tracked buffer whose buf->data equals `data`.
+    int idx = 0;
+    while (idx < p->num_dr_buffers && p->dr_buffers[idx].buf->data != data)
+        idx++;
+    if (idx >= p->num_dr_buffers) {
+        MP_ASSERT_UNREACHABLE(); // not found - must not happen
+        return;
+    }
+    struct dr_buffer *entry = &p->dr_buffers[idx];
+    assert(!entry->mpi); // can't be freed while it has a ref
+    ra_buf_free(p->ra, &entry->buf);
+    MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, idx);
+}
+
+struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h,
+                                    int stride_align, int flags)
+{
+    bool want_cached = flags & VO_DR_FLAG_HOST_CACHED;
+    if (want_cached && (p->ra->caps & RA_CAP_SLOW_DR)) {
+        MP_VERBOSE(p, "DR path suspected slow/uncached, disabling.\n");
+        return NULL;
+    }
+
+    if (!gl_video_check_format(p, imgfmt))
+        return NULL;
+
+    int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align);
+    if (size < 0)
+        return NULL;
+
+    // The buffer is stride_align bytes larger than the computed image size.
+    int alloc_size = size + stride_align;
+    void *data = gl_video_dr_alloc_buffer(p, alloc_size);
+    if (!data)
+        return NULL;
+
+    // (we expect vo.c to proxy the free callback, so it happens in the same
+    // thread it was allocated in, removing the need for synchronization)
+    struct mp_image *img = mp_image_from_buffer(imgfmt, w, h, stride_align,
+                                                data, alloc_size, p,
+                                                gl_video_dr_free_buffer);
+    if (!img)
+        gl_video_dr_free_buffer(p, data);
+    return img;
+}
+
+void gl_video_init_hwdecs(struct gl_video *p, struct ra_ctx *ra_ctx,
+                          struct mp_hwdec_devices *devs,
+                          bool load_all_by_default)
+{
+    struct ra_hwdec_ctx *hw = &p->hwdec_ctx;
+    assert(!hw->ra_ctx); // the context's ra_ctx must still be unset
+    *hw = (struct ra_hwdec_ctx) {
+        .ra_ctx = ra_ctx,
+        .global = p->global,
+        .log = p->log,
+    };
+    ra_hwdec_ctx_init(hw, devs, p->opts.hwdec_interop, load_all_by_default);
+}
+
+void gl_video_load_hwdecs_for_img_fmt(struct gl_video *p, struct mp_hwdec_devices *devs,
+                                      struct hwdec_imgfmt_request *params)
+{
+    assert(p->hwdec_ctx.ra_ctx); // ra_ctx is set by gl_video_init_hwdecs()
+    ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, devs, params);
+}
diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h
new file mode 100644
index 0000000..411d336
--- /dev/null
+++ b/video/out/gpu/video.h
@@ -0,0 +1,238 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_VIDEO_H
+#define MP_GL_VIDEO_H
+
+#include <stdbool.h>
+
+#include "options/m_option.h"
+#include "sub/osd.h"
+#include "utils.h"
+#include "lcms.h"
+#include "shader_cache.h"
+#include "video/csputils.h"
+#include "video/out/filter_kernels.h"
+
+struct scaler_fun {
+    char *name;      // function name; may be NULL (video.c checks before use)
+    float params[2]; // video_shaders.c treats a NaN params[0] as 0.0 (oversample)
+    float blur;
+    float taper;
+};
+
+struct scaler_config {
+    struct scaler_fun kernel;
+    struct scaler_fun window;
+    float radius;   // used for tscale when the kernel's own radius is not > 0
+    float antiring; // > 0 enables the clamp/mix step of the separated sampler
+    float clamp;
+};
+
+struct scaler {
+    int index; // slot in gl_video's scaler array; assigned in gl_video_init()
+    struct scaler_config conf;
+    double scale_factor;
+    bool initialized;
+    struct filter_kernel *kernel; // see kernel_storage below
+    struct ra_tex *lut; // bound as the "lut" uniform by the sampling passes
+    struct ra_tex *sep_fbo;
+    bool insufficient;
+
+    // kernel points here
+    struct filter_kernel kernel_storage;
+};
+
+enum scaler_unit {
+    SCALER_SCALE,  // luma/video
+    SCALER_DSCALE, // luma-video downscaling
+    SCALER_CSCALE, // chroma upscaling
+    SCALER_TSCALE, // temporal scaling (interpolation)
+    SCALER_COUNT   // number of units above; used as loop bound, not a unit
+};
+
+enum dither_algo { // NOTE(review): presumably the values of gl_video_opts.dither_algo
+    DITHER_NONE = 0,
+    DITHER_FRUIT,
+    DITHER_ORDERED,
+    DITHER_ERROR_DIFFUSION,
+};
+
+enum alpha_mode { // NOTE(review): presumably the values of gl_video_opts.alpha_mode
+    ALPHA_NO = 0,
+    ALPHA_YES,
+    ALPHA_BLEND,
+    ALPHA_BLEND_TILES,
+};
+
+enum blend_subs_mode { // NOTE(review): presumably the values of gl_video_opts.blend_subs
+    BLEND_SUBS_NO = 0,
+    BLEND_SUBS_YES,
+    BLEND_SUBS_VIDEO,
+};
+
+enum tone_mapping { // NOTE(review): presumably the values of gl_tone_map_opts.curve
+    TONE_MAPPING_AUTO, // first enumerator, so its value is 0
+    TONE_MAPPING_CLIP,
+    TONE_MAPPING_MOBIUS,
+    TONE_MAPPING_REINHARD,
+    TONE_MAPPING_HABLE,
+    TONE_MAPPING_GAMMA,
+    TONE_MAPPING_LINEAR,
+    TONE_MAPPING_SPLINE,
+    TONE_MAPPING_BT_2390,
+    TONE_MAPPING_BT_2446A,
+    TONE_MAPPING_ST2094_40,
+    TONE_MAPPING_ST2094_10,
+};
+
+enum gamut_mode { // NOTE(review): presumably the values of gl_tone_map_opts.gamut_mode
+    GAMUT_AUTO, // first enumerator, so its value is 0
+    GAMUT_CLIP,
+    GAMUT_PERCEPTUAL,
+    GAMUT_RELATIVE,
+    GAMUT_SATURATION,
+    GAMUT_ABSOLUTE,
+    GAMUT_DESATURATE,
+    GAMUT_DARKEN,
+    GAMUT_WARN,
+    GAMUT_LINEAR,
+};
+
+struct gl_tone_map_opts {
+    int curve; // NOTE(review): presumably an enum tone_mapping value - confirm
+    float curve_param;
+    float max_boost;
+    bool inverse;
+    int compute_peak;
+    float decay_rate;
+    float scene_threshold_low;
+    float scene_threshold_high;
+    float peak_percentile;
+    float contrast_recovery;
+    float contrast_smoothness;
+    int gamut_mode; // NOTE(review): presumably an enum gamut_mode value - confirm
+    bool visualize;
+};
+
+struct gl_video_opts {
+    int dumb_mode;
+    struct scaler_config scaler[SCALER_COUNT]; // indexed by enum scaler_unit
+    float gamma; // overwritten from ambient light while gamma_auto is set
+    bool gamma_auto;
+    int target_prim;
+    int target_trc;
+    int target_peak;
+    int target_contrast;
+    int target_gamut;
+    struct gl_tone_map_opts tone_map;
+    bool correct_downscaling; // video.c warns if the downscaler is bilinear
+    bool linear_downscaling;
+    bool linear_upscaling;
+    bool sigmoid_upscaling;
+    float sigmoid_center;
+    float sigmoid_slope;
+    bool scaler_resizes_only;
+    bool pbo; // copied to ra->use_pbo by video.c
+    int dither_depth;
+    int dither_algo;
+    int dither_size;
+    bool temporal_dither;
+    int temporal_dither_period;
+    char *error_diffusion;
+    char *fbo_format;
+    int alpha_mode;
+    bool use_rectangle;
+    struct m_color background; // clear color unless one was forced
+    bool interpolation; // enlarges the frame queue (gl_video_configure_queue)
+    float interpolation_threshold;
+    int blend_subs;
+    char **user_shaders;
+    char **user_shader_opts;
+    bool deband;
+    struct deband_opts *deband_opts;
+    float unsharp;
+    int tex_pad_x, tex_pad_y;
+    struct mp_icc_opts *icc_opts; // passed to gl_lcms_init()
+    bool shader_cache; // if set, shader_cache_dir is given to the shader cache
+    int early_flush;
+    char *shader_cache_dir;
+    char *hwdec_interop; // passed to ra_hwdec_ctx_init()
+};
+
+extern const struct m_sub_options gl_video_conf; // used for the options cache in gl_video_init()
+
+struct gl_video; // opaque to users of this header
+struct vo_frame;
+struct voctrl_screenshot;
+
+enum { // single-bit flags; RENDER_FRAME_DEF is all of them but RENDER_FRAME_VF_SUBS
+    RENDER_FRAME_SUBS = 1 << 0,
+    RENDER_FRAME_OSD = 1 << 1,
+    RENDER_FRAME_VF_SUBS = 1 << 2,
+    RENDER_SCREEN_COLOR = 1 << 3, // 3D LUT and dithering
+    RENDER_FRAME_DEF = RENDER_FRAME_SUBS | RENDER_FRAME_OSD | RENDER_SCREEN_COLOR,
+};
+
+struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log,
+                               struct mpv_global *g);
+void gl_video_uninit(struct gl_video *p); // p may be NULL
+void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd);
+bool gl_video_check_format(struct gl_video *p, int mp_format);
+void gl_video_config(struct gl_video *p, struct mp_image_params *params);
+void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame,
+                           struct ra_fbo fbo, int flags);
+void gl_video_resize(struct gl_video *p,
+                     struct mp_rect *src, struct mp_rect *dst,
+                     struct mp_osd_res *osd);
+void gl_video_set_fb_depth(struct gl_video *p, int fb_depth);
+void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out);
+void gl_video_set_clear_color(struct gl_video *p, struct m_color color);
+void gl_video_set_osd_pts(struct gl_video *p, double pts);
+bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *osd,
+                               double pts);
+
+void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame,
+                         struct voctrl_screenshot *args);
+
+float gl_video_scale_ambient_lux(float lmin, float lmax,
+                                 float rmin, float rmax, float lux);
+void gl_video_set_ambient_lux(struct gl_video *p, int lux); // no-op unless gamma_auto
+void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data);
+bool gl_video_icc_auto_enabled(struct gl_video *p);
+bool gl_video_gamma_auto_enabled(struct gl_video *p);
+struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p);
+
+void gl_video_reset(struct gl_video *p);
+bool gl_video_showing_interpolated_frame(struct gl_video *p);
+
+struct mp_hwdec_devices;
+void gl_video_init_hwdecs(struct gl_video *p, struct ra_ctx *ra_ctx,
+                          struct mp_hwdec_devices *devs,
+                          bool load_all_by_default);
+struct hwdec_imgfmt_request;
+void gl_video_load_hwdecs_for_img_fmt(struct gl_video *p, struct mp_hwdec_devices *devs,
+                                      struct hwdec_imgfmt_request *params);
+// Re-reads the options, then sets the vo queue size (larger with interpolation).
+struct vo;
+void gl_video_configure_queue(struct gl_video *p, struct vo *vo);
+// NULL if DR is declined, the format is unsupported, or an allocation fails.
+struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h,
+                                    int stride_align, int flags);
+
+
+#endif
diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c
new file mode 100644
index 0000000..6c0e8a8
--- /dev/null
+++ b/video/out/gpu/video_shaders.c
@@ -0,0 +1,1033 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+
+#include "video_shaders.h"
+#include "video.h"
+
+#define GLSL(x) gl_sc_add(sc, #x "\n");          // stringifies x; needs `sc` in scope; ends in ';'
+#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__)   // format-string variant; caller adds the ';'
+#define GLSLH(x) gl_sc_hadd(sc, #x "\n");        // same as GLSL, but through gl_sc_hadd()
+#define GLSLHF(...) gl_sc_haddf(sc, __VA_ARGS__) // same as GLSLF, but through gl_sc_haddf()
+
+// Emit the prelude for texture tex_num: redefine tex/texmap, declare pos, size, pt
+void sampler_prelude(struct gl_shader_cache *sc, int tex_num)
+{
+    GLSLF("#undef tex\n");
+    GLSLF("#undef texmap\n");
+    GLSLF("#define tex texture%d\n", tex_num);
+    GLSLF("#define texmap texmap%d\n", tex_num);
+    GLSLF("vec2 pos = texcoord%d;\n", tex_num);
+    GLSLF("vec2 size = texture_size%d;\n", tex_num);
+    GLSLF("vec2 pt = pixel_size%d;\n", tex_num);
+}
+
+static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
+                                              struct scaler *scaler)
+{
+    gl_sc_uniform_texture(sc, "lut", scaler->lut);
+    GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut->params.h);
+
+    // Every LUT fetch yields four consecutive weights, one per component.
+    int num_weights = scaler->kernel->size;
+    int num_texels = (num_weights + 3) / 4; // round up
+    GLSLF("float weights[%d];\n", num_weights);
+    for (int texel = 0; texel < num_texels; texel++) {
+        GLSLF("c = texture(lut, vec2(%f, ypos));\n", (texel + 0.5) / num_texels);
+        for (int ch = 0; ch < 4 && 4 * texel + ch < num_weights; ch++)
+            GLSLF("weights[%d] = c[%d];\n", 4 * texel + ch, ch);
+    }
+}
+
+// Handle a single pass (either vertical or horizontal). The direction is given
+// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is
+// used instead (samples from texture0 through texture(N-1), N = kernel size).
+void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
+                               int d_x, int d_y)
+{
+    int N = scaler->kernel->size; // number of samples/weights per output pixel
+    bool use_ar = scaler->conf.antiring > 0; // anti-ringing only if strength > 0
+    bool planar = d_x == 0 && d_y == 0; // NOTE(review): fcoord is then not declared here
+    GLSL(color = vec4(0.0);)
+    GLSLF("{\n");
+    if (!planar) {
+        GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y);
+        GLSL(pt *= dir;)
+        GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);)
+        GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1);
+    }
+    GLSL(vec4 c;)
+    if (use_ar) {
+        GLSL(vec4 hi = vec4(0.0);)
+        GLSL(vec4 lo = vec4(1.0);)
+    }
+    pass_sample_separated_get_weights(sc, scaler);
+    GLSLF("// scaler samples\n");
+    for (int n = 0; n < N; n++) {
+        if (planar) {
+            GLSLF("c = texture(texture%d, texcoord%d);\n", n, n);
+        } else {
+            GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n);
+        }
+        GLSLF("color += vec4(weights[%d]) * c;\n", n);
+        if (use_ar && (n == N/2-1 || n == N/2)) { // only the two center samples
+            GLSL(lo = min(lo, c);)
+            GLSL(hi = max(hi, c);)
+        }
+    }
+    if (use_ar) // blend towards the [lo, hi]-clamped color by the antiring amount
+        GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
+              scaler->conf.antiring);
+    GLSLF("}\n");
+}
+
+// Subroutine for computing and adding an individual texel contribution
+// If planar is false, samples the texture directly at offset (x, y) from base
+// If planar is true, takes the pixel from inX[idx] where X is the component and
+// `idx` must be defined by the caller
+static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
+                         int x, int y, int components, bool planar)
+{
+    double radius = scaler->kernel->radius * scaler->kernel->filter_scale;
+    double radius_cutoff = scaler->kernel->radius_cutoff;
+
+    // Since we can't know the subpixel position in advance, assume a
+    // worst case scenario
+    int yy = y > 0 ? y-1 : y;
+    int xx = x > 0 ? x-1 : x;
+    double dmax = sqrt(xx*xx + yy*yy);
+    // Skip samples definitely outside the radius
+    if (dmax >= radius_cutoff)
+        return; // nothing is emitted for this texel
+    GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y);
+    // Check for samples that might be skippable
+    bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
+    if (maybe_skippable)
+        GLSLF("if (d < %f) {\n", radius_cutoff);
+
+    // get the weight for this pixel
+    if (scaler->lut->params.dimensions == 1) { // 1D LUT, sized by params.w
+        GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n",
+              radius, scaler->lut->params.w);
+    } else { // otherwise sample at x = 0.5, with the position along params.h
+        GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n",
+              radius, scaler->lut->params.h);
+    }
+    GLSL(wsum += w;)
+
+    if (planar) {
+        for (int n = 0; n < components; n++)
+            GLSLF("color[%d] += w * in%d[idx];\n", n, n);
+    } else {
+        GLSLF("in0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
+        GLSL(color += vec4(w) * in0;)
+    }
+
+    if (maybe_skippable) // close the runtime distance check opened above
+        GLSLF("}\n");
+}
+
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+                       int components, bool sup_gather)
+{
+    GLSL(color = vec4(0.0);)
+    GLSLF("{\n");
+    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
+    GLSL(vec2 base = pos - fcoord * pt;)
+    GLSLF("float w, d, wsum = 0.0;\n");
+    for (int n = 0; n < components; n++)
+        GLSLF("vec4 in%d;\n", n);
+    GLSL(int idx;)
+
+    gl_sc_uniform_texture(sc, "lut", scaler->lut);
+
+    GLSLF("// scaler samples\n");
+    int bound = ceil(scaler->kernel->radius_cutoff);
+    for (int y = 1-bound; y <= bound; y += 2) { // walk the footprint in 2x2 quads
+        for (int x = 1-bound; x <= bound; x += 2) {
+            // First we figure out whether it's more efficient to use direct
+            // sampling or gathering. The problem is that gathering 4 texels
+            // only to discard some of them is very wasteful, so only do it if
+            // we suspect it will be a win rather than a loss. This is the case
+            // exactly when all four texels are within bounds
+            bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;
+
+            if (!sup_gather) // the caller says gathering is not available
+                use_gather = false;
+
+            if (use_gather) {
+                // Gather the four surrounding texels simultaneously
+                for (int n = 0; n < components; n++) {
+                    GLSLF("in%d = textureGatherOffset(tex, base, "
+                          "ivec2(%d, %d), %d);\n", n, x, y, n);
+                }
+
+                // Mix in all of the points with their weights
+                for (int p = 0; p < 4; p++) {
+                    // The four texels are gathered counterclockwise starting
+                    // from the bottom left
+                    static const int xo[4] = {0, 1, 1, 0};
+                    static const int yo[4] = {1, 1, 0, 0};
+                    if (x+xo[p] > bound || y+yo[p] > bound)
+                        continue; // this texel of the quad lies beyond the bound
+                    GLSLF("idx = %d;\n", p);
+                    polar_sample(sc, scaler, x+xo[p], y+yo[p], components, true);
+                }
+            } else {
+                // switch to direct sampling instead, for efficiency/compatibility
+                for (int yy = y; yy <= bound && yy <= y+1; yy++) {
+                    for (int xx = x; xx <= bound && xx <= x+1; xx++)
+                        polar_sample(sc, scaler, xx, yy, components, false);
+                }
+            }
+        }
+    }
+
+    GLSL(color = color / vec4(wsum);)
+    GLSLF("}\n");
+}
+
+// bw/bh: block size (also the stride of each invocation's shmem load loop)
+// iw/ih: input size (pre-calculated to fit all required texels)
+void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+                        int components, int bw, int bh, int iw, int ih)
+{
+    int bound = ceil(scaler->kernel->radius_cutoff);
+    int offset = bound - 1; // padding top/left
+
+    GLSL(color = vec4(0.0);)
+    GLSLF("{\n");
+    GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);)
+    GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));)
+    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
+    GLSL(vec2 base = pos - pt * fcoord;)
+    GLSL(ivec2 rel = ivec2(round((base - wbase) * size));)
+    GLSL(int idx;)
+    GLSLF("float w, d, wsum = 0.0;\n");
+    gl_sc_uniform_texture(sc, "lut", scaler->lut);
+
+    // Load all relevant texels into shmem
+    for (int c = 0; c < components; c++) // one shared float array per component
+        GLSLHF("shared float in%d[%d];\n", c, ih * iw);
+
+    GLSL(vec4 c;)
+    GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) {\n", ih, bh);
+    GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw);
+    GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset);
+    for (int c = 0; c < components; c++)
+        GLSLF("in%d[%d * y + x] = c[%d];\n", c, iw, c);
+    GLSLF("}}\n");
+    GLSL(groupMemoryBarrier();)
+    GLSL(barrier();)
+
+    // Dispatch the actual samples
+    GLSLF("// scaler samples\n");
+    for (int y = 1-bound; y <= bound; y++) {
+        for (int x = 1-bound; x <= bound; x++) {
+            GLSLF("idx = %d * rel.y + rel.x + %d;\n", iw,
+                  iw * (y + offset) + x + offset);
+            polar_sample(sc, scaler, x, y, components, true);
+        }
+    }
+
+    GLSL(color = color / vec4(wsum);)
+    GLSLF("}\n");
+}
+
+static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s)
+{
+    // Explanation of how bicubic scaling with only 4 texel fetches is done:
+    //   http://www.mate.tue.nl/mate/pdfs/10318.pdf
+    //   'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
+    // Explanation why this algorithm normally always blurs, even with unit
+    // scaling:
+    //   http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
+    //   'GPU Prefilter for Accurate Cubic B-spline Interpolation'
+    GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s" // t: vec4 to declare
+                " + vec4(1, 0, -0.5, 0.5);\n", t, s); // s: GLSL expression used as input
+    GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s);
+    GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s);
+    GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t);
+    GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s);
+}
+
+void pass_sample_bicubic_fast(struct gl_shader_cache *sc)
+{
+    GLSLF("{\n");
+    GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));)
+    bicubic_calcweights(sc, "parmx", "fcoord.x"); // declares vec4 parmx
+    bicubic_calcweights(sc, "parmy", "fcoord.y"); // declares vec4 parmy
+    GLSL(vec4 cdelta;)
+    GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);)
+    GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);)
+    // first y-interpolation (two fetches at x offset cdelta.x)
+    GLSL(vec4 ar = texture(tex, pos + cdelta.xy);)
+    GLSL(vec4 ag = texture(tex, pos + cdelta.xw);)
+    GLSL(vec4 ab = mix(ag, ar, parmy.b);)
+    // second y-interpolation (two fetches at x offset cdelta.z)
+    GLSL(vec4 br = texture(tex, pos + cdelta.zy);)
+    GLSL(vec4 bg = texture(tex, pos + cdelta.zw);)
+    GLSL(vec4 aa = mix(bg, br, parmy.b);)
+    // x-interpolation
+    GLSL(color = mix(aa, ab, parmx.b);)
+    GLSLF("}\n");
+}
+
+void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
+                                   int w, int h)
+{
+    GLSLF("{\n");
+    GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest
+    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
+    // Determine the mixing coefficient vector
+    gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h});
+    GLSL(vec2 coeff = fcoord * output_size/size;)
+    float threshold = scaler->conf.kernel.params[0];
+    threshold = isnan(threshold) ? 0.0 : threshold; // NaN parameter counts as 0
+    GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold);
+    GLSL(coeff = clamp(coeff, 0.0, 1.0);)
+    // Compute the right blend of colors
+    GLSL(color = texture(tex, pos + pt * (coeff - fcoord));)
+    GLSLF("}\n");
+}
+
+// Common constants for SMPTE ST.2084 (HDR)
+static const float PQ_M1 = 2610./4096 * 1./4,   // = 0.1593017578125
+                   PQ_M2 = 2523./4096 * 128,    // = 78.84375
+                   PQ_C1 = 3424./4096,          // = 0.8359375
+                   PQ_C2 = 2413./4096 * 32,     // = 18.8515625
+                   PQ_C3 = 2392./4096 * 32;     // = 18.6875
+
+// Common constants for ARIB STD-B67 (HLG)
+static const float HLG_A = 0.17883277,
+                   HLG_B = 0.28466892,
+                   HLG_C = 0.55991073;
+
+// Common constants for Panasonic V-Log
+static const float VLOG_B = 0.00873,
+                   VLOG_C = 0.241514,
+                   VLOG_D = 0.598206;
+
+// Common constants for Sony S-Log
+static const float SLOG_A = 0.432699,
+                   SLOG_B = 0.037584,
+                   SLOG_C = 0.616596 + 0.03,    // = 0.646596
+                   SLOG_P = 3.538813,
+                   SLOG_Q = 0.030001,
+                   SLOG_K2 = 155.0 / 219.0;     // ~ 0.70776
+
+// Linearize (expand), given a TRC as input. In essence, this is the ITU-R
+// EOTF, calculated on an idealized (reference) monitor with a white point of
+// MP_REF_WHITE and infinite contrast. MP_CSP_TRC_LINEAR emits no shader code;
+// a TRC without a case below aborts.
+// These functions always output to a normalized scale of [0,1], for
+// convenience of the video.c code that calls it. To get the values in an
+// absolute scale, multiply the result by `mp_trc_nom_peak(trc)`
+void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
+{
+    if (trc == MP_CSP_TRC_LINEAR)
+        return;
+
+    GLSLF("// linearize\n");
+
+    // Note that this clamp may technically violate the definition of
+    // ITU-R BT.2100, which allows for sub-blacks and super-whites to be
+    // displayed on the display where such would be possible. That said, the
+    // problem is that not all gamma curves are well-defined on the values
+    // outside this range, so we ignore it and just clip anyway for sanity.
+    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
+
+    switch (trc) {
+    case MP_CSP_TRC_SRGB: // linear segment up to 0.04045, offset 2.4 power law above it
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/12.92),             \n"
+              "                pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), \n"
+              "                %s(lessThan(vec3(0.04045), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_BT_1886: // pure 2.4 power law (emits the same shader line as GAMMA24)
+        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
+        break;
+    case MP_CSP_TRC_GAMMA18:
+        GLSL(color.rgb = pow(color.rgb, vec3(1.8));)
+        break;
+    case MP_CSP_TRC_GAMMA20:
+        GLSL(color.rgb = pow(color.rgb, vec3(2.0));)
+        break;
+    case MP_CSP_TRC_GAMMA22:
+        GLSL(color.rgb = pow(color.rgb, vec3(2.2));)
+        break;
+    case MP_CSP_TRC_GAMMA24:
+        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
+        break;
+    case MP_CSP_TRC_GAMMA26:
+        GLSL(color.rgb = pow(color.rgb, vec3(2.6));)
+        break;
+    case MP_CSP_TRC_GAMMA28:
+        GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
+        break;
+    case MP_CSP_TRC_PRO_PHOTO: // linear segment (1/16) up to 0.03125, 1.8 power law above it
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/16.0),              \n"
+              "                pow(color.rgb, vec3(1.8)),               \n"
+              "                %s(lessThan(vec3(0.03125), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_PQ: // undo the M2 power, then the rational term, then the M1 power
+        GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2);
+        GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n"
+              "             / (vec3(%f) - vec3(%f) * color.rgb);\n",
+              PQ_C1, PQ_C2, PQ_C3);
+        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", 1.0 / PQ_M1);
+        // PQ's output range is 0-10000, but we need it to be relative to
+        // MP_REF_WHITE instead, so rescale
+        GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE);
+        break;
+    case MP_CSP_TRC_HLG: // square law up to 0.5, exponential above; then divided by MP_REF_WHITE_HLG
+        GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n"
+              "                exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n"
+              "                %s(lessThan(vec3(0.5), color.rgb)));\n",
+              HLG_C, HLG_A, HLG_B, gl_sc_bvec(sc, 3));
+        GLSLF("color.rgb *= vec3(1.0/%f);\n", MP_REF_WHITE_HLG);
+        break;
+    case MP_CSP_TRC_V_LOG: // linear segment below 0.181, 10^x curve from 0.181 upwards
+        GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
+              "    pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
+              "              - vec3(%f),                                  \n"
+              "    %s(lessThanEqual(vec3(0.181), color.rgb)));            \n",
+              VLOG_D, VLOG_C, VLOG_B, gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_S_LOG1: // single 10^x curve, no linear segment
+        GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n"
+              "            - vec3(%f);\n",
+              SLOG_C, SLOG_A, SLOG_B);
+        break;
+    case MP_CSP_TRC_S_LOG2: // linear segment below SLOG_Q, 10^x curve scaled by 1/SLOG_K2 from SLOG_Q upwards
+        GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f),      \n"
+              "    (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
+              "              - vec3(%f)) * vec3(1.0/%f),                   \n"
+              "    %s(lessThanEqual(vec3(%f), color.rgb)));                \n",
+              SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, gl_sc_bvec(sc, 3), SLOG_Q);
+        break;
+    case MP_CSP_TRC_ST428: // 2.6 power law scaled by 52.37/48
+        GLSL(color.rgb = vec3(52.37/48.0) * pow(color.rgb, vec3(2.6)););
+        break;
+    default: // TRC value with no case above
+        abort();
+    }
+
+    // Rescale to prevent clipping on non-float textures
+    GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc));
+}
+
+// Delinearize (compress), given a TRC as output. This corresponds to the
+// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a
+// reference monitor. MP_CSP_TRC_LINEAR emits no shader code.
+//
+// Like pass_linearize, this function ingests values on a normalized scale
+void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
+{
+    if (trc == MP_CSP_TRC_LINEAR)
+        return;
+
+    GLSLF("// delinearize\n");
+    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) // clip first, then undo pass_linearize's 1/nom_peak rescale
+    GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc));
+
+    switch (trc) {
+    case MP_CSP_TRC_SRGB: // linear segment (12.92) below 0.0031308, offset 1/2.4 power law from there upwards
+        GLSLF("color.rgb = mix(color.rgb * vec3(12.92),                       \n"
+              "               vec3(1.055) * pow(color.rgb, vec3(1.0/2.4))     \n"
+              "                   - vec3(0.055),                              \n"
+              "               %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_BT_1886: // pure 1/2.4 power law (emits the same shader line as GAMMA24)
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
+        break;
+    case MP_CSP_TRC_GAMMA18:
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));)
+        break;
+    case MP_CSP_TRC_GAMMA20:
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.0));)
+        break;
+    case MP_CSP_TRC_GAMMA22:
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));)
+        break;
+    case MP_CSP_TRC_GAMMA24:
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
+        break;
+    case MP_CSP_TRC_GAMMA26:
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.6));)
+        break;
+    case MP_CSP_TRC_GAMMA28:
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));)
+        break;
+    case MP_CSP_TRC_PRO_PHOTO: // linear segment (16x) below 0.001953, 1/1.8 power law from there upwards
+        GLSLF("color.rgb = mix(color.rgb * vec3(16.0),                        \n"
+              "                pow(color.rgb, vec3(1.0/1.8)),                 \n"
+              "                %s(lessThanEqual(vec3(0.001953), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_PQ: // rescale to the 0-10000 PQ range, then M1 power, rational term, M2 power
+        GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE);
+        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1);
+        GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n"
+              "             / (vec3(1.0) + vec3(%f) * color.rgb);\n",
+              PQ_C1, PQ_C2, PQ_C3);
+        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2);
+        break;
+    case MP_CSP_TRC_HLG: // multiply by MP_REF_WHITE_HLG; square root up to 1.0, log curve above
+        GLSLF("color.rgb *= vec3(%f);\n", MP_REF_WHITE_HLG);
+        GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n"
+              "                vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n"
+              "                %s(lessThan(vec3(1.0), color.rgb)));\n",
+              HLG_A, HLG_B, HLG_C, gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_V_LOG: // GLSL log() is the natural log, so the coefficient is divided by M_LN10 to get log10
+        GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125),   \n"
+              "                vec3(%f) * log(color.rgb + vec3(%f))   \n"
+              "                    + vec3(%f),                        \n"
+              "                %s(lessThanEqual(vec3(0.01), color.rgb))); \n",
+              VLOG_C / M_LN10, VLOG_B, VLOG_D, gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_S_LOG1: // single log10 curve (coefficient pre-divided by M_LN10), no linear segment
+        GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n",
+              SLOG_A / M_LN10, SLOG_B, SLOG_C);
+        break;
+    case MP_CSP_TRC_S_LOG2: // linear segment for negative input, log10 curve for input >= 0.0
+        GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f),                \n"
+              "                vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n"
+              "                    + vec3(%f),                                 \n"
+              "                %s(lessThanEqual(vec3(0.0), color.rgb)));       \n",
+              SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C, gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_TRC_ST428: // scale by 48/52.37, then 1/2.6 power law
+        GLSL(color.rgb = pow(color.rgb * vec3(48.0/52.37), vec3(1.0/2.6)););
+        break;
+    default: // TRC value with no case above
+        abort();
+    }
+}
+
+// Apply the OOTF mapping from a given light type to display-referred light.
+// Assumes absolute scale values. `peak` is used to tune the OOTF where
+// applicable (currently only HLG). Unknown light types abort.
+static void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light,
+                      float peak)
+{
+    if (light == MP_CSP_LIGHT_DISPLAY) // already display-referred: emit nothing
+        return;
+
+    GLSLF("// apply ootf\n");
+
+    switch (light)
+    {
+    case MP_CSP_LIGHT_SCENE_HLG: {
+        // HLG OOTF from BT.2100, scaled to the chosen display peak
+        float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); // system gamma, computed on the CPU and never below 1.0
+        GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), %f));\n", // src_luma is the uniform uploaded by pass_color_map
+              peak / pow(12.0 / MP_REF_WHITE_HLG, gamma), gamma - 1.0);
+        break;
+    }
+    case MP_CSP_LIGHT_SCENE_709_1886:
+        // This OOTF is defined by encoding the result as 709 and then decoding
+        // it as 1886; although this is called 709_1886 we actually use the
+        // more precise (by one decimal) values from BT.2020 instead
+        GLSLF("color.rgb = mix(color.rgb * vec3(4.5),                  \n"
+              "                vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n"
+              "                %s(lessThan(vec3(0.0181), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
+        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
+        break;
+    case MP_CSP_LIGHT_SCENE_1_2: // plain 1.2 power law
+        GLSL(color.rgb = pow(color.rgb, vec3(1.2));)
+        break;
+    default: // light type with no case above
+        abort();
+    }
+}
+
+// Inverse of the function pass_ootf, for completeness' sake. `peak` has the same meaning as there; unknown light types abort.
+static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light,
+                              float peak)
+{
+    if (light == MP_CSP_LIGHT_DISPLAY) // already display-referred: emit nothing
+        return;
+
+    GLSLF("// apply inverse ootf\n");
+
+    switch (light)
+    {
+    case MP_CSP_LIGHT_SCENE_HLG: {
+        float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); // same system gamma as pass_ootf
+        GLSLF("color.rgb *= vec3(1.0/%f);\n", peak / pow(12.0 / MP_REF_WHITE_HLG, gamma));
+        GLSLF("color.rgb /= vec3(max(1e-6, pow(dot(dst_luma, color.rgb), %f)));\n", // pass_color_map calls this after cms_matrix, so color is in the dst primaries
+              (gamma - 1.0) / gamma);
+        break;
+    }
+    case MP_CSP_LIGHT_SCENE_709_1886: // decode-as-1886 undone first, then the 709-style encode is inverted
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
+        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5),               \n"
+              "                pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n"
+              "                    vec3(1.0/0.45)),                     \n"
+              "                %s(lessThan(vec3(0.08145), color.rgb))); \n",
+              gl_sc_bvec(sc, 3));
+        break;
+    case MP_CSP_LIGHT_SCENE_1_2: // plain 1/1.2 power law
+        GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));)
+        break;
+    default: // light type with no case above
+        abort();
+    }
+}
+
+// Average light level for SDR signals. This is equal to a signal level of 0.5
+// under a typical presentation gamma of about 2.0 (0.5^2.0 = 0.25).
+static const float sdr_avg = 0.25; // pass_tone_map uses it as the initial sig_avg and as the boost target
+
+static void hdr_update_peak(struct gl_shader_cache *sc,
+                            const struct gl_tone_map_opts *opts)
+{
+    // Peak detection (uses compute-shader built-ins): first update the sig_peak/sig_avg from the old SSBO state
+    GLSL(if (average.y > 0.0) {)
+    GLSL(    sig_avg  = max(1e-3, average.x);)
+    GLSL(    sig_peak = max(1.00, average.y);)
+    GLSL(})
+
+    // Chosen to avoid overflowing on an 8K buffer
+    const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0;
+
+    // For performance, and to avoid overflows, we tally up the sub-results per
+    // pixel using shared memory first
+    GLSLH(shared int wg_sum;)
+    GLSLH(shared uint wg_max;)
+    GLSL(wg_sum = 0; wg_max = 0u;)
+    GLSL(barrier();)
+    GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min); // sig_max is declared by pass_tone_map before this is called
+    GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale); // fixed-point sum of log values
+    GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale); // fixed-point maximum
+
+    // Have one thread per work group update the global atomics
+    GLSL(memoryBarrierShared();)
+    GLSL(barrier();)
+    GLSL(if (gl_LocalInvocationIndex == 0u) {)
+    GLSL(    int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);)
+    GLSL(    atomicAdd(frame_sum, wg_avg);)
+    GLSL(    atomicMax(frame_max, wg_max);)
+    GLSL(    memoryBarrierBuffer();)
+    GLSL(})
+    GLSL(barrier();)
+
+    // Finally, to update the global state, we increment a counter per dispatch
+    GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;)
+    GLSL(if (gl_LocalInvocationIndex == 0u && atomicAdd(counter, 1u) == num_wg - 1u) {) // true only for the last work group to arrive
+    GLSL(    counter = 0u;)
+    GLSL(    vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);)
+    GLSLF("  cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale); // undo the fixed-point scaling
+    GLSL(    cur.x = exp(cur.x);) // mean of logs back to a linear value
+    GLSL(    if (average.y == 0.0)) // no previous state yet: seed it with this frame
+    GLSL(        average = cur;)
+
+    // Use an IIR low-pass filter to smooth out the detected values, with a
+    // configurable decay rate based on the desired time constant (tau)
+    if (opts->decay_rate) {
+        float decay = 1.0f - expf(-1.0f / opts->decay_rate);
+        GLSLF("  average += %f * (cur - average);\n", decay);
+    } else { // decay_rate of 0: no smoothing, take the current frame as-is
+        GLSLF("  average = cur;\n");
+    }
+
+    // Scene change hysteresis
+    float log_db = 10.0 / log(10.0); // divides the thresholds below so they compare against a natural-log ratio
+    GLSLF("  float weight = smoothstep(%f, %f, abs(log(cur.x / average.x)));\n",
+          opts->scene_threshold_low / log_db,
+          opts->scene_threshold_high / log_db);
+    GLSL(    average = mix(average, cur, weight);)
+
+    // Reset SSBO state for the next frame
+    GLSL(    frame_sum = 0; frame_max = 0u;)
+    GLSL(    memoryBarrierBuffer();)
+    GLSL(}) // closes the "last work group" block opened above
+}
+
+static inline float pq_delinearize(float x)
+{
+    float lum = x * (MP_REF_WHITE / 10000.0); // from MP_REF_WHITE-relative units to PQ's 0-10000 range
+    float lum_m1 = powf(lum, PQ_M1);
+    float ratio = (PQ_C1 + PQ_C2 * lum_m1) / (1.0 + PQ_C3 * lum_m1);
+    float encoded = pow(ratio, PQ_M2); // CPU-side twin of the MP_CSP_TRC_PQ delinearize shader
+    return encoded;
+}
+
+// Tone map from a known peak brightness (src_peak) to the range [0,1]. If
+// opts->compute_peak >= 0, runtime peak detection overrides src_peak/sig_avg
+static void pass_tone_map(struct gl_shader_cache *sc,
+                          float src_peak, float dst_peak,
+                          const struct gl_tone_map_opts *opts)
+{
+    GLSLF("// HDR tone mapping\n");
+
+    // To prevent discoloration due to out-of-bounds clipping, we need to make
+    // sure to reduce the value range as far as necessary to keep the entire
+    // signal in range, so tone map based on the brightest component.
+    GLSL(int sig_idx = 0;)
+    GLSL(if (color[1] > color[sig_idx]) sig_idx = 1;)
+    GLSL(if (color[2] > color[sig_idx]) sig_idx = 2;)
+    GLSL(float sig_max = color[sig_idx];)
+    GLSLF("float sig_peak = %f;\n", src_peak);
+    GLSLF("float sig_avg = %f;\n", sdr_avg);
+
+    if (opts->compute_peak >= 0)
+        hdr_update_peak(sc, opts);
+
+    // Always hard-clip the upper bound of the signal range to avoid functions
+    // exploding on inputs greater than 1.0
+    GLSLF("vec3 sig = min(color.rgb, sig_peak);\n");
+
+    // The BT.2390 curve always operates on an absolute scale, so ignore the
+    // dst_peak normalization for it
+    float dst_scale = dst_peak;
+    enum tone_mapping curve = opts->curve ? opts->curve : TONE_MAPPING_BT_2390; // a zero curve value selects BT.2390
+    if (curve == TONE_MAPPING_BT_2390)
+        dst_scale = 1.0;
+
+    // Rescale the variables in order to bring it into a representation where
+    // 1.0 represents the dst_peak. This is because all of the tone mapping
+    // algorithms are defined in such a way that they map to the range [0.0, 1.0].
+    if (dst_scale > 1.0) {
+        GLSLF("sig *= 1.0/%f;\n", dst_scale);
+        GLSLF("sig_peak *= 1.0/%f;\n", dst_scale);
+    }
+
+    GLSL(float sig_orig = sig[sig_idx];)
+    GLSLF("float slope = min(%f, %f / sig_avg);\n", opts->max_boost, sdr_avg); // boost towards sdr_avg, capped at max_boost
+    GLSL(sig *= slope;)
+    GLSL(sig_peak *= slope;)
+
+    float param = opts->curve_param; // NaN selects each curve's built-in default
+    switch (curve) {
+    case TONE_MAPPING_CLIP:
+        GLSLF("sig = min(%f * sig, 1.0);\n", isnan(param) ? 1.0 : param);
+        break;
+
+    case TONE_MAPPING_MOBIUS:
+        GLSLF("if (sig_peak > (1.0 + 1e-6)) {\n");
+        GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param);
+        // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0
+        // where M(x) = scale * (x+a)/(x+b)
+        GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n");
+        GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / "
+              "max(1e-6, sig_peak - 1.0);\n");
+        GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n");
+        GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b)),"
+              "          %s(greaterThan(sig, vec3(j))));\n",
+              gl_sc_bvec(sc, 3));
+        GLSLF("}\n");
+        break;
+
+    case TONE_MAPPING_REINHARD: {
+        float contrast = isnan(param) ? 0.5 : param,
+              offset = (1.0 - contrast) / contrast;
+        GLSLF("sig = sig / (sig + vec3(%f));\n", offset);
+        GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset);
+        GLSL(sig *= scale;)
+        break;
+    }
+
+    case TONE_MAPPING_HABLE: {
+        float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30;
+        GLSLHF("vec3 hable(vec3 x) {\n");
+        GLSLHF("return (x * (%f*x + vec3(%f)) + vec3(%f)) / "
+               "       (x * (%f*x + vec3(%f)) + vec3(%f)) "
+               "       - vec3(%f);\n",
+               A, C*B, D*E,
+               A, B, D*F,
+               E/F);
+        GLSLHF("}\n");
+        GLSLF("sig = hable(max(vec3(0.0), sig)) / hable(vec3(sig_peak)).x;\n");
+        break;
+    }
+
+    case TONE_MAPPING_GAMMA: {
+        float gamma = isnan(param) ? 1.8 : param;
+        GLSLF("const float cutoff = 0.05, gamma = 1.0/%f;\n", gamma);
+        GLSL(float scale = pow(cutoff / sig_peak, gamma) / cutoff;) // gamma is a scalar here; swizzling it needs GLSL 4.20
+        GLSLF("sig = mix(scale * sig,"
+              "          pow(sig / sig_peak, vec3(gamma)),"
+              "          %s(greaterThan(sig, vec3(cutoff))));\n",
+              gl_sc_bvec(sc, 3));
+        break;
+    }
+
+    case TONE_MAPPING_LINEAR: {
+        float coeff = isnan(param) ? 1.0 : param;
+        GLSLF("sig = min(%f / sig_peak, 1.0) * sig;\n", coeff);
+        break;
+    }
+
+    case TONE_MAPPING_BT_2390:
+        // We first need to encode both sig and sig_peak into PQ space
+        GLSLF("vec4 sig_pq = vec4(sig.rgb, sig_peak);                           \n"
+              "sig_pq *= vec4(1.0/%f);                                          \n"
+              "sig_pq = pow(sig_pq, vec4(%f));                                  \n"
+              "sig_pq = (vec4(%f) + vec4(%f) * sig_pq)                          \n"
+              "          / (vec4(1.0) + vec4(%f) * sig_pq);                     \n"
+              "sig_pq = pow(sig_pq, vec4(%f));                                  \n",
+              10000.0 / MP_REF_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2);
+        // Encode both the signal and the target brightness to be relative to
+        // the source peak brightness, and figure out the target peak in this space
+        GLSLF("float scale = 1.0 / sig_pq.a;                                    \n"
+              "sig_pq.rgb *= vec3(scale);                                       \n"
+              "float maxLum = %f * scale;                                       \n",
+              pq_delinearize(dst_peak));
+        // Apply piece-wise hermite spline
+        GLSLF("float ks = 1.5 * maxLum - 0.5;                                   \n"
+              "vec3 tb = (sig_pq.rgb - vec3(ks)) / vec3(1.0 - ks);              \n"
+              "vec3 tb2 = tb * tb;                                              \n"
+              "vec3 tb3 = tb2 * tb;                                             \n"
+              "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) +       \n"
+              "          (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) +              \n"
+              "          (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum);               \n"
+              "sig = mix(pb, sig_pq.rgb, %s(lessThan(sig_pq.rgb, vec3(ks))));   \n",
+              gl_sc_bvec(sc, 3));
+        // Convert back from PQ space to linear light
+        GLSLF("sig *= vec3(sig_pq.a);                                           \n"
+              "sig = pow(sig, vec3(1.0/%f));                                    \n"
+              "sig = max(sig - vec3(%f), 0.0) /                                 \n"
+              "          (vec3(%f) - vec3(%f) * sig);                           \n"
+              "sig = pow(sig, vec3(1.0/%f));                                    \n"
+              "sig *= vec3(%f);                                                 \n",
+              PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / MP_REF_WHITE);
+        break;
+
+    default: // curve value with no case above
+        abort();
+    }
+
+    GLSLF("float coeff = max(sig[sig_idx] - %f, 1e-6) / \n" // blend between the ratio-scaled color and the per-channel mapped sig
+          "              max(sig[sig_idx], 1.0);        \n"
+          "coeff = %f * pow(coeff / %f, %f);            \n"
+          "color.rgb *= sig[sig_idx] / sig_orig;        \n"
+          "color.rgb = mix(color.rgb, %f * sig, coeff); \n",
+          0.18 / dst_scale, 0.90, dst_scale, 0.20, dst_scale); // NOTE(review): sig_orig is not guarded against 0 (black pixels)
+}
+
+// Map colors from one source space to another. These source spaces must be
+// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any
+// auto-guessing. If is_linear is true, we assume the input has already been
+// linearized (e.g. for linear-scaling). If `opts->compute_peak` is >= 0 (the
+// check made in pass_tone_map), we detect the peak instead of relying only on
+// metadata. Note that this requires the caller to have already bound the
+// appropriate SSBO and set up the compute shader metadata
+void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
+                    struct mp_colorspace src, struct mp_colorspace dst,
+                    const struct gl_tone_map_opts *opts)
+{
+    GLSLF("// color mapping\n");
+
+    // Some operations need access to the video's luma coefficients, so make
+    // them available
+    float rgb2xyz[3][3];
+    mp_get_rgb2xyz_matrix(mp_get_csp_primaries(src.primaries), rgb2xyz);
+    gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz[1]); // row 1 of the RGB->XYZ matrix is uploaded as the luma weights
+    mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz);
+    gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]);
+
+    bool need_ootf = src.light != dst.light;
+    if (src.light == MP_CSP_LIGHT_SCENE_HLG && src.hdr.max_luma != dst.hdr.max_luma) // pass_ootf tunes the HLG OOTF by peak, so a peak change needs it too
+        need_ootf = true;
+
+    // All operations from here on require linear light as a starting point,
+    // so we linearize even if src.gamma == dst.gamma when one of the other
+    // operations needs it
+    bool need_linear = src.gamma != dst.gamma ||
+                       src.primaries != dst.primaries ||
+                       src.hdr.max_luma != dst.hdr.max_luma ||
+                       need_ootf;
+
+    if (need_linear && !is_linear) {
+        // We also pull it up so that 1.0 is the reference white
+        pass_linearize(sc, src.gamma);
+        is_linear = true;
+    }
+
+    // Pre-scale the incoming values into an absolute scale
+    GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(src.gamma));
+
+    if (need_ootf)
+        pass_ootf(sc, src.light, src.hdr.max_luma / MP_REF_WHITE);
+
+    // Tone map to prevent clipping due to excessive brightness
+    if (src.hdr.max_luma > dst.hdr.max_luma) {
+        pass_tone_map(sc, src.hdr.max_luma / MP_REF_WHITE,
+                      dst.hdr.max_luma / MP_REF_WHITE, opts);
+    }
+
+    // Adapt to the right colorspace if necessary
+    if (src.primaries != dst.primaries) {
+        struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries),
+                                csp_dst = mp_get_csp_primaries(dst.primaries);
+        float m[3][3] = {{0}};
+        mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m);
+        gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]);
+        GLSL(color.rgb = cms_matrix * color.rgb;)
+
+        if (!opts->gamut_mode || opts->gamut_mode == GAMUT_DESATURATE) { // a zero gamut_mode is treated like GAMUT_DESATURATE
+            GLSL(float cmin = min(min(color.r, color.g), color.b);)
+            GLSL(if (cmin < 0.0) {
+                     float luma = dot(dst_luma, color.rgb);
+                     float coeff = cmin / (cmin - luma);
+                     color.rgb = mix(color.rgb, vec3(luma), coeff);
+                 })
+            GLSLF("float cmax = 1.0/%f * max(max(color.r, color.g), color.b);\n",
+                  dst.hdr.max_luma / MP_REF_WHITE);
+            GLSL(if (cmax > 1.0) color.rgb /= cmax;) // scale down colors whose largest channel exceeds the destination peak
+        }
+    }
+
+    if (need_ootf)
+        pass_inverse_ootf(sc, dst.light, dst.hdr.max_luma / MP_REF_WHITE);
+
+    // Post-scale the outgoing values from absolute scale to normalized.
+    // For SDR, we normalize to the chosen signal peak. For HDR, we normalize
+    // to the encoding range of the transfer function.
+    float dst_range = dst.hdr.max_luma / MP_REF_WHITE;
+    if (mp_trc_is_hdr(dst.gamma))
+        dst_range = mp_trc_nom_peak(dst.gamma);
+
+    GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range);
+
+    // Warn for remaining out-of-gamut colors if enabled
+    if (opts->gamut_mode == GAMUT_WARN) {
+        GLSL(if (any(greaterThan(color.rgb, vec3(1.005))) ||
+                 any(lessThan(color.rgb, vec3(-0.005)))))
+            GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert
+    }
+
+    if (is_linear) // true if the caller passed linear input or we linearized above
+        pass_delinearize(sc, dst.gamma);
+}
+
+// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post.
+// Obtain random numbers by calling rand(h), followed by h = permute(h) to
+// update the state. Assumes the texture was hooked.
+// permute() was modified from the original to avoid "large" numbers in
+// calculations, since low-end mobile GPUs choke on them (overflow).
+static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg)
+{
+    GLSLH(float mod289(float x)  { return x - floor(x * 1.0/289.0) * 289.0; }) // header helpers: mod289, permute, rand
+    GLSLHF("float permute(float x) {\n");
+        GLSLH(return mod289( mod289(34.0*x + 1.0) * (fract(x) + 1.0) );)
+    GLSLHF("}\n");
+    GLSLH(float rand(float x)    { return fract(x * 1.0/41.0); })
+
+    // Initialize the PRNG by hashing the position + a random uniform; leaves the state in GLSL local `h`
+    GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);)
+    GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);)
+    gl_sc_uniform_dynamic(sc); // presumably flags the next uniform as changing on every call - TODO confirm
+    gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); // fresh LFG draw divided by UINT32_MAX
+}
+
+const struct deband_opts deband_opts_def = { // defaults; referenced by deband_conf.defaults
+    .iterations = 1,    // number of average() rounds emitted by pass_sample_deband
+    .threshold = 48.0,  // iteration i uses threshold / (i * 16384.0) as its cut-off
+    .range = 16.0,      // iteration i passes i * range to average()
+    .grain = 32.0,      // noise gain is grain / 8192.0 / mp_trc_nom_peak(trc)
+};
+
+#define OPT_BASE_STRUCT struct deband_opts
+const struct m_sub_options deband_conf = { // option table for struct deband_opts
+    .opts = (const m_option_t[]) {
+        {"iterations", OPT_INT(iterations), M_RANGE(0, 16)}, // 0 emits no averaging rounds at all
+        {"threshold", OPT_FLOAT(threshold), M_RANGE(0.0, 4096.0)},
+        {"range", OPT_FLOAT(range), M_RANGE(1.0, 64.0)},
+        {"grain", OPT_FLOAT(grain), M_RANGE(0.0, 4096.0)},
+        {0} // list terminator
+    },
+    .size = sizeof(struct deband_opts),
+    .defaults = &deband_opts_def,
+};
+
+// Stochastically sample a debanded result from a hooked texture into `color`; `trc` only scales the added grain.
+void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
+                        AVLFG *lfg, enum mp_csp_trc trc)
+{
+    // Initialize the PRNG
+    GLSLF("{\n");
+    prng_init(sc, lfg);
+
+    // Helper: Compute a stochastic approximation of the avg color around a
+    // pixel
+    GLSLHF("vec4 average(float range, inout float h) {\n");
+        // Compute a random angle and distance
+        GLSLH(float dist = rand(h) * range;     h = permute(h);)
+        GLSLH(float dir  = rand(h) * 6.2831853; h = permute(h);)
+        GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));)
+
+        // Sample at quarter-turn intervals around the source pixel
+        GLSLH(vec4 ref[4];)
+        GLSLH(ref[0] = HOOKED_texOff(vec2( o.x,  o.y));)
+        GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y,  o.x));)
+        GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));)
+        GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));)
+
+        // Return the (normalized) average
+        GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;)
+    GLSLHF("}\n");
+
+    // Sample the source pixel
+    GLSL(color = HOOKED_tex(HOOKED_pos);)
+    GLSLF("vec4 avg, diff;\n");
+    for (int i = 1; i <= opts->iterations; i++) { // unrolled on the CPU: one shader stanza per iteration
+        // Sample the average pixel and use it instead of the original if
+        // the difference is below the given threshold
+        GLSLF("avg = average(%f, h);\n", i * opts->range); // sampling range grows with each iteration
+        GLSL(diff = abs(color - avg);)
+        GLSLF("color = mix(avg, color, %s(greaterThan(diff, vec4(%f))));\n",
+              gl_sc_bvec(sc, 4), opts->threshold / (i * 16384.0)); // cut-off shrinks with each iteration
+    }
+
+    // Add some random noise to smooth out residual differences
+    GLSL(vec3 noise;)
+    GLSL(noise.x = rand(h); h = permute(h);)
+    GLSL(noise.y = rand(h); h = permute(h);)
+    GLSL(noise.z = rand(h); h = permute(h);)
+
+    // Noise is scaled to the signal level to prevent extreme noise for HDR
+    float gain = opts->grain/8192.0 / mp_trc_nom_peak(trc);
+    GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain); // noise is re-centred around zero before scaling
+    GLSLF("}\n");
+}
+
+// Unsharp masking: emits color = p + t * param, with t a weighted difference of 9 taps. Assumes the texture was hooked
+void pass_sample_unsharp(struct gl_shader_cache *sc, float param) {
+    GLSLF("{\n");
+    GLSL(float st1 = 1.2;) // offset multiplier for the four diagonal taps
+    GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) // centre pixel
+    GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1))
+                   + HOOKED_texOff(st1 * vec2(+1, -1))
+                   + HOOKED_texOff(st1 * vec2(-1, +1))
+                   + HOOKED_texOff(st1 * vec2(-1, -1));)
+    GLSL(float st2 = 1.5;) // offset multiplier for the four axis-aligned taps
+    GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1,  0))
+                   + HOOKED_texOff(st2 * vec2( 0, +1))
+                   + HOOKED_texOff(st2 * vec2(-1,  0))
+                   + HOOKED_texOff(st2 * vec2( 0, -1));)
+    GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) // weights sum to 0 (0.859375 = 4*0.1171875 + 4*0.09765625), so t is 0 on flat areas
+    GLSLF("color = p + t * %f;\n", param); // param scales how much of t is added back
+    GLSLF("}\n");
+}
diff --git a/video/out/gpu/video_shaders.h b/video/out/gpu/video_shaders.h
new file mode 100644
index 0000000..27e7874
--- /dev/null
+++ b/video/out/gpu/video_shaders.h
@@ -0,0 +1,59 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_VIDEO_SHADERS_H
+#define MP_GL_VIDEO_SHADERS_H
+
+#include <libavutil/lfg.h>
+
+#include "utils.h"
+#include "video.h"
+
+struct deband_opts {
+    int iterations;     // number of average/compare steps the deband shader emits
+    float threshold;    // step i keeps avg unless diff > threshold / (i * 16384)
+    float range;        // step i calls average() with i * range
+    float grain;        // noise gain is grain / 8192 / mp_trc_nom_peak(trc)
+};
+
+extern const struct deband_opts deband_opts_def;
+extern const struct m_sub_options deband_conf;
+
+void sampler_prelude(struct gl_shader_cache *sc, int tex_num);
+void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
+                               int d_x, int d_y);
+void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+                       int components, bool sup_gather);
+void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
+                        int components, int bw, int bh, int iw, int ih);
+void pass_sample_bicubic_fast(struct gl_shader_cache *sc);
+void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
+                            int w, int h);
+
+void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
+void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc);
+
+void pass_color_map(struct gl_shader_cache *sc, bool is_linear,
+                    struct mp_colorspace src, struct mp_colorspace dst,
+                    const struct gl_tone_map_opts *opts);
+
+void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts,
+                        AVLFG *lfg, enum mp_csp_trc trc);
+
+void pass_sample_unsharp(struct gl_shader_cache *sc, float param);
+
+#endif
diff --git a/video/out/gpu_next/context.c b/video/out/gpu_next/context.c
new file mode 100644
index 0000000..2887cff
--- /dev/null
+++ b/video/out/gpu_next/context.c
@@ -0,0 +1,240 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#include <libplacebo/config.h>
+
+#ifdef PL_HAVE_D3D11
+#include <libplacebo/d3d11.h>
+#endif
+
+#ifdef PL_HAVE_OPENGL
+#include <libplacebo/opengl.h>
+#endif
+
+#include "context.h"
+#include "config.h"
+#include "common/common.h"
+#include "options/m_config.h"
+#include "video/out/placebo/utils.h"
+#include "video/out/gpu/video.h"
+
+#if HAVE_D3D11
+#include "osdep/windows_utils.h"
+#include "video/out/d3d11/ra_d3d11.h"
+#include "video/out/d3d11/context.h"
+#endif
+
+#if HAVE_GL
+#include "video/out/opengl/context.h"
+#include "video/out/opengl/ra_gl.h"
+# if HAVE_EGL
+#include <EGL/egl.h>
+# endif
+#endif
+
+#if HAVE_VULKAN
+#include "video/out/vulkan/context.h"
+#endif
+
+#if HAVE_D3D11
+static bool d3d11_pl_init(struct vo *vo, struct gpu_ctx *ctx,
+                          struct ra_ctx_opts *ctx_opts) // ctx_opts is unused here
+{
+#if !defined(PL_HAVE_D3D11)
+    MP_MSG(ctx, vo->probing ? MSGL_V : MSGL_ERR,
+           "libplacebo was built without D3D11 support.\n");
+    return false;
+#else // defined(PL_HAVE_D3D11)
+    bool success = false;
+    // Wrap the device and swap chain of mpv's d3d11 RA context in libplacebo objects.
+    ID3D11Device   *device    = ra_d3d11_get_device(ctx->ra_ctx->ra);
+    IDXGISwapChain *swapchain = ra_d3d11_ctx_get_swapchain(ctx->ra_ctx);
+    if (!device || !swapchain) {
+        mp_err(ctx->log,
+               "Failed to receive required components from the mpv d3d11 "
+               "context! (device: %s, swap chain: %s)\n",
+               device    ? "OK" : "failed",
+               swapchain ? "OK" : "failed");
+        goto err_out;
+    }
+
+    pl_d3d11 d3d11 = pl_d3d11_create(ctx->pllog,
+        pl_d3d11_params(
+            .device = device,
+        )
+    );
+    if (!d3d11) {
+        mp_err(ctx->log, "Failed to acquire a d3d11 libplacebo context!\n");
+        goto err_out;
+    }
+    ctx->gpu = d3d11->gpu; // gpu_ctx_destroy recovers the pl_d3d11 from this gpu
+    // Probing-mode logging is switched off once the libplacebo device exists.
+    mppl_log_set_probing(ctx->pllog, false);
+
+    ctx->swapchain = pl_d3d11_create_swapchain(d3d11,
+        pl_d3d11_swapchain_params(
+            .swapchain = swapchain,
+        )
+    );
+    if (!ctx->swapchain) {
+        mp_err(ctx->log, "Failed to acquire a d3d11 libplacebo swap chain!\n");
+        goto err_out;
+    }
+
+    success = true;
+    // Reached on success too: device and swapchain are released on every path.
+err_out:
+    SAFE_RELEASE(swapchain);
+    SAFE_RELEASE(device);
+
+    return success;
+#endif // defined(PL_HAVE_D3D11)
+}
+#endif // HAVE_D3D11
+
+struct gpu_ctx *gpu_ctx_create(struct vo *vo, struct gl_video_opts *gl_opts)
+{
+    struct gpu_ctx *ctx = talloc_zero(NULL, struct gpu_ctx);
+    ctx->log = vo->log;
+    // The mpv RA context is created first; want_alpha follows gl_opts->alpha_mode.
+    struct ra_ctx_opts *ctx_opts = mp_get_config_group(ctx, vo->global, &ra_ctx_conf);
+    ctx_opts->want_alpha = gl_opts->alpha_mode == ALPHA_YES;
+    ctx->ra_ctx = ra_ctx_create(vo, *ctx_opts);
+    if (!ctx->ra_ctx)
+        goto err_out;
+
+#if HAVE_VULKAN
+    struct mpvk_ctx *vkctx = ra_vk_ctx_get(ctx->ra_ctx);
+    if (vkctx) { // the Vulkan RA context already has these objects: share them
+        ctx->pllog = vkctx->pllog;
+        ctx->gpu = vkctx->gpu;
+        ctx->swapchain = vkctx->swapchain;
+        return ctx;
+    }
+#endif
+    // Every other backend gets a libplacebo log created (and later destroyed) here.
+    ctx->pllog = mppl_log_create(ctx, ctx->log);
+    if (!ctx->pllog)
+        goto err_out;
+
+    mppl_log_set_probing(ctx->pllog, vo->probing);
+
+#if HAVE_D3D11
+    if (ra_is_d3d11(ctx->ra_ctx->ra)) {
+        if (!d3d11_pl_init(vo, ctx, ctx_opts))
+            goto err_out;
+
+        return ctx;
+    }
+#endif
+
+#if HAVE_GL && defined(PL_HAVE_OPENGL)
+    if (ra_is_gl(ctx->ra_ctx->ra)) {
+        struct GL *gl = ra_gl_get(ctx->ra_ctx->ra);
+        pl_opengl opengl = pl_opengl_create(ctx->pllog,
+            pl_opengl_params(
+                .debug = ctx_opts->debug,
+                .allow_software = ctx_opts->allow_sw,
+                .get_proc_addr_ex = (void *) gl->get_fn,
+                .proc_ctx = gl->fn_ctx,
+# if HAVE_EGL
+                .egl_display = eglGetCurrentDisplay(),
+                .egl_context = eglGetCurrentContext(),
+# endif
+            )
+        );
+        if (!opengl)
+            goto err_out;
+        ctx->gpu = opengl->gpu; // gpu_ctx_destroy recovers the pl_opengl from this gpu
+
+        mppl_log_set_probing(ctx->pllog, false);
+
+        ctx->swapchain = pl_opengl_create_swapchain(opengl, pl_opengl_swapchain_params(
+            .max_swapchain_depth = vo->opts->swapchain_depth,
+            .framebuffer.flipped = gl->flipped,
+        ));
+        if (!ctx->swapchain)
+            goto err_out;
+
+        return ctx;
+    }
+#elif HAVE_GL
+    if (ra_is_gl(ctx->ra_ctx->ra)) {
+        MP_MSG(ctx, vo->probing ? MSGL_V : MSGL_ERR,
+            "libplacebo was built without OpenGL support.\n");
+    }
+#endif
+    // Reached on any failure above, and when no backend branch matched the RA.
+err_out:
+    gpu_ctx_destroy(&ctx);
+    return NULL;
+}
+
+bool gpu_ctx_resize(struct gpu_ctx *ctx, int w, int h)
+{
+#if HAVE_VULKAN
+    if (ra_vk_ctx_get(ctx->ra_ctx))
+        // vulkan RA handles this by itself
+        return true;
+#endif
+    // Every other backend resizes the libplacebo swapchain held by this ctx.
+    return pl_swapchain_resize(ctx->swapchain, &w, &h);
+}
+
+void gpu_ctx_destroy(struct gpu_ctx **ctxp)
+{
+    struct gpu_ctx *ctx = *ctxp;
+    if (!ctx)
+        return;
+    if (!ctx->ra_ctx)
+        goto skip_common_pl_cleanup;
+    // (gpu_ctx_create makes the RA context first, so without it nothing else exists.)
+#if HAVE_VULKAN
+    if (ra_vk_ctx_get(ctx->ra_ctx))
+        // vulkan RA context handles pl cleanup by itself,
+        // skip common local clean-up.
+        goto skip_common_pl_cleanup;
+#endif
+    // Tear down in reverse order of creation: swapchain, backend object, log.
+    if (ctx->swapchain)
+        pl_swapchain_destroy(&ctx->swapchain);
+
+    if (ctx->gpu) {
+#if HAVE_GL && defined(PL_HAVE_OPENGL)
+        if (ra_is_gl(ctx->ra_ctx->ra)) {
+            pl_opengl opengl = pl_opengl_get(ctx->gpu);
+            pl_opengl_destroy(&opengl);
+        }
+#endif
+
+#if HAVE_D3D11 && defined(PL_HAVE_D3D11)
+        if (ra_is_d3d11(ctx->ra_ctx->ra)) {
+            pl_d3d11 d3d11 = pl_d3d11_get(ctx->gpu);
+            pl_d3d11_destroy(&d3d11);
+        }
+#endif
+    }
+
+    if (ctx->pllog)
+        pl_log_destroy(&ctx->pllog);
+
+skip_common_pl_cleanup:
+    ra_ctx_destroy(&ctx->ra_ctx);
+
+    talloc_free(ctx);
+    *ctxp = NULL; // clear the caller's pointer so it does not dangle
+}
diff --git a/video/out/gpu_next/context.h b/video/out/gpu_next/context.h
new file mode 100644
index 0000000..b98b9e7
--- /dev/null
+++ b/video/out/gpu_next/context.h
@@ -0,0 +1,40 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <https://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libplacebo/renderer.h>
+
+struct mp_log;
+struct ra_ctx;
+struct vo;
+struct gl_video_opts;
+
+struct gpu_ctx {
+    struct mp_log *log;         // copied from vo->log in gpu_ctx_create
+    struct ra_ctx *ra_ctx;      // mpv RA context; destroyed by gpu_ctx_destroy
+
+    pl_log pllog;               // these three are shared with the Vulkan RA
+    pl_gpu gpu;                 // context when one is in use; otherwise they are
+    pl_swapchain swapchain;     // created and destroyed by gpu_ctx_create/_destroy
+
+    void *priv;                 // not referenced by gpu_next/context.c
+};
+
+struct gpu_ctx *gpu_ctx_create(struct vo *vo, struct gl_video_opts *gl_opts);
+bool gpu_ctx_resize(struct gpu_ctx *ctx, int w, int h);
+void gpu_ctx_destroy(struct gpu_ctx **ctxp);
diff --git a/video/out/hwdec/dmabuf_interop.h b/video/out/hwdec/dmabuf_interop.h
new file mode 100644
index 0000000..e9b3e8e
--- /dev/null
+++ b/video/out/hwdec/dmabuf_interop.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <libavutil/hwcontext_drm.h>
+
+#include "video/out/gpu/hwdec.h"
+
+struct dmabuf_interop {
+    bool use_modifiers;    // GL backend: EGL_EXT_image_dma_buf_import_modifiers found
+    bool composed_layers;  // Wayland backend: set when the driver name contains "vaapi"
+    // Backend hooks; the libplacebo backend assigns only interop_map/interop_unmap.
+    bool (*interop_init)(struct ra_hwdec_mapper *mapper,
+                         const struct ra_imgfmt_desc *desc);
+    void (*interop_uninit)(const struct ra_hwdec_mapper *mapper);
+
+    bool (*interop_map)(struct ra_hwdec_mapper *mapper,
+                        struct dmabuf_interop *dmabuf_interop,
+                        bool probing);
+    void (*interop_unmap)(struct ra_hwdec_mapper *mapper);
+};
+
+struct dmabuf_interop_priv {
+    int num_planes;             // plane count iterated by the libplacebo map
+    struct mp_image layout;     // queried for per-plane width and height
+    struct ra_tex *tex[4];      // GL backend: textures wrapped at init time
+
+    AVDRMFrameDescriptor desc;  // DRM objects and layers read by the map hooks
+    bool surface_acquired;
+
+    void *interop_mapper_priv;  // backend state (GL: struct vaapi_gl_mapper_priv)
+};
+
+typedef bool (*dmabuf_interop_init)(const struct ra_hwdec *hw,
+                                    struct dmabuf_interop *dmabuf_interop);
+
+bool dmabuf_interop_gl_init(const struct ra_hwdec *hw,
+                            struct dmabuf_interop *dmabuf_interop);
+bool dmabuf_interop_pl_init(const struct ra_hwdec *hw,
+                            struct dmabuf_interop *dmabuf_interop);
+bool dmabuf_interop_wl_init(const struct ra_hwdec *hw,
+                            struct dmabuf_interop *dmabuf_interop);
diff --git a/video/out/hwdec/dmabuf_interop_gl.c b/video/out/hwdec/dmabuf_interop_gl.c
new file mode 100644
index 0000000..e7fb103
--- /dev/null
+++ b/video/out/hwdec/dmabuf_interop_gl.c
@@ -0,0 +1,311 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "dmabuf_interop.h"
+
+#include <drm_fourcc.h>
+#include <EGL/egl.h>
+#include "video/out/opengl/ra_gl.h"
+
+typedef void* GLeglImageOES;
+typedef void *EGLImageKHR;
+
+// Any EGL_EXT_image_dma_buf_import definitions used in this source file.
+#define EGL_LINUX_DMA_BUF_EXT             0x3270
+#define EGL_LINUX_DRM_FOURCC_EXT          0x3271
+#define EGL_DMA_BUF_PLANE0_FD_EXT         0x3272
+#define EGL_DMA_BUF_PLANE0_OFFSET_EXT     0x3273
+#define EGL_DMA_BUF_PLANE0_PITCH_EXT      0x3274
+#define EGL_DMA_BUF_PLANE1_FD_EXT         0x3275
+#define EGL_DMA_BUF_PLANE1_OFFSET_EXT     0x3276
+#define EGL_DMA_BUF_PLANE1_PITCH_EXT      0x3277
+#define EGL_DMA_BUF_PLANE2_FD_EXT         0x3278
+#define EGL_DMA_BUF_PLANE2_OFFSET_EXT     0x3279
+#define EGL_DMA_BUF_PLANE2_PITCH_EXT      0x327A
+
+
+// Any EGL_EXT_image_dma_buf_import_modifiers definitions used in this source file.
+#define EGL_DMA_BUF_PLANE3_FD_EXT         0x3440
+#define EGL_DMA_BUF_PLANE3_OFFSET_EXT     0x3441
+#define EGL_DMA_BUF_PLANE3_PITCH_EXT      0x3442
+#define EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT 0x3443
+#define EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT 0x3444
+#define EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT 0x3445
+#define EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT 0x3446
+#define EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT 0x3447
+#define EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT 0x3448
+#define EGL_DMA_BUF_PLANE3_MODIFIER_LO_EXT 0x3449
+#define EGL_DMA_BUF_PLANE3_MODIFIER_HI_EXT 0x344A
+
+struct vaapi_gl_mapper_priv {
+    GLuint gl_textures[4];   // generated in init; one is bound per mapped plane
+    EGLImageKHR images[4];   // created per plane in map, destroyed in unmap
+    // Entry points resolved with eglGetProcAddress() in vaapi_gl_mapper_init.
+    EGLImageKHR (EGLAPIENTRY *CreateImageKHR)(EGLDisplay, EGLContext,
+                                              EGLenum, EGLClientBuffer,
+                                              const EGLint *);
+    EGLBoolean (EGLAPIENTRY *DestroyImageKHR)(EGLDisplay, EGLImageKHR);
+    void (EGLAPIENTRY *EGLImageTargetTexture2DOES)(GLenum, GLeglImageOES);
+};
+
+static bool vaapi_gl_mapper_init(struct ra_hwdec_mapper *mapper,
+                                 const struct ra_imgfmt_desc *desc)
+{
+    struct dmabuf_interop_priv *p_mapper = mapper->priv;
+    struct vaapi_gl_mapper_priv *p = talloc_ptrtype(NULL, p);
+    p_mapper->interop_mapper_priv = p; // stored before any failure return below
+    // The compound literal also zeroes gl_textures[] and images[].
+    *p = (struct vaapi_gl_mapper_priv) {
+        // EGL_KHR_image_base
+        .CreateImageKHR = (void *)eglGetProcAddress("eglCreateImageKHR"),
+        .DestroyImageKHR = (void *)eglGetProcAddress("eglDestroyImageKHR"),
+        // GL_OES_EGL_image
+        .EGLImageTargetTexture2DOES =
+            (void *)eglGetProcAddress("glEGLImageTargetTexture2DOES"),
+    };
+
+    if (!p->CreateImageKHR || !p->DestroyImageKHR ||
+        !p->EGLImageTargetTexture2DOES)
+        return false;
+    // All four texture names are generated; only num_planes of them are set up.
+    GL *gl = ra_gl_get(mapper->ra);
+    gl->GenTextures(4, p->gl_textures);
+    for (int n = 0; n < desc->num_planes; n++) {
+        gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        gl->BindTexture(GL_TEXTURE_2D, 0);
+
+        struct ra_tex_params params = {
+            .dimensions = 2,
+            .w = mp_image_plane_w(&p_mapper->layout, n),
+            .h = mp_image_plane_h(&p_mapper->layout, n),
+            .d = 1,
+            .format = desc->planes[n],
+            .render_src = true,
+            .src_linear = true,
+        };
+        // Only RA_CTYPE_UNORM plane formats are accepted.
+        if (params.format->ctype != RA_CTYPE_UNORM)
+            return false;
+
+        p_mapper->tex[n] = ra_create_wrapped_tex(mapper->ra, &params,
+                                                 p->gl_textures[n]);
+        if (!p_mapper->tex[n])
+            return false;
+    }
+
+    return true;
+}
+
+// Deletes the GL textures, frees the wrapped ra_tex objects and the private state.
+static void vaapi_gl_mapper_uninit(const struct ra_hwdec_mapper *mapper)
+{
+    struct dmabuf_interop_priv *p_mapper = mapper->priv;
+    struct vaapi_gl_mapper_priv *priv = p_mapper->interop_mapper_priv;
+    if (!priv)
+        return; // nothing was set up, or it has already been torn down
+    GL *gl = ra_gl_get(mapper->ra);
+    gl->DeleteTextures(4, priv->gl_textures);
+    for (int i = 0; i < 4; i++) {
+        priv->gl_textures[i] = 0;
+        ra_tex_free(mapper->ra, &p_mapper->tex[i]);
+    }
+    talloc_free(priv);
+    p_mapper->interop_mapper_priv = NULL;
+}
+
+#define ADD_ATTRIB(name, value) /* append one name/value pair to attribs[] */ \
+    do {                                                \
+    assert(num_attribs + 3 < MP_ARRAY_SIZE(attribs));   \
+    attribs[num_attribs++] = (name);                    \
+    attribs[num_attribs++] = (value);                   \
+    attribs[num_attribs] = EGL_NONE; /* list stays terminated */ \
+    } while(0)
+// Expects i, j, p_mapper, dmabuf_interop, attribs and num_attribs in scope.
+#define ADD_PLANE_ATTRIBS(plane) do { \
+            uint64_t drm_format_modifier = p_mapper->desc.objects[p_mapper->desc.layers[i].planes[j].object_index].format_modifier; \
+            ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _FD_EXT, \
+                        p_mapper->desc.objects[p_mapper->desc.layers[i].planes[j].object_index].fd); \
+            ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _OFFSET_EXT, \
+                        p_mapper->desc.layers[i].planes[j].offset); \
+            ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _PITCH_EXT, \
+                        p_mapper->desc.layers[i].planes[j].pitch); \
+            if (dmabuf_interop->use_modifiers && drm_format_modifier != DRM_FORMAT_MOD_INVALID) { \
+                ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_LO_EXT, drm_format_modifier & 0xfffffffful); \
+                ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_HI_EXT, drm_format_modifier >> 32); \
+            }                               \
+        } while (0)
+
+static bool vaapi_gl_map(struct ra_hwdec_mapper *mapper,
+                         struct dmabuf_interop *dmabuf_interop,
+                         bool probing)
+{
+    struct dmabuf_interop_priv *p_mapper = mapper->priv;
+    struct vaapi_gl_mapper_priv *p = p_mapper->interop_mapper_priv;
+    // Imports every plane of the DRM frame as an EGLImage bound to a GL texture.
+    GL *gl = ra_gl_get(mapper->ra);
+
+    for (int i = 0, n = 0; i < p_mapper->desc.nb_layers; i++) { // n: plane index across layers
+        /*
+         * As we must map surfaces as one texture per plane, we can only support
+         * a subset of possible multi-plane layer formats. This is due to having
+         * to manually establish what DRM format each synthetic layer should
+         * have.
+         */
+        uint32_t format[AV_DRM_MAX_PLANES] = {
+            p_mapper->desc.layers[i].format,
+        };
+
+        if (p_mapper->desc.layers[i].nb_planes > 1) {
+            switch (p_mapper->desc.layers[i].format) {
+            case DRM_FORMAT_NV12:
+            case DRM_FORMAT_NV16:
+                format[0] = DRM_FORMAT_R8;
+                format[1] = DRM_FORMAT_GR88;
+                break;
+            case DRM_FORMAT_YUV420:
+                format[0] = DRM_FORMAT_R8;
+                format[1] = DRM_FORMAT_R8;
+                format[2] = DRM_FORMAT_R8;
+                break;
+            case DRM_FORMAT_P010:
+#ifdef DRM_FORMAT_P030 /* Format added in a newer libdrm version than minimum */
+            case DRM_FORMAT_P030:
+#endif
+                format[0] = DRM_FORMAT_R16;
+                format[1] = DRM_FORMAT_GR1616;
+                break;
+            default:
+                mp_msg(mapper->log, probing ? MSGL_DEBUG : MSGL_ERR,
+                       "Cannot map unknown multi-plane format: 0x%08X\n",
+                       p_mapper->desc.layers[i].format);
+                return false;
+            }
+        } else {
+            /*
+             * As OpenGL only has one guaranteed rgba format (rgba8), drivers
+             * that support importing dmabuf formats with different channel
+             * orders do implicit swizzling to get to rgba. However, we look at
+             * the original imgfmt to decide channel order, and we then swizzle
+             * based on that. So, we can get into a situation where we swizzle
+             * twice and end up with a mess.
+             *
+             * The simplest way to avoid that is to lie to OpenGL and say that
+             * the surface we are importing is in the natural channel order, so
+             * that our swizzling does the right thing.
+             *
+             * DRM ABGR corresponds to OpenGL RGBA due to different naming
+             * conventions.
+             */
+            switch (format[0]) {
+            case DRM_FORMAT_ARGB8888:
+            case DRM_FORMAT_RGBA8888:
+            case DRM_FORMAT_BGRA8888:
+                format[0] = DRM_FORMAT_ABGR8888;
+                break;
+            case DRM_FORMAT_XRGB8888:
+                format[0] = DRM_FORMAT_XBGR8888;
+                break;
+            case DRM_FORMAT_RGBX8888:
+            case DRM_FORMAT_BGRX8888:
+                // Logically, these two formats should be handled as above,
+                // but there appear to be additional problems that make the
+                // format change here insufficient or incorrect, so we're
+                // doing nothing for now.
+                break;
+            }
+        }
+
+        for (int j = 0; j < p_mapper->desc.layers[i].nb_planes; j++, n++) {
+            int attribs[48] = {EGL_NONE};
+            int num_attribs = 0;
+
+            ADD_ATTRIB(EGL_LINUX_DRM_FOURCC_EXT, format[j]);
+            ADD_ATTRIB(EGL_WIDTH,  p_mapper->tex[n]->params.w);
+            ADD_ATTRIB(EGL_HEIGHT, p_mapper->tex[n]->params.h);
+            ADD_PLANE_ATTRIBS(0);
+            // No context or client buffer is passed; attribs alone describe the image.
+            p->images[n] = p->CreateImageKHR(eglGetCurrentDisplay(),
+                EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, attribs);
+            if (!p->images[n]) {
+                mp_msg(mapper->log, probing ? MSGL_DEBUG : MSGL_ERR,
+                    "Failed to import surface in EGL: %u\n", eglGetError());
+                return false;
+            }
+
+            gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]);
+            p->EGLImageTargetTexture2DOES(GL_TEXTURE_2D, p->images[n]);
+
+            mapper->tex[n] = p_mapper->tex[n];
+        }
+    }
+
+    gl->BindTexture(GL_TEXTURE_2D, 0);
+    return true;
+}
+
+// Destroys every EGLImage that vaapi_gl_map created and clears its slot.
+static void vaapi_gl_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct dmabuf_interop_priv *p_mapper = mapper->priv;
+    struct vaapi_gl_mapper_priv *priv = p_mapper->interop_mapper_priv;
+    if (!priv)
+        return; // no backend state, hence no images
+    for (int i = 0; i < 4; i++) {
+        if (priv->images[i])
+            priv->DestroyImageKHR(eglGetCurrentDisplay(), priv->images[i]);
+        priv->images[i] = 0;
+    }
+}
+
+bool dmabuf_interop_gl_init(const struct ra_hwdec *hw,
+                            struct dmabuf_interop *dmabuf_interop)
+{
+    if (!ra_is_gl(hw->ra_ctx->ra)) {
+        // This is not an OpenGL RA.
+        return false;
+    }
+    // The EGL queries below act on the current context and display.
+    if (!eglGetCurrentContext())
+        return false;
+
+    const char *exts = eglQueryString(eglGetCurrentDisplay(), EGL_EXTENSIONS);
+    if (!exts)
+        return false;
+    // Required: dma-buf import, EGLImage support on both sides, MPGL_CAP_TEX_RG.
+    GL *gl = ra_gl_get(hw->ra_ctx->ra);
+    if (!gl_check_extension(exts, "EGL_EXT_image_dma_buf_import") ||
+        !gl_check_extension(exts, "EGL_KHR_image_base") ||
+        !gl_check_extension(gl->extensions, "GL_OES_EGL_image") ||
+        !(gl->mpgl_caps & MPGL_CAP_TEX_RG))
+        return false;
+    // Modifiers are optional; vaapi_gl_map only passes them when this is set.
+    dmabuf_interop->use_modifiers =
+        gl_check_extension(exts, "EGL_EXT_image_dma_buf_import_modifiers");
+
+    MP_VERBOSE(hw, "using EGL dmabuf interop\n");
+
+    dmabuf_interop->interop_init = vaapi_gl_mapper_init;
+    dmabuf_interop->interop_uninit = vaapi_gl_mapper_uninit;
+    dmabuf_interop->interop_map = vaapi_gl_map;
+    dmabuf_interop->interop_unmap = vaapi_gl_unmap;
+
+    return true;
+}
diff --git a/video/out/hwdec/dmabuf_interop_pl.c b/video/out/hwdec/dmabuf_interop_pl.c
new file mode 100644
index 0000000..0a8ec5b
--- /dev/null
+++ b/video/out/hwdec/dmabuf_interop_pl.c
@@ -0,0 +1,138 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <unistd.h>
+
+#include "dmabuf_interop.h"
+#include "video/out/placebo/ra_pl.h"
+#include "video/out/placebo/utils.h"
+
+// Imports each plane of the DRM frame as a libplacebo texture in mapper->tex[].
+static bool vaapi_pl_map(struct ra_hwdec_mapper *mapper,
+                         struct dmabuf_interop *dmabuf_interop, bool probing)
+{
+    struct dmabuf_interop_priv *p = mapper->priv;
+    pl_gpu gpu = ra_pl_get(mapper->ra);
+
+    struct ra_imgfmt_desc desc = {0};
+    if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+        return false;
+
+    // The calling code validates that the total number of exported planes
+    // equals the number we expected in p->num_planes.
+    int layer = 0;
+    int layer_plane = 0;
+    for (int n = 0; n < p->num_planes; n++) {
+        // Look up the DRM object (fd, size, modifier) that backs plane n.
+        const struct ra_format *format = desc.planes[n];
+        int id = p->desc.layers[layer].planes[layer_plane].object_index;
+        int fd = p->desc.objects[id].fd;
+        size_t size = p->desc.objects[id].size;
+        uint32_t offset = p->desc.layers[layer].planes[layer_plane].offset;
+        uint32_t pitch = p->desc.layers[layer].planes[layer_plane].pitch;
+
+        // AMD drivers do not return the size in the surface description, so we
+        // need to query it manually (via off_t, so that -1 stays distinguishable).
+        if (size == 0) {
+            off_t end = lseek(fd, 0, SEEK_END);
+            if (end == -1) {
+                MP_ERR(mapper, "Cannot obtain size of object with fd %d: %s\n",
+                       fd, mp_strerror(errno));
+                return false;
+            }
+            size = end;
+            if (lseek(fd, 0, SEEK_SET) == -1) {
+                MP_ERR(mapper, "Failed to reset offset for fd %d: %s\n",
+                       fd, mp_strerror(errno));
+                return false;
+            }
+        }
+
+        struct pl_tex_params tex_params = {
+            .w = mp_image_plane_w(&p->layout, n),
+            .h = mp_image_plane_h(&p->layout, n),
+            .d = 0,
+            .format = format->priv,
+            .sampleable = true,
+            .import_handle = PL_HANDLE_DMA_BUF,
+            .shared_mem = (struct pl_shared_mem) {
+                .handle = {
+                    .fd = fd,
+                },
+                .size = size,
+                .offset = offset,
+                .drm_format_mod = p->desc.objects[id].format_modifier,
+                .stride_w = pitch,
+            },
+        };
+
+        mppl_log_set_probing(gpu->log, probing);
+        pl_tex pltex = pl_tex_create(gpu, &tex_params);
+        mppl_log_set_probing(gpu->log, false);
+        if (!pltex)
+            return false;
+
+        struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+        int ret = mppl_wrap_tex(mapper->ra, pltex, ratex);
+        if (!ret) {
+            pl_tex_destroy(gpu, &pltex);
+            talloc_free(ratex);
+            return false;
+        }
+        mapper->tex[n] = ratex;
+
+        MP_TRACE(mapper, "Object %d with fd %d imported as %p\n",
+                id, fd, (void *)ratex);
+
+        layer_plane++; // advance to the next plane, rolling over into the next layer
+        if (layer_plane == p->desc.layers[layer].nb_planes) {
+            layer_plane = 0;
+            layer++;
+        }
+    }
+    return true;
+}
+
+static void vaapi_pl_unmap(struct ra_hwdec_mapper *mapper)
+{
+    for (int n = 0; n < 4; n++) // frees every slot that vaapi_pl_map may have filled
+        ra_tex_free(mapper->ra, &mapper->tex[n]);
+}
+
+bool dmabuf_interop_pl_init(const struct ra_hwdec *hw,
+                            struct dmabuf_interop *dmabuf_interop)
+{
+    pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);
+    if (!gpu) {
+        // This is not a libplacebo RA.
+        return false;
+    }
+
+    if (!(gpu->import_caps.tex & PL_HANDLE_DMA_BUF)) {
+        MP_VERBOSE(hw, "libplacebo dmabuf interop requires support for "
+                        "PL_HANDLE_DMA_BUF import.\n");
+        return false;
+    }
+
+    MP_VERBOSE(hw, "using libplacebo dmabuf interop\n");
+    // Only map/unmap are assigned; interop_init/interop_uninit are left untouched.
+    dmabuf_interop->interop_map = vaapi_pl_map;
+    dmabuf_interop->interop_unmap = vaapi_pl_unmap;
+
+    return true;
+}
diff --git a/video/out/hwdec/dmabuf_interop_wl.c b/video/out/hwdec/dmabuf_interop_wl.c
new file mode 100644
index 0000000..606a0aa
--- /dev/null
+++ b/video/out/hwdec/dmabuf_interop_wl.c
@@ -0,0 +1,83 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include "video/out/wldmabuf/ra_wldmabuf.h"
+#include "dmabuf_interop.h"
+
+static bool mapper_init(struct ra_hwdec_mapper *mapper,
+                        const struct ra_imgfmt_desc *desc)
+{
+    return true; // nothing is set up here, so init always succeeds
+}
+
+static void mapper_uninit(const struct ra_hwdec_mapper *mapper)
+{ // intentionally empty: mapper_init acquires nothing
+}
+
+// Checks that the compositor can display the mapped surface; creates nothing.
+static bool map(struct ra_hwdec_mapper *mapper,
+                struct dmabuf_interop *dmabuf_interop, bool probing)
+{
+    // Only composed-layer surfaces (i.e. vaapi) are validated against the
+    // compositor. drmprime is untested, so it is accepted unconditionally
+    // for now.
+    if (!dmabuf_interop->composed_layers)
+        return true;
+
+    struct dmabuf_interop_priv *mapper_p = mapper->priv;
+    uint32_t drm_format = mapper_p->desc.layers[0].format;
+    // uint64_t is not 'long' on every ABI, so log it as unsigned long long.
+    unsigned long long modifier = mapper_p->desc.objects[0].format_modifier;
+
+    if (mapper_p->desc.nb_layers != 1) {
+        MP_VERBOSE(mapper, "Mapped surface has separate layers - expected composed layers.\n");
+        return false;
+    } else if (!ra_compatible_format(mapper->ra, drm_format,
+        mapper_p->desc.objects[0].format_modifier)) {
+        MP_VERBOSE(mapper, "Mapped surface with format %s; drm format '%s(%016llx)' "
+                   "is not supported by compositor.\n",
+                   mp_imgfmt_to_name(mapper->src->params.hw_subfmt),
+                   mp_tag_str(drm_format), modifier);
+        return false;
+    }
+
+    MP_VERBOSE(mapper, "Supported Wayland display format %s: '%s(%016llx)'\n",
+               mp_imgfmt_to_name(mapper->src->params.hw_subfmt),
+               mp_tag_str(drm_format), modifier);
+
+    return true;
+}
+
+static void unmap(struct ra_hwdec_mapper *mapper)
+{ // intentionally empty: map() only validates and creates no objects
+}
+
+bool dmabuf_interop_wl_init(const struct ra_hwdec *hw,
+                            struct dmabuf_interop *dmabuf_interop)
+{
+    if (!ra_is_wldmabuf(hw->ra_ctx->ra))
+        return false;
+    // Only vaapi drivers get composed_layers; map() validates formats only then.
+    if (strstr(hw->driver->name, "vaapi") != NULL)
+        dmabuf_interop->composed_layers = true;
+
+    dmabuf_interop->interop_init = mapper_init;
+    dmabuf_interop->interop_uninit = mapper_uninit;
+    dmabuf_interop->interop_map = map;
+    dmabuf_interop->interop_unmap = unmap;
+
+    return true;
+}
diff --git a/video/out/hwdec/hwdec_aimagereader.c b/video/out/hwdec/hwdec_aimagereader.c
new file mode 100644
index 0000000..0dd5497
--- /dev/null
+++ b/video/out/hwdec/hwdec_aimagereader.c
@@ -0,0 +1,402 @@
+/*
+ * Copyright (c) 2021 sfan5 <sfan5@live.de>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <dlfcn.h>
+#include <EGL/egl.h>
+#include <media/NdkImageReader.h>
+#include <android/native_window_jni.h>
+#include <libavcodec/mediacodec.h>
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_mediacodec.h>
+
+#include "misc/jni.h"
+#include "osdep/threads.h"
+#include "osdep/timer.h"
+#include "video/out/gpu/hwdec.h"
+#include "video/out/opengl/ra_gl.h"
+
+// Handle types used by the extension entry points stored in struct priv, and
+// the image target passed to CreateImageKHR in mapper_map(), declared locally
+// in this file.
+typedef void *GLeglImageOES;
+typedef void *EGLImageKHR;
+#define EGL_NATIVE_BUFFER_ANDROID 0x3140
+
+// State owned by the hwdec driver instance (hw->priv).
+struct priv_owner {
+    // Device context registered with hwdec_devices_add() in init().
+    struct mp_hwdec_ctx hwctx;
+    // Reader created by AImageReader_newWithUsage() in init().
+    AImageReader *reader;
+    // JNI global reference to the Surface obtained from the reader's window.
+    jobject surface;
+    // dlopen() handle of libmediandk.so, closed in uninit().
+    void *lib_handle;
+
+    // Entry points resolved at runtime by load_lib_functions(). Every member
+    // below must have a matching entry in lib_functions[].
+    media_status_t (*AImageReader_newWithUsage)(
+        int32_t, int32_t, int32_t, uint64_t, int32_t, AImageReader **);
+    media_status_t (*AImageReader_getWindow)(
+        AImageReader *, ANativeWindow **);
+    media_status_t (*AImageReader_setImageListener)(
+        AImageReader *, AImageReader_ImageListener *);
+    media_status_t (*AImageReader_acquireLatestImage)(AImageReader *, AImage **);
+    void (*AImageReader_delete)(AImageReader *);
+    media_status_t (*AImage_getHardwareBuffer)(const AImage *, AHardwareBuffer **);
+    void (*AImage_delete)(AImage *);
+    void (*AHardwareBuffer_describe)(const AHardwareBuffer *, AHardwareBuffer_Desc *);
+    jobject (*ANativeWindow_toSurface)(JNIEnv *, ANativeWindow *);
+};
+
+// Per-mapper state (mapper->priv).
+struct priv {
+    // Copied from mapper->log in mapper_init().
+    struct mp_log *log;
+
+    // GL_TEXTURE_EXTERNAL_OES texture wrapped as mapper->tex[0].
+    GLuint gl_texture;
+    // Image acquired in mapper_map(), deleted in mapper_unmap().
+    AImage *image;
+    // EGL image created in mapper_map(), destroyed in mapper_unmap().
+    EGLImageKHR egl_image;
+
+    // image_available is set by image_callback() and consumed by
+    // mapper_map(); lock protects it and cond is signalled when it is set.
+    mp_mutex lock;
+    mp_cond cond;
+    bool image_available;
+
+    // Extension entry points looked up with eglGetProcAddress() in
+    // mapper_init().
+    EGLImageKHR (EGLAPIENTRY *CreateImageKHR)(
+        EGLDisplay, EGLContext, EGLenum, EGLClientBuffer, const EGLint *);
+    EGLBoolean (EGLAPIENTRY *DestroyImageKHR)(EGLDisplay, EGLImageKHR);
+    EGLClientBuffer (EGLAPIENTRY *GetNativeClientBufferANDROID)(
+        const struct AHardwareBuffer *);
+    void (EGLAPIENTRY *EGLImageTargetTexture2DOES)(GLenum, GLeglImageOES);
+};
+
+// Symbols resolved by load_lib_functions(), each paired with the byte offset
+// of the struct priv_owner member that receives the resolved address. The
+// table ends with an entry whose symbol is NULL.
+static const struct { const char *symbol; size_t offset; } lib_functions[] = {
+    { "AImageReader_newWithUsage", offsetof(struct priv_owner, AImageReader_newWithUsage) },
+    { "AImageReader_getWindow", offsetof(struct priv_owner, AImageReader_getWindow) },
+    { "AImageReader_setImageListener", offsetof(struct priv_owner, AImageReader_setImageListener) },
+    { "AImageReader_acquireLatestImage", offsetof(struct priv_owner, AImageReader_acquireLatestImage) },
+    { "AImageReader_delete", offsetof(struct priv_owner, AImageReader_delete) },
+    { "AImage_getHardwareBuffer", offsetof(struct priv_owner, AImage_getHardwareBuffer) },
+    { "AImage_delete", offsetof(struct priv_owner, AImage_delete) },
+    { "AHardwareBuffer_describe", offsetof(struct priv_owner, AHardwareBuffer_describe) },
+    { "ANativeWindow_toSurface", offsetof(struct priv_owner, ANativeWindow_toSurface) },
+    { NULL, 0 },
+};
+
+
+// Allocate a MediaCodec hw device context that targets `surface` and
+// initialize it. Returns the new reference, or NULL if allocation or
+// initialization failed.
+static AVBufferRef *create_mediacodec_device_ref(jobject surface)
+{
+    AVBufferRef *ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_MEDIACODEC);
+
+    if (ref) {
+        AVHWDeviceContext *dev = (void *)ref->data;
+        AVMediaCodecDeviceContext *mc = dev->hwctx;
+        mc->surface = surface;
+
+        // On failure the reference is dropped; av_buffer_unref() also resets
+        // `ref`, so the caller receives NULL.
+        if (av_hwdevice_ctx_init(ref) < 0)
+            av_buffer_unref(&ref);
+    }
+
+    return ref;
+}
+
+// Open libmediandk.so and fill in every function pointer listed in
+// lib_functions[]. Each symbol is looked up in the library first and in the
+// global scope (RTLD_DEFAULT) second.
+//
+// Returns false if the library cannot be opened or a symbol is missing; a
+// missing symbol is logged as a warning. The handle stays in p->lib_handle
+// in either case once dlopen() succeeded.
+static bool load_lib_functions(struct priv_owner *p, struct mp_log *log)
+{
+    p->lib_handle = dlopen("libmediandk.so", RTLD_NOW | RTLD_GLOBAL);
+    if (p->lib_handle == NULL)
+        return false;
+
+    for (int i = 0; lib_functions[i].symbol != NULL; i++) {
+        const char *name = lib_functions[i].symbol;
+
+        void *addr = dlsym(p->lib_handle, name);
+        if (addr == NULL)
+            addr = dlsym(RTLD_DEFAULT, name);
+        if (addr == NULL) {
+            mp_warn(log, "Could not resolve symbol %s\n", name);
+            return false;
+        }
+
+        // Store the address into the member located at the recorded offset.
+        void **slot = (void **)((uint8_t *)p + lib_functions[i].offset);
+        *slot = addr;
+    }
+
+    return true;
+}
+
+// Driver init: checks the GL/EGL prerequisites, loads the NDK entry points,
+// creates the AImageReader and registers a MediaCodec hw device that targets
+// the reader's Surface. Returns 0 on success and -1 on any failure.
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+
+    // Needs a GL RA with a current EGL context.
+    if (!ra_is_gl(hw->ra_ctx->ra))
+        return -1;
+    if (!eglGetCurrentContext())
+        return -1;
+
+    const char *exts = eglQueryString(eglGetCurrentDisplay(), EGL_EXTENSIONS);
+    if (!gl_check_extension(exts, "EGL_ANDROID_image_native_buffer"))
+        return -1;
+
+    if (!load_lib_functions(p, hw->log))
+        return -1;
+
+    // Request the essl3 variant of the external-image GLSL extension when the
+    // GL context advertises it, the plain one otherwise.
+    static const char *es2_exts[] = {"GL_OES_EGL_image_external", 0};
+    static const char *es3_exts[] = {"GL_OES_EGL_image_external_essl3", 0};
+    GL *gl = ra_gl_get(hw->ra_ctx->ra);
+    if (gl_check_extension(gl->extensions, es3_exts[0]))
+        hw->glsl_extensions = es3_exts;
+    else
+        hw->glsl_extensions = es2_exts;
+
+    // dummy dimensions, AImageReader only transports hardware buffers
+    media_status_t ret = p->AImageReader_newWithUsage(16, 16,
+        AIMAGE_FORMAT_PRIVATE, AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE,
+        5, &p->reader);
+    if (ret != AMEDIA_OK) {
+        MP_ERR(hw, "newWithUsage failed: %d\n", ret);
+        return -1;
+    }
+    assert(p->reader);
+
+    ANativeWindow *window;
+    ret = p->AImageReader_getWindow(p->reader, &window);
+    if (ret != AMEDIA_OK) {
+        MP_ERR(hw, "getWindow failed: %d\n", ret);
+        return -1;
+    }
+    assert(window);
+
+    // Turn the window into a Java Surface; keep a global reference to it and
+    // drop the local one.
+    JNIEnv *env = MP_JNI_GET_ENV(hw);
+    assert(env);
+    jobject surface = p->ANativeWindow_toSurface(env, window);
+    p->surface = (*env)->NewGlobalRef(env, surface);
+    (*env)->DeleteLocalRef(env, surface);
+
+    p->hwctx = (struct mp_hwdec_ctx) {
+        .driver_name = hw->driver->name,
+        .av_device_ref = create_mediacodec_device_ref(p->surface),
+        .hw_imgfmt = IMGFMT_MEDIACODEC,
+    };
+
+    if (!p->hwctx.av_device_ref) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+        return -1;
+    }
+
+    hwdec_devices_add(hw->devs, &p->hwctx);
+
+    return 0;
+}
+
+// Driver teardown: releases the Surface reference, the reader, the hw device
+// and finally the library handle. Each resource is checked (or is safe to
+// pass when unset), so this also works after a partially completed init().
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    JNIEnv *env = MP_JNI_GET_ENV(hw);
+    assert(env);
+
+    if (p->surface) {
+        (*env)->DeleteGlobalRef(env, p->surface);
+        p->surface = NULL;
+    }
+
+    if (p->reader) {
+        p->AImageReader_delete(p->reader);
+        p->reader = NULL;
+    }
+
+    hwdec_devices_remove(hw->devs, &p->hwctx);
+    av_buffer_unref(&p->hwctx.av_device_ref);
+
+    // Close the library last: the function pointers used above come from it.
+    if (p->lib_handle) {
+        dlclose(p->lib_handle);
+        p->lib_handle = NULL;
+    }
+}
+
+// onImageAvailable listener installed in mapper_init(); `context` is the
+// mapper's struct priv. Marks an image as available and signals the
+// condition that mapper_map() waits on.
+// NOTE(review): presumably invoked on a thread other than the one running
+// mapper_map(), hence the locking - confirm against the NDK documentation.
+static void image_callback(void *context, AImageReader *reader)
+{
+    struct priv *p = context;
+
+    mp_mutex_lock(&p->lock);
+    p->image_available = true;
+    mp_cond_signal(&p->cond);
+    mp_mutex_unlock(&p->lock);
+}
+
+// Per-mapper setup: resolves the EGL/GLES extension entry points, installs
+// the image listener on the owner's AImageReader and creates the external
+// OES texture that is exposed as mapper->tex[0].
+//
+// Returns 0 on success and -1 if an entry point is missing, no suitable RA
+// format is available or the texture cannot be wrapped.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    struct priv_owner *o = mapper->owner->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    p->log = mapper->log;
+    mp_mutex_init(&p->lock);
+    mp_cond_init(&p->cond);
+
+    p->CreateImageKHR = (void *)eglGetProcAddress("eglCreateImageKHR");
+    p->DestroyImageKHR = (void *)eglGetProcAddress("eglDestroyImageKHR");
+    p->GetNativeClientBufferANDROID =
+        (void *)eglGetProcAddress("eglGetNativeClientBufferANDROID");
+    p->EGLImageTargetTexture2DOES =
+        (void *)eglGetProcAddress("glEGLImageTargetTexture2DOES");
+
+    if (!p->CreateImageKHR || !p->DestroyImageKHR ||
+        !p->GetNativeClientBufferANDROID || !p->EGLImageTargetTexture2DOES)
+        return -1;
+
+    // From now on image_callback() is told about new images, with p as its
+    // context.
+    AImageReader_ImageListener listener = {
+        .context = p,
+        .onImageAvailable = image_callback,
+    };
+    o->AImageReader_setImageListener(o->reader, &listener);
+
+    // The mapped result is presented as a plain RGB0 image.
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = IMGFMT_RGB0;
+    mapper->dst_params.hw_subfmt = 0;
+
+    // texture creation
+    gl->GenTextures(1, &p->gl_texture);
+    gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, p->gl_texture);
+    gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, 0);
+
+    struct ra_tex_params params = {
+        .dimensions = 2,
+        .w = mapper->src_params.w,
+        .h = mapper->src_params.h,
+        .d = 1,
+        .format = ra_find_unorm_format(mapper->ra, 1, 4),
+        .render_src = true,
+        .src_linear = true,
+        .external_oes = true,
+    };
+
+    // The format lookup may come back empty, so check the pointer before
+    // looking at the component type.
+    if (!params.format || params.format->ctype != RA_CTYPE_UNORM)
+        return -1;
+
+    mapper->tex[0] = ra_create_wrapped_tex(mapper->ra, &params, p->gl_texture);
+    if (!mapper->tex[0])
+        return -1;
+
+    return 0;
+}
+
+// Per-mapper teardown: detaches the image listener, deletes the GL texture,
+// frees the wrapping ra_tex and destroys the synchronization objects.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    struct priv_owner *o = mapper->owner->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    // Clear the listener first so image_callback() is no longer registered
+    // with p as its context.
+    o->AImageReader_setImageListener(o->reader, NULL);
+
+    gl->DeleteTextures(1, &p->gl_texture);
+    p->gl_texture = 0;
+
+    ra_tex_free(mapper->ra, &mapper->tex[0]);
+
+    mp_mutex_destroy(&p->lock);
+    mp_cond_destroy(&p->cond);
+}
+
+// Release what mapper_map() acquired for the current frame: the EGL image
+// first, then the AImage. Both fields are reset afterwards, so a call with
+// nothing mapped does nothing.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *mp = mapper->priv;
+    struct priv_owner *owner = mapper->owner->priv;
+
+    if (mp->egl_image != 0) {
+        mp->DestroyImageKHR(eglGetCurrentDisplay(), mp->egl_image);
+        mp->egl_image = 0;
+    }
+
+    if (mp->image != NULL) {
+        owner->AImage_delete(mp->image);
+        mp->image = NULL;
+    }
+}
+
+// Map the current MediaCodec frame: release the codec buffer for rendering,
+// wait for the reader to report an image, acquire it and bind its hardware
+// buffer to the external OES texture through an EGL image.
+//
+// Returns 0 on success and -1 on failure. As an exception, a failed acquire
+// after a wait that timed out also returns 0 (see the comment below).
+// p->image and p->egl_image are released by mapper_unmap().
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    struct priv_owner *o = mapper->owner->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    {
+        if (mapper->src->imgfmt != IMGFMT_MEDIACODEC)
+            return -1;
+        // The buffer handle is carried in planes[3]; the second argument (1)
+        // asks for the buffer to be rendered on release.
+        AVMediaCodecBuffer *buffer = (AVMediaCodecBuffer *)mapper->src->planes[3];
+        av_mediacodec_release_buffer(buffer, 1);
+    }
+
+    // Wait up to 100 ms for image_callback() to flag a new image, then
+    // consume the flag.
+    bool image_available = false;
+    mp_mutex_lock(&p->lock);
+    if (!p->image_available) {
+        mp_cond_timedwait(&p->cond, &p->lock, MP_TIME_MS_TO_NS(100));
+        if (!p->image_available)
+            MP_WARN(mapper, "Waiting for frame timed out!\n");
+    }
+    image_available = p->image_available;
+    p->image_available = false;
+    mp_mutex_unlock(&p->lock);
+
+    media_status_t ret = o->AImageReader_acquireLatestImage(o->reader, &p->image);
+    if (ret != AMEDIA_OK) {
+        MP_ERR(mapper, "acquireLatestImage failed: %d\n", ret);
+        // If we merely timed out waiting return success anyway to avoid
+        // flashing frames of render errors.
+        return image_available ? -1 : 0;
+    }
+    assert(p->image);
+
+    AHardwareBuffer *hwbuf = NULL;
+    ret = o->AImage_getHardwareBuffer(p->image, &hwbuf);
+    if (ret != AMEDIA_OK) {
+        MP_ERR(mapper, "getHardwareBuffer failed: %d\n", ret);
+        return -1;
+    }
+    assert(hwbuf);
+
+    // Update texture size since it may differ
+    AHardwareBuffer_Desc d;
+    o->AHardwareBuffer_describe(hwbuf, &d);
+    if (mapper->tex[0]->params.w != d.width || mapper->tex[0]->params.h != d.height) {
+        MP_VERBOSE(p, "Texture dimensions changed to %dx%d\n", d.width, d.height);
+        mapper->tex[0]->params.w = d.width;
+        mapper->tex[0]->params.h = d.height;
+    }
+
+    EGLClientBuffer buf = p->GetNativeClientBufferANDROID(hwbuf);
+    if (!buf)
+        return -1;
+
+    const int attribs[] = {EGL_NONE};
+    p->egl_image = p->CreateImageKHR(eglGetCurrentDisplay(),
+        EGL_NO_CONTEXT, EGL_NATIVE_BUFFER_ANDROID, buf, attribs);
+    if (!p->egl_image)
+        return -1;
+
+    // Attach the EGL image to the external texture wrapped by mapper->tex[0].
+    gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, p->gl_texture);
+    p->EGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, p->egl_image);
+    gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, 0);
+
+    return 0;
+}
+
+
+// Driver description: handles IMGFMT_MEDIACODEC frames using the functions
+// defined above; struct priv_owner is the driver state and struct priv the
+// per-mapper state.
+const struct ra_hwdec_driver ra_hwdec_aimagereader = {
+    .name = "aimagereader",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_MEDIACODEC, 0},
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/hwdec/hwdec_cuda.c b/video/out/hwdec/hwdec_cuda.c
new file mode 100644
index 0000000..68ad60d
--- /dev/null
+++ b/video/out/hwdec/hwdec_cuda.c
@@ -0,0 +1,286 @@
+/*
+ * Copyright (c) 2016 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * This hwdec implements an optimized output path using CUDA->OpenGL
+ * or CUDA->Vulkan interop for frame data that is stored in CUDA
+ * device memory. Although it is not explicit in the code here, the
+ * only practical way to get data in this form is from the
+ * nvdec/cuvid decoder.
+ */
+
+#include "config.h"
+#include "hwdec_cuda.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_cuda.h>
+
+// Translate and log the result of a CUDA driver call.
+//
+//   hw    hwdec whose log and CUDA function table (hw->priv) are used
+//   err   result code of the call
+//   func  text describing the call; the CHECK_CU macros pass the
+//         stringified expression
+//
+// Returns 0 if err is CUDA_SUCCESS and -1 otherwise. Failures are logged at
+// verbose level while probing and at error level otherwise.
+int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func)
+{
+    // Start out as NULL: the results of the two lookups below are not
+    // checked, so the pointers must not be indeterminate if a lookup returns
+    // without writing to its output.
+    const char *err_name = NULL;
+    const char *err_string = NULL;
+
+    struct cuda_hw_priv *p = hw->priv;
+    int level = hw->probing ? MSGL_V : MSGL_ERR;
+
+    MP_TRACE(hw, "Calling %s\n", func);
+
+    if (err == CUDA_SUCCESS)
+        return 0;
+
+    p->cu->cuGetErrorName(err, &err_name);
+    p->cu->cuGetErrorString(err, &err_string);
+
+    MP_MSG(hw, level, "%s failed", func);
+    if (err_name && err_string)
+        MP_MSG(hw, level, " -> %s: %s", err_name, err_string);
+    MP_MSG(hw, level, "\n");
+
+    return -1;
+}
+
+// Shorthand for check_cu() that passes the stringified expression as the
+// description; requires a variable named `hw` in scope.
+#define CHECK_CU(x) check_cu(hw, (x), #x)
+
+// Backend probes compiled into this build, tried in order by cuda_init().
+// The list is NULL-terminated.
+static const cuda_interop_init interop_inits[] = {
+#if HAVE_GL
+    cuda_gl_init,
+#endif
+#if HAVE_VULKAN
+    cuda_vk_init,
+#endif
+    NULL
+};
+
+// Driver init: loads the CUDA function table, lets the first matching
+// backend in interop_inits[] set up its contexts and callbacks, and creates
+// an FFmpeg CUDA device around p->decode_ctx that is then registered with
+// hw->devs. Returns 0 on success and -1 on failure; failures are logged at
+// verbose level while probing and at error level otherwise.
+static int cuda_init(struct ra_hwdec *hw)
+{
+    AVBufferRef *hw_device_ctx = NULL;
+    CUcontext dummy;
+    int ret = 0;
+    struct cuda_hw_priv *p = hw->priv;
+    CudaFunctions *cu;
+    int level = hw->probing ? MSGL_V : MSGL_ERR;
+
+    ret = cuda_load_functions(&p->cu, NULL);
+    if (ret != 0) {
+        MP_MSG(hw, level, "Failed to load CUDA symbols\n");
+        return -1;
+    }
+    cu = p->cu;
+
+    ret = CHECK_CU(cu->cuInit(0));
+    if (ret < 0)
+        return -1;
+
+    // Initialise CUDA context from backend.
+    for (int i = 0; interop_inits[i]; i++) {
+        if (interop_inits[i](hw)) {
+            break;
+        }
+    }
+
+    // A backend that accepted the RA has filled in these callbacks.
+    if (!p->ext_init || !p->ext_uninit) {
+        MP_MSG(hw, level,
+               "CUDA hwdec only works with OpenGL or Vulkan backends.\n");
+        return -1;
+    }
+
+    hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA);
+    if (!hw_device_ctx)
+        goto error;
+
+    AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
+
+    // Hand the decode context over to the FFmpeg device.
+    AVCUDADeviceContext *device_hwctx = device_ctx->hwctx;
+    device_hwctx->cuda_ctx = p->decode_ctx;
+
+    ret = av_hwdevice_ctx_init(hw_device_ctx);
+    if (ret < 0) {
+        MP_MSG(hw, level, "av_hwdevice_ctx_init failed\n");
+        goto error;
+    }
+
+    // NOTE(review): presumably removes the context that the backend left
+    // current on this thread - confirm against the backend init functions.
+    ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    if (ret < 0)
+        goto error;
+
+    p->hwctx = (struct mp_hwdec_ctx) {
+        .driver_name = hw->driver->name,
+        .av_device_ref = hw_device_ctx,
+        .hw_imgfmt = IMGFMT_CUDA,
+    };
+    hwdec_devices_add(hw->devs, &p->hwctx);
+    return 0;
+
+ error:
+    av_buffer_unref(&hw_device_ctx);
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+
+    return -1;
+}
+
+// Driver teardown: unregisters and releases the FFmpeg device, destroys the
+// CUDA contexts and frees the function table. The decode context is only
+// destroyed on its own when it is distinct from the display context, so a
+// shared context is destroyed exactly once.
+static void cuda_uninit(struct ra_hwdec *hw)
+{
+    struct cuda_hw_priv *p = hw->priv;
+    CudaFunctions *cu = p->cu;
+
+    hwdec_devices_remove(hw->devs, &p->hwctx);
+    av_buffer_unref(&p->hwctx.av_device_ref);
+
+    const bool separate_decode_ctx =
+        p->decode_ctx && p->decode_ctx != p->display_ctx;
+
+    if (separate_decode_ctx) {
+        CHECK_CU(cu->cuCtxDestroy(p->decode_ctx));
+    }
+    if (p->display_ctx) {
+        CHECK_CU(cu->cuCtxDestroy(p->display_ctx));
+    }
+
+    cuda_free_functions(&p->cu);
+}
+
+// From here on CHECK_CU reports through the hwdec that owns the mapper, so
+// it requires a variable named `mapper` in scope.
+#undef CHECK_CU
+#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
+
+// Per-mapper setup: derives the output parameters from the source's
+// hw_subfmt and lets the backend create one texture/CUDA array pair per
+// plane while the display context is current.
+//
+// Returns 0 on success and a negative value if the format is unsupported, a
+// CUDA context call fails or the backend fails to set up a plane.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CUcontext dummy;
+    CudaFunctions *cu = p_owner->cu;
+    int ret = 0, eret = 0;
+
+    p->display_ctx = p_owner->display_ctx;
+
+    // The mapped image uses the software format carried in hw_subfmt.
+    int imgfmt = mapper->src_params.hw_subfmt;
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = imgfmt;
+    mapper->dst_params.hw_subfmt = 0;
+
+    mp_image_set_params(&p->layout, &mapper->dst_params);
+
+    struct ra_imgfmt_desc desc;
+    if (!ra_get_imgfmt_desc(mapper->ra, imgfmt, &desc)) {
+        MP_ERR(mapper, "Unsupported format: %s\n", mp_imgfmt_to_name(imgfmt));
+        return -1;
+    }
+
+    ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
+    if (ret < 0)
+        return ret;
+
+    for (int n = 0; n < desc.num_planes; n++) {
+        if (!p_owner->ext_init(mapper, desc.planes[n], n)) {
+            // ext_init() only reports true/false. Without this the function
+            // would fall through to "return ret" with ret still 0 and report
+            // success for a mapper that has no usable textures.
+            ret = -1;
+            goto error;
+        }
+    }
+
+ error:
+    // Reached on success as well: undo the push from above.
+    eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    if (eret < 0)
+        return eret;
+
+    return ret;
+}
+
+// Per-mapper teardown: with the display context pushed, lets the backend
+// release its state for all four plane slots and frees the matching
+// textures.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct cuda_hw_priv *owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CudaFunctions *cu = owner->cu;
+    CUcontext dummy;
+
+    // Best effort only: a failing CUDA call is logged by CHECK_CU but does
+    // not stop the cleanup.
+    CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
+
+    int n = 0;
+    while (n < 4) {
+        owner->ext_uninit(mapper, n);
+        ra_tex_free(mapper->ra, &mapper->tex[n]);
+        n++;
+    }
+
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+}
+
+// Intentionally empty: mapper_map() copies the frame into the mapper's own
+// textures and drops the source image itself, so there is nothing to undo.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+}
+
+// Copy every plane of the source image from CUDA device memory into the
+// CUDA arrays that back the mapper textures.
+//
+// The display context is pushed for the duration of the copy and popped
+// again on every path after the push. The reference to the source image is
+// dropped whether or not the copy succeeded. Returns 0 on success and a
+// negative value on failure.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct cuda_mapper_priv *p = mapper->priv;
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    CudaFunctions *cu = p_owner->cu;
+    CUcontext dummy;
+    int ret = 0, eret = 0;
+
+    ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx));
+    if (ret < 0)
+        return ret;
+
+    for (int n = 0; n < p->layout.num_planes; n++) {
+        // The wait/signal hooks are optional and only report true/false, so
+        // a failure has to be turned into an error code here; otherwise the
+        // function would return 0 for a frame that was never copied.
+        if (p_owner->ext_wait) {
+            if (!p_owner->ext_wait(mapper, n)) {
+                ret = -1;
+                goto error;
+            }
+        }
+
+        CUDA_MEMCPY2D cpy = {
+            .srcMemoryType = CU_MEMORYTYPE_DEVICE,
+            .srcDevice     = (CUdeviceptr)mapper->src->planes[n],
+            .srcPitch      = mapper->src->stride[n],
+            .srcY          = 0,
+            .dstMemoryType = CU_MEMORYTYPE_ARRAY,
+            .dstArray      = p->cu_array[n],
+            .WidthInBytes  = mp_image_plane_w(&p->layout, n) *
+                             mapper->tex[n]->params.format->pixel_size,
+            .Height        = mp_image_plane_h(&p->layout, n),
+        };
+
+        ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, 0));
+        if (ret < 0)
+            goto error;
+
+        if (p_owner->ext_signal) {
+            if (!p_owner->ext_signal(mapper, n)) {
+                ret = -1;
+                goto error;
+            }
+        }
+    }
+    // Backends that set do_full_sync get a stream synchronization after the
+    // copies have been queued; its result is logged but not propagated.
+    if (p_owner->do_full_sync)
+        CHECK_CU(cu->cuStreamSynchronize(0));
+
+    // fall through
+ error:
+
+    // Regardless of success or failure, we no longer need the source image,
+    // because this hwdec makes an explicit memcpy into the mapper textures
+    mp_image_unrefp(&mapper->src);
+
+    eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    if (eret < 0)
+        return eret;
+
+    return ret;
+}
+
+// Driver description: handles IMGFMT_CUDA frames; struct cuda_hw_priv is the
+// driver state and struct cuda_mapper_priv the per-mapper state.
+const struct ra_hwdec_driver ra_hwdec_cuda = {
+    .name = "cuda",
+    .imgfmts = {IMGFMT_CUDA, 0},
+    .priv_size = sizeof(struct cuda_hw_priv),
+    .init = cuda_init,
+    .uninit = cuda_uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct cuda_mapper_priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/hwdec/hwdec_cuda.h b/video/out/hwdec/hwdec_cuda.h
new file mode 100644
index 0000000..9c55053
--- /dev/null
+++ b/video/out/hwdec/hwdec_cuda.h
@@ -0,0 +1,59 @@
+/*
+ * Copyright (c) 2019 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <ffnvcodec/dynlink_loader.h>
+
+#include "video/out/gpu/hwdec.h"
+
+// Driver-wide state of the CUDA hwdec (hw->priv).
+struct cuda_hw_priv {
+    // Device context registered with hwdec_devices_add() in cuda_init().
+    struct mp_hwdec_ctx hwctx;
+    // CUDA function table filled by cuda_load_functions().
+    CudaFunctions *cu;
+    // Context made current while mapper textures are set up and written.
+    CUcontext display_ctx;
+    // Context handed to the FFmpeg CUDA device; may be the same context as
+    // display_ctx.
+    CUcontext decode_ctx;
+
+    // Do we need to do a full CPU sync after copying
+    bool do_full_sync;
+
+    // Backend hooks, called once per plane n. ext_init/ext_uninit are
+    // mandatory: cuda_init() fails if either is left unset.
+    bool (*ext_init)(struct ra_hwdec_mapper *mapper,
+                     const struct ra_format *format, int n);
+    void (*ext_uninit)(const struct ra_hwdec_mapper *mapper, int n);
+
+    // These are only necessary if the gpu api requires synchronisation
+    bool (*ext_wait)(const struct ra_hwdec_mapper *mapper, int n);
+    bool (*ext_signal)(const struct ra_hwdec_mapper *mapper, int n);
+};
+
+// Per-mapper state of the CUDA hwdec (mapper->priv). The arrays are indexed
+// by plane.
+struct cuda_mapper_priv {
+    // Image carrying the output parameters; used to query per-plane sizes.
+    struct mp_image layout;
+    // Destination of the per-plane copy in the mapper's map function.
+    CUarray cu_array[4];
+
+    // Copy of the owner's display_ctx, taken in the mapper's init function.
+    CUcontext display_ctx;
+
+    // Backend-private per-plane data, set by the ext_init hook.
+    void *ext[4];
+};
+
+// Signature of a backend probe. The probes are tried in order during driver
+// init and the first one returning true wins.
+typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw);
+
+// Probe for the OpenGL backend (hwdec_cuda_gl.c).
+bool cuda_gl_init(const struct ra_hwdec *hw);
+
+// Probe for the Vulkan backend (presumably defined in hwdec_cuda_vk.c).
+bool cuda_vk_init(const struct ra_hwdec *hw);
+
+// Returns 0 if err is CUDA_SUCCESS; otherwise logs that `func` failed and
+// returns -1.
+int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func);
diff --git a/video/out/hwdec/hwdec_cuda_gl.c b/video/out/hwdec/hwdec_cuda_gl.c
new file mode 100644
index 0000000..f20540e
--- /dev/null
+++ b/video/out/hwdec/hwdec_cuda_gl.c
@@ -0,0 +1,174 @@
+/*
+ * Copyright (c) 2019 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "hwdec_cuda.h"
+#include "options/m_config.h"
+#include "options/options.h"
+#include "video/out/opengl/formats.h"
+#include "video/out/opengl/ra_gl.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_cuda.h>
+#include <unistd.h>
+
+// Shorthand for check_cu() on the hwdec that owns the mapper; requires a
+// variable named `mapper` in scope.
+#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
+
+// Per-plane OpenGL interop state, stored in cuda_mapper_priv.ext[n].
+struct ext_gl {
+    // Handle returned by cuGraphicsGLRegisterImage() for the plane texture.
+    CUgraphicsResource cu_res;
+};
+
+// ext_init hook of the OpenGL backend: creates the texture for plane n,
+// registers it with CUDA and stores the CUDA array behind it in
+// p->cu_array[n]. Returns true on success and false on failure.
+static bool cuda_ext_gl_init(struct ra_hwdec_mapper *mapper,
+                             const struct ra_format *format, int n)
+{
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+    int ret = 0;
+    CUcontext dummy;
+
+    struct ext_gl *egl = talloc_ptrtype(NULL, egl);
+    // Do not rely on the allocation being cleared: cuda_ext_gl_uninit()
+    // decides by cu_res whether a resource has to be unregistered, and it
+    // may run after a failure that happened before registration.
+    *egl = (struct ext_gl){0};
+    p->ext[n] = egl;
+
+    struct ra_tex_params params = {
+        .dimensions = 2,
+        .w = mp_image_plane_w(&p->layout, n),
+        .h = mp_image_plane_h(&p->layout, n),
+        .d = 1,
+        .format = format,
+        .render_src = true,
+        .src_linear = format->linear_filter,
+    };
+
+    mapper->tex[n] = ra_tex_create(mapper->ra, &params);
+    if (!mapper->tex[n]) {
+        goto error;
+    }
+
+    GLuint texture;
+    GLenum target;
+    ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target);
+
+    ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&egl->cu_res, texture, target,
+                                                 CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD));
+    if (ret < 0)
+        goto error;
+
+    // The resource is mapped only long enough to fetch the array handle.
+    ret = CHECK_CU(cu->cuGraphicsMapResources(1, &egl->cu_res, 0));
+    if (ret < 0)
+        goto error;
+
+    ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], egl->cu_res,
+                                                           0, 0));
+    if (ret < 0)
+        goto error;
+
+    ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &egl->cu_res, 0));
+    if (ret < 0)
+        goto error;
+
+    return true;
+
+error:
+    // NOTE(review): the generic mapper_init() in hwdec_cuda.c pops the
+    // context again after this hook fails, so this pop looks unbalanced.
+    // Kept as is because callers may depend on the resulting error.
+    CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+    return false;
+}
+
+// ext_uninit hook of the OpenGL backend: unregisters the CUDA resource of
+// plane n, if one is recorded, and frees the per-plane state. A slot that
+// was never initialized (NULL) is accepted.
+static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n)
+{
+    struct cuda_mapper_priv *p = mapper->priv;
+    struct ext_gl *egl = p->ext[n];
+
+    if (egl != NULL && egl->cu_res != 0) {
+        struct cuda_hw_priv *p_owner = mapper->owner->priv;
+        CudaFunctions *cu = p_owner->cu;
+
+        CHECK_CU(cu->cuGraphicsUnregisterResource(egl->cu_res));
+        egl->cu_res = 0;
+    }
+
+    talloc_free(egl);
+}
+
+// From here on CHECK_CU reports through `hw` directly, so it requires a
+// variable named `hw` in scope.
+#undef CHECK_CU
+#define CHECK_CU(x) check_cu(hw, (x), #x)
+
+// Backend probe for OpenGL. Returns false if the RA is not OpenGL, the GL
+// version is too old or a CUDA call fails. On success the display context
+// (and, if a different decode device was requested, a separate decode
+// context) has been created and the ext_init/ext_uninit hooks are set.
+bool cuda_gl_init(const struct ra_hwdec *hw) {
+    int ret = 0;
+    struct cuda_hw_priv *p = hw->priv;
+    CudaFunctions *cu = p->cu;
+
+    if (ra_is_gl(hw->ra_ctx->ra)) {
+        GL *gl = ra_gl_get(hw->ra_ctx->ra);
+        if (gl->version < 210 && gl->es < 300) {
+            MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n");
+            return false;
+        }
+    } else {
+        // This is not an OpenGL RA.
+        return false;
+    }
+
+    // Ask CUDA for (at most one) device associated with the GL context.
+    CUdevice display_dev;
+    unsigned int device_count;
+    ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1,
+                                      CU_GL_DEVICE_LIST_ALL));
+    if (ret < 0)
+        return false;
+
+    ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+                                   display_dev));
+    if (ret < 0)
+        return false;
+
+    // By default decoding shares the display context.
+    p->decode_ctx = p->display_ctx;
+
+    struct cuda_opts *opts = mp_get_config_group(NULL, hw->global, &cuda_conf);
+    int decode_dev_idx = opts->cuda_device;
+    talloc_free(opts);
+
+    // A device index above -1 selects an explicit decode device.
+    if (decode_dev_idx > -1) {
+        CUcontext dummy;
+        CUdevice decode_dev;
+        ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx));
+        if (ret < 0) {
+            CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+            return false;
+        }
+
+        if (decode_dev != display_dev) {
+            MP_INFO(hw, "Using separate decoder and display devices\n");
+
+            // Pop the display context. We won't use it again during init()
+            ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy));
+            if (ret < 0)
+                return false;
+
+            ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+                                           decode_dev));
+            if (ret < 0)
+                return false;
+        }
+    }
+
+    // We don't have a way to do a GPU sync after copying
+    p->do_full_sync = true;
+
+    p->ext_init = cuda_ext_gl_init;
+    p->ext_uninit = cuda_ext_gl_uninit;
+
+    return true;
+}
diff --git a/video/out/hwdec/hwdec_cuda_vk.c b/video/out/hwdec/hwdec_cuda_vk.c
new file mode 100644
index 0000000..b9f8caa
--- /dev/null
+++ b/video/out/hwdec/hwdec_cuda_vk.c
@@ -0,0 +1,344 @@
+/*
+ * Copyright (c) 2019 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+#include "hwdec_cuda.h"
+#include "video/out/placebo/ra_pl.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_cuda.h>
+#include <libplacebo/vulkan.h>
+#include <unistd.h>
+
+#if HAVE_WIN32_DESKTOP
+#include <versionhelpers.h>
+#define HANDLE_TYPE PL_HANDLE_WIN32
+#else
+#define HANDLE_TYPE PL_HANDLE_FD
+#endif
+
+#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x)
+
+// Per-plane Vulkan <-> CUDA interop state; one instance is stored in
+// cuda_mapper_priv.ext[n] by cuda_ext_vk_init().
+struct ext_vk {
+    CUexternalMemory mem;   // filled in by cuImportExternalMemory()
+    CUmipmappedArray mma;   // mapped from mem; level 0 ends up in cu_array[n]
+
+    pl_tex pltex;                 // texture created with an exportable handle
+    pl_vulkan_sem vk_sem;         // timeline semaphore + last used counter value
+    union pl_handle sem_handle;   // handle/FD exported when creating vk_sem
+    CUexternalSemaphore cuda_sem; // filled in by cuImportExternalSemaphore()
+};
+
+/*
+ * Set up the Vulkan/CUDA interop objects for plane n of the mapper.
+ *
+ * Creates an exportable libplacebo texture, wraps it as mapper->tex[n],
+ * imports its memory into CUDA as a mipmapped array (level 0 is stored in
+ * p->cu_array[n]), and creates a timeline semaphore that is exported from
+ * Vulkan and imported into CUDA.
+ *
+ * Returns true on success. On failure, false is returned; whatever was
+ * already stored in p->ext[n] / mapper->tex[n] is left in place, and the
+ * ext_vk state is kept consistent so that cuda_ext_vk_uninit() can run on it.
+ */
+static bool cuda_ext_vk_init(struct ra_hwdec_mapper *mapper,
+                             const struct ra_format *format, int n)
+{
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+    int mem_fd = -1;
+    int ret = 0;
+
+    struct ext_vk *evk = talloc_ptrtype(NULL, evk);
+    // Do not rely on the allocator clearing the memory: both the error path
+    // below and cuda_ext_vk_uninit() inspect these fields to decide what
+    // needs to be released.
+    *evk = (struct ext_vk){0};
+#if HAVE_WIN32_DESKTOP
+    evk->sem_handle.handle = NULL;
+#else
+    evk->sem_handle.fd = -1;
+#endif
+    p->ext[n] = evk;
+
+    pl_gpu gpu = ra_pl_get(mapper->ra);
+
+    struct pl_tex_params tex_params = {
+        .w = mp_image_plane_w(&p->layout, n),
+        .h = mp_image_plane_h(&p->layout, n),
+        .d = 0,
+        .format = ra_pl_fmt_get(format),
+        .sampleable = true,
+        .export_handle = HANDLE_TYPE,
+    };
+
+    evk->pltex = pl_tex_create(gpu, &tex_params);
+    if (!evk->pltex) {
+        goto error;
+    }
+
+    struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+    ret = mppl_wrap_tex(mapper->ra, evk->pltex, ratex);
+    if (!ret) {
+        pl_tex_destroy(gpu, &evk->pltex);
+        talloc_free(ratex);
+        goto error;
+    }
+    mapper->tex[n] = ratex;
+
+#if !HAVE_WIN32_DESKTOP
+    // Import a duplicate, so the texture keeps its own FD.
+    mem_fd = dup(evk->pltex->shared_mem.handle.fd);
+    if (mem_fd < 0)
+        goto error;
+#endif
+
+    CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = {
+#if HAVE_WIN32_DESKTOP
+        .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32,
+        .handle.win32.handle = evk->pltex->shared_mem.handle.handle,
+#else
+        .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD,
+        .handle.fd = mem_fd,
+#endif
+        .size = evk->pltex->shared_mem.size,
+        .flags = 0,
+    };
+    ret = CHECK_CU(cu->cuImportExternalMemory(&evk->mem, &ext_desc));
+    if (ret < 0)
+        goto error;
+    // CUDA takes ownership of imported memory
+    mem_fd = -1;
+
+    // Bytes per component decide the CUDA array element format.
+    CUarray_format cufmt;
+    switch (format->pixel_size / format->num_components) {
+    case 1:
+        cufmt = CU_AD_FORMAT_UNSIGNED_INT8;
+        break;
+    case 2:
+        cufmt = CU_AD_FORMAT_UNSIGNED_INT16;
+        break;
+    default:
+        ret = -1;
+        goto error;
+    }
+
+    CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = {
+        .offset = evk->pltex->shared_mem.offset,
+        .arrayDesc = {
+            .Width = mp_image_plane_w(&p->layout, n),
+            .Height = mp_image_plane_h(&p->layout, n),
+            .Depth = 0,
+            .Format = cufmt,
+            .NumChannels = format->num_components,
+            .Flags = 0,
+        },
+        .numLevels = 1,
+    };
+
+    ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&evk->mma, evk->mem, &tex_desc));
+    if (ret < 0)
+        goto error;
+
+    ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&p->cu_array[n], evk->mma, 0));
+    if (ret < 0)
+        goto error;
+
+    evk->vk_sem.sem = pl_vulkan_sem_create(gpu, pl_vulkan_sem_params(
+        .type = VK_SEMAPHORE_TYPE_TIMELINE,
+        .export_handle = HANDLE_TYPE,
+        .out_handle = &(evk->sem_handle),
+    ));
+    if (evk->vk_sem.sem == VK_NULL_HANDLE) {
+        ret = -1;
+        goto error;
+    }
+    // The returned FD or Handle is owned by the caller (us).
+
+    CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC w_desc = {
+#if HAVE_WIN32_DESKTOP
+        .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32,
+        .handle.win32.handle = evk->sem_handle.handle,
+#else
+        .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD,
+        .handle.fd = evk->sem_handle.fd,
+#endif
+    };
+    ret = CHECK_CU(cu->cuImportExternalSemaphore(&evk->cuda_sem, &w_desc));
+    if (ret < 0)
+        goto error;
+    // CUDA takes ownership of an imported FD *but not* an imported Handle.
+    // Only forget the FD; writing to .fd on Windows would clobber part of
+    // the handle that cuda_ext_vk_uninit() still has to close.
+#if !HAVE_WIN32_DESKTOP
+    evk->sem_handle.fd = -1;
+#endif
+
+    return true;
+
+error:
+    MP_ERR(mapper, "cuda_ext_vk_init failed\n");
+    if (mem_fd > -1)
+        close(mem_fd);
+    // Release the exported semaphore handle and mark it as gone, so a later
+    // cuda_ext_vk_uninit() does not close it a second time.
+#if HAVE_WIN32_DESKTOP
+    if (evk->sem_handle.handle != NULL) {
+        CloseHandle(evk->sem_handle.handle);
+        evk->sem_handle.handle = NULL;
+    }
+#else
+    if (evk->sem_handle.fd > -1) {
+        close(evk->sem_handle.fd);
+        evk->sem_handle.fd = -1;
+    }
+#endif
+    return false;
+}
+
+/*
+ * Release the interop objects of plane n that cuda_ext_vk_init() created.
+ * Does nothing if no state was allocated for this plane. The slot in
+ * p->ext[n] is cleared afterwards so no stale pointer is left behind.
+ */
+static void cuda_ext_vk_uninit(const struct ra_hwdec_mapper *mapper, int n)
+{
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+
+    struct ext_vk *evk = p->ext[n];
+    if (!evk)
+        return;
+
+    if (evk->mma) {
+        CHECK_CU(cu->cuMipmappedArrayDestroy(evk->mma));
+        evk->mma = 0;
+    }
+    if (evk->mem) {
+        CHECK_CU(cu->cuDestroyExternalMemory(evk->mem));
+        evk->mem = 0;
+    }
+    if (evk->cuda_sem) {
+        CHECK_CU(cu->cuDestroyExternalSemaphore(evk->cuda_sem));
+        evk->cuda_sem = 0;
+    }
+    pl_vulkan_sem_destroy(ra_pl_get(mapper->ra), &evk->vk_sem.sem);
+#if HAVE_WIN32_DESKTOP
+    // The exported handle stays ours after the CUDA import; close it only
+    // if one was actually exported.
+    if (evk->sem_handle.handle != NULL)
+        CloseHandle(evk->sem_handle.handle);
+#endif
+
+    talloc_free(evk);
+    p->ext[n] = NULL;
+}
+
+/*
+ * Take plane n's texture out of libplacebo's hands and make CUDA wait for it.
+ *
+ * The timeline counter is bumped first; pl_vulkan_hold_ex() is given the
+ * semaphore with that new value, and CUDA is then asked to wait for the same
+ * value. Returns true if both steps succeeded.
+ */
+static bool cuda_ext_vk_wait(const struct ra_hwdec_mapper *mapper, int n)
+{
+    struct cuda_mapper_priv *priv = mapper->priv;
+    struct cuda_hw_priv *owner_priv = mapper->owner->priv;
+    CudaFunctions *cu = owner_priv->cu;
+    struct ext_vk *evk = priv->ext[n];
+
+    evk->vk_sem.value += 1;
+
+    int held = pl_vulkan_hold_ex(ra_pl_get(mapper->ra), pl_vulkan_hold_params(
+        .tex = evk->pltex,
+        .layout = VK_IMAGE_LAYOUT_GENERAL,
+        .qf = VK_QUEUE_FAMILY_EXTERNAL,
+        .semaphore = evk->vk_sem,
+    ));
+    if (!held)
+        return false;
+
+    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS wp = {
+        .params.fence.value = evk->vk_sem.value,
+    };
+    int err = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&evk->cuda_sem,
+                                                         &wp, 1, 0));
+    return err == 0;
+}
+
+/*
+ * Give plane n's texture back to libplacebo once CUDA is done with it.
+ *
+ * The timeline counter is bumped, CUDA is asked to signal the semaphore with
+ * that value, and the texture is released to libplacebo together with the
+ * semaphore. Returns false if the CUDA signal could not be queued; in that
+ * case the texture is not released.
+ */
+static bool cuda_ext_vk_signal(const struct ra_hwdec_mapper *mapper, int n)
+{
+    struct cuda_mapper_priv *priv = mapper->priv;
+    struct cuda_hw_priv *owner_priv = mapper->owner->priv;
+    CudaFunctions *cu = owner_priv->cu;
+    struct ext_vk *evk = priv->ext[n];
+
+    evk->vk_sem.value += 1;
+
+    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS sp = {
+        .params.fence.value = evk->vk_sem.value,
+    };
+    if (CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&evk->cuda_sem,
+                                                     &sp, 1, 0)) != 0)
+        return false;
+
+    pl_vulkan_release_ex(ra_pl_get(mapper->ra), pl_vulkan_release_params(
+        .tex = evk->pltex,
+        .layout = VK_IMAGE_LAYOUT_GENERAL,
+        .qf = VK_QUEUE_FAMILY_EXTERNAL,
+        .semaphore = evk->vk_sem,
+    ));
+    return true;
+}
+
+#undef CHECK_CU
+#define CHECK_CU(x) check_cu(hw, (x), #x)
+
+/*
+ * Try to set up the CUDA hwdec on top of a Vulkan (libplacebo) RA.
+ *
+ * Checks that the GPU can export textures and semaphores as HANDLE_TYPE and
+ * that the CUDA driver offers external memory import, looks up the CUDA
+ * device whose UUID equals the Vulkan device's, creates a CUDA context on it
+ * (used for both display and decoding) and installs the Vulkan interop
+ * callbacks. Returns false if any of these steps fails.
+ */
+bool cuda_vk_init(const struct ra_hwdec *hw) {
+    struct cuda_hw_priv *p = hw->priv;
+    CudaFunctions *cu = p->cu;
+    // Stay quiet while merely probing.
+    int level = hw->probing ? MSGL_V : MSGL_ERR;
+
+    pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);
+    if (gpu == NULL) {
+        // This is not a Vulkan RA.
+        return false;
+    }
+
+    if (!(gpu->export_caps.tex & HANDLE_TYPE)) {
+        MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable texture memory of type 0x%X.\n",
+                   HANDLE_TYPE);
+        return false;
+    }
+    if (!(gpu->export_caps.sync & HANDLE_TYPE)) {
+        MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable semaphores of type 0x%X.\n",
+                   HANDLE_TYPE);
+        return false;
+    }
+
+    if (!cu->cuImportExternalMemory) {
+        MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");
+        return false;
+    }
+
+    int device_count;
+    if (CHECK_CU(cu->cuDeviceGetCount(&device_count)) < 0)
+        return false;
+
+    // Find the CUDA device that is the same physical GPU as the Vulkan one.
+    CUdevice display_dev = -1;
+    for (int i = 0; i < device_count; i++) {
+        CUdevice dev;
+        CUuuid uuid;
+
+        // Devices that cannot be queried are simply skipped.
+        if (CHECK_CU(cu->cuDeviceGet(&dev, i)) < 0 ||
+            CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev)) < 0)
+            continue;
+
+        if (memcmp(gpu->uuid, uuid.bytes, sizeof (gpu->uuid)) != 0)
+            continue;
+
+        display_dev = dev;
+        break;
+    }
+
+    if (display_dev == -1) {
+        MP_MSG(hw, level, "Could not match Vulkan display device in CUDA.\n");
+        return false;
+    }
+
+    if (CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+                                 display_dev)) < 0)
+        return false;
+
+    p->decode_ctx = p->display_ctx;
+
+    p->ext_init   = cuda_ext_vk_init;
+    p->ext_uninit = cuda_ext_vk_uninit;
+    p->ext_wait   = cuda_ext_vk_wait;
+    p->ext_signal = cuda_ext_vk_signal;
+
+    return true;
+}
+
diff --git a/video/out/hwdec/hwdec_drmprime.c b/video/out/hwdec/hwdec_drmprime.c
new file mode 100644
index 0000000..f7c6250
--- /dev/null
+++ b/video/out/hwdec/hwdec_drmprime.c
@@ -0,0 +1,294 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <fcntl.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_drm.h>
+#include <xf86drm.h>
+
+#include "config.h"
+
+#include "libmpv/render_gl.h"
+#include "options/m_config.h"
+#include "video/fmt-conversion.h"
+#include "video/out/drm_common.h"
+#include "video/out/gpu/hwdec.h"
+#include "video/out/hwdec/dmabuf_interop.h"
+
+extern const struct m_sub_options drm_conf;
+
+// State owned by the drmprime hwdec driver instance (hw->priv).
+struct priv_owner {
+    struct mp_hwdec_ctx hwctx; // device context registered with hw->devs in init()
+    int *formats;              // 0-terminated list of supported image formats
+
+    struct dmabuf_interop dmabuf_interop; // callbacks of the selected interop backend
+};
+
+// Unregister the hwdec device (if init() got far enough to register it) and
+// drop the reference to the libav device context.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *priv = hw->priv;
+    struct mp_hwdec_ctx *hwctx = &priv->hwctx;
+
+    // driver_name is only filled in right before the context is added.
+    if (hwctx->driver_name != NULL)
+        hwdec_devices_remove(hw->devs, hwctx);
+
+    av_buffer_unref(&hwctx->av_device_ref);
+}
+
+// NULL-terminated list of dmabuf interop backends compiled into this build,
+// in the order in which init() tries them.
+static const dmabuf_interop_init interop_inits[] = {
+#if HAVE_DMABUF_INTEROP_GL
+    dmabuf_interop_gl_init,
+#endif
+#if HAVE_VAAPI
+    dmabuf_interop_pl_init,
+#endif
+#if HAVE_DMABUF_WAYLAND
+    dmabuf_interop_wl_init,
+#endif
+    NULL
+};
+
+/*
+ * Initialise the drmprime hwdec: pick the first dmabuf interop backend that
+ * accepts this RA, create a libav DRM device context and register the
+ * supported formats with the hwdec device list.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+
+    // The first backend whose init succeeds wins.
+    for (int i = 0; interop_inits[i]; i++) {
+        if (interop_inits[i](hw, &p->dmabuf_interop)) {
+            break;
+        }
+    }
+
+    if (!p->dmabuf_interop.interop_map || !p->dmabuf_interop.interop_unmap) {
+        MP_VERBOSE(hw, "drmprime hwdec requires at least one dmabuf interop backend.\n");
+        return -1;
+    }
+
+    /*
+     * The drm_params resource is not provided when using X11 or Wayland, but
+     * there are extensions that supposedly provide this information from the
+     * drivers. Not properly documented. Of course.
+     */
+    mpv_opengl_drm_params_v2 *params = ra_get_native_resource(hw->ra_ctx->ra,
+                                                              "drm_params_v2");
+
+    /*
+     * Respect drm_device option, so there is a way to control this when not
+     * using a DRM gpu context. If drm_params_v2 are present, they will already
+     * respect this option.
+     */
+    void *tmp = talloc_new(NULL);
+    struct drm_opts *drm_opts = mp_get_config_group(tmp, hw->global, &drm_conf);
+    const char *opt_path = drm_opts->device_path;
+
+    // drmGetRenderDeviceNameFromFd() hands back a heap-allocated string that
+    // the caller must free(), or NULL if the name cannot be determined. Keep
+    // the pointer so it can be released, and fall back to the option/default
+    // path instead of passing NULL on.
+    char *render_path = NULL;
+    if (params && params->render_fd > -1)
+        render_path = drmGetRenderDeviceNameFromFd(params->render_fd);
+
+    const char *device_path = render_path ? render_path :
+                              opt_path ? opt_path : "/dev/dri/renderD128";
+    MP_VERBOSE(hw, "Using DRM device: %s\n", device_path);
+
+    int ret = av_hwdevice_ctx_create(&p->hwctx.av_device_ref,
+                                     AV_HWDEVICE_TYPE_DRM,
+                                     device_path, NULL, 0);
+    // device_path may point into either of these; both are done with now.
+    free(render_path);
+    talloc_free(tmp);
+    if (ret != 0) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx: %s\n", av_err2str(ret));
+        return -1;
+    }
+
+    /*
+     * At the moment, there is no way to discover compatible formats
+     * from the hwdevice_ctx, and in fact the ffmpeg hwaccels hard-code
+     * formats too, so we're not missing out on anything.
+     */
+    int num_formats = 0;
+    MP_TARRAY_APPEND(p, p->formats, num_formats, IMGFMT_NV12);
+    MP_TARRAY_APPEND(p, p->formats, num_formats, IMGFMT_420P);
+    MP_TARRAY_APPEND(p, p->formats, num_formats, pixfmt2imgfmt(AV_PIX_FMT_NV16));
+    MP_TARRAY_APPEND(p, p->formats, num_formats, 0); // terminate it
+
+    p->hwctx.hw_imgfmt = IMGFMT_DRMPRIME;
+    p->hwctx.supported_formats = p->formats;
+    // Setting driver_name also tells uninit() that the context was added.
+    p->hwctx.driver_name = hw->driver->name;
+    hwdec_devices_add(hw->devs, &p->hwctx);
+
+    return 0;
+}
+
+// Undo mapper_map(): let the interop backend unmap, then close the dmabuf
+// fds that were duplicated for the currently acquired surface, if any.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct dmabuf_interop_priv *priv = mapper->priv;
+    struct priv_owner *owner = mapper->owner->priv;
+
+    owner->dmabuf_interop.interop_unmap(mapper);
+
+    if (!priv->surface_acquired)
+        return;
+
+    for (int i = 0; i < priv->desc.nb_objects; i++) {
+        // Slots that were never duplicated still hold -1.
+        if (priv->desc.objects[i].fd > -1)
+            close(priv->desc.objects[i].fd);
+    }
+    priv->surface_acquired = false;
+}
+
+// Forward mapper teardown to the interop backend, if it provides a hook.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+
+    if (!owner->dmabuf_interop.interop_uninit)
+        return;
+
+    owner->dmabuf_interop.interop_uninit(mapper);
+}
+
+// Returns true if fmt appears in the owner's 0-terminated format list.
+// A missing list counts as "nothing supported".
+static bool check_fmt(struct ra_hwdec_mapper *mapper, int fmt)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+
+    if (!owner->formats)
+        return false;
+
+    for (const int *cur = owner->formats; *cur; cur++) {
+        if (*cur == fmt)
+            return true;
+    }
+    return false;
+}
+
+/*
+ * Prepare a mapper: derive the output image parameters from the hardware
+ * sub-format, look up the plane layout, let the interop backend initialise,
+ * and finally verify that the resulting format is one we support.
+ *
+ * Returns 0 on success and -1 on failure.
+ */
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct dmabuf_interop_priv *priv = mapper->priv;
+    struct priv_owner *owner = mapper->owner->priv;
+
+    mapper->dst_params = mapper->src_params;
+
+    /*
+     * rpi4_8 and rpi4_10 function identically to NV12. These two pixel
+     * formats however are not defined in upstream ffmpeg so a string
+     * comparison is used to identify them instead of a mpv IMGFMT.
+     */
+    const char *subfmt_name = mp_imgfmt_to_name(mapper->src_params.hw_subfmt);
+    bool is_rpi4 = strcmp(subfmt_name, "rpi4_8") == 0 ||
+                   strcmp(subfmt_name, "rpi4_10") == 0;
+    if (is_rpi4) {
+        mapper->dst_params.imgfmt = IMGFMT_NV12;
+    } else {
+        mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+    }
+    mapper->dst_params.hw_subfmt = 0;
+
+    // The descriptor stays zeroed when the RA reports no formats at all.
+    struct ra_imgfmt_desc desc = {0};
+    if (mapper->ra->num_formats) {
+        if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+            return -1;
+    }
+
+    priv->num_planes = desc.num_planes;
+    mp_image_set_params(&priv->layout, &mapper->dst_params);
+
+    if (owner->dmabuf_interop.interop_init &&
+        !owner->dmabuf_interop.interop_init(mapper, &desc))
+        return -1;
+
+    if (check_fmt(mapper, mapper->dst_params.imgfmt))
+        return 0;
+
+    MP_FATAL(mapper, "unsupported DRM image format %s\n",
+             mp_imgfmt_to_name(mapper->dst_params.imgfmt));
+    return -1;
+}
+
+/*
+ * Map the current source frame: copy its DRM frame descriptor into the
+ * mapper's private state, duplicate the dmabuf fds, check the plane count
+ * and hand over to the interop backend.
+ *
+ * Returns 0 on success. On failure everything acquired so far is released
+ * through mapper_unmap() and -1 is returned.
+ */
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct dmabuf_interop_priv *priv = mapper->priv;
+    struct priv_owner *owner = mapper->owner->priv;
+
+    /*
+     * Although we use the same AVDRMFrameDescriptor to hold the dmabuf
+     * properties, we additionally need to dup the fds to ensure the
+     * frame doesn't disappear out from under us. And then for clarity,
+     * we copy all the individual fields.
+     */
+    const AVDRMFrameDescriptor *src_desc =
+        (AVDRMFrameDescriptor *)mapper->src->planes[0];
+
+    priv->desc.nb_layers = src_desc->nb_layers;
+    priv->desc.nb_objects = src_desc->nb_objects;
+
+    for (int l = 0; l < src_desc->nb_layers; l++) {
+        priv->desc.layers[l].format = src_desc->layers[l].format;
+        priv->desc.layers[l].nb_planes = src_desc->layers[l].nb_planes;
+        for (int pl = 0; pl < src_desc->layers[l].nb_planes; pl++) {
+            priv->desc.layers[l].planes[pl].object_index =
+                src_desc->layers[l].planes[pl].object_index;
+            priv->desc.layers[l].planes[pl].offset =
+                src_desc->layers[l].planes[pl].offset;
+            priv->desc.layers[l].planes[pl].pitch =
+                src_desc->layers[l].planes[pl].pitch;
+        }
+    }
+
+    for (int o = 0; o < src_desc->nb_objects; o++) {
+        priv->desc.objects[o].format_modifier = src_desc->objects[o].format_modifier;
+        priv->desc.objects[o].size = src_desc->objects[o].size;
+        // Initialise fds to -1 to make partial failure cleanup easier.
+        priv->desc.objects[o].fd = -1;
+    }
+    // Surface is now safe to treat as acquired to allow for unmapping to run.
+    priv->surface_acquired = true;
+
+    // Now actually dup the fds
+    for (int o = 0; o < src_desc->nb_objects; o++) {
+        priv->desc.objects[o].fd =
+            fcntl(src_desc->objects[o].fd, F_DUPFD_CLOEXEC, 0);
+        if (priv->desc.objects[o].fd == -1) {
+            MP_ERR(mapper, "Failed to duplicate dmabuf fd: %s\n",
+                   mp_strerror(errno));
+            goto err;
+        }
+    }
+
+    // We can handle composed formats if the total number of planes is still
+    // equal the number of planes we expect. Complex formats with auxiliary
+    // planes cannot be supported.
+
+    int total_planes = 0;
+    for (int l = 0; l < priv->desc.nb_layers; l++)
+        total_planes += priv->desc.layers[l].nb_planes;
+
+    // num_planes == 0 means no expectation was recorded; skip the check then.
+    if (priv->num_planes != 0 && priv->num_planes != total_planes) {
+        MP_ERR(mapper,
+               "Mapped surface with format '%s' has unexpected number of planes. "
+               "(%d layers and %d planes, but expected %d planes)\n",
+               mp_imgfmt_to_name(mapper->src->params.hw_subfmt),
+               priv->desc.nb_layers, total_planes, priv->num_planes);
+        goto err;
+    }
+
+    if (!owner->dmabuf_interop.interop_map(mapper, &owner->dmabuf_interop,
+                                           false))
+        goto err;
+
+    return 0;
+
+err:
+    mapper_unmap(mapper);
+
+    MP_FATAL(mapper, "mapping DRM dmabuf failed\n");
+    return -1;
+}
+
+// Driver table for the "drmprime" hwdec: maps IMGFMT_DRMPRIME frames through
+// one of the dmabuf interop backends.
+const struct ra_hwdec_driver ra_hwdec_drmprime = {
+    .name = "drmprime",
+    .imgfmts = {IMGFMT_DRMPRIME, 0},
+    .priv_size = sizeof(struct priv_owner),
+    .init = init,
+    .uninit = uninit,
+    // Per-mapper callbacks and private state size.
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct dmabuf_interop_priv),
+        .init = mapper_init,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+        .uninit = mapper_uninit,
+    },
+};
diff --git a/video/out/hwdec/hwdec_drmprime_overlay.c b/video/out/hwdec/hwdec_drmprime_overlay.c
new file mode 100644
index 0000000..6b6aae6
--- /dev/null
+++ b/video/out/hwdec/hwdec_drmprime_overlay.c
@@ -0,0 +1,334 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdbool.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_drm.h>
+
+#include "video/hwdec.h"
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "libmpv/render_gl.h"
+#include "video/out/drm_atomic.h"
+#include "video/out/drm_common.h"
+#include "video/out/drm_prime.h"
+#include "video/out/gpu/hwdec.h"
+#include "video/mp_image.h"
+
+extern const struct m_sub_options drm_conf;
+
+// A DRM framebuffer together with the mpv image it was created from.
+struct drm_frame {
+    struct drm_prime_framebuffer fb;
+    struct mp_image *image; // associated mpv image
+};
+
+// State of the drmprime-overlay hwdec driver instance (hw->priv).
+struct priv {
+    struct mp_log *log;        // set to hw->log in init()
+    struct mp_hwdec_ctx hwctx; // device context registered with hw->devs
+
+    struct mp_image_params params;
+
+    struct drm_atomic_context *ctx; // created in init(), destroyed in uninit()
+    // Frame queue rotated by set_current_frame(): current -> last -> old.
+    struct drm_frame current_frame, last_frame, old_frame;
+
+    struct mp_rect src, dst;   // rectangles used for the most recent overlay_frame()
+
+    int display_w, display_h;  // CRTC mode size read in init()
+
+    struct drm_prime_handle_refs handle_refs; // passed to the drm_prime_* helpers
+};
+
+/*
+ * Rotate the frame queue and install `frame` (or an empty frame if NULL) as
+ * the new current frame.
+ *
+ * `frame` will be on screen after the next vsync, replacing current_frame,
+ * which is what is displayed right now. old_frame acts as a third buffer so
+ * that a DRM framebuffer is never released while it may still be displayed:
+ * only the framebuffer that falls off the end of the queue is destroyed.
+ */
+static void set_current_frame(struct ra_hwdec *hw, struct drm_frame *frame)
+{
+    struct priv *priv = hw->priv;
+
+    // Drop the oldest framebuffer before its slot gets overwritten.
+    if (priv->ctx) {
+        drm_prime_destroy_framebuffer(priv->log, priv->ctx->fd,
+                                      &priv->old_frame.fb, &priv->handle_refs);
+    }
+
+    // last -> old
+    mp_image_setrefp(&priv->old_frame.image, priv->last_frame.image);
+    priv->old_frame.fb = priv->last_frame.fb;
+
+    // current -> last
+    mp_image_setrefp(&priv->last_frame.image, priv->current_frame.image);
+    priv->last_frame.fb = priv->current_frame.fb;
+
+    if (!frame) {
+        memset(&priv->current_frame.fb, 0, sizeof(priv->current_frame.fb));
+        mp_image_setrefp(&priv->current_frame.image, NULL);
+        return;
+    }
+
+    priv->current_frame.fb = frame->fb;
+    mp_image_setrefp(&priv->current_frame.image, frame->image);
+}
+
+/*
+ * Scale rectangle `src`, given in a source_w x source_h coordinate space, to
+ * the display mode size and store the result in `dst`.
+ *
+ * DRM allows a layer whose size differs from the framebuffer's, so the
+ * destination is scaled here to the video mode. The smaller of the two axis
+ * ratios is applied to both axes, and the result is shifted so the scaled
+ * source area is centred on the display.
+ */
+static void scale_dst_rect(struct ra_hwdec *hw, int source_w, int source_h ,struct mp_rect *src, struct mp_rect *dst)
+{
+    struct priv *priv = hw->priv;
+
+    double hratio = priv->display_w / (double)source_w;
+    double vratio = priv->display_h / (double)source_h;
+    double scale = hratio <= vratio ? hratio : vratio;
+
+    // Half of the space left over on each axis after scaling.
+    int shift_x = (priv->display_w - scale * source_w) / 2;
+    int shift_y = (priv->display_h - scale * source_h) / 2;
+
+    // Truncate the scaled coordinate to int first, then apply the offset.
+    dst->x0 = (int)(src->x0 * scale) + shift_x;
+    dst->x1 = (int)(src->x1 * scale) + shift_x;
+    dst->y0 = (int)(src->y0 * scale) + shift_y;
+    dst->y1 = (int)(src->y1 * scale) + shift_y;
+}
+
+/*
+ * Detach the drmprime video plane from its framebuffer and CRTC with an
+ * immediate atomic commit. Does nothing if there is no DRM context or no
+ * video plane. A failed commit is only logged.
+ */
+static void disable_video_plane(struct ra_hwdec *hw)
+{
+    struct priv *p = hw->priv;
+    if (!p->ctx)
+        return;
+
+    if (!p->ctx->drmprime_video_plane)
+        return;
+
+    // Disabling the drmprime video plane is needed on some devices when using
+    // the primary plane for video. Primary buffer can't be active with no
+    // framebuffer associated. So we need this function to commit it right away
+    // as mpv will free all framebuffers on playback end.
+    drmModeAtomicReqPtr request = drmModeAtomicAlloc();
+    if (request) {
+        drm_object_set_property(request, p->ctx->drmprime_video_plane, "FB_ID", 0);
+        drm_object_set_property(request, p->ctx->drmprime_video_plane, "CRTC_ID", 0);
+
+        int ret = drmModeAtomicCommit(p->ctx->fd, request,
+                                  0, NULL);
+
+        // Log messages must end in a newline, like every other message here.
+        if (ret)
+            MP_ERR(hw, "Failed to commit disable plane request (code %d)\n", ret);
+        drmModeAtomicFree(request);
+    }
+}
+
+/*
+ * Show hw_image on the drmprime video plane, or disable the plane and flush
+ * the frame queue when hw_image is NULL.
+ *
+ * src is the source crop rectangle; dst is the target rectangle, which gets
+ * rescaled to the display mode when a "drm_draw_surface_size" resource is
+ * available. If an atomic request is provided through "drm_params_v2", the
+ * plane properties are added to it; otherwise the plane is set directly with
+ * drmModeSetPlane().
+ *
+ * Returns 0 on success and a negative value on failure.
+ */
+static int overlay_frame(struct ra_hwdec *hw, struct mp_image *hw_image,
+                         struct mp_rect *src, struct mp_rect *dst, bool newframe)
+{
+    struct priv *p = hw->priv;
+    struct ra *ra = hw->ra_ctx->ra;
+    drmModeAtomicReq *request = NULL;
+    struct drm_frame next_frame = {0};
+    int ret;
+
+    // grab atomic request from native resources
+    if (p->ctx) {
+        struct mpv_opengl_drm_params_v2 *drm_params =
+            (mpv_opengl_drm_params_v2 *)ra_get_native_resource(ra, "drm_params_v2");
+        if (!drm_params) {
+            MP_ERR(hw, "Failed to retrieve drm params from native resources\n");
+            return -1;
+        }
+        if (!drm_params->atomic_request_ptr) {
+            MP_ERR(hw, "drm params pointer to atomic request is invalid\n");
+            return -1;
+        }
+        request = *drm_params->atomic_request_ptr;
+    }
+
+    if (!hw_image) {
+        // No image: turn the plane off and push out queued framebuffers
+        // until the oldest slot is empty.
+        disable_video_plane(hw);
+
+        while (p->old_frame.fb.fb_id)
+            set_current_frame(hw, NULL);
+
+        set_current_frame(hw, &next_frame);
+        return 0;
+    }
+
+    // grab draw plane windowing info to eventually upscale the overlay
+    // as egl windows could be upscaled to draw plane.
+    struct mpv_opengl_drm_draw_surface_size *draw_surface_size =
+        ra_get_native_resource(ra, "drm_draw_surface_size");
+    if (draw_surface_size) {
+        scale_dst_rect(hw, draw_surface_size->width, draw_surface_size->height,
+                       dst, &p->dst);
+    } else {
+        p->dst = *dst;
+    }
+    p->src = *src;
+
+    next_frame.image = hw_image;
+
+    AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)hw_image->planes[0];
+    if (desc) {
+        int srcw = p->src.x1 - p->src.x0;
+        int srch = p->src.y1 - p->src.y0;
+        // Destination position is rounded down and its size rounded up to
+        // a multiple of 2.
+        int dstx = MP_ALIGN_DOWN(p->dst.x0, 2);
+        int dsty = MP_ALIGN_DOWN(p->dst.y0, 2);
+        int dstw = MP_ALIGN_UP(p->dst.x1 - p->dst.x0, 2);
+        int dsth = MP_ALIGN_UP(p->dst.y1 - p->dst.y0, 2);
+
+        if (drm_prime_create_framebuffer(p->log, p->ctx->fd, desc, srcw, srch,
+                                         &next_frame.fb, &p->handle_refs)) {
+            ret = -1;
+            goto fail;
+        }
+
+        if (!request) {
+            // SRC_* values are shifted left by 16 bits for the DRM API.
+            ret = drmModeSetPlane(p->ctx->fd, p->ctx->drmprime_video_plane->id,
+                                  p->ctx->crtc->id, next_frame.fb.fb_id, 0,
+                                  dstx, dsty, dstw, dsth,
+                                  p->src.x0 << 16, p->src.y0 << 16,
+                                  srcw << 16, srch << 16);
+            if (ret < 0) {
+                MP_ERR(hw, "Failed to set the drmprime video plane %d (buffer %d).\n",
+                       p->ctx->drmprime_video_plane->id, next_frame.fb.fb_id);
+                goto fail;
+            }
+        } else {
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "FB_ID", next_frame.fb.fb_id);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "CRTC_ID", p->ctx->crtc->id);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "SRC_X", p->src.x0 << 16);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "SRC_Y", p->src.y0 << 16);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "SRC_W", srcw << 16);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "SRC_H", srch << 16);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "CRTC_X", dstx);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "CRTC_Y", dsty);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "CRTC_W", dstw);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "CRTC_H", dsth);
+            drm_object_set_property(request, p->ctx->drmprime_video_plane,
+                                    "ZPOS", 0);
+        }
+    }
+
+    set_current_frame(hw, &next_frame);
+    return 0;
+
+fail:
+    drm_prime_destroy_framebuffer(p->log, p->ctx->fd, &next_frame.fb, &p->handle_refs);
+    return ret;
+}
+
+// Tear down the overlay hwdec: disable the video plane, rotate the frame
+// queue once with an empty frame, unregister the device and free the DRM
+// atomic context.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv *priv = hw->priv;
+
+    disable_video_plane(hw);
+    set_current_frame(hw, NULL);
+
+    hwdec_devices_remove(hw->devs, &priv->hwctx);
+    av_buffer_unref(&priv->hwctx.av_device_ref);
+
+    if (!priv->ctx)
+        return;
+
+    drm_atomic_destroy_context(priv->ctx);
+    priv->ctx = NULL;
+}
+
+/*
+ * Initialise the drmprime-overlay hwdec: build a DRM atomic context from the
+ * "drm_params_v2" native resource, read the display mode size, check for
+ * PRIME support, disable the video plane and register a libav DRM device.
+ *
+ * Returns 0 on success. On failure uninit() is run and -1 is returned.
+ */
+static int init(struct ra_hwdec *hw)
+{
+    struct priv *p = hw->priv;
+
+    p->log = hw->log;
+
+    // Copy the plane options out before the temporary option group is freed.
+    void *tmp = talloc_new(NULL);
+    struct drm_opts *opts = mp_get_config_group(tmp, hw->global, &drm_conf);
+    int draw_plane = opts->draw_plane;
+    int drmprime_video_plane = opts->drmprime_video_plane;
+    talloc_free(tmp);
+
+    struct mpv_opengl_drm_params_v2 *drm_params =
+        ra_get_native_resource(hw->ra_ctx->ra, "drm_params_v2");
+    if (!drm_params) {
+        mp_verbose(p->log, "Failed to retrieve DRM fd from native display.\n");
+        goto err;
+    }
+
+    p->ctx = drm_atomic_create_context(p->log, drm_params->fd, drm_params->crtc_id,
+                                       drm_params->connector_id, draw_plane,
+                                       drmprime_video_plane);
+    if (!p->ctx) {
+        mp_err(p->log, "Failed to retrieve DRM atomic context.\n");
+        goto err;
+    }
+    if (!p->ctx->drmprime_video_plane) {
+        mp_warn(p->log, "No drmprime video plane. You might need to specify it manually using --drm-drmprime-video-plane\n");
+        goto err;
+    }
+
+    // The display size is left untouched if the CRTC cannot be queried.
+    drmModeCrtcPtr crtc = drmModeGetCrtc(p->ctx->fd, p->ctx->crtc->id);
+    if (crtc) {
+        p->display_w = crtc->mode.hdisplay;
+        p->display_h = crtc->mode.vdisplay;
+        drmModeFreeCrtc(crtc);
+    }
+
+    uint64_t has_prime;
+    if (drmGetCap(p->ctx->fd, DRM_CAP_PRIME, &has_prime) < 0) {
+        MP_ERR(hw, "Card does not support prime handles.\n");
+        goto err;
+    }
+
+    if (has_prime)
+        drm_prime_init_handle_ref_count(p, &p->handle_refs);
+
+    disable_video_plane(hw);
+
+    p->hwctx = (struct mp_hwdec_ctx) {
+        .driver_name = hw->driver->name,
+        .hw_imgfmt = IMGFMT_DRMPRIME,
+    };
+
+    char *device = drmGetDeviceNameFromFd2(p->ctx->fd);
+    int ret = av_hwdevice_ctx_create(&p->hwctx.av_device_ref,
+                                     AV_HWDEVICE_TYPE_DRM, device, NULL, 0);
+    // free(NULL) is a no-op, so no guard is needed.
+    free(device);
+
+    if (ret != 0) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx: %s\n", av_err2str(ret));
+        goto err;
+    }
+
+    hwdec_devices_add(hw->devs, &p->hwctx);
+
+    return 0;
+
+err:
+    uninit(hw);
+    return -1;
+}
+
+// Driver table for the "drmprime-overlay" hwdec: shows IMGFMT_DRMPRIME frames
+// on a DRM plane via overlay_frame() instead of mapping them to textures.
+const struct ra_hwdec_driver ra_hwdec_drmprime_overlay = {
+    .name = "drmprime-overlay",
+    .imgfmts = {IMGFMT_DRMPRIME, 0},
+    .priv_size = sizeof(struct priv),
+    .init = init,
+    .uninit = uninit,
+    .overlay_frame = overlay_frame,
+};
diff --git a/video/out/hwdec/hwdec_ios_gl.m b/video/out/hwdec/hwdec_ios_gl.m
new file mode 100644
index 0000000..633cc3d
--- /dev/null
+++ b/video/out/hwdec/hwdec_ios_gl.m
@@ -0,0 +1,222 @@
+/*
+ * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com>
+ *               2017 Aman Gupta <ffmpeg@tmm1.net>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <CoreVideo/CoreVideo.h>
+#include <OpenGLES/EAGL.h>
+
+#include <libavutil/hwcontext.h>
+
+#include "video/out/gpu/hwdec.h"
+#include "video/mp_image_pool.h"
+#include "video/out/opengl/ra_gl.h"
+#include "hwdec_vt.h"
+
+// Check whether the GLES interop can be used with this renderer.
+// Returns true only if the ra is a GL one, it reports an ES version of at
+// least 200, and an EAGLContext is current on the calling thread. A non-GL
+// ra is rejected silently; the other two failures are logged as errors.
+static bool check_hwdec(const struct ra_hwdec *hw)
+{
+    struct ra *ra = hw->ra_ctx->ra;
+
+    if (!ra_is_gl(ra))
+        return false;
+
+    GL *gl_funcs = ra_gl_get(ra);
+    bool have_es2 = gl_funcs->es >= 200;
+    if (!have_es2) {
+        MP_ERR(hw, "need OpenGLES 2.0 for CVOpenGLESTextureCacheCreateTextureFromImage()\n");
+        return false;
+    }
+
+    if (![EAGLContext currentContext]) {
+        MP_ERR(hw, "need a current EAGLContext set\n");
+        return false;
+    }
+
+    return true;
+}
+
+// In GLES3 mode, CVOpenGLESTextureCacheCreateTextureFromImage()
+// will return error -6683 unless invoked with GL_LUMINANCE and
+// GL_LUMINANCE_ALPHA (http://stackoverflow.com/q/36213994/332798)
+// If a format tries to use GL_RED/GL_RG instead, try to find a format
+// that uses GL_LUMINANCE[_ALPHA] instead.
+static const struct ra_format *find_la_variant(struct ra *ra,
+                                               const struct ra_format *fmt)
+{
+    GLint src_internal;
+    GLenum src_format;
+    GLenum src_type;
+    ra_gl_get_format(fmt, &src_internal, &src_format, &src_type);
+
+    // Only GL_RED and GL_RG need a replacement; any other format is returned
+    // unchanged.
+    GLenum wanted;
+    switch (src_format) {
+    case GL_RED:
+        wanted = GL_LUMINANCE;
+        break;
+    case GL_RG:
+        wanted = GL_LUMINANCE_ALPHA;
+        break;
+    default:
+        return fmt;
+    }
+
+    // Search the ra's format list for an entry whose internal format and
+    // format both equal the luminance enum, with the same component type.
+    for (int idx = 0; idx < ra->num_formats; idx++) {
+        const struct ra_format *candidate = ra->formats[idx];
+        GLint cand_internal;
+        GLenum cand_format;
+        GLenum cand_type;
+        ra_gl_get_format(candidate, &cand_internal, &cand_format, &cand_type);
+
+        if (cand_internal != (GLint)wanted)
+            continue;
+        if (cand_format != wanted || cand_type != src_type)
+            continue;
+        return candidate;
+    }
+
+    // No luminance variant is available.
+    return NULL;
+}
+
+// Prepare the mapper: replace every plane format with its luminance variant
+// (see find_la_variant) and create the CoreVideo GLES texture cache for the
+// current EAGLContext. Returns 0 on success, -1 if a plane format is missing
+// or not UNORM, or if the texture cache cannot be created.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    int plane = 0;
+    while (plane < priv->desc.num_planes) {
+        const struct ra_format *la =
+            find_la_variant(mapper->ra, priv->desc.planes[plane]);
+        priv->desc.planes[plane] = la;
+
+        bool usable = la && la->ctype == RA_CTYPE_UNORM;
+        if (!usable) {
+            MP_ERR(mapper, "Format unsupported.\n");
+            return -1;
+        }
+        plane++;
+    }
+
+    EAGLContext *current = [EAGLContext currentContext];
+    CVReturn res = CVOpenGLESTextureCacheCreate(kCFAllocatorDefault, NULL,
+                                                current, NULL,
+                                                &priv->gl_texture_cache);
+    if (res == noErr)
+        return 0;
+
+    MP_ERR(mapper, "Failure in CVOpenGLESTextureCacheCreate: %d\n", res);
+    return -1;
+}
+
+// Undo mapper_map: free the wrapped ra textures, release the CoreVideo
+// texture objects of all planes, and flush the texture cache.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    const int plane_count = priv->desc.num_planes;
+
+    for (int plane = 0; plane < plane_count; plane++) {
+        ra_tex_free(mapper->ra, &mapper->tex[plane]);
+
+        if (!priv->gl_planes[plane])
+            continue;
+        CFRelease(priv->gl_planes[plane]);
+        priv->gl_planes[plane] = NULL;
+    }
+
+    CVOpenGLESTextureCacheFlush(priv->gl_texture_cache, 0);
+}
+
+// Map the CVPixelBuffer stored in mapper->src->planes[3] to one GL texture
+// per plane through the CoreVideo texture cache, and wrap each of them in a
+// ra_tex stored in mapper->tex[]. Returns 0 on success and -1 on the first
+// plane that fails; anything created up to that point is left for
+// mapper_unmap to release.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    // Swap the retained pixel buffer for the one of the new frame.
+    CVPixelBufferRelease(priv->pbuf);
+    priv->pbuf = (CVPixelBufferRef)mapper->src->planes[3];
+    CVPixelBufferRetain(priv->pbuf);
+
+    const int expected_planes = priv->desc.num_planes;
+    const bool is_planar = CVPixelBufferIsPlanar(priv->pbuf);
+    const int buffer_planes = CVPixelBufferGetPlaneCount(priv->pbuf);
+    assert((is_planar && buffer_planes == expected_planes) ||
+           expected_planes == 1);
+
+    for (int plane = 0; plane < expected_planes; plane++) {
+        const struct ra_format *plane_fmt = priv->desc.planes[plane];
+
+        GLint gl_internal;
+        GLenum gl_format;
+        GLenum gl_type;
+        ra_gl_get_format(plane_fmt, &gl_internal, &gl_format, &gl_type);
+
+        size_t plane_w = CVPixelBufferGetWidthOfPlane(priv->pbuf, plane);
+        size_t plane_h = CVPixelBufferGetHeightOfPlane(priv->pbuf, plane);
+
+        CVReturn res = CVOpenGLESTextureCacheCreateTextureFromImage(
+            kCFAllocatorDefault, priv->gl_texture_cache, priv->pbuf, NULL,
+            GL_TEXTURE_2D, gl_internal, plane_w, plane_h, gl_format, gl_type,
+            plane, &priv->gl_planes[plane]);
+        if (res != noErr) {
+            MP_ERR(mapper, "error creating texture for plane %d: %d\n", plane, res);
+            return -1;
+        }
+
+        GLuint tex_name = CVOpenGLESTextureGetName(priv->gl_planes[plane]);
+
+        // Set sampling state on the CoreVideo-owned texture, then unbind.
+        gl->BindTexture(GL_TEXTURE_2D, tex_name);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        gl->BindTexture(GL_TEXTURE_2D, 0);
+
+        struct ra_tex_params tex_params = {
+            .dimensions = 2,
+            .w = plane_w,
+            .h = plane_h,
+            .d = 1,
+            .format = plane_fmt,
+            .render_src = true,
+            .src_linear = true,
+        };
+
+        struct ra_tex *wrapped =
+            ra_create_wrapped_tex(mapper->ra, &tex_params, tex_name);
+        mapper->tex[plane] = wrapped;
+        if (!wrapped)
+            return -1;
+    }
+
+    return 0;
+}
+
+// Release the last retained pixel buffer and, if one was created, the
+// CoreVideo texture cache.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    CVPixelBufferRelease(priv->pbuf);
+
+    if (!priv->gl_texture_cache)
+        return;
+    CFRelease(priv->gl_texture_cache);
+    priv->gl_texture_cache = NULL;
+}
+
+// Install the GLES interop callbacks into the owner's private data.
+// Returns false, leaving the callbacks untouched, if check_hwdec rejects the
+// renderer.
+bool vt_gl_init(const struct ra_hwdec *hw)
+{
+    if (!check_hwdec(hw))
+        return false;
+
+    struct priv_owner *owner = hw->priv;
+    owner->interop_map    = mapper_map;
+    owner->interop_unmap  = mapper_unmap;
+    owner->interop_init   = mapper_init;
+    owner->interop_uninit = mapper_uninit;
+
+    return true;
+}
diff --git a/video/out/hwdec/hwdec_mac_gl.c b/video/out/hwdec/hwdec_mac_gl.c
new file mode 100644
index 0000000..b73f5b9
--- /dev/null
+++ b/video/out/hwdec/hwdec_mac_gl.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <IOSurface/IOSurface.h>
+#include <CoreVideo/CoreVideo.h>
+#include <OpenGL/OpenGL.h>
+#include <OpenGL/CGLIOSurface.h>
+
+#include <libavutil/hwcontext.h>
+
+#include "video/mp_image_pool.h"
+#include "video/out/gpu/hwdec.h"
+#include "video/out/opengl/ra_gl.h"
+#include "hwdec_vt.h"
+
+// Check whether the macOS GL interop can be used with this renderer.
+// Returns true only if the ra is a GL one, it reports a GL version of at
+// least 300, and a CGL context is current. A non-GL ra is rejected silently;
+// the other two failures are logged as errors.
+static bool check_hwdec(const struct ra_hwdec *hw)
+{
+    if (!ra_is_gl(hw->ra_ctx->ra))
+        return false;
+
+    GL *gl = ra_gl_get(hw->ra_ctx->ra);
+    if (gl->version < 300) {
+        MP_ERR(hw, "need >= OpenGL 3.0 for core rectangle texture support\n");
+        return false;
+    }
+
+    if (!CGLGetCurrentContext()) {
+        // The message is newline-terminated like every other log line here.
+        MP_ERR(hw, "need cocoa opengl backend to be active\n");
+        return false;
+    }
+
+    return true;
+}
+
+// Generate MP_MAX_PLANES GL texture names for the mapper and verify that
+// every plane format is UNORM. Returns 0 on success, -1 if a plane uses a
+// different component type (the texture names have been generated by then).
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    gl->GenTextures(MP_MAX_PLANES, priv->gl_planes);
+
+    int plane = 0;
+    while (plane < priv->desc.num_planes) {
+        bool is_unorm = priv->desc.planes[plane]->ctype == RA_CTYPE_UNORM;
+        if (!is_unorm) {
+            MP_ERR(mapper, "Format unsupported.\n");
+            return -1;
+        }
+        plane++;
+    }
+
+    return 0;
+}
+
+// Undo mapper_map: drop the reference on the current pixel buffer and free
+// the wrapped ra textures of all planes. The GL texture names themselves are
+// kept until mapper_uninit.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    // Is this sane? No idea how to release the texture without deleting it.
+    CVPixelBufferRelease(priv->pbuf);
+    priv->pbuf = NULL;
+
+    const int plane_count = priv->desc.num_planes;
+    for (int plane = 0; plane < plane_count; plane++) {
+        ra_tex_free(mapper->ra, &mapper->tex[plane]);
+    }
+}
+
+// Attach each plane of the frame's IOSurface to one of the pre-generated
+// rectangle textures and wrap it in a ra_tex stored in mapper->tex[].
+// The CVPixelBuffer is taken from mapper->src->planes[3] and retained.
+// Returns 0 on success, -1 if the buffer has no IOSurface, if
+// CGLTexImageIOSurface2D fails, or if a texture cannot be wrapped.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    // Swap the retained pixel buffer for the one of the new frame.
+    CVPixelBufferRelease(priv->pbuf);
+    priv->pbuf = (CVPixelBufferRef)mapper->src->planes[3];
+    CVPixelBufferRetain(priv->pbuf);
+
+    IOSurfaceRef iosurf = CVPixelBufferGetIOSurface(priv->pbuf);
+    if (iosurf == NULL) {
+        MP_ERR(mapper, "CVPixelBuffer has no IOSurface\n");
+        return -1;
+    }
+
+    const int expected_planes = priv->desc.num_planes;
+    const bool is_planar = CVPixelBufferIsPlanar(priv->pbuf);
+    const int buffer_planes = CVPixelBufferGetPlaneCount(priv->pbuf);
+    assert((is_planar && buffer_planes == expected_planes) ||
+           expected_planes == 1);
+
+    const GLenum target = GL_TEXTURE_RECTANGLE;
+
+    for (int plane = 0; plane < expected_planes; plane++) {
+        const struct ra_format *plane_fmt = priv->desc.planes[plane];
+
+        GLint gl_internal;
+        GLenum gl_format;
+        GLenum gl_type;
+        ra_gl_get_format(plane_fmt, &gl_internal, &gl_format, &gl_type);
+
+        // The IOSurface is attached to whichever texture is bound.
+        gl->BindTexture(target, priv->gl_planes[plane]);
+        CGLError cgl_err = CGLTexImageIOSurface2D(
+            CGLGetCurrentContext(), target, gl_internal,
+            IOSurfaceGetWidthOfPlane(iosurf, plane),
+            IOSurfaceGetHeightOfPlane(iosurf, plane),
+            gl_format, gl_type, iosurf, plane);
+        gl->BindTexture(target, 0);
+
+        if (cgl_err != kCGLNoError) {
+            MP_ERR(mapper,
+                   "error creating IOSurface texture for plane %d: %s (%x)\n",
+                   plane, CGLErrorString(cgl_err), gl->GetError());
+            return -1;
+        }
+
+        struct ra_tex_params tex_params = {
+            .dimensions = 2,
+            .w = IOSurfaceGetWidthOfPlane(iosurf, plane),
+            .h = IOSurfaceGetHeightOfPlane(iosurf, plane),
+            .d = 1,
+            .format = plane_fmt,
+            .render_src = true,
+            .src_linear = true,
+            .non_normalized = target == GL_TEXTURE_RECTANGLE,
+        };
+
+        struct ra_tex *wrapped =
+            ra_create_wrapped_tex(mapper->ra, &tex_params,
+                                  priv->gl_planes[plane]);
+        mapper->tex[plane] = wrapped;
+        if (!wrapped)
+            return -1;
+    }
+
+    return 0;
+}
+
+// Delete the MP_MAX_PLANES texture names generated in mapper_init.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    GL *gl = ra_gl_get(mapper->ra);
+    struct priv *priv = mapper->priv;
+
+    gl->DeleteTextures(MP_MAX_PLANES, priv->gl_planes);
+}
+
+// Install the macOS GL interop callbacks into the owner's private data.
+// Returns false, leaving the callbacks untouched, if check_hwdec rejects the
+// renderer.
+bool vt_gl_init(const struct ra_hwdec *hw)
+{
+    if (!check_hwdec(hw))
+        return false;
+
+    struct priv_owner *owner = hw->priv;
+    owner->interop_map    = mapper_map;
+    owner->interop_unmap  = mapper_unmap;
+    owner->interop_init   = mapper_init;
+    owner->interop_uninit = mapper_uninit;
+
+    return true;
+}
diff --git a/video/out/hwdec/hwdec_vaapi.c b/video/out/hwdec/hwdec_vaapi.c
new file mode 100644
index 0000000..d8a4517
--- /dev/null
+++ b/video/out/hwdec/hwdec_vaapi.c
@@ -0,0 +1,557 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_vaapi.h>
+#include <va/va_drmcommon.h>
+
+#include "config.h"
+
+#include "video/out/gpu/hwdec.h"
+#include "video/out/hwdec/dmabuf_interop.h"
+#include "video/fmt-conversion.h"
+#include "video/mp_image_pool.h"
+#include "video/vaapi.h"
+
+#if HAVE_VAAPI_DRM
+#include "libmpv/render_gl.h"
+#endif
+
+#if HAVE_VAAPI_X11
+#include <va/va_x11.h>
+
+// Open a VA display on the ra's "x11" native resource.
+// Returns NULL if the ra does not provide that resource.
+static VADisplay *create_x11_va_display(struct ra *ra)
+{
+    Display *x11 = ra_get_native_resource(ra, "x11");
+    if (!x11)
+        return NULL;
+
+    return vaGetDisplay(x11);
+}
+#endif
+
+#if HAVE_VAAPI_WAYLAND
+#include <va/va_wayland.h>
+
+// Open a VA display on the ra's "wl" native resource.
+// Returns NULL if the ra does not provide that resource.
+static VADisplay *create_wayland_va_display(struct ra *ra)
+{
+    struct wl_display *wl = ra_get_native_resource(ra, "wl");
+    if (!wl)
+        return NULL;
+
+    return vaGetDisplayWl(wl);
+}
+#endif
+
+#if HAVE_VAAPI_DRM
+#include <va/va_drm.h>
+
+// Open a VA display on the render fd found in the ra's "drm_params_v2"
+// native resource. Returns NULL if the resource is missing or its render_fd
+// is -1.
+static VADisplay *create_drm_va_display(struct ra *ra)
+{
+    mpv_opengl_drm_params_v2 *drm = ra_get_native_resource(ra, "drm_params_v2");
+
+    bool have_render_fd = drm && drm->render_fd != -1;
+    return have_render_fd ? vaGetDisplayDRM(drm->render_fd) : NULL;
+}
+#endif
+
+// One way of obtaining a VA display from a native windowing resource.
+struct va_create_native {
+    // Backend name; only used in the "Trying to open a %s VA display" log line.
+    const char *name;
+    // Returns a VA display for the given ra, or NULL if this backend's native
+    // resource is not available.
+    VADisplay *(*create)(struct ra *ra);
+};
+
+// Display backends compiled into this build. create_native_va_display tries
+// them in this order and uses the first one that yields a display.
+static const struct va_create_native create_native_cbs[] = {
+#if HAVE_VAAPI_X11
+    {"x11",     create_x11_va_display},
+#endif
+#if HAVE_VAAPI_WAYLAND
+    {"wayland", create_wayland_va_display},
+#endif
+#if HAVE_VAAPI_DRM
+    {"drm",     create_drm_va_display},
+#endif
+};
+
+// Walk create_native_cbs in order and return the first VA display a backend
+// manages to open. Each attempt is logged at verbose level. Returns NULL if
+// no backend produced a display.
+static VADisplay *create_native_va_display(struct ra *ra, struct mp_log *log)
+{
+    const struct va_create_native *end =
+        create_native_cbs + MP_ARRAY_SIZE(create_native_cbs);
+
+    for (const struct va_create_native *cb = create_native_cbs; cb < end; cb++) {
+        mp_verbose(log, "Trying to open a %s VA display...\n", cb->name);
+        VADisplay *opened = cb->create(ra);
+        if (!opened)
+            continue;
+        return opened;
+    }
+
+    return NULL;
+}
+
+static void determine_working_formats(struct ra_hwdec *hw);
+
+// Private state of the VAAPI hwdec owner (hw->priv).
+struct priv_owner {
+    // VAAPI context returned by va_initialize() in init(); destroyed with
+    // va_destroy() in uninit().
+    struct mp_vaapi_ctx *ctx;
+    // Native VA display returned by create_native_va_display().
+    VADisplay *display;
+    // Zero-terminated list of image formats that passed the mapping probe in
+    // try_format_pixfmt(); NULL until the first format is added.
+    int *formats;
+    bool probing_formats; // temporary during init
+
+    // Interop callbacks filled in by the first entry of interop_inits that
+    // accepts the renderer.
+    struct dmabuf_interop dmabuf_interop;
+};
+
+// Tear down the hwdec owner: unregister the device, destroy the VA config
+// held as conversion_config (if any) and free it, then destroy the VAAPI
+// context. p->ctx may be NULL, in which case only va_destroy(NULL) is called.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *owner = hw->priv;
+    struct mp_vaapi_ctx *va = owner->ctx;
+
+    if (va) {
+        hwdec_devices_remove(hw->devs, &va->hwctx);
+
+        AVVAAPIHWConfig *conv = va->hwctx.conversion_config;
+        if (conv) {
+            vaDestroyConfig(va->display, conv->config_id);
+            av_freep(&va->hwctx.conversion_config);
+        }
+    }
+
+    va_destroy(va);
+}
+
+// NULL-terminated list of dmabuf interop backends compiled into this build.
+// init() calls them in order and stops at the first one that returns true.
+// The storage-class specifier comes first: placing it after a qualifier is
+// an obsolescent form in ISO C.
+static const dmabuf_interop_init interop_inits[] = {
+#if HAVE_DMABUF_INTEROP_GL
+    dmabuf_interop_gl_init,
+#endif
+    dmabuf_interop_pl_init,
+#if HAVE_DMABUF_WAYLAND
+    dmabuf_interop_wl_init,
+#endif
+    NULL
+};
+
+// Initialize the VAAPI hwdec owner.
+//
+// Picks the first dmabuf interop backend from interop_inits that accepts the
+// renderer, opens a native VA display, creates the mp_vaapi_ctx, probes which
+// surface formats can actually be mapped, optionally creates a video
+// processing config for format conversion, and registers the device with
+// hw->devs.
+//
+// Returns 0 on success and -1 on failure. Apart from the vaTerminate() call
+// when va_initialize() fails, failure paths return without releasing
+// anything here.
+// NOTE(review): presumably the caller runs uninit() after a failed init -
+// confirm against the hwdec loader.
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    VAStatus vas;
+
+    for (int i = 0; interop_inits[i]; i++) {
+        if (interop_inits[i](hw, &p->dmabuf_interop)) {
+            break;
+        }
+    }
+
+    // Without map/unmap callbacks no backend accepted this renderer.
+    if (!p->dmabuf_interop.interop_map || !p->dmabuf_interop.interop_unmap) {
+        MP_VERBOSE(hw, "VAAPI hwdec only works with OpenGL or Vulkan backends.\n");
+        return -1;
+    }
+
+    p->display = create_native_va_display(hw->ra_ctx->ra, hw->log);
+    if (!p->display) {
+        MP_VERBOSE(hw, "Could not create a VA display.\n");
+        return -1;
+    }
+
+    p->ctx = va_initialize(p->display, hw->log, true);
+    if (!p->ctx) {
+        vaTerminate(p->display);
+        return -1;
+    }
+    if (!p->ctx->av_device_ref) {
+        MP_VERBOSE(hw, "libavutil vaapi code rejected the driver?\n");
+        return -1;
+    }
+
+    if (hw->probing && va_guess_if_emulated(p->ctx)) {
+        return -1;
+    }
+
+    determine_working_formats(hw);
+    if (!p->formats || !p->formats[0]) {
+        return -1;
+    }
+
+    // A video processing config is optional: it only enables the
+    // scale_vaapi conversion filter, so failing to get one is not an error.
+    VAConfigID config_id;
+    AVVAAPIHWConfig *hwconfig = NULL;
+    vas = vaCreateConfig(p->display, VAProfileNone, VAEntrypointVideoProc, NULL,
+                         0, &config_id);
+    if (vas == VA_STATUS_SUCCESS) {
+        hwconfig = av_hwdevice_hwconfig_alloc(p->ctx->av_device_ref);
+        if (hwconfig) {
+            hwconfig->config_id = config_id;
+        } else {
+            // The allocation can fail; do not dereference NULL and do not
+            // leak the VA config nobody would own.
+            vaDestroyConfig(p->display, config_id);
+        }
+    }
+
+    // it's now safe to set the display resource
+    ra_add_native_resource(hw->ra_ctx->ra, "VADisplay", p->display);
+
+    p->ctx->hwctx.hw_imgfmt = IMGFMT_VAAPI;
+    p->ctx->hwctx.supported_formats = p->formats;
+    p->ctx->hwctx.driver_name = hw->driver->name;
+    p->ctx->hwctx.conversion_filter_name = "scale_vaapi";
+    p->ctx->hwctx.conversion_config = hwconfig;
+    hwdec_devices_add(hw->devs, &p->ctx->hwctx);
+    return 0;
+}
+
+// Undo mapper_map: let the interop backend release its mapping, then close
+// the dmabuf file descriptors of the exported surface if one is held.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    struct dmabuf_interop_priv *priv = mapper->priv;
+
+    owner->dmabuf_interop.interop_unmap(mapper);
+
+    if (!priv->surface_acquired)
+        return;
+
+    for (int obj = 0; obj < priv->desc.nb_objects; obj++) {
+        close(priv->desc.objects[obj].fd);
+    }
+    priv->surface_acquired = false;
+}
+
+// Forward mapper teardown to the interop backend, if it provides a hook.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+
+    if (!owner->dmabuf_interop.interop_uninit)
+        return;
+
+    owner->dmabuf_interop.interop_uninit(mapper);
+}
+
+// Return true if fmt is in the owner's zero-terminated list of probed
+// formats. A missing list matches nothing.
+static bool check_fmt(struct ra_hwdec_mapper *mapper, int fmt)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+
+    const int *cur = owner->formats;
+    if (!cur)
+        return false;
+
+    for (; *cur; cur++) {
+        if (*cur == fmt)
+            return true;
+    }
+
+    return false;
+}
+
+// Set up a mapper for one stream: derive the output parameters from the
+// source (the hw_subfmt becomes the image format), look up the plane layout,
+// let the interop backend initialize, and check that the format is one of the
+// probed ones. The last check is skipped while formats are being probed.
+// Returns 0 on success and -1 on any failure.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    struct dmabuf_interop_priv *priv = mapper->priv;
+
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+    mapper->dst_params.hw_subfmt = 0;
+
+    // desc stays zeroed if the ra has no formats at all.
+    struct ra_imgfmt_desc desc = {0};
+    if (mapper->ra->num_formats) {
+        if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc))
+            return -1;
+    }
+
+    priv->num_planes = desc.num_planes;
+    mp_image_set_params(&priv->layout, &mapper->dst_params);
+
+    if (owner->dmabuf_interop.interop_init &&
+        !owner->dmabuf_interop.interop_init(mapper, &desc))
+    {
+        return -1;
+    }
+
+    if (owner->probing_formats || check_fmt(mapper, mapper->dst_params.imgfmt))
+        return 0;
+
+    MP_FATAL(mapper, "unsupported VA image format %s\n",
+             mp_imgfmt_to_name(mapper->dst_params.imgfmt));
+    return -1;
+}
+
+// Close the file descriptor of every object listed in the descriptor.
+static void close_file_descriptors(VADRMPRIMESurfaceDescriptor desc)
+{
+    int obj = 0;
+    while (obj < desc.num_objects) {
+        close(desc.objects[obj].fd);
+        obj++;
+    }
+}
+
+// Export the VA surface of mapper->src as DRM PRIME dmabufs, copy the
+// resulting layout into p->desc and hand it to the interop backend for
+// mapping.
+//
+// Returns 0 on success. On failure the mapping is undone with mapper_unmap()
+// and -1 is returned; messages are demoted to debug level while formats are
+// being probed.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *p_owner = mapper->owner->priv;
+    struct dmabuf_interop_priv *p = mapper->priv;
+    VAStatus status;
+    VADisplay *display = p_owner->display;
+    VADRMPRIMESurfaceDescriptor desc = {0};
+
+    uint32_t flags = p_owner->dmabuf_interop.composed_layers ?
+        VA_EXPORT_SURFACE_COMPOSED_LAYERS : VA_EXPORT_SURFACE_SEPARATE_LAYERS;
+    status = vaExportSurfaceHandle(display, va_surface_id(mapper->src),
+                                   VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
+                                   VA_EXPORT_SURFACE_READ_ONLY |
+                                   flags,
+                                   &desc);
+    if (!CHECK_VA_STATUS_LEVEL(mapper, "vaExportSurfaceHandle()",
+                               p_owner->probing_formats ? MSGL_DEBUG : MSGL_ERR))
+    {
+        close_file_descriptors(desc);
+        goto err;
+    }
+    // Store the result so the check below reports the sync call itself
+    // rather than re-testing the status of the export above.
+    status = vaSyncSurface(display, va_surface_id(mapper->src));
+    // No need to error out if sync fails, but good to know if it did.
+    CHECK_VA_STATUS(mapper, "vaSyncSurface()");
+    // From here on mapper_unmap() closes the exported file descriptors.
+    p->surface_acquired = true;
+
+    // We use AVDRMFrameDescriptor to store the dmabuf so we need to copy the
+    // values over.
+    int num_returned_planes = 0;
+    p->desc.nb_layers = desc.num_layers;
+    p->desc.nb_objects = desc.num_objects;
+    for (int i = 0; i < desc.num_layers; i++) {
+        p->desc.layers[i].format = desc.layers[i].drm_format;
+        p->desc.layers[i].nb_planes = desc.layers[i].num_planes;
+        for (int j = 0; j < desc.layers[i].num_planes; j++)
+        {
+            p->desc.layers[i].planes[j].object_index = desc.layers[i].object_index[j];
+            p->desc.layers[i].planes[j].offset = desc.layers[i].offset[j];
+            p->desc.layers[i].planes[j].pitch = desc.layers[i].pitch[j];
+        }
+
+        num_returned_planes += desc.layers[i].num_planes;
+    }
+    for (int i = 0; i < desc.num_objects; i++) {
+        p->desc.objects[i].format_modifier = desc.objects[i].drm_format_modifier;
+        p->desc.objects[i].fd = desc.objects[i].fd;
+        p->desc.objects[i].size = desc.objects[i].size;
+    }
+
+    // We can handle composed formats if the total number of planes is still
+    // equal the number of planes we expect. Complex formats with auxiliary
+    // planes cannot be supported.
+    if (p->num_planes != 0 && p->num_planes != num_returned_planes) {
+        mp_msg(mapper->log, p_owner->probing_formats ? MSGL_DEBUG : MSGL_ERR,
+               "Mapped surface with format '%s' has unexpected number of planes. "
+               "(%d layers and %d planes, but expected %d planes)\n",
+               mp_imgfmt_to_name(mapper->src->params.hw_subfmt),
+               desc.num_layers, num_returned_planes, p->num_planes);
+        goto err;
+    }
+
+    if (!p_owner->dmabuf_interop.interop_map(mapper, &p_owner->dmabuf_interop,
+                                             p_owner->probing_formats))
+        goto err;
+
+    // For YV12 surfaces the second and third textures are exchanged.
+    if (desc.fourcc == VA_FOURCC_YV12)
+        MPSWAP(struct ra_tex*, mapper->tex[1], mapper->tex[2]);
+
+    return 0;
+
+err:
+    mapper_unmap(mapper);
+
+    if (!p_owner->probing_formats)
+        MP_FATAL(mapper, "mapping VAAPI EGL image failed\n");
+    return -1;
+}
+
+// Probe helper: create a temporary mapper for the surface's parameters and
+// try to map the surface once. Returns true if the mapping succeeded; the
+// mapper is freed in either case.
+static bool try_format_map(struct ra_hwdec *hw, struct mp_image *surface)
+{
+    struct ra_hwdec_mapper *probe = ra_hwdec_mapper_create(hw, &surface->params);
+    if (probe == NULL) {
+        MP_DBG(hw, "Failed to create mapper\n");
+        return false;
+    }
+
+    int rc = ra_hwdec_mapper_map(probe, surface);
+    ra_hwdec_mapper_free(&probe);
+
+    return rc >= 0;
+}
+
+// Probe a single software pixel format: allocate a 128x128 VAAPI surface
+// with that sw_format, try to map it, and on success append the format to
+// the owner's zero-terminated format list. Formats without an mpv
+// equivalent and formats already in the list are skipped.
+static void try_format_pixfmt(struct ra_hwdec *hw, enum AVPixelFormat pixfmt)
+{
+    struct priv_owner *owner = hw->priv;
+
+    int mp_fmt = pixfmt2imgfmt(pixfmt);
+    if (!mp_fmt)
+        return;
+
+    // Count the entries already present (terminator excluded).
+    int count = 0;
+    if (owner->formats) {
+        while (owner->formats[count]) {
+            if (owner->formats[count] == mp_fmt)
+                return; // already added
+            count++;
+        }
+    }
+
+    bool works = false;
+    struct mp_image *img = NULL;
+    AVFrame *frame = NULL;
+    AVBufferRef *frames_ref = av_hwframe_ctx_alloc(owner->ctx->av_device_ref);
+    if (!frames_ref)
+        goto done;
+
+    AVHWFramesContext *frames = (void *)frames_ref->data;
+    frames->format = AV_PIX_FMT_VAAPI;
+    frames->sw_format = pixfmt;
+    frames->width = 128;
+    frames->height = 128;
+    if (av_hwframe_ctx_init(frames_ref) < 0)
+        goto done;
+
+    frame = av_frame_alloc();
+    if (!frame)
+        goto done;
+    if (av_hwframe_get_buffer(frames_ref, frame, 0) < 0)
+        goto done;
+
+    img = mp_image_from_av_frame(frame);
+    if (!img || !mp_image_params_valid(&img->params))
+        goto done;
+
+    works = try_format_map(hw, img);
+    if (works) {
+        MP_TARRAY_APPEND(owner, owner->formats, count, mp_fmt);
+        MP_TARRAY_APPEND(owner, owner->formats, count, 0); // terminate it
+    }
+
+done:
+    if (!works) {
+        MP_DBG(hw, "Unsupported format: %s\n",
+               mp_imgfmt_to_name(mp_fmt));
+    }
+
+    talloc_free(img);
+    av_frame_free(&frame);
+    av_buffer_unref(&frames_ref);
+}
+
+// Probe every pixel format reachable through the given VA config: query the
+// frame constraints, build a throw-away frames context from the first valid
+// software format, ask libavutil which formats can be transferred to it, and
+// run try_format_pixfmt() on each of them. Failures are logged as warnings
+// and simply end the probe for this config.
+static void try_format_config(struct ra_hwdec *hw, AVVAAPIHWConfig *hwconfig)
+{
+    struct priv_owner *p = hw->priv;
+    enum AVPixelFormat *fmts = NULL;
+    AVBufferRef *fref = NULL;
+
+    AVHWFramesConstraints *fc =
+            av_hwdevice_get_hwframe_constraints(p->ctx->av_device_ref, hwconfig);
+    if (!fc) {
+        MP_WARN(hw, "failed to retrieve libavutil frame constraints\n");
+        return;
+    }
+
+    // libavutil documents valid_sw_formats as possibly NULL when the list is
+    // not known, so it must not be indexed unconditionally.
+    if (!fc->valid_sw_formats) {
+        MP_WARN(hw, "libavutil frame constraints list no software formats\n");
+        goto err;
+    }
+
+    /*
+     * We need a hwframe_ctx to be able to get the valid formats, but to
+     * initialise it, we need a format, so we get the first format from the
+     * hwconfig. We don't care about the other formats in the config because the
+     * transfer formats list will already include them.
+     */
+    fref = av_hwframe_ctx_alloc(p->ctx->av_device_ref);
+    if (!fref) {
+        MP_WARN(hw, "failed to alloc libavutil frame context\n");
+        goto err;
+    }
+    AVHWFramesContext *fctx = (void *)fref->data;
+    fctx->format = AV_PIX_FMT_VAAPI;
+    fctx->sw_format = fc->valid_sw_formats[0];
+    fctx->width = 128;
+    fctx->height = 128;
+    if (av_hwframe_ctx_init(fref) < 0) {
+        MP_WARN(hw, "failed to init libavutil frame context\n");
+        goto err;
+    }
+
+    int ret = av_hwframe_transfer_get_formats(fref, AV_HWFRAME_TRANSFER_DIRECTION_TO, &fmts, 0);
+    if (ret) {
+        MP_WARN(hw, "failed to get libavutil frame context supported formats\n");
+        goto err;
+    }
+
+    for (int n = 0; fmts &&
+                    fmts[n] != AV_PIX_FMT_NONE; n++)
+        try_format_pixfmt(hw, fmts[n]);
+
+err:
+    av_hwframe_constraints_free(&fc);
+    av_buffer_unref(&fref);
+    av_free(fmts);
+}
+
+// Fill p->formats with the image formats that can actually be mapped.
+//
+// Walks the driver's profiles, and for the first decoding (VLD) entrypoint
+// whose config yields any working format, stops. p->probing_formats is set
+// for the duration so that mapping failures are only logged at debug level.
+// The resulting list is printed at debug level.
+static void determine_working_formats(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    VAStatus status;
+    VAProfile *profiles = NULL;
+    VAEntrypoint *entrypoints = NULL;
+
+    MP_VERBOSE(hw, "Going to probe surface formats (may log bogus errors)...\n");
+    p->probing_formats = true;
+
+    AVVAAPIHWConfig *hwconfig = av_hwdevice_hwconfig_alloc(p->ctx->av_device_ref);
+    if (!hwconfig) {
+        MP_WARN(hw, "Could not allocate FFmpeg AVVAAPIHWConfig\n");
+        goto done;
+    }
+
+    profiles = talloc_zero_array(NULL, VAProfile, vaMaxNumProfiles(p->display));
+    entrypoints = talloc_zero_array(NULL, VAEntrypoint,
+                                    vaMaxNumEntrypoints(p->display));
+    int num_profiles = 0;
+    status = vaQueryConfigProfiles(p->display, profiles, &num_profiles);
+    if (!CHECK_VA_STATUS(hw, "vaQueryConfigProfiles()"))
+        num_profiles = 0;
+
+    /*
+     * We need to find one declared format to bootstrap probing. So find a valid
+     * decoding profile and use its config. If try_format_config() finds any
+     * formats, they will be all the supported formats, and we don't need to
+     * look at any other profiles.
+     */
+    for (int n = 0; n < num_profiles; n++) {
+        VAProfile profile = profiles[n];
+        if (profile == VAProfileNone) {
+            // We don't use the None profile.
+            continue;
+        }
+        int num_ep = 0;
+        status = vaQueryConfigEntrypoints(p->display, profile, entrypoints,
+                                          &num_ep);
+        if (status != VA_STATUS_SUCCESS) {
+            // Newline-terminated so the next log line starts on its own row.
+            MP_DBG(hw, "vaQueryConfigEntrypoints(): '%s' for profile %d\n",
+                   vaErrorStr(status), (int)profile);
+            continue;
+        }
+        for (int ep = 0; ep < num_ep; ep++) {
+            if (entrypoints[ep] != VAEntrypointVLD) {
+                // We are only interested in decoding entrypoints.
+                continue;
+            }
+            VAConfigID config = VA_INVALID_ID;
+            status = vaCreateConfig(p->display, profile, entrypoints[ep],
+                                    NULL, 0, &config);
+            if (status != VA_STATUS_SUCCESS) {
+                MP_DBG(hw, "vaCreateConfig(): '%s' for profile %d\n",
+                       vaErrorStr(status), (int)profile);
+                continue;
+            }
+
+            // The config only lives for the duration of this probe.
+            hwconfig->config_id = config;
+            try_format_config(hw, hwconfig);
+
+            vaDestroyConfig(p->display, config);
+            if (p->formats && p->formats[0]) {
+                goto done;
+            }
+        }
+    }
+
+done:
+    av_free(hwconfig);
+    talloc_free(profiles);
+    talloc_free(entrypoints);
+
+    p->probing_formats = false;
+
+    MP_DBG(hw, "Supported formats:\n");
+    for (int n = 0; p->formats && p->formats[n]; n++)
+        MP_DBG(hw, " %s\n", mp_imgfmt_to_name(p->formats[n]));
+    MP_VERBOSE(hw, "Done probing surface formats.\n");
+}
+
+// Driver descriptor for the "vaapi" hwdec. The owner state is a
+// struct priv_owner; each mapper carries a struct dmabuf_interop_priv and
+// uses the mapper_* callbacks defined in this file.
+const struct ra_hwdec_driver ra_hwdec_vaapi = {
+    .name = "vaapi",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_VAAPI, 0},
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct dmabuf_interop_priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/hwdec/hwdec_vt.c b/video/out/hwdec/hwdec_vt.c
new file mode 100644
index 0000000..ab41d02
--- /dev/null
+++ b/video/out/hwdec/hwdec_vt.c
@@ -0,0 +1,141 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <string.h>
+#include <assert.h>
+#include <unistd.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_videotoolbox.h>
+
+#include "config.h"
+
+#include "video/out/gpu/hwdec.h"
+#include "video/out/hwdec/hwdec_vt.h"
+
+// Removes this driver's hwdec context from hw->devs and drops the reference
+// to the libavutil device held in it.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *owner = hw->priv;
+    struct mp_hwdec_ctx *ctx = &owner->hwctx;
+
+    hwdec_devices_remove(hw->devs, ctx);
+    av_buffer_unref(&ctx->av_device_ref);
+}
+
+// NULL-terminated list of interop initializers, tried in order by init().
+// Which entries are present depends on the HAVE_* build options.
+static const vt_interop_init interop_inits[] = {
+#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL
+    vt_gl_init,
+#endif
+#if HAVE_VIDEOTOOLBOX_PL
+    vt_pl_init,
+#endif
+    NULL
+};
+
+// Initializes the VideoToolbox hwdec driver: selects an interop backend,
+// creates the libavutil VideoToolbox device and adds it to hw->devs.
+// Returns 0 on success and -1 on failure.
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *owner = hw->priv;
+
+    // Walk the NULL-terminated initializer list and stop at the first entry
+    // that reports success.
+    const vt_interop_init *candidate = interop_inits;
+    while (*candidate && !(*candidate)(hw))
+        candidate++;
+
+    // A usable interop must have provided both the map and unmap callbacks.
+    if (!(owner->interop_map && owner->interop_unmap)) {
+        MP_VERBOSE(hw, "VT hwdec only works with OpenGL or Vulkan backends.\n");
+        return -1;
+    }
+
+    owner->hwctx = (struct mp_hwdec_ctx){
+        .driver_name = hw->driver->name,
+        .hw_imgfmt = IMGFMT_VIDEOTOOLBOX,
+    };
+
+    int err = av_hwdevice_ctx_create(&owner->hwctx.av_device_ref,
+                                     AV_HWDEVICE_TYPE_VIDEOTOOLBOX, NULL, NULL, 0);
+    if (err) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx: %s\n", av_err2str(err));
+        return -1;
+    }
+
+    hwdec_devices_add(hw->devs, &owner->hwctx);
+    return 0;
+}
+
+// Forwards the unmap request to the interop backend selected in init().
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    owner->interop_unmap(mapper);
+}
+
+// Forwards mapper teardown to the interop backend. The uninit hook is
+// optional, so nothing happens when the backend did not provide one.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+
+    if (!owner->interop_uninit)
+        return;
+
+    owner->interop_uninit(mapper);
+}
+
+// Sets up a mapper instance: the destination parameters are the source
+// parameters with the hardware sub-format promoted to the image format. The
+// texture layout for that format is stored in the mapper's priv, then the
+// interop backend's own init hook (if any) is run.
+// Returns 0 on success, -1 on failure (or whatever the interop hook returns).
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    struct priv *priv = mapper->priv;
+    struct mp_image_params *dst = &mapper->dst_params;
+
+    *dst = mapper->src_params;
+    dst->imgfmt = mapper->src_params.hw_subfmt;
+    dst->hw_subfmt = 0;
+
+    if (dst->imgfmt == 0) {
+        MP_ERR(mapper, "Unsupported CVPixelBuffer format.\n");
+        return -1;
+    }
+
+    if (!ra_get_imgfmt_desc(mapper->ra, dst->imgfmt, &priv->desc)) {
+        MP_ERR(mapper, "Unsupported texture format.\n");
+        return -1;
+    }
+
+    // The interop init hook is optional.
+    return owner->interop_init ? owner->interop_init(mapper) : 0;
+}
+
+// Forwards the map request to the interop backend and returns its result.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    int result = owner->interop_map(mapper);
+
+    return result;
+}
+
+// hwdec driver descriptor named "videotoolbox". It lists IMGFMT_VIDEOTOOLBOX
+// as its image format; the mapper callbacks below dispatch to whichever
+// interop backend init() selected.
+const struct ra_hwdec_driver ra_hwdec_videotoolbox = {
+    .name = "videotoolbox",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_VIDEOTOOLBOX, 0},
+    .init = init,
+    .uninit = uninit,
+    // Per-mapper state is a struct priv.
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/hwdec/hwdec_vt.h b/video/out/hwdec/hwdec_vt.h
new file mode 100644
index 0000000..b79c641
--- /dev/null
+++ b/video/out/hwdec/hwdec_vt.h
@@ -0,0 +1,63 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include <CoreVideo/CoreVideo.h>
+
+#include "config.h"
+#include "video/out/gpu/hwdec.h"
+
+// Driver-level state of the VideoToolbox hwdec, plus the callbacks installed
+// by the interop backend that was selected at init time.
+struct priv_owner {
+    // hwdec context registered with the hwdec device list.
+    struct mp_hwdec_ctx hwctx;
+
+    // Optional per-mapper setup/teardown hooks; callers check them for NULL.
+    int (*interop_init)(struct ra_hwdec_mapper *mapper);
+    void (*interop_uninit)(struct ra_hwdec_mapper *mapper);
+
+    // Required hooks: driver init fails if either one is left unset.
+    int (*interop_map)(struct ra_hwdec_mapper *mapper);
+    void (*interop_unmap)(struct ra_hwdec_mapper *mapper);
+};
+
+// When this header is not compiled as Objective-C, declare the Metal texture
+// cache types here so that struct priv below can still name them.
+// NOTE(review): presumably CoreVideo only exposes these types to Objective-C
+// translation units - confirm against the SDK headers.
+#ifndef __OBJC__
+typedef struct __CVMetalTextureCache *CVMetalTextureCacheRef;
+typedef CVImageBufferRef CVMetalTextureRef;
+#endif
+
+// Per-mapper state shared by the VideoToolbox interop backends. Which
+// texture members exist depends on the HAVE_* build options.
+struct priv {
+    void *interop_mapper_priv;
+
+    // Pixel buffer of the frame being mapped.
+    CVPixelBufferRef pbuf;
+
+#if HAVE_VIDEOTOOLBOX_GL
+    GLuint gl_planes[MP_MAX_PLANES];
+#elif HAVE_IOS_GL
+    CVOpenGLESTextureCacheRef gl_texture_cache;
+    CVOpenGLESTextureRef gl_planes[MP_MAX_PLANES];
+#endif
+
+#if HAVE_VIDEOTOOLBOX_PL
+    // Metal texture cache and the per-plane textures created from it.
+    CVMetalTextureCacheRef mtl_texture_cache;
+    CVMetalTextureRef mtl_planes[MP_MAX_PLANES];
+#endif
+
+    // Texture layout of the destination (software) image format.
+    struct ra_imgfmt_desc desc;
+};
+
+// Signature of an interop initializer. vt_pl_init() returns true after
+// installing its callbacks in the driver's priv_owner and false when the
+// interop cannot be used; vt_gl_init() presumably follows the same contract
+// (its definition is not in this header - confirm).
+typedef bool (*vt_interop_init)(const struct ra_hwdec *hw);
+
+bool vt_gl_init(const struct ra_hwdec *hw);
+bool vt_pl_init(const struct ra_hwdec *hw);
diff --git a/video/out/hwdec/hwdec_vt_pl.m b/video/out/hwdec/hwdec_vt_pl.m
new file mode 100644
index 0000000..cd133a0
--- /dev/null
+++ b/video/out/hwdec/hwdec_vt_pl.m
@@ -0,0 +1,312 @@
+/*
+ * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com>
+ *               2017 Aman Gupta <ffmpeg@tmm1.net>
+ *               2023 rcombs <rcombs@rcombs.me>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <CoreVideo/CoreVideo.h>
+#include <Metal/Metal.h>
+
+#include <libavutil/hwcontext.h>
+
+#include <libplacebo/renderer.h>
+
+#include "config.h"
+
+#include "video/out/gpu/hwdec.h"
+#include "video/out/placebo/ra_pl.h"
+#include "video/mp_image_pool.h"
+
+#if HAVE_VULKAN
+#include "video/out/vulkan/common.h"
+#endif
+
+#include "hwdec_vt.h"
+
+// Returns true if hw's RA is backed by libplacebo and its GPU advertises
+// PL_HANDLE_MTL_TEX texture import; returns false otherwise.
+static bool check_hwdec(const struct ra_hwdec *hw)
+{
+    pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);
+
+    // No pl_gpu means this is not a libplacebo RA.
+    if (!gpu)
+        return false;
+
+    if (gpu->import_caps.tex & PL_HANDLE_MTL_TEX)
+        return true;
+
+    MP_VERBOSE(hw, "VideoToolbox libplacebo interop requires support for "
+                   "PL_HANDLE_MTL_TEX import.\n");
+    return false;
+}
+
+// Prepares a mapper instance for the libplacebo/Metal interop.
+//
+// Derives the destination parameters from the hardware sub-format, verifies
+// that every plane uses a UNORM texture format, and creates the
+// CVMetalTextureCache that mapper_map() uses to wrap pixel buffer planes.
+// Returns 0 on success, -1 on failure.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+    mapper->dst_params.hw_subfmt = 0;
+
+    if (!mapper->dst_params.imgfmt) {
+        MP_ERR(mapper, "Unsupported CVPixelBuffer format.\n");
+        return -1;
+    }
+
+    if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &p->desc)) {
+        MP_ERR(mapper, "Unsupported texture format.\n");
+        return -1;
+    }
+
+    for (int n = 0; n < p->desc.num_planes; n++) {
+        if (!p->desc.planes[n] || p->desc.planes[n]->ctype != RA_CTYPE_UNORM) {
+            MP_ERR(mapper, "Format unsupported.\n");
+            return -1;
+        }
+    }
+
+    id<MTLDevice> mtl_device = nil;
+
+#ifdef VK_EXT_METAL_OBJECTS_SPEC_VERSION
+    // Prefer the Metal device that backs the Vulkan device in use, if the
+    // Vulkan implementation can export it.
+    pl_gpu gpu = ra_pl_get(mapper->ra);
+    if (gpu) {
+        pl_vulkan vulkan = pl_vulkan_get(gpu);
+        if (vulkan && vulkan->device && vulkan->instance && vulkan->get_proc_addr) {
+            PFN_vkExportMetalObjectsEXT pExportMetalObjects =
+                (PFN_vkExportMetalObjectsEXT)vulkan->get_proc_addr(
+                    vulkan->instance, "vkExportMetalObjectsEXT");
+            if (pExportMetalObjects) {
+                VkExportMetalDeviceInfoEXT device_info = {
+                    .sType = VK_STRUCTURE_TYPE_EXPORT_METAL_DEVICE_INFO_EXT,
+                    .pNext = NULL,
+                    .mtlDevice = nil,
+                };
+
+                VkExportMetalObjectsInfoEXT objects_info = {
+                    .sType = VK_STRUCTURE_TYPE_EXPORT_METAL_OBJECTS_INFO_EXT,
+                    .pNext = &device_info,
+                };
+
+                pExportMetalObjects(vulkan->device, &objects_info);
+
+                // Retain the exported device so that the release below is
+                // balanced on both this path and the fallback path.
+                mtl_device = device_info.mtlDevice;
+                [mtl_device retain];
+            }
+        }
+    }
+#endif
+
+    // Fall back to the system default device.
+    if (!mtl_device) {
+        mtl_device = MTLCreateSystemDefaultDevice();
+    }
+
+    // Without any Metal device there is nothing to build the cache on.
+    if (!mtl_device) {
+        MP_ERR(mapper, "No Metal device available.\n");
+        return -1;
+    }
+
+    CVReturn err = CVMetalTextureCacheCreate(
+        kCFAllocatorDefault,
+        NULL,
+        mtl_device,
+        NULL,
+        &p->mtl_texture_cache);
+
+    [mtl_device release];
+
+    if (err != noErr) {
+        MP_ERR(mapper, "Failure in CVMetalTextureCacheCreate: %d\n", err);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Frees the per-plane RA textures, releases the CoreVideo Metal textures
+// backing them, and flushes the texture cache.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    const int num_planes = priv->desc.num_planes;
+
+    for (int plane = 0; plane < num_planes; plane++) {
+        ra_tex_free(mapper->ra, &mapper->tex[plane]);
+
+        if (!priv->mtl_planes[plane])
+            continue;
+
+        CFRelease(priv->mtl_planes[plane]);
+        priv->mtl_planes[plane] = NULL;
+    }
+
+    CVMetalTextureCacheFlush(priv->mtl_texture_cache, 0);
+}
+
+// Lookup table from a format name string (matched against a pl_fmt's
+// glsl_format by get_mtl_fmt()) to the corresponding Metal pixel format.
+// The table is terminated by an entry with a NULL name.
+static const struct {
+    const char *glsl;
+    MTLPixelFormat mtl;
+} mtl_fmts[] = {
+    // Floating point formats
+    {"r16f",           MTLPixelFormatR16Float     },
+    {"r32f",           MTLPixelFormatR32Float     },
+    {"rg16f",          MTLPixelFormatRG16Float    },
+    {"rg32f",          MTLPixelFormatRG32Float    },
+    {"rgba16f",        MTLPixelFormatRGBA16Float  },
+    {"rgba32f",        MTLPixelFormatRGBA32Float  },
+    {"r11f_g11f_b10f", MTLPixelFormatRG11B10Float },
+
+    // Unsigned normalized formats
+    {"r8",             MTLPixelFormatR8Unorm      },
+    {"r16",            MTLPixelFormatR16Unorm     },
+    {"rg8",            MTLPixelFormatRG8Unorm     },
+    {"rg16",           MTLPixelFormatRG16Unorm    },
+    {"rgba8",          MTLPixelFormatRGBA8Unorm   },
+    {"rgba16",         MTLPixelFormatRGBA16Unorm  },
+    {"rgb10_a2",       MTLPixelFormatRGB10A2Unorm },
+
+    // Signed normalized formats
+    {"r8_snorm",       MTLPixelFormatR8Snorm      },
+    {"r16_snorm",      MTLPixelFormatR16Snorm     },
+    {"rg8_snorm",      MTLPixelFormatRG8Snorm     },
+    {"rg16_snorm",     MTLPixelFormatRG16Snorm    },
+    {"rgba8_snorm",    MTLPixelFormatRGBA8Snorm   },
+    {"rgba16_snorm",   MTLPixelFormatRGBA16Snorm  },
+
+    // Unsigned integer formats
+    {"r8ui",           MTLPixelFormatR8Uint       },
+    {"r16ui",          MTLPixelFormatR16Uint      },
+    {"r32ui",          MTLPixelFormatR32Uint      },
+    {"rg8ui",          MTLPixelFormatRG8Uint      },
+    {"rg16ui",         MTLPixelFormatRG16Uint     },
+    {"rg32ui",         MTLPixelFormatRG32Uint     },
+    {"rgba8ui",        MTLPixelFormatRGBA8Uint    },
+    {"rgba16ui",       MTLPixelFormatRGBA16Uint   },
+    {"rgba32ui",       MTLPixelFormatRGBA32Uint   },
+    {"rgb10_a2ui",     MTLPixelFormatRGB10A2Uint  },
+
+    // Signed integer formats
+    {"r8i",            MTLPixelFormatR8Sint       },
+    {"r16i",           MTLPixelFormatR16Sint      },
+    {"r32i",           MTLPixelFormatR32Sint      },
+    {"rg8i",           MTLPixelFormatRG8Sint      },
+    {"rg16i",          MTLPixelFormatRG16Sint     },
+    {"rg32i",          MTLPixelFormatRG32Sint     },
+    {"rgba8i",         MTLPixelFormatRGBA8Sint    },
+    {"rgba16i",        MTLPixelFormatRGBA16Sint   },
+    {"rgba32i",        MTLPixelFormatRGBA32Sint   },
+
+    // Terminator
+    { NULL,            MTLPixelFormatInvalid },
+};
+
+// Looks up the Metal pixel format for a format name in mtl_fmts.
+// Returns MTLPixelFormatInvalid for a NULL or unknown name.
+static MTLPixelFormat get_mtl_fmt(const char* glsl)
+{
+    MTLPixelFormat found = MTLPixelFormatInvalid;
+
+    if (glsl) {
+        // Scan up to the NULL-named terminator entry.
+        int idx = 0;
+        while (mtl_fmts[idx].glsl) {
+            if (strcmp(glsl, mtl_fmts[idx].glsl) == 0) {
+                found = mtl_fmts[idx].mtl;
+                break;
+            }
+            idx++;
+        }
+    }
+
+    return found;
+}
+
+// Maps the current source frame: for each plane of the frame's
+// CVPixelBuffer, creates a Metal texture through the texture cache, imports
+// it into libplacebo and wraps the result as an ra_tex in mapper->tex[].
+// Returns 0 on success, -1 on failure.
+// NOTE(review): the failure paths return without releasing what was created
+// so far; presumably the caller invokes unmap after a failed map - confirm.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    pl_gpu gpu = ra_pl_get(mapper->owner->ra_ctx->ra);
+
+    // Swap the retained pixel buffer for the one carried in planes[3] of the
+    // new source image.
+    CVPixelBufferRelease(p->pbuf);
+    p->pbuf = (CVPixelBufferRef)mapper->src->planes[3];
+    CVPixelBufferRetain(p->pbuf);
+
+    const bool planar = CVPixelBufferIsPlanar(p->pbuf);
+    const int planes  = CVPixelBufferGetPlaneCount(p->pbuf);
+    assert((planar && planes == p->desc.num_planes) || p->desc.num_planes == 1);
+
+    for (int i = 0; i < p->desc.num_planes; i++) {
+        const struct ra_format *fmt = p->desc.planes[i];
+
+        // Translate the RA format to a Metal pixel format via the name of
+        // its libplacebo format.
+        pl_fmt plfmt = ra_pl_fmt_get(fmt);
+        MTLPixelFormat format = get_mtl_fmt(plfmt->glsl_format);
+
+        if (!format) {
+            MP_ERR(mapper, "Format unsupported.\n");
+            return -1;
+        }
+
+        size_t width  = CVPixelBufferGetWidthOfPlane(p->pbuf, i),
+               height = CVPixelBufferGetHeightOfPlane(p->pbuf, i);
+
+        CVReturn err = CVMetalTextureCacheCreateTextureFromImage(
+            kCFAllocatorDefault,
+            p->mtl_texture_cache,
+            p->pbuf,
+            NULL,
+            format,
+            width,
+            height,
+            i,
+            &p->mtl_planes[i]);
+
+        if (err != noErr) {
+            MP_ERR(mapper, "error creating texture for plane %d: %d\n", i, err);
+            return -1;
+        }
+
+        // Import the Metal texture into libplacebo as a sampleable texture.
+        struct pl_tex_params tex_params = {
+            .w = width,
+            .h = height,
+            .d = 0,
+            .format = plfmt,
+            .sampleable = true,
+            .import_handle = PL_HANDLE_MTL_TEX,
+            .shared_mem = (struct pl_shared_mem) {
+                .handle = {
+                    .handle = CVMetalTextureGetTexture(p->mtl_planes[i]),
+                },
+            },
+        };
+
+        pl_tex pltex = pl_tex_create(gpu, &tex_params);
+        if (!pltex)
+            return -1;
+
+        // Wrap the libplacebo texture in an ra_tex; on failure both the
+        // texture and the wrapper allocation are released here.
+        struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+        int ret = mppl_wrap_tex(mapper->ra, pltex, ratex);
+        if (!ret) {
+            pl_tex_destroy(gpu, &pltex);
+            talloc_free(ratex);
+            return -1;
+        }
+        mapper->tex[i] = ratex;
+    }
+
+    return 0;
+}
+
+// Releases the retained pixel buffer and, if one was created, the Metal
+// texture cache.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    CVPixelBufferRelease(priv->pbuf);
+
+    if (!priv->mtl_texture_cache)
+        return;
+
+    CFRelease(priv->mtl_texture_cache);
+    priv->mtl_texture_cache = NULL;
+}
+
+// Installs the libplacebo/Metal interop callbacks in the driver's priv_owner.
+// Returns false, leaving the callbacks untouched, if check_hwdec() rejects
+// the current RA; returns true otherwise.
+bool vt_pl_init(const struct ra_hwdec *hw)
+{
+    if (!check_hwdec(hw))
+        return false;
+
+    struct priv_owner *owner = hw->priv;
+
+    owner->interop_init   = mapper_init;
+    owner->interop_uninit = mapper_uninit;
+    owner->interop_map    = mapper_map;
+    owner->interop_unmap  = mapper_unmap;
+
+    return true;
+}
diff --git a/video/out/hwdec/hwdec_vulkan.c b/video/out/hwdec/hwdec_vulkan.c
new file mode 100644
index 0000000..5f7354d
--- /dev/null
+++ b/video/out/hwdec/hwdec_vulkan.c
@@ -0,0 +1,333 @@
+/*
+ * Copyright (c) 2022 Philip Langdale <philipl@overt.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+#include "video/out/gpu/hwdec.h"
+#include "video/out/vulkan/context.h"
+#include "video/out/placebo/ra_pl.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_vulkan.h>
+
+// Driver-level state of the Vulkan hwdec.
+struct vulkan_hw_priv {
+    // hwdec context registered with the hwdec device list.
+    struct mp_hwdec_ctx hwctx;
+    // libplacebo GPU obtained from the RA at init time.
+    pl_gpu gpu;
+};
+
+// Per-mapper state of the Vulkan hwdec.
+struct vulkan_mapper_priv {
+    // Image set up from the destination params; used for its plane count
+    // and image format.
+    struct mp_image layout;
+    // Frame that is currently mapped; NULL while nothing is mapped.
+    AVVkFrame *vkf;
+    // libplacebo textures wrapping the frame's images, one per plane.
+    pl_tex tex[4];
+};
+
+// Queue lock callback installed into the FFmpeg Vulkan device context. It
+// forwards to the lock_queue function of the pl_vulkan stored in
+// ctx->user_opaque.
+static void lock_queue(struct AVHWDeviceContext *ctx,
+                       uint32_t queue_family, uint32_t index)
+{
+    pl_vulkan pl_vk = ctx->user_opaque;
+
+    pl_vk->lock_queue(pl_vk, queue_family, index);
+}
+
+// Queue unlock callback installed into the FFmpeg Vulkan device context. It
+// forwards to the unlock_queue function of the pl_vulkan stored in
+// ctx->user_opaque.
+static void unlock_queue(struct AVHWDeviceContext *ctx,
+                         uint32_t queue_family, uint32_t index)
+{
+    pl_vulkan pl_vk = ctx->user_opaque;
+
+    pl_vk->unlock_queue(pl_vk, queue_family, index);
+}
+
+// Creates an FFmpeg Vulkan hwdevice context that reuses the Vulkan instance,
+// device and queues of the libplacebo context behind hw->ra_ctx, and
+// registers it in hw->devs.
+//
+// Returns -1 if queue family discovery or hwdevice setup fails.
+// NOTE(review): the two early exits below (no Vulkan context, no pl_gpu)
+// return 0 without registering a device - confirm with the caller whether
+// that is intended or should be reported as a failure.
+static int vulkan_init(struct ra_hwdec *hw)
+{
+    AVBufferRef *hw_device_ctx = NULL;
+    int ret = 0;
+    struct vulkan_hw_priv *p = hw->priv;
+    // Be quiet while merely probing; report real errors otherwise.
+    int level = hw->probing ? MSGL_V : MSGL_ERR;
+
+    struct mpvk_ctx *vk = ra_vk_ctx_get(hw->ra_ctx);
+    if (!vk) {
+        MP_MSG(hw, level, "This is not a libplacebo vulkan gpu api context.\n");
+        return 0;
+    }
+
+    p->gpu = ra_pl_get(hw->ra_ctx->ra);
+    if (!p->gpu) {
+        MP_MSG(hw, level, "Failed to obtain pl_gpu.\n");
+        return 0;
+    }
+
+    /*
+     * libplacebo initialises all queues, but we still need to discover which
+     * one is the decode queue.
+     */
+    uint32_t num_qf = 0;
+    VkQueueFamilyProperties *qf = NULL;
+    vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, NULL);
+    if (!num_qf)
+        goto error;
+
+    qf = talloc_array(NULL, VkQueueFamilyProperties, num_qf);
+    vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, qf);
+
+    // -1 / 0 are passed on to FFmpeg if no decode-capable family exists.
+    int decode_index = -1, decode_count = 0;
+    for (uint32_t i = 0; i < num_qf; i++) {
+        /*
+         * Pick the first discovered decode queue that we find. Maybe a day will
+         * come when this needs to be smarter, but I'm sure a bunch of other
+         * things will have to change too.
+         */
+        if ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) {
+            decode_index = (int)i;
+            decode_count = qf[i].queueCount;
+            break;
+        }
+    }
+
+    hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN);
+    if (!hw_device_ctx)
+        goto error;
+
+    AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data;
+    AVVulkanDeviceContext *device_hwctx = device_ctx->hwctx;
+
+    // user_opaque carries the pl_vulkan for the queue lock callbacks.
+    device_ctx->user_opaque = (void *)vk->vulkan;
+    device_hwctx->lock_queue = lock_queue;
+    device_hwctx->unlock_queue = unlock_queue;
+    device_hwctx->get_proc_addr = vk->vkinst->get_proc_addr;
+    device_hwctx->inst = vk->vkinst->instance;
+    device_hwctx->phys_dev = vk->vulkan->phys_device;
+    device_hwctx->act_dev = vk->vulkan->device;
+    device_hwctx->device_features = *vk->vulkan->features;
+    device_hwctx->enabled_inst_extensions = vk->vkinst->extensions;
+    device_hwctx->nb_enabled_inst_extensions = vk->vkinst->num_extensions;
+    device_hwctx->enabled_dev_extensions = vk->vulkan->extensions;
+    device_hwctx->nb_enabled_dev_extensions = vk->vulkan->num_extensions;
+    device_hwctx->queue_family_index = vk->vulkan->queue_graphics.index;
+    device_hwctx->nb_graphics_queues = vk->vulkan->queue_graphics.count;
+    device_hwctx->queue_family_tx_index = vk->vulkan->queue_transfer.index;
+    device_hwctx->nb_tx_queues = vk->vulkan->queue_transfer.count;
+    device_hwctx->queue_family_comp_index = vk->vulkan->queue_compute.index;
+    device_hwctx->nb_comp_queues = vk->vulkan->queue_compute.count;
+    device_hwctx->queue_family_decode_index = decode_index;
+    device_hwctx->nb_decode_queues = decode_count;
+
+    ret = av_hwdevice_ctx_init(hw_device_ctx);
+    if (ret < 0) {
+        MP_MSG(hw, level, "av_hwdevice_ctx_init failed\n");
+        goto error;
+    }
+
+    // The device reference is now owned by p->hwctx.
+    p->hwctx = (struct mp_hwdec_ctx) {
+        .driver_name = hw->driver->name,
+        .av_device_ref = hw_device_ctx,
+        .hw_imgfmt = IMGFMT_VULKAN,
+    };
+    hwdec_devices_add(hw->devs, &p->hwctx);
+
+    talloc_free(qf);
+    return 0;
+
+ error:
+    talloc_free(qf);
+    av_buffer_unref(&hw_device_ctx);
+    return -1;
+}
+
+// Removes the hwdec context from hw->devs and drops the reference to the
+// libavutil device held in it.
+static void vulkan_uninit(struct ra_hwdec *hw)
+{
+    struct vulkan_hw_priv *priv = hw->priv;
+    struct mp_hwdec_ctx *ctx = &priv->hwctx;
+
+    hwdec_devices_remove(hw->devs, ctx);
+    av_buffer_unref(&ctx->av_device_ref);
+}
+
+// Sets up a mapper instance: the destination parameters are the source
+// parameters with the hardware sub-format promoted to the image format, and
+// p->layout is configured from them. Returns 0 if the RA can describe that
+// format, -1 otherwise.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct vulkan_mapper_priv *priv = mapper->priv;
+    struct mp_image_params *dst = &mapper->dst_params;
+
+    *dst = mapper->src_params;
+    dst->imgfmt = mapper->src_params.hw_subfmt;
+    dst->hw_subfmt = 0;
+
+    mp_image_set_params(&priv->layout, dst);
+
+    // The descriptor is only needed to check that the format is supported.
+    struct ra_imgfmt_desc unused_desc = {0};
+    return ra_get_imgfmt_desc(mapper->ra, dst->imgfmt, &unused_desc) ? 0 : -1;
+}
+
+// Intentionally empty: this function performs no per-mapper teardown.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+
+}
+
+// Hands the mapped frame back to FFmpeg and frees the per-plane textures.
+//
+// For every texture wrapped by mapper_map(), the image is taken back from
+// libplacebo with pl_vulkan_hold_ex() and the AVVkFrame's layout, access and
+// semaphore state is updated; afterwards the frame lock taken in
+// mapper_map() is released. If no frame is currently mapped (no source
+// image, or mapper_map() failed before recording the frame in p->vkf), only
+// the textures are freed.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct vulkan_hw_priv *p_owner = mapper->owner->priv;
+    struct vulkan_mapper_priv *p = mapper->priv;
+    const int max_tex = sizeof(p->tex) / sizeof(p->tex[0]);
+    AVVkFrame *vkf = p->vkf;
+
+    // p->vkf is only set once mapper_map() has fully succeeded. Its error
+    // path unlocks the frame itself before calling in here, so in that case
+    // there is nothing to hold or unlock (and no frame to dereference).
+    if (!mapper->src || !vkf)
+        goto end;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
+    const AVVulkanFramesContext *vkfc = hwfc->hwctx;
+
+    int num_images;
+    for (num_images = 0; (vkf->img[num_images] != VK_NULL_HANDLE); num_images++);
+
+    // Stop at the first unused slot, but never read past the end of p->tex:
+    // with a 4-plane format every slot is in use and there is no NULL
+    // terminator to find.
+    for (int i = 0; i < max_tex && p->tex[i]; i++) {
+        pl_tex *tex = &p->tex[i];
+
+        // If we have multiple planes and one image, then that is a multiplane
+        // frame. Anything else is treated as one-image-per-plane.
+        int index = p->layout.num_planes > 1 && num_images == 1 ? 0 : i;
+
+        // Update AVVkFrame state to reflect current layout
+        bool ok = pl_vulkan_hold_ex(p_owner->gpu, pl_vulkan_hold_params(
+            .tex = *tex,
+            .out_layout = &vkf->layout[index],
+            .qf = VK_QUEUE_FAMILY_IGNORED,
+            .semaphore = (pl_vulkan_sem) {
+                .sem = vkf->sem[index],
+                .value = vkf->sem_value[index] + 1,
+            },
+        ));
+
+        vkf->access[index] = 0;
+        // Only advance the semaphore value if the hold was submitted.
+        vkf->sem_value[index] += !!ok;
+        *tex = NULL;
+    }
+
+    vkfc->unlock_frame(hwfc, vkf);
+
+ end:
+    for (int i = 0; i < p->layout.num_planes; i++)
+        ra_tex_free(mapper->ra, &mapper->tex[i]);
+
+    // Drop any texture pointers left over from a map that failed part-way;
+    // the wrappers that referenced them were freed just above.
+    for (int i = 0; i < max_tex; i++)
+        p->tex[i] = NULL;
+
+    p->vkf = NULL;
+}
+
+// Maps the current source frame: locks the AVVkFrame, wraps each plane's
+// VkImage as a libplacebo texture, releases it to libplacebo with the
+// frame's current layout and semaphore state, and exposes it as an ra_tex in
+// mapper->tex[]. On success the frame is recorded in p->vkf and stays locked
+// until mapper_unmap(). Returns 0 on success, -1 on failure.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    bool result = false;
+    struct vulkan_hw_priv *p_owner = mapper->owner->priv;
+    struct vulkan_mapper_priv *p = mapper->priv;
+    pl_vulkan vk = pl_vulkan_get(p_owner->gpu);
+    if (!vk)
+        return -1;
+
+    AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;
+    const AVVulkanFramesContext *vkfc = hwfc->hwctx;
+    AVVkFrame *vkf = (AVVkFrame *) mapper->src->planes[0];
+
+    /*
+     * We need to use the dimensions from the HW Frames Context for the
+     * textures, as the underlying images may be larger than the logical frame
+     * size. This most often happens with 1080p content where the actual frame
+     * height is 1088.
+     */
+    struct mp_image raw_layout;
+    mp_image_setfmt(&raw_layout, p->layout.params.imgfmt);
+    mp_image_set_size(&raw_layout, hwfc->width, hwfc->height);
+
+    // Count the images in the frame; the img[] array is scanned up to the
+    // first VK_NULL_HANDLE entry.
+    int num_images;
+    for (num_images = 0; (vkf->img[num_images] != VK_NULL_HANDLE); num_images++);
+    // NOTE(review): vk_fmt is indexed below without a NULL check - confirm
+    // av_vkfmt_from_pixfmt() cannot fail for formats that reach this point.
+    const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
+
+    vkfc->lock_frame(hwfc, vkf);
+
+    for (int i = 0; i < p->layout.num_planes; i++) {
+        pl_tex *tex = &p->tex[i];
+        VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT;
+        int index = i;
+
+        // If we have multiple planes and one image, then that is a multiplane
+        // frame. Anything else is treated as one-image-per-plane.
+        if (p->layout.num_planes > 1 && num_images == 1) {
+            index = 0;
+
+            // Select the plane of the single image through its aspect bit;
+            // more than three planes cannot be expressed this way.
+            switch (i) {
+            case 0:
+                aspect = VK_IMAGE_ASPECT_PLANE_0_BIT_KHR;
+                break;
+            case 1:
+                aspect = VK_IMAGE_ASPECT_PLANE_1_BIT_KHR;
+                break;
+            case 2:
+                aspect = VK_IMAGE_ASPECT_PLANE_2_BIT_KHR;
+                break;
+            default:
+                goto error;
+            }
+        }
+
+        *tex = pl_vulkan_wrap(p_owner->gpu, pl_vulkan_wrap_params(
+            .image = vkf->img[index],
+            .width = mp_image_plane_w(&raw_layout, i),
+            .height = mp_image_plane_h(&raw_layout, i),
+            .format = vk_fmt[i],
+            .usage = vkfc->usage,
+            .aspect = aspect,
+        ));
+        if (!*tex)
+            goto error;
+
+        // Release the wrapped image to libplacebo, passing along the frame's
+        // current layout and semaphore value.
+        pl_vulkan_release_ex(p_owner->gpu, pl_vulkan_release_params(
+            .tex = p->tex[i],
+            .layout = vkf->layout[index],
+            .qf = VK_QUEUE_FAMILY_IGNORED,
+            .semaphore = (pl_vulkan_sem) {
+                .sem = vkf->sem[index],
+                .value = vkf->sem_value[index],
+            },
+        ));
+
+        // Wrap the libplacebo texture in an ra_tex; on failure both the
+        // texture and the wrapper allocation are released here.
+        struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+        result = mppl_wrap_tex(mapper->ra, *tex, ratex);
+        if (!result) {
+            pl_tex_destroy(p_owner->gpu, tex);
+            talloc_free(ratex);
+            goto error;
+        }
+        mapper->tex[i] = ratex;
+    }
+
+    // Only a fully mapped frame is recorded.
+    p->vkf = vkf;
+    return 0;
+
+ error:
+    // p->vkf has not been set at this point, so the frame is unlocked here
+    // before the remaining cleanup is delegated to mapper_unmap().
+    vkfc->unlock_frame(hwfc, vkf);
+    mapper_unmap(mapper);
+    return -1;
+}
+
+// hwdec driver descriptor named "vulkan". It lists IMGFMT_VULKAN as its image
+// format and wires up the driver-level and per-mapper callbacks defined in
+// this file.
+const struct ra_hwdec_driver ra_hwdec_vulkan = {
+    .name = "vulkan",
+    .imgfmts = {IMGFMT_VULKAN, 0},
+    .priv_size = sizeof(struct vulkan_hw_priv),
+    .init = vulkan_init,
+    .uninit = vulkan_uninit,
+    // Per-mapper state is a struct vulkan_mapper_priv.
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct vulkan_mapper_priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/libmpv.h b/video/out/libmpv.h
new file mode 100644
index 0000000..a697eaf
--- /dev/null
+++ b/video/out/libmpv.h
@@ -0,0 +1,83 @@
+#pragma once
+
+#include <stdint.h>
+#include <stdbool.h>
+#include "libmpv/render.h"
+#include "vo.h"
+
+// Helper for finding a parameter value. It returns the direct pointer to the
+// value, and if not present, just returns the def argument. In particular, if
+// def is not NULL, this never returns NULL (unless a param value is defined
+// as accepting NULL, or the libmpv API user is triggering UB).
+//  params: parameter list to search
+//  type:   parameter type to look for
+//  def:    pointer returned when no parameter of that type is present
+void *get_mpv_render_param(mpv_render_param *params, mpv_render_param_type type,
+                           void *def);
+
+// Typed convenience wrapper around get_mpv_render_param(): evaluates to the
+// parameter's value read as a ctype, or to def if the parameter is absent.
+// The default is passed via a compound literal, so the returned pointer is
+// always dereferenceable on the fallback path.
+#define GET_MPV_RENDER_PARAM(params, type, ctype, def) \
+    (*(ctype *)get_mpv_render_param(params, type, &(ctype){(def)}))
+
+// Callback type for control requests. It receives the VO, the user context
+// registered alongside it, an events output, the request code and its data.
+// NOTE(review): presumably mirrors the VO control() interface - confirm.
+typedef int (*mp_render_cb_control_fn)(struct vo *vo, void *cb_ctx, int *events,
+                                       uint32_t request, void *data);
+// Registers callback (with callback_ctx passed back as cb_ctx) on ctx.
+void mp_render_context_set_control_callback(mpv_render_context *ctx,
+                                            mp_render_cb_control_fn callback,
+                                            void *callback_ctx);
+// NOTE(review): acquisition semantics are not visible in this header; see the
+// implementation before relying on the return value.
+bool mp_render_context_acquire(mpv_render_context *ctx);
+
+// Instance of a libmpv render backend. The behavior is provided by the
+// function table in fns; backend-specific state lives in priv.
+struct render_backend {
+    struct mpv_global *global;
+    struct mp_log *log;
+    const struct render_backend_fns *fns;
+
+    // Set on init, immutable afterwards.
+    int driver_caps;
+    struct mp_hwdec_devices *hwdec_devs;
+
+    // Backend-private data; released by fns->destroy().
+    void *priv;
+};
+
+// Generic backend for rendering via libmpv. This corresponds to vo/vo_driver,
+// except for rendering via the mpv_render_*() API. (As a consequence it's as
+// generic as the VO API.) Like with VOs, one backend can support multiple
+// underlying GPU APIs.
+struct render_backend_fns {
+    // Returns libmpv error code. In particular, this function has to check for
+    // MPV_RENDER_PARAM_API_TYPE, and silently return MPV_ERROR_NOT_IMPLEMENTED
+    // if the API is not included in this backend.
+    // If this fails, ->destroy() will be called.
+    int (*init)(struct render_backend *ctx, mpv_render_param *params);
+    // Check if the passed IMGFMT_ is supported.
+    bool (*check_format)(struct render_backend *ctx, int imgfmt);
+    // Implementation of mpv_render_context_set_parameter(). Optional.
+    int (*set_parameter)(struct render_backend *ctx, mpv_render_param param);
+    // Like vo_driver.reconfig().
+    void (*reconfig)(struct render_backend *ctx, struct mp_image_params *params);
+    // Like VOCTRL_RESET.
+    void (*reset)(struct render_backend *ctx);
+    // NOTE(review): presumably the backend side of the screenshot VO control,
+    // operating on the given frame - confirm against the callers.
+    void (*screenshot)(struct render_backend *ctx, struct vo_frame *frame,
+                       struct voctrl_screenshot *args);
+    // NOTE(review): presumably fills *out with renderer performance data -
+    // confirm against the callers.
+    void (*perfdata)(struct render_backend *ctx,
+                     struct voctrl_performance_data *out);
+    // Like vo_driver.get_image().
+    struct mp_image *(*get_image)(struct render_backend *ctx, int imgfmt,
+                                  int w, int h, int stride_align, int flags);
+    // This has two purposes: 1. set queue attributes on VO, 2. update the
+    // renderer's OSD pointer. Keep in mind that as soon as the caller releases
+    // the renderer lock, the VO pointer can become invalid. The OSD pointer
+    // will technically remain valid (even though it's a vo field), until it's
+    // unset with this function.
+    // Will be called if vo changes, or if renderer options change.
+    void (*update_external)(struct render_backend *ctx, struct vo *vo);
+    // Update screen area.
+    void (*resize)(struct render_backend *ctx, struct mp_rect *src,
+                   struct mp_rect *dst, struct mp_osd_res *osd);
+    // Get target surface size from mpv_render_context_render() arguments.
+    int (*get_target_size)(struct render_backend *ctx, mpv_render_param *params,
+                           int *out_w, int *out_h);
+    // Implementation of mpv_render_context_render().
+    int (*render)(struct render_backend *ctx, mpv_render_param *params,
+                  struct vo_frame *frame);
+    // Free all data in ctx->priv.
+    void (*destroy)(struct render_backend *ctx);
+};
+
+// Function tables of the available render backends (defined elsewhere).
+extern const struct render_backend_fns render_backend_gpu;
+extern const struct render_backend_fns render_backend_sw;
diff --git a/video/out/libmpv_sw.c b/video/out/libmpv_sw.c
new file mode 100644
index 0000000..f1b08f0
--- /dev/null
+++ b/video/out/libmpv_sw.c
@@ -0,0 +1,208 @@
+#include "libmpv/render_gl.h"
+#include "libmpv.h"
+#include "sub/osd.h"
+#include "video/sws_utils.h"
+
+// Private state of the software render backend (stored in ctx->priv).
+struct priv {
+    // NOTE(review): not referenced anywhere else in this file.
+    struct libmpv_gpu_context *context;
+
+    // Scaler/converter, created in init() via mp_sws_alloc(p).
+    struct mp_sws_context *sws;
+    // OSD state taken from the VO in update_external(); NULL while no VO is set.
+    struct osd_state *osd;
+
+    // src_params: source video parameters as passed to reconfig().
+    // dst_params: target surface parameters as last requested in render().
+    struct mp_image_params src_params, dst_params;
+    // Source rectangle, destination rectangle and OSD geometry from resize().
+    struct mp_rect src_rc, dst_rc;
+    struct mp_osd_res osd_rc;
+    // Set whenever render() has to rebuild dst_params and reinit the scaler.
+    bool anything_changed;
+};
+
+// Set up the software backend: allocate the private state, check that the
+// caller requested the SW API type, then create the scaler.
+// Returns 0 on success, MPV_ERROR_INVALID_PARAMETER if no API type was given,
+// or MPV_ERROR_NOT_IMPLEMENTED if a different API type was requested.
+// ctx->priv is assigned before the checks, so it is set even on failure.
+static int init(struct render_backend *ctx, mpv_render_param *params)
+{
+    struct priv *state = talloc_zero(NULL, struct priv);
+    ctx->priv = state;
+
+    char *api_type = get_mpv_render_param(params, MPV_RENDER_PARAM_API_TYPE, NULL);
+    if (!api_type)
+        return MPV_ERROR_INVALID_PARAMETER;
+    if (strcmp(api_type, MPV_RENDER_API_TYPE_SW) != 0)
+        return MPV_ERROR_NOT_IMPLEMENTED;
+
+    // Force a full (re)configuration on the first render() call.
+    state->anything_changed = true;
+
+    state->sws = mp_sws_alloc(state);
+    mp_sws_enable_cmdline_opts(state->sws, ctx->global);
+
+    return 0;
+}
+
+// Report whether source frames in format imgfmt can be handled by the scaler.
+static bool check_format(struct render_backend *ctx, int imgfmt)
+{
+    // The real output format is only known at render time. Probing against an
+    // arbitrary supported one (RGB0) is fine, because any supported input
+    // format can be converted to any supported output format.
+    struct priv *state = ctx->priv;
+    bool convertible = mp_sws_supports_formats(state->sws, IMGFMT_RGB0, imgfmt);
+    return convertible;
+}
+
+// This backend has no runtime-settable parameters; every request is rejected.
+static int set_parameter(struct render_backend *ctx, mpv_render_param param)
+{
+    (void)ctx;
+    (void)param;
+    return MPV_ERROR_NOT_IMPLEMENTED;
+}
+
+// Store the new source video parameters. The scaler itself is reconfigured
+// lazily by the next render() call.
+static void reconfig(struct render_backend *ctx, struct mp_image_params *params)
+{
+    struct priv *state = ctx->priv;
+
+    state->anything_changed = true;
+    state->src_params = *params;
+}
+
+// Nothing to do on reset.
+static void reset(struct render_backend *ctx)
+{
+    (void)ctx; // stateless
+}
+
+// Pick up the OSD pointer from the given VO, or drop it when vo is NULL.
+static void update_external(struct render_backend *ctx, struct vo *vo)
+{
+    struct priv *state = ctx->priv;
+
+    if (vo) {
+        state->osd = vo->osd;
+    } else {
+        state->osd = NULL;
+    }
+}
+
+// Store the new source/destination rectangles and OSD geometry, and flag the
+// scaler for reconfiguration on the next render() call.
+static void resize(struct render_backend *ctx, struct mp_rect *src,
+                   struct mp_rect *dst, struct mp_osd_res *osd)
+{
+    struct priv *state = ctx->priv;
+
+    state->anything_changed = true;
+    state->osd_rc = *osd;
+    state->dst_rc = *dst;
+    state->src_rc = *src;
+}
+
+// Extract the target surface size from the MPV_RENDER_PARAM_SW_SIZE entry of
+// params (read as an int[2]: width, height).
+// Returns 0 on success, MPV_ERROR_INVALID_PARAMETER if the entry is missing
+// (out_w/out_h are left untouched in that case).
+static int get_target_size(struct render_backend *ctx, mpv_render_param *params,
+                           int *out_w, int *out_h)
+{
+    int *size = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_SIZE, NULL);
+    if (size) {
+        *out_w = size[0];
+        *out_h = size[1];
+        return 0;
+    }
+
+    return MPV_ERROR_INVALID_PARAMETER;
+}
+
+// Implementation of mpv_render_context_render() for the software backend.
+//
+// Reads the target surface description (size, format name, stride, pointer)
+// from params, rebuilds dst_params and reinitializes the scaler if anything
+// changed since the last successful setup, then scales frame->current into
+// the caller's buffer and draws the OSD on top. Without a current frame the
+// whole target is cleared instead.
+//
+// Returns 0 on success, MPV_ERROR_INVALID_PARAMETER for missing parameters or
+// an unusable stride, MPV_ERROR_UNSUPPORTED for rejected target formats or a
+// failed scaler init, and MPV_ERROR_GENERIC if scaling fails.
+static int render(struct render_backend *ctx, mpv_render_param *params,
+                  struct vo_frame *frame)
+{
+    struct priv *p = ctx->priv;
+
+    int *sz = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_SIZE, NULL);
+    char *fmt = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_FORMAT, NULL);
+    size_t *stride = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_STRIDE, NULL);
+    void *ptr = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_POINTER, NULL);
+
+    // All four parameters are mandatory.
+    if (!sz || !fmt || !stride || !ptr)
+        return MPV_ERROR_INVALID_PARAMETER;
+
+    // Detect a change of target format (compared by name) or target size.
+    char *prev_fmt = mp_imgfmt_to_name(p->dst_params.imgfmt);
+    if (strcmp(prev_fmt, fmt) != 0)
+        p->anything_changed = true;
+
+    if (sz[0] != p->dst_params.w || sz[1] != p->dst_params.h)
+        p->anything_changed = true;
+
+    if (p->anything_changed) {
+        p->dst_params = (struct mp_image_params){
+            .imgfmt = mp_imgfmt_from_name(bstr0(fmt)),
+            .w = sz[0],
+            .h = sz[1],
+        };
+
+        // Exclude "problematic" formats. In particular, reject multi-plane and
+        // hw formats. Exclude non-byte-aligned formats for easier stride
+        // checking.
+        struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(p->dst_params.imgfmt);
+        if (!(desc.flags & MP_IMGFLAG_COLOR_RGB) ||
+            !(desc.flags & (MP_IMGFLAG_TYPE_UINT | MP_IMGFLAG_TYPE_FLOAT)) ||
+            (desc.flags & MP_IMGFLAG_TYPE_PAL8) ||
+            !(desc.flags & MP_IMGFLAG_BYTE_ALIGNED) ||
+            desc.num_planes != 1)
+            return MPV_ERROR_UNSUPPORTED;
+
+        mp_image_params_guess_csp(&p->dst_params);
+
+        // Can be unset if rendering before any video was loaded.
+        if (p->src_params.imgfmt) {
+            // The scaler works on the cropped source / destination rectangles,
+            // not on the full image sizes.
+            p->sws->src = p->src_params;
+            p->sws->src.w = mp_rect_w(p->src_rc);
+            p->sws->src.h = mp_rect_h(p->src_rc);
+
+            p->sws->dst = p->dst_params;
+            p->sws->dst.w = mp_rect_w(p->dst_rc);
+            p->sws->dst.h = mp_rect_h(p->dst_rc);
+
+            if (mp_sws_reinit(p->sws) < 0)
+                return MPV_ERROR_UNSUPPORTED; // probably
+        }
+
+        // Only reached if setup succeeded; the early returns above leave the
+        // flag set so that the next call retries.
+        p->anything_changed = false;
+    }
+
+    // Wrap the caller-provided buffer in an mp_image without copying.
+    struct mp_image wrap_img = {0};
+    mp_image_set_params(&wrap_img, &p->dst_params);
+
+    // The stride must hold at least one full row and be a multiple of the
+    // pixel size in bytes.
+    size_t bpp = wrap_img.fmt.bpp[0] / 8;
+    if (!bpp || bpp * wrap_img.w > *stride || *stride % bpp)
+        return MPV_ERROR_INVALID_PARAMETER;
+
+    wrap_img.planes[0] = ptr;
+    wrap_img.stride[0] = *stride;
+
+    struct mp_image *img = frame->current;
+    if (img) {
+        assert(p->src_params.imgfmt);
+
+        // NOTE(review): presumably clears everything outside dst_rc - confirm
+        // against mp_image_clear_rc_inv().
+        mp_image_clear_rc_inv(&wrap_img, p->dst_rc);
+
+        // Crop a shallow copy of the source; the crop origin is aligned down
+        // to the format's alignment requirements.
+        struct mp_image src = *img;
+        struct mp_rect src_rc = p->src_rc;
+        src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, src.fmt.align_x);
+        src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, src.fmt.align_y);
+        mp_image_crop_rc(&src, src_rc);
+
+        struct mp_image dst = wrap_img;
+        mp_image_crop_rc(&dst, p->dst_rc);
+
+        if (mp_sws_scale(p->sws, &dst, &src) < 0) {
+            // Clear the whole target on failure.
+            mp_image_clear(&wrap_img, 0, 0, wrap_img.w, wrap_img.h);
+            return MPV_ERROR_GENERIC;
+        }
+    } else {
+        mp_image_clear(&wrap_img, 0, 0, wrap_img.w, wrap_img.h);
+    }
+
+    // Draw the OSD over the full target; pts 0 is used without a frame.
+    if (p->osd)
+        osd_draw_on_image(p->osd, p->osd_rc, img ? img->pts : 0, 0, &wrap_img);
+
+    return 0;
+}
+
+// No backend-specific teardown is performed here.
+static void destroy(struct render_backend *ctx)
+{
+    (void)ctx; // nop
+}
+
+// Function table of the software render backend. The screenshot, perfdata and
+// get_image callbacks are not provided and are therefore left NULL.
+const struct render_backend_fns render_backend_sw = {
+    .init = init,
+    .check_format = check_format,
+    .set_parameter = set_parameter,
+    .reconfig = reconfig,
+    .reset = reset,
+    .update_external = update_external,
+    .resize = resize,
+    .get_target_size = get_target_size,
+    .render = render,
+    .destroy = destroy,
+};
diff --git a/video/out/mac/common.swift b/video/out/mac/common.swift
new file mode 100644
index 0000000..aac7050
--- /dev/null
+++ b/video/out/mac/common.swift
@@ -0,0 +1,691 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+import IOKit.pwr_mgt
+
+class Common: NSObject {
+    // Access to mpv options and the vo; not set by init(_:), so it stays nil
+    // until assigned elsewhere.
+    var mpv: MPVHelper?
+    var log: LogHelper
+    // Queue used for light sensor notifications and delayed event flagging.
+    let queue: DispatchQueue = DispatchQueue(label: "io.mpv.queue")
+
+    var window: Window?
+    var view: View?
+    var titleBar: TitleBar?
+
+    // Display link created in startDisplayLink(_:).
+    var link: CVDisplayLink?
+
+    // events holds the pending VO_EVENT_* flags; eventsLock guards it.
+    let eventsLock = NSLock()
+    var events: Int = 0
+
+    // Light sensor connection (0 while not opened), the last mean raw reading,
+    // and the notification port used to receive sensor updates.
+    var lightSensor: io_connect_t = 0
+    var lastLmu: UInt64 = 0
+    var lightSensorIOPort: IONotificationPortRef?
+
+    // Power assertion preventing display sleep; 0 means none is held.
+    var displaySleepAssertion: IOPMAssertionID = IOPMAssertionID(0)
+
+    // Observer tokens returned by NotificationCenter, kept for later removal.
+    var appNotificationObservers: [NSObjectProtocol] = []
+
+    // Last cursor visibility requested via VOCTRL_SET_CURSOR_VISIBILITY.
+    var cursorVisibilityWanted: Bool = true
+
+    // Window title; changes are forwarded to the window if one exists.
+    var title: String = "mpv" {
+        didSet { if let window = window { window.title = title } }
+    }
+
+    // Creates the object with a logger bound to the given mpv log handle.
+    init(_ mpLog: OpaquePointer?) {
+        self.log = LogHelper(mpLog)
+    }
+
+    // Starts the auxiliary services: display link, light sensor, display
+    // reconfiguration and app notifications, and the mac option callback.
+    // Terminates the process if no MPVHelper has been set.
+    func initMisc(_ vo: UnsafeMutablePointer<vo>) {
+        guard let helper = mpv else {
+            log.sendError("Something went wrong, no MPVHelper was initialized")
+            exit(1)
+        }
+
+        startDisplayLink(vo)
+        initLightSensor()
+        addDisplayReconfigureObserver()
+        addAppNotifications()
+        helper.setMacOptionCallback(macOptsWakeupCallback, context: self)
+    }
+
+    // Applies the configured activation policy and sets the app icon.
+    // Terminates the process if no MPVHelper has been set.
+    func initApp() {
+        guard let helper = mpv else {
+            log.sendError("Something went wrong, no MPVHelper was initialized")
+            exit(1)
+        }
+
+        let policy: NSApplication.ActivationPolicy
+        switch helper.macOpts.macos_app_activation_policy {
+        case 1:
+            policy = .accessory
+        case 2:
+            policy = .prohibited
+        default:
+            // 0 and any unknown value select the regular policy
+            policy = .regular
+        }
+
+        NSApp.setActivationPolicy(policy)
+        setAppIcon()
+    }
+
+    // Creates the window and title bar, applies the window-related mpv options
+    // and activates the application.
+    // previousActiveApp is re-activated afterwards when focus_on_open is off.
+    // Requires the view to exist already (see initView); terminates the process
+    // if the view or window is missing.
+    func initWindow(_ vo: UnsafeMutablePointer<vo>, _ previousActiveApp: NSRunningApplication?) {
+        let (mpv, targetScreen, wr) = getInitProperties(vo)
+
+        guard let view = self.view else {
+            log.sendError("Something went wrong, no View was initialized")
+            exit(1)
+        }
+
+        window = Window(contentRect: wr, screen: targetScreen, view: view, common: self)
+        guard let window = self.window else {
+            log.sendError("Something went wrong, no Window was initialized")
+            exit(1)
+        }
+
+        window.setOnTop(Bool(mpv.opts.ontop), Int(mpv.opts.ontop_level))
+        window.setOnAllWorkspaces(Bool(mpv.opts.all_workspaces))
+        window.keepAspect = Bool(mpv.opts.keepaspect_window)
+        window.title = title
+        window.border = Bool(mpv.opts.border)
+
+        titleBar = TitleBar(frame: wr, window: window, common: self)
+
+        // A minimized window is never maximized and is not ordered to the front.
+        let minimized = Bool(mpv.opts.window_minimized)
+        window.isRestorable = false
+        window.isReleasedWhenClosed = false
+        window.setMaximized(minimized ? false : Bool(mpv.opts.window_maximized))
+        window.setMinimized(minimized)
+        window.makeMain()
+        window.makeKey()
+
+        if !minimized {
+            window.orderFront(nil)
+        }
+
+        NSApp.activate(ignoringOtherApps: mpv.opts.focus_on_open)
+
+        // workaround for macOS 10.15 to refocus the previous App
+        if (!mpv.opts.focus_on_open) {
+            previousActiveApp?.activate(options: .activateAllWindows)
+        }
+    }
+
+    // Creates the view with the initial window geometry and makes it
+    // layer-backed by the given layer. Terminates the process if the view
+    // could not be created.
+    func initView(_ vo: UnsafeMutablePointer<vo>, _ layer: CALayer) {
+        let (_, _, wr) = getInitProperties(vo)
+
+        view = View(frame: wr, common: self)
+        guard let view = self.view else {
+            log.sendError("Something went wrong, no View was initialized")
+            exit(1)
+        }
+
+        view.layer = layer
+        view.wantsLayer = true
+        view.layerContentsPlacement = .scaleProportionallyToFit
+    }
+
+    // Applies the initial fullscreen state: toggles fullscreen asynchronously
+    // on the main queue if requested, otherwise makes the window movable by
+    // its background.
+    func initWindowState() {
+        let startFullscreen = mpv?.opts.fullscreen ?? false
+        guard startFullscreen else {
+            window?.isMovableByWindowBackground = true
+            return
+        }
+
+        DispatchQueue.main.async {
+            self.window?.toggleFullScreen(nil)
+        }
+    }
+
+    // Tears down everything set up by the init* methods: restores the cursor,
+    // stops the display link, releases the light sensor, removes observers,
+    // re-enables display sleep and removes the window, title bar and view.
+    func uninitCommon() {
+        setCursorVisibility(true)
+        stopDisplaylink()
+        uninitLightSensor()
+        removeDisplayReconfigureObserver()
+        removeAppNotifications()
+        enableDisplaySleep()
+        window?.orderOut(nil)
+
+        titleBar?.removeFromSuperview()
+        view?.removeFromSuperview()
+    }
+
+    // Invoked from the display link output handler installed in
+    // startDisplayLink(_:). This implementation does nothing and reports
+    // success.
+    func displayLinkCallback(_ displayLink: CVDisplayLink,
+                                   _ inNow: UnsafePointer<CVTimeStamp>,
+                            _ inOutputTime: UnsafePointer<CVTimeStamp>,
+                                 _ flagsIn: CVOptionFlags,
+                                _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn
+    {
+        return kCVReturnSuccess
+    }
+
+    // Creates the display link, binds it to the target screen (falling back to
+    // the main screen), installs displayLinkCallback as output handler and
+    // starts it. Only logs a warning if no screen or link is available.
+    // The vo parameter is not used in this method.
+    func startDisplayLink(_ vo: UnsafeMutablePointer<vo>) {
+        CVDisplayLinkCreateWithActiveCGDisplays(&link)
+
+        guard let screen = getTargetScreen(forFullscreen: false) ?? NSScreen.main,
+              let link = self.link else
+        {
+            log.sendWarning("Couldn't start DisplayLink, no MPVHelper, Screen or DisplayLink available")
+            return
+        }
+
+        CVDisplayLinkSetCurrentCGDisplay(link, screen.displayID)
+        // NOTE(review): the handler closure captures self strongly.
+        CVDisplayLinkSetOutputHandler(link) { link, now, out, inFlags, outFlags -> CVReturn in
+            return self.displayLinkCallback(link, now, out, inFlags, outFlags)
+        }
+        CVDisplayLinkStart(link)
+    }
+
+    // Stops the display link if it exists and is currently running.
+    func stopDisplaylink() {
+        guard let activeLink = link else { return }
+
+        if CVDisplayLinkIsRunning(activeLink) {
+            CVDisplayLinkStop(activeLink)
+        }
+    }
+
+    // Rebinds the display link to the screen the window is currently on and
+    // flags VO_EVENT_WIN_STATE 0.1 seconds later on the private queue.
+    func updateDisplaylink() {
+        guard let currentScreen = window?.screen, let displayLink = link else {
+            log.sendWarning("Couldn't update DisplayLink, no Screen or DisplayLink available")
+            return
+        }
+
+        CVDisplayLinkSetCurrentCGDisplay(displayLink, currentScreen.displayID)
+
+        let deadline = DispatchTime.now() + 0.1
+        queue.asyncAfter(deadline: deadline) {
+            self.flagEvents(VO_EVENT_WIN_STATE)
+        }
+    }
+
+    // Returns the refresh rate of the display driving the display link.
+    // The measured rate is used when it is within 0.1 Hz of the nominal rate,
+    // otherwise the nominal rate. Falls back to 60 Hz when there is no display
+    // link or the nominal period is flagged as indefinite.
+    func currentFps() -> Double {
+        guard let displayLink = link else {
+            log.sendWarning("No DisplayLink available")
+            log.sendWarning("Falling back to standard display refresh rate: 60Hz")
+            return 60.0
+        }
+
+        let actualPeriod = CVDisplayLinkGetActualOutputVideoRefreshPeriod(displayLink)
+        let nominal = CVDisplayLinkGetNominalOutputVideoRefreshPeriod(displayLink)
+
+        let indefinite = (nominal.flags & Int32(CVTimeFlags.isIndefinite.rawValue)) >= 1
+        if indefinite {
+            log.sendWarning("Falling back to standard display refresh rate: 60Hz")
+            return 60.0
+        }
+
+        let nominalFps = Double(nominal.timeScale) / Double(nominal.timeValue)
+        // a non-positive period is passed through unchanged
+        let actualFps = actualPeriod > 0 ? 1/actualPeriod : actualPeriod
+
+        if fabs(actualFps - nominalFps) > 0.1 {
+            log.sendVerbose("Falling back to nominal display refresh rate: \(nominalFps)")
+            return nominalFps
+        }
+        return actualFps
+    }
+
+    // Releases the display-sleep power assertion, if one is currently held.
+    // Does nothing when no assertion was created, so the null assertion ID (0)
+    // is never handed to IOPMAssertionRelease (uninitCommon calls this
+    // unconditionally).
+    func enableDisplaySleep() {
+        guard displaySleepAssertion != IOPMAssertionID(0) else { return }
+
+        IOPMAssertionRelease(displaySleepAssertion)
+        displaySleepAssertion = IOPMAssertionID(0)
+    }
+
+    // Creates a power assertion that prevents idle display sleep, unless one
+    // is already held.
+    func disableDisplaySleep() {
+        guard displaySleepAssertion == IOPMAssertionID(0) else { return }
+
+        let assertionType = kIOPMAssertionTypePreventUserIdleDisplaySleep as CFString
+        let reason = "io.mpv.video_playing_back" as CFString
+        IOPMAssertionCreateWithName(
+            assertionType,
+            IOPMAssertionLevel(kIOPMAssertionLevelOn),
+            reason,
+            &displaySleepAssertion)
+    }
+
+    // Converts a raw Apple LMU sensor value to lux, clamped to >= 0.
+    func lmuToLux(_ v: UInt64) -> Int {
+        // The polynomial approximation for Apple LMU value -> lux was derived
+        // empirically by Firefox developers (Apple provides no documentation).
+        // https://bugzilla.mozilla.org/show_bug.cgi?id=793728
+        let lum = Double(v)
+
+        let scale4: Double = 1 / pow(10, 27)
+        let scale3: Double = 1 / pow(10, 19)
+        let scale2: Double = 1 / pow(10, 12)
+        let scale1: Double = 1 / pow(10, 5)
+
+        let quartic: Double = -3.0 * scale4 * pow(lum, 4.0)
+        let cubic: Double = 2.6 * scale3 * pow(lum, 3.0)
+        let quadratic: Double = -3.4 * scale2 * pow(lum, 2.0)
+        let linear: Double = 3.9 * scale1 * lum
+
+        let lux = Int(ceil(quartic + cubic + quadratic + linear - 0.19))
+        return max(lux, 0)
+    }
+
+    // IOKit interest callback for the light sensor. ctx is the bridged Common
+    // instance. Reads two values from the sensor connection and, if their mean
+    // differs from the last stored reading, stores it and calls
+    // lightSensorUpdate().
+    var lightSensorCallback: IOServiceInterestCallback = { (ctx, service, messageType, messageArgument) -> Void in
+        let common = unsafeBitCast(ctx, to: Common.self)
+
+        var readings: [UInt64] = [0, 0]
+        var readingCount: UInt32 = 2
+
+        let status = IOConnectCallMethod(common.lightSensor, 0, nil, 0, nil, 0, &readings, &readingCount, nil, nil)
+        guard status == KERN_SUCCESS else { return }
+
+        let mean = (readings[0] + readings[1]) / 2
+        guard common.lastLmu != mean else { return }
+
+        common.lastLmu = mean
+        common.lightSensorUpdate()
+    }
+
+    // Called when the ambient light reading changed. This implementation only
+    // logs a warning; presumably meant to be overridden - verify.
+    func lightSensorUpdate() {
+        log.sendWarning("lightSensorUpdate not implemented")
+    }
+
+    // Looks up the "AppleLMUController" service, registers lightSensorCallback
+    // for its notifications on the private queue, opens a connection to it and
+    // triggers one initial reading. Returns early (verbose log only) if the
+    // service is missing or the connection cannot be opened.
+    func initLightSensor() {
+        let srv = IOServiceGetMatchingService(kIOMasterPortDefault, IOServiceMatching("AppleLMUController"))
+        if srv == IO_OBJECT_NULL {
+            log.sendVerbose("Can't find an ambient light sensor")
+            return
+        }
+
+        lightSensorIOPort = IONotificationPortCreate(kIOMasterPortDefault)
+        IONotificationPortSetDispatchQueue(lightSensorIOPort, queue)
+        // NOTE(review): the notification object n is not kept or released.
+        var n = io_object_t()
+        IOServiceAddInterestNotification(lightSensorIOPort, srv, kIOGeneralInterest, lightSensorCallback, MPVHelper.bridge(obj: self), &n)
+        let kr = IOServiceOpen(srv, mach_task_self_, 0, &lightSensor)
+        IOObjectRelease(srv)
+
+        if kr != KERN_SUCCESS {
+            log.sendVerbose("Can't start ambient light sensor connection")
+            return
+        }
+        // Invoke the callback once by hand to obtain an initial reading.
+        lightSensorCallback(MPVHelper.bridge(obj: self), 0, 0, nil)
+    }
+
+    // Destroys the light sensor notification port and releases the sensor
+    // connection, if they were set up. Both handles are reset afterwards so
+    // that a repeated call is a no-op and VOCTRL_GET_AMBIENT_LUX (which checks
+    // lightSensor != 0) no longer reports readings from a released sensor.
+    func uninitLightSensor() {
+        guard lightSensorIOPort != nil else { return }
+
+        IONotificationPortDestroy(lightSensorIOPort)
+        IOObjectRelease(lightSensor)
+
+        lightSensorIOPort = nil
+        lightSensor = 0
+    }
+
+    // Display reconfiguration callback. userInfo is the bridged Common
+    // instance. On a mode change of the display the window is on (or of any
+    // display if the window's screen is unknown), flags VO_EVENT_WIN_STATE.
+    var reconfigureCallback: CGDisplayReconfigurationCallBack = { (display, flags, userInfo) in
+        guard flags.contains(.setModeFlag) else { return }
+
+        let common = unsafeBitCast(userInfo, to: Common.self)
+        let windowDisplay = common.window?.screen?.displayID ?? display
+        guard windowDisplay == display else { return }
+
+        common.log.sendVerbose("Detected display mode change, updating screen refresh rate")
+        common.flagEvents(VO_EVENT_WIN_STATE)
+    }
+
+    // Registers reconfigureCallback for display reconfiguration events, with
+    // this object as user info.
+    func addDisplayReconfigureObserver() {
+        CGDisplayRegisterReconfigurationCallback(reconfigureCallback, MPVHelper.bridge(obj: self))
+    }
+
+    // Unregisters the callback installed by addDisplayReconfigureObserver().
+    func removeDisplayReconfigureObserver() {
+        CGDisplayRemoveReconfigurationCallback(reconfigureCallback, MPVHelper.bridge(obj: self))
+    }
+
+    // Observes application activation and deactivation on the main queue and
+    // stores the observer tokens for removeAppNotifications().
+    func addAppNotifications() {
+        let center = NotificationCenter.default
+
+        let activeObserver = center.addObserver(
+            forName: NSApplication.didBecomeActiveNotification,
+            object: nil,
+            queue: .main
+        ) { [weak self] _ in
+            self?.appDidBecomeActive()
+        }
+        appNotificationObservers.append(activeObserver)
+
+        let resignObserver = center.addObserver(
+            forName: NSApplication.didResignActiveNotification,
+            object: nil,
+            queue: .main
+        ) { [weak self] _ in
+            self?.appDidResignActive()
+        }
+        appNotificationObservers.append(resignObserver)
+    }
+
+    // Removes every observer registered by addAppNotifications().
+    func removeAppNotifications() {
+        for observer in appNotificationObservers {
+            NotificationCenter.default.removeObserver(observer)
+        }
+        appNotificationObservers.removeAll()
+    }
+
+    // The application became active: flag a focus change.
+    func appDidBecomeActive() {
+        flagEvents(VO_EVENT_FOCUS)
+    }
+
+    // The application resigned active state: flag a focus change.
+    func appDidResignActive() {
+        flagEvents(VO_EVENT_FOCUS)
+    }
+
+    // Sets the mpv application icon, unless the MPVBUNDLE environment variable
+    // is "true" or NSApp is not an Application instance.
+    func setAppIcon() {
+        guard let app = NSApp as? Application,
+              ProcessInfo.processInfo.environment["MPVBUNDLE"] != "true" else
+        {
+            return
+        }
+
+        NSApp.applicationIconImage = app.getMPVIcon()
+    }
+
+    // Re-applies the last requested cursor visibility.
+    func updateCursorVisibility() {
+        setCursorVisibility(cursorVisibilityWanted)
+    }
+
+    // Hides the cursor until the mouse moves when hiding is requested and the
+    // view allows it; otherwise makes sure it is not hidden.
+    func setCursorVisibility(_ visible: Bool) {
+        // canHideCursor() is only consulted when hiding was requested
+        let shouldHide = visible ? false : (view?.canHideCursor() ?? false)
+        NSCursor.setHiddenUntilMouseMoves(shouldHide)
+    }
+
+    // This implementation only logs a warning; presumably meant to be
+    // overridden - verify.
+    func updateICCProfile() {
+        log.sendWarning("updateICCProfile not implemented")
+    }
+
+    // Returns the screen at index screenID in NSScreen.screens, or nil if the
+    // index is negative or out of range (the latter is logged).
+    func getScreenBy(id screenID: Int) -> NSScreen? {
+        // Take a single snapshot: the screen list can change when displays are
+        // (dis)connected, and checking the count on one list while indexing
+        // another could trap with an out-of-range subscript.
+        let screens = NSScreen.screens
+
+        if screenID >= screens.count {
+            log.sendInfo("Screen ID \(screenID) does not exist, falling back to current device")
+            return nil
+        } else if screenID < 0 {
+            return nil
+        }
+        return screens[screenID]
+    }
+
+    // Returns the first screen whose localized name equals screenName, or nil
+    // if there is none (always nil for a nil name).
+    func getScreenBy(name screenName: String?) -> NSScreen? {
+        return NSScreen.screens.first(where: { $0.localizedName == screenName })
+    }
+
+    // Resolves the screen selected by the mpv options: the fullscreen variants
+    // (fsscreen_id / fsscreen_name) when fs is true, otherwise screen_id /
+    // screen_name. The ID takes precedence; the name is used as fallback.
+    // Returns nil if neither matches or no MPVHelper is set.
+    func getTargetScreen(forFullscreen fs: Bool) -> NSScreen? {
+        guard let mpv = mpv else {
+            log.sendWarning("Unexpected nil value in getTargetScreen")
+            return nil
+        }
+
+        let screenID = fs ? mpv.opts.fsscreen_id : mpv.opts.screen_id
+        var name: String?
+        if let screenName = fs ? mpv.opts.fsscreen_name : mpv.opts.screen_name {
+            name = String(cString: screenName)
+        }
+        return getScreenBy(id: Int(screenID)) ?? getScreenBy(name: name)
+    }
+
+    // Returns the window's screen if a window exists (possibly nil), otherwise
+    // the configured target screen, falling back to the main screen.
+    func getCurrentScreen() -> NSScreen? {
+        if let existingWindow = window {
+            return existingWindow.screen
+        }
+        return getTargetScreen(forFullscreen: false) ?? NSScreen.main
+    }
+
+    // Computes the initial window rectangle on the given screen. The usable
+    // screen area (visible frame or full frame, depending on
+    // macos_geometry_calculation) is converted to backing pixels and to a
+    // top-left-origin mp_rect, handed to vo_calc_window_geometry2 /
+    // vo_apply_window_geometry, and the result is converted back to screen
+    // coordinates.
+    func getWindowGeometry(forScreen screen: NSScreen,
+                           videoOut vo: UnsafeMutablePointer<vo>) -> NSRect {
+        let r = screen.convertRectToBacking(screen.frame)
+        // Defaults to the visible frame when no MPVHelper is set.
+        let targetFrame = (mpv?.macOpts.macos_geometry_calculation ?? Int32(FRAME_VISIBLE)) == FRAME_VISIBLE
+            ? screen.visibleFrame : screen.frame
+        let rv = screen.convertRectToBacking(targetFrame)
+
+        // convert origin to be relative to target screen
+        var originY = rv.origin.y - r.origin.y
+        let originX = rv.origin.x - r.origin.x
+        // flip the y origin, mp_rect expects the origin at the top-left
+        // macOS' windowing system operates from the bottom-left
+        originY = -(originY + rv.size.height)
+        var screenRC: mp_rect = mp_rect(x0: Int32(originX),
+                                        y0: Int32(originY),
+                                        x1: Int32(originX + rv.size.width),
+                                        y1: Int32(originY + rv.size.height))
+
+        var geo: vo_win_geometry = vo_win_geometry()
+        vo_calc_window_geometry2(vo, &screenRC, Double(screen.backingScaleFactor), &geo)
+        vo_apply_window_geometry(vo, &geo)
+
+        let height = CGFloat(geo.win.y1 - geo.win.y0)
+        let width = CGFloat(geo.win.x1 - geo.win.x0)
+        // flip the y origin again
+        let y = CGFloat(-geo.win.y1)
+        let x = CGFloat(geo.win.x0)
+        return screen.convertRectFromBacking(NSMakeRect(x, y, width, height))
+    }
+
+    // Returns the MPVHelper, the target screen (falling back to the main
+    // screen) and the initial window rectangle on that screen.
+    // Terminates the process if no MPVHelper or screen is available.
+    func getInitProperties(_ vo: UnsafeMutablePointer<vo>) -> (MPVHelper, NSScreen, NSRect) {
+        guard let mpv = mpv else {
+            log.sendError("Something went wrong, no MPVHelper was initialized")
+            exit(1)
+        }
+        guard let targetScreen = getTargetScreen(forFullscreen: false) ?? NSScreen.main else {
+            log.sendError("Something went wrong, no Screen was found")
+            exit(1)
+        }
+
+        let wr = getWindowGeometry(forScreen: targetScreen, videoOut: vo)
+
+        return (mpv, targetScreen, wr)
+    }
+
+    // Returns the currently active application, if any.
+    // Must be called before initApp: on macOS 10.15 and later initApp changes
+    // which application is active.
+    func getActiveApp() -> NSRunningApplication? {
+        let running = NSWorkspace.shared.runningApplications
+        return running.first { $0.isActive }
+    }
+
+    // Adds ev to the pending event flags (under eventsLock) and wakes up the
+    // vo. Only logs a warning if no vo is available.
+    func flagEvents(_ ev: Int) {
+        do {
+            eventsLock.lock()
+            defer { eventsLock.unlock() }
+            events |= ev
+        }
+
+        if let target = mpv?.vo {
+            vo_wakeup(target)
+        } else {
+            log.sendWarning("vo nil in flagEvents")
+        }
+    }
+
+    // Returns the pending event flags and resets them to 0, under eventsLock.
+    func checkEvents() -> Int {
+        eventsLock.lock()
+        defer { eventsLock.unlock() }
+
+        let pending = events
+        events = 0
+        return pending
+    }
+
+    // Window event hooks. All are no-ops in this class; presumably they are
+    // overridden by subclasses and invoked by the window/view - verify.
+    func windowDidEndAnimation() {}
+    func windowSetToFullScreen() {}
+    func windowSetToWindow() {}
+    func windowDidUpdateFrame() {}
+    func windowDidChangeScreen() {}
+    func windowDidChangeScreenProfile() {}
+    func windowDidChangeBackingProperties() {}
+    func windowWillStartLiveResize() {}
+    func windowDidEndLiveResize() {}
+    func windowDidResize() {}
+    func windowDidChangeOcclusionState() {}
+
+    // Handles VOCTRL requests.
+    //
+    // - vo:      the video output the request belongs to (not used here)
+    // - events:  pending VO_EVENT_* flags are OR'ed into it for
+    //            VOCTRL_CHECK_EVENTS
+    // - request: the VOCTRL_* request code
+    // - data:    request-specific payload; its pointee type depends on the
+    //            request, and it is force-unwrapped by every request that
+    //            reads or writes it
+    //
+    // Returns VO_TRUE when handled, VO_NOTIMPL for unknown requests (and for
+    // VOCTRL_GET_AMBIENT_LUX without a light sensor), VO_NOTAVAIL when no
+    // screen is available for VOCTRL_GET_DISPLAY_RES, and VO_FALSE if no
+    // MPVHelper is set. Window changes are dispatched asynchronously to the
+    // main queue.
+    @objc func control(_ vo: UnsafeMutablePointer<vo>,
+                         events: UnsafeMutablePointer<Int32>,
+                         request: UInt32,
+                         data: UnsafeMutableRawPointer?) -> Int32
+    {
+        guard let mpv = mpv else {
+            log.sendWarning("Unexpected nil value in Control Callback")
+            return VO_FALSE
+        }
+
+        switch mp_voctrl(request) {
+        case VOCTRL_CHECK_EVENTS:
+            events.pointee |= Int32(checkEvents())
+            return VO_TRUE
+        case VOCTRL_VO_OPTS_CHANGED:
+            // Changed options are identified by the address of their field.
+            var opt: UnsafeMutableRawPointer?
+            while mpv.nextChangedOption(property: &opt) {
+                switch opt {
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.border):
+                    DispatchQueue.main.async {
+                        self.window?.border = Bool(mpv.opts.border)
+                    }
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.fullscreen):
+                    DispatchQueue.main.async {
+                        self.window?.toggleFullScreen(nil)
+                    }
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.ontop): fallthrough
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.ontop_level):
+                    DispatchQueue.main.async {
+                        self.window?.setOnTop(Bool(mpv.opts.ontop), Int(mpv.opts.ontop_level))
+                    }
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.all_workspaces):
+                    DispatchQueue.main.async {
+                        self.window?.setOnAllWorkspaces(Bool(mpv.opts.all_workspaces))
+                    }
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.keepaspect_window):
+                    DispatchQueue.main.async {
+                        self.window?.keepAspect = Bool(mpv.opts.keepaspect_window)
+                    }
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.window_minimized):
+                    DispatchQueue.main.async {
+                        self.window?.setMinimized(Bool(mpv.opts.window_minimized))
+                    }
+                case MPVHelper.getPointer(&mpv.optsPtr.pointee.window_maximized):
+                    DispatchQueue.main.async {
+                        self.window?.setMaximized(Bool(mpv.opts.window_maximized))
+                    }
+                default:
+                    break
+                }
+            }
+            return VO_TRUE
+        case VOCTRL_GET_DISPLAY_FPS:
+            let fps = data!.assumingMemoryBound(to: CDouble.self)
+            fps.pointee = currentFps()
+            return VO_TRUE
+        case VOCTRL_GET_HIDPI_SCALE:
+            let scaleFactor = data!.assumingMemoryBound(to: CDouble.self)
+            let screen = getCurrentScreen()
+            let factor = window?.backingScaleFactor ??
+                         screen?.backingScaleFactor ?? 1.0
+            scaleFactor.pointee = Double(factor)
+            return VO_TRUE
+        case VOCTRL_RESTORE_SCREENSAVER:
+            enableDisplaySleep()
+            return VO_TRUE
+        case VOCTRL_KILL_SCREENSAVER:
+            disableDisplaySleep()
+            return VO_TRUE
+        case VOCTRL_SET_CURSOR_VISIBILITY:
+            let cursorVisibility = data!.assumingMemoryBound(to: CBool.self)
+            cursorVisibilityWanted = cursorVisibility.pointee
+            DispatchQueue.main.async {
+                self.setCursorVisibility(self.cursorVisibilityWanted)
+            }
+            return VO_TRUE
+        case VOCTRL_GET_ICC_PROFILE:
+            let screen = getCurrentScreen()
+            guard var iccData = screen?.colorSpace?.iccProfileData else {
+                log.sendWarning("No Screen available to retrieve ICC profile")
+                return VO_TRUE
+            }
+
+            // The profile bytes are duplicated with bstrdup into the output.
+            let icc = data!.assumingMemoryBound(to: bstr.self)
+            iccData.withUnsafeMutableBytes { (ptr: UnsafeMutableRawBufferPointer) in
+                guard let baseAddress = ptr.baseAddress, ptr.count > 0 else { return }
+                let u8Ptr = baseAddress.assumingMemoryBound(to: UInt8.self)
+                icc.pointee = bstrdup(nil, bstr(start: u8Ptr, len: ptr.count))
+            }
+            return VO_TRUE
+        case VOCTRL_GET_AMBIENT_LUX:
+            if lightSensor != 0 {
+                let lux = data!.assumingMemoryBound(to: Int32.self)
+                lux.pointee = Int32(lmuToLux(lastLmu))
+                return VO_TRUE
+            }
+            return VO_NOTIMPL
+        case VOCTRL_GET_UNFS_WINDOW_SIZE:
+            let sizeData = data!.assumingMemoryBound(to: Int32.self)
+            let size = UnsafeMutableBufferPointer(start: sizeData, count: 2)
+            var rect = window?.unfsContentFrame ?? NSRect(x: 0, y: 0, width: 1280, height: 720)
+            if let screen = window?.currentScreen, !Bool(mpv.opts.hidpi_window_scale) {
+                rect = screen.convertRectToBacking(rect)
+            }
+
+            size[0] = Int32(rect.size.width)
+            size[1] = Int32(rect.size.height)
+            return VO_TRUE
+        case VOCTRL_SET_UNFS_WINDOW_SIZE:
+            // The requested size is read here, before going async.
+            let sizeData = data!.assumingMemoryBound(to: Int32.self)
+            let size = UnsafeBufferPointer(start: sizeData, count: 2)
+            var rect = NSMakeRect(0, 0, CGFloat(size[0]), CGFloat(size[1]))
+            DispatchQueue.main.async {
+                if let screen = self.window?.currentScreen, !Bool(self.mpv?.opts.hidpi_window_scale ?? true) {
+                    rect = screen.convertRectFromBacking(rect)
+                }
+                self.window?.updateSize(rect.size)
+            }
+            return VO_TRUE
+        case VOCTRL_GET_DISPLAY_NAMES:
+            // Output is a nil-terminated string array holding one display name.
+            let dnames = data!.assumingMemoryBound(to: UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>?.self)
+            var array: UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>? = nil
+            var count: Int32 = 0
+            let displayName = getCurrentScreen()?.localizedName ?? "Unknown"
+
+            SWIFT_TARRAY_STRING_APPEND(nil, &array, &count, ta_xstrdup(nil, displayName))
+            SWIFT_TARRAY_STRING_APPEND(nil, &array, &count, nil)
+            dnames.pointee = array
+            return VO_TRUE
+        case VOCTRL_GET_DISPLAY_RES:
+            guard let screen = getCurrentScreen() else {
+                log.sendWarning("No Screen available to retrieve frame")
+                return VO_NOTAVAIL
+            }
+            let sizeData = data!.assumingMemoryBound(to: Int32.self)
+            let size = UnsafeMutableBufferPointer(start: sizeData, count: 2)
+            let frame = screen.convertRectToBacking(screen.frame)
+            size[0] = Int32(frame.size.width)
+            size[1] = Int32(frame.size.height)
+            return VO_TRUE
+        case VOCTRL_GET_FOCUSED:
+            let focus = data!.assumingMemoryBound(to: CBool.self)
+            focus.pointee = NSApp.isActive
+            return VO_TRUE
+        case VOCTRL_UPDATE_WINDOW_TITLE:
+            let titleData = data!.assumingMemoryBound(to: Int8.self)
+            // Copy the C string before going async: the buffer behind `data`
+            // belongs to the caller and is not guaranteed to still be valid
+            // when the closure runs on the main queue.
+            let newTitle = NSString(utf8String: titleData) as String?
+            DispatchQueue.main.async {
+                self.title = newTitle ?? "Unknown Title"
+            }
+            return VO_TRUE
+        default:
+            return VO_NOTIMPL
+        }
+    }
+
+    // Wakeup callback registered via setMacOptionCallback in initMisc(_:).
+    // ctx is the bridged Common instance; the actual option handling is
+    // deferred to the main queue.
+    let macOptsWakeupCallback: swift_wakeup_cb_fn = { ( ctx ) in
+        let com = unsafeBitCast(ctx, to: Common.self)
+        DispatchQueue.main.async {
+            com.macOptsUpdate()
+        }
+    }
+
+    // Applies all pending changes of the macOS-specific options. Only the
+    // title bar appearance, material and color are handled; other changed
+    // options are ignored.
+    func macOptsUpdate() {
+        guard let mpv = mpv else {
+            log.sendWarning("Unexpected nil value in mac opts update")
+            return
+        }
+
+        // Changed options are identified by the address of their field.
+        var opt: UnsafeMutableRawPointer?
+        while mpv.nextChangedMacOption(property: &opt) {
+            switch opt {
+            case MPVHelper.getPointer(&mpv.macOptsPtr.pointee.macos_title_bar_appearance):
+                titleBar?.set(appearance: Int(mpv.macOpts.macos_title_bar_appearance))
+            case MPVHelper.getPointer(&mpv.macOptsPtr.pointee.macos_title_bar_material):
+                titleBar?.set(material: Int(mpv.macOpts.macos_title_bar_material))
+            case MPVHelper.getPointer(&mpv.macOptsPtr.pointee.macos_title_bar_color):
+                titleBar?.set(color: mpv.macOpts.macos_title_bar_color)
+            default:
+                break
+            }
+        }
+    }
+}
diff --git a/video/out/mac/gl_layer.swift b/video/out/mac/gl_layer.swift
new file mode 100644
index 0000000..dd96af7
--- /dev/null
+++ b/video/out/mac/gl_layer.swift
@@ -0,0 +1,322 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+import OpenGL.GL
+import OpenGL.GL3
+
+let glVersions: [CGLOpenGLProfile] = [ // profiles tried in this order by GLLayer.findPixelFormat
+    kCGLOGLPVersion_3_2_Core,
+    kCGLOGLPVersion_Legacy
+]
+
+let glFormatBase: [CGLPixelFormatAttribute] = [ // base attribute group for the non-software path
+    kCGLPFAOpenGLProfile, // findPixelFormat inserts the profile value directly after this key
+    kCGLPFAAccelerated,
+    kCGLPFADoubleBuffer
+]
+
+let glFormatSoftwareBase: [CGLPixelFormatAttribute] = [ // base attribute group used when software is requested
+    kCGLPFAOpenGLProfile, // findPixelFormat inserts the profile value directly after this key
+    kCGLPFARendererID, // followed by its value, the generic float renderer id
+    CGLPixelFormatAttribute(UInt32(kCGLRendererGenericFloatID)),
+    kCGLPFADoubleBuffer
+]
+
+let glFormatOptional: [[CGLPixelFormatAttribute]] = [ // groups findPixelFormat appends and may drop again on failure
+    [kCGLPFABackingStore],
+    [kCGLPFAAllowOfflineRenderers]
+]
+
+let glFormat10Bit: [CGLPixelFormatAttribute] = [ // findPixelFormat reports a depth of 16 while this group is in use
+    kCGLPFAColorSize, // followed by its value, 64
+    _CGLPixelFormatAttribute(rawValue: 64),
+    kCGLPFAColorFloat
+]
+
+let glFormatAutoGPU: [CGLPixelFormatAttribute] = [ // appended unless macos_force_dedicated_gpu is set
+    kCGLPFASupportsAutomaticGraphicsSwitching
+]
+
+let attributeLookUp: [UInt32:String] = [ // raw attribute value -> name, used for the verbose pixel format log
+    kCGLOGLPVersion_3_2_Core.rawValue:     "kCGLOGLPVersion_3_2_Core",
+    kCGLOGLPVersion_Legacy.rawValue:       "kCGLOGLPVersion_Legacy",
+    kCGLPFAOpenGLProfile.rawValue:         "kCGLPFAOpenGLProfile",
+    UInt32(kCGLRendererGenericFloatID):    "kCGLRendererGenericFloatID",
+    kCGLPFARendererID.rawValue:            "kCGLPFARendererID",
+    kCGLPFAAccelerated.rawValue:           "kCGLPFAAccelerated",
+    kCGLPFADoubleBuffer.rawValue:          "kCGLPFADoubleBuffer",
+    kCGLPFABackingStore.rawValue:          "kCGLPFABackingStore",
+    kCGLPFAColorSize.rawValue:             "kCGLPFAColorSize",
+    kCGLPFAColorFloat.rawValue:            "kCGLPFAColorFloat",
+    kCGLPFAAllowOfflineRenderers.rawValue: "kCGLPFAAllowOfflineRenderers",
+    kCGLPFASupportsAutomaticGraphicsSwitching.rawValue: "kCGLPFASupportsAutomaticGraphicsSwitching",
+]
+
+class GLLayer: CAOpenGLLayer {
+    unowned var cocoaCB: CocoaCB
+    var libmpv: LibmpvHelper { get { return cocoaCB.libmpv } }
+
+    let displayLock = NSLock()
+    let cglContext: CGLContextObj
+    let cglPixelFormat: CGLPixelFormatObj
+    var needsFlip: Bool = false
+    var forceDraw: Bool = false
+    var surfaceSize: NSSize = NSSize(width: 0, height: 0)
+    var bufferDepth: GLint = 8
+
+    enum Draw: Int { case normal = 1, atomic, atomicEnd }
+    var draw: Draw = .normal
+
+    let queue: DispatchQueue = DispatchQueue(label: "io.mpv.queue.draw")
+
+    var needsICCUpdate: Bool = false {
+        // raising the flag schedules a redraw; draw(...) lowers it again
+        didSet {
+            guard needsICCUpdate else { return }
+            update()
+        }
+    }
+
+    // Live-resize flag. Switching it on makes the layer asynchronous, and any
+    // assignment forces a redraw; canDraw(...) switches asynchronous mode off.
+    var inLiveResize: Bool = false {
+        didSet {
+            if inLiveResize { isAsynchronous = true }
+            update(force: true)
+        }
+    }
+
+    init(cocoaCB ccb: CocoaCB) { // creates pixel format and context, then hooks the layer up to libmpv's renderer
+        cocoaCB = ccb
+        (cglPixelFormat, bufferDepth) = GLLayer.createPixelFormat(ccb)
+        cglContext = GLLayer.createContext(ccb, cglPixelFormat)
+        super.init()
+        autoresizingMask = [.layerWidthSizable, .layerHeightSizable]
+        backgroundColor = NSColor.black.cgColor
+
+        if bufferDepth > 8 { // a deeper pixel format was found, so use 16-bit float layer contents
+            contentsFormat = .RGBA16Float
+        }
+
+        var i: GLint = 1 // value passed for kCGLCPSwapInterval below
+        CGLSetParameter(cglContext, kCGLCPSwapInterval, &i)
+        CGLSetCurrentContext(cglContext)
+
+        libmpv.initRender() // called only after the context was made current
+        libmpv.setRenderUpdateCallback(updateCallback, context: self)
+        libmpv.setRenderControlCallback(cocoaCB.controlCallback, context: cocoaCB)
+    }
+
+    // necessary for when the layer containing window changes the screen
+    override init(layer: Any) { // copy initializer: shares the old layer's pixel format and context
+        guard let oldLayer = layer as? GLLayer else {
+            fatalError("init(layer: Any) passed an invalid layer")
+        }
+        cocoaCB = oldLayer.cocoaCB
+        surfaceSize = oldLayer.surfaceSize
+        cglPixelFormat = oldLayer.cglPixelFormat
+        cglContext = oldLayer.cglContext // NOTE(review): bufferDepth is not copied, the copy keeps the default of 8 - confirm this is intended
+        super.init()
+    }
+
+    required init?(coder: NSCoder) { // decoding is not supported; this initializer always traps
+        fatalError("init(coder:) has not been implemented")
+    }
+
+    override func canDraw(inCGLContext ctx: CGLContextObj,
+                          pixelFormat pf: CGLPixelFormatObj,
+                          forLayerTime t: CFTimeInterval,
+                          displayTime ts: UnsafePointer<CVTimeStamp>?) -> Bool {
+        // asynchronous mode is dropped again once no live resize is running
+        if !inLiveResize { isAsynchronous = false }
+        // draw only with an initialized backend and a forced or pending frame
+        guard cocoaCB.backendState == .initialized else { return false }
+        return forceDraw || libmpv.isRenderUpdateFrame()
+    }
+
+    override func draw(inCGLContext ctx: CGLContextObj,
+                       pixelFormat pf: CGLPixelFormatObj,
+                       forLayerTime t: CFTimeInterval,
+                       displayTime ts: UnsafePointer<CVTimeStamp>?) {
+        needsFlip = false
+        forceDraw = false
+
+        // An atomic draw spans two frames: the first one only advances the
+        // state, the second one re-enables screen updates.
+        if draw == .atomic {
+            draw = .atomicEnd
+        } else if draw == .atomicEnd {
+            atomicDrawingEnd()
+        }
+
+        updateSurfaceSize()
+        libmpv.drawRender(surfaceSize, bufferDepth, ctx)
+
+        // a requested ICC profile update is carried out after the frame
+        guard needsICCUpdate else { return }
+        needsICCUpdate = false
+        cocoaCB.updateICCProfile()
+    }
+
+    func updateSurfaceSize() {
+        var viewport = [GLint](repeating: 0, count: 4)
+        glGetIntegerv(GLenum(GL_VIEWPORT), &viewport)
+        surfaceSize = NSSize(width: CGFloat(viewport[2]), height: CGFloat(viewport[3]))
+
+        // An empty viewport falls back to the layer bounds scaled by
+        // contentsScale.
+        guard NSEqualSizes(surfaceSize, NSZeroSize) else { return }
+        surfaceSize = NSSize(width: bounds.size.width * contentsScale,
+                             height: bounds.size.height * contentsScale)
+    }
+
+    func atomicDrawingStart() {
+        // a start request is ignored while an atomic draw is already pending
+        guard draw == .normal else { return }
+        NSDisableScreenUpdates()
+        draw = .atomic
+    }
+
+    func atomicDrawingEnd() {
+        // nothing to undo unless an atomic draw was started
+        guard draw != .normal else { return }
+        NSEnableScreenUpdates()
+        draw = .normal
+    }
+
+    override func copyCGLPixelFormat(forDisplayMask mask: UInt32) -> CGLPixelFormatObj { // the display mask is ignored
+        return cglPixelFormat // always the single pixel format held by this layer
+    }
+
+    override func copyCGLContext(forPixelFormat pf: CGLPixelFormatObj) -> CGLContextObj { // the passed pixel format is ignored
+        contentsScale = cocoaCB.window?.backingScaleFactor ?? 1.0 // refresh the scale from the window, 1.0 without one
+        return cglContext // always the single context held by this layer
+    }
+
+    let updateCallback: mpv_render_update_fn = { (ctx) in // registered with libmpv in init(cocoaCB:) with self as context
+        let layer: GLLayer = unsafeBitCast(ctx, to: GLLayer.self) // reinterpret the context pointer as the layer
+        layer.update()
+    }
+
+    override func display() { // displays the layer while holding displayLock
+        displayLock.lock()
+        let isUpdate = needsFlip // needsFlip is set by update() before it calls display()
+        super.display()
+        CATransaction.flush()
+        if isUpdate && needsFlip { // draw(...) clears needsFlip, so it is still set when no draw happened
+            CGLSetCurrentContext(cglContext)
+            if libmpv.isRenderUpdateFrame() {
+                libmpv.drawRender(NSZeroSize, bufferDepth, cglContext, skip: true) // presumably lets libmpv skip the undrawn frame - confirm
+            }
+        }
+        displayLock.unlock()
+    }
+
+    func update(force: Bool = false) { // schedules a display() on the draw queue
+        if force { forceDraw = true } // forceDraw stays set until draw(...) clears it
+        queue.async {
+            if self.forceDraw || !self.inLiveResize { // unforced updates are dropped during a live resize
+                self.needsFlip = true
+                self.display()
+            }
+        }
+    }
+
+    class func createPixelFormat(_ ccb: CocoaCB) -> (CGLPixelFormatObj, GLint) { // returns the pixel format and its buffer depth, exits the process on failure
+        var pix: CGLPixelFormatObj?
+        var depth: GLint = 8
+        var err: CGLError = CGLError(rawValue: 0)
+        let swRender = ccb.libmpv.macOpts.cocoa_cb_sw_renderer
+
+        if swRender != 1 { // the non-software search runs unless the option is 1
+            (pix, depth, err) = GLLayer.findPixelFormat(ccb)
+        }
+
+        if (err != kCGLNoError || pix == nil) && swRender != 0 { // software search, unless the option is 0
+            (pix, depth, err) = GLLayer.findPixelFormat(ccb, software: true)
+        }
+
+        guard let pixelFormat = pix, err == kCGLNoError else {
+            ccb.log.sendError("Couldn't create any CGL pixel format")
+            exit(1)
+        }
+
+        return (pixelFormat, depth)
+    }
+
+    class func findPixelFormat(_ ccb: CocoaCB, software: Bool = false) -> (CGLPixelFormatObj?, GLint, CGLError) {
+        var pix: CGLPixelFormatObj?
+        var err: CGLError = CGLError(rawValue: 0)
+        var npix: GLint = 0
+        // Returns (pixel format, buffer depth, last CGL error); the depth is 8 when nothing was found.
+        for ver in glVersions {
+            var glBase = software ? glFormatSoftwareBase : glFormatBase
+            glBase.insert(CGLPixelFormatAttribute(ver.rawValue), at: 1) // the profile value follows kCGLPFAOpenGLProfile
+
+            var glFormat = [glBase] // attribute groups, mandatory base first
+            if ccb.libmpv.macOpts.cocoa_cb_10bit_context {
+                glFormat += [glFormat10Bit]
+            }
+            glFormat += glFormatOptional
+
+            if !ccb.libmpv.macOpts.macos_force_dedicated_gpu {
+                glFormat += [glFormatAutoGPU]
+            }
+
+            for index in stride(from: glFormat.count-1, through: 0, by: -1) { // each failure drops the last remaining group
+                let format = glFormat.flatMap { $0 } + [_CGLPixelFormatAttribute(rawValue: 0)] // flattened list ending in 0
+                err = CGLChoosePixelFormat(format, &pix, &npix)
+
+                if err == kCGLBadAttribute || err == kCGLBadPixelFormat || pix == nil {
+                    glFormat.remove(at: index)
+                } else {
+                    let attArray = format.map({ (value: _CGLPixelFormatAttribute) -> String in
+                        return attributeLookUp[value.rawValue] ?? String(value.rawValue) // unknown values are logged as numbers
+                    })
+
+                    ccb.log.sendVerbose("Created CGL pixel format with attributes: " +
+                                    "\(attArray.joined(separator: ", "))")
+                    return (pix, glFormat.contains(glFormat10Bit) ? 16 : 8, err) // depth 16 only if the 10 bit group survived
+                }
+            }
+        }
+
+        let errS = String(cString: CGLErrorString(err)) // reached only when no profile produced a format
+        ccb.log.sendWarning("Couldn't create a " +
+                           "\(software ? "software" : "hardware accelerated") " +
+                           "CGL pixel format: \(errS) (\(err.rawValue))")
+        if software == false && ccb.libmpv.macOpts.cocoa_cb_sw_renderer == -1 {
+            ccb.log.sendWarning("Falling back to software renderer")
+        }
+
+        return (pix, 8, err)
+    }
+
+    class func createContext(_ ccb: CocoaCB, _ pixelFormat: CGLPixelFormatObj) -> CGLContextObj { // creates the CGL context, exits the process on failure
+        var context: CGLContextObj?
+        let error = CGLCreateContext(pixelFormat, nil, &context) // nil: no context is passed for sharing
+
+        guard let cglContext = context, error == kCGLNoError else {
+            let errS = String(cString: CGLErrorString(error))
+            ccb.log.sendError("Couldn't create a CGLContext: " + errS)
+            exit(1)
+        }
+
+        return cglContext
+    }
+}
diff --git a/video/out/mac/metal_layer.swift b/video/out/mac/metal_layer.swift
new file mode 100644
index 0000000..7cea87c
--- /dev/null
+++ b/video/out/mac/metal_layer.swift
@@ -0,0 +1,43 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+
+class MetalLayer: CAMetalLayer { // CAMetalLayer subclass holding an unowned back-reference to its MacCommon
+    unowned var common: MacCommon
+
+    init(common com: MacCommon) {
+        common = com
+        super.init()
+
+        pixelFormat = .rgba16Float // a freshly created layer uses the 16-bit float RGBA format
+        backgroundColor = NSColor.black.cgColor
+    }
+
+    // necessary for when the layer containing window changes the screen
+    override init(layer: Any) {
+        guard let oldLayer = layer as? MetalLayer else {
+            fatalError("init(layer: Any) passed an invalid layer")
+        }
+        common = oldLayer.common // only the back-reference is taken over from the old layer here
+        super.init()
+    }
+
+    required init?(coder: NSCoder) { // decoding is not supported; this initializer always traps
+        fatalError("init(coder:) has not been implemented")
+    }
+}
diff --git a/video/out/mac/title_bar.swift b/video/out/mac/title_bar.swift
new file mode 100644
index 0000000..764c1ff
--- /dev/null
+++ b/video/out/mac/title_bar.swift
@@ -0,0 +1,229 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+
+class TitleBar: NSVisualEffectView {
+    unowned var common: Common
+    var mpv: MPVHelper? { get { return common.mpv } }
+
+    var systemBar: NSView? { // superview of the window's close button, if any
+        return common.window?.standardWindowButton(.closeButton)?.superview
+    }
+    static var height: CGFloat { // frame height of a titled window with empty content
+        return NSWindow.frameRect(forContentRect: CGRect.zero, styleMask: .titled).size.height
+    }
+    var buttons: [NSButton] { // the standard window buttons that currently exist
+        return [NSWindow.ButtonType.closeButton, .miniaturizeButton, .zoomButton].compactMap { common.window?.standardWindowButton($0) }
+    }
+
+    override var material: NSVisualEffectView.Material {
+        get { return super.material }
+        set {
+            super.material = newValue
+
+            // fix for broken deprecated materials: those are kept permanently
+            // active, everything else follows the window's active state
+            switch material {
+            case .light, .dark, .mediumLight, .ultraDark:
+                state = .active
+            default:
+                state = .followsWindowActiveState
+            }
+        }
+    }
+
+    init(frame: NSRect, window: NSWindow, common com: Common) { // builds the initially hidden title bar and adds it to the window's content view
+        let f = NSMakeRect(0, frame.size.height - TitleBar.height, // placed at the top edge of the given frame
+                           frame.size.width, TitleBar.height + 1)
+        common = com
+        super.init(frame: f)
+        buttons.forEach { $0.isHidden = true } // starts fully hidden: buttons, view and system bar
+        isHidden = true
+        alphaValue = 0
+        blendingMode = .withinWindow
+        autoresizingMask = [.width, .minYMargin]
+        systemBar?.alphaValue = 0
+        state = .followsWindowActiveState
+        wantsLayer = true
+
+        window.contentView?.addSubview(self, positioned: .above, relativeTo: nil)
+        window.titlebarAppearsTransparent = true
+        window.styleMask.insert(.fullSizeContentView)
+        set(appearance: Int(mpv?.macOpts.macos_title_bar_appearance ?? 0)) // option values, or defaults when mpv is nil
+        set(material: Int(mpv?.macOpts.macos_title_bar_material ?? 0))
+        set(color: mpv?.macOpts.macos_title_bar_color ?? "#00000000")
+    }
+
+    required init?(coder: NSCoder) { // decoding is not supported; this initializer always traps
+        fatalError("init(coder:) has not been implemented")
+    }
+
+    // Swallow mouse-down events so that they are not propagated to the underlying view.
+    override func mouseDown(with event: NSEvent) { }
+
+    override func mouseUp(with event: NSEvent) {
+        // A double click on the title bar runs the action stored in the user
+        // defaults ("Minimize" or "Maximize"); other values do nothing.
+        // Every mouse-up also clears the window's isMoving flag.
+        if event.clickCount > 1 {
+            let def = UserDefaults.standard
+            let action = def.string(forKey: "AppleActionOnDoubleClick") ??
+                // macOS 10.10 and earlier
+                (def.bool(forKey: "AppleMiniaturizeOnDoubleClick") ? "Minimize" : "Maximize")
+
+            // dispatch on the resolved action name
+            switch action {
+            case "Minimize": window?.miniaturize(self)
+            case "Maximize": window?.zoom(self)
+            default:         break
+            }
+        }
+
+        common.window?.isMoving = false
+    }
+
+    func set(appearance: Any) {
+        // an Int is looked up through its decimal string, anything else as a name
+        let number = appearance as? Int
+        let name = appearance as? String ?? "auto"
+        let key = number.map { String($0) } ?? name
+        window?.appearance = appearanceFrom(string: key)
+    }
+
+    func set(material: Any) {
+        // an Int is looked up through its decimal string, anything else as a name
+        let number = material as? Int
+        let name = material as? String ?? "titlebar"
+        let key = number.map { String($0) } ?? name
+        self.material = materialFrom(string: key)
+    }
+
+    func set(color: Any) {
+        // a String goes through NSColor(hex:)
+        if let hex = color as? String {
+            layer?.backgroundColor = NSColor(hex: hex).cgColor
+            return
+        }
+        // everything else is read as an m_color, defaulting to all-zero components
+        let col = color as? m_color ?? m_color(r: 0, g: 0, b: 0, a: 0)
+        layer?.backgroundColor = NSColor(calibratedRed: CGFloat(col.r)/255,
+                                         green: CGFloat(col.g)/255,
+                                         blue: CGFloat(col.b)/255,
+                                         alpha: CGFloat(col.a)/255).cgColor
+    }
+
+    func show() { // fades the title bar in and decides whether it should hide again
+        guard let window = common.window else { return }
+        if !window.border && !window.isInFullscreen { return } // nothing is shown for a borderless window outside fullscreen
+        let loc = common.view?.convert(window.mouseLocationOutsideOfEventStream, from: nil)
+
+        buttons.forEach { $0.isHidden = false }
+        NSAnimationContext.runAnimationGroup({ (context) -> Void in
+            context.duration = 0.20
+            systemBar?.animator().alphaValue = 1
+            if !window.isInFullscreen && !window.isAnimating { // the custom bar itself is only shown outside fullscreen
+                animator().alphaValue = 1
+                isHidden = false
+            }
+        }, completionHandler: nil )
+
+        if loc?.y ?? 0 > TitleBar.height { // pointer beyond the title bar height: schedule a delayed hide
+            hideDelayed()
+        } else { // otherwise cancel any pending hide
+            NSObject.cancelPreviousPerformRequests(withTarget: self, selector: #selector(hide), object: nil)
+        }
+    }
+
+    @objc func hide(_ duration: TimeInterval = 0.20) { // hides the title bar, animated over duration unless in fullscreen
+        guard let window = common.window else { return }
+        if window.isInFullscreen && !window.isAnimating { // in settled fullscreen the bar is hidden without animation
+            alphaValue = 0
+            isHidden = true
+            return
+        }
+        NSAnimationContext.runAnimationGroup({ (context) -> Void in
+            context.duration = duration
+            systemBar?.animator().alphaValue = 0
+            animator().alphaValue = 0
+        }, completionHandler: { // the views are marked hidden only after the fade finished
+            self.buttons.forEach { $0.isHidden = true }
+            self.isHidden = true
+        })
+    }
+
+    func hideDelayed() { // restarts the hide timer: cancels a pending hide and schedules a new one in 0.5 s
+        NSObject.cancelPreviousPerformRequests(withTarget: self,
+                                                 selector: #selector(hide),
+                                                   object: nil)
+        perform(#selector(hide), with: nil, afterDelay: 0.5) // NOTE(review): hide(_:) takes a TimeInterval but is performed with a nil object - confirm the resulting duration
+    }
+
+    // Maps an appearance option, given as numeric string or as name, to the
+    // matching NSAppearance. "0", "auto" and every unknown value yield nil.
+    // The former UserDefaults based fallback after the switch could never run,
+    // because every branch returns, and has been removed.
+    func appearanceFrom(string: String) -> NSAppearance? {
+        switch string {
+        case "1", "aqua":
+            return NSAppearance(named: .aqua)
+        case "2", "darkAqua":
+            return NSAppearance(named: .darkAqua)
+        case "3", "vibrantLight":
+            return NSAppearance(named: .vibrantLight)
+        case "4", "vibrantDark":
+            return NSAppearance(named: .vibrantDark)
+        case "5", "aquaHighContrast":
+            return NSAppearance(named: .accessibilityHighContrastAqua)
+        case "6", "darkAquaHighContrast":
+            return NSAppearance(named: .accessibilityHighContrastDarkAqua)
+        case "7", "vibrantLightHighContrast":
+            return NSAppearance(named: .accessibilityHighContrastVibrantLight)
+        case "8", "vibrantDarkHighContrast":
+            return NSAppearance(named: .accessibilityHighContrastVibrantDark)
+        case "0", "auto": fallthrough
+        default:
+            return nil
+        }
+    }
+
+    // Maps a material option, given as numeric string or as name, to the
+    // matching NSVisualEffectView.Material; unknown values yield .titlebar.
+    func materialFrom(string: String) -> NSVisualEffectView.Material {
+        switch string {
+        case "0",  "titlebar":              return .titlebar
+        case "1",  "selection":             return .selection
+        case "2",  "menu":                  return .menu
+        case "3",  "popover":               return .popover
+        case "4",  "sidebar":               return .sidebar
+        case "5",  "headerView":            return .headerView
+        case "6",  "sheet":                 return .sheet
+        case "7",  "windowBackground":      return .windowBackground
+        case "8",  "hudWindow":             return .hudWindow
+        case "9",  "fullScreen":            return .fullScreenUI
+        case "10", "toolTip":               return .toolTip
+        case "11", "contentBackground":     return .contentBackground
+        case "12", "underWindowBackground": return .underWindowBackground
+        case "13", "underPageBackground":   return .underPageBackground
+        case "14", "dark":                  return .dark
+        case "15", "light":                 return .light
+        case "16", "mediumLight":           return .mediumLight
+        case "17", "ultraDark":             return .ultraDark
+        default:                            return .titlebar
+        }
+    }
+}
diff --git a/video/out/mac/view.swift b/video/out/mac/view.swift
new file mode 100644
index 0000000..c4776c3
--- /dev/null
+++ b/video/out/mac/view.swift
@@ -0,0 +1,297 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+
+class View: NSView {
+    unowned var common: Common
+    var mpv: MPVHelper? { get { return common.mpv } }
+
+    var tracker: NSTrackingArea?
+    var hasMouseDown: Bool = false
+
+    override var isFlipped: Bool { return true }
+    override var acceptsFirstResponder: Bool { return true }
+
+
+    init(frame: NSRect, common com: Common) { // creates the view with an unowned back-reference to common
+        common = com
+        super.init(frame: frame)
+        autoresizingMask = [.width, .height]
+        wantsBestResolutionOpenGLSurface = true
+        registerForDraggedTypes([ .fileURL, .URL, .string ]) // the same three types are checked by the drag handlers
+    }
+
+    required init?(coder: NSCoder) { // decoding is not supported; this initializer always traps
+        fatalError("init(coder:) has not been implemented")
+    }
+
+    override func updateTrackingAreas() {
+        // replace the previous tracking area with one covering the current bounds
+        if let old = tracker { removeTrackingArea(old) }
+
+        // the new area is held in a local so it can be installed without unwrapping
+        let area = NSTrackingArea(rect: bounds,
+            options: [.activeAlways, .mouseEnteredAndExited, .mouseMoved, .enabledDuringMouseDrag],
+            owner: self, userInfo: nil)
+        tracker = area
+        addTrackingArea(area)
+
+        if containsMouseLocation() {
+            cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_LEAVE, 0)
+        }
+    }
+
+    override func draggingEntered(_ sender: NSDraggingInfo) -> NSDragOperation {
+        // a drag is accepted as a copy when it offers a file URL, a URL or
+        // a string; anything else is refused
+        guard let types = sender.draggingPasteboard.types else { return [] }
+        let accepted: [NSPasteboard.PasteboardType] = [.fileURL, .URL, .string]
+        return accepted.contains(where: { types.contains($0) }) ? .copy : []
+    }
+
+    // Returns true when the whole string is an http, https or ftp URL without whitespace.
+    func isURL(_ str: String) -> Bool {
+        // force unwrapping is fine here, regex is guaranteed to be valid
+        let regex = try! NSRegularExpression(pattern: "^(https?|ftp)://[^\\s/$.?#].[^\\s]*$",
+                                             options: .caseInsensitive)
+        // NSRange is measured in UTF-16 code units, so str.count would cut the range short
+        let range = NSRange(location: 0, length: str.utf16.count)
+        return regex.numberOfMatches(in: str, options: [], range: range) > 0
+    }
+
+    override func performDragOperation(_ sender: NSDraggingInfo) -> Bool { // hands dropped URLs or text lines to the EventsResponder
+        let pb = sender.draggingPasteboard
+        guard let types = pb.types else { return false }
+
+        if types.contains(.fileURL) || types.contains(.URL) { // URL types take precedence over plain text
+            if let urls = pb.readObjects(forClasses: [NSURL.self]) as? [URL] {
+                let files = urls.map { $0.absoluteString }
+                EventsResponder.sharedInstance().handleFilesArray(files)
+                return true
+            }
+        } else if types.contains(.string) {
+            guard let str = pb.string(forType: .string) else { return false }
+            var filesArray: [String] = []
+
+            for val in str.components(separatedBy: "\n") { // one candidate per line of dropped text
+                let url = val.trimmingCharacters(in: .whitespacesAndNewlines)
+                let path = (url as NSString).expandingTildeInPath
+                if isURL(url) {
+                    filesArray.append(url)
+                } else if path.starts(with: "/") { // otherwise only absolute paths are kept
+                    filesArray.append(path)
+                }
+            }
+            EventsResponder.sharedInstance().handleFilesArray(filesArray) // called even when no line qualified
+            return true
+        }
+        return false
+    }
+
+    override func acceptsFirstMouse(for event: NSEvent?) -> Bool { // always accepts, regardless of the event
+        return true
+    }
+
+    override func becomeFirstResponder() -> Bool { // always agrees to become first responder
+        return true
+    }
+
+    override func resignFirstResponder() -> Bool { // always agrees to give up first responder status
+        return true
+    }
+
+    override func mouseEntered(with event: NSEvent) { // reports the enter key and refreshes cursor visibility
+        if mpv?.mouseEnabled() ?? true { // treated as enabled when mpv is nil
+            cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_ENTER, 0)
+        }
+        common.updateCursorVisibility()
+    }
+
+    override func mouseExited(with event: NSEvent) { // reports the leave key, hides the title bar and shows the cursor
+        if mpv?.mouseEnabled() ?? true { // treated as enabled when mpv is nil
+            cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_LEAVE, 0)
+        }
+        common.titleBar?.hide()
+        common.setCursorVisibility(true)
+    }
+
+    override func mouseMoved(with event: NSEvent) { // reports the movement and shows the title bar
+        if mpv?.mouseEnabled() ?? true { // treated as enabled when mpv is nil
+            signalMouseMovement(event)
+        }
+        common.titleBar?.show() // shown even when mouse input is disabled
+    }
+
+    override func mouseDragged(with event: NSEvent) {
+        // drags are reported as plain pointer movement
+        guard mpv?.mouseEnabled() ?? true else { return }
+        signalMouseMovement(event)
+    }
+
+    override func mouseDown(with event: NSEvent) {
+        // primary button pressed
+        guard mpv?.mouseEnabled() ?? true else { return }
+        signalMouseDown(event)
+    }
+
+    override func mouseUp(with event: NSEvent) {
+        let enabled = mpv?.mouseEnabled() ?? true
+        if enabled { signalMouseUp(event) }
+        // the isMoving flag is cleared even when mouse input is disabled
+        common.window?.isMoving = false
+    }
+
+    override func rightMouseDown(with event: NSEvent) {
+        // secondary button pressed
+        guard mpv?.mouseEnabled() ?? true else { return }
+        signalMouseDown(event)
+    }
+
+    override func rightMouseUp(with event: NSEvent) {
+        // secondary button released
+        guard mpv?.mouseEnabled() ?? true else { return }
+        signalMouseUp(event)
+    }
+
+    override func otherMouseDown(with event: NSEvent) {
+        // any further button pressed
+        guard mpv?.mouseEnabled() ?? true else { return }
+        signalMouseDown(event)
+    }
+
+    override func otherMouseUp(with event: NSEvent) {
+        // any further button released
+        guard mpv?.mouseEnabled() ?? true else { return }
+        signalMouseUp(event)
+    }
+
+    override func magnify(with event: NSEvent) {
+        // every phase except .ended is signalled as the start of a live resize
+        if event.phase == .ended { common.windowDidEndLiveResize() }
+        else { common.windowWillStartLiveResize() }
+        common.window?.addWindowScale(Double(event.magnification))
+    }
+
+    func signalMouseDown(_ event: NSEvent) {
+        signalMouseEvent(event, MP_KEY_STATE_DOWN)
+        // for repeated clicks the release is sent right after the press
+        guard event.clickCount > 1 else { return }
+        signalMouseEvent(event, MP_KEY_STATE_UP)
+    }
+
+    func signalMouseUp(_ event: NSEvent) { // reports the release of the event's button
+        signalMouseEvent(event, MP_KEY_STATE_UP)
+    }
+
+    func signalMouseEvent(_ event: NSEvent, _ state: UInt32) { // sends the button key combined with the given state
+        hasMouseDown = state == MP_KEY_STATE_DOWN // remembered for canHideCursor()
+        let mpkey = getMpvButton(event)
+        cocoa_put_key_with_modifiers((mpkey | Int32(state)), Int32(event.modifierFlags.rawValue))
+    }
+
+    func signalMouseMovement(_ event: NSEvent) { // forwards the pointer position in backing coordinates
+        var point = convert(event.locationInWindow, from: nil) // window coordinates -> view coordinates
+        point = convertToBacking(point)
+        point.y = -point.y // sign flipped after the backing conversion; NOTE(review): presumably compensates for the flipped view - confirm
+
+        common.window?.updateMovableBackground(point)
+        if !(common.window?.isMoving ?? false) { // the position is not reported while the window is being moved
+            mpv?.setMousePosition(point)
+        }
+    }
+
+    func preciseScroll(_ event: NSEvent) {
+        // The axis with the larger magnitude decides the direction (vertical
+        // on a tie); the delta is scaled by 0.1 and sent as a magnitude.
+        let vertical = abs(event.deltaY) >= abs(event.deltaX)
+        let delta = Double(vertical ? event.deltaY : event.deltaX) * 0.1
+        let cmd: Int32
+        if vertical {
+            cmd = delta > 0 ? SWIFT_WHEEL_UP : SWIFT_WHEEL_DOWN
+        } else {
+            cmd = delta > 0 ? SWIFT_WHEEL_LEFT : SWIFT_WHEEL_RIGHT
+        }
+
+        mpv?.putAxis(cmd, delta: abs(delta))
+    }
+
+    override func scrollWheel(with event: NSEvent) {
+        guard mpv?.mouseEnabled() ?? true else { return }
+        // events with precise deltas are handled by preciseScroll(_:)
+        guard !event.hasPreciseScrollingDeltas else {
+            preciseScroll(event)
+            return
+        }
+
+        // coarse wheel: holding shift swaps the two axes
+        let modifiers = event.modifierFlags
+        let swapped = modifiers.contains(.shift)
+        let deltaX = swapped ? event.scrollingDeltaY : event.scrollingDeltaX
+        let deltaY = swapped ? event.scrollingDeltaX : event.scrollingDeltaY
+        let mpkey: Int32
+        if abs(deltaY) >= abs(deltaX) {
+            mpkey = deltaY > 0 ? SWIFT_WHEEL_UP : SWIFT_WHEEL_DOWN
+        } else {
+            mpkey = deltaX > 0 ? SWIFT_WHEEL_LEFT : SWIFT_WHEEL_RIGHT
+        }
+
+        cocoa_put_key_with_modifiers(mpkey, Int32(modifiers.rawValue))
+    }
+
+    func containsMouseLocation() -> Bool { // whether the pointer is inside the usable part of the view
+        var topMargin: CGFloat = 0.0
+        let menuBarHeight = NSApp.mainMenu?.menuBarHeight ?? 23.0 // 23.0 is assumed when there is no main menu
+
+        guard let window = common.window else { return false }
+        guard var vF = window.screen?.frame else { return false }
+
+        if window.isInFullscreen && (menuBarHeight > 0) { // in fullscreen the title bar and menu bar height are cut off the screen frame
+            topMargin = TitleBar.height + 1 + menuBarHeight
+        }
+
+        vF.size.height -= topMargin
+
+        let vFW = window.convertFromScreen(vF) // screen frame in window coordinates
+        let vFV = convert(vFW, from: nil) // and then in view coordinates
+        let pt = convert(window.mouseLocationOutsideOfEventStream, from: nil)
+
+        var clippedBounds = bounds.intersection(vFV) // the part of the view that lies within the screen frame
+        if !window.isInFullscreen { // outside fullscreen the title bar strip is excluded
+            clippedBounds.origin.y += TitleBar.height
+            clippedBounds.size.height -= TitleBar.height
+        }
+        return clippedBounds.contains(pt)
+    }
+
+    func canHideCursor() -> Bool { // false without a window
+        guard let window = common.window else { return false }
+        return !hasMouseDown && containsMouseLocation() && window.isKeyWindow // no button held, pointer inside, window is key
+    }
+
+    func getMpvButton(_ event: NSEvent) -> Int32 {
+        // Button numbers 0-4 have dedicated keys; every other number is
+        // offset from SWIFT_MBTN9, which stands for button number 5.
+        let known: [Int32] = [SWIFT_MBTN_LEFT, SWIFT_MBTN_RIGHT, SWIFT_MBTN_MID,
+                              SWIFT_MBTN_BACK, SWIFT_MBTN_FORWARD]
+        let number = event.buttonNumber
+        guard known.indices.contains(number) else {
+            return SWIFT_MBTN9 + Int32(number - 5)
+        }
+        return known[number]
+    }
+}
diff --git a/video/out/mac/window.swift b/video/out/mac/window.swift
new file mode 100644
index 0000000..7b1a858
--- /dev/null
+++ b/video/out/mac/window.swift
@@ -0,0 +1,593 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+
+class Window: NSWindow, NSWindowDelegate {
+    // Owning Common instance; weak and implicitly unwrapped, assigned in the
+    // convenience initializer.
+    weak var common: Common! = nil
+    // Shortcut to the MPVHelper held by common.
+    var mpv: MPVHelper? { get { return common.mpv } }
+
+    // Screen bookkeeping:
+    //   targetScreen   - screen chosen by the last toggleFullScreen call
+    //   previousScreen - screen remembered by toggleFullScreen/windowDidChangeScreen
+    //   currentScreen  - screen last recorded by windowDidChangeScreen
+    //   unfScreen      - screen recorded together with unfsContentFrame when
+    //                    leaving windowed mode
+    var targetScreen: NSScreen?
+    var previousScreen: NSScreen?
+    var currentScreen: NSScreen?
+    var unfScreen: NSScreen?
+
+    // Content frame (screen coordinates) captured while not in fullscreen
+    var unfsContentFrame: NSRect?
+    var isInFullscreen: Bool = false
+    // Set in windowWillMove; not reset anywhere in this class
+    var isMoving: Bool = false
+    // Style mask that was active before the most recent styleMask assignment
+    var previousStyleMask: NSWindow.StyleMask = [.titled, .closable, .miniaturizable, .resizable]
+
+    // True between toggleFullScreen and endAnimation; animationLock is the
+    // condition waitForAnimation blocks on
+    var isAnimating: Bool = false
+    let animationLock: NSCondition = NSCondition()
+
+    // Backing-store (pixel) versions of the frames; falls back to 160x90 when
+    // no windowed content frame has been recorded
+    var unfsContentFramePixel: NSRect { get { return convertToBacking(unfsContentFrame ?? NSRect(x: 0, y: 0, width: 160, height: 90)) } }
+    var framePixel: NSRect { get { return convertToBacking(frame) } }
+
+    // Whether resizing is locked to the aspect ratio of the windowed content
+    // frame. Every assignment refreshes unfsContentFrame (unless in fullscreen)
+    // and then applies either the aspect ratio or free 1x1 resize increments.
+    var keepAspect: Bool = true {
+        didSet {
+            if !isInFullscreen, let currentContentFrame = contentView?.frame {
+                unfsContentFrame = convertToScreen(currentContentFrame)
+            }
+
+            guard keepAspect else {
+                resizeIncrements = NSSize(width: 1.0, height: 1.0)
+                return
+            }
+            contentAspectRatio = unfsContentFrame?.size ?? contentAspectRatio
+        }
+    }
+
+    // Window border flag; switching it off hides the title bar.
+    var border: Bool = true {
+        didSet {
+            guard !border else { return }
+            common.titleBar?.hide()
+        }
+    }
+
+    // Always allow this window to become key and main window.
+    override var canBecomeKey: Bool { return true }
+    override var canBecomeMain: Bool { return true }
+
+    // Style mask wrapper: remembers the mask that was active before the change
+    // in previousStyleMask, and re-applies the first responder and the title
+    // after the new mask is set (presumably because changing the mask can
+    // reset them - confirm against AppKit behaviour).
+    override var styleMask: NSWindow.StyleMask {
+        get { return super.styleMask }
+        set {
+            let responder = firstResponder
+            let windowTitle = title
+            previousStyleMask = super.styleMask
+            super.styleMask = newValue
+            makeFirstResponder(responder)
+            title = windowTitle
+        }
+    }
+
+    // Creates a titled, closable, miniaturizable and resizable window for the
+    // given content rect on the given screen, embeds `view` into the content
+    // view, records the initial screen/frame state and registers the window
+    // related menu bar actions.
+    //
+    // contentRect: wanted content rectangle
+    // screen:      screen to place the window on (may be nil)
+    // view:        view added as subview of the content view
+    // com:         owning Common instance (stored weakly)
+    convenience init(contentRect: NSRect, screen: NSScreen?, view: NSView, common com: Common) {
+        self.init(contentRect: contentRect,
+                  styleMask: [.titled, .closable, .miniaturizable, .resizable],
+                  backing: .buffered, defer: false, screen: screen)
+
+        // workaround for an AppKit bug where the NSWindow can't be placed on a
+        // non-main NSScreen outside the main screen's frame bounds
+        if let wantedScreen = screen, screen != NSScreen.main {
+            // contentRect's origin is treated as relative to the wanted screen
+            var absoluteWantedOrigin = contentRect.origin
+            absoluteWantedOrigin.x += wantedScreen.frame.origin.x
+            absoluteWantedOrigin.y += wantedScreen.frame.origin.y
+
+            if !NSEqualPoints(absoluteWantedOrigin, self.frame.origin) {
+                self.setFrameOrigin(absoluteWantedOrigin)
+            }
+        }
+
+        common = com
+        title = com.title
+        minSize = NSMakeSize(160, 90)
+        collectionBehavior = .fullScreenPrimary
+        delegate = self
+
+        if let cView = contentView {
+            cView.addSubview(view)
+            view.frame = cView.frame
+            // initial windowed content frame, in screen coordinates
+            unfsContentFrame = convertToScreen(cView.frame)
+        }
+
+        targetScreen = screen
+        currentScreen = screen
+        unfScreen = screen
+
+        // hook the window size/minimize/zoom menu items up to this window
+        if let app = NSApp as? Application {
+            app.menuBar.register(#selector(setHalfWindowSize), for: MPM_H_SIZE)
+            app.menuBar.register(#selector(setNormalWindowSize), for: MPM_N_SIZE)
+            app.menuBar.register(#selector(setDoubleWindowSize), for: MPM_D_SIZE)
+            app.menuBar.register(#selector(performMiniaturize(_:)), for: MPM_MINIMIZE)
+            app.menuBar.register(#selector(performZoom(_:)), for: MPM_ZOOM)
+        }
+    }
+
+    // Toggles between windowed and fullscreen mode. Does nothing while a
+    // previous toggle is still animating. Picks the target screen, remembers
+    // the windowed content frame, moves the window to the target screen when
+    // entering fullscreen and then uses either the native AppKit fullscreen or
+    // the custom setToFullScreen/setToWindow path, depending on the native_fs
+    // option (native when the option is unavailable).
+    override func toggleFullScreen(_ sender: Any?) {
+        guard !isAnimating else { return }
+
+        animationLock.lock()
+        isAnimating = true
+        animationLock.unlock()
+
+        // target screen selection: explicit target > remembered previous
+        // screen > screen the window is currently on
+        if let wantedScreen = common.getTargetScreen(forFullscreen: !isInFullscreen) {
+            targetScreen = wantedScreen
+            previousScreen = screen
+        } else if let rememberedScreen = previousScreen {
+            targetScreen = rememberedScreen
+            previousScreen = nil
+        } else {
+            targetScreen = screen
+        }
+
+        if !isInFullscreen {
+            if let windowedContentFrame = contentView?.frame {
+                unfsContentFrame = convertToScreen(windowedContentFrame)
+                unfScreen = screen
+            }
+            // move window to target screen when going to fullscreen
+            if let tScreen = targetScreen, tScreen != screen {
+                let movedFrame = calculateWindowPosition(for: tScreen, withoutBounds: false)
+                setFrame(movedFrame, display: true)
+            }
+        }
+
+        guard Bool(mpv?.opts.native_fs ?? true) else {
+            if isInFullscreen {
+                setToWindow()
+            } else {
+                setToFullScreen()
+            }
+            return
+        }
+        super.toggleFullScreen(sender)
+    }
+
+    // NSWindowDelegate: only the window itself takes part in the custom
+    // enter-fullscreen animation.
+    func customWindowsToEnterFullScreen(for window: NSWindow) -> [NSWindow]? {
+        return [window]
+    }
+
+    // NSWindowDelegate: only the window itself takes part in the custom
+    // exit-fullscreen animation.
+    func customWindowsToExitFullScreen(for window: NSWindow) -> [NSWindow]? {
+        return [window]
+    }
+
+    // NSWindowDelegate: custom enter-fullscreen animation. Hides the title bar
+    // and animates the window to the full frame of the target screen. Does
+    // nothing when no target screen is set.
+    func window(_ window: NSWindow, startCustomAnimationToEnterFullScreenWithDuration duration: TimeInterval) {
+        guard let tScreen = targetScreen else { return }
+
+        common.view?.layerContentsPlacement = .scaleProportionallyToFit
+        common.titleBar?.hide()
+
+        // finish slightly (0.05s) before the system animation unless the user
+        // configured an explicit duration
+        let animationTime = getFsAnimationDuration(duration - 0.05)
+        let fullscreenFrame = tScreen.frame
+        NSAnimationContext.runAnimationGroup({ context in
+            context.duration = animationTime
+            window.animator().setFrame(fullscreenFrame, display: true)
+        }, completionHandler: nil)
+    }
+
+    // NSWindowDelegate: custom exit-fullscreen animation. Runs two animation
+    // groups: a zero-duration one that sets the window to the final windowed
+    // frame aspect-fitted into the current screen, and - from its completion
+    // handler - the actual animation to the final windowed frame. Does nothing
+    // when either the target screen or the window's screen is unknown.
+    func window(_ window: NSWindow, startCustomAnimationToExitFullScreenWithDuration duration: TimeInterval) {
+        // note: `currentScreen` is a local bound to `screen` here and shadows
+        // the property of the same name
+        guard let tScreen = targetScreen, let currentScreen = screen else { return }
+        let newFrame = calculateWindowPosition(for: tScreen, withoutBounds: tScreen == screen)
+        let intermediateFrame = aspectFit(rect: newFrame, in: currentScreen.frame)
+        common.titleBar?.hide(0.0)
+
+        NSAnimationContext.runAnimationGroup({ (context) -> Void in
+            context.duration = 0.0
+            common.view?.layerContentsPlacement = .scaleProportionallyToFill
+            window.animator().setFrame(intermediateFrame, display: true)
+        }, completionHandler: {
+            NSAnimationContext.runAnimationGroup({ (context) -> Void in
+                context.duration = self.getFsAnimationDuration(duration - 0.05)
+                // drop the fullscreen style bit before animating to the windowed frame
+                self.styleMask.remove(.fullScreen)
+                window.animator().setFrame(newFrame, display: true)
+            }, completionHandler: nil)
+        })
+    }
+
+    // NSWindowDelegate: native fullscreen has been entered. Updates the
+    // fullscreen state and mpv option, refreshes cursor visibility, ends the
+    // animation using the current frame and shows the title bar again.
+    func windowDidEnterFullScreen(_ notification: Notification) {
+        isInFullscreen = true
+        mpv?.setOption(fullscreen: isInFullscreen)
+        common.updateCursorVisibility()
+        endAnimation(frame)
+        common.titleBar?.show()
+    }
+
+    // NSWindowDelegate: native fullscreen has been left. Updates the
+    // fullscreen state and mpv option, ends the animation and restores the
+    // layer contents placement.
+    //
+    // The state is updated even when no target screen is known: the window has
+    // left fullscreen regardless, and skipping endAnimation() would leave
+    // isAnimating set forever, blocking later toggles and waitForAnimation().
+    func windowDidExitFullScreen(_ notification: Notification) {
+        isInFullscreen = false
+        mpv?.setOption(fullscreen: isInFullscreen)
+        if let tScreen = targetScreen {
+            endAnimation(calculateWindowPosition(for: tScreen, withoutBounds: tScreen == screen))
+        } else {
+            // no frame to restore, only release the animation state
+            endAnimation()
+        }
+        common.view?.layerContentsPlacement = .scaleProportionallyToFit
+    }
+
+    // NSWindowDelegate: entering fullscreen failed. Restores the windowed
+    // frame on the target screen when one is known and always ends the
+    // animation, so isAnimating cannot stay set after a failed transition.
+    func windowDidFailToEnterFullScreen(_ window: NSWindow) {
+        if let tScreen = targetScreen {
+            let newFrame = calculateWindowPosition(for: tScreen, withoutBounds: tScreen == screen)
+            setFrame(newFrame, display: true)
+        }
+        endAnimation()
+    }
+
+    // NSWindowDelegate: leaving fullscreen failed. Puts the window back onto
+    // the full frame of the target screen when one is known, and always ends
+    // the animation and restores the layer contents placement, so isAnimating
+    // cannot stay set after a failed transition.
+    func windowDidFailToExitFullScreen(_ window: NSWindow) {
+        if let targetFrame = targetScreen?.frame {
+            setFrame(targetFrame, display: true)
+        }
+        endAnimation()
+        common.view?.layerContentsPlacement = .scaleProportionallyToFit
+    }
+
+    // Finishes a fullscreen transition. When a non-zero frame is passed and an
+    // animation is still flagged as running, the window is first moved to that
+    // frame with a very short (0.01s) animation. Afterwards isAnimating is
+    // cleared under animationLock, the condition is signalled (see
+    // waitForAnimation) and common is notified.
+    //
+    // newFrame: frame to settle on; NSZeroRect (the default) means "none"
+    func endAnimation(_ newFrame: NSRect = NSZeroRect) {
+        if !NSEqualRects(newFrame, NSZeroRect) && isAnimating {
+            NSAnimationContext.runAnimationGroup({ (context) -> Void in
+                context.duration = 0.01
+                self.animator().setFrame(newFrame, display: true)
+            }, completionHandler: nil )
+        }
+
+        animationLock.lock()
+        isAnimating = false
+        animationLock.signal()
+        animationLock.unlock()
+        common.windowDidEndAnimation()
+    }
+
+    // Enters the custom (non-native) fullscreen mode: switches the style mask
+    // (borderless on macOS 11+, .fullScreen bit otherwise), auto-hides menu bar
+    // and dock, resizes the window to the target screen's frame and updates the
+    // fullscreen state.
+    func setToFullScreen() {
+        guard let targetFrame = targetScreen?.frame else {
+            // Nothing can be done without a target screen, but the animation
+            // flag set by toggleFullScreen must still be released. Otherwise
+            // every later toggle is ignored and waitForAnimation() never returns.
+            endAnimation()
+            return
+        }
+
+        if #available(macOS 11.0, *) {
+            styleMask = .borderless
+            common.titleBar?.hide(0.0)
+        } else {
+            styleMask.insert(.fullScreen)
+        }
+
+        NSApp.presentationOptions = [.autoHideMenuBar, .autoHideDock]
+        setFrame(targetFrame, display: true)
+        endAnimation()
+        isInFullscreen = true
+        mpv?.setOption(fullscreen: isInFullscreen)
+        common.windowSetToFullScreen()
+    }
+
+    // Leaves the custom (non-native) fullscreen mode: restores the style mask
+    // (previous mask on macOS 11+, removing the .fullScreen bit otherwise),
+    // resets the presentation options, moves the window back to its windowed
+    // frame on the target screen and updates the fullscreen state.
+    func setToWindow() {
+        guard let tScreen = targetScreen else {
+            // Nothing can be done without a target screen, but the animation
+            // flag set by toggleFullScreen must still be released. Otherwise
+            // every later toggle is ignored and waitForAnimation() never returns.
+            endAnimation()
+            return
+        }
+
+        if #available(macOS 11.0, *) {
+            styleMask = previousStyleMask
+            common.titleBar?.hide(0.0)
+        } else {
+            styleMask.remove(.fullScreen)
+        }
+
+        let newFrame = calculateWindowPosition(for: tScreen, withoutBounds: targetScreen == screen)
+        NSApp.presentationOptions = []
+        setFrame(newFrame, display: true)
+        endAnimation()
+        isInFullscreen = false
+        mpv?.setOption(fullscreen: isInFullscreen)
+        common.windowSetToWindow()
+    }
+
+    // Blocks the calling thread until no fullscreen animation is in progress
+    // (isAnimating is cleared and signalled by endAnimation).
+    func waitForAnimation() {
+        animationLock.lock()
+        defer { animationLock.unlock() }
+
+        while isAnimating {
+            animationLock.wait()
+        }
+    }
+
+    // Returns the fullscreen animation duration in seconds: the
+    // macos_fs_animation_duration option divided by 1000 when it is set to a
+    // non-negative value, otherwise the passed default `def`.
+    func getFsAnimationDuration(_ def: Double) -> Double {
+        let configured = mpv?.macOpts.macos_fs_animation_duration ?? -1
+        return configured < 0 ? def : Double(configured)/1000
+    }
+
+    // Enables or disables the on-top state.
+    //
+    // state:      false resets the window to the normal level
+    // ontopLevel: -1 floating, -2 one above the status bar, -3 desktop window
+    //             level, any other value is used as the raw window level
+    func setOnTop(_ state: Bool, _ ontopLevel: Int) {
+        guard state else {
+            level = .normal
+            return
+        }
+
+        let wantedLevel: NSWindow.Level
+        switch ontopLevel {
+        case -1:
+            wantedLevel = .floating
+        case -2:
+            wantedLevel = .statusBar + 1
+        case -3:
+            wantedLevel = NSWindow.Level(Int(CGWindowLevelForKey(.desktopWindow)))
+        default:
+            wantedLevel = NSWindow.Level(ontopLevel)
+        }
+
+        level = wantedLevel
+        collectionBehavior.remove(.transient)
+        collectionBehavior.insert(.managed)
+    }
+
+    // Adds or removes the .canJoinAllSpaces collection behavior.
+    func setOnAllWorkspaces(_ state: Bool) {
+        guard state else {
+            collectionBehavior.remove(.canJoinAllSpaces)
+            return
+        }
+        collectionBehavior.insert(.canJoinAllSpaces)
+    }
+
+    // Minimizes or restores the window; no-op when it already is in the
+    // wanted state.
+    func setMinimized(_ stateWanted: Bool) {
+        guard isMiniaturized != stateWanted else { return }
+
+        if !stateWanted {
+            deminiaturize(self)
+        } else {
+            performMiniaturize(self)
+        }
+    }
+
+    // Toggles the zoomed state when it differs from the wanted one.
+    func setMaximized(_ stateWanted: Bool) {
+        if isZoomed != stateWanted {
+            zoom(self)
+        }
+    }
+
+    // Decides whether the window may be dragged by its background at `pos`:
+    // never in fullscreen, otherwise whatever mpv reports for that position
+    // (true when mpv is unavailable).
+    func updateMovableBackground(_ pos: NSPoint) {
+        // && short-circuits, so mpv is only asked while not in fullscreen
+        isMovableByWindowBackground = !isInFullscreen && (mpv?.canBeDraggedAt(pos) ?? true)
+    }
+
+    // Applies a new content rect: stores it as the windowed content frame,
+    // resizes the window to the matching frame rect and notifies common.
+    // Skipped when `rect` equals the current window frame.
+    func updateFrame(_ rect: NSRect) {
+        guard rect != frame else { return }
+
+        let windowFrame = frameRect(forContentRect: rect)
+        unfsContentFrame = rect
+        setFrame(windowFrame, display: true)
+        common.windowDidUpdateFrame()
+    }
+
+    // Resizes the content area to `size`, keeping it centered on the current
+    // frame. In fullscreen only the stored windowed content frame is updated.
+    // Does nothing without a content view or when the size is unchanged.
+    func updateSize(_ size: NSSize) {
+        guard let currentSize = contentView?.frame.size, size != currentSize else { return }
+
+        let newContentFrame = centeredContentSize(for: frame, size: size)
+        if isInFullscreen {
+            unfsContentFrame = newContentFrame
+        } else {
+            updateFrame(newContentFrame)
+        }
+    }
+
+    // Rejects (and logs) frames smaller than minSize, otherwise forwards to
+    // NSWindow and re-applies the content aspect ratio while keepAspect is on.
+    override func setFrame(_ frameRect: NSRect, display flag: Bool) {
+        let belowMinimum = frameRect.width < minSize.width || frameRect.height < minSize.height
+        guard !belowMinimum else {
+            common.log.sendVerbose("tried to set too small window size: \(frameRect.size)")
+            return
+        }
+
+        super.setFrame(frameRect, display: flag)
+
+        if keepAspect, let aspect = unfsContentFrame?.size {
+            contentAspectRatio = aspect
+        }
+    }
+
+    // Returns a content rect of size `sz` that shares its center with the
+    // content rect belonging to the frame rect `rect`.
+    func centeredContentSize(for rect: NSRect, size sz: NSSize) -> NSRect {
+        let content = contentRect(forFrameRect: rect)
+        // inset by half of the size difference on each side
+        let insetX = (content.size.width  - sz.width)  / 2
+        let insetY = (content.size.height - sz.height) / 2
+        return NSInsetRect(content, insetX, insetY)
+    }
+
+    // Scales `r` uniformly so that it fits into `rTarget` and centers the
+    // result on rTarget's midpoint.
+    func aspectFit(rect r: NSRect, in rTarget: NSRect) -> NSRect {
+        // start with the width-limited scale, fall back to the height-limited
+        // one when the result would be too tall
+        let widthScale = rTarget.width / r.width
+        let scale = r.height*widthScale > rTarget.height ? rTarget.height / r.height
+                                                         : widthScale
+        let fittedWidth = r.width * scale
+        let fittedHeight = r.height * scale
+        return NSRect(x: rTarget.midX - fittedWidth/2,
+                      y: rTarget.midY - fittedHeight/2,
+                      width: fittedWidth,
+                      height: fittedHeight)
+    }
+
+    // Computes the window frame to use on `tScreen` from the stored windowed
+    // content frame (unfsContentFrame) and the screen it was recorded on
+    // (unfScreen), keeping the window's relative position on the screen.
+    //
+    // tScreen:       screen the frame is calculated for
+    // withoutBounds: when true the result is returned without clamping it to
+    //                the frame of tScreen
+    // Returns the current frame when no windowed content frame or screen has
+    // been recorded.
+    func calculateWindowPosition(for tScreen: NSScreen, withoutBounds: Bool) -> NSRect {
+        guard let contentFrame = unfsContentFrame, let screen = unfScreen else {
+            return frame
+        }
+        var newFrame = frameRect(forContentRect: contentFrame)
+        let targetFrame = tScreen.frame
+        let targetVisibleFrame = tScreen.visibleFrame
+        let unfsScreenFrame = screen.frame
+        // part of the window that lies on the screen it was recorded on
+        let visibleWindow = NSIntersectionRect(unfsScreenFrame, newFrame)
+
+        // calculate visible area of every side
+        // (distance between window edge and screen edge; negative when the
+        // window extends beyond that screen edge)
+        let left = newFrame.origin.x - unfsScreenFrame.origin.x
+        let right = unfsScreenFrame.size.width -
+            (newFrame.origin.x - unfsScreenFrame.origin.x + newFrame.size.width)
+        let bottom = newFrame.origin.y - unfsScreenFrame.origin.y
+        let top = unfsScreenFrame.size.height -
+            (newFrame.origin.y - unfsScreenFrame.origin.y + newFrame.size.height)
+
+        // normalize visible areas, decide which one to take horizontal/vertical
+        // (the != 0 checks avoid dividing by zero when the visible part spans
+        // the whole screen in that direction)
+        var xPer = (unfsScreenFrame.size.width - visibleWindow.size.width)
+        var yPer = (unfsScreenFrame.size.height - visibleWindow.size.height)
+        if xPer != 0 { xPer = (left >= 0 || right < 0 ? left : right) / xPer }
+        if yPer != 0 { yPer = (bottom >= 0 || top < 0 ? bottom : top) / yPer }
+
+        // calculate visible area for every side for target screen
+        let xNewLeft = targetFrame.origin.x +
+            (targetFrame.size.width - visibleWindow.size.width) * xPer
+        let xNewRight = targetFrame.origin.x + targetFrame.size.width -
+            (targetFrame.size.width - visibleWindow.size.width) * xPer - newFrame.size.width
+        let yNewBottom = targetFrame.origin.y +
+            (targetFrame.size.height - visibleWindow.size.height) * yPer
+        let yNewTop = targetFrame.origin.y + targetFrame.size.height -
+            (targetFrame.size.height - visibleWindow.size.height) * yPer - newFrame.size.height
+
+        // calculate new coordinates, decide which one to take horizontal/vertical
+        newFrame.origin.x = left >= 0 || right < 0 ? xNewLeft : xNewRight
+        newFrame.origin.y = bottom >= 0 || top < 0 ? yNewBottom : yNewTop
+
+        // don't place new window on top of a visible menubar
+        // (menuBarHeight is derived from the gap between the target screen's
+        // frame and its visibleFrame at the top)
+        let topMar = targetFrame.size.height -
+            (newFrame.origin.y - targetFrame.origin.y + newFrame.size.height)
+        let menuBarHeight = targetFrame.size.height -
+            (targetVisibleFrame.size.height + targetVisibleFrame.origin.y)
+        if topMar < menuBarHeight {
+            newFrame.origin.y -= top - menuBarHeight
+        }
+
+        if withoutBounds {
+            return newFrame
+        }
+
+        // screen bounds right and left
+        if newFrame.origin.x + newFrame.size.width > targetFrame.origin.x + targetFrame.size.width {
+            newFrame.origin.x = targetFrame.origin.x + targetFrame.size.width - newFrame.size.width
+        }
+        if newFrame.origin.x < targetFrame.origin.x {
+            newFrame.origin.x = targetFrame.origin.x
+        }
+
+        // screen bounds top and bottom
+        if newFrame.origin.y + newFrame.size.height > targetFrame.origin.y + targetFrame.size.height {
+            newFrame.origin.y = targetFrame.origin.y + targetFrame.size.height - newFrame.size.height
+        }
+        if newFrame.origin.y < targetFrame.origin.y {
+            newFrame.origin.y = targetFrame.origin.y
+        }
+        return newFrame
+    }
+
+    // Custom frame constraining. The proposed frame is returned untouched
+    // while animating out of windowed mode, while in fullscreen without a
+    // running animation, or when the window sits on the desktop window level.
+    // Otherwise the frame is only moved when it would leave the visible frame
+    // of the relevant screen (see the individual checks below).
+    override func constrainFrameRect(_ frameRect: NSRect, to tScreen: NSScreen?) -> NSRect {
+        // note: && binds tighter than ||, so the second group reads
+        // (!isAnimating && isInFullscreen) || level == desktop level
+        if (isAnimating && !isInFullscreen) || (!isAnimating && isInFullscreen ||
+            level == NSWindow.Level(Int(CGWindowLevelForKey(.desktopWindow))))
+        {
+            return frameRect
+        }
+
+        guard let ts: NSScreen = tScreen ?? screen ?? NSScreen.main else {
+            return frameRect
+        }
+        // nf: new frame, of: old (current) frame, vf: visible frame of the
+        // screen (the target screen while animating), ncf: new content frame
+        var nf: NSRect = frameRect
+        let of: NSRect = frame
+        let vf: NSRect = (isAnimating ? (targetScreen ?? ts) : ts).visibleFrame
+        let ncf: NSRect = contentRect(forFrameRect: nf)
+
+        // screen bounds top and bottom
+        // (the top edge may not exceed the visible frame; at the bottom the
+        // frame is only moved once the whole content rect is below it)
+        if NSMaxY(nf) > NSMaxY(vf) {
+            nf.origin.y = NSMaxY(vf) - NSHeight(nf)
+        }
+        if NSMaxY(ncf) < NSMinY(vf) {
+            nf.origin.y = NSMinY(vf) + NSMinY(ncf) - NSMaxY(ncf)
+        }
+
+        // screen bounds right and left
+        // (only applies when the frame is completely outside horizontally)
+        if NSMinX(nf) > NSMaxX(vf) {
+            nf.origin.x = NSMaxX(vf) - NSWidth(nf)
+        }
+        if NSMaxX(nf) < NSMinX(vf) {
+            nf.origin.x = NSMinX(vf)
+        }
+
+        if NSHeight(nf) < NSHeight(vf) && NSHeight(of) > NSHeight(vf) && !isInFullscreen {
+            // If the window height is smaller than the visible frame, but it was
+            // bigger previously recenter the smaller window vertically. This is
+            // needed to counter the 'snap to top' behaviour.
+            // NOTE(review): the origin of vf is not added here - confirm this
+            // is intended for screens whose visible frame doesn't start at y = 0.
+            nf.origin.y = (NSHeight(vf) - NSHeight(nf)) / 2
+        }
+        return nf
+    }
+
+    // Selector targets registered with the menu bar in init: set the
+    // window-scale to 1.0, 0.5 and 2.0 respectively.
+    @objc func setNormalWindowSize() { setWindowScale(1.0) }
+    @objc func setHalfWindowSize()   { setWindowScale(0.5) }
+    @objc func setDoubleWindowSize() { setWindowScale(2.0) }
+
+    // Sends a "set window-scale <scale>" command to mpv.
+    func setWindowScale(_ scale: Double) {
+        mpv?.command("set window-scale \(scale)")
+    }
+
+    // Sends an "add window-scale <scale>" command to mpv; ignored while in
+    // fullscreen.
+    func addWindowScale(_ scale: Double) {
+        guard !isInFullscreen else { return }
+        mpv?.command("add window-scale \(scale)")
+    }
+
+    // NSWindowDelegate: the window moved to another screen. When the screen
+    // really changed, the display link is updated and common is notified; the
+    // new screen is additionally remembered as previousScreen unless a
+    // fullscreen animation is running. Ignored while the window has no screen.
+    func windowDidChangeScreen(_ notification: Notification) {
+        guard screen != nil else { return }
+
+        if currentScreen != screen {
+            if !isAnimating {
+                previousScreen = screen
+            }
+            common.updateDisplaylink()
+            common.windowDidChangeScreen()
+        }
+        currentScreen = screen
+    }
+
+    // NSWindowDelegate: forwards the screen profile change to common.
+    func windowDidChangeScreenProfile(_ notification: Notification) {
+        common.windowDidChangeScreenProfile()
+    }
+
+    // NSWindowDelegate: forwards the backing properties change to common and
+    // flags a DPI event.
+    func windowDidChangeBackingProperties(_ notification: Notification) {
+        common.windowDidChangeBackingProperties()
+        common.flagEvents(VO_EVENT_DPI)
+    }
+
+    // NSWindowDelegate: forwards the start of a live resize to common.
+    func windowWillStartLiveResize(_ notification: Notification) {
+        common.windowWillStartLiveResize()
+    }
+
+    // NSWindowDelegate: a live resize finished. Notifies common, syncs the
+    // maximized option and - outside of fullscreen and animations - records
+    // the resulting content frame as the windowed content frame.
+    func windowDidEndLiveResize(_ notification: Notification) {
+        common.windowDidEndLiveResize()
+        mpv?.setOption(maximized: isZoomed)
+
+        guard !isAnimating && !isInFullscreen,
+              let resizedContentFrame = contentView?.frame else { return }
+        unfsContentFrame = convertToScreen(resizedContentFrame)
+    }
+
+    // NSWindowDelegate: forwards the resize to common.
+    func windowDidResize(_ notification: Notification) {
+        common.windowDidResize()
+    }
+
+    // NSWindowDelegate: never lets AppKit close the window directly; a
+    // MP_KEY_CLOSE_WIN key is queued instead.
+    func windowShouldClose(_ sender: NSWindow) -> Bool {
+        cocoa_put_key(MP_KEY_CLOSE_WIN)
+        return false
+    }
+
+    // NSWindowDelegate: mirrors the minimized state into the mpv option.
+    func windowDidMiniaturize(_ notification: Notification) {
+        mpv?.setOption(minimized: true)
+    }
+
+    // NSWindowDelegate: mirrors the restored (not minimized) state into the
+    // mpv option.
+    func windowDidDeminiaturize(_ notification: Notification) {
+        mpv?.setOption(minimized: false)
+    }
+
+    // NSWindowDelegate: makes the cursor visible when the window stops being
+    // the key window.
+    func windowDidResignKey(_ notification: Notification) {
+        common.setCursorVisibility(true)
+    }
+
+    // NSWindowDelegate: re-evaluates cursor visibility when the window becomes
+    // the key window.
+    func windowDidBecomeKey(_ notification: Notification) {
+        common.updateCursorVisibility()
+    }
+
+    // NSWindowDelegate: when the window is (again) visible, notifies common
+    // and re-evaluates cursor visibility. Nothing happens while occluded.
+    func windowDidChangeOcclusionState(_ notification: Notification) {
+        guard occlusionState.contains(.visible) else { return }
+
+        common.windowDidChangeOcclusionState()
+        common.updateCursorVisibility()
+    }
+
+    // NSWindowDelegate: marks the window as being moved. The flag is not
+    // cleared in this class.
+    func windowWillMove(_ notification: Notification) {
+        isMoving = true
+    }
+
+    // NSWindowDelegate: syncs the maximized option after the window moved.
+    func windowDidMove(_ notification: Notification) {
+        mpv?.setOption(maximized: isZoomed)
+    }
+}
diff --git a/video/out/mac_common.swift b/video/out/mac_common.swift
new file mode 100644
index 0000000..349712b
--- /dev/null
+++ b/video/out/mac_common.swift
@@ -0,0 +1,174 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+import Cocoa
+
+class MacCommon: Common {
+    // Layer created on the main thread in init
+    @objc var layer: MetalLayer?
+
+    // Timer used for the precise render timer mode (see displayLinkCallback)
+    var timer: PreciseTimer?
+    // Counter incremented by the display link/timer and consumed by
+    // swapBuffer; protected by swapLock
+    var swapTime: UInt64 = 0
+    let swapLock: NSCondition = NSCondition()
+
+    // Set when the ICC profile has to be refreshed on the next swapBuffer
+    var needsICCUpdate: Bool = false
+
+    // Creates the backend for the given vo: sets up a "mac" sub-log, the
+    // MPVHelper and the precise timer, then creates the MetalLayer and runs
+    // initMisc synchronously on the main queue.
+    @objc init(_ vo: UnsafeMutablePointer<vo>) {
+        let newlog = mp_log_new(vo, vo.pointee.log, "mac")
+        super.init(newlog)
+        mpv = MPVHelper(vo, log)
+        timer = PreciseTimer(common: self)
+
+        // blocks the calling thread until the main queue has run the closure
+        DispatchQueue.main.sync {
+            layer = MetalLayer(common: self)
+            initMisc(vo)
+        }
+    }
+
+    // (Re)configures the output for `vo`. On the main queue: initializes the
+    // app, creates view and window on first use, resizes the window when the
+    // wanted size differs from the stored windowed size and schedules an ICC
+    // profile update. Terminates the process when no MetalLayer exists.
+    // Always returns true.
+    @objc func config(_ vo: UnsafeMutablePointer<vo>) -> Bool {
+        mpv?.vo = vo
+
+        DispatchQueue.main.sync {
+            let previousActiveApp = getActiveApp()
+            initApp()
+
+            // only the wanted window rect (third element) is needed here
+            let wantedRect = getInitProperties(vo).2
+
+            guard let layer = self.layer else {
+                log.sendError("Something went wrong, no MetalLayer was initialized")
+                exit(1)
+            }
+
+            if window == nil {
+                initView(vo, layer)
+                initWindow(vo, previousActiveApp)
+                initWindowState()
+            }
+
+            if let existingWindow = window,
+               !NSEqualSizes(existingWindow.unfsContentFramePixel.size, wantedRect.size)
+            {
+                existingWindow.updateSize(wantedRect.size)
+            }
+
+            windowDidResize()
+            needsICCUpdate = true
+        }
+
+        return true
+    }
+
+    // Tears the backend down: waits for a running fullscreen animation,
+    // terminates the timer and then, on the main queue, detaches the window
+    // delegate, closes the window and runs the common uninit.
+    @objc func uninit(_ vo: UnsafeMutablePointer<vo>) {
+        window?.waitForAnimation()
+
+        timer?.terminate()
+
+        DispatchQueue.main.sync {
+            window?.delegate = nil
+            window?.close()
+
+            uninitCommon()
+        }
+    }
+
+    // Unless the system render timer is selected, blocks until the display
+    // link (or precise timer) has signalled at least one swap, then resets the
+    // counter. Afterwards performs a pending ICC profile update.
+    @objc func swapBuffer() {
+        let renderTimer = mpv?.macOpts.macos_render_timer ?? Int32(RENDER_TIMER_CALLBACK)
+        if renderTimer != RENDER_TIMER_SYSTEM {
+            swapLock.lock()
+            while swapTime < 1 {
+                swapLock.wait()
+            }
+            swapTime = 0
+            swapLock.unlock()
+        }
+
+        guard needsICCUpdate else { return }
+        needsICCUpdate = false
+        updateICCProfile()
+    }
+
+    // Writes the given size into the vo's dwidth/dheight and flags resize and
+    // expose events.
+    func updateRenderSize(_ size: NSSize) {
+        mpv?.vo.pointee.dwidth = Int32(size.width)
+        mpv?.vo.pointee.dheight = Int32(size.height)
+        flagEvents(VO_EVENT_RESIZE | VO_EVENT_EXPOSE)
+    }
+
+    // Display link callback. With the system render timer nothing is done.
+    // With the precise timer (and a timer instance available) the swap signal
+    // is scheduled for the output time's host time; otherwise the swap is
+    // signalled immediately. Always returns kCVReturnSuccess.
+    override func displayLinkCallback(_ displayLink: CVDisplayLink,
+                                            _ inNow: UnsafePointer<CVTimeStamp>,
+                                     _ inOutputTime: UnsafePointer<CVTimeStamp>,
+                                          _ flagsIn: CVOptionFlags,
+                                         _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn
+    {
+        let frameTimer = mpv?.macOpts.macos_render_timer ?? Int32(RENDER_TIMER_CALLBACK)
+        guard frameTimer != RENDER_TIMER_SYSTEM else {
+            return kCVReturnSuccess
+        }
+
+        // wakes up swapBuffer
+        let signalSwap = {
+            self.swapLock.lock()
+            self.swapTime += 1
+            self.swapLock.signal()
+            self.swapLock.unlock()
+        }
+
+        if frameTimer == RENDER_TIMER_PRECISE, let timer = self.timer {
+            timer.scheduleAt(time: inOutputTime.pointee.hostTime, closure: signalSwap)
+        } else {
+            signalSwap()
+        }
+
+        return kCVReturnSuccess
+    }
+
+    // Starts the display link and updates the timer policy with the period of
+    // one frame at the current fps.
+    override func startDisplayLink(_ vo: UnsafeMutablePointer<vo>) {
+        super.startDisplayLink(vo)
+        timer?.updatePolicy(periodSeconds: 1 / currentFps())
+    }
+
+    // Updates the display link and refreshes the timer policy with the period
+    // of one frame at the current fps.
+    override func updateDisplaylink() {
+        super.updateDisplaylink()
+        timer?.updatePolicy(periodSeconds: 1 / currentFps())
+    }
+
+    // Flags an ambient lighting change event.
+    override func lightSensorUpdate() {
+        flagEvents(VO_EVENT_AMBIENT_LIGHTING_CHANGED)
+    }
+
+    // Applies the color space of the window's screen to the layer and flags an
+    // ICC profile change event. Logs a warning and does nothing else when no
+    // color space can be obtained.
+    @objc override func updateICCProfile() {
+        if let screenColorSpace = window?.screen?.colorSpace {
+            layer?.colorspace = screenColorSpace.cgColorSpace
+            flagEvents(VO_EVENT_ICC_PROFILE_CHANGED)
+        } else {
+            log.sendWarning("Couldn't update ICC Profile, no color space available")
+        }
+    }
+
+    // Propagates the window's frame size in pixels as the new render size.
+    // Logs a warning when there is no window.
+    override func windowDidResize() {
+        if let resizedWindow = window {
+            updateRenderSize(resizedWindow.framePixel.size)
+        } else {
+            log.sendWarning("No window available on window resize event")
+        }
+    }
+
+    // Defers the ICC profile update to the next swapBuffer call.
+    override func windowDidChangeScreenProfile() {
+        needsICCUpdate = true
+    }
+
+    // Applies the window's backing scale factor (1 without a window) to the
+    // layer and recalculates the render size.
+    override func windowDidChangeBackingProperties() {
+        layer?.contentsScale = window?.backingScaleFactor ?? 1
+        windowDidResize()
+    }
+}
diff --git a/video/out/meson.build b/video/out/meson.build
new file mode 100644
index 0000000..e2808d6
--- /dev/null
+++ b/video/out/meson.build
@@ -0,0 +1,51 @@
+# Generates C sources and client headers for the Wayland protocols used by
+# the Wayland backend and bundles them into a static 'protocols' library.
+
+# wayland['deps'][2] provides the protocol data dir and the version checked
+# below (presumably the wayland-protocols dependency - confirm at the
+# definition of `wayland`).
+wl_protocol_dir = wayland['deps'][2].get_variable(pkgconfig: 'pkgdatadir', internal: 'pkgdatadir')
+# Protocols that are always built; each entry is [base dir, relative xml path].
+protocols = [[wl_protocol_dir, 'stable/presentation-time/presentation-time.xml'],
+             [wl_protocol_dir, 'stable/viewporter/viewporter.xml'],
+             [wl_protocol_dir, 'stable/xdg-shell/xdg-shell.xml'],
+             [wl_protocol_dir, 'unstable/idle-inhibit/idle-inhibit-unstable-v1.xml'],
+             [wl_protocol_dir, 'unstable/linux-dmabuf/linux-dmabuf-unstable-v1.xml'],
+             [wl_protocol_dir, 'unstable/xdg-decoration/xdg-decoration-unstable-v1.xml']]
+wl_protocols_source = []
+wl_protocols_headers = []
+
+# Record a 'wayland-protocols-X-Y' feature for every version threshold that
+# the available version satisfies.
+foreach v: ['1.27', '1.31', '1.32']
+    features += {'wayland-protocols-' + v.replace('.', '-'):
+        wayland['deps'][2].version().version_compare('>=' + v)}
+endforeach
+
+# Protocols that are only added when the matching version feature is set.
+if features['wayland-protocols-1-27']
+    protocols += [[wl_protocol_dir, 'staging/content-type/content-type-v1.xml'],
+                  [wl_protocol_dir, 'staging/single-pixel-buffer/single-pixel-buffer-v1.xml']]
+endif
+if features['wayland-protocols-1-31']
+    protocols += [[wl_protocol_dir, 'staging/fractional-scale/fractional-scale-v1.xml']]
+endif
+if features['wayland-protocols-1-32']
+    protocols += [[wl_protocol_dir, 'staging/cursor-shape/cursor-shape-v1.xml'],
+                  [wl_protocol_dir, 'unstable/tablet/tablet-unstable-v2.xml']] # required by cursor-shape
+endif
+
+# Run the scanner twice per protocol xml: once for the private code (.c) and
+# once for the client header (.h).
+foreach p: protocols
+    xml = join_paths(p)
+    wl_protocols_source += custom_target(xml.underscorify() + '_c',
+        input: xml,
+        output: '@BASENAME@.c',
+        command: [wayland['scanner'], 'private-code', '@INPUT@', '@OUTPUT@'],
+    )
+    wl_protocols_headers += custom_target(xml.underscorify() + '_h',
+        input: xml,
+        output: '@BASENAME@.h',
+        command: [wayland['scanner'], 'client-header', '@INPUT@', '@OUTPUT@'],
+    )
+endforeach
+
+lib_client_protocols = static_library('protocols',
+                                      wl_protocols_source + wl_protocols_headers,
+                                      dependencies: wayland['deps'][0])
+
+# Expose the library together with the generated headers so that users of
+# the dependency can include them.
+client_protocols = declare_dependency(link_with: lib_client_protocols,
+                                      sources: wl_protocols_headers)
+
+dependencies += [client_protocols, wayland['deps']]
+
+sources += files('wayland_common.c')
diff --git a/video/out/opengl/angle_dynamic.c b/video/out/opengl/angle_dynamic.c
new file mode 100644
index 0000000..2483828
--- /dev/null
+++ b/video/out/opengl/angle_dynamic.c
@@ -0,0 +1,39 @@
+#include <windows.h>
+
+#include "angle_dynamic.h"
+
+#include "common/common.h"
+#include "osdep/threads.h"
+
+#if HAVE_EGL_ANGLE_LIB
+// HAVE_EGL_ANGLE_LIB variant: nothing is loaded at runtime, so loading always
+// succeeds.
+bool angle_load(void)
+{
+    return true;
+}
+#else
+// Expands every ANGLE_FNS entry to its VAR part followed by a semicolon,
+// i.e. defines one function pointer variable per entry.
+#define ANGLE_DECL(NAME, VAR) \
+    VAR;
+ANGLE_FNS(ANGLE_DECL)
+
+// Result of the one-time load attempt performed by angle_do_load().
+static bool angle_loaded;
+static mp_once angle_load_once = MP_STATIC_ONCE_INITIALIZER;
+
+// Loads LIBEGL.DLL and resolves every entry listed in ANGLE_FNS.
+// angle_loaded is set only when the DLL and all entries were found; on any
+// failure the function returns early and angle_loaded keeps its value (false).
+static void angle_do_load(void)
+{
+    // Note: we let this handle "leak", as the functions remain valid forever.
+    // LoadLibraryW() returns an HMODULE and GetProcAddress() takes one, so use
+    // that type rather than a generic HANDLE.
+    HMODULE angle_dll = LoadLibraryW(L"LIBEGL.DLL");
+    if (!angle_dll)
+        return;
+#define ANGLE_LOAD_ENTRY(NAME, VAR) \
+    NAME = (void *)GetProcAddress(angle_dll, #NAME); \
+    if (!NAME) return;
+    ANGLE_FNS(ANGLE_LOAD_ENTRY)
+    angle_loaded = true;
+}
+
+// Runs angle_do_load() through the angle_load_once guard and returns whether
+// all functions were loaded.
+bool angle_load(void)
+{
+    mp_exec_once(&angle_load_once, angle_do_load);
+    return angle_loaded;
+}
+#endif
diff --git a/video/out/opengl/angle_dynamic.h b/video/out/opengl/angle_dynamic.h
new file mode 100644
index 0000000..d419c3f
--- /dev/null
+++ b/video/out/opengl/angle_dynamic.h
@@ -0,0 +1,89 @@
+// Based on Khronos headers, thus MIT licensed.
+
+#ifndef MP_ANGLE_DYNAMIC_H
+#define MP_ANGLE_DYNAMIC_H
+
+#include <stdbool.h>
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "config.h"
+
+#define ANGLE_FNS(FN) /* FN(name, function pointer declarator) per EGL entry point */ \
+    FN(eglBindAPI, EGLBoolean (*EGLAPIENTRY PFN_eglBindAPI)(EGLenum)) \
+    FN(eglBindTexImage, EGLBoolean (*EGLAPIENTRY PFN_eglBindTexImage) \
+        (EGLDisplay, EGLSurface, EGLint)) \
+    FN(eglChooseConfig, EGLBoolean (*EGLAPIENTRY PFN_eglChooseConfig) \
+        (EGLDisplay, const EGLint *, EGLConfig *, EGLint, EGLint *)) \
+    FN(eglCreateContext, EGLContext (*EGLAPIENTRY PFN_eglCreateContext) \
+        (EGLDisplay, EGLConfig, EGLContext, const EGLint *)) \
+    FN(eglCreatePbufferFromClientBuffer, EGLSurface (*EGLAPIENTRY \
+        PFN_eglCreatePbufferFromClientBuffer)(EGLDisplay, EGLenum, \
+        EGLClientBuffer, EGLConfig, const EGLint *)) \
+    FN(eglCreateWindowSurface, EGLSurface (*EGLAPIENTRY \
+        PFN_eglCreateWindowSurface)(EGLDisplay, EGLConfig, \
+        EGLNativeWindowType, const EGLint *)) \
+    FN(eglDestroyContext, EGLBoolean (*EGLAPIENTRY PFN_eglDestroyContext) \
+        (EGLDisplay, EGLContext)) \
+    FN(eglDestroySurface, EGLBoolean (*EGLAPIENTRY PFN_eglDestroySurface) \
+        (EGLDisplay, EGLSurface)) \
+    FN(eglGetConfigAttrib, EGLBoolean (*EGLAPIENTRY PFN_eglGetConfigAttrib) \
+        (EGLDisplay, EGLConfig, EGLint, EGLint *)) \
+    FN(eglGetCurrentContext, EGLContext (*EGLAPIENTRY \
+        PFN_eglGetCurrentContext)(void)) \
+    FN(eglGetCurrentDisplay, EGLDisplay (*EGLAPIENTRY \
+        PFN_eglGetCurrentDisplay)(void)) \
+    FN(eglGetDisplay, EGLDisplay (*EGLAPIENTRY PFN_eglGetDisplay) \
+        (EGLNativeDisplayType)) \
+    FN(eglGetError, EGLint (*EGLAPIENTRY PFN_eglGetError)(void)) \
+    FN(eglGetProcAddress, void *(*EGLAPIENTRY \
+        PFN_eglGetProcAddress)(const char *)) \
+    FN(eglInitialize, EGLBoolean (*EGLAPIENTRY PFN_eglInitialize) \
+        (EGLDisplay, EGLint *, EGLint *)) \
+    FN(eglMakeCurrent, EGLBoolean (*EGLAPIENTRY PFN_eglMakeCurrent) \
+        (EGLDisplay, EGLSurface, EGLSurface, EGLContext)) \
+    FN(eglQueryString, const char *(*EGLAPIENTRY PFN_eglQueryString) \
+        (EGLDisplay, EGLint)) \
+    FN(eglSwapBuffers, EGLBoolean (*EGLAPIENTRY PFN_eglSwapBuffers) \
+        (EGLDisplay, EGLSurface)) \
+    FN(eglSwapInterval, EGLBoolean (*EGLAPIENTRY PFN_eglSwapInterval) \
+        (EGLDisplay, EGLint)) \
+    FN(eglReleaseTexImage, EGLBoolean (*EGLAPIENTRY PFN_eglReleaseTexImage) \
+        (EGLDisplay, EGLSurface, EGLint)) \
+    FN(eglTerminate, EGLBoolean (*EGLAPIENTRY PFN_eglTerminate)(EGLDisplay)) \
+    FN(eglWaitClient, EGLBoolean (*EGLAPIENTRY PFN_eglWaitClient)(void))
+
+#define ANGLE_EXT_DECL(NAME, VAR) \
+    extern VAR;
+ANGLE_FNS(ANGLE_EXT_DECL) // extern declarations of the PFN_egl* function pointers
+
+bool angle_load(void); // returns whether the EGL entry points listed above are usable
+
+// Source compatibility with statically linked ANGLE: egl* names map to the PFN_egl* pointers.
+#if !HAVE_EGL_ANGLE_LIB
+#define eglBindAPI                      PFN_eglBindAPI
+#define eglBindTexImage                 PFN_eglBindTexImage
+#define eglChooseConfig                 PFN_eglChooseConfig
+#define eglCreateContext                PFN_eglCreateContext
+#define eglCreatePbufferFromClientBuffer PFN_eglCreatePbufferFromClientBuffer
+#define eglCreateWindowSurface          PFN_eglCreateWindowSurface
+#define eglDestroyContext               PFN_eglDestroyContext
+#define eglDestroySurface               PFN_eglDestroySurface
+#define eglGetConfigAttrib              PFN_eglGetConfigAttrib
+#define eglGetCurrentContext            PFN_eglGetCurrentContext
+#define eglGetCurrentDisplay            PFN_eglGetCurrentDisplay
+#define eglGetDisplay                   PFN_eglGetDisplay
+#define eglGetError                     PFN_eglGetError
+#define eglGetProcAddress               PFN_eglGetProcAddress
+#define eglInitialize                   PFN_eglInitialize
+#define eglMakeCurrent                  PFN_eglMakeCurrent
+#define eglQueryString                  PFN_eglQueryString
+#define eglReleaseTexImage              PFN_eglReleaseTexImage
+#define eglSwapBuffers                  PFN_eglSwapBuffers
+#define eglSwapInterval                 PFN_eglSwapInterval
+#define eglTerminate                    PFN_eglTerminate
+#define eglWaitClient                   PFN_eglWaitClient
+#endif // !HAVE_EGL_ANGLE_LIB
+
+#endif
diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c
new file mode 100644
index 0000000..ee26508
--- /dev/null
+++ b/video/out/opengl/common.c
@@ -0,0 +1,694 @@
+/*
+ * common OpenGL routines
+ *
+ * copyleft (C) 2005-2010 Reimar Döffinger <Reimar.Doeffinger@gmx.de>
+ * Special thanks go to the xine team and Matthias Hopf, whose video_out_opengl.c
+ * gave me lots of good ideas.
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <strings.h>
+#include <stdbool.h>
+#include <math.h>
+#include <assert.h>
+
+#include "common.h"
+#include "common/common.h"
+#include "utils.h"
+
+// Heuristic: does the current GL context look like a software renderer?
+static bool is_software_gl(GL *gl)
+{
+    const char *name = gl->GetString(GL_RENDERER);
+    // Microsoft's fallback implementation is deliberately not blacklisted:
+    // it only provides OpenGL 1.1 and will be skipped anyway.
+    if (!name)
+        return true;
+    if (strstr(name, "llvmpipe") || strstr(name, "softpipe"))
+        return true;
+    return !strcmp(name, "Software Rasterizer") || !strcmp(name, "Mesa X11") ||
+           !strcmp(name, "Apple Software Renderer");
+}
+
+// Guesses from the GL_VENDOR string whether our DR path is fast or slow.
+static bool is_fast_dr(GL *gl)
+{
+    const char *v = gl->GetString(GL_VENDOR);
+    if (v && strcasecmp(v, "AMD") == 0)
+        return true;
+    if (v && strcasecmp(v, "NVIDIA Corporation") == 0)
+        return true;
+    // "ATI Technologies Inc." is what AMD reports on Windows.
+    return v && strcasecmp(v, "ATI Technologies Inc.") == 0;
+}
+
+static void GLAPIENTRY dummy_glBindFramebuffer(GLenum target, GLuint framebuffer)
+{
+    assert(framebuffer == 0); // stand-in without FBO support: only framebuffer 0 is accepted
+}
+
+#define FN_OFFS(name) offsetof(GL, name)    // byte offset of member `name` inside GL
+
+#define DEF_FN(name)            {FN_OFFS(name), "gl" # name}    // member X <- symbol "glX"
+#define DEF_FN_NAME(name, str)  {FN_OFFS(name), str}            // member X <- symbol str
+
+struct gl_function {            // one loadable entry point
+    ptrdiff_t offset;           // byte offset of the target pointer within GL
+    const char *name;           // symbol to resolve; NULL terminates a list
+};
+
+struct gl_functions {           // one section: functions/caps sharing the same conditions
+    const char *extension;      // introduced with this extension in any version (or NULL)
+    int provides;               // bitfield of MPGL_CAP_* constants
+    int ver_core;               // introduced as required function (0 = not core)
+    int ver_es_core;            // introduced as required GL ES function (0 = not core)
+    int ver_exclude;            // not applicable to versions >= ver_exclude
+    int ver_es_exclude;         // same for GLES
+    const struct gl_function *functions; // ends with a NULL-name entry; may be NULL
+};
+
+#define MAX_FN_COUNT 100        // max functions per gl_functions section
+
+// Note: to keep the number of sections low, some functions are in multiple
+//       sections (if there are tricky combinations of GL/ES versions)
+static const struct gl_functions gl_functions[] = {
+    // GL 2.1+ desktop and GLES 2.0+ (anything we support)
+    // Probably all of these are in GL 2.0 too, but we require GLSL 120.
+    {
+        .ver_core = 210,
+        .ver_es_core = 200,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(ActiveTexture),
+            DEF_FN(AttachShader),
+            DEF_FN(BindAttribLocation),
+            DEF_FN(BindBuffer),
+            DEF_FN(BindTexture),
+            DEF_FN(BlendFuncSeparate),
+            DEF_FN(BufferData),
+            DEF_FN(BufferSubData),
+            DEF_FN(Clear),
+            DEF_FN(ClearColor),
+            DEF_FN(CompileShader),
+            DEF_FN(CreateProgram),
+            DEF_FN(CreateShader),
+            DEF_FN(DeleteBuffers),
+            DEF_FN(DeleteProgram),
+            DEF_FN(DeleteShader),
+            DEF_FN(DeleteTextures),
+            DEF_FN(Disable),
+            DEF_FN(DisableVertexAttribArray),
+            DEF_FN(DrawArrays),
+            DEF_FN(Enable),
+            DEF_FN(EnableVertexAttribArray),
+            DEF_FN(Finish),
+            DEF_FN(Flush),
+            DEF_FN(GenBuffers),
+            DEF_FN(GenTextures),
+            DEF_FN(GetAttribLocation),
+            DEF_FN(GetError),
+            DEF_FN(GetIntegerv),
+            DEF_FN(GetProgramInfoLog),
+            DEF_FN(GetProgramiv),
+            DEF_FN(GetShaderInfoLog),
+            DEF_FN(GetShaderiv),
+            DEF_FN(GetString),
+            DEF_FN(GetUniformLocation),
+            DEF_FN(LinkProgram),
+            DEF_FN(PixelStorei),
+            DEF_FN(ReadPixels),
+            DEF_FN(Scissor),
+            DEF_FN(ShaderSource),
+            DEF_FN(TexImage2D),
+            DEF_FN(TexParameteri),
+            DEF_FN(TexSubImage2D),
+            DEF_FN(Uniform1f),
+            DEF_FN(Uniform2f),
+            DEF_FN(Uniform3f),
+            DEF_FN(Uniform1i),
+            DEF_FN(UniformMatrix2fv),
+            DEF_FN(UniformMatrix3fv),
+            DEF_FN(UseProgram),
+            DEF_FN(VertexAttribPointer),
+            DEF_FN(Viewport),
+            {0}
+        },
+    },
+    // GL 2.1+ desktop only (and GLSL 120 shaders)
+    {
+        .ver_core = 210,
+        .provides = MPGL_CAP_ROW_LENGTH | MPGL_CAP_1D_TEX,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(DrawBuffer),
+            DEF_FN(GetTexLevelParameteriv),
+            DEF_FN(ReadBuffer),
+            DEF_FN(TexImage1D),
+            DEF_FN(UnmapBuffer),
+            {0}
+        },
+    },
+    // GL 2.1 has this as extension only.
+    {
+        .ver_exclude = 300,
+        .ver_es_exclude = 300,
+        .extension = "GL_ARB_map_buffer_range",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(MapBufferRange),
+            {0}
+        },
+    },
+    // GL 3.0+ and ES 3.x core only functions.
+    {
+        .ver_core = 300,
+        .ver_es_core = 300,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(BindBufferBase),
+            DEF_FN(BlitFramebuffer),
+            DEF_FN(GetStringi),
+            DEF_FN(MapBufferRange),
+            // for ES 3.0
+            DEF_FN(ReadBuffer),
+            DEF_FN(UnmapBuffer),
+            {0}
+        },
+    },
+    // For ES 3.1 core
+    {
+        .ver_es_core = 310,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(GetTexLevelParameteriv),
+            {0}
+        },
+    },
+    { // 3D textures: desktop GL 2.1+ and GLES 3.0+
+        .ver_core = 210,
+        .ver_es_core = 300,
+        .provides = MPGL_CAP_3D_TEX,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(TexImage3D),
+            {0}
+        },
+    },
+    // Useful for ES 2.0
+    {
+        .ver_core = 110,
+        .ver_es_core = 300,
+        .extension = "GL_EXT_unpack_subimage",
+        .provides = MPGL_CAP_ROW_LENGTH,
+    },
+    // Framebuffers, extension in GL 2.x, core in GL 3.x core.
+    {
+        .ver_core = 300,
+        .ver_es_core = 200,
+        .extension = "GL_ARB_framebuffer_object",
+        .provides = MPGL_CAP_FB,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(BindFramebuffer),
+            DEF_FN(GenFramebuffers),
+            DEF_FN(DeleteFramebuffers),
+            DEF_FN(CheckFramebufferStatus),
+            DEF_FN(FramebufferTexture2D),
+            DEF_FN(GetFramebufferAttachmentParameteriv),
+            {0}
+        },
+    },
+    // VAOs, extension in GL 2.x, core in GL 3.x core.
+    {
+        .ver_core = 300,
+        .ver_es_core = 300,
+        .extension = "GL_ARB_vertex_array_object",
+        .provides = MPGL_CAP_VAO,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(GenVertexArrays),
+            DEF_FN(BindVertexArray),
+            DEF_FN(DeleteVertexArrays),
+            {0}
+        }
+    },
+    // GL_RED / GL_RG textures, extension in GL 2.x, core in GL 3.x core.
+    {
+        .ver_core = 300,
+        .ver_es_core = 300,
+        .extension = "GL_ARB_texture_rg",
+        .provides = MPGL_CAP_TEX_RG,
+    },
+    { // same capability, advertised under the GL_EXT_texture_rg name
+        .ver_core = 300,
+        .ver_es_core = 300,
+        .extension = "GL_EXT_texture_rg",
+        .provides = MPGL_CAP_TEX_RG,
+    },
+    // GL_R16 etc.
+    {
+        .extension = "GL_EXT_texture_norm16",
+        .provides = MPGL_CAP_EXT16,
+        .ver_exclude = 1, // never in desktop GL
+    },
+    // Float texture support for GL 2.x
+    {
+        .extension = "GL_ARB_texture_float",
+        .provides = MPGL_CAP_ARB_FLOAT,
+        .ver_exclude = 300,
+        .ver_es_exclude = 1,
+    },
+    // 16 bit float textures that can be rendered to in GLES
+    {
+        .extension = "GL_EXT_color_buffer_half_float",
+        .provides = MPGL_CAP_EXT_CR_HFLOAT,
+        .ver_exclude = 1,
+        .ver_es_exclude = 320,
+    },
+    { // sync objects: core in GL 3.2 / GLES 3.0, otherwise via GL_ARB_sync
+        .ver_core = 320,
+        .ver_es_core = 300,
+        .extension = "GL_ARB_sync",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(FenceSync),
+            DEF_FN(ClientWaitSync),
+            DEF_FN(DeleteSync),
+            {0}
+        },
+    },
+    { // query objects: core in desktop GL 3.3, otherwise via GL_ARB_timer_query
+        .ver_core = 330,
+        .extension = "GL_ARB_timer_query",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(GenQueries),
+            DEF_FN(DeleteQueries),
+            DEF_FN(BeginQuery),
+            DEF_FN(EndQuery),
+            DEF_FN(QueryCounter),
+            DEF_FN(IsQuery),
+            DEF_FN(GetQueryObjectiv),
+            DEF_FN(GetQueryObjecti64v),
+            DEF_FN(GetQueryObjectuiv),
+            DEF_FN(GetQueryObjectui64v),
+            {0}
+        },
+    },
+    { // EXT-suffixed symbols stored in the same GL members as the section above
+        .extension = "GL_EXT_disjoint_timer_query",
+        .functions = (const struct gl_function[]) {
+            DEF_FN_NAME(GenQueries, "glGenQueriesEXT"),
+            DEF_FN_NAME(DeleteQueries, "glDeleteQueriesEXT"),
+            DEF_FN_NAME(BeginQuery, "glBeginQueryEXT"),
+            DEF_FN_NAME(EndQuery, "glEndQueryEXT"),
+            DEF_FN_NAME(QueryCounter, "glQueryCounterEXT"),
+            DEF_FN_NAME(IsQuery, "glIsQueryEXT"),
+            DEF_FN_NAME(GetQueryObjectiv, "glGetQueryObjectivEXT"),
+            DEF_FN_NAME(GetQueryObjecti64v, "glGetQueryObjecti64vEXT"),
+            DEF_FN_NAME(GetQueryObjectuiv, "glGetQueryObjectuivEXT"),
+            DEF_FN_NAME(GetQueryObjectui64v, "glGetQueryObjectui64vEXT"),
+            {0}
+        },
+    },
+    {
+        .ver_core = 430,
+        .extension = "GL_ARB_invalidate_subdata",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(InvalidateTexImage),
+            {0}
+        },
+    },
+    {
+        .ver_core = 430,
+        .ver_es_core = 300,
+        .functions = (const struct gl_function[]) {
+            DEF_FN(InvalidateFramebuffer),
+            {0}
+        },
+    },
+    {
+        .ver_core = 410,
+        .ver_es_core = 300,
+        .extension = "GL_ARB_get_program_binary",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(GetProgramBinary),
+            DEF_FN(ProgramBinary),
+            {0}
+        },
+    },
+    {
+        .ver_core = 440,
+        .extension = "GL_ARB_buffer_storage",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(BufferStorage),
+            {0}
+        },
+    },
+    // Equivalent extension for ES
+    {
+        .extension = "GL_EXT_buffer_storage",
+        .functions = (const struct gl_function[]) {
+            DEF_FN_NAME(BufferStorage, "glBufferStorageEXT"),
+            {0}
+        },
+    },
+    {
+        .ver_core = 420,
+        .ver_es_core = 310,
+        .extension = "GL_ARB_shader_image_load_store",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(BindImageTexture),
+            DEF_FN(MemoryBarrier),
+            {0}
+        },
+    },
+    { // capability flag only, no functions are loaded for it
+        .ver_core = 310,
+        .ver_es_core = 300,
+        .extension = "GL_ARB_uniform_buffer_object",
+        .provides = MPGL_CAP_UBO,
+    },
+    { // capability flag only, no functions are loaded for it
+        .ver_core = 430,
+        .ver_es_core = 310,
+        .extension = "GL_ARB_shader_storage_buffer_object",
+        .provides = MPGL_CAP_SSBO,
+    },
+    {
+        .ver_core = 430,
+        .ver_es_core = 310,
+        .extension = "GL_ARB_compute_shader",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(DispatchCompute),
+            {0},
+        },
+    },
+    {
+        .ver_core = 430,
+        .extension = "GL_ARB_arrays_of_arrays",
+        .provides = MPGL_CAP_NESTED_ARRAY,
+    },
+    // Swap control, always an OS specific extension
+    // The OSX code loads this manually.
+    {
+        .extension = "GLX_SGI_swap_control",
+        .functions = (const struct gl_function[]) {
+            DEF_FN_NAME(SwapInterval, "glXSwapIntervalSGI"),
+            {0},
+        },
+    },
+    // This one overrides GLX_SGI_swap_control on platforms using mesa. The
+    // only difference is that it supports glXSwapInterval(0).
+    {
+        .extension = "GLX_MESA_swap_control",
+        .functions = (const struct gl_function[]) {
+            DEF_FN_NAME(SwapInterval, "glXSwapIntervalMESA"),
+            {0},
+        },
+    },
+    { // swap control through WGL
+        .extension = "WGL_EXT_swap_control",
+        .functions = (const struct gl_function[]) {
+            DEF_FN_NAME(SwapInterval, "wglSwapIntervalEXT"),
+            {0},
+        },
+    },
+    {
+        .extension = "GLX_SGI_video_sync",
+        .functions = (const struct gl_function[]) {
+            DEF_FN_NAME(GetVideoSync, "glXGetVideoSyncSGI"),
+            DEF_FN_NAME(WaitVideoSync, "glXWaitVideoSyncSGI"),
+            {0},
+        },
+    },
+    // For gl_hwdec_vdpau.c
+    // http://www.opengl.org/registry/specs/NV/vdpau_interop.txt
+    {
+        .extension = "GL_NV_vdpau_interop",
+        .provides = MPGL_CAP_VDPAU,
+        .functions = (const struct gl_function[]) {
+            // (only functions needed by us)
+            DEF_FN(VDPAUInitNV),
+            DEF_FN(VDPAUFiniNV),
+            DEF_FN(VDPAURegisterOutputSurfaceNV),
+            DEF_FN(VDPAURegisterVideoSurfaceNV),
+            DEF_FN(VDPAUUnregisterSurfaceNV),
+            DEF_FN(VDPAUSurfaceAccessNV),
+            DEF_FN(VDPAUMapSurfacesNV),
+            DEF_FN(VDPAUUnmapSurfacesNV),
+            {0}
+        },
+    },
+#if HAVE_GL_DXINTEROP
+    {
+        .extension = "WGL_NV_DX_interop",
+        .provides = MPGL_CAP_DXINTEROP,
+        .functions = (const struct gl_function[]) {
+            DEF_FN_NAME(DXSetResourceShareHandleNV, "wglDXSetResourceShareHandleNV"),
+            DEF_FN_NAME(DXOpenDeviceNV, "wglDXOpenDeviceNV"),
+            DEF_FN_NAME(DXCloseDeviceNV, "wglDXCloseDeviceNV"),
+            DEF_FN_NAME(DXRegisterObjectNV, "wglDXRegisterObjectNV"),
+            DEF_FN_NAME(DXUnregisterObjectNV, "wglDXUnregisterObjectNV"),
+            DEF_FN_NAME(DXLockObjectsNV, "wglDXLockObjectsNV"),
+            DEF_FN_NAME(DXUnlockObjectsNV, "wglDXUnlockObjectsNV"),
+            {0}
+        },
+    },
+#endif
+    // Apple Packed YUV Formats
+    // For gl_hwdec_vda.c
+    // http://www.opengl.org/registry/specs/APPLE/rgb_422.txt
+    {
+        .extension = "GL_APPLE_rgb_422",
+        .provides = MPGL_CAP_APPLE_RGB_422,
+    },
+    { // debug callback: core in desktop GL 4.3, otherwise via GL_ARB_debug_output
+        .ver_core = 430,
+        .extension = "GL_ARB_debug_output",
+        .provides = MPGL_CAP_DEBUG,
+        .functions = (const struct gl_function[]) {
+            // (only functions needed by us)
+            DEF_FN(DebugMessageCallback),
+            {0}
+        },
+    },
+    // ES version uses a different extension.
+    {
+        .ver_es_core = 320,
+        .extension = "GL_KHR_debug",
+        .provides = MPGL_CAP_DEBUG,
+        .functions = (const struct gl_function[]) {
+            // (only functions needed by us)
+            DEF_FN(DebugMessageCallback),
+            {0}
+        },
+    },
+    {
+        .extension = "GL_ANGLE_translated_shader_source",
+        .functions = (const struct gl_function[]) {
+            DEF_FN(GetTranslatedShaderSourceANGLE),
+            {0}
+        },
+    },
+};
+
+#undef FN_OFFS
+#undef DEF_FN_HARD
+#undef DEF_FN
+#undef DEF_FN_NAME
+
+// Fill the GL struct with function pointers and extensions from the current
+// GL context. Called by the backend.
+// get_fn: function to resolve function names; it is passed fn_ctx as ctx
+// fn_ctx: opaque pointer handed to every get_fn call
+// ext2: an extra extension string (may be NULL)
+// log: used to output messages
+// On failure, gl->version, gl->es and gl->mpgl_caps are all reset to 0.
+void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
+                          void *fn_ctx, const char *ext2, struct mp_log *log)
+{
+    talloc_free(gl->extensions);
+    *gl = (GL) {
+        .extensions = talloc_strdup(gl, ext2 ? ext2 : ""),
+        .get_fn = get_fn,
+        .fn_ctx = fn_ctx,
+    };
+
+    gl->GetString = get_fn(fn_ctx, "glGetString");
+    if (!gl->GetString) {
+        mp_err(log, "Can't load OpenGL functions.\n");
+        goto error;
+    }
+
+    int major = 0, minor = 0;
+    const char *version_string = gl->GetString(GL_VERSION);
+    if (!version_string) {
+        mp_fatal(log, "glGetString(GL_VERSION) returned NULL.\n");
+        goto error;
+    }
+    mp_verbose(log, "GL_VERSION='%s'\n", version_string);
+    if (strncmp(version_string, "OpenGL ES ", 10) == 0) {
+        version_string += 10;
+        gl->es = 100; // provisional marker, replaced by the parsed version below
+    }
+    if (sscanf(version_string, "%d.%d", &major, &minor) < 2)
+        goto error;
+    gl->version = MPGL_VER(major, minor);
+    mp_verbose(log, "Detected %s %d.%d.\n", gl->es ? "GLES" : "desktop OpenGL",
+               major, minor);
+
+    if (gl->es) {
+        gl->es = gl->version;
+        gl->version = 0;
+        if (gl->es < 200) {
+            mp_fatal(log, "At least GLESv2 required.\n");
+            goto error;
+        }
+    }
+
+    // GetString() may return NULL, which must not be passed for "%s".
+    const char *vendor = gl->GetString(GL_VENDOR);
+    const char *renderer = gl->GetString(GL_RENDERER);
+    mp_verbose(log, "GL_VENDOR='%s'\n",   vendor ? vendor : "(null)");
+    mp_verbose(log, "GL_RENDERER='%s'\n", renderer ? renderer : "(null)");
+    const char *shader = gl->GetString(GL_SHADING_LANGUAGE_VERSION);
+    if (shader)
+        mp_verbose(log, "GL_SHADING_LANGUAGE_VERSION='%s'\n", shader);
+
+    if (gl->version >= 300) {
+        gl->GetStringi = get_fn(fn_ctx, "glGetStringi");
+        gl->GetIntegerv = get_fn(fn_ctx, "glGetIntegerv");
+        if (!(gl->GetStringi && gl->GetIntegerv))
+            goto error;
+        GLint exts = 0; // stays 0 if the query fails and leaves it untouched
+        gl->GetIntegerv(GL_NUM_EXTENSIONS, &exts);
+        for (int n = 0; n < exts; n++) {
+            const char *ext = gl->GetStringi(GL_EXTENSIONS, n);
+            if (ext)
+                gl->extensions = talloc_asprintf_append(gl->extensions, " %s", ext);
+        }
+    } else {
+        const char *ext = (char*)gl->GetString(GL_EXTENSIONS);
+        if (ext)
+            gl->extensions = talloc_asprintf_append(gl->extensions, " %s", ext);
+    }
+
+    mp_dbg(log, "Combined OpenGL extensions string:\n%s\n", gl->extensions);
+
+    for (int n = 0; n < MP_ARRAY_SIZE(gl_functions); n++) {
+        const struct gl_functions *section = &gl_functions[n];
+        int version = gl->es ? gl->es : gl->version;
+        int ver_core = gl->es ? section->ver_es_core : section->ver_core;
+
+        // NOTE: Function entrypoints can exist, even if they do not work.
+        //       We must always check extension strings and versions.
+
+        if (gl->version && section->ver_exclude &&
+            gl->version >= section->ver_exclude)
+            continue;
+        if (gl->es && section->ver_es_exclude &&
+            gl->es >= section->ver_es_exclude)
+            continue;
+
+        bool must_exist = ver_core && version >= ver_core; // core in this version
+        bool exists = must_exist;
+        if (section->extension)
+            exists |= gl_check_extension(gl->extensions, section->extension);
+        if (!exists)
+            continue;
+
+        void *loaded[MAX_FN_COUNT] = {0};
+        bool all_loaded = true;
+        const struct gl_function *fnlist = section->functions;
+
+        for (int i = 0; fnlist && fnlist[i].name; i++) {
+            const struct gl_function *fn = &fnlist[i];
+            void *ptr = get_fn(fn_ctx, fn->name);
+            if (!ptr) {
+                all_loaded = false;
+                if (must_exist) {
+                    mp_err(log, "GL %d.%d function %s not found.\n",
+                           MPGL_VER_GET_MAJOR(ver_core),
+                           MPGL_VER_GET_MINOR(ver_core), fn->name);
+                    goto error;
+                } else {
+                    mp_warn(log, "Function %s from extension %s not found.\n",
+                            fn->name, section->extension);
+                }
+                break;
+            }
+            assert(i < MAX_FN_COUNT);
+            loaded[i] = ptr;
+        }
+
+        if (all_loaded) { // commit caps and pointers only for complete sections
+            gl->mpgl_caps |= section->provides;
+            for (int i = 0; fnlist && fnlist[i].name; i++) {
+                const struct gl_function *fn = &fnlist[i];
+                void **funcptr = (void**)(((char*)gl) + fn->offset);
+                if (loaded[i])
+                    *funcptr = loaded[i];
+            }
+            if (!must_exist && section->extension)
+                mp_verbose(log, "Loaded extension %s.\n", section->extension);
+        }
+    }
+
+    gl->glsl_version = 0;
+    if (gl->es) {
+        if (gl->es >= 200)
+            gl->glsl_version = 100;
+        if (gl->es >= 300)
+            gl->glsl_version = gl->es;
+    } else {
+        gl->glsl_version = 120;
+        int glsl_major = 0, glsl_minor = 0;
+        if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2)
+            gl->glsl_version = glsl_major * 100 + glsl_minor;
+        // restrict GLSL version to be forwards compatible
+        gl->glsl_version = MPMIN(gl->glsl_version, 440);
+    }
+
+    if (is_software_gl(gl)) {
+        gl->mpgl_caps |= MPGL_CAP_SW;
+        mp_verbose(log, "Detected suspected software renderer.\n");
+    }
+
+    if (!is_fast_dr(gl))
+        gl->mpgl_caps |= MPGL_CAP_SLOW_DR;
+
+    // GL_ARB_compute_shader & GL_ARB_shader_image_load_store
+    if (gl->DispatchCompute && gl->BindImageTexture)
+        gl->mpgl_caps |= MPGL_CAP_COMPUTE_SHADER;
+
+    // Provided for simpler handling if no framebuffer support is available.
+    if (!gl->BindFramebuffer)
+        gl->BindFramebuffer = &dummy_glBindFramebuffer;
+    return;
+
+error:
+    gl->version = 0;
+    gl->es = 0;
+    gl->mpgl_caps = 0;
+}
+
+static void *get_procaddr_wrapper(void *ctx, const char *name)
+{
+    void *(*lookup)(const GLubyte *) = ctx; // ctx carries the caller's getProcAddress
+    return !lookup ? NULL : lookup((const GLubyte *)name);
+}
+
+void mpgl_load_functions(GL *gl, void *(*getProcAddress)(const GLubyte *),
+                         const char *ext2, struct mp_log *log)
+{   // Same as mpgl_load_functions2(), with getProcAddress passed along as fn_ctx.
+    mpgl_load_functions2(gl, get_procaddr_wrapper, getProcAddress, ext2, log);
+}
diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h
new file mode 100644
index 0000000..a6b02c9
--- /dev/null
+++ b/video/out/opengl/common.h
@@ -0,0 +1,258 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_GL_COMMON_H
+#define MPLAYER_GL_COMMON_H
+
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+
+#include "config.h"
+#include "common/msg.h"
+#include "misc/bstr.h"
+
+#include "video/csputils.h"
+#include "video/mp_image.h"
+#include "video/out/vo.h"
+#include "video/out/gpu/ra.h"
+
+#include "gl_headers.h"
+
+#if HAVE_GL_WIN32
+#include <windows.h>
+#endif
+
+struct GL;
+typedef struct GL GL;
+
+enum { // bit flags stored in GL.mpgl_caps
+    MPGL_CAP_ROW_LENGTH         = (1 << 4),     // GL_[UN]PACK_ROW_LENGTH
+    MPGL_CAP_FB                 = (1 << 5),     // required by ra_gl_ctx_color_depth for its framebuffer query
+    MPGL_CAP_VAO                = (1 << 6),
+    MPGL_CAP_TEX_RG             = (1 << 10),    // GL_ARB_texture_rg / GL 3.x
+    MPGL_CAP_VDPAU              = (1 << 11),    // GL_NV_vdpau_interop
+    MPGL_CAP_APPLE_RGB_422      = (1 << 12),    // GL_APPLE_rgb_422
+    MPGL_CAP_1D_TEX             = (1 << 14),
+    MPGL_CAP_3D_TEX             = (1 << 15),
+    MPGL_CAP_DEBUG              = (1 << 16),
+    MPGL_CAP_DXINTEROP          = (1 << 17),    // WGL_NV_DX_interop
+    MPGL_CAP_EXT16              = (1 << 18),    // GL_EXT_texture_norm16
+    MPGL_CAP_ARB_FLOAT          = (1 << 19),    // GL_ARB_texture_float
+    MPGL_CAP_EXT_CR_HFLOAT      = (1 << 20),    // GL_EXT_color_buffer_half_float
+    MPGL_CAP_UBO                = (1 << 21),    // GL_ARB_uniform_buffer_object
+    MPGL_CAP_SSBO               = (1 << 22),    // GL_ARB_shader_storage_buffer_object
+    MPGL_CAP_COMPUTE_SHADER     = (1 << 23),    // GL_ARB_compute_shader & GL_ARB_shader_image_load_store
+    MPGL_CAP_NESTED_ARRAY       = (1 << 24),    // GL_ARB_arrays_of_arrays
+
+    MPGL_CAP_SLOW_DR            = (1 << 29),    // direct rendering is assumed to be slow
+    MPGL_CAP_SW                 = (1 << 30),    // indirect or sw renderer
+};
+
+// Versions are encoded as major * 100 + minor * 10, e.g. 310 means 3.1.
+// Code doesn't have to use the macros; they are for convenience only.
+#define MPGL_VER(major, minor) (((major) * 100) + (minor) * 10)
+#define MPGL_VER_GET_MAJOR(ver) ((unsigned)(ver) / 100)
+#define MPGL_VER_GET_MINOR(ver) ((unsigned)(ver) % 100 / 10)
+
+#define MPGL_VER_P(ver) MPGL_VER_GET_MAJOR(ver), MPGL_VER_GET_MINOR(ver) // expands to two arguments: major, minor
+
+void mpgl_load_functions(GL *gl, void *(*getProcAddress)(const GLubyte *),
+                         const char *ext2, struct mp_log *log);
+void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n),
+                          void *fn_ctx, const char *ext2, struct mp_log *log);
+
+typedef void (GLAPIENTRY *MP_GLDEBUGPROC)(GLenum, GLenum, GLuint, GLenum,
+                                          GLsizei, const GLchar *,const void *);
+
+// Version/capability state plus the function pointers loaded from the OpenGL library
+struct GL {
+    int version;                // MPGL_VER() mangled (e.g. 210 for 2.1)
+    int es;                     // es version (e.g. 300), 0 for desktop GL
+    int glsl_version;           // e.g. 130 for GLSL 1.30
+    char *extensions;           // Equivalent to GL_EXTENSIONS
+    int mpgl_caps;              // Bitfield of MPGL_CAP_* constants
+    bool debug_context;         // use of e.g. GLX_CONTEXT_DEBUG_BIT_ARB
+
+    // Set to false if the implementation follows normal GL semantics, which is
+    // upside down. Set to true if it does *not*, i.e. if rendering is right
+    // side up
+    bool flipped;
+
+    // Copy of function pointer used to load GL.
+    // Caution: Not necessarily valid to use after VO init has completed!
+    void *(*get_fn)(void *ctx, const char *n);
+    void *fn_ctx;
+
+    void (GLAPIENTRY *Viewport)(GLint, GLint, GLsizei, GLsizei);
+    void (GLAPIENTRY *Clear)(GLbitfield);
+    void (GLAPIENTRY *GenTextures)(GLsizei, GLuint *);
+    void (GLAPIENTRY *DeleteTextures)(GLsizei, const GLuint *);
+    void (GLAPIENTRY *ClearColor)(GLclampf, GLclampf, GLclampf, GLclampf);
+    void (GLAPIENTRY *Enable)(GLenum);
+    void (GLAPIENTRY *Disable)(GLenum);
+    const GLubyte *(GLAPIENTRY * GetString)(GLenum);
+    void (GLAPIENTRY *BlendFuncSeparate)(GLenum, GLenum, GLenum, GLenum);
+    void (GLAPIENTRY *Flush)(void);
+    void (GLAPIENTRY *Finish)(void);
+    void (GLAPIENTRY *PixelStorei)(GLenum, GLint);
+    void (GLAPIENTRY *TexImage1D)(GLenum, GLint, GLint, GLsizei, GLint,
+                                  GLenum, GLenum, const GLvoid *);
+    void (GLAPIENTRY *TexImage2D)(GLenum, GLint, GLint, GLsizei, GLsizei,
+                                  GLint, GLenum, GLenum, const GLvoid *);
+    void (GLAPIENTRY *TexSubImage2D)(GLenum, GLint, GLint, GLint,
+                                     GLsizei, GLsizei, GLenum, GLenum,
+                                     const GLvoid *);
+    void (GLAPIENTRY *TexParameteri)(GLenum, GLenum, GLint);
+    void (GLAPIENTRY *GetIntegerv)(GLenum, GLint *);
+    void (GLAPIENTRY *ReadPixels)(GLint, GLint, GLsizei, GLsizei, GLenum,
+                                  GLenum, GLvoid *);
+    void (GLAPIENTRY *ReadBuffer)(GLenum);
+    void (GLAPIENTRY *DrawBuffer)(GLenum);
+    void (GLAPIENTRY *DrawArrays)(GLenum, GLint, GLsizei);
+    GLenum (GLAPIENTRY *GetError)(void);
+    void (GLAPIENTRY *GetTexLevelParameteriv)(GLenum, GLint, GLenum, GLint *);
+    void (GLAPIENTRY *Scissor)(GLint, GLint, GLsizei, GLsizei);
+
+    void (GLAPIENTRY *GenBuffers)(GLsizei, GLuint *);
+    void (GLAPIENTRY *DeleteBuffers)(GLsizei, const GLuint *);
+    void (GLAPIENTRY *BindBuffer)(GLenum, GLuint);
+    void (GLAPIENTRY *BindBufferBase)(GLenum, GLuint, GLuint);
+    GLvoid * (GLAPIENTRY *MapBufferRange)(GLenum, GLintptr, GLsizeiptr,
+                                          GLbitfield);
+    GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum);
+    void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum);
+    void (GLAPIENTRY *BufferSubData)(GLenum, GLintptr, GLsizeiptr, const GLvoid *);
+    void (GLAPIENTRY *ActiveTexture)(GLenum);
+    void (GLAPIENTRY *BindTexture)(GLenum, GLuint);
+    int (GLAPIENTRY *SwapInterval)(int); // may be NULL; ra_gl_ctx_init checks before calling
+    void (GLAPIENTRY *TexImage3D)(GLenum, GLint, GLenum, GLsizei, GLsizei,
+                                  GLsizei, GLint, GLenum, GLenum,
+                                  const GLvoid *);
+
+    void (GLAPIENTRY *GenVertexArrays)(GLsizei, GLuint *);
+    void (GLAPIENTRY *BindVertexArray)(GLuint);
+    GLint (GLAPIENTRY *GetAttribLocation)(GLuint, const GLchar *);
+    void (GLAPIENTRY *EnableVertexAttribArray)(GLuint);
+    void (GLAPIENTRY *DisableVertexAttribArray)(GLuint);
+    void (GLAPIENTRY *VertexAttribPointer)(GLuint, GLint, GLenum, GLboolean,
+                                           GLsizei, const GLvoid *);
+    void (GLAPIENTRY *DeleteVertexArrays)(GLsizei, const GLuint *);
+    void (GLAPIENTRY *UseProgram)(GLuint);
+    GLint (GLAPIENTRY *GetUniformLocation)(GLuint, const GLchar *);
+    void (GLAPIENTRY *CompileShader)(GLuint);
+    GLuint (GLAPIENTRY *CreateProgram)(void);
+    GLuint (GLAPIENTRY *CreateShader)(GLenum);
+    void (GLAPIENTRY *ShaderSource)(GLuint, GLsizei, const GLchar **,
+                                    const GLint *);
+    void (GLAPIENTRY *LinkProgram)(GLuint);
+    void (GLAPIENTRY *AttachShader)(GLuint, GLuint);
+    void (GLAPIENTRY *DeleteShader)(GLuint);
+    void (GLAPIENTRY *DeleteProgram)(GLuint);
+    void (GLAPIENTRY *GetShaderInfoLog)(GLuint, GLsizei, GLsizei *, GLchar *);
+    void (GLAPIENTRY *GetShaderiv)(GLuint, GLenum, GLint *);
+    void (GLAPIENTRY *GetProgramInfoLog)(GLuint, GLsizei, GLsizei *, GLchar *);
+    void (GLAPIENTRY *GetProgramiv)(GLenum, GLenum, GLint *);
+    void (GLAPIENTRY *GetProgramBinary)(GLuint, GLsizei, GLsizei *, GLenum *,
+                                        void *);
+    void (GLAPIENTRY *ProgramBinary)(GLuint, GLenum, const void *, GLsizei);
+
+    void (GLAPIENTRY *DispatchCompute)(GLuint, GLuint, GLuint); // with BindImageTexture, gates MPGL_CAP_COMPUTE_SHADER
+    void (GLAPIENTRY *BindImageTexture)(GLuint, GLuint, GLint, GLboolean,
+                                        GLint, GLenum, GLenum);
+    void (GLAPIENTRY *MemoryBarrier)(GLbitfield);
+
+    const GLubyte* (GLAPIENTRY *GetStringi)(GLenum, GLuint);
+    void (GLAPIENTRY *BindAttribLocation)(GLuint, GLuint, const GLchar *);
+    void (GLAPIENTRY *BindFramebuffer)(GLenum, GLuint); // the loader installs a dummy if unavailable, so never NULL after loading
+    void (GLAPIENTRY *GenFramebuffers)(GLsizei, GLuint *);
+    void (GLAPIENTRY *DeleteFramebuffers)(GLsizei, const GLuint *);
+    GLenum (GLAPIENTRY *CheckFramebufferStatus)(GLenum);
+    void (GLAPIENTRY *FramebufferTexture2D)(GLenum, GLenum, GLenum, GLuint,
+                                            GLint);
+    void (GLAPIENTRY *BlitFramebuffer)(GLint, GLint, GLint, GLint, GLint, GLint,
+                                       GLint, GLint, GLbitfield, GLenum);
+    void (GLAPIENTRY *GetFramebufferAttachmentParameteriv)(GLenum, GLenum,
+                                                           GLenum, GLint *);
+
+    void (GLAPIENTRY *Uniform1f)(GLint, GLfloat);
+    void (GLAPIENTRY *Uniform2f)(GLint, GLfloat, GLfloat);
+    void (GLAPIENTRY *Uniform3f)(GLint, GLfloat, GLfloat, GLfloat);
+    void (GLAPIENTRY *Uniform4f)(GLint, GLfloat, GLfloat, GLfloat, GLfloat);
+    void (GLAPIENTRY *Uniform1i)(GLint, GLint);
+    void (GLAPIENTRY *UniformMatrix2fv)(GLint, GLsizei, GLboolean,
+                                        const GLfloat *);
+    void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean,
+                                        const GLfloat *);
+
+    void (GLAPIENTRY *InvalidateTexImage)(GLuint, GLint);
+    void (GLAPIENTRY *InvalidateFramebuffer)(GLenum, GLsizei, const GLenum *);
+
+    GLsync (GLAPIENTRY *FenceSync)(GLenum, GLbitfield); // may be NULL; ra_gl_ctx_submit_frame checks before calling
+    GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64);
+    void (GLAPIENTRY *DeleteSync)(GLsync sync);
+
+    void (GLAPIENTRY *BufferStorage)(GLenum, intptr_t, const GLvoid *, GLenum);
+
+    void (GLAPIENTRY *GenQueries)(GLsizei, GLuint *);
+    void (GLAPIENTRY *DeleteQueries)(GLsizei, const GLuint *);
+    void (GLAPIENTRY *BeginQuery)(GLenum,  GLuint);
+    void (GLAPIENTRY *EndQuery)(GLenum);
+    void (GLAPIENTRY *QueryCounter)(GLuint, GLenum);
+    GLboolean (GLAPIENTRY *IsQuery)(GLuint);
+    void (GLAPIENTRY *GetQueryObjectiv)(GLuint, GLenum, GLint *);
+    void (GLAPIENTRY *GetQueryObjecti64v)(GLuint, GLenum, GLint64 *);
+    void (GLAPIENTRY *GetQueryObjectuiv)(GLuint, GLenum, GLuint *);
+    void (GLAPIENTRY *GetQueryObjectui64v)(GLuint, GLenum, GLuint64 *);
+
+    void (GLAPIENTRY *VDPAUInitNV)(const GLvoid *, const GLvoid *);
+    void (GLAPIENTRY *VDPAUFiniNV)(void);
+    GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterOutputSurfaceNV)
+        (GLvoid *, GLenum, GLsizei, const GLuint *);
+    GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterVideoSurfaceNV)
+        (GLvoid *, GLenum, GLsizei, const GLuint *);
+    void (GLAPIENTRY *VDPAUUnregisterSurfaceNV)(GLvdpauSurfaceNV);
+    void (GLAPIENTRY *VDPAUSurfaceAccessNV)(GLvdpauSurfaceNV, GLenum);
+    void (GLAPIENTRY *VDPAUMapSurfacesNV)(GLsizei, const GLvdpauSurfaceNV *);
+    void (GLAPIENTRY *VDPAUUnmapSurfacesNV)(GLsizei, const GLvdpauSurfaceNV *);
+
+#if HAVE_GL_WIN32
+    // The HANDLE type might not be present on non-Win32
+    BOOL (GLAPIENTRY *DXSetResourceShareHandleNV)(void *dxObject,
+        HANDLE shareHandle);
+    HANDLE (GLAPIENTRY *DXOpenDeviceNV)(void *dxDevice);
+    BOOL (GLAPIENTRY *DXCloseDeviceNV)(HANDLE hDevice);
+    HANDLE (GLAPIENTRY *DXRegisterObjectNV)(HANDLE hDevice, void *dxObject,
+        GLuint name, GLenum type, GLenum access);
+    BOOL (GLAPIENTRY *DXUnregisterObjectNV)(HANDLE hDevice, HANDLE hObject);
+    BOOL (GLAPIENTRY *DXLockObjectsNV)(HANDLE hDevice, GLint count,
+        HANDLE *hObjects);
+    BOOL (GLAPIENTRY *DXUnlockObjectsNV)(HANDLE hDevice, GLint count,
+        HANDLE *hObjects);
+#endif
+
+    GLint (GLAPIENTRY *GetVideoSync)(GLuint *); // may be NULL; ra_gl_ctx_swap_buffers checks before calling
+    GLint (GLAPIENTRY *WaitVideoSync)(GLint, GLint, unsigned int *);
+
+    void (GLAPIENTRY *GetTranslatedShaderSourceANGLE)(GLuint, GLsizei,
+                                                      GLsizei*, GLchar* source);
+
+    void (GLAPIENTRY *DebugMessageCallback)(MP_GLDEBUGPROC callback,
+                                            const void *userParam);
+};
+
+#endif /* MPLAYER_GL_COMMON_H */
diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c
new file mode 100644
index 0000000..05e279b
--- /dev/null
+++ b/video/out/opengl/context.c
@@ -0,0 +1,324 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "options/m_config.h"
+#include "context.h"
+#include "ra_gl.h"
+#include "utils.h"
+
+// 0-terminated list of desktop GL versions a backend should try to
+// initialize. Each entry is the minimum required version (320 means 3.2).
+const int mpgl_min_required_gl_versions[] = {
+    /*
+     * Nvidia drivers will not provide the highest supported version
+     * when 320 core is requested. Instead, they just return 3.2. This
+     * would be bad, as we actually want compute shaders that require
+     * 4.2, so we have to request a sufficiently high version. We use
+     * 440 to maximise driver compatibility as we don't need anything
+     * from newer versions.
+     */
+    440,
+    320,
+    210,
+    0
+};
+
+enum { // values of opengl_opts.early_flush (--opengl-early-flush)
+    FLUSH_NO = 0,   // ra_gl_ctx_submit_frame never calls gl->Flush()
+    FLUSH_YES,      // ra_gl_ctx_submit_frame always calls gl->Flush()
+    FLUSH_AUTO,     // flush only when the frame is not display-synced
+};
+
+struct opengl_opts {
+    bool use_glfinish;      // --opengl-glfinish: call gl->Finish() when submitting a frame
+    bool waitvsync;         // --opengl-waitvsync: call gl->WaitVideoSync() after swapping
+    int vsync_pattern[2];   // --opengl-check-pattern-a/-b: expected vsync steps, checked if [0] != 0
+    int swapinterval;       // --opengl-swapinterval: passed to gl->SwapInterval()
+    int early_flush;        // --opengl-early-flush: one of FLUSH_*
+    int gles_mode;          // --opengl-es: one of GLES_*
+};
+
+#define OPT_BASE_STRUCT struct opengl_opts
+const struct m_sub_options opengl_conf = { // option table backing struct opengl_opts
+    .opts = (const struct m_option[]) {
+        {"opengl-glfinish", OPT_BOOL(use_glfinish)},
+        {"opengl-waitvsync", OPT_BOOL(waitvsync)},
+        {"opengl-swapinterval", OPT_INT(swapinterval)},
+        {"opengl-check-pattern-a", OPT_INT(vsync_pattern[0])},
+        {"opengl-check-pattern-b", OPT_INT(vsync_pattern[1])},
+        {"opengl-es", OPT_CHOICE(gles_mode,
+            {"auto", GLES_AUTO}, {"yes", GLES_YES}, {"no", GLES_NO})},
+        {"opengl-early-flush", OPT_CHOICE(early_flush,
+            {"no", FLUSH_NO}, {"yes", FLUSH_YES}, {"auto", FLUSH_AUTO})},
+        {0},
+    },
+    .defaults = &(const struct opengl_opts) { // fields not listed here are zero (FLUSH_NO, GLES_AUTO, false)
+        .swapinterval = 1,
+    },
+    .size = sizeof(struct opengl_opts),
+};
+
+struct priv {
+    GL *gl;
+    struct mp_log *log;
+    struct ra_gl_ctx_params params; // copy of the parameters passed to ra_gl_ctx_init
+    struct opengl_opts *opts;
+    struct ra_swapchain_fns fns; // default callbacks with external_swapchain overrides applied
+    GLuint main_fb; // FBO id last passed to ra_gl_ctx_resize
+    struct ra_tex *wrapped_fb; // corresponds to main_fb
+    // for debugging:
+    int frames_rendered;
+    unsigned int prev_sgi_sync_count;
+    // for gl_vsync_pattern
+    int last_pattern; // index (0 or 1) of the next expected opts->vsync_pattern entry
+    int matches, mismatches;
+    // for swapchain_depth simulation
+    GLsync *vsync_fences; // appended per submitted frame, consumed from index 0
+    int num_vsync_fences;
+};
+
+enum gles_mode ra_gl_ctx_get_glesmode(struct ra_ctx *ctx)
+{
+    // Read the option group into a throwaway talloc context; only the
+    // gles_mode value is copied out before that context is freed again.
+    void *scratch = talloc_new(NULL);
+    const struct opengl_opts *conf =
+        mp_get_config_group(scratch, ctx->global, &opengl_conf);
+    enum gles_mode result = conf->gles_mode;
+
+    talloc_free(scratch);
+    return result;
+}
+
+void ra_gl_ctx_uninit(struct ra_ctx *ctx)
+{
+    struct ra_swapchain *sw = ctx->swapchain;
+    if (sw) {
+        struct priv *p = sw->priv;
+        if (ctx->ra && p->wrapped_fb)
+            ra_tex_free(ctx->ra, &p->wrapped_fb);
+        talloc_free(sw); // priv was allocated as a talloc child of sw
+        ctx->swapchain = NULL;
+    }
+
+    // Make sure no debug callback stays registered before the ra goes away.
+    if (ctx->ra)
+        ra_gl_set_debug(ctx->ra, false);
+    ra_free(&ctx->ra);
+}
+
+static const struct ra_swapchain_fns ra_gl_swapchain_fns;
+
+bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params)
+{
+    // priv is allocated as a talloc child of the swapchain.
+    struct ra_swapchain *sw = talloc_ptrtype(NULL, sw);
+    *sw = (struct ra_swapchain) { .ctx = ctx };
+    ctx->swapchain = sw;
+
+    struct priv *p = talloc_ptrtype(sw, p);
+    sw->priv = p;
+    *p = (struct priv) {
+        .gl     = gl,
+        .log    = ctx->log,
+        .params = params,
+        .opts   = mp_get_config_group(p, ctx->global, &opengl_conf),
+        .fns    = ra_gl_swapchain_fns,
+    };
+    sw->fns = &p->fns;
+
+    // An external swapchain replaces only the callbacks it provides.
+    const struct ra_swapchain_fns *overrides = p->params.external_swapchain;
+    if (overrides && overrides->color_depth)
+        p->fns.color_depth = overrides->color_depth;
+    if (overrides && overrides->start_frame)
+        p->fns.start_frame = overrides->start_frame;
+    if (overrides && overrides->submit_frame)
+        p->fns.submit_frame = overrides->submit_frame;
+    if (overrides && overrides->swap_buffers)
+        p->fns.swap_buffers = overrides->swap_buffers;
+
+    // Both version fields are zero when GL function loading failed.
+    if (!(gl->version || gl->es))
+        return false;
+
+    bool software = (gl->mpgl_caps & MPGL_CAP_SW) != 0;
+    if (software)
+        MP_WARN(p, "Suspected software renderer or indirect context.\n");
+    if (software && ctx->opts.probing && !ctx->opts.allow_sw)
+        return false;
+
+    gl->debug_context = ctx->opts.debug;
+
+    if (!gl->SwapInterval) {
+        MP_VERBOSE(p, "GL_*_swap_control extension missing.\n");
+    } else {
+        gl->SwapInterval(p->opts->swapinterval);
+    }
+
+    ctx->ra = ra_create_gl(p->gl, ctx->log);
+    return ctx->ra != NULL;
+}
+
+void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo)
+{
+    struct priv *p = sw->priv;
+    struct ra_tex *cur = p->wrapped_fb;
+    // Nothing to do if the wrapper already matches the FBO and its size.
+    if (cur && p->main_fb == fbo && cur->params.w == w && cur->params.h == h)
+        return;
+
+    if (cur)
+        ra_tex_free(sw->ctx->ra, &p->wrapped_fb);
+    p->main_fb = fbo;
+    p->wrapped_fb = ra_create_wrapped_fb(sw->ctx->ra, fbo, w, h);
+}
+
+int ra_gl_ctx_color_depth(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+    GL *gl = p->gl;
+
+    // The query needs a wrapped framebuffer, MPGL_CAP_FB, and either
+    // desktop GL or GLES 3.0+; otherwise 0 is reported.
+    bool old_gles = !gl->version && gl->es < 300;
+    if (!p->wrapped_fb || old_gles || !(gl->mpgl_caps & MPGL_CAP_FB))
+        return 0;
+
+    // A non-zero FBO is queried through its first color attachment. For
+    // framebuffer 0 the back buffer is used, which goes by GL_BACK_LEFT
+    // on desktop GL and GL_BACK otherwise.
+    GLenum attachment = GL_COLOR_ATTACHMENT0;
+    if (!p->main_fb)
+        attachment = gl->version ? GL_BACK_LEFT : GL_BACK;
+
+    GLint green_bits = 0;
+    gl->BindFramebuffer(GL_FRAMEBUFFER, p->main_fb);
+    gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, attachment,
+                        GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &green_bits);
+    gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+
+    return green_bits;
+}
+
+bool ra_gl_ctx_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+    struct priv *p = sw->priv;
+
+    // Contexts without a visibility callback are treated as always visible.
+    if (p->params.check_visible && !p->params.check_visible(sw->ctx))
+        return false;
+
+    // A NULL out_fbo means the call came from vo_gpu_next; there is then
+    // nothing to fill in.
+    if (out_fbo) {
+        *out_fbo = (struct ra_fbo) {
+            .tex = p->wrapped_fb,
+            .flip = !p->gl->flipped, // OpenGL FBs are normally flipped
+        };
+    }
+    return true;
+}
+
+bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
+{
+    struct priv *p = sw->priv;
+    GL *gl = p->gl;
+
+    if (p->opts->use_glfinish)
+        gl->Finish();
+
+    // Fences feed the swapchain-depth wait in ra_gl_ctx_swap_buffers; they
+    // are not created when an external swapchain is in use.
+    if (gl->FenceSync && !p->params.external_swapchain) {
+        GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+        if (fence)
+            MP_TARRAY_APPEND(p, p->vsync_fences, p->num_vsync_fences, fence);
+    }
+
+    // FLUSH_YES always flushes; FLUSH_AUTO only when not display-synced.
+    int mode = p->opts->early_flush;
+    bool flush = mode == FLUSH_YES ||
+                 (mode == FLUSH_AUTO && !frame->display_synced);
+    if (flush)
+        gl->Flush();
+
+    return true;
+}
+
+static void check_pattern(struct priv *p, int item)
+{
+    int want = p->opts->vsync_pattern[p->last_pattern];
+    if (item != want) {
+        p->mismatches++;
+        MP_WARN(p, "wrong pattern, expected %d got %d (hit: %d, mis: %d)\n",
+                want, item, p->matches, p->mismatches);
+        return;
+    }
+    // On a hit, move on to the other pattern entry (wrapping after the second).
+    if (++p->last_pattern >= 2)
+        p->last_pattern = 0;
+    p->matches++;
+}
+
+void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+    GL *gl = p->gl;
+    struct opengl_opts *opts = p->opts;
+
+    p->params.swap_buffers(sw->ctx);
+    p->frames_rendered++;
+    if (!sw->ctx->opts.debug && p->frames_rendered > 5)
+        ra_gl_set_debug(sw->ctx->ra, false);
+
+    bool want_sync = opts->waitvsync || opts->vsync_pattern[0];
+    if (want_sync && gl->GetVideoSync) {
+        unsigned int before = 0, after = 0;
+        gl->GetVideoSync(&before);
+        if (opts->waitvsync)
+            gl->WaitVideoSync(2, (before + 1) % 2, &after);
+        int step = before - p->prev_sgi_sync_count;
+        p->prev_sgi_sync_count = before;
+        MP_DBG(p, "Flip counts: %u->%u, step=%d\n", before, after, step);
+        if (opts->vsync_pattern[0])
+            check_pattern(p, step);
+    }
+
+    // Wait on the oldest fences until fewer than swapchain_depth remain.
+    while (p->num_vsync_fences >= sw->ctx->vo->opts->swapchain_depth) {
+        gl->ClientWaitSync(p->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9);
+        gl->DeleteSync(p->vsync_fences[0]);
+        MP_TARRAY_REMOVE_AT(p->vsync_fences, p->num_vsync_fences, 0);
+    }
+}
+
+static void ra_gl_ctx_get_vsync(struct ra_swapchain *sw,
+                                struct vo_vsync_info *info)
+{   // Optional hook: without params.get_vsync, *info is left untouched.
+    struct ra_gl_ctx_params *params = &((struct priv *)sw->priv)->params;
+    if (params->get_vsync)
+        params->get_vsync(sw->ctx, info);
+}
+
+static const struct ra_swapchain_fns ra_gl_swapchain_fns = { // defaults; ra_gl_ctx_init copies this into priv.fns
+    .color_depth   = ra_gl_ctx_color_depth,
+    .start_frame   = ra_gl_ctx_start_frame,
+    .submit_frame  = ra_gl_ctx_submit_frame,
+    .swap_buffers  = ra_gl_ctx_swap_buffers,
+    .get_vsync     = ra_gl_ctx_get_vsync,
+};
diff --git a/video/out/opengl/context.h b/video/out/opengl/context.h
new file mode 100644
index 0000000..c96450e
--- /dev/null
+++ b/video/out/opengl/context.h
@@ -0,0 +1,58 @@
+#pragma once
+
+#include "common/global.h"
+#include "video/out/gpu/context.h"
+#include "common.h"
+
+extern const int mpgl_min_required_gl_versions[];
+
+enum gles_mode { // choices of the --opengl-es option
+    GLES_AUTO = 0,  // "auto"; also the default, since it is the zero value
+    GLES_YES,       // "yes"
+    GLES_NO,        // "no"
+};
+
+// Returns the GLES mode selected by the --opengl-es option.
+enum gles_mode ra_gl_ctx_get_glesmode(struct ra_ctx *ctx);
+
+// These are a set of helpers for ra_ctx providers based on ra_gl.
+// The init function also initializes ctx->ra and ctx->swapchain, so the user
+// doesn't have to do this manually. (Similarly, the uninit function will
+// clean them up)
+
+struct ra_gl_ctx_params {
+    // For special contexts (e.g. wayland) that want to check visibility
+    // before drawing a frame.
+    bool (*check_visible)(struct ra_ctx *ctx);
+
+    // Set to the platform-specific function to swap buffers, like
+    // glXSwapBuffers, eglSwapBuffers etc. This will be called by
+    // ra_gl_ctx_swap_buffers. Required unless you either never call that
+    // function or override it yourself.
+    void (*swap_buffers)(struct ra_ctx *ctx);
+
+    // See ra_swapchain_fns.get_vsync.
+    void (*get_vsync)(struct ra_ctx *ctx, struct vo_vsync_info *info);
+
+    // If this is set to non-NULL, then the ra_gl_ctx will consider the GL
+    // implementation to be using an external swapchain, which disables the
+    // software simulation of --swapchain-depth. Any functions defined by this
+    // ra_swapchain_fns struct will entirely replace the equivalent ra_gl_ctx
+    // functions in the resulting ra_swapchain.
+    const struct ra_swapchain_fns *external_swapchain;
+};
+
+void ra_gl_ctx_uninit(struct ra_ctx *ctx);
+bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params);
+
+// Call this any time the window size or main framebuffer changes
+void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo);
+
+// These functions are normally set in the ra_swapchain->fns, but if an
+// implementation has a need to override this fns struct with custom functions
+// for whatever reason, these can be used to inherit the original behavior.
+int ra_gl_ctx_color_depth(struct ra_swapchain *sw);
+struct mp_image *ra_gl_ctx_screenshot(struct ra_swapchain *sw);
+bool ra_gl_ctx_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo);
+bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame);
+void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw);
diff --git a/video/out/opengl/context_android.c b/video/out/opengl/context_android.c
new file mode 100644
index 0000000..bc1717c
--- /dev/null
+++ b/video/out/opengl/context_android.c
@@ -0,0 +1,130 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "video/out/android_common.h"
+#include "egl_helpers.h"
+#include "common/common.h"
+#include "context.h"
+
+struct priv {
+    struct GL gl;           // function table filled in by mpegl_load_functions
+    EGLDisplay egl_display;
+    EGLContext egl_context; // created by mpegl_create_context
+    EGLSurface egl_surface; // window surface created from the ANativeWindow
+};
+
+static void android_swap_buffers(struct ra_ctx *ctx)
+{   // Registered as ra_gl_ctx_params.swap_buffers by android_init.
+    const struct priv *priv = ctx->priv;
+    eglSwapBuffers(priv->egl_display, priv->egl_surface);
+}
+
+static void android_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    EGLDisplay dpy = p->egl_display;
+    ra_gl_ctx_uninit(ctx);
+
+    if (p->egl_surface) { // unbind everything before destroying the surface
+        eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+        eglDestroySurface(dpy, p->egl_surface);
+    }
+    if (p->egl_context)
+        eglDestroyContext(dpy, p->egl_context);
+
+    vo_android_uninit(ctx->vo);
+}
+
+static bool android_init(struct ra_ctx *ctx)
+{
+    // Bring up EGL (display, context, window surface), then the ra_gl context.
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    if (!vo_android_init(ctx->vo))
+        goto fail;
+
+    p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+    if (!eglInitialize(p->egl_display, NULL, NULL)) {
+        MP_FATAL(ctx, "EGL failed to initialize.\n");
+        goto fail;
+    }
+
+    EGLConfig config;
+    if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &config))
+        goto fail;
+
+    // Match the window's buffer format to the chosen EGL config. format starts
+    // at 0 so a failed query never passes an indeterminate value along.
+    ANativeWindow *native_window = vo_android_native_window(ctx->vo);
+    EGLint format = 0;
+    if (!eglGetConfigAttrib(p->egl_display, config, EGL_NATIVE_VISUAL_ID,
+                            &format))
+        MP_WARN(ctx, "Could not query EGL_NATIVE_VISUAL_ID.\n");
+    ANativeWindow_setBuffersGeometry(native_window, 0, 0, format);
+
+    p->egl_surface = eglCreateWindowSurface(p->egl_display, config,
+                                    (EGLNativeWindowType)native_window, NULL);
+    if (p->egl_surface == EGL_NO_SURFACE) {
+        MP_FATAL(ctx, "Could not create EGL surface!\n");
+        goto fail;
+    }
+
+    if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface,
+                        p->egl_context)) {
+        MP_FATAL(ctx, "Failed to set context!\n");
+        goto fail;
+    }
+    mpegl_load_functions(&p->gl, ctx->log);
+    struct ra_gl_ctx_params params = {
+        .swap_buffers = android_swap_buffers,
+    };
+    if (!ra_gl_ctx_init(ctx, &p->gl, params))
+        goto fail;
+
+    return true;
+fail:
+    android_uninit(ctx); // releases whatever was set up before the failure
+    return false;
+}
+
+static bool android_reconfig(struct ra_ctx *ctx)
+{
+    int width, height;
+    // Without a known surface size there is nothing to resize to.
+    if (!vo_android_surface_size(ctx->vo, &width, &height))
+        return false;
+    ctx->vo->dwidth = width;
+    ctx->vo->dheight = height;
+    ra_gl_ctx_resize(ctx->swapchain, width, height, 0); // fbo = 0
+    return true;
+}
+
+static int android_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{   // Every request is answered with VO_NOTIMPL; nothing is handled here.
+    return VO_NOTIMPL;
+}
+
+const struct ra_ctx_fns ra_ctx_android = { // entry points of the "android" OpenGL context backend
+    .type           = "opengl",
+    .name           = "android",
+    .reconfig       = android_reconfig,
+    .control        = android_control,
+    .init           = android_init,
+    .uninit         = android_uninit,
+};
diff --git a/video/out/opengl/context_angle.c b/video/out/opengl/context_angle.c
new file mode 100644
index 0000000..553718a
--- /dev/null
+++ b/video/out/opengl/context_angle.c
@@ -0,0 +1,653 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+#include <EGL/eglext_angle.h>
+#include <d3d11.h>
+#include <dxgi1_2.h>
+#include <dwmapi.h>
+
+#include "angle_dynamic.h"
+#include "egl_helpers.h"
+#include "video/out/gpu/d3d11_helpers.h"
+
+#include "common/common.h"
+#include "options/m_config.h"
+#include "video/out/w32_common.h"
+#include "osdep/windows_utils.h"
+#include "context.h"
+#include "utils.h"
+
+#ifndef EGL_D3D_TEXTURE_ANGLE // fallback for headers that lack the define
+#define EGL_D3D_TEXTURE_ANGLE 0x33A3
+#endif
+#ifndef EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE // likewise, all three together
+#define EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE 0x33A7
+#define EGL_SURFACE_ORIENTATION_ANGLE 0x33A8
+#define EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE 0x0002
+#endif
+
+enum { // values of angle_opts.renderer (the "angle-renderer" option)
+    RENDERER_AUTO, // has to stay 0: angle_init() tests it as !o->renderer
+    RENDERER_D3D9,
+    RENDERER_D3D11,
+};
+
+struct angle_opts {
+    int renderer;               // RENDERER_* ("angle-renderer")
+    int d3d11_warp;             // -1 auto, 0 no, 1 yes ("angle-d3d11-warp")
+    int d3d11_feature_level;    // D3D_FEATURE_LEVEL_* ("angle-d3d11-feature-level")
+    int egl_windowing;          // -1 auto, 0 no, 1 yes ("angle-egl-windowing")
+    bool flip;                  // "angle-flip", handed to the swapchain as .flip
+};
+
+#define OPT_BASE_STRUCT struct angle_opts
+const struct m_sub_options angle_conf = { // "angle-*" options -> struct angle_opts
+    .opts = (const struct m_option[]) {
+        {"angle-renderer", OPT_CHOICE(renderer,
+            {"auto", RENDERER_AUTO},
+            {"d3d9", RENDERER_D3D9},
+            {"d3d11", RENDERER_D3D11})},
+        {"angle-d3d11-warp", OPT_CHOICE(d3d11_warp,
+            {"auto", -1},
+            {"no", 0},
+            {"yes", 1})},
+        {"angle-d3d11-feature-level", OPT_CHOICE(d3d11_feature_level,
+            {"11_0", D3D_FEATURE_LEVEL_11_0},
+            {"10_1", D3D_FEATURE_LEVEL_10_1},
+            {"10_0", D3D_FEATURE_LEVEL_10_0},
+            {"9_3", D3D_FEATURE_LEVEL_9_3})},
+        {"angle-egl-windowing", OPT_CHOICE(egl_windowing,
+            {"auto", -1},
+            {"no", 0},
+            {"yes", 1})},
+        {"angle-flip", OPT_BOOL(flip)},
+        {0}
+    },
+    .defaults = &(const struct angle_opts) {
+        .renderer = RENDERER_AUTO,
+        .d3d11_warp = -1,           // "auto"
+        .d3d11_feature_level = D3D_FEATURE_LEVEL_11_0, // used as max_feature_level
+        .egl_windowing = -1,        // "auto"
+        .flip = true,
+    },
+    .size = sizeof(struct angle_opts),
+};
+
+struct priv { // per-context state of the ANGLE backend
+    GL gl;
+
+    IDXGISwapChain *dxgi_swapchain; // non-NULL selects the DXGI code paths
+
+    ID3D11Device *d3d11_device;
+    ID3D11DeviceContext *d3d11_context; // immediate context of d3d11_device
+    ID3D11Texture2D *d3d11_backbuffer;  // buffer 0 of dxgi_swapchain
+
+    EGLConfig egl_config;
+    EGLDisplay egl_display;
+    EGLDeviceEXT egl_device;    // ANGLE device created from d3d11_device
+    EGLContext egl_context;
+    EGLSurface egl_window; // For the EGL windowing surface only
+    EGLSurface egl_backbuffer; // For the DXGI swap chain based surface
+
+    int sc_width, sc_height; // Swap chain width and height
+    int swapinterval;        // sync interval for Present(), see angle_swap_interval()
+    bool flipped;            // copied into gl->flipped by angle_init()
+
+    struct angle_opts *opts;
+};
+
+static __thread struct ra_ctx *current_ctx; // set by angle_init(), read by angle_swap_interval()
+
+static void update_sizes(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    p->sc_width = ctx->vo->dwidth ? ctx->vo->dwidth : 1;    // swap chain size is the
+    p->sc_height = ctx->vo->dheight ? ctx->vo->dheight : 1; // VO size, but never 0
+}
+
+// Unbind and destroy the EGL pbuffer wrapping the swap chain back buffer (if
+// there is one), then release the D3D11 back buffer texture.
+static void d3d11_backbuffer_release(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    EGLDisplay dpy = priv->egl_display;
+    if (priv->egl_backbuffer != EGL_NO_SURFACE) {
+        eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+        eglDestroySurface(dpy, priv->egl_backbuffer);
+    }
+    priv->egl_backbuffer = EGL_NO_SURFACE;
+    SAFE_RELEASE(priv->d3d11_backbuffer);
+}
+
+// Fetch buffer 0 of the DXGI swap chain, wrap it in an EGL pbuffer via
+// EGL_D3D_TEXTURE_ANGLE and make that pbuffer current on the EGL context.
+// Returns false after logging if the buffer or the pbuffer can't be obtained.
+static bool d3d11_backbuffer_get(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo *vo = ctx->vo;
+    HRESULT res = IDXGISwapChain_GetBuffer(priv->dxgi_swapchain, 0,
+        &IID_ID3D11Texture2D, (void**)&priv->d3d11_backbuffer);
+    if (FAILED(res)) {
+        MP_FATAL(vo, "Couldn't get swap chain back buffer\n");
+        return false;
+    }
+
+    EGLint attribs[] = {
+        EGL_TEXTURE_FORMAT, EGL_TEXTURE_RGBA,
+        EGL_TEXTURE_TARGET, EGL_TEXTURE_2D,
+        EGL_NONE,
+    };
+    priv->egl_backbuffer = eglCreatePbufferFromClientBuffer(priv->egl_display,
+        EGL_D3D_TEXTURE_ANGLE, priv->d3d11_backbuffer, priv->egl_config, attribs);
+    if (priv->egl_backbuffer == EGL_NO_SURFACE) {
+        MP_FATAL(vo, "Couldn't create EGL pbuffer\n");
+        return false;
+    }
+
+    eglMakeCurrent(priv->egl_display, priv->egl_backbuffer,
+                   priv->egl_backbuffer, priv->egl_context);
+    return true;
+}
+
+// Resize the DXGI swap chain to the current VO size (see update_sizes()) and
+// fetch the back buffer again. Returns early, touching nothing else, if the
+// size did not change. Failures are logged but not reported to the caller;
+// the back buffer is re-fetched even if ResizeBuffers failed.
+static void d3d11_backbuffer_resize(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo *vo = ctx->vo;
+    const int prev_w = priv->sc_width, prev_h = priv->sc_height;
+
+    update_sizes(ctx);
+    if (priv->sc_width == prev_w && priv->sc_height == prev_h)
+        return; // Avoid unnecessary resizing
+
+    // All references to backbuffers must be released before ResizeBuffers
+    // (including references held by ANGLE)
+    d3d11_backbuffer_release(ctx);
+
+    // The DirectX runtime may report errors related to the device like
+    // DXGI_ERROR_DEVICE_REMOVED at this point
+    HRESULT res = IDXGISwapChain_ResizeBuffers(priv->dxgi_swapchain, 0,
+        priv->sc_width, priv->sc_height, DXGI_FORMAT_UNKNOWN, 0);
+    if (FAILED(res))
+        MP_FATAL(vo, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(res));
+
+    if (!d3d11_backbuffer_get(ctx))
+        MP_FATAL(vo, "Couldn't get back buffer after resize\n");
+}
+
+// Undo d3d11_device_create(): terminate the EGL display, release the ANGLE
+// device, then drop the D3D11 immediate context and device references.
+static void d3d11_device_destroy(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    PFNEGLRELEASEDEVICEANGLEPROC eglReleaseDeviceANGLE =
+        (PFNEGLRELEASEDEVICEANGLEPROC)eglGetProcAddress("eglReleaseDeviceANGLE");
+
+    if (p->egl_display)
+        eglTerminate(p->egl_display);
+    p->egl_display = EGL_NO_DISPLAY;
+    if (p->egl_device && eglReleaseDeviceANGLE)
+        eglReleaseDeviceANGLE(p->egl_device);
+    p->egl_device = 0;
+    SAFE_RELEASE(p->d3d11_context); // reference taken by GetImmediateContext()
+    SAFE_RELEASE(p->d3d11_device);
+}
+
+// Create the D3D11 device and immediate context (per the angle-d3d11-*
+// options), wrap the device in an ANGLE EGL device and fetch its EGL display.
+// On failure: logs, releases whatever was created here and returns false.
+static bool d3d11_device_create(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    struct vo *vo = ctx->vo;
+    struct angle_opts *o = p->opts;
+
+    struct d3d11_device_opts device_opts = {
+        .allow_warp = o->d3d11_warp != 0,
+        .force_warp = o->d3d11_warp == 1,
+        .max_feature_level = o->d3d11_feature_level,
+        .min_feature_level = D3D_FEATURE_LEVEL_9_3,
+        .max_frame_latency = ctx->vo->opts->swapchain_depth,
+    };
+    if (!mp_d3d11_create_present_device(vo->log, &device_opts, &p->d3d11_device))
+        return false;
+    ID3D11Device_GetImmediateContext(p->d3d11_device, &p->d3d11_context);
+    PFNEGLGETPLATFORMDISPLAYEXTPROC eglGetPlatformDisplayEXT =
+        (PFNEGLGETPLATFORMDISPLAYEXTPROC)eglGetProcAddress("eglGetPlatformDisplayEXT");
+    PFNEGLCREATEDEVICEANGLEPROC eglCreateDeviceANGLE =
+        (PFNEGLCREATEDEVICEANGLEPROC)eglGetProcAddress("eglCreateDeviceANGLE");
+    if (!eglGetPlatformDisplayEXT || !eglCreateDeviceANGLE) {
+        MP_FATAL(vo, "Missing %s\n", !eglGetPlatformDisplayEXT ?
+                 "EGL_EXT_platform_base" : "EGL_EXT_platform_device");
+        goto fail;
+    }
+    p->egl_device = eglCreateDeviceANGLE(EGL_D3D11_DEVICE_ANGLE,
+        p->d3d11_device, NULL);
+    if (!p->egl_device) {
+        MP_FATAL(vo, "Couldn't create EGL device\n");
+        goto fail;
+    }
+    p->egl_display = eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT,
+        p->egl_device, NULL);
+    if (!p->egl_display) {
+        MP_FATAL(vo, "Couldn't get EGL display\n");
+        goto fail;
+    }
+    return true;
+fail:
+    SAFE_RELEASE(p->d3d11_context);
+    d3d11_device_destroy(ctx);
+    return false;
+}
+
+// Release the DXGI swap chain and the EGL/D3D11 back buffer objects.
+static void d3d11_swapchain_surface_destroy(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    const bool flush_needed = priv->dxgi_swapchain && priv->d3d11_context;
+    SAFE_RELEASE(priv->dxgi_swapchain);
+    d3d11_backbuffer_release(ctx);
+
+    // Flushing the D3D11 immediate context ensures the swapchain is really
+    // destroyed. That matters because the HWND may be reused. See:
+    // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476425.aspx
+    if (flush_needed)
+        ID3D11DeviceContext_Flush(priv->d3d11_context);
+}
+
+// Create a DXGI swap chain for the VO window plus the EGL pbuffer for its back
+// buffer, and mark the output as flipped. Needs a D3D11 device; without one,
+// or on any later failure, d3d11_swapchain_surface_destroy() is run and false
+// is returned.
+static bool d3d11_swapchain_surface_create(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo *vo = ctx->vo;
+
+    bool ok = priv->d3d11_device != NULL;
+    if (ok) {
+        update_sizes(ctx);
+        struct d3d11_swapchain_opts sc_opts = {
+            .window = vo_w32_hwnd(vo),
+            .width = priv->sc_width,
+            .height = priv->sc_height,
+            .flip = priv->opts->flip,
+            // One extra frame for the backbuffer plus one frame of "slack" to
+            // reduce contention with the window manager on backbuffer acquire
+            .length = vo->opts->swapchain_depth + 2,
+            .usage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_SHADER_INPUT,
+        };
+        ok = mp_d3d11_create_swapchain(priv->d3d11_device, vo->log, &sc_opts,
+                                       &priv->dxgi_swapchain) &&
+             d3d11_backbuffer_get(ctx);
+    }
+
+    if (ok)
+        priv->flipped = true;
+    else
+        d3d11_swapchain_surface_destroy(ctx);
+    return ok;
+}
+
+// Terminate the EGL display, if there is one, and clear the handle.
+static void d3d9_device_destroy(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    if (priv->egl_display != EGL_NO_DISPLAY)
+        eglTerminate(priv->egl_display);
+    priv->egl_display = EGL_NO_DISPLAY;
+}
+
+// Ask ANGLE for an EGL display of type D3D9 / hardware device and store it in
+// priv->egl_display. Returns false after logging on failure.
+static bool d3d9_device_create(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo *vo = ctx->vo;
+
+    PFNEGLGETPLATFORMDISPLAYEXTPROC get_platform_display =
+        (PFNEGLGETPLATFORMDISPLAYEXTPROC)eglGetProcAddress("eglGetPlatformDisplayEXT");
+    if (!get_platform_display) {
+        MP_FATAL(vo, "Missing EGL_EXT_platform_base\n");
+        return false;
+    }
+
+    EGLint attribs[] = {
+        EGL_PLATFORM_ANGLE_TYPE_ANGLE,
+            EGL_PLATFORM_ANGLE_TYPE_D3D9_ANGLE,
+        EGL_PLATFORM_ANGLE_DEVICE_TYPE_ANGLE,
+            EGL_PLATFORM_ANGLE_DEVICE_TYPE_HARDWARE_ANGLE,
+        EGL_NONE,
+    };
+    priv->egl_display = get_platform_display(EGL_PLATFORM_ANGLE_ANGLE,
+        EGL_DEFAULT_DISPLAY, attribs);
+    if (priv->egl_display != EGL_NO_DISPLAY)
+        return true;
+    MP_FATAL(vo, "Couldn't get display\n");
+    return false;
+}
+
+// If a window surface exists, unbind EGL; the surface itself isn't destroyed.
+static void egl_window_surface_destroy(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    if (priv->egl_window == EGL_NO_SURFACE)
+        return;
+    eglMakeCurrent(priv->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+}
+
+// Create an EGL window surface on the VO's HWND and make it current. If the
+// EGL config reports EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE as its optimal
+// orientation, that orientation is requested and priv->flipped is set.
+// Returns false after logging if the surface can't be created.
+static bool egl_window_surface_create(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo *vo = ctx->vo;
+    EGLint *attribs = NULL;
+    int num_attribs = 0;
+
+    EGLint orientation;
+    bool invert_y = eglGetConfigAttrib(priv->egl_display, priv->egl_config,
+                        EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE, &orientation) &&
+                    orientation == EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE;
+    if (invert_y) {
+        MP_TARRAY_APPEND(NULL, attribs, num_attribs,
+            EGL_SURFACE_ORIENTATION_ANGLE);
+        MP_TARRAY_APPEND(NULL, attribs, num_attribs,
+            EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE);
+        priv->flipped = true;
+        MP_VERBOSE(vo, "Rendering flipped.\n");
+    }
+    MP_TARRAY_APPEND(NULL, attribs, num_attribs, EGL_NONE);
+
+    priv->egl_window = eglCreateWindowSurface(priv->egl_display,
+        priv->egl_config, vo_w32_hwnd(vo), attribs);
+    talloc_free(attribs);
+    if (priv->egl_window == EGL_NO_SURFACE) {
+        MP_FATAL(vo, "Could not create EGL surface!\n");
+        egl_window_surface_destroy(ctx);
+        return false;
+    }
+
+    eglMakeCurrent(priv->egl_display, priv->egl_window, priv->egl_window,
+                   priv->egl_context);
+    return true;
+}
+
+// Unbind and destroy the EGL context, if one exists, and clear the handle.
+static void context_destroy(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    if (priv->egl_context != EGL_NO_CONTEXT) {
+        eglMakeCurrent(priv->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+        eglDestroyContext(priv->egl_display, priv->egl_context);
+    }
+    priv->egl_context = EGL_NO_CONTEXT;
+}
+
+// Initialize EGL on priv->egl_display and create the EGL context and config
+// through mpegl_create_context(). On failure: logs, runs context_destroy()
+// and returns false.
+static bool context_init(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo *vo = ctx->vo;
+
+    if (!eglInitialize(priv->egl_display, NULL, NULL)) {
+        MP_FATAL(vo, "Couldn't initialize EGL\n");
+        context_destroy(ctx);
+        return false;
+    }
+
+    const char *ext_list = eglQueryString(priv->egl_display, EGL_EXTENSIONS);
+    if (ext_list != NULL)
+        MP_DBG(vo, "EGL extensions: %s\n", ext_list);
+
+    bool created = mpegl_create_context(ctx, priv->egl_display,
+                                        &priv->egl_context, &priv->egl_config);
+    if (!created) {
+        MP_FATAL(vo, "Could not create EGL context!\n");
+        context_destroy(ctx);
+    }
+    return created;
+}
+
+// Tear down everything angle_init() may have set up. Also used on angle_init()
+// failure, so every step has to cope with state that was never created.
+static void angle_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    ra_gl_ctx_uninit(ctx);
+    // Don't leave the thread-local used by angle_swap_interval() dangling
+    if (current_ctx == ctx)
+        current_ctx = NULL;
+    DwmEnableMMCSS(FALSE);
+
+    // Uninit the EGL surface implementation that is being used. Note: this may
+    // call a *_destroy function a second time (it also runs when surface
+    // creation fails), which is fine because those functions are idempotent.
+    if (p->dxgi_swapchain)
+        d3d11_swapchain_surface_destroy(ctx);
+    else
+        egl_window_surface_destroy(ctx);
+    context_destroy(ctx);
+
+    // Uninit the EGL device implementation that is being used
+    if (p->d3d11_device)
+        d3d11_device_destroy(ctx);
+    else
+        d3d9_device_destroy(ctx);
+    vo_w32_uninit(ctx->vo);
+}
+
+// GL SwapInterval hook. With a DXGI swapchain the interval (clamped to 0..4)
+// is stored for Present(); otherwise it is forwarded to eglSwapInterval().
+// Returns 0 if no context has been registered on this thread.
+static int GLAPIENTRY angle_swap_interval(int interval)
+{
+    if (current_ctx == NULL)
+        return 0;
+    struct priv *priv = current_ctx->priv;
+    if (!priv->dxgi_swapchain)
+        return eglSwapInterval(priv->egl_display, interval);
+    priv->swapinterval = MPCLAMP(interval, 0, 4);
+    return 1;
+}
+
+// Present the DXGI swapchain using the stored swap interval.
+// Present() on a flip-sequential swap chain silently re-points the back buffer
+// at the next buffer in the chain, which unbinds its RTVs. ANGLE doesn't know
+// about our Present() call and would keep using them, so the bound render
+// targets are saved before and restored after presenting.
+static void d3d11_swap_buffers(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    ID3D11DeviceContext *dctx = priv->d3d11_context;
+    ID3D11RenderTargetView *saved_rtvs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0};
+    ID3D11DepthStencilView *saved_dsv = NULL;
+    ID3D11DeviceContext_OMGetRenderTargets(dctx, MP_ARRAY_SIZE(saved_rtvs),
+                                           saved_rtvs, &saved_dsv);
+
+    HRESULT res = IDXGISwapChain_Present(priv->dxgi_swapchain, priv->swapinterval, 0);
+    if (FAILED(res))
+        MP_FATAL(ctx->vo, "Couldn't present: %s\n", mp_HRESULT_to_str(res));
+
+    // Restore the RTVs and release the objects
+    ID3D11DeviceContext_OMSetRenderTargets(dctx, MP_ARRAY_SIZE(saved_rtvs),
+                                           saved_rtvs, saved_dsv);
+    for (int i = 0; i < MP_ARRAY_SIZE(saved_rtvs); i++)
+        SAFE_RELEASE(saved_rtvs[i]);
+    SAFE_RELEASE(saved_dsv);
+}
+
+static void egl_swap_buffers(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    eglSwapBuffers(p->egl_display, p->egl_window); // EGL windowing path only
+}
+
+// swap_buffers hook: present through DXGI when a swapchain exists, else EGL.
+static void angle_swap_buffers(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    if (!priv->dxgi_swapchain)
+        egl_swap_buffers(ctx);
+    else
+        d3d11_swap_buffers(ctx);
+}
+
+static int angle_color_depth(struct ra_swapchain *sw) // .color_depth of dxgi_swapchain_fns
+{
+    // Only 8-bit output is supported at the moment
+    return 8;
+}
+
+// submit_frame hook (DXGI swapchain): regular GL submit plus a D3D11 flush.
+static bool angle_submit_frame(struct ra_swapchain *sw,
+                               const struct vo_frame *frame)
+{
+    struct priv *priv = sw->ctx->priv;
+    const bool submitted = ra_gl_ctx_submit_frame(sw, frame);
+    // DXGI Present doesn't flush the immediate context, which can make timers
+    // inaccurate, since the end queries might not be sent until the next
+    // frame. Fix this by flushing the context now.
+    if (priv->d3d11_context)
+        ID3D11DeviceContext_Flush(priv->d3d11_context);
+    return submitted;
+}
+
+static bool angle_init(struct ra_ctx *ctx) // on failure: angle_uninit() + false
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    struct vo *vo = ctx->vo;
+    GL *gl = &p->gl;
+    // Backend options (the "angle-*" option group)
+    p->opts = mp_get_config_group(ctx, ctx->global, &angle_conf);
+    struct angle_opts *o = p->opts;
+
+    if (!angle_load()) {
+        MP_VERBOSE(vo, "Failed to load LIBEGL.DLL\n");
+        goto fail;
+    }
+    // renderer == RENDERER_AUTO (0): try D3D11, then D3D9. Otherwise only the forced one.
+    // Create the underlying EGL device implementation
+    bool context_ok = false;
+    if ((!context_ok && !o->renderer) || o->renderer == RENDERER_D3D11) {
+        context_ok = d3d11_device_create(ctx);
+        if (context_ok) {
+            context_ok = context_init(ctx);
+            if (!context_ok)
+                d3d11_device_destroy(ctx); // so the D3D9 attempt starts clean
+        }
+    }
+    if ((!context_ok && !o->renderer) || o->renderer == RENDERER_D3D9) {
+        context_ok = d3d9_device_create(ctx);
+        if (context_ok) {
+            MP_VERBOSE(vo, "Using Direct3D 9\n");
+
+            context_ok = context_init(ctx);
+            if (!context_ok)
+                d3d9_device_destroy(ctx);
+        }
+    }
+    if (!context_ok)
+        goto fail;
+
+    if (!vo_w32_init(vo))
+        goto fail;
+    // egl_windowing == -1: try the DXGI swapchain, then EGL windowing. 0/1 force one.
+    // Create the underlying EGL surface implementation
+    bool surface_ok = false;
+    if ((!surface_ok && o->egl_windowing == -1) || o->egl_windowing == 0) {
+        surface_ok = d3d11_swapchain_surface_create(ctx);
+    }
+    if ((!surface_ok && o->egl_windowing == -1) || o->egl_windowing == 1) {
+        surface_ok = egl_window_surface_create(ctx);
+        if (surface_ok)
+            MP_VERBOSE(vo, "Using EGL windowing\n");
+    }
+    if (!surface_ok)
+        goto fail;
+
+    mpegl_load_functions(gl, vo->log);
+
+    current_ctx = ctx; // looked up by angle_swap_interval(), which gets no ctx argument
+    gl->SwapInterval = angle_swap_interval;
+
+    // Custom swapchain impl for the D3D11 swapchain-based surface
+    static const struct ra_swapchain_fns dxgi_swapchain_fns = {
+        .color_depth = angle_color_depth,
+        .submit_frame = angle_submit_frame,
+    };
+    struct ra_gl_ctx_params params = {
+        .swap_buffers = angle_swap_buffers,
+        .external_swapchain = p->dxgi_swapchain ? &dxgi_swapchain_fns : NULL,
+    };
+
+    gl->flipped = p->flipped; // set by whichever surface create function succeeded
+    if (!ra_gl_ctx_init(ctx, gl, params))
+        goto fail;
+
+    DwmEnableMMCSS(TRUE); // DWM MMCSS cargo-cult. The dxgl backend also does this.
+
+    return true;
+fail:
+    angle_uninit(ctx);
+    return false;
+}
+
+static void resize(struct ra_ctx *ctx) // apply ctx->vo->dwidth/dheight
+{
+    struct priv *p = ctx->priv;
+    if (p->dxgi_swapchain)
+        d3d11_backbuffer_resize(ctx); // our own swapchain: resize it explicitly
+    else
+        eglWaitClient(); // Should get ANGLE to resize its swapchain
+    ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0);
+}
+
+static bool angle_reconfig(struct ra_ctx *ctx)
+{
+    vo_w32_config(ctx->vo);
+    resize(ctx); // pick up the VO size after the reconfig
+    return true; // never reports failure
+}
+
+static int angle_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    int ret = vo_w32_control(ctx->vo, events, request, arg); // handled by the win32 code
+    if (*events & VO_EVENT_RESIZE) // a resize was flagged: update our surfaces too
+        resize(ctx);
+    return ret;
+}
+
+const struct ra_ctx_fns ra_ctx_angle = {    // entry points of this backend
+    .name           = "angle",
+    .type           = "opengl",
+    .init           = angle_init,
+    .uninit         = angle_uninit,         // also run by angle_init() on failure
+    .reconfig       = angle_reconfig,
+    .control        = angle_control,
+};
diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c
new file mode 100644
index 0000000..2db428f
--- /dev/null
+++ b/video/out/opengl/context_drm_egl.c
@@ -0,0 +1,744 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <signal.h>
+#include <string.h>
+#include <poll.h>
+#include <unistd.h>
+
+#include <gbm.h>
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+#include <drm_fourcc.h>
+
+#include "libmpv/render_gl.h"
+#include "common/common.h"
+#include "osdep/timer.h"
+#include "video/out/drm_atomic.h"
+#include "video/out/drm_common.h"
+#include "video/out/present_sync.h"
+
+#include "egl_helpers.h"
+#include "common.h"
+#include "context.h"
+
+#ifndef EGL_PLATFORM_GBM_MESA // fallback for headers that lack the define
+#define EGL_PLATFORM_GBM_MESA 0x31D7
+#endif
+
+#ifndef EGL_PLATFORM_GBM_KHR // same value as the MESA variant above
+#define EGL_PLATFORM_GBM_KHR 0x31D7
+#endif
+
+struct gbm_frame { // one entry of gbm.bo_queue, allocated in enqueue_bo()
+    struct gbm_bo *bo;
+};
+
+struct gbm {
+    struct gbm_surface *surface;
+    struct gbm_device *device;
+    struct gbm_frame **bo_queue;    // appended at the end, consumed from index 0
+    unsigned int num_bos;           // number of entries in bo_queue
+};
+
+struct egl { // all three are filled in by init_egl()
+    EGLDisplay display;
+    EGLContext context;
+    EGLSurface surface;     // window surface created on gbm.surface
+};
+
+struct priv { // per-context state of the DRM/EGL backend
+    GL gl;
+
+    struct egl egl;
+    struct gbm gbm;
+
+    GLsync *vsync_fences;               // oldest fence first, see wait_fence()
+    unsigned int num_vsync_fences;
+
+    uint32_t gbm_format;                // format of the GBM surface and the DRM framebuffers
+    uint64_t *gbm_modifiers;            // modifier list handed to GBM surface creation
+    unsigned int num_gbm_modifiers;     // 0: create the surface without modifiers
+
+    struct mpv_opengl_drm_params_v2 drm_params;
+    struct mpv_opengl_drm_draw_surface_size draw_surface_size; // size of the GBM surface
+};
+
+// Map a GBM format code to its name for log messages. Not general: only the
+// formats used in this module are known, everything else gives "UNKNOWN".
+static const char *gbm_format_to_string(uint32_t format)
+{
+    static const struct {
+        uint32_t code;
+        const char *name;
+    } known[] = {
+        {GBM_FORMAT_XRGB8888,    "GBM_FORMAT_XRGB8888"},
+        {GBM_FORMAT_ARGB8888,    "GBM_FORMAT_ARGB8888"},
+        {GBM_FORMAT_XBGR8888,    "GBM_FORMAT_XBGR8888"},
+        {GBM_FORMAT_ABGR8888,    "GBM_FORMAT_ABGR8888"},
+        {GBM_FORMAT_XRGB2101010, "GBM_FORMAT_XRGB2101010"},
+        {GBM_FORMAT_ARGB2101010, "GBM_FORMAT_ARGB2101010"},
+        {GBM_FORMAT_XBGR2101010, "GBM_FORMAT_XBGR2101010"},
+        {GBM_FORMAT_ABGR2101010, "GBM_FORMAT_ABGR2101010"},
+    };
+
+    for (size_t i = 0; i < sizeof(known) / sizeof(known[0]); i++) {
+        if (known[i].code == format)
+            return known[i].name;
+    }
+    return "UNKNOWN";
+}
+
+// Return the format to fall back to when no EGLConfig matches `format`.
+// An XRGB framebuffer may fall back to an ARGB EGLConfig. The reverse, an
+// XRGB EGLConfig for an ARGB framebuffer, is allowed as well, since this
+// seems necessary to work with broken Mali drivers that don't report their
+// EGLConfigs as supporting alpha properly.
+// The same holds for the BGR and the 10 bit variants. Formats without a
+// counterpart in the table below yield 0.
+static uint32_t fallback_format_for(uint32_t format)
+{
+    // Each row pairs an X (no alpha) format with its A (alpha) counterpart
+    static const uint32_t pairs[][2] = {
+        {GBM_FORMAT_XRGB8888,    GBM_FORMAT_ARGB8888},
+        {GBM_FORMAT_XBGR8888,    GBM_FORMAT_ABGR8888},
+        {GBM_FORMAT_XRGB2101010, GBM_FORMAT_ARGB2101010},
+        {GBM_FORMAT_XBGR2101010, GBM_FORMAT_ABGR2101010},
+    };
+    const size_t num_pairs = sizeof(pairs) / sizeof(pairs[0]);
+
+    for (size_t i = 0; i < num_pairs; i++) {
+        if (format == pairs[i][0])
+            return pairs[i][1];
+        if (format == pairs[i][1])
+            return pairs[i][0];
+    }
+
+    return 0;
+}
+
+// Config chooser callback (see init_egl): returns the index of the first
+// EGLConfig whose EGL_NATIVE_VISUAL_ID equals the GBM format in use or, failing
+// that, its fallback format. user_data is the ra_ctx. Returns -1 if none match.
+static int match_config_to_visual(void *user_data, EGLConfig *configs, int num_configs)
+{
+    struct ra_ctx *ctx = (struct ra_ctx*)user_data;
+    struct priv *p = ctx->priv;
+    const EGLint wanted[] = {
+        (EGLint)p->gbm_format,
+        (EGLint)fallback_format_for(p->gbm_format),
+        0
+    };
+    for (const EGLint *visual = wanted; *visual != 0; ++visual) {
+        const char *visual_name = gbm_format_to_string(*visual);
+        MP_VERBOSE(ctx, "Attempting to find EGLConfig matching %s\n", visual_name);
+        for (unsigned int idx = 0; idx < num_configs; ++idx) {
+            EGLint native_id;
+            if (eglGetConfigAttrib(p->egl.display, configs[idx],
+                                   EGL_NATIVE_VISUAL_ID, &native_id) &&
+                native_id == *visual)
+            {
+                MP_VERBOSE(ctx, "Found matching EGLConfig for %s\n", visual_name);
+                return idx;
+            }
+        }
+        MP_VERBOSE(ctx, "No matching EGLConfig for %s\n", visual_name);
+    }
+
+    MP_ERR(ctx, "Could not find EGLConfig matching the GBM visual (%s).\n",
+           gbm_format_to_string(p->gbm_format));
+    return -1;
+}
+
+// Get the EGLDisplay for a GBM device: try the MESA platform extension, then
+// the KHR one, and finally fall back to plain eglGetDisplay().
+static EGLDisplay egl_get_display(struct gbm_device *gbm_device)
+{
+    EGLDisplay dpy = mpegl_get_display(EGL_PLATFORM_GBM_MESA,
+                                       "EGL_MESA_platform_gbm", gbm_device);
+    if (dpy == EGL_NO_DISPLAY) {
+        dpy = mpegl_get_display(EGL_PLATFORM_GBM_KHR, "EGL_KHR_platform_gbm",
+                                gbm_device);
+    }
+    if (dpy == EGL_NO_DISPLAY)
+        dpy = eglGetDisplay(gbm_device);
+    return dpy;
+}
+
+// Bring up EGL on the GBM device: display, context (with a config matching the
+// GBM visual) and window surface. Logs and returns false on the first failure.
+static bool init_egl(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct egl *egl = &priv->egl;
+    MP_VERBOSE(ctx, "Initializing EGL\n");
+    egl->display = egl_get_display(priv->gbm.device);
+    if (egl->display == EGL_NO_DISPLAY) {
+        MP_ERR(ctx, "Failed to get EGL display.\n");
+        return false;
+    }
+    if (!eglInitialize(egl->display, NULL, NULL)) {
+        MP_ERR(ctx, "Failed to initialize EGL.\n");
+        return false;
+    }
+
+    EGLConfig config;
+    struct mpegl_cb chooser = {match_config_to_visual, ctx};
+    if (!mpegl_create_context_cb(ctx, egl->display, chooser, &egl->context, &config))
+        return false;
+
+    MP_VERBOSE(ctx, "Initializing EGL surface\n");
+    egl->surface = mpegl_create_window_surface(egl->display, config,
+                                               priv->gbm.surface);
+    if (egl->surface == EGL_NO_SURFACE) {
+        egl->surface = eglCreateWindowSurface(egl->display, config,
+                                              priv->gbm.surface, NULL);
+    }
+    if (egl->surface == EGL_NO_SURFACE) {
+        MP_ERR(ctx, "Failed to create EGL surface.\n");
+        return false;
+    }
+    return true;
+}
+
+// Create the GBM device on the DRM fd and a GBM surface of the draw surface
+// size in priv->gbm_format. The modifier list is used when it is non-empty;
+// otherwise the surface is created with the scanout and rendering use flags.
+// Returns false after logging on failure.
+static bool init_gbm(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo *vo = ctx->vo;
+    const struct mpv_opengl_drm_draw_surface_size *size = &priv->draw_surface_size;
+
+    MP_VERBOSE(vo, "Creating GBM device\n");
+    priv->gbm.device = gbm_create_device(vo->drm->fd);
+    if (!priv->gbm.device) {
+        MP_ERR(vo, "Failed to create GBM device.\n");
+        return false;
+    }
+
+    MP_VERBOSE(vo, "Initializing GBM surface (%d x %d)\n",
+               size->width, size->height);
+    if (priv->num_gbm_modifiers > 0) {
+        priv->gbm.surface = gbm_surface_create_with_modifiers(
+            priv->gbm.device, size->width, size->height, priv->gbm_format,
+            priv->gbm_modifiers, priv->num_gbm_modifiers);
+    } else {
+        priv->gbm.surface = gbm_surface_create(
+            priv->gbm.device, size->width, size->height, priv->gbm_format,
+            GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING);
+    }
+
+    if (!priv->gbm.surface) {
+        MP_ERR(vo, "Failed to create GBM surface.\n");
+        return false;
+    }
+    return true;
+}
+
+// BO user-data destructor: removes the DRM framebuffer attached to the BO.
+static void framebuffer_destroy_callback(struct gbm_bo *bo, void *data)
+{
+    const struct framebuffer *fb = data;
+    if (fb != NULL)
+        drmModeRmFB(fb->fd, fb->id);
+}
+
+static void update_framebuffer_from_bo(struct ra_ctx *ctx, struct gbm_bo *bo)
+{
+    struct priv *p = ctx->priv;
+    struct vo_drm_state *drm = ctx->vo->drm;
+    struct framebuffer *fb = gbm_bo_get_user_data(bo);
+    if (fb) { // a framebuffer was already attached to this bo: just select it
+        drm->fb = fb;
+        return;
+    }
+    // First use of this bo: create a DRM framebuffer for it
+    fb = talloc_zero(ctx, struct framebuffer);
+    fb->fd     = drm->fd;
+    fb->width  = gbm_bo_get_width(bo);
+    fb->height = gbm_bo_get_height(bo);
+    uint64_t modifier = gbm_bo_get_modifier(bo);
+    // No modifier list configured, or the bo reports no valid modifier: plain AddFB2
+    int ret;
+    if (p->num_gbm_modifiers == 0 || modifier == DRM_FORMAT_MOD_INVALID) {
+        uint32_t stride = gbm_bo_get_stride(bo);
+        uint32_t handle = gbm_bo_get_handle(bo).u32;
+        ret = drmModeAddFB2(fb->fd, fb->width, fb->height,
+                            p->gbm_format,
+                            (uint32_t[4]){handle, 0, 0, 0},
+                            (uint32_t[4]){stride, 0, 0, 0},
+                            (uint32_t[4]){0, 0, 0, 0},
+                            &fb->id, 0);
+    } else {
+        MP_VERBOSE(ctx, "GBM surface using modifier 0x%"PRIX64"\n", modifier);
+        // Per-plane parameters; entries of unused planes stay 0
+        uint32_t handles[4] = {0};
+        uint32_t strides[4] = {0};
+        uint32_t offsets[4] = {0};
+        uint64_t modifiers[4] = {0};
+        // NOTE(review): assumes gbm_bo_get_plane_count() never exceeds 4 - confirm
+        const int num_planes = gbm_bo_get_plane_count(bo);
+        for (int i = 0; i < num_planes; ++i) {
+            handles[i] = gbm_bo_get_handle_for_plane(bo, i).u32;
+            strides[i] = gbm_bo_get_stride_for_plane(bo, i);
+            offsets[i] = gbm_bo_get_offset(bo, i);
+            modifiers[i] = modifier; // the same modifier is used for every plane
+        }
+
+        ret = drmModeAddFB2WithModifiers(fb->fd, fb->width, fb->height,
+                                         p->gbm_format,
+                                         handles, strides, offsets, modifiers,
+                                         &fb->id, DRM_MODE_FB_MODIFIERS);
+    }
+    if (ret) {
+        MP_ERR(ctx->vo, "Failed to create framebuffer: %s\n", mp_strerror(errno));
+    }
+    gbm_bo_set_user_data(bo, fb, framebuffer_destroy_callback); // also done if creation failed
+    drm->fb = fb;
+}
+
+// Put the framebuffer of frame->bo on the draw plane and commit the pending
+// atomic request as a non-blocking page flip; a new request is allocated after.
+static void queue_flip(struct ra_ctx *ctx, struct gbm_frame *frame)
+{
+    struct vo_drm_state *drm = ctx->vo->drm;
+    update_framebuffer_from_bo(ctx, frame->bo);
+    struct drm_atomic_context *atomic_ctx = drm->atomic_context;
+    drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "FB_ID", drm->fb->id);
+    drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "CRTC_ID", atomic_ctx->crtc->id);
+    drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "ZPOS", 1);
+
+    int ret = drmModeAtomicCommit(drm->fd, atomic_ctx->request,
+                                  DRM_MODE_ATOMIC_NONBLOCK | DRM_MODE_PAGE_FLIP_EVENT, drm);
+    // libdrm reports failure as a negative errno value, so negate it for strerror
+    if (ret)
+        MP_WARN(ctx->vo, "Failed to commit atomic request: %s\n", mp_strerror(-ret));
+    drm->waiting_for_flip = !ret; // only wait for a flip event if the commit went through
+
+    drmModeAtomicFree(atomic_ctx->request);
+    atomic_ctx->request = drmModeAtomicAlloc();
+}
+
+// Wrap bo in a newly allocated gbm_frame and append it to the BO queue.
+static void enqueue_bo(struct ra_ctx *ctx, struct gbm_bo *bo)
+{
+    struct priv *priv = ctx->priv;
+    struct gbm_frame *frame = talloc(priv, struct gbm_frame);
+    frame->bo = bo;
+    MP_TARRAY_APPEND(priv, priv->gbm.bo_queue, priv->gbm.num_bos, frame);
+}
+
+// Free the frame at the head of the BO queue and remove it from the queue.
+static void dequeue_bo(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    talloc_free(priv->gbm.bo_queue[0]);
+    MP_TARRAY_REMOVE_AT(priv->gbm.bo_queue, priv->gbm.num_bos, 0);
+}
+
+// Retire the head of the BO queue, giving its BO back to the GBM surface.
+static void swapchain_step(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    if (priv->gbm.num_bos == 0)
+        return;
+    struct gbm_bo *oldest = priv->gbm.bo_queue[0]->bo;
+    if (oldest)
+        gbm_surface_release_buffer(priv->gbm.surface, oldest);
+    dequeue_bo(ctx);
+}
+
+// Insert a GL sync object after the commands issued so far and remember it in
+// the vsync fence list. Silently does nothing if FenceSync is not available
+// or returns no fence.
+static void new_fence(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+
+    if (!priv->gl.FenceSync)
+        return;
+
+    GLsync sync = priv->gl.FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+    if (!sync)
+        return;
+
+    MP_TARRAY_APPEND(priv, priv->vsync_fences, priv->num_vsync_fences, sync);
+}
+
+// Wait on and discard the oldest fences for as long as there are at least as
+// many pending fences as queued buffer objects. Each wait is bounded by a
+// timeout of 1e9 (as passed to ClientWaitSync).
+static void wait_fence(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+
+    for (;;) {
+        if (!priv->num_vsync_fences)
+            break;
+        if (priv->num_vsync_fences < priv->gbm.num_bos)
+            break;
+
+        priv->gl.ClientWaitSync(priv->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9);
+        priv->gl.DeleteSync(priv->vsync_fences[0]);
+        MP_TARRAY_REMOVE_AT(priv->vsync_fences, priv->num_vsync_fences, 0);
+    }
+}
+
+// Swapchain start_frame hook: make sure an atomic request exists (allocating
+// one and re-publishing its address through drm_params if it is missing),
+// then defer to the generic GL implementation.
+static bool drm_egl_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+    struct priv *priv = sw->ctx->priv;
+    struct drm_atomic_context *atomic = sw->ctx->vo->drm->atomic_context;
+
+    if (atomic->request == NULL) {
+        atomic->request = drmModeAtomicAlloc();
+        priv->drm_params.atomic_request_ptr = &atomic->request;
+    }
+
+    return ra_gl_ctx_start_frame(sw, out_fbo);
+}
+
+// Swapchain submit_frame hook: record whether the frame is a still image
+// (read later by drm_egl_swap_buffers() to decide on draining) and defer to
+// the generic GL implementation.
+static bool drm_egl_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
+{
+    sw->ctx->vo->drm->still = frame->still;
+    return ra_gl_ctx_submit_frame(sw, frame);
+}
+
+// Swapchain swap_buffers hook.
+//
+// Swaps the EGL surface, locks the resulting GBM front buffer, appends it to
+// the BO queue together with a new fence, and then keeps flipping queued
+// buffers for as long as the loop condition below holds. Returns early
+// without doing anything if DRM is not active, or without queueing if the
+// front buffer cannot be locked.
+static void drm_egl_swap_buffers(struct ra_swapchain *sw)
+{
+    struct ra_ctx *ctx = sw->ctx;
+    struct priv *p = ctx->priv;
+    struct vo_drm_state *drm = ctx->vo->drm;
+    const bool drain = drm->paused || drm->still;  // True when we need to drain the swapchain
+
+    if (!drm->active)
+        return;
+
+    // Throttle: block until fewer fences than queued buffers are outstanding.
+    wait_fence(ctx);
+
+    eglSwapBuffers(p->egl.display, p->egl.surface);
+
+    struct gbm_bo *new_bo = gbm_surface_lock_front_buffer(p->gbm.surface);
+    if (!new_bo) {
+        MP_ERR(ctx->vo, "Couldn't lock front buffer\n");
+        return;
+    }
+    enqueue_bo(ctx, new_bo);
+    new_fence(ctx);
+
+    // Flip while draining, while the queue is deeper than the configured
+    // swapchain depth, or while the GBM surface has no free buffers left.
+    // NOTE(review): with drain set and queue_flip() failing every time,
+    // waiting_for_flip stays false and nothing below shrinks the queue, so
+    // this loop looks like it could spin - confirm.
+    while (drain || p->gbm.num_bos > ctx->vo->opts->swapchain_depth ||
+           !gbm_surface_has_free_buffers(p->gbm.surface)) {
+        // A flip is in flight: wait for it, then retire the queue head.
+        if (drm->waiting_for_flip) {
+            vo_drm_wait_on_flip(drm);
+            swapchain_step(ctx);
+        }
+        // Nothing queued behind the head, so there is nothing left to flip to.
+        if (p->gbm.num_bos <= 1)
+            break;
+        if (!p->gbm.bo_queue[1] || !p->gbm.bo_queue[1]->bo) {
+            MP_ERR(ctx->vo, "Hole in swapchain?\n");
+            swapchain_step(ctx);
+            continue;
+        }
+        // Flip to the entry right behind the head.
+        queue_flip(ctx, p->gbm.bo_queue[1]);
+    }
+}
+
+// Swapchain callbacks for this context; handed to ra_gl_ctx_init() as the
+// external swapchain in drm_egl_init().
+static const struct ra_swapchain_fns drm_egl_swapchain = {
+    .start_frame   = drm_egl_start_frame,
+    .submit_frame  = drm_egl_submit_frame,
+    .swap_buffers  = drm_egl_swap_buffers,
+};
+
+// Tear down the DRM/EGL context.
+//
+// Also used as the error path of drm_egl_init(), so it has to cope with a
+// partially initialized state: ctx->vo->drm and ctx->priv may be NULL, and
+// the members of priv may still be zero.
+static void drm_egl_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    struct vo_drm_state *drm = ctx->vo->drm;
+    if (drm) {
+        struct drm_atomic_context *atomic_ctx = drm->atomic_context;
+
+        // drmModeAtomicCommit() reports failure as a negative errno value;
+        // use that instead of errno, which is not set on every failure path.
+        int ret = drmModeAtomicCommit(drm->fd, atomic_ctx->request, 0, NULL);
+        if (ret)
+            MP_ERR(ctx->vo, "Failed to commit atomic request: %s\n",
+                    mp_strerror(-ret));
+
+        drmModeAtomicFree(atomic_ctx->request);
+    }
+
+    ra_gl_ctx_uninit(ctx);
+    vo_drm_uninit(ctx->vo);
+
+    if (p) {
+        // According to GBM documentation all BO:s must be released
+        // before gbm_surface_destroy can be called on the surface.
+        while (p->gbm.num_bos) {
+            swapchain_step(ctx);
+        }
+
+        eglMakeCurrent(p->egl.display, EGL_NO_SURFACE, EGL_NO_SURFACE,
+                       EGL_NO_CONTEXT);
+        if (p->egl.display != EGL_NO_DISPLAY) {
+            eglDestroySurface(p->egl.display, p->egl.surface);
+            eglDestroyContext(p->egl.display, p->egl.context);
+        }
+        if (p->gbm.surface)
+            gbm_surface_destroy(p->gbm.surface);
+        eglTerminate(p->egl.display);
+        // The device is still NULL if init failed before GBM was set up.
+        if (p->gbm.device)
+            gbm_device_destroy(p->gbm.device);
+
+        if (p->drm_params.render_fd != -1)
+            close(p->drm_params.render_fd);
+    }
+}
+
+// If the draw plane supports ARGB we want to use that, but if it doesn't we
+// fall back on XRGB. If we do not have atomic there is no particular reason to
+// be using ARGB (drmprime hwdec will not work without atomic, anyway), so we
+// fall back to XRGB (another reason is that we do not have the convenient
+// atomic_ctx and its convenient plane fields).
+//
+// On success the chosen format is stored in p->gbm_format and true is
+// returned. Returns false if the plane cannot be queried or if it lists
+// neither of the two formats.
+static bool probe_gbm_format(struct ra_ctx *ctx, uint32_t argb_format, uint32_t xrgb_format)
+{
+    struct priv *p = ctx->priv;
+    struct vo_drm_state *drm = ctx->vo->drm;
+
+    drmModePlane *drmplane = drmModeGetPlane(drm->fd, drm->atomic_context->draw_plane->id);
+    if (!drmplane) {
+        // drmModeGetPlane() returns NULL on failure; don't dereference it.
+        MP_ERR(ctx->vo, "Failed to query the draw plane.\n");
+        return false;
+    }
+
+    bool have_argb = false;
+    bool have_xrgb = false;
+    bool result = false;
+    for (unsigned int i = 0; i < drmplane->count_formats; ++i) {
+        if (drmplane->formats[i] == argb_format) {
+            have_argb = true;
+        } else if (drmplane->formats[i] == xrgb_format) {
+            have_xrgb = true;
+        }
+    }
+
+    if (have_argb) {
+        p->gbm_format = argb_format;
+        MP_VERBOSE(ctx->vo, "%s supported by draw plane.\n", gbm_format_to_string(argb_format));
+        result = true;
+    } else if (have_xrgb) {
+        p->gbm_format = xrgb_format;
+        MP_VERBOSE(ctx->vo, "%s not supported by draw plane: Falling back to %s.\n",
+                   gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format));
+        result = true;
+    }
+
+    drmModeFreePlane(drmplane);
+    return result;
+}
+
+// Collect the format modifiers that the draw plane advertises for
+// p->gbm_format by walking its IN_FORMATS property blob. Every matching
+// modifier is appended to p->gbm_modifiers.
+//
+// Returns false only if the IN_FORMATS blob is not available; an empty result
+// is merely logged and still counts as success.
+static bool probe_gbm_modifiers(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct vo_drm_state *drm = ctx->vo->drm;
+
+    drmModePropertyBlobPtr blob =
+        drm_object_get_property_blob(drm->atomic_context->draw_plane, "IN_FORMATS");
+    if (!blob) {
+        MP_VERBOSE(ctx->vo, "Failed to find IN_FORMATS property\n");
+        return false;
+    }
+
+    // The blob holds a header followed by a format array and a modifier
+    // array, both located via byte offsets stored in the header.
+    struct drm_format_modifier_blob *header = blob->data;
+    char *base = (char *)header;
+    uint32_t *formats = (uint32_t *)(base + header->formats_offset);
+    struct drm_format_modifier *entries =
+        (struct drm_format_modifier *)(base + header->modifiers_offset);
+
+    for (unsigned int m = 0; m < header->count_modifiers; ++m) {
+        struct drm_format_modifier *entry = &entries[m];
+        // Each set bit selects one format, indexed relative to entry->offset.
+        for (uint64_t bit = 0; bit < 64; ++bit) {
+            if (!(entry->formats & (1ull << bit)))
+                continue;
+            if (formats[bit + entry->offset] != priv->gbm_format)
+                continue;
+
+            MP_TARRAY_APPEND(priv, priv->gbm_modifiers,
+                             priv->num_gbm_modifiers, entry->modifier);
+            MP_VERBOSE(ctx->vo, "Supported modifier: 0x%"PRIX64"\n",
+                       (uint64_t)entry->modifier);
+            break;
+        }
+    }
+    drmModeFreePropertyBlob(blob);
+
+    if (priv->num_gbm_modifiers == 0)
+        MP_VERBOSE(ctx->vo, "No supported DRM modifiers found.\n");
+
+    return true;
+}
+
+// get_vsync hook: fill *info from the DRM state's presentation sync data.
+static void drm_egl_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info)
+{
+    present_sync_get_info(ctx->vo->drm->present, info);
+}
+
+// Initialize the DRM/EGL OpenGL context.
+//
+// Sets up the DRM state, chooses a GBM format (and modifiers, if available),
+// brings up GBM and EGL, creates the first framebuffer, acquires the CRTC and
+// finally initializes the generic GL context with drm_egl_swapchain.
+// Returns true on success. On any failure drm_egl_uninit() is called and
+// false is returned.
+static bool drm_egl_init(struct ra_ctx *ctx)
+{
+    if (!vo_drm_init(ctx->vo))
+        goto err;
+
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    struct vo_drm_state *drm = ctx->vo->drm;
+
+    // priv is zero-filled, which would make render_fd look like an open fd 0.
+    // drm_egl_uninit() closes anything other than -1, so mark it as unset
+    // before the first possible failure.
+    p->drm_params.render_fd = -1;
+
+    if (ctx->vo->drm->opts->draw_surface_size.wh_valid) {
+        p->draw_surface_size.width = ctx->vo->drm->opts->draw_surface_size.w;
+        p->draw_surface_size.height = ctx->vo->drm->opts->draw_surface_size.h;
+    } else {
+        p->draw_surface_size.width = drm->mode.mode.hdisplay;
+        p->draw_surface_size.height = drm->mode.mode.vdisplay;
+    }
+
+    drm->width = p->draw_surface_size.width;
+    drm->height = p->draw_surface_size.height;
+
+    // Map the configured DRM format to the ARGB/XRGB pair that is probed.
+    uint32_t argb_format;
+    uint32_t xrgb_format;
+    switch (ctx->vo->drm->opts->drm_format) {
+    case DRM_OPTS_FORMAT_XRGB2101010:
+        argb_format = GBM_FORMAT_ARGB2101010;
+        xrgb_format = GBM_FORMAT_XRGB2101010;
+        break;
+    case DRM_OPTS_FORMAT_XBGR2101010:
+        argb_format = GBM_FORMAT_ABGR2101010;
+        xrgb_format = GBM_FORMAT_XBGR2101010;
+        break;
+    case DRM_OPTS_FORMAT_XBGR8888:
+        argb_format = GBM_FORMAT_ABGR8888;
+        xrgb_format = GBM_FORMAT_XBGR8888;
+        break;
+    default:
+        argb_format = GBM_FORMAT_ARGB8888;
+        xrgb_format = GBM_FORMAT_XRGB8888;
+        break;
+    }
+
+    if (!probe_gbm_format(ctx, argb_format, xrgb_format)) {
+        MP_ERR(ctx->vo, "No suitable format found on draw plane (tried: %s and %s).\n",
+               gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format));
+        goto err;
+    }
+
+    // It is not fatal if this fails. We'll just try without modifiers.
+    probe_gbm_modifiers(ctx);
+
+    if (!init_gbm(ctx)) {
+        MP_ERR(ctx->vo, "Failed to setup GBM.\n");
+        goto err;
+    }
+
+    if (!init_egl(ctx)) {
+        MP_ERR(ctx->vo, "Failed to setup EGL.\n");
+        goto err;
+    }
+
+    if (!eglMakeCurrent(p->egl.display, p->egl.surface, p->egl.surface,
+                        p->egl.context)) {
+        MP_ERR(ctx->vo, "Failed to make context current.\n");
+        goto err;
+    }
+
+    mpegl_load_functions(&p->gl, ctx->vo->log);
+    // required by gbm_surface_lock_front_buffer
+    eglSwapBuffers(p->egl.display, p->egl.surface);
+
+    MP_VERBOSE(ctx, "Preparing framebuffer\n");
+    struct gbm_bo *new_bo = gbm_surface_lock_front_buffer(p->gbm.surface);
+    if (!new_bo) {
+        MP_ERR(ctx, "Failed to lock GBM surface.\n");
+        goto err;
+    }
+
+    enqueue_bo(ctx, new_bo);
+    update_framebuffer_from_bo(ctx, new_bo);
+    if (!drm->fb || !drm->fb->id) {
+        MP_ERR(ctx, "Failed to create framebuffer.\n");
+        goto err;
+    }
+
+    if (!vo_drm_acquire_crtc(ctx->vo->drm)) {
+        MP_ERR(ctx, "Failed to set CRTC for connector %u: %s\n",
+               drm->connector->connector_id, mp_strerror(errno));
+        goto err;
+    }
+
+    vo_drm_set_monitor_par(ctx->vo);
+
+    p->drm_params.fd = drm->fd;
+    p->drm_params.crtc_id = drm->crtc_id;
+    p->drm_params.connector_id = drm->connector->connector_id;
+    p->drm_params.atomic_request_ptr = &drm->atomic_context->request;
+    // A missing or unopenable render node is not fatal; render_fd stays -1.
+    char *rendernode_path = drmGetRenderDeviceNameFromFd(drm->fd);
+    if (rendernode_path) {
+        MP_VERBOSE(ctx, "Opening render node \"%s\"\n", rendernode_path);
+        p->drm_params.render_fd = open(rendernode_path, O_RDWR | O_CLOEXEC);
+        if (p->drm_params.render_fd == -1) {
+            MP_WARN(ctx, "Cannot open render node: %s\n", mp_strerror(errno));
+        }
+        free(rendernode_path);
+    } else {
+        p->drm_params.render_fd = -1;
+        MP_VERBOSE(ctx, "Could not find path to render node.\n");
+    }
+
+    struct ra_gl_ctx_params params = {
+        .external_swapchain = &drm_egl_swapchain,
+        .get_vsync          = &drm_egl_get_vsync,
+    };
+    if (!ra_gl_ctx_init(ctx, &p->gl, params))
+        goto err;
+
+    ra_add_native_resource(ctx->ra, "drm_params_v2", &p->drm_params);
+    ra_add_native_resource(ctx->ra, "drm_draw_surface_size", &p->draw_surface_size);
+
+    return true;
+
+err:
+    drm_egl_uninit(ctx);
+    return false;
+}
+
+// reconfig hook: make the VO's display size and the GL swapchain size follow
+// the dimensions of the current DRM framebuffer. Always returns true.
+static bool drm_egl_reconfig(struct ra_ctx *ctx)
+{
+    struct vo_drm_state *state = ctx->vo->drm;
+
+    ctx->vo->dwidth  = state->fb->width;
+    ctx->vo->dheight = state->fb->height;
+
+    ra_gl_ctx_resize(ctx->swapchain, state->fb->width, state->fb->height, 0);
+    return true;
+}
+
+// control hook: pass the request straight through to the common DRM code and
+// return its result.
+static int drm_egl_control(struct ra_ctx *ctx, int *events, int request,
+                           void *arg)
+{
+    return vo_drm_control(ctx->vo, events, request, arg);
+}
+
+// wait_events hook: forwards to the common DRM code with the same deadline.
+static void drm_egl_wait_events(struct ra_ctx *ctx, int64_t until_time_ns)
+{
+    vo_drm_wait_events(ctx->vo, until_time_ns);
+}
+
+// wakeup hook: forwards to the common DRM code.
+static void drm_egl_wakeup(struct ra_ctx *ctx)
+{
+    vo_drm_wakeup(ctx->vo);
+}
+
+// Context descriptor for the "drm" backend of type "opengl", wiring up the
+// drm_egl_* entry points defined above.
+const struct ra_ctx_fns ra_ctx_drm_egl = {
+    .type           = "opengl",
+    .name           = "drm",
+    .reconfig       = drm_egl_reconfig,
+    .control        = drm_egl_control,
+    .init           = drm_egl_init,
+    .uninit         = drm_egl_uninit,
+    .wait_events    = drm_egl_wait_events,
+    .wakeup         = drm_egl_wakeup,
+};
diff --git a/video/out/opengl/context_dxinterop.c b/video/out/opengl/context_dxinterop.c
new file mode 100644
index 0000000..cda696f
--- /dev/null
+++ b/video/out/opengl/context_dxinterop.c
@@ -0,0 +1,605 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <versionhelpers.h>
+#include <d3d9.h>
+#include <dwmapi.h>
+#include "osdep/windows_utils.h"
+#include "video/out/w32_common.h"
+#include "context.h"
+#include "utils.h"
+
+// For WGL_ACCESS_WRITE_DISCARD_NV, etc.
+#include <GL/wglext.h>
+
+EXTERN_C IMAGE_DOS_HEADER __ImageBase;
+#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase)
+
+// mingw-w64 header typo?
+#ifndef IDirect3DSwapChain9Ex_GetBackBuffer
+#define IDirect3DSwapChain9Ex_GetBackBuffer IDirect3DSwapChain9EX_GetBackBuffer
+#endif
+
+// Per-context state of the dxinterop backend. Allocated zero-filled in
+// dxgl_init().
+struct priv {
+    GL gl;
+
+    // Handle of the loaded d3d9.dll and the entry point resolved from it
+    HMODULE d3d9_dll;
+    HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D);
+
+    // Direct3D9 device and resources
+    IDirect3D9Ex *d3d9ex;
+    IDirect3DDevice9Ex *device;
+    HANDLE device_h;    // interop handle returned by DXOpenDeviceNV
+    IDirect3DSwapChain9Ex *swapchain;
+    IDirect3DSurface9 *backbuffer;
+    IDirect3DSurface9 *rtarget;  // rendertarget that OpenGL renders into
+    HANDLE rtarget_h;   // interop handle returned by DXRegisterObjectNV
+
+    // OpenGL offscreen context
+    HWND os_wnd;
+    HDC os_dc;
+    HGLRC os_ctx;
+
+    // OpenGL resources
+    GLuint texture;     // GL texture registered for rtarget
+    GLuint main_fb;     // framebuffer that texture gets attached to
+
+    // Did we lose the device?
+    bool lost_device;
+
+    // Requested and current parameters
+    int requested_swapinterval;
+    int width, height, swapinterval;
+};
+
+// Context set by dxgl_init() on this thread. dxgl_swap_interval() receives no
+// context argument and uses this to find the one to operate on.
+static __thread struct ra_ctx *current_ctx;
+
+// Drain and dispatch every message currently pending on this thread.
+static void pump_message_loop(void)
+{
+    // The OpenGL context lives on a hidden window owned by this thread, so
+    // its message queue gets serviced at regular intervals to be safe
+    MSG msg;
+    for (;;) {
+        if (!PeekMessageW(&msg, NULL, 0, 0, PM_REMOVE))
+            break;
+        DispatchMessageW(&msg);
+    }
+}
+
+// Resolve an OpenGL entry point by name: ask wglGetProcAddress() first and,
+// if that yields nothing, look the symbol up in opengl32.dll directly.
+static void *w32gpa(const GLubyte *procName)
+{
+    void *proc = wglGetProcAddress(procName);
+    if (!proc)
+        proc = GetProcAddress(GetModuleHandleW(L"opengl32.dll"), procName);
+    return proc;
+}
+
+// Create the hidden window and the OpenGL 3.x core context used for
+// offscreen rendering, make that context current and load the GL functions.
+//
+// Returns 0 on success and -1 on failure. On failure only the temporary
+// legacy context is cleaned up here; whatever was already stored in priv
+// (window, DC, context) is left for os_ctx_destroy().
+static int os_ctx_create(struct ra_ctx *ctx)
+{
+    static const wchar_t os_wnd_class[] = L"mpv offscreen gl";
+    struct priv *p = ctx->priv;
+    GL *gl = &p->gl;
+    HGLRC legacy_context = NULL;
+
+    RegisterClassExW(&(WNDCLASSEXW) {
+        .cbSize = sizeof(WNDCLASSEXW),
+        .style = CS_OWNDC,
+        .lpfnWndProc = DefWindowProc,
+        .hInstance = HINST_THISCOMPONENT,
+        .lpszClassName = os_wnd_class,
+    });
+
+    // Create a hidden window for an offscreen OpenGL context. It might also be
+    // possible to use the VO window, but MSDN recommends against drawing to
+    // the same window with flip mode present and other APIs, so play it safe.
+    p->os_wnd = CreateWindowExW(0, os_wnd_class, os_wnd_class, 0, 0, 0, 200,
+        200, NULL, NULL, HINST_THISCOMPONENT, NULL);
+    // The window has to be checked itself: GetDC(NULL) does not fail, it
+    // returns the DC of the whole screen.
+    if (!p->os_wnd) {
+        MP_FATAL(ctx->vo, "Couldn't create window for offscreen rendering\n");
+        goto fail;
+    }
+    p->os_dc = GetDC(p->os_wnd);
+    if (!p->os_dc) {
+        MP_FATAL(ctx->vo, "Couldn't create window for offscreen rendering\n");
+        goto fail;
+    }
+
+    // Choose a pixel format. It probably doesn't matter what this is because
+    // the primary framebuffer will not be used.
+    PIXELFORMATDESCRIPTOR pfd = {
+        .nSize = sizeof pfd,
+        .nVersion = 1,
+        .dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER,
+        .iPixelType = PFD_TYPE_RGBA,
+        .cColorBits = 24,
+        .iLayerType = PFD_MAIN_PLANE,
+    };
+    int pf = ChoosePixelFormat(p->os_dc, &pfd);
+    if (!pf) {
+        MP_FATAL(ctx->vo,
+                 "Couldn't choose pixelformat for offscreen rendering: %s\n",
+                 mp_LastError_to_str());
+        goto fail;
+    }
+    if (!SetPixelFormat(p->os_dc, pf, &pfd)) {
+        MP_FATAL(ctx->vo,
+                 "Couldn't set pixelformat for offscreen rendering: %s\n",
+                 mp_LastError_to_str());
+        goto fail;
+    }
+
+    // A legacy context is needed first; the WGL extension entry points used
+    // below are looked up while it is current.
+    legacy_context = wglCreateContext(p->os_dc);
+    if (!legacy_context || !wglMakeCurrent(p->os_dc, legacy_context)) {
+        MP_FATAL(ctx->vo, "Couldn't create OpenGL context for offscreen rendering: %s\n",
+                 mp_LastError_to_str());
+        goto fail;
+    }
+
+    const char *(GLAPIENTRY *wglGetExtensionsStringARB)(HDC hdc)
+        = w32gpa((const GLubyte*)"wglGetExtensionsStringARB");
+    if (!wglGetExtensionsStringARB) {
+        MP_FATAL(ctx->vo, "The OpenGL driver does not support OpenGL 3.x\n");
+        goto fail;
+    }
+
+    const char *wgl_exts = wglGetExtensionsStringARB(p->os_dc);
+    if (!gl_check_extension(wgl_exts, "WGL_ARB_create_context")) {
+        MP_FATAL(ctx->vo, "The OpenGL driver does not support OpenGL 3.x\n");
+        goto fail;
+    }
+
+    HGLRC (GLAPIENTRY *wglCreateContextAttribsARB)(HDC hDC, HGLRC hShareContext,
+                                                   const int *attribList)
+        = w32gpa((const GLubyte*)"wglCreateContextAttribsARB");
+    if (!wglCreateContextAttribsARB) {
+        MP_FATAL(ctx->vo, "The OpenGL driver does not support OpenGL 3.x\n");
+        goto fail;
+    }
+
+    int attribs[] = {
+        WGL_CONTEXT_MAJOR_VERSION_ARB, 3,
+        WGL_CONTEXT_MINOR_VERSION_ARB, 0,
+        WGL_CONTEXT_FLAGS_ARB, 0,
+        WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB,
+        0
+    };
+
+    p->os_ctx = wglCreateContextAttribsARB(p->os_dc, 0, attribs);
+    if (!p->os_ctx) {
+        // Some drivers (the original note names NVidia) error out instead of
+        // ignoring attributes they do not expect on pre-3.2 contexts.
+        // Retry with the list terminated right after WGL_CONTEXT_FLAGS_ARB,
+        // i.e. without the profile mask pair stored at attribs[6..7].
+        attribs[6] = attribs[7] = 0;
+        p->os_ctx = wglCreateContextAttribsARB(p->os_dc, 0, attribs);
+    }
+    if (!p->os_ctx) {
+        MP_FATAL(ctx->vo,
+                 "Couldn't create OpenGL 3.x context for offscreen rendering: %s\n",
+                 mp_LastError_to_str());
+        goto fail;
+    }
+
+    // The legacy context has served its purpose.
+    wglMakeCurrent(p->os_dc, NULL);
+    wglDeleteContext(legacy_context);
+    legacy_context = NULL;
+
+    if (!wglMakeCurrent(p->os_dc, p->os_ctx)) {
+        MP_FATAL(ctx->vo,
+                 "Couldn't activate OpenGL 3.x context for offscreen rendering: %s\n",
+                 mp_LastError_to_str());
+        goto fail;
+    }
+
+    mpgl_load_functions(gl, w32gpa, wgl_exts, ctx->vo->log);
+    if (!(gl->mpgl_caps & MPGL_CAP_DXINTEROP)) {
+        MP_FATAL(ctx->vo, "WGL_NV_DX_interop is not supported\n");
+        goto fail;
+    }
+
+    return 0;
+fail:
+    if (legacy_context) {
+        wglMakeCurrent(p->os_dc, NULL);
+        wglDeleteContext(legacy_context);
+    }
+    return -1;
+}
+
+// Undo os_ctx_create(): delete the offscreen GL context, release the device
+// context and destroy the hidden window. Each step is skipped if the
+// corresponding handle was never set.
+static void os_ctx_destroy(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+
+    if (priv->os_ctx) {
+        // Make sure the context is no longer current before deleting it
+        wglMakeCurrent(priv->os_dc, NULL);
+        wglDeleteContext(priv->os_ctx);
+    }
+
+    if (priv->os_dc)
+        ReleaseDC(priv->os_wnd, priv->os_dc);
+
+    if (priv->os_wnd)
+        DestroyWindow(priv->os_wnd);
+}
+
+// Create everything that depends on the backbuffer size: fetch the swap chain
+// and its backbuffer, create a matching rendertarget, register it with
+// WGL_NV_DX_interop as a GL texture, lock it for OpenGL and attach the
+// texture to main_fb.
+//
+// Returns 0 on success and -1 on failure. Nothing is released on failure;
+// objects created up to that point stay stored in priv.
+static int d3d_size_dependent_create(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    GL *gl = &p->gl;
+    HRESULT hr;
+
+    IDirect3DSwapChain9 *sw9;
+    hr = IDirect3DDevice9Ex_GetSwapChain(p->device, 0, &sw9);
+    if (FAILED(hr)) {
+        MP_ERR(ctx->vo, "Couldn't get swap chain: %s\n", mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    // Only the Ex interface is kept; the plain one is released either way.
+    hr = IDirect3DSwapChain9_QueryInterface(sw9, &IID_IDirect3DSwapChain9Ex,
+        (void**)&p->swapchain);
+    if (FAILED(hr)) {
+        SAFE_RELEASE(sw9);
+        MP_ERR(ctx->vo, "Obtained swap chain is not IDirect3DSwapChain9Ex: %s\n",
+               mp_HRESULT_to_str(hr));
+        return -1;
+    }
+    SAFE_RELEASE(sw9);
+
+    hr = IDirect3DSwapChain9Ex_GetBackBuffer(p->swapchain, 0,
+        D3DBACKBUFFER_TYPE_MONO, &p->backbuffer);
+    if (FAILED(hr)) {
+        MP_ERR(ctx->vo, "Couldn't get backbuffer: %s\n", mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    // Get the format of the backbuffer
+    // (the result of GetDesc is not checked; bb_desc stays zeroed if it fails)
+    D3DSURFACE_DESC bb_desc = { 0 };
+    IDirect3DSurface9_GetDesc(p->backbuffer, &bb_desc);
+
+    MP_VERBOSE(ctx->vo, "DX_interop backbuffer size: %ux%u\n",
+        (unsigned)bb_desc.Width, (unsigned)bb_desc.Height);
+    MP_VERBOSE(ctx->vo, "DX_interop backbuffer format: %u\n",
+        (unsigned)bb_desc.Format);
+
+    // Create a rendertarget with the same format as the backbuffer for
+    // rendering from OpenGL
+    HANDLE share_handle = NULL;
+    hr = IDirect3DDevice9Ex_CreateRenderTarget(p->device, bb_desc.Width,
+        bb_desc.Height, bb_desc.Format, D3DMULTISAMPLE_NONE, 0, FALSE,
+        &p->rtarget, &share_handle);
+    if (FAILED(hr)) {
+        MP_ERR(ctx->vo, "Couldn't create rendertarget: %s\n", mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    // Register the share handle with WGL_NV_DX_interop. Nvidia does not
+    // require the use of share handles, but Intel does.
+    if (share_handle)
+        gl->DXSetResourceShareHandleNV(p->rtarget, share_handle);
+
+    // Create the OpenGL-side texture
+    gl->GenTextures(1, &p->texture);
+
+    // Now share the rendertarget with OpenGL as a texture
+    p->rtarget_h = gl->DXRegisterObjectNV(p->device_h, p->rtarget, p->texture,
+        GL_TEXTURE_2D, WGL_ACCESS_WRITE_DISCARD_NV);
+    if (!p->rtarget_h) {
+        MP_ERR(ctx->vo, "Couldn't share rendertarget with OpenGL: %s\n",
+               mp_LastError_to_str());
+        return -1;
+    }
+
+    // Lock the rendertarget for use from OpenGL. This will only be unlocked in
+    // swap_buffers() when it is blitted to the real Direct3D backbuffer.
+    if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) {
+        MP_ERR(ctx->vo, "Couldn't lock rendertarget: %s\n",
+               mp_LastError_to_str());
+        return -1;
+    }
+
+    // Attach the shared texture as the color buffer of main_fb, then restore
+    // the default framebuffer binding.
+    gl->BindFramebuffer(GL_FRAMEBUFFER, p->main_fb);
+    gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+        GL_TEXTURE_2D, p->texture, 0);
+    gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+
+    return 0;
+}
+
+// Release everything d3d_size_dependent_create() set up: the interop
+// registration of the rendertarget, the GL texture, and the Direct3D
+// rendertarget, backbuffer and swap chain references. Safe to call when some
+// or all of them were never created.
+static void d3d_size_dependent_destroy(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    GL *gl = &priv->gl;
+
+    if (priv->rtarget_h) {
+        gl->DXUnlockObjectsNV(priv->device_h, 1, &priv->rtarget_h);
+        gl->DXUnregisterObjectNV(priv->device_h, priv->rtarget_h);
+        priv->rtarget_h = 0;
+    }
+
+    if (priv->texture) {
+        gl->DeleteTextures(1, &priv->texture);
+        priv->texture = 0;
+    }
+
+    SAFE_RELEASE(priv->rtarget);
+    SAFE_RELEASE(priv->backbuffer);
+    SAFE_RELEASE(priv->swapchain);
+}
+
+// Fill *pparams with the presentation parameters for the current VO size,
+// the configured swapchain depth and the requested swap interval.
+static void fill_presentparams(struct ra_ctx *ctx,
+                               D3DPRESENT_PARAMETERS *pparams)
+{
+    struct priv *priv = ctx->priv;
+
+    // Present intervals other than IMMEDIATE and ONE don't seem to work. It's
+    // possible that they're not compatible with FLIPEX. So only a requested
+    // interval of 0 maps to IMMEDIATE; everything else is treated as ONE.
+    const UINT interval = priv->requested_swapinterval == 0
+                              ? D3DPRESENT_INTERVAL_IMMEDIATE
+                              : D3DPRESENT_INTERVAL_ONE;
+
+    *pparams = (D3DPRESENT_PARAMETERS) {
+        .Windowed = TRUE,
+        // A zero-sized VO still gets a 1x1 backbuffer
+        .BackBufferWidth = ctx->vo->dwidth ? ctx->vo->dwidth : 1,
+        .BackBufferHeight = ctx->vo->dheight ? ctx->vo->dheight : 1,
+        // Add one frame for the backbuffer and one frame of "slack" to reduce
+        // contention with the window manager when acquiring the backbuffer
+        .BackBufferCount = ctx->vo->opts->swapchain_depth + 2,
+        .SwapEffect = IsWindows7OrGreater() ? D3DSWAPEFFECT_FLIPEX : D3DSWAPEFFECT_FLIP,
+        // Automatically get the backbuffer format from the display format
+        .BackBufferFormat = D3DFMT_UNKNOWN,
+        .PresentationInterval = interval,
+        .hDeviceWindow = vo_w32_hwnd(ctx->vo),
+    };
+}
+
+// Load d3d9.dll, create the Direct3D 9Ex object and device for the VO window,
+// and open the device for WGL_NV_DX_interop.
+//
+// Returns 0 on success and -1 on failure. Nothing is released on failure;
+// whatever was stored in priv is left for d3d_destroy().
+static int d3d_create(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    GL *gl = &p->gl;
+    HRESULT hr;
+
+    p->d3d9_dll = LoadLibraryW(L"d3d9.dll");
+    if (!p->d3d9_dll) {
+        MP_FATAL(ctx->vo, "Failed to load \"d3d9.dll\": %s\n",
+                 mp_LastError_to_str());
+        return -1;
+    }
+
+    // WGL_NV_dx_interop requires Direct3D 9Ex on WDDM systems. Direct3D 9Ex
+    // also enables flip mode present for efficient rendering with the DWM.
+    p->Direct3DCreate9Ex = (void*)GetProcAddress(p->d3d9_dll,
+        "Direct3DCreate9Ex");
+    if (!p->Direct3DCreate9Ex) {
+        MP_FATAL(ctx->vo, "Direct3D 9Ex not supported\n");
+        return -1;
+    }
+
+    hr = p->Direct3DCreate9Ex(D3D_SDK_VERSION, &p->d3d9ex);
+    if (FAILED(hr)) {
+        MP_FATAL(ctx->vo, "Couldn't create Direct3D9Ex: %s\n",
+                 mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    D3DPRESENT_PARAMETERS pparams;
+    fill_presentparams(ctx, &pparams);
+
+    hr = IDirect3D9Ex_CreateDeviceEx(p->d3d9ex, D3DADAPTER_DEFAULT,
+        D3DDEVTYPE_HAL, vo_w32_hwnd(ctx->vo),
+        D3DCREATE_HARDWARE_VERTEXPROCESSING | D3DCREATE_PUREDEVICE |
+        D3DCREATE_FPU_PRESERVE | D3DCREATE_MULTITHREADED |
+        D3DCREATE_NOWINDOWCHANGES,
+        &pparams, NULL, &p->device);
+    if (FAILED(hr)) {
+        MP_FATAL(ctx->vo, "Couldn't create device: %s\n", mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    // Frame latency follows the configured swapchain depth (result unchecked)
+    IDirect3DDevice9Ex_SetMaximumFrameLatency(p->device, ctx->vo->opts->swapchain_depth);
+
+    // Register the Direct3D device with WGL_NV_dx_interop
+    p->device_h = gl->DXOpenDeviceNV(p->device);
+    if (!p->device_h) {
+        MP_FATAL(ctx->vo, "Couldn't open Direct3D device from OpenGL: %s\n",
+                 mp_LastError_to_str());
+        return -1;
+    }
+
+    return 0;
+}
+
+// Undo d3d_create(): close the interop device handle, release the Direct3D
+// device and 9Ex object, and unload d3d9.dll. Steps whose handle was never
+// set are skipped.
+static void d3d_destroy(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    GL *gl = &priv->gl;
+
+    if (priv->device_h)
+        gl->DXCloseDeviceNV(priv->device_h);
+
+    SAFE_RELEASE(priv->device);
+    SAFE_RELEASE(priv->d3d9ex);
+
+    if (priv->d3d9_dll)
+        FreeLibrary(priv->d3d9_dll);
+}
+
+// Tear down the dxinterop context. Also serves as the failure path of
+// dxgl_init(), so it may run on a partially initialized context.
+static void dxgl_uninit(struct ra_ctx *ctx)
+{
+    ra_gl_ctx_uninit(ctx);
+    // Interop/D3D objects go first, then the GL context they were used with,
+    // and the VO window last.
+    d3d_size_dependent_destroy(ctx);
+    d3d_destroy(ctx);
+    os_ctx_destroy(ctx);
+    vo_w32_uninit(ctx->vo);
+    DwmEnableMMCSS(FALSE);
+    pump_message_loop();
+}
+
+// Reset the Direct3D device if the VO size or requested swap interval no
+// longer match the current ones, or if the device was marked as lost.
+//
+// On success the size-dependent objects are recreated and the current
+// parameters updated. On failure lost_device is set, so a later call tries
+// again.
+static void dxgl_reset(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    HRESULT hr;
+
+    // Check if the device actually needs to be reset
+    if (ctx->vo->dwidth == p->width && ctx->vo->dheight == p->height &&
+        p->requested_swapinterval == p->swapinterval && !p->lost_device)
+        return;
+
+    // Size-dependent objects are dropped before the reset and recreated
+    // after it.
+    d3d_size_dependent_destroy(ctx);
+
+    D3DPRESENT_PARAMETERS pparams;
+    fill_presentparams(ctx, &pparams);
+
+    hr = IDirect3DDevice9Ex_ResetEx(p->device, &pparams, NULL);
+    if (FAILED(hr)) {
+        p->lost_device = true;
+        MP_ERR(ctx->vo, "Couldn't reset device: %s\n", mp_HRESULT_to_str(hr));
+        return;
+    }
+
+    if (d3d_size_dependent_create(ctx) < 0) {
+        p->lost_device = true;
+        MP_ERR(ctx->vo, "Couldn't recreate Direct3D objects after reset\n");
+        return;
+    }
+
+    MP_VERBOSE(ctx->vo, "Direct3D device reset\n");
+    p->width = ctx->vo->dwidth;
+    p->height = ctx->vo->dheight;
+    p->swapinterval = p->requested_swapinterval;
+    p->lost_device = false;
+}
+
+// SwapInterval replacement installed into the GL function table by
+// dxgl_init(). Stores the requested interval on this thread's context and
+// lets dxgl_reset() apply it. Returns 0 if no context is registered on this
+// thread, 1 otherwise.
+static int GLAPIENTRY dxgl_swap_interval(int interval)
+{
+    struct ra_ctx *ctx = current_ctx;
+    if (ctx == NULL)
+        return 0;
+
+    struct priv *priv = ctx->priv;
+    priv->requested_swapinterval = interval;
+
+    dxgl_reset(ctx);
+    return 1;
+}
+
+// Present the frame rendered by OpenGL: unlock the shared rendertarget, blit
+// it to the Direct3D backbuffer, present, and lock the rendertarget again for
+// the next frame.
+//
+// If the device is (still) lost and cannot be reset, nothing is presented. A
+// device loss reported by PresentEx triggers a reset, which recreates and
+// re-locks the rendertarget itself.
+static void dxgl_swap_buffers(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    GL *gl = &p->gl;
+    HRESULT hr;
+
+    pump_message_loop();
+
+    // If the device is still lost, try to reset it again
+    if (p->lost_device)
+        dxgl_reset(ctx);
+    if (p->lost_device)
+        return;
+
+    if (!gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h)) {
+        MP_ERR(ctx->vo, "Couldn't unlock rendertarget for present: %s\n",
+               mp_LastError_to_str());
+        return;
+    }
+
+    // Blit the OpenGL rendertarget to the backbuffer
+    hr = IDirect3DDevice9Ex_StretchRect(p->device, p->rtarget, NULL,
+                                        p->backbuffer, NULL, D3DTEXF_NONE);
+    if (FAILED(hr)) {
+        MP_ERR(ctx->vo, "Couldn't stretchrect for present: %s\n",
+               mp_HRESULT_to_str(hr));
+        // Skip the present, but still hand the rendertarget back to OpenGL.
+        // Returning here would leave it unlocked, so later rendering and
+        // every following unlock attempt would fail.
+        goto relock;
+    }
+
+    hr = IDirect3DDevice9Ex_PresentEx(p->device, NULL, NULL, NULL, NULL, 0);
+    switch (hr) {
+    case D3DERR_DEVICELOST:
+    case D3DERR_DEVICEHUNG:
+        MP_VERBOSE(ctx->vo, "Direct3D device lost! Resetting.\n");
+        p->lost_device = true;
+        dxgl_reset(ctx);
+        return;
+    default:
+        if (FAILED(hr))
+            MP_ERR(ctx->vo, "Failed to present: %s\n", mp_HRESULT_to_str(hr));
+    }
+
+relock:
+    if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) {
+        MP_ERR(ctx->vo, "Couldn't lock rendertarget after present: %s\n",
+               mp_LastError_to_str());
+    }
+}
+
+// Initialize the dxinterop context: VO window, offscreen GL context, shared
+// framebuffer, Direct3D device and size-dependent objects, then the generic
+// GL context on top. Returns true on success. On any failure dxgl_uninit()
+// is called and false is returned.
+static bool dxgl_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    GL *gl = &p->gl;
+
+    // Start out with a swap interval of 1
+    p->requested_swapinterval = 1;
+
+    if (!vo_w32_init(ctx->vo))
+        goto fail;
+    if (os_ctx_create(ctx) < 0)
+        goto fail;
+
+    // Create the shared framebuffer
+    gl->GenFramebuffers(1, &p->main_fb);
+
+    // Route swap interval changes through dxgl_swap_interval(), which finds
+    // the context via current_ctx.
+    current_ctx = ctx;
+    gl->SwapInterval = dxgl_swap_interval;
+
+    if (d3d_create(ctx) < 0)
+        goto fail;
+    if (d3d_size_dependent_create(ctx) < 0)
+        goto fail;
+
+    static const struct ra_swapchain_fns empty_swapchain_fns = {0};
+    struct ra_gl_ctx_params params = {
+        .swap_buffers = dxgl_swap_buffers,
+        .external_swapchain = &empty_swapchain_fns,
+    };
+
+    gl->flipped = true;
+    if (!ra_gl_ctx_init(ctx, gl, params))
+        goto fail;
+
+    // Expose the device and its interop handle as native resources
+    ra_add_native_resource(ctx->ra, "IDirect3DDevice9Ex", p->device);
+    ra_add_native_resource(ctx->ra, "dxinterop_device_HANDLE", p->device_h);
+
+    DwmEnableMMCSS(TRUE);
+    return true;
+fail:
+    dxgl_uninit(ctx);
+    return false;
+}
+
+// Bring the Direct3D device in line with the current VO size (resetting it
+// if needed) and tell the GL swapchain about the size and the framebuffer to
+// render into.
+static void resize(struct ra_ctx *ctx)
+{
+    dxgl_reset(ctx);
+
+    struct priv *priv = ctx->priv;
+    ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight,
+                     priv->main_fb);
+}
+
+// reconfig hook: reconfigure the VO window, then resize to match it.
+// Always returns true.
+static bool dxgl_reconfig(struct ra_ctx *ctx)
+{
+    vo_w32_config(ctx->vo);
+    resize(ctx);
+    return true;
+}
+
+// control hook: forward the request to the common win32 code and, if that
+// flagged a resize event, resize right away. Returns the result of the
+// forwarded call.
+static int dxgl_control(struct ra_ctx *ctx, int *events, int request,
+                             void *arg)
+{
+    const int result = vo_w32_control(ctx->vo, events, request, arg);
+
+    if ((*events & VO_EVENT_RESIZE) != 0)
+        resize(ctx);
+
+    return result;
+}
+
+// Context descriptor for the "dxinterop" backend of type "opengl", wiring up
+// the dxgl_* entry points defined above.
+const struct ra_ctx_fns ra_ctx_dxgl = {
+    .type         = "opengl",
+    .name         = "dxinterop",
+    .init         = dxgl_init,
+    .reconfig     = dxgl_reconfig,
+    .control      = dxgl_control,
+    .uninit       = dxgl_uninit,
+};
diff --git a/video/out/opengl/context_glx.c b/video/out/opengl/context_glx.c
new file mode 100644
index 0000000..4062224
--- /dev/null
+++ b/video/out/opengl/context_glx.c
@@ -0,0 +1,351 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <X11/Xlib.h>
+#include <GL/glx.h>
+
+// FreeBSD 10.0-CURRENT lacks the GLX_ARB_create_context extension completely
+#ifndef GLX_CONTEXT_MAJOR_VERSION_ARB
+#define GLX_CONTEXT_MAJOR_VERSION_ARB           0x2091
+#define GLX_CONTEXT_MINOR_VERSION_ARB           0x2092
+#define GLX_CONTEXT_FLAGS_ARB                   0x2094
+#define GLX_CONTEXT_PROFILE_MASK_ARB            0x9126
+#ifndef __APPLE__
+// These are respectively 0x00000001 and 0x00000002 on OSX
+#define GLX_CONTEXT_DEBUG_BIT_ARB               0x0001
+#define GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB  0x0002
+#endif
+#define GLX_CONTEXT_CORE_PROFILE_BIT_ARB        0x00000001
+#define GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB 0x00000002
+#endif
+// GLX_EXT_create_context_es2_profile
+#ifndef GLX_CONTEXT_ES2_PROFILE_BIT_EXT
+#define GLX_CONTEXT_ES2_PROFILE_BIT_EXT         0x00000004
+#endif
+
+#include "osdep/timer.h"
+#include "video/out/present_sync.h"
+#include "video/out/x11_common.h"
+#include "context.h"
+#include "utils.h"
+
+// Per-context private state of the GLX backend (allocated in glx_init()).
+struct priv {
+    GL gl;                  // GL function table, filled by mpgl_load_functions()
+    XVisualInfo *vinfo;     // X visual of the chosen FB config; may be NULL,
+                            // released with XFree() in glx_uninit()
+    GLXContext context;     // GLX context; NULL until create_context_x11() succeeds
+    GLXFBConfig fbc;        // FB config selected in glx_init()
+};
+
+// Tear down the GLX context backend. Also used as the failure path of
+// glx_init(), so the GLX objects are only released if they were created.
+static void glx_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    ra_gl_ctx_uninit(ctx);
+
+    if (p->vinfo)
+        XFree(p->vinfo);
+    if (p->context) {
+        // The X11 state is only dereferenced when a context actually exists.
+        Display *display = ctx->vo->x11->display;
+        // Unbind the context before destroying it.
+        glXMakeCurrent(display, None, NULL);
+        glXDestroyContext(display, p->context);
+    }
+
+    vo_x11_uninit(ctx->vo);
+}
+
+// Signature of glXCreateContextAttribsARB, which is looked up at runtime via
+// glXGetProcAddressARB() in create_context_x11().
+typedef GLXContext (*glXCreateContextAttribsARBProc)
+    (Display*, GLXFBConfig, GLXContext, Bool, const int*);
+
+// Create a GLX context for the FB config stored in p->fbc, make it current on
+// the VO's X11 window and load the GL function pointers into *gl.
+//
+//  ctx: render context; ctx->priv must already hold the selected FB config
+//  gl:  function table to fill on success
+//  es:  if true, request a GLES 2.0 context (needs
+//       GLX_EXT_create_context_es2_profile); otherwise try the desktop GL
+//       versions listed in mpgl_min_required_gl_versions, in order, with the
+//       core profile
+//
+// Returns true on success, in which case p->context holds the new context.
+// On failure no context is left behind.
+static bool create_context_x11(struct ra_ctx *ctx, GL *gl, bool es)
+{
+    struct priv *p = ctx->priv;
+    struct vo *vo = ctx->vo;
+
+    glXCreateContextAttribsARBProc glXCreateContextAttribsARB =
+        (glXCreateContextAttribsARBProc)
+            glXGetProcAddressARB((const GLubyte *)"glXCreateContextAttribsARB");
+
+    const char *glxstr =
+        glXQueryExtensionsString(vo->x11->display, vo->x11->screen);
+    if (!glxstr) {
+        MP_ERR(ctx, "GLX did not advertise any extensions\n");
+        return false;
+    }
+
+    if (!gl_check_extension(glxstr, "GLX_ARB_create_context_profile") ||
+        !glXCreateContextAttribsARB) {
+        MP_ERR(ctx, "GLX does not support GLX_ARB_create_context_profile\n");
+        return false;
+    }
+
+    int ctx_flags = ctx->opts.debug ? GLX_CONTEXT_DEBUG_BIT_ARB : 0;
+    int profile_mask = GLX_CONTEXT_CORE_PROFILE_BIT_ARB;
+
+    if (es) {
+        profile_mask = GLX_CONTEXT_ES2_PROFILE_BIT_EXT;
+        if (!gl_check_extension(glxstr, "GLX_EXT_create_context_es2_profile"))
+            return false;
+    }
+
+    // Slots [1] and [3] (major/minor version) are filled in below.
+    int context_attribs[] = {
+        GLX_CONTEXT_MAJOR_VERSION_ARB, 0,
+        GLX_CONTEXT_MINOR_VERSION_ARB, 0,
+        GLX_CONTEXT_PROFILE_MASK_ARB, profile_mask,
+        GLX_CONTEXT_FLAGS_ARB, ctx_flags,
+        None
+    };
+
+    // Start out as NULL so the check after the version loop is well-defined
+    // even if the loop body never runs.
+    GLXContext context = NULL;
+
+    if (!es) {
+        for (int n = 0; mpgl_min_required_gl_versions[n]; n++) {
+            int version = mpgl_min_required_gl_versions[n];
+            MP_VERBOSE(ctx, "Creating OpenGL %d.%d context...\n",
+                       MPGL_VER_P(version));
+
+            context_attribs[1] = MPGL_VER_GET_MAJOR(version);
+            context_attribs[3] = MPGL_VER_GET_MINOR(version);
+
+            // Xlib error reporting is silenced around the attempt, since a
+            // rejected version is expected and simply leads to the next try.
+            vo_x11_silence_xlib(1);
+            context = glXCreateContextAttribsARB(vo->x11->display,
+                                                 p->fbc, 0, True,
+                                                 context_attribs);
+            vo_x11_silence_xlib(-1);
+
+            if (context)
+                break;
+        }
+    } else {
+        // GLES: request version 2.0 (the minor version stays 0).
+        context_attribs[1] = 2;
+
+        vo_x11_silence_xlib(1);
+        context = glXCreateContextAttribsARB(vo->x11->display,
+                                             p->fbc, 0, True,
+                                             context_attribs);
+        vo_x11_silence_xlib(-1);
+    }
+
+    if (!context)
+        return false;
+
+    // set context
+    if (!glXMakeCurrent(vo->x11->display, vo->x11->window, context)) {
+        MP_FATAL(vo, "Could not set GLX context!\n");
+        glXDestroyContext(vo->x11->display, context);
+        return false;
+    }
+
+    // Only publish the context once it is current; glx_uninit() destroys it.
+    p->context = context;
+
+    mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log);
+    return true;
+}
+
+// The GL3/FBC initialization code roughly follows/copies from:
+//  http://www.opengl.org/wiki/Tutorial:_OpenGL_3.0_Context_Creation_(GLX)
+// but also uses some of the old code.
+
+// Pick a GLX FB config matching the given attribute list.
+//
+// By default the first config returned by glXChooseFBConfig() is used. If
+// alpha is true, the first config whose X visual is reported as RGBA by
+// vo_x11_is_rgba_visual() is preferred; if none qualifies, the default pick
+// is kept. Returns NULL if no config is available.
+static GLXFBConfig select_fb_config(struct vo *vo, const int *attribs, bool alpha)
+{
+    Display *dpy = vo->x11->display;
+    int count;
+    GLXFBConfig *list = glXChooseFBConfig(dpy, vo->x11->screen, attribs, &count);
+    if (!list)
+        return NULL;
+
+    // The returned list is sorted, best match first.
+    GLXFBConfig chosen = NULL;
+    if (count > 0)
+        chosen = list[0];
+
+    for (int i = 0; alpha && i < count; i++) {
+        XVisualInfo *vis = glXGetVisualFromFBConfig(dpy, list[i]);
+        if (!vis)
+            continue;
+
+        bool rgba = vo_x11_is_rgba_visual(vis);
+        XFree(vis);
+        if (rgba) {
+            chosen = list[i];
+            break;
+        }
+    }
+
+    XFree(list);
+    return chosen;
+}
+
+// Overwrite the value of attribute `name` in a None-terminated list of
+// (name, value) int pairs. Only the first matching entry is changed; the list
+// is left untouched if the attribute is not present.
+static void set_glx_attrib(int *attribs, int name, int value)
+{
+    for (int *pair = attribs; pair[0] != None; pair += 2) {
+        if (pair[0] != name)
+            continue;
+        pair[1] = value;
+        return;
+    }
+}
+
+// check_visible callback: returns the result of vo_x11_check_visible().
+static bool glx_check_visible(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    return vo_x11_check_visible(vo);
+}
+
+// swap_buffers callback: swaps the buffers of the X11 window and, if the
+// present mechanism is in use, notifies present_sync of the swap.
+static void glx_swap_buffers(struct ra_ctx *ctx)
+{
+    struct vo_x11_state *x11 = ctx->vo->x11;
+
+    glXSwapBuffers(x11->display, x11->window);
+
+    if (x11->use_present)
+        present_sync_swap(x11->present);
+}
+
+// get_vsync callback: fills *info from present_sync when the present
+// mechanism is in use; otherwise *info is left untouched.
+static void glx_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info)
+{
+    struct vo_x11_state *x11 = ctx->vo->x11;
+
+    if (!x11->use_present)
+        return;
+
+    present_sync_get_info(x11->present, info);
+}
+
+// Initialize the GLX backend: bring up X11, select an FB config (preferring
+// one with an RGBA visual if alpha was requested), create the VO window, create a
+// desktop GL or GLES context depending on the configured GLES mode, and hand
+// the result to ra_gl_ctx_init().
+//
+// Returns true on success. On any failure everything is torn down again via
+// glx_uninit() and false is returned.
+static bool glx_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    struct vo *vo = ctx->vo;
+    GL *gl = &p->gl;
+
+    if (!vo_x11_init(ctx->vo))
+        goto uninit;
+
+    int glx_major, glx_minor;
+
+    if (!glXQueryVersion(vo->x11->display, &glx_major, &glx_minor)) {
+        MP_ERR(ctx, "GLX not found.\n");
+        goto uninit;
+    }
+    // FBConfigs were added in GLX version 1.3.
+    if (MPGL_VER(glx_major, glx_minor) <  MPGL_VER(1, 3)) {
+        MP_ERR(ctx, "GLX version older than 1.3.\n");
+        goto uninit;
+    }
+
+    // Baseline requirements; GLX_ALPHA_SIZE is patched below via
+    // set_glx_attrib() when alpha is wanted.
+    int glx_attribs[] = {
+        GLX_X_RENDERABLE, True,
+        GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR,
+        GLX_RED_SIZE, 1,
+        GLX_GREEN_SIZE, 1,
+        GLX_BLUE_SIZE, 1,
+        GLX_ALPHA_SIZE, 0,
+        GLX_DOUBLEBUFFER, True,
+        None
+    };
+    GLXFBConfig fbc = NULL;
+    if (ctx->opts.want_alpha) {
+        // First try with an alpha channel; if that yields nothing, restore
+        // the attribute and fall back to the non-alpha selection below.
+        set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 1);
+        fbc = select_fb_config(vo, glx_attribs, true);
+        if (!fbc)
+            set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 0);
+    }
+    if (!fbc)
+        fbc = select_fb_config(vo, glx_attribs, false);
+    if (!fbc) {
+        MP_ERR(ctx, "no GLX support present\n");
+        goto uninit;
+    }
+
+    // glXGetFBConfigAttrib() reports success with a zero return value
+    // (Success), so the ID is only logged when the query worked.
+    int fbid = -1;
+    if (!glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_FBCONFIG_ID, &fbid))
+        MP_VERBOSE(ctx, "GLX chose FB config with ID 0x%x\n", fbid);
+
+    p->fbc = fbc;
+    p->vinfo = glXGetVisualFromFBConfig(vo->x11->display, fbc);
+    if (p->vinfo) {
+        MP_VERBOSE(ctx, "GLX chose visual with ID 0x%x\n",
+                   (int)p->vinfo->visualid);
+    } else {
+        MP_WARN(ctx, "Selected GLX FB config has no associated X visual\n");
+    }
+
+    // Note: p->vinfo may be NULL here; it is passed on as-is.
+    if (!vo_x11_create_vo_window(vo, p->vinfo, "gl"))
+        goto uninit;
+
+    bool success = false;
+    enum gles_mode mode = ra_gl_ctx_get_glesmode(ctx);
+
+    // Desktop GL is tried first unless GLES is forced; GLES is tried if it
+    // is forced, or as a fallback in auto mode.
+    if (mode == GLES_NO || mode == GLES_AUTO)
+        success = create_context_x11(ctx, gl, false);
+    if (!success && (mode == GLES_YES || mode == GLES_AUTO))
+        success = create_context_x11(ctx, gl, true);
+    // A non-direct context is flagged with MPGL_CAP_SW.
+    if (success && !glXIsDirect(vo->x11->display, p->context))
+        gl->mpgl_caps |= MPGL_CAP_SW;
+    if (!success)
+        goto uninit;
+
+    struct ra_gl_ctx_params params = {
+        .check_visible = glx_check_visible,
+        .swap_buffers = glx_swap_buffers,
+        .get_vsync    = glx_get_vsync,
+    };
+
+    if (!ra_gl_ctx_init(ctx, gl, params))
+        goto uninit;
+
+    // Expose the X11 display as the "x11" native resource.
+    ra_add_native_resource(ctx->ra, "x11", vo->x11->display);
+
+    return true;
+
+uninit:
+    glx_uninit(ctx);
+    return false;
+}
+
+
+// Resize the GL swapchain to the VO's current output dimensions, targeting
+// framebuffer 0.
+static void resize(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    ra_gl_ctx_resize(ctx->swapchain, vo->dwidth, vo->dheight, 0);
+}
+
+// Reconfigure callback: forwards to vo_x11_config_vo_window() and then
+// resizes the swapchain. Always reports success.
+static bool glx_reconfig(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_x11_config_vo_window(vo);
+    resize(ctx);
+
+    return true;
+}
+
+// Control callback: forwards the request to vo_x11_control() and resizes the
+// swapchain if VO_EVENT_RESIZE is set in *events afterwards.
+// Returns whatever vo_x11_control() returned.
+static int glx_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    const int result = vo_x11_control(ctx->vo, events, request, arg);
+    const bool resized = (*events & VO_EVENT_RESIZE) != 0;
+
+    if (resized)
+        resize(ctx);
+
+    return result;
+}
+
+// wakeup callback: forwards to vo_x11_wakeup().
+static void glx_wakeup(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    vo_x11_wakeup(vo);
+}
+
+// wait_events callback: forwards to vo_x11_wait_events() with the given
+// deadline.
+static void glx_wait_events(struct ra_ctx *ctx, int64_t until_time_ns)
+{
+    struct vo *vo = ctx->vo;
+    vo_x11_wait_events(vo, until_time_ns);
+}
+
+// Callback table for the "x11" (GLX) OpenGL context backend.
+const struct ra_ctx_fns ra_ctx_glx = {
+    .name           = "x11",
+    .type           = "opengl",
+    .init           = glx_init,
+    .uninit         = glx_uninit,
+    .reconfig       = glx_reconfig,
+    .control        = glx_control,
+    .wait_events    = glx_wait_events,
+    .wakeup         = glx_wakeup,
+};
diff --git a/video/out/opengl/context_rpi.c b/video/out/opengl/context_rpi.c
new file mode 100644
index 0000000..0b6babb
--- /dev/null
+++ b/video/out/opengl/context_rpi.c
@@ -0,0 +1,327 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdatomic.h>
+#include <stddef.h>
+
+#include <bcm_host.h>
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "common/common.h"
+#include "video/out/win_state.h"
+#include "context.h"
+#include "egl_helpers.h"
+
+// Per-context private state of the Raspberry Pi (dispmanx + EGL) backend.
+struct priv {
+    struct GL gl;                       // GL function table
+    // dispmanx objects, (re)created by recreate_dispmanx() and released by
+    // destroy_dispmanx(); 0 means "not present".
+    DISPMANX_DISPLAY_HANDLE_T display;
+    DISPMANX_ELEMENT_HANDLE_T window;
+    DISPMANX_UPDATE_HANDLE_T update;
+    EGLDisplay egl_display;
+    EGLConfig egl_config;
+    EGLContext egl_context;
+    EGLSurface egl_surface;             // surface on top of egl_window
+    // yep, the API keeps a pointer to it
+    EGL_DISPMANX_WINDOW_T egl_window;
+    int x, y, w, h;                     // position and size of the overlay
+    double display_fps;                 // reported for VOCTRL_GET_DISPLAY_FPS;
+                                        // 0 if it could not be queried
+    atomic_int reload_display;          // set by tv_callback(), consumed in
+                                        // rpi_control() (VOCTRL_CHECK_EVENTS)
+    int win_params[4];                  // {display_nr, layer, x, y}; exported
+                                        // as "MPV_RPI_WINDOW" native resource
+};
+
+// Callback registered with vc_tv_register_callback(). It only flags that the
+// display state must be rebuilt and wakes up the VO; the actual work happens
+// in rpi_control() on VOCTRL_CHECK_EVENTS. reason/param1/param2 are unused.
+static void tv_callback(void *callback_data, uint32_t reason, uint32_t param1,
+                        uint32_t param2)
+{
+    struct ra_ctx *ra_ctx = callback_data;
+    struct priv *priv = ra_ctx->priv;
+
+    atomic_store(&priv->reload_display, 1);
+    vo_wakeup(ra_ctx->vo);
+}
+
+// Release the EGL surface and all dispmanx objects, resetting the
+// corresponding fields so the function can be called repeatedly and on
+// partially initialized state. The EGL display and context are kept.
+static void destroy_dispmanx(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    if (p->egl_surface) {
+        // Unbind the surface from the current thread before destroying it.
+        eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE,
+                       EGL_NO_CONTEXT);
+        eglDestroySurface(p->egl_display, p->egl_surface);
+        p->egl_surface = EGL_NO_SURFACE;
+    }
+
+    // The element removal is queued on the pending update, which is only
+    // submitted at the end of this function.
+    if (p->window)
+        vc_dispmanx_element_remove(p->update, p->window);
+    p->window = 0;
+    if (p->display)
+        vc_dispmanx_display_close(p->display);
+    p->display = 0;
+    if (p->update)
+        vc_dispmanx_update_submit_sync(p->update);
+    p->update = 0;
+}
+
+// Tear down the RPI backend. Also used as the failure path of rpi_init().
+static void rpi_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    ra_gl_ctx_uninit(ctx);
+
+    // Unregister tv_callback before the state it refers to goes away.
+    vc_tv_unregister_callback_full(tv_callback, ctx);
+
+    destroy_dispmanx(ctx);
+
+    if (p->egl_context)
+        eglDestroyContext(p->egl_display, p->egl_context);
+    p->egl_context = EGL_NO_CONTEXT;
+    eglReleaseThread();
+    p->egl_display = EGL_NO_DISPLAY;
+}
+
+// Destroy and recreate the dispmanx overlay and the EGL surface on top of it,
+// make the context current on the new surface, and refresh the cached
+// geometry and display FPS. Requires p->egl_display, p->egl_config and
+// p->egl_context to be set up already.
+//
+// Returns true on success. On failure the dispmanx/surface state is destroyed
+// again and false is returned.
+static bool recreate_dispmanx(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    int display_nr = 0;
+    int layer = 0;
+
+    MP_VERBOSE(ctx, "Recreating DISPMANX state...\n");
+
+    destroy_dispmanx(ctx);
+
+    p->display = vc_dispmanx_display_open(display_nr);
+    p->update = vc_dispmanx_update_start(0);
+    if (!p->display || !p->update) {
+        MP_FATAL(ctx, "Could not get DISPMANX objects.\n");
+        goto fail;
+    }
+
+    uint32_t dispw, disph;
+    if (graphics_get_display_size(0, &dispw, &disph) < 0) {
+        MP_FATAL(ctx, "Could not get display size.\n");
+        goto fail;
+    }
+    p->w = dispw;
+    p->h = disph;
+
+    // Fullscreen covers the whole display; otherwise use the computed window
+    // geometry, clipped to the display bounds.
+    if (ctx->vo->opts->fullscreen) {
+        p->x = p->y = 0;
+    } else {
+        struct vo_win_geometry geo;
+        struct mp_rect screenrc = {0, 0, p->w, p->h};
+
+        vo_calc_window_geometry(ctx->vo, &screenrc, &geo);
+
+        mp_rect_intersection(&geo.win, &screenrc);
+
+        p->x = geo.win.x0;
+        p->y = geo.win.y0;
+        p->w = geo.win.x1 - geo.win.x0;
+        p->h = geo.win.y1 - geo.win.y0;
+    }
+
+    // dispmanx is like a neanderthal version of Wayland - you can add an
+    // overlay any place on the screen.
+    VC_RECT_T dst = {.x = p->x, .y = p->y, .width = p->w, .height = p->h};
+    // NOTE(review): the << 16 suggests the source rect is given in 16.16
+    // fixed point - confirm against the dispmanx documentation.
+    VC_RECT_T src = {.width = p->w << 16, .height = p->h << 16};
+    VC_DISPMANX_ALPHA_T alpha = {
+        .flags = DISPMANX_FLAGS_ALPHA_FIXED_ALL_PIXELS,
+        .opacity = 0xFF,
+    };
+    p->window = vc_dispmanx_element_add(p->update, p->display, layer, &dst, 0,
+                                        &src, DISPMANX_PROTECTION_NONE, &alpha,
+                                        0, 0);
+    if (!p->window) {
+        MP_FATAL(ctx, "Could not add DISPMANX element.\n");
+        goto fail;
+    }
+
+    // Submit the update that added the element, then start a fresh one so
+    // that p->update is valid for the later teardown in destroy_dispmanx().
+    vc_dispmanx_update_submit_sync(p->update);
+    p->update = vc_dispmanx_update_start(0);
+
+    // egl_window lives in priv because the API keeps a pointer to it.
+    p->egl_window = (EGL_DISPMANX_WINDOW_T){
+        .element = p->window,
+        .width = p->w,
+        .height = p->h,
+    };
+    p->egl_surface = eglCreateWindowSurface(p->egl_display, p->egl_config,
+                                            &p->egl_window, NULL);
+
+    if (p->egl_surface == EGL_NO_SURFACE) {
+        MP_FATAL(ctx, "Could not create EGL surface!\n");
+        goto fail;
+    }
+
+    if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface,
+                        p->egl_context))
+    {
+        MP_FATAL(ctx, "Failed to set context!\n");
+        goto fail;
+    }
+
+    // Query the display refresh rate; it stays 0 if either query fails.
+    p->display_fps = 0;
+    TV_GET_STATE_RESP_T tvstate;
+    TV_DISPLAY_STATE_T tvstate_disp;
+    if (!vc_tv_get_state(&tvstate) && !vc_tv_get_display_state(&tvstate_disp)) {
+        if (tvstate_disp.state & (VC_HDMI_HDMI | VC_HDMI_DVI)) {
+            p->display_fps = tvstate_disp.display.hdmi.frame_rate;
+
+            // With an NTSC pixel clock the rate is divided by 1.001.
+            HDMI_PROPERTY_PARAM_T param = {
+                .property = HDMI_PROPERTY_PIXEL_CLOCK_TYPE,
+            };
+            if (!vc_tv_hdmi_get_property(&param) &&
+                param.param1 == HDMI_PIXEL_CLOCK_TYPE_NTSC)
+                p->display_fps = p->display_fps / 1.001;
+        } else {
+            p->display_fps = tvstate_disp.display.sdtv.frame_rate;
+        }
+    }
+
+    // Exported to other components as the "MPV_RPI_WINDOW" native resource.
+    p->win_params[0] = display_nr;
+    p->win_params[1] = layer;
+    p->win_params[2] = p->x;
+    p->win_params[3] = p->y;
+
+    ctx->vo->dwidth = p->w;
+    ctx->vo->dheight = p->h;
+    // The swapchain does not exist yet when this is called from rpi_init().
+    if (ctx->swapchain)
+        ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0);
+
+    ctx->vo->want_redraw = true;
+
+    vo_event(ctx->vo, VO_EVENT_WIN_STATE);
+    return true;
+
+fail:
+    destroy_dispmanx(ctx);
+    return false;
+}
+
+// swap_buffers callback: swaps the buffers of the EGL window surface.
+static void rpi_swap_buffers(struct ra_ctx *ctx)
+{
+    const struct priv *priv = ctx->priv;
+    eglSwapBuffers(priv->egl_display, priv->egl_surface);
+}
+
+// Initialize the RPI backend: set up the Broadcom host library, register the
+// TV callback, bring up EGL and a context, create the dispmanx overlay with
+// its EGL surface, and hand the result to ra_gl_ctx_init().
+//
+// Returns true on success. On any failure rpi_uninit() is run and false is
+// returned.
+static bool rpi_init(struct ra_ctx *ctx)
+{
+    struct priv *p = talloc_zero(ctx, struct priv);
+    ctx->priv = p;
+
+    bcm_host_init();
+    vc_tv_register_callback(tv_callback, ctx);
+
+    p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+    if (!eglInitialize(p->egl_display, NULL, NULL)) {
+        MP_FATAL(ctx, "EGL failed to initialize.\n");
+        goto fail;
+    }
+
+    // Context first, then the surface; the second step is skipped if the
+    // first one fails.
+    if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &p->egl_config) ||
+        !recreate_dispmanx(ctx))
+    {
+        goto fail;
+    }
+
+    struct GL *gl = &p->gl;
+    mpegl_load_functions(gl, ctx->log);
+
+    struct ra_gl_ctx_params params = {
+        .swap_buffers = rpi_swap_buffers,
+    };
+    if (!ra_gl_ctx_init(ctx, gl, params))
+        goto fail;
+
+    ra_add_native_resource(ctx->ra, "MPV_RPI_WINDOW", p->win_params);
+
+    // recreate_dispmanx() ran before the swapchain existed, so apply the
+    // size it stored in the VO now.
+    ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0);
+    return true;
+
+fail:
+    rpi_uninit(ctx);
+    return false;
+}
+
+// Reconfigure callback: rebuilds the dispmanx/EGL surface state and reports
+// whether that succeeded.
+static bool rpi_reconfig(struct ra_ctx *ctx)
+{
+    const bool ok = recreate_dispmanx(ctx);
+    return ok;
+}
+
+// Take a snapshot of the dispmanx display into a newly allocated image.
+//
+// The image has format IMGFMT_BGR0 and the size of the current overlay
+// (p->w x p->h). Returns NULL if there is no display, on allocation failure,
+// or if any of the dispmanx calls fails. The caller owns the returned image.
+static struct mp_image *take_screenshot(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    if (!p->display)
+        return NULL;
+
+    struct mp_image *img = mp_image_alloc(IMGFMT_BGR0, p->w, p->h);
+    if (!img)
+        return NULL;
+
+    // NOTE(review): the width argument carries the pitch in bytes (w * 4) in
+    // its upper 16 bits - confirm against the dispmanx documentation.
+    DISPMANX_RESOURCE_HANDLE_T resource =
+        vc_dispmanx_resource_create(VC_IMAGE_ARGB8888,
+                                    img->w | ((img->w * 4) << 16), img->h,
+                                    &(int32_t){0});
+    if (!resource) {
+        // Nothing was created, so there is no resource to delete.
+        talloc_free(img);
+        return NULL;
+    }
+
+    if (vc_dispmanx_snapshot(p->display, resource, 0))
+        goto fail;
+
+    VC_RECT_T rc = {.width = img->w, .height = img->h};
+    if (vc_dispmanx_resource_read_data(resource, &rc, img->planes[0], img->stride[0]))
+        goto fail;
+
+    vc_dispmanx_resource_delete(resource);
+    return img;
+
+fail:
+    vc_dispmanx_resource_delete(resource);
+    talloc_free(img);
+    return NULL;
+}
+
+// Control callback of the RPI backend.
+//
+// Handles window screenshots, the deferred display reload requested by
+// tv_callback(), and display FPS queries. Returns VO_TRUE for handled
+// requests and VO_NOTIMPL for everything else. *events is not touched.
+static int rpi_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    struct priv *p = ctx->priv;
+
+    if (request == VOCTRL_SCREENSHOT_WIN) {
+        struct mp_image **out = arg;
+        *out = take_screenshot(ctx);
+        return VO_TRUE;
+    }
+
+    if (request == VOCTRL_CHECK_EVENTS) {
+        // Atomically read and clear the reload flag.
+        int pending = atomic_fetch_and(&p->reload_display, 0);
+        if (pending) {
+            MP_WARN(ctx, "Recovering from display mode switch...\n");
+            recreate_dispmanx(ctx);
+        }
+        return VO_TRUE;
+    }
+
+    if (request == VOCTRL_GET_DISPLAY_FPS) {
+        double *fps = arg;
+        *fps = p->display_fps;
+        return VO_TRUE;
+    }
+
+    return VO_NOTIMPL;
+}
+
+// Callback table for the "rpi" OpenGL context backend.
+const struct ra_ctx_fns ra_ctx_rpi = {
+    .name           = "rpi",
+    .type           = "opengl",
+    .init           = rpi_init,
+    .uninit         = rpi_uninit,
+    .reconfig       = rpi_reconfig,
+    .control        = rpi_control,
+};
diff --git a/video/out/opengl/context_wayland.c b/video/out/opengl/context_wayland.c
new file mode 100644
index 0000000..26c5268
--- /dev/null
+++ b/video/out/opengl/context_wayland.c
@@ -0,0 +1,230 @@
+/*
+ * This file is part of mpv video player.
+ * Copyright © 2013 Alexander Preisinger <alexander.preisinger@gmail.com>
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <wayland-egl.h>
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "video/out/present_sync.h"
+#include "video/out/wayland_common.h"
+#include "context.h"
+#include "egl_helpers.h"
+#include "utils.h"
+
+#define EGL_PLATFORM_WAYLAND_EXT 0x31D8
+
+// Per-context private state of the Wayland EGL backend (allocated in
+// egl_create_context()).
+struct priv {
+    GL gl;                              // GL function table
+    EGLDisplay egl_display;
+    EGLContext egl_context;
+    EGLSurface egl_surface;             // created lazily in egl_create_window()
+    EGLConfig  egl_config;
+    struct wl_egl_window *egl_window;   // NULL until egl_create_window() ran
+};
+
+// Apply the current Wayland geometry on the EGL side: update the opaque
+// region, resize the wl_egl_window (if it exists already), store the new
+// size in the VO and let the Wayland code handle fractional scaling.
+static void resize(struct ra_ctx *ctx)
+{
+    struct vo_wayland_state *wl = ctx->vo->wl;
+    struct priv *p = ctx->priv;
+
+    MP_VERBOSE(wl, "Handling resize on the egl side\n");
+
+    const int32_t w = mp_rect_w(wl->geometry);
+    const int32_t h = mp_rect_h(wl->geometry);
+
+    vo_wayland_set_opaque_region(wl, ctx->opts.want_alpha);
+
+    // The window is only created on the first reconfig.
+    if (p->egl_window != NULL)
+        wl_egl_window_resize(p->egl_window, w, h, 0, 0);
+
+    struct vo *vo = wl->vo;
+    vo->dwidth  = w;
+    vo->dheight = h;
+
+    vo_wayland_handle_fractional_scale(wl);
+}
+
+// check_visible callback: returns the result of vo_wayland_check_visible().
+static bool wayland_egl_check_visible(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    return vo_wayland_check_visible(vo);
+}
+
+// swap_buffers callback: swaps the EGL surface, then waits for the frame
+// unless vsync waiting is disabled by option, and finally notifies
+// present_sync if the present mechanism is in use.
+static void wayland_egl_swap_buffers(struct ra_ctx *ctx)
+{
+    struct vo_wayland_state *wl = ctx->vo->wl;
+    struct priv *priv = ctx->priv;
+
+    eglSwapBuffers(priv->egl_display, priv->egl_surface);
+
+    const bool wait_for_frame = !wl->opts->disable_vsync;
+    if (wait_for_frame)
+        vo_wayland_wait_frame(wl);
+
+    if (wl->use_present)
+        present_sync_swap(wl->present);
+}
+
+// get_vsync callback: fills *info from present_sync when the present
+// mechanism is in use; otherwise *info is left untouched.
+static void wayland_egl_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info)
+{
+    struct vo_wayland_state *wl = ctx->vo->wl;
+
+    if (!wl->use_present)
+        return;
+
+    present_sync_get_info(wl->present, info);
+}
+
+// Allocate the private state, bring up EGL on the Wayland display, create a
+// context and initialize the GL render context. No window surface exists
+// yet at this point; it is created later by egl_create_window().
+//
+// Returns true on success and false on the first step that fails. Nothing is
+// torn down here on failure.
+static bool egl_create_context(struct ra_ctx *ctx)
+{
+    struct priv *p = talloc_zero(ctx, struct priv);
+    ctx->priv = p;
+    struct vo_wayland_state *wl = ctx->vo->wl;
+
+    p->egl_display = mpegl_get_display(EGL_PLATFORM_WAYLAND_EXT,
+                                       "EGL_EXT_platform_wayland",
+                                       wl->display);
+    if (!p->egl_display)
+        return false;
+
+    if (eglInitialize(p->egl_display, NULL, NULL) != EGL_TRUE)
+        return false;
+
+    bool have_context = mpegl_create_context(ctx, p->egl_display,
+                                             &p->egl_context, &p->egl_config);
+    if (!have_context)
+        return false;
+
+    // Make the context current without any surface so that the GL functions
+    // can be loaded.
+    eglMakeCurrent(p->egl_display, NULL, NULL, p->egl_context);
+
+    mpegl_load_functions(&p->gl, wl->log);
+
+    struct ra_gl_ctx_params params = {
+        .check_visible      = wayland_egl_check_visible,
+        .swap_buffers       = wayland_egl_swap_buffers,
+        .get_vsync          = wayland_egl_get_vsync,
+    };
+    if (!ra_gl_ctx_init(ctx, &p->gl, params))
+        return false;
+
+    ra_add_native_resource(ctx->ra, "wl", wl->display);
+    return true;
+}
+
+// Create the wl_egl_window and the EGL window surface for the current
+// Wayland geometry, and make the context current on that surface.
+// Called once from wayland_egl_reconfig() while no window exists yet.
+static void egl_create_window(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    struct vo_wayland_state *wl = ctx->vo->wl;
+
+    p->egl_window = wl_egl_window_create(wl->surface,
+                                         mp_rect_w(wl->geometry),
+                                         mp_rect_h(wl->geometry));
+
+    // Try the helper first; if it yields no surface, fall back to plain
+    // eglCreateWindowSurface().
+    p->egl_surface = mpegl_create_window_surface(
+        p->egl_display, p->egl_config, p->egl_window);
+    if (p->egl_surface == EGL_NO_SURFACE) {
+        p->egl_surface = eglCreateWindowSurface(
+            p->egl_display, p->egl_config, p->egl_window, NULL);
+    }
+
+    eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context);
+    // eglMakeCurrent may not configure the draw or read buffers if the context
+    // has been made current previously. On nvidia GL_NONE is bound because EGL_NO_SURFACE
+    // is used initially and we must bind the read and draw buffers here.
+    if(!p->gl.es) {
+        p->gl.ReadBuffer(GL_BACK);
+        p->gl.DrawBuffer(GL_BACK);
+    }
+
+    // Swap interval 0: eglSwapBuffers() itself does not wait; frame pacing
+    // is done in wayland_egl_swap_buffers() via vo_wayland_wait_frame().
+    eglSwapInterval(p->egl_display, 0);
+}
+
+// Reconfigure callback: lets the Wayland code reconfigure the window and
+// creates the EGL window/surface on the first successful call.
+// Returns false if vo_wayland_reconfig() fails, true otherwise.
+static bool wayland_egl_reconfig(struct ra_ctx *ctx)
+{
+    if (!vo_wayland_reconfig(ctx->vo))
+        return false;
+
+    struct priv *priv = ctx->priv;
+    if (priv->egl_window == NULL)
+        egl_create_window(ctx);
+
+    return true;
+}
+
+// Tear down the Wayland EGL backend: the GL render context, the EGL objects
+// (only if a context was created), the EGL display connection, and finally
+// the Wayland state of the VO.
+static void wayland_egl_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    ra_gl_ctx_uninit(ctx);
+
+    if (p->egl_context) {
+        eglReleaseThread();
+        // The window only exists after the first reconfig.
+        if (p->egl_window)
+            wl_egl_window_destroy(p->egl_window);
+        eglDestroySurface(p->egl_display, p->egl_surface);
+        eglMakeCurrent(p->egl_display, NULL, NULL, EGL_NO_CONTEXT);
+        eglDestroyContext(p->egl_display, p->egl_context);
+        p->egl_context = NULL;
+    }
+    // Called unconditionally, i.e. also when no context was ever created.
+    eglTerminate(p->egl_display);
+
+    vo_wayland_uninit(ctx->vo);
+}
+
+// Control callback: forwards the request to vo_wayland_control(). If
+// VO_EVENT_RESIZE is set in *events afterwards, the EGL window is resized
+// and the swapchain is updated to the new VO dimensions.
+// Returns whatever vo_wayland_control() returned.
+static int wayland_egl_control(struct ra_ctx *ctx, int *events, int request,
+                             void *data)
+{
+    struct vo_wayland_state *wl = ctx->vo->wl;
+    const int result = vo_wayland_control(ctx->vo, events, request, data);
+
+    const bool resized = (*events & VO_EVENT_RESIZE) != 0;
+    if (!resized)
+        return result;
+
+    // resize() stores the new size in the VO, which is read back right after.
+    resize(ctx);
+    ra_gl_ctx_resize(ctx->swapchain, wl->vo->dwidth, wl->vo->dheight, 0);
+
+    return result;
+}
+
+// wakeup callback: forwards to vo_wayland_wakeup().
+static void wayland_egl_wakeup(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    vo_wayland_wakeup(vo);
+}
+
+// wait_events callback: forwards to vo_wayland_wait_events() with the given
+// deadline.
+static void wayland_egl_wait_events(struct ra_ctx *ctx, int64_t until_time_ns)
+{
+    struct vo *vo = ctx->vo;
+    vo_wayland_wait_events(vo, until_time_ns);
+}
+
+// update_render_opts callback: re-applies the opaque region according to the
+// current want_alpha option and commits the surface.
+static void wayland_egl_update_render_opts(struct ra_ctx *ctx)
+{
+    struct vo_wayland_state *wl = ctx->vo->wl;
+    const bool want_alpha = ctx->opts.want_alpha;
+
+    vo_wayland_set_opaque_region(wl, want_alpha);
+    wl_surface_commit(wl->surface);
+}
+
+// Initialize the Wayland EGL backend: bring up the Wayland state of the VO
+// and create the EGL context on top of it.
+//
+// Returns true on success. If the EGL setup fails after Wayland was
+// initialized, everything is torn down again via wayland_egl_uninit(), so a
+// failed init does not leave the Wayland state behind (matching the other
+// context backends, which clean up after themselves on init failure).
+static bool wayland_egl_init(struct ra_ctx *ctx)
+{
+    if (!vo_wayland_init(ctx->vo))
+        return false;
+
+    // egl_create_context() sets ctx->priv as its very first step, so the
+    // uninit path always has valid private state to work with.
+    if (!egl_create_context(ctx)) {
+        wayland_egl_uninit(ctx);
+        return false;
+    }
+
+    return true;
+}
+
+// Callback table for the "wayland" (EGL) OpenGL context backend.
+const struct ra_ctx_fns ra_ctx_wayland_egl = {
+    .name               = "wayland",
+    .type               = "opengl",
+    .init               = wayland_egl_init,
+    .uninit             = wayland_egl_uninit,
+    .reconfig           = wayland_egl_reconfig,
+    .control            = wayland_egl_control,
+    .update_render_opts = wayland_egl_update_render_opts,
+    .wait_events        = wayland_egl_wait_events,
+    .wakeup             = wayland_egl_wakeup,
+};
diff --git a/video/out/opengl/context_win.c b/video/out/opengl/context_win.c
new file mode 100644
index 0000000..968b176
--- /dev/null
+++ b/video/out/opengl/context_win.c
@@ -0,0 +1,378 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <windows.h>
+#include <dwmapi.h>
+
+#include "options/m_config.h"
+#include "video/out/w32_common.h"
+#include "context.h"
+#include "utils.h"
+
+// Fallback definitions of the WGL_ARB_create_context tokens, compiled in only
+// when the system headers did not already define them.
+#if !defined(WGL_CONTEXT_MAJOR_VERSION_ARB)
+/* these are supposed to be defined in wingdi.h but mingw's is too old */
+/* only the bits actually used by mplayer are defined */
+/* reference: http://www.opengl.org/registry/specs/ARB/wgl_create_context.txt */
+
+// Attribute names for the attribList passed to wglCreateContextAttribsARB().
+#define WGL_CONTEXT_MAJOR_VERSION_ARB          0x2091
+#define WGL_CONTEXT_MINOR_VERSION_ARB          0x2092
+#define WGL_CONTEXT_FLAGS_ARB                  0x2094
+#define WGL_CONTEXT_PROFILE_MASK_ARB           0x9126
+// Bit values for the FLAGS / PROFILE_MASK attributes above.
+#define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002
+#define WGL_CONTEXT_CORE_PROFILE_BIT_ARB   0x00000001
+#endif
+
+// Options specific to the WGL context (see wingl_conf).
+struct wingl_opts {
+    // Value of --opengl-dwmflush: -1 = no, 0 = auto, 1 = windowed, 2 = yes.
+    int wingl_dwm_flush;
+};
+
+// Option table for struct wingl_opts. It defines the single choice option
+// "opengl-dwmflush", which is evaluated in wgl_swap_buffers().
+#define OPT_BASE_STRUCT struct wingl_opts
+const struct m_sub_options wingl_conf = {
+    .opts = (const struct m_option[]) {
+        {"opengl-dwmflush", OPT_CHOICE(wingl_dwm_flush,
+            {"no", -1}, {"auto", 0}, {"windowed", 1}, {"yes", 2})},
+        {0}
+    },
+    .size = sizeof(struct wingl_opts),
+};
+
+// Per-context state of the WGL backend (ctx->priv).
+struct priv {
+    // GL function table, filled by mpgl_load_functions() in wgl_init().
+    GL gl;
+
+    // Interval last requested through gl->SwapInterval (wgl_swap_interval()).
+    int opt_swapinterval;
+    // Interval last computed in wgl_swap_buffers(); starts out as -1.
+    int current_swapinterval;
+
+    // The SwapInterval function that was loaded before wgl_init() replaced
+    // gl->SwapInterval with wgl_swap_interval(). May be NULL.
+    int (GLAPIENTRY *real_wglSwapInterval)(int);
+    // Option cache for wingl_conf and the option struct it provides.
+    struct m_config_cache *opts_cache;
+    struct wingl_opts *opts;
+
+    // GL rendering context and the window DC it was created for.
+    HGLRC context;
+    HDC hdc;
+};
+
+// Forward declaration: wgl_init() calls this on its failure path.
+static void wgl_uninit(struct ra_ctx *ctx);
+
+// Per-thread pointer to the priv whose opt_swapinterval wgl_swap_interval()
+// updates. Set in wgl_init(), cleared in destroy_gl().
+static __thread struct priv *current_wgl_context;
+
+// Replacement for gl->SwapInterval. It only records the requested interval in
+// the current thread's context; the real driver call is made later by
+// wgl_swap_buffers(). Always returns 0.
+static int GLAPIENTRY wgl_swap_interval(int interval)
+{
+    struct priv *cur = current_wgl_context;
+    if (!cur)
+        return 0;
+
+    cur->opt_swapinterval = interval;
+    return 0;
+}
+
+// Acquire the window's device context and select a double-buffered RGBA pixel
+// format with OpenGL support on it. The DC is stored in p->hdc; if p->hdc is
+// already set the function returns true immediately.
+// Returns false if the DC could not be obtained or no pixel format matched.
+static bool create_dc(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    HWND hwnd = vo_w32_hwnd(ctx->vo);
+
+    // Already set up by an earlier call.
+    if (p->hdc)
+        return true;
+
+    HDC dc = GetDC(hwnd);
+    if (!dc)
+        return false;
+
+    PIXELFORMATDESCRIPTOR desc;
+    memset(&desc, 0, sizeof desc);
+    desc.nSize      = sizeof desc;
+    desc.nVersion   = 1;
+    desc.dwFlags    = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER;
+    desc.iPixelType = PFD_TYPE_RGBA;
+    desc.cColorBits = 24;
+    desc.iLayerType = PFD_MAIN_PLANE;
+
+    int format = ChoosePixelFormat(dc, &desc);
+    if (format) {
+        SetPixelFormat(dc, format, &desc);
+        p->hdc = dc;
+        return true;
+    }
+
+    MP_ERR(ctx->vo, "unable to select a valid pixel format!\n");
+    ReleaseDC(hwnd, dc);
+    return false;
+}
+
+// Resolve an OpenGL/WGL entry point by name.
+// wglGetProcAddress() is tried first; if it returns NULL, the symbol is looked
+// up as a plain export of opengl32.dll instead. Returns NULL if neither lookup
+// finds the function.
+static void *wglgpa(const GLubyte *procName)
+{
+    // Both Win32 lookups take a narrow (char) string.
+    const char *name = (const char *)procName;
+
+    void *res = (void *)wglGetProcAddress(name);
+    if (res)
+        return res;
+
+    // The W variant is called explicitly because the module name is a wide
+    // literal; the generic GetModuleHandle macro only maps to it when UNICODE
+    // is defined.
+    HMODULE oglmod = GetModuleHandleW(L"opengl32.dll");
+    if (!oglmod)
+        return NULL;
+
+    return (void *)GetProcAddress(oglmod, name);
+}
+
+// Create a context with plain wglCreateContext() and make it current on
+// p->hdc. On success the context is stored in p->context and true is returned;
+// on failure the context (if any) is deleted again and false is returned.
+static bool create_context_wgl_old(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    HDC dc = p->hdc;
+
+    HGLRC legacy = wglCreateContext(dc);
+    if (!legacy) {
+        MP_FATAL(ctx->vo, "Could not create GL context!\n");
+        return false;
+    }
+
+    if (wglMakeCurrent(dc, legacy)) {
+        p->context = legacy;
+        return true;
+    }
+
+    MP_FATAL(ctx->vo, "Could not set GL context!\n");
+    wglDeleteContext(legacy);
+    return false;
+}
+
+// Try to create an OpenGL 3.x context through wglCreateContextAttribsARB().
+// A temporary legacy context is created and made current first, because the
+// extension entry points are looked up through it. On success the new context
+// is current on p->hdc, is stored in p->context, and true is returned. On any
+// failure the legacy context is released and false is returned with
+// p->context untouched.
+static bool create_context_wgl_gl3(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    HDC windc = p->hdc;
+    HGLRC context = 0;
+
+    // A legacy context is needed to get access to the new functions.
+    HGLRC legacy_context = wglCreateContext(windc);
+    if (!legacy_context) {
+        MP_FATAL(ctx->vo, "Could not create GL context!\n");
+        return false;
+    }
+
+    // set context
+    if (!wglMakeCurrent(windc, legacy_context)) {
+        MP_FATAL(ctx->vo, "Could not set GL context!\n");
+        goto out;
+    }
+
+    const char *(GLAPIENTRY *wglGetExtensionsStringARB)(HDC hdc)
+        = wglgpa((const GLubyte*)"wglGetExtensionsStringARB");
+
+    if (!wglGetExtensionsStringARB)
+        goto unsupported;
+
+    // The driver must advertise WGL_ARB_create_context before the attribs
+    // entry point is looked up.
+    const char *wgl_exts = wglGetExtensionsStringARB(windc);
+    if (!gl_check_extension(wgl_exts, "WGL_ARB_create_context"))
+        goto unsupported;
+
+    HGLRC (GLAPIENTRY *wglCreateContextAttribsARB)(HDC hDC, HGLRC hShareContext,
+                                                   const int *attribList)
+        = wglgpa((const GLubyte*)"wglCreateContextAttribsARB");
+
+    if (!wglCreateContextAttribsARB)
+        goto unsupported;
+
+    // Zero-terminated list of (name, value) pairs: request a 3.0 context with
+    // no flags and the core profile bit.
+    int attribs[] = {
+        WGL_CONTEXT_MAJOR_VERSION_ARB, 3,
+        WGL_CONTEXT_MINOR_VERSION_ARB, 0,
+        WGL_CONTEXT_FLAGS_ARB, 0,
+        WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB,
+        0
+    };
+
+    context = wglCreateContextAttribsARB(windc, 0, attribs);
+    if (!context) {
+        // NVidia, instead of ignoring WGL_CONTEXT_FLAGS_ARB, will error out if
+        // it's present on pre-3.2 contexts.
+        // Remove it from attribs and retry the context creation.
+        // NOTE(review): indices 6/7 hold the WGL_CONTEXT_PROFILE_MASK_ARB
+        // pair, so zeroing them ends the list right after the
+        // WGL_CONTEXT_FLAGS_ARB pair. What the retry drops is the profile
+        // mask, not the flags; the comment above presumably means the profile
+        // mask - confirm.
+        attribs[6] = attribs[7] = 0;
+        context = wglCreateContextAttribsARB(windc, 0, attribs);
+    }
+    if (!context) {
+        int err = GetLastError();
+        MP_FATAL(ctx->vo, "Could not create an OpenGL 3.x context: error 0x%x\n", err);
+        goto out;
+    }
+
+    // The legacy context has served its purpose; release it before switching
+    // to the new one.
+    wglMakeCurrent(windc, NULL);
+    wglDeleteContext(legacy_context);
+
+    if (!wglMakeCurrent(windc, context)) {
+        MP_FATAL(ctx->vo, "Could not set GL3 context!\n");
+        wglDeleteContext(context);
+        return false;
+    }
+
+    p->context = context;
+    return true;
+
+    // Failure paths: "unsupported" logs and falls through into "out", which
+    // unbinds and deletes the legacy context.
+unsupported:
+    MP_ERR(ctx->vo, "The OpenGL driver does not support OpenGL 3.x \n");
+out:
+    wglMakeCurrent(windc, NULL);
+    wglDeleteContext(legacy_context);
+    return false;
+}
+
+// Callback for vo_w32_run_on_thread(): set up the DC and create a GL context,
+// preferring a 3.x context and falling back to a legacy one. The result is
+// left in p->context (still 0 if everything failed). The context is unbound
+// again before returning.
+static void create_ctx(void *ptr)
+{
+    struct ra_ctx *ctx = ptr;
+    struct priv *p = ctx->priv;
+
+    if (create_dc(ctx)) {
+        create_context_wgl_gl3(ctx);
+        if (!p->context)
+            create_context_wgl_old(ctx);
+
+        wglMakeCurrent(p->hdc, NULL);
+    }
+}
+
+// Returns true if DWM composition is enabled and composition timing info can
+// be queried, false otherwise.
+static bool compositor_active(struct ra_ctx *ctx)
+{
+    // For Windows 7.
+    BOOL composited = 0;
+    HRESULT hr = DwmIsCompositionEnabled(&composited);
+    if (FAILED(hr) || !composited)
+        return false;
+
+    // This works at least on Windows 8.1: it returns an error in fullscreen,
+    // which is also when we get consistent timings without DwmFlush. Might
+    // be cargo-cult.
+    DWM_TIMING_INFO timing = { .cbSize = sizeof(DWM_TIMING_INFO) };
+    return !FAILED(DwmGetCompositionTimingInfo(0, &timing));
+}
+
+// Present the back buffer, then decide which swap interval the driver should
+// use from now on.
+//
+// If the opengl-dwmflush option (wingl_dwm_flush) asks for it and DwmFlush()
+// succeeds, the driver interval is forced to 0; otherwise the interval last
+// requested through gl->SwapInterval (opt_swapinterval) is used. The driver
+// function is only called when the value changes and the function exists.
+static void wgl_swap_buffers(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    SwapBuffers(p->hdc);
+
+    // default if we don't DwmFLush
+    int interval = p->opt_swapinterval;
+
+    // Work out whether this frame should be synchronized with DwmFlush().
+    bool use_dwm_flush = false;
+    switch (p->opts->wingl_dwm_flush) {
+    case 0: // auto
+        use_dwm_flush = compositor_active(ctx);
+        break;
+    case 1: // windowed
+        use_dwm_flush = !ctx->vo->opts->fullscreen;
+        break;
+    case 2: // yes
+        use_dwm_flush = true;
+        break;
+    default: // no (-1), or any other value
+        break;
+    }
+
+    if (use_dwm_flush && DwmFlush() == S_OK)
+        interval = 0;
+
+    if (p->real_wglSwapInterval && interval != p->current_swapinterval) {
+        p->real_wglSwapInterval(interval);
+        MP_VERBOSE(ctx->vo, "set SwapInterval(%d)\n", interval);
+    }
+    p->current_swapinterval = interval;
+}
+
+// ra_ctx "init" hook for the WGL backend. Allocates the private state, sets up
+// the win32 VO, creates the GL context, loads the GL functions and initializes
+// the generic GL context layer. Returns true on success; on any failure
+// wgl_uninit() is called and false is returned.
+static bool wgl_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    GL *gl = &p->gl;
+
+    p->opts_cache = m_config_cache_alloc(ctx, ctx->global, &wingl_conf);
+    p->opts = p->opts_cache->opts;
+
+    if (!vo_w32_init(ctx->vo))
+        goto fail;
+
+    // create_ctx() is run through vo_w32_run_on_thread() and reports success
+    // only through p->context. It unbinds the context before returning.
+    vo_w32_run_on_thread(ctx->vo, create_ctx, ctx);
+    if (!p->context)
+        goto fail;
+
+    // Bind the context on the calling thread and register it for
+    // wgl_swap_interval().
+    current_wgl_context = p;
+    wglMakeCurrent(p->hdc, p->context);
+
+    mpgl_load_functions(gl, wglgpa, NULL, ctx->vo->log);
+
+    // Keep the loaded SwapInterval function (possibly NULL) for
+    // wgl_swap_buffers() and install the recording wrapper in its place.
+    if (!gl->SwapInterval)
+        MP_VERBOSE(ctx->vo, "WGL_EXT_swap_control missing.\n");
+    p->real_wglSwapInterval = gl->SwapInterval;
+    gl->SwapInterval = wgl_swap_interval;
+    // -1 differs from any non-negative interval, so the first
+    // wgl_swap_buffers() call with such an interval reaches the driver.
+    p->current_swapinterval = -1;
+
+    struct ra_gl_ctx_params params = {
+        .swap_buffers = wgl_swap_buffers,
+    };
+
+    if (!ra_gl_ctx_init(ctx, gl, params))
+        goto fail;
+
+    // Paired with DwmEnableMMCSS(FALSE) in wgl_uninit().
+    DwmEnableMMCSS(TRUE);
+    return true;
+
+fail:
+    wgl_uninit(ctx);
+    return false;
+}
+
+// Propagate the VO's current output size (dwidth x dheight) to the swapchain.
+static void resize(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    ra_gl_ctx_resize(ctx->swapchain, vo->dwidth, vo->dheight, 0);
+}
+
+// ra_ctx "reconfig" hook: reconfigure the win32 window, then resize the
+// swapchain to match. Always returns true.
+static bool wgl_reconfig(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_w32_config(vo);
+    resize(ctx);
+
+    return true;
+}
+
+// Callback for vo_w32_run_on_thread(): delete the GL context and release the
+// window DC if they exist, reset both fields, and clear the calling thread's
+// current_wgl_context.
+static void destroy_gl(void *ptr)
+{
+    struct ra_ctx *ctx = ptr;
+    struct priv *p = ctx->priv;
+
+    if (p->context) {
+        wglDeleteContext(p->context);
+        p->context = 0;
+    }
+
+    if (p->hdc) {
+        ReleaseDC(vo_w32_hwnd(ctx->vo), p->hdc);
+        p->hdc = NULL;
+    }
+
+    current_wgl_context = NULL;
+}
+
+// ra_ctx "uninit" hook; also the failure path of wgl_init(). Tears down in
+// this order: the generic GL context layer, the context binding on the
+// calling thread, the context and DC (through destroy_gl()), and finally the
+// win32 VO.
+static void wgl_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    ra_gl_ctx_uninit(ctx);
+    // Unbind on this thread before destroy_gl() deletes the context.
+    if (p->context)
+        wglMakeCurrent(p->hdc, 0);
+    vo_w32_run_on_thread(ctx->vo, destroy_gl, ctx);
+
+    // Counterpart of DwmEnableMMCSS(TRUE) in wgl_init().
+    DwmEnableMMCSS(FALSE);
+    vo_w32_uninit(ctx->vo);
+}
+
+// ra_ctx "control" hook: pass the request to the win32 VO and resize the
+// swapchain if VO_EVENT_RESIZE is set in *events afterwards. Returns the
+// result of vo_w32_control().
+static int wgl_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    const int result = vo_w32_control(ctx->vo, events, request, arg);
+
+    const bool resized = (*events & VO_EVENT_RESIZE) != 0;
+    if (resized)
+        resize(ctx);
+
+    return result;
+}
+
+// Function table for the "win" (WGL) OpenGL context.
+const struct ra_ctx_fns ra_ctx_wgl = {
+    .type           = "opengl",
+    .name           = "win",
+    // lifetime
+    .init           = wgl_init,
+    .uninit         = wgl_uninit,
+    // configuration and events
+    .reconfig       = wgl_reconfig,
+    .control        = wgl_control,
+};
diff --git a/video/out/opengl/context_x11egl.c b/video/out/opengl/context_x11egl.c
new file mode 100644
index 0000000..3201f29
--- /dev/null
+++ b/video/out/opengl/context_x11egl.c
@@ -0,0 +1,225 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <X11/Xlib.h>
+#include <X11/extensions/Xpresent.h>
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "common/common.h"
+#include "video/out/present_sync.h"
+#include "video/out/x11_common.h"
+#include "context.h"
+#include "egl_helpers.h"
+#include "utils.h"
+
+// Platform enum passed to mpegl_get_display() together with the
+// "EGL_EXT_platform_x11" extension name (see mpegl_init()).
+#define EGL_PLATFORM_X11_EXT 0x31D5
+
+// Per-context state of the X11/EGL backend (ctx->priv).
+struct priv {
+    // GL function table, filled by mpegl_load_functions().
+    GL gl;
+    // EGL objects created in mpegl_init().
+    EGLDisplay egl_display;
+    EGLContext egl_context;
+    EGLSurface egl_surface;
+};
+
+// ra_ctx "uninit" hook; also the failure path of mpegl_init(). Shuts down the
+// generic GL context layer, unbinds the current context, terminates the EGL
+// display and finally tears down the X11 VO state.
+static void mpegl_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+
+    ra_gl_ctx_uninit(ctx);
+
+    EGLDisplay dpy = p->egl_display;
+    eglMakeCurrent(dpy, EGL_NO_SURFACE, EGL_NO_SURFACE, EGL_NO_CONTEXT);
+    eglTerminate(dpy);
+
+    vo_x11_uninit(ctx->vo);
+}
+
+// refine_config callback used when alpha is wanted: return the index of the
+// first config whose native X visual is reported as RGBA by
+// vo_x11_is_rgba_visual(). Falls back to index 0 if none qualifies.
+static int pick_xrgba_config(void *user_data, EGLConfig *configs, int num_configs)
+{
+    struct ra_ctx *ctx = user_data;
+    struct priv *p = ctx->priv;
+    struct vo *vo = ctx->vo;
+
+    for (int idx = 0; idx < num_configs; idx++) {
+        int visual_id = 0, count;
+        eglGetConfigAttrib(p->egl_display, configs[idx], EGL_NATIVE_VISUAL_ID,
+                           &visual_id);
+
+        XVisualInfo query = {.visualid = visual_id};
+        XVisualInfo *info = XGetVisualInfo(vo->x11->display, VisualIDMask,
+                                           &query, &count);
+        if (!info)
+            continue;
+
+        bool rgba = vo_x11_is_rgba_visual(info);
+        XFree(info);
+        if (rgba)
+            return idx;
+    }
+
+    return 0;
+}
+
+// check_visible callback for the GL context layer; the answer comes straight
+// from vo_x11_check_visible().
+static bool mpegl_check_visible(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    return vo_x11_check_visible(vo);
+}
+
+// swap_buffers callback: swap the EGL surface and, when the X11 state has
+// use_present set, notify the present-sync helper about the swap.
+static void mpegl_swap_buffers(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    eglSwapBuffers(p->egl_display, p->egl_surface);
+
+    struct vo_x11_state *x11 = ctx->vo->x11;
+    if (x11->use_present)
+        present_sync_swap(x11->present);
+}
+
+// get_vsync callback: fill *info from the present-sync helper. When the X11
+// state does not have use_present set, *info is left untouched.
+static void mpegl_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info)
+{
+    struct vo_x11_state *x11 = ctx->vo->x11;
+    if (!x11->use_present)
+        return;
+
+    present_sync_get_info(x11->present, info);
+}
+
+// ra_ctx "init" hook for the X11/EGL backend. Sets up X11, obtains and
+// initializes an EGL display, creates a context, creates the VO window with
+// the X visual that belongs to the chosen EGL config, creates the window
+// surface, makes the context current and initializes the generic GL context
+// layer. Returns true on success; on any failure mpegl_uninit() is called and
+// false is returned.
+static bool mpegl_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    struct vo *vo = ctx->vo;
+    // While probing, failures are logged at verbose level instead of fatal.
+    int msgl = ctx->opts.probing ? MSGL_V : MSGL_FATAL;
+
+    if (!vo_x11_init(vo))
+        goto uninit;
+
+    p->egl_display = mpegl_get_display(EGL_PLATFORM_X11_EXT,
+                                       "EGL_EXT_platform_x11",
+                                        vo->x11->display);
+    if (!eglInitialize(p->egl_display, NULL, NULL)) {
+        MP_MSG(ctx, msgl, "Could not initialize EGL.\n");
+        goto uninit;
+    }
+
+    // Only refine the config choice when an alpha channel is wanted.
+    struct mpegl_cb cb = {
+        .user_data = ctx,
+        .refine_config = ctx->opts.want_alpha ? pick_xrgba_config : NULL,
+    };
+
+    EGLConfig config;
+    if (!mpegl_create_context_cb(ctx, p->egl_display, cb, &p->egl_context, &config))
+        goto uninit;
+
+    // Look up the X visual that corresponds to the chosen EGL config; the
+    // window is created with it below.
+    int cid, vID, n;
+    if (!eglGetConfigAttrib(p->egl_display, config, EGL_CONFIG_ID, &cid)) {
+        MP_FATAL(ctx, "Getting EGL_CONFIG_ID failed!\n");
+        goto uninit;
+    }
+    if (!eglGetConfigAttrib(p->egl_display, config, EGL_NATIVE_VISUAL_ID, &vID)) {
+        MP_FATAL(ctx, "Getting X visual ID failed!\n");
+        goto uninit;
+    }
+    MP_VERBOSE(ctx, "Choosing visual EGL config 0x%x, visual ID 0x%x\n", cid, vID);
+    XVisualInfo template = {.visualid = vID};
+    XVisualInfo *vi = XGetVisualInfo(vo->x11->display, VisualIDMask, &template, &n);
+
+    if (!vi) {
+        MP_FATAL(ctx, "Getting X visual failed!\n");
+        goto uninit;
+    }
+
+    if (!vo_x11_create_vo_window(vo, vi, "gl")) {
+        XFree(vi);
+        goto uninit;
+    }
+
+    XFree(vi);
+
+    // Try the platform surface helper first (it is given a pointer to the X
+    // window), then fall back to plain eglCreateWindowSurface(), which is
+    // given the window itself.
+    p->egl_surface = mpegl_create_window_surface(
+        p->egl_display, config, &vo->x11->window);
+    if (p->egl_surface == EGL_NO_SURFACE) {
+        p->egl_surface = eglCreateWindowSurface(
+            p->egl_display, config, (EGLNativeWindowType)vo->x11->window, NULL);
+    }
+    if (p->egl_surface == EGL_NO_SURFACE) {
+        MP_FATAL(ctx, "Could not create EGL surface!\n");
+        goto uninit;
+    }
+
+    if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface,
+                        p->egl_context))
+    {
+        MP_FATAL(ctx, "Could not make context current!\n");
+        goto uninit;
+    }
+
+    // Requires the context made current above.
+    mpegl_load_functions(&p->gl, ctx->log);
+
+    struct ra_gl_ctx_params params = {
+        .check_visible = mpegl_check_visible,
+        .swap_buffers = mpegl_swap_buffers,
+        .get_vsync    = mpegl_get_vsync,
+    };
+
+    if (!ra_gl_ctx_init(ctx, &p->gl, params))
+        goto uninit;
+
+    // Register the X display under the name "x11" as a native resource.
+    ra_add_native_resource(ctx->ra, "x11", vo->x11->display);
+
+    return true;
+
+uninit:
+    mpegl_uninit(ctx);
+    return false;
+}
+
+// Propagate the VO's current output size (dwidth x dheight) to the swapchain.
+static void resize(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    ra_gl_ctx_resize(ctx->swapchain, vo->dwidth, vo->dheight, 0);
+}
+
+// ra_ctx "reconfig" hook: reconfigure the X11 window, then resize the
+// swapchain to match. Always returns true.
+static bool mpegl_reconfig(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_x11_config_vo_window(vo);
+    resize(ctx);
+
+    return true;
+}
+
+// ra_ctx "control" hook: pass the request to the X11 VO code and resize the
+// swapchain if VO_EVENT_RESIZE is set in *events afterwards. Returns the
+// result of vo_x11_control().
+static int mpegl_control(struct ra_ctx *ctx, int *events, int request,
+                         void *arg)
+{
+    const int result = vo_x11_control(ctx->vo, events, request, arg);
+
+    const bool resized = (*events & VO_EVENT_RESIZE) != 0;
+    if (resized)
+        resize(ctx);
+
+    return result;
+}
+
+// ra_ctx "wakeup" hook: hands the request to vo_x11_wakeup().
+static void mpegl_wakeup(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+    vo_x11_wakeup(vo);
+}
+
+// ra_ctx "wait_events" hook: hands the deadline (until_time_ns) to
+// vo_x11_wait_events().
+static void mpegl_wait_events(struct ra_ctx *ctx, int64_t until_time_ns)
+{
+    struct vo *vo = ctx->vo;
+    vo_x11_wait_events(vo, until_time_ns);
+}
+
+// Function table for the "x11egl" OpenGL context.
+const struct ra_ctx_fns ra_ctx_x11_egl = {
+    .type           = "opengl",
+    .name           = "x11egl",
+    // lifetime
+    .init           = mpegl_init,
+    .uninit         = mpegl_uninit,
+    // configuration
+    .reconfig       = mpegl_reconfig,
+    // event handling
+    .control        = mpegl_control,
+    .wakeup         = mpegl_wakeup,
+    .wait_events    = mpegl_wait_events,
+};
diff --git a/video/out/opengl/egl_helpers.c b/video/out/opengl/egl_helpers.c
new file mode 100644
index 0000000..3bf6239
--- /dev/null
+++ b/video/out/opengl/egl_helpers.c
@@ -0,0 +1,381 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#if HAVE_LIBDL
+#include <dlfcn.h>
+#endif
+
+#include "common/common.h"
+
+#include "egl_helpers.h"
+#include "common.h"
+#include "utils.h"
+#include "context.h"
+
+#if HAVE_EGL_ANGLE
+// On Windows, egl_helpers.c is only used by ANGLE, where the EGL functions may
+// be loaded dynamically from ANGLE DLLs
+#include "angle_dynamic.h"
+#endif
+
+// EGL 1.5
+// Fallback definitions, compiled in only when the EGL headers do not define
+// EGL_CONTEXT_OPENGL_PROFILE_MASK.
+#ifndef EGL_CONTEXT_OPENGL_PROFILE_MASK
+#define EGL_CONTEXT_MAJOR_VERSION               0x3098
+#define EGL_CONTEXT_MINOR_VERSION               0x30FB
+#define EGL_CONTEXT_OPENGL_PROFILE_MASK         0x30FD
+#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT     0x00000001
+#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE   0x31B1
+typedef intptr_t EGLAttrib;
+#endif
+
+// Not every EGL provider (like RPI) has these.
+// With this fallback an attribute list that contains EGL_CONTEXT_FLAGS_KHR
+// simply ends at that entry, because the name itself is EGL_NONE.
+#ifndef EGL_CONTEXT_FLAGS_KHR
+#define EGL_CONTEXT_FLAGS_KHR EGL_NONE
+#endif
+
+// With this fallback the debug flag requested in create_context() becomes 0.
+#ifndef EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR
+#define EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR 0
+#endif
+
+// An EGL config attribute together with its printable name.
+struct mp_egl_config_attr {
+    int attrib;
+    const char *name;
+};
+
+// Builds a table entry whose name is the stringified token.
+#define MP_EGL_ATTRIB(id) {id, # id}
+
+// Attributes printed by dump_egl_config(), in output order.
+static const struct mp_egl_config_attr mp_egl_attribs[] = {
+    MP_EGL_ATTRIB(EGL_CONFIG_ID),
+    MP_EGL_ATTRIB(EGL_RED_SIZE),
+    MP_EGL_ATTRIB(EGL_GREEN_SIZE),
+    MP_EGL_ATTRIB(EGL_BLUE_SIZE),
+    MP_EGL_ATTRIB(EGL_ALPHA_SIZE),
+    MP_EGL_ATTRIB(EGL_COLOR_BUFFER_TYPE),
+    MP_EGL_ATTRIB(EGL_CONFIG_CAVEAT),
+    MP_EGL_ATTRIB(EGL_CONFORMANT),
+    MP_EGL_ATTRIB(EGL_NATIVE_VISUAL_ID),
+};
+
+// Log every attribute listed in mp_egl_attribs for the given config at message
+// level msgl, one "  NAME=0x..." line per attribute. Attributes that cannot be
+// queried are printed as "<error>".
+static void dump_egl_config(struct mp_log *log, int msgl, EGLDisplay display,
+                            EGLConfig config)
+{
+    for (int i = 0; i < MP_ARRAY_SIZE(mp_egl_attribs); i++) {
+        const struct mp_egl_config_attr *attr = &mp_egl_attribs[i];
+        EGLint value = -1;
+
+        if (!eglGetConfigAttrib(display, config, attr->attrib, &value)) {
+            mp_msg(log, msgl, "  %s=<error>\n", attr->name);
+            continue;
+        }
+        mp_msg(log, msgl, "  %s=0x%x\n", attr->name, value);
+    }
+}
+
+// Function loader handed to mpgl_load_functions2(). ctx is unused. Returns the
+// address of the named function, or NULL if it cannot be found.
+static void *mpegl_get_proc_address(void *ctx, const char *name)
+{
+    void *fn = eglGetProcAddress(name);
+    if (fn)
+        return fn;
+
+#if defined(__GLIBC__) && HAVE_LIBDL
+    // Some crappy ARM/Linux things do not provide EGL 1.5, so above call does
+    // not necessarily return function pointers for core functions. Try to get
+    // them from a loaded GLES lib. As POSIX leaves RTLD_DEFAULT "reserved",
+    // use it only with glibc.
+    return dlsym(RTLD_DEFAULT, name);
+#else
+    return fn;
+#endif
+}
+
+// Try to create an EGL context for one client API.
+//  ctx:         used for logging and for the probing/want_alpha/debug options
+//  display:     initialized EGL display
+//  es:          false = desktop OpenGL, true = GLES 2.x or later
+//  cb:          optional refine_config callback that picks one of the
+//               matching configs (index 0 is used without a callback)
+//  out_context: receives the new context on success
+//  out_config:  receives the config the context was created with on success
+// Returns true on success. On failure false is returned and the out_*
+// pointers are not written.
+static bool create_context(struct ra_ctx *ctx, EGLDisplay display,
+                           bool es, struct mpegl_cb cb,
+                           EGLContext *out_context, EGLConfig *out_config)
+{
+    // While probing, failures are logged at verbose level instead of fatal.
+    int msgl = ctx->opts.probing ? MSGL_V : MSGL_FATAL;
+
+    EGLenum api;
+    EGLint rend;
+    const char *name;
+
+    if (!es) {
+        api = EGL_OPENGL_API;
+        rend = EGL_OPENGL_BIT;
+        name = "Desktop OpenGL";
+    } else {
+        api = EGL_OPENGL_ES_API;
+        rend = EGL_OPENGL_ES2_BIT;
+        name = "GLES 2.x +";
+    }
+
+    MP_VERBOSE(ctx, "Trying to create %s context.\n", name);
+
+    if (!eglBindAPI(api)) {
+        MP_VERBOSE(ctx, "Could not bind API!\n");
+        return false;
+    }
+
+    EGLint attributes[] = {
+        EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
+        EGL_RED_SIZE, 8,
+        EGL_GREEN_SIZE, 8,
+        EGL_BLUE_SIZE, 8,
+        EGL_ALPHA_SIZE, ctx->opts.want_alpha ? 8 : 0,
+        EGL_RENDERABLE_TYPE, rend,
+        EGL_NONE
+    };
+
+    // First call only counts the matching configs, second call fetches them.
+    EGLint num_configs;
+    if (!eglChooseConfig(display, attributes, NULL, 0, &num_configs))
+        num_configs = 0;
+
+    EGLConfig *configs = talloc_array(NULL, EGLConfig, num_configs);
+    if (!eglChooseConfig(display, attributes, configs, num_configs, &num_configs))
+        num_configs = 0;
+
+    if (!num_configs) {
+        talloc_free(configs);
+        MP_MSG(ctx, msgl, "Could not choose EGLConfig for %s!\n", name);
+        return false;
+    }
+
+    for (int n = 0; n < num_configs; n++)
+        dump_egl_config(ctx->log, MSGL_TRACE, display, configs[n]);
+
+    int chosen = 0;
+    if (cb.refine_config)
+        chosen = cb.refine_config(cb.user_data, configs, num_configs);
+    // A negative index is the callback's way of reporting failure. An index
+    // past the end of the list is treated the same way, so that configs[] is
+    // never read out of bounds.
+    if (chosen < 0 || chosen >= num_configs) {
+        talloc_free(configs);
+        MP_MSG(ctx, msgl, "Could not refine EGLConfig for %s!\n", name);
+        return false;
+    }
+    EGLConfig config = configs[chosen];
+
+    talloc_free(configs);
+
+    MP_DBG(ctx, "Chosen EGLConfig:\n");
+    dump_egl_config(ctx->log, MSGL_DEBUG, display, config);
+
+    int ctx_flags = ctx->opts.debug ? EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR : 0;
+    // EGLContext is already a handle type, so this is a value, not a pointer
+    // to one.
+    EGLContext egl_ctx = EGL_NO_CONTEXT;
+
+    if (!es) {
+        // Walk the zero-terminated list of acceptable desktop GL versions and
+        // take the first one the driver can create.
+        for (int n = 0; mpgl_min_required_gl_versions[n]; n++) {
+            int ver = mpgl_min_required_gl_versions[n];
+
+            EGLint attrs[] = {
+                EGL_CONTEXT_MAJOR_VERSION, MPGL_VER_GET_MAJOR(ver),
+                EGL_CONTEXT_MINOR_VERSION, MPGL_VER_GET_MINOR(ver),
+                EGL_CONTEXT_OPENGL_PROFILE_MASK,
+                    ver >= 320 ? EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT : 0,
+                EGL_CONTEXT_FLAGS_KHR, ctx_flags,
+                EGL_NONE
+            };
+
+            egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs);
+            if (egl_ctx != EGL_NO_CONTEXT)
+                break;
+        }
+    }
+    if (egl_ctx == EGL_NO_CONTEXT) {
+        // Fallback for EGL 1.4 without EGL_KHR_create_context or GLES
+        // Add the context flags only for GLES - GL has been attempted above
+        EGLint attrs[] = {
+            EGL_CONTEXT_CLIENT_VERSION, 2,
+            es ? EGL_CONTEXT_FLAGS_KHR : EGL_NONE, ctx_flags,
+            EGL_NONE
+        };
+
+        egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs);
+    }
+
+    if (egl_ctx == EGL_NO_CONTEXT) {
+        MP_MSG(ctx, msgl, "Could not create EGL context for %s!\n", name);
+        return false;
+    }
+
+    *out_context = egl_ctx;
+    *out_config = config;
+    return true;
+}
+
+// Substitute a placeholder for a NULL string when logging.
+#define STR_OR_ERR(s) ((s) ? (s) : "(error)")
+
+// Create a context and return it and the config it was created with. If it
+// returns false, the out_* pointers are set to NULL.
+// Same as mpegl_create_context_cb() with no callbacks set.
+bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display,
+                          EGLContext *out_context, EGLConfig *out_config)
+{
+    const struct mpegl_cb no_callbacks = {0};
+
+    return mpegl_create_context_cb(ctx, display, no_callbacks,
+                                   out_context, out_config);
+}
+
+// Create a context and return it and the config it was created with. If it
+// returns false, the out_* pointers are set to NULL.
+// Depending on the GLES mode reported by ra_gl_ctx_get_glesmode(), desktop
+// OpenGL is tried first and/or GLES second; the first success wins.
+bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display,
+                             struct mpegl_cb cb, EGLContext *out_context,
+                             EGLConfig *out_config)
+{
+    *out_context = NULL;
+    *out_config = NULL;
+
+    // Log what the EGL implementation says about itself.
+    const char *egl_version = eglQueryString(display, EGL_VERSION);
+    const char *egl_vendor = eglQueryString(display, EGL_VENDOR);
+    const char *egl_apis = eglQueryString(display, EGL_CLIENT_APIS);
+    MP_VERBOSE(ctx, "EGL_VERSION=%s\nEGL_VENDOR=%s\nEGL_CLIENT_APIS=%s\n",
+               STR_OR_ERR(egl_version), STR_OR_ERR(egl_vendor),
+               STR_OR_ERR(egl_apis));
+
+    enum gles_mode mode = ra_gl_ctx_get_glesmode(ctx);
+    const bool try_desktop = mode == GLES_NO || mode == GLES_AUTO;
+    const bool try_gles = mode == GLES_YES || mode == GLES_AUTO;
+
+    if (try_desktop &&
+        create_context(ctx, display, false, cb, out_context, out_config))
+        return true;
+
+    if (try_gles &&
+        create_context(ctx, display, true, cb, out_context, out_config))
+        return true;
+
+    // While probing, the failure is logged at verbose level only.
+    int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+    MP_MSG(ctx, msgl, "Could not create a GL context.\n");
+    return false;
+}
+
+// SwapInterval implementation on top of eglSwapInterval(), using the display
+// of the current context. Returns 0 on success and 1 on failure (including
+// when there is no current display).
+static int GLAPIENTRY swap_interval(int interval)
+{
+    EGLDisplay current = eglGetCurrentDisplay();
+
+    return current ? !eglSwapInterval(current, interval) : 1;
+}
+
+// Load gl version and function pointers into *gl.
+// Expects a current EGL context set.
+// The EGL extension string of the current display (or "" when there is no
+// current display) is passed along to the loader. If the loader leaves
+// gl->SwapInterval unset, the EGL-based swap_interval() is installed.
+void mpegl_load_functions(struct GL *gl, struct mp_log *log)
+{
+    EGLDisplay current = eglGetCurrentDisplay();
+    const char *extensions = current != EGL_NO_DISPLAY
+                           ? eglQueryString(current, EGL_EXTENSIONS)
+                           : "";
+
+    mpgl_load_functions2(gl, mpegl_get_proc_address, NULL, extensions, log);
+
+    if (gl->SwapInterval)
+        return;
+    gl->SwapInterval = swap_interval;
+}
+
+// Returns true if the EGL client library reports version 1.5 or newer without
+// needing a display, false otherwise (including when the version string is
+// unavailable or cannot be parsed).
+static bool is_egl15(void)
+{
+    // It appears that EGL 1.4 is specified to _require_ an initialized display
+    // for EGL_VERSION, while EGL 1.5 is _required_ to return the EGL version.
+    const char *ver = eglQueryString(EGL_NO_DISPLAY, EGL_VERSION);
+    // Of course we have to go through the excruciating pain of parsing a
+    // version string, since EGL provides no other way without a display. In
+    // theory version!=NULL is already proof enough that it's 1.5, but be
+    // extra defensive, since this should have been true for EGL_EXTENSIONS as
+    // well, but then they added an extension that modified standard behavior.
+    int ma = 0, mi = 0;
+    if (!ver || sscanf(ver, "%d.%d", &ma, &mi) != 2)
+        return false;
+
+    // The minor number only matters for major version 1; any major above 1 is
+    // newer than 1.5, and any major below 1 is older regardless of the minor.
+    return ma > 1 || (ma == 1 && mi >= 5);
+}
+
+// This is similar to eglGetPlatformDisplay(platform, native_display, NULL),
+// except that it 1. may use eglGetPlatformDisplayEXT, 2. checks for the
+// platform client extension platform_ext_name, and 3. does not support passing
+// an attrib list, because the type for that parameter is different in the EXT
+// and standard functions (EGL can't not fuck up, no matter what).
+//  platform: e.g. EGL_PLATFORM_X11_KHR
+//  platform_ext_name: e.g. "EGL_KHR_platform_x11"
+//  native_display: e.g. X11 Display*
+// Returns EGL_NO_DISPLAY on failure.
+// Warning: the EGL version can be different at runtime depending on the chosen
+// platform, so this might return a display corresponding to some older EGL
+// version (often 1.4).
+// Often, there are two extension variants of a platform (KHR and EXT). If you
+// need to check both, call this function twice. (Why do they define them twice?
+// They're crazy.)
+EGLDisplay mpegl_get_display(EGLenum platform, const char *platform_ext_name,
+                             void *native_display)
+{
+    // EGL is awful. Designed as ultra-portable library, it fails at dealing
+    // with slightly more complex environment than its short-sighted design
+    // could deal with. So they invented an awful, awful kludge that modifies
+    // EGL standard behavior, the EGL_EXT_client_extensions extension. EGL 1.4
+    // normally returns NULL when querying EGL_EXTENSIONS on EGL_NO_DISPLAY,
+    // however, with that extension, it'll return the set of "client extensions",
+    // which may include EGL_EXT_platform_base.
+
+    // Prerequisite: check the platform extension.
+    // If this is either EGL 1.5, or 1.4 with EGL_EXT_client_extensions, then
+    // this must return a valid extension string.
+    const char *exts = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS);
+    if (!gl_check_extension(exts, platform_ext_name))
+        return EGL_NO_DISPLAY;
+
+    // Before we go through the EGL 1.4 BS, try if we can use native EGL 1.5
+    if (is_egl15()) {
+        // This is EGL 1.5. It must support querying standard functions through
+        // eglGetProcAddress(). Note that on EGL 1.4, even if the function is
+        // unknown, it could return non-NULL anyway (because EGL is crazy).
+        EGLDisplay (EGLAPIENTRYP GetPlatformDisplay)
+            (EGLenum, void *, const EGLAttrib *) =
+            (void *)eglGetProcAddress("eglGetPlatformDisplay");
+        // (It should be impossible to be NULL, but uh.)
+        if (GetPlatformDisplay)
+            return GetPlatformDisplay(platform, native_display, NULL);
+    }
+
+    // Second choice: the EXT entry point, which is only looked up when
+    // EGL_EXT_platform_base is listed in the client extensions.
+    if (!gl_check_extension(exts, "EGL_EXT_platform_base"))
+        return EGL_NO_DISPLAY;
+
+    EGLDisplay (EGLAPIENTRYP GetPlatformDisplayEXT)(EGLenum, void*, const EGLint*)
+        = (void *)eglGetProcAddress("eglGetPlatformDisplayEXT");
+
+    // (It should be impossible to be NULL, but uh.)
+    if (GetPlatformDisplayEXT)
+        return GetPlatformDisplayEXT(platform, native_display, NULL);
+
+    return EGL_NO_DISPLAY;
+}
+
+// Counterpart of mpegl_get_display() for eglCreatePlatformWindowSurface(EXT).
+// As there, no attribute list can be passed because its type differs between
+// the core and the EXT function.
+//  dpy, config: display and config to create the surface for
+//  native_window: passed through unchanged to the platform function
+// Returns EGL_NO_SURFACE on failure.
+EGLSurface mpegl_create_window_surface(EGLDisplay dpy, EGLConfig config,
+                                       void *native_window)
+{
+    // Use the EGL 1.5 function if possible
+    if (is_egl15()) {
+        EGLSurface (EGLAPIENTRYP create_core)
+            (EGLDisplay, EGLConfig, void *, const EGLAttrib *) =
+            (void *)eglGetProcAddress("eglCreatePlatformWindowSurface");
+        // (It should be impossible to be NULL, but uh.)
+        if (create_core)
+            return create_core(dpy, config, native_window, NULL);
+    }
+
+    // The *EXT function is only looked up when the extension providing it is
+    // listed in the client extensions.
+    const char *client_exts = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS);
+    if (gl_check_extension(client_exts, "EGL_EXT_platform_base")) {
+        EGLSurface (EGLAPIENTRYP create_ext)
+            (EGLDisplay, EGLConfig, void *, const EGLint *) =
+            (void *)eglGetProcAddress("eglCreatePlatformWindowSurfaceEXT");
+        // (It should be impossible to be NULL, but uh.)
+        if (create_ext)
+            return create_ext(dpy, config, native_window, NULL);
+    }
+
+    return EGL_NO_SURFACE;
+}
diff --git a/video/out/opengl/egl_helpers.h b/video/out/opengl/egl_helpers.h
new file mode 100644
index 0000000..32ec5d1
--- /dev/null
+++ b/video/out/opengl/egl_helpers.h
@@ -0,0 +1,38 @@
+#ifndef MP_GL_EGL_HELPERS_H
+#define MP_GL_EGL_HELPERS_H
+
+#include <stdbool.h>
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "video/out/gpu/context.h"
+
+struct mp_log;
+
+bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display,
+                          EGLContext *out_context, EGLConfig *out_config);
+
+struct mpegl_cb {
+    // If set, pick the desired config from the given list and return its
+    // index. Defaults to 0 (the configs are sorted by eglChooseConfig). Return
+    // a negative number to indicate an error condition, or that no suitable
+    // config could be found.
+    int (*refine_config)(void *user_data, EGLConfig *configs, int num_configs);
+    void *user_data; // presumably handed to refine_config as-is (TODO confirm)
+};
+
+bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display,
+                             struct mpegl_cb cb, EGLContext *out_context,
+                             EGLConfig *out_config);
+
+struct GL;
+void mpegl_load_functions(struct GL *gl, struct mp_log *log);
+
+EGLDisplay mpegl_get_display(EGLenum platform, const char *platform_ext_name,
+                             void *native_display);
+
+EGLSurface mpegl_create_window_surface(EGLDisplay dpy, EGLConfig config,
+                                       void *native_window);
+
+#endif
diff --git a/video/out/opengl/formats.c b/video/out/opengl/formats.c
new file mode 100644
index 0000000..a0b79e2
--- /dev/null
+++ b/video/out/opengl/formats.c
@@ -0,0 +1,196 @@
+#include "common/common.h"
+#include "formats.h"
+
+enum {
+    // --- Short aliases for GL component types (keep the table below readable)
+    T_U8        = GL_UNSIGNED_BYTE,
+    T_U16       = GL_UNSIGNED_SHORT,
+    T_FL        = GL_FLOAT,
+};
+
+// List of allowed formats, and their usability for bilinear filtering and FBOs.
+// This is limited to combinations that are useful for our renderer.
+const struct gl_format gl_formats[] = {
+    // Desktop GL 3+ (or F_GL2F); on GLES 3+ the 16 bit ones need F_EXT16.
+    {"r8",      GL_R8,       GL_RED,             T_U8,  F_CF | F_GL3 | F_GL2F | F_ES3},
+    {"rg8",     GL_RG8,      GL_RG,              T_U8,  F_CF | F_GL3 | F_GL2F | F_ES3},
+    {"rgb8",    GL_RGB8,     GL_RGB,             T_U8,  F_CF | F_GL3 | F_GL2F | F_ES3},
+    {"rgba8",   GL_RGBA8,    GL_RGBA,            T_U8,  F_CF | F_GL3 | F_GL2F | F_ES3},
+    {"r16",     GL_R16,      GL_RED,             T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16},
+    {"rg16",    GL_RG16,     GL_RG,              T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16},
+    {"rgb16",   GL_RGB16,    GL_RGB,             T_U16, F_CF | F_GL3 | F_GL2F},
+    {"rgba16",  GL_RGBA16,   GL_RGBA,            T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16},
+
+    // With F_EXT16, rgb16 is filterable but specifically not color-renderable.
+    {"rgb16",   GL_RGB16,    GL_RGB,             T_U16, F_TF | F_EXT16},
+
+    // GL2 legacy. Ignores possibly present FBO extensions (no CF flag set).
+    {"l8",    GL_LUMINANCE8, GL_LUMINANCE,       T_U8,  F_TF | F_GL2},
+    {"la8", GL_LUMINANCE8_ALPHA8, GL_LUMINANCE_ALPHA, T_U8,  F_TF | F_GL2},
+    {"rgb8",    GL_RGB8,     GL_RGB,             T_U8,  F_TF | F_GL2},
+    {"rgba8",   GL_RGBA8,    GL_RGBA,            T_U8,  F_TF | F_GL2},
+    {"l16",  GL_LUMINANCE16, GL_LUMINANCE,       T_U16, F_TF | F_GL2},
+    {"la16", GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, T_U16, F_TF | F_GL2},
+    {"rgb16",   GL_RGB16,    GL_RGB,             T_U16, F_TF | F_GL2},
+    {"rgba16",  GL_RGBA16,   GL_RGBA,            T_U16, F_TF | F_GL2},
+
+    // ES3 legacy. This is literally to compensate for Apple bugs in their iOS
+    // interop (can they do anything right?). ES3 still allows these formats,
+    // but they are deprecated.
+    {"l" ,      GL_LUMINANCE,GL_LUMINANCE,       T_U8,  F_CF | F_ES3},
+    {"la",GL_LUMINANCE_ALPHA,GL_LUMINANCE_ALPHA, T_U8,  F_CF | F_ES3},
+
+    // ES2 legacy (unsized internal formats, filterable only).
+    {"l" ,      GL_LUMINANCE,GL_LUMINANCE,       T_U8,  F_TF | F_ES2},
+    {"la",GL_LUMINANCE_ALPHA,GL_LUMINANCE_ALPHA, T_U8,  F_TF | F_ES2},
+    {"rgb",     GL_RGB,      GL_RGB,             T_U8,  F_TF | F_ES2},
+    {"rgba",    GL_RGBA,     GL_RGBA,            T_U8,  F_TF | F_ES2},
+
+    // Non-normalized integer formats (never flagged as filterable here).
+    // Follows ES 3.0 as to which are color-renderable.
+    {"r8ui",    GL_R8UI,     GL_RED_INTEGER,     T_U8,  F_CR | F_GL3 | F_ES3},
+    {"rg8ui",   GL_RG8UI,    GL_RG_INTEGER,      T_U8,  F_CR | F_GL3 | F_ES3},
+    {"rgb8ui",  GL_RGB8UI,   GL_RGB_INTEGER,     T_U8,         F_GL3 | F_ES3},
+    {"rgba8ui", GL_RGBA8UI,  GL_RGBA_INTEGER,    T_U8,  F_CR | F_GL3 | F_ES3},
+    {"r16ui",   GL_R16UI,    GL_RED_INTEGER,     T_U16, F_CR | F_GL3 | F_ES3},
+    {"rg16ui",  GL_RG16UI,   GL_RG_INTEGER,      T_U16, F_CR | F_GL3 | F_ES3},
+    {"rgb16ui", GL_RGB16UI,  GL_RGB_INTEGER,     T_U16,        F_GL3 | F_ES3},
+    {"rgba16ui",GL_RGBA16UI, GL_RGBA_INTEGER,    T_U16, F_CR | F_GL3 | F_ES3},
+
+    // On GL3+ or GL2.1 with GL_ARB_texture_float, floats work fully.
+    {"r16f",    GL_R16F,     GL_RED,             T_FL,  F_F16 | F_CF | F_GL3 | F_GL2F},
+    {"rg16f",   GL_RG16F,    GL_RG,              T_FL,  F_F16 | F_CF | F_GL3 | F_GL2F},
+    {"rgb16f",  GL_RGB16F,   GL_RGB,             T_FL,  F_F16 | F_CF | F_GL3 | F_GL2F},
+    {"rgba16f", GL_RGBA16F,  GL_RGBA,            T_FL,  F_F16 | F_CF | F_GL3 | F_GL2F},
+    {"r32f",    GL_R32F,     GL_RED,             T_FL,          F_CF | F_GL3 | F_GL2F},
+    {"rg32f",   GL_RG32F,    GL_RG,              T_FL,          F_CF | F_GL3 | F_GL2F},
+    {"rgb32f",  GL_RGB32F,   GL_RGB,             T_FL,          F_CF | F_GL3 | F_GL2F},
+    {"rgba32f", GL_RGBA32F,  GL_RGBA,            T_FL,          F_CF | F_GL3 | F_GL2F},
+
+    // Note: we simply don't support float anything on ES2, despite extensions.
+    // We also don't bother with non-filterable float formats, and we ignore
+    // 32 bit float formats that are not blendable when rendering to them.
+
+    // On ES3.2+, 16 bit floats work fully (3-component formats: filtering only).
+    // F_EXTF16 implies extensions that also enable 16 bit floats fully.
+    {"r16f",    GL_R16F,     GL_RED,             T_FL,  F_F16 | F_CF | F_ES32 | F_EXTF16},
+    {"rg16f",   GL_RG16F,    GL_RG,              T_FL,  F_F16 | F_CF | F_ES32 | F_EXTF16},
+    {"rgb16f",  GL_RGB16F,   GL_RGB,             T_FL,  F_F16 | F_TF | F_ES32 | F_EXTF16},
+    {"rgba16f", GL_RGBA16F,  GL_RGBA,            T_FL,  F_F16 | F_CF | F_ES32 | F_EXTF16},
+
+    // On ES3.0+, 16 bit floats are texture-filterable.
+    // Don't bother with 32 bit floats; they exist but are neither CR nor TF.
+    {"r16f",    GL_R16F,     GL_RED,             T_FL,  F_F16 | F_TF | F_ES3},
+    {"rg16f",   GL_RG16F,    GL_RG,              T_FL,  F_F16 | F_TF | F_ES3},
+    {"rgb16f",  GL_RGB16F,   GL_RGB,             T_FL,  F_F16 | F_TF | F_ES3},
+    {"rgba16f", GL_RGBA16F,  GL_RGBA,            T_FL,  F_F16 | F_TF | F_ES3},
+
+    // These might be useful as FBO formats.
+    {"rgb10_a2",GL_RGB10_A2, GL_RGBA,
+     GL_UNSIGNED_INT_2_10_10_10_REV,                    F_CF | F_GL3 | F_ES3},
+    {"rgba12",  GL_RGBA12,   GL_RGBA,            T_U16, F_CF | F_GL2 | F_GL3},
+    {"rgb10",   GL_RGB10,    GL_RGB,             T_U16, F_CF | F_GL2 | F_GL3},
+
+    // Special formats (packed component types).
+    {"rgb565",  GL_RGB8,     GL_RGB,
+     GL_UNSIGNED_SHORT_5_6_5,                           F_TF | F_GL2 | F_GL3},
+    // Worthless, but needed by OSX videotoolbox interop on old Apple hardware.
+    {"appleyp", GL_RGB,      GL_RGB_422_APPLE,
+     GL_UNSIGNED_SHORT_8_8_APPLE,                       F_TF | F_APPL},
+
+    {0} // all-zero entry closing the list
+};
+
+// Build the or-ed set of version/extension F_* flags that hold for this GL.
+int gl_format_feature_flags(GL *gl)
+{
+    int is_gl21 = gl->version == 210;
+    int is_es3 = gl->es >= 300;
+    int gl2_float = is_gl21 && (gl->mpgl_caps & MPGL_CAP_ARB_FLOAT) &&
+                    (gl->mpgl_caps & MPGL_CAP_TEX_RG) &&
+                    (gl->mpgl_caps & MPGL_CAP_FB);
+    int es_hfloat = is_es3 && (gl->mpgl_caps & MPGL_CAP_EXT_CR_HFLOAT);
+
+    return (is_gl21 ? F_GL2 : 0) | (gl->version >= 300 ? F_GL3 : 0)
+         | (gl->es == 200 ? F_ES2 : 0) | (is_es3 ? F_ES3 : 0)
+         | (gl->es >= 320 ? F_ES32 : 0)
+         | (gl->mpgl_caps & MPGL_CAP_EXT16 ? F_EXT16 : 0)
+         | (es_hfloat ? F_EXTF16 : 0) | (gl2_float ? F_GL2F : 0)
+         | (gl->mpgl_caps & MPGL_CAP_APPLE_RGB_422 ? F_APPL : 0);
+}
+
+// Classify a format as MPGL_TYPE_FLOAT, _UINT or _UNORM; NULL yields 0.
+int gl_format_type(const struct gl_format *format)
+{
+    if (!format)
+        return 0;
+    if (format->type == GL_FLOAT)
+        return MPGL_TYPE_FLOAT;
+    int is_integer = gl_integer_format_to_base(format->format) != 0;
+    return is_integer ? MPGL_TYPE_UINT : MPGL_TYPE_UNORM;
+}
+
+// Map an integer pixel format to its non-integer base format.
+// Anything that is not one of the *_INTEGER formats maps to 0, so the
+// result also tells whether "format" is an integer format at all.
+// "format" is like in struct gl_format.
+GLenum gl_integer_format_to_base(GLenum format)
+{
+    return format == GL_RED_INTEGER  ? GL_RED
+         : format == GL_RG_INTEGER   ? GL_RG
+         : format == GL_RGB_INTEGER  ? GL_RGB
+         : format == GL_RGBA_INTEGER ? GL_RGBA
+         : 0;
+}
+
+// Size in bytes of one component for the given data type.
+// Yields 0 for types with non-byte alignment and for types that merge
+// several components (like GL_UNSIGNED_SHORT_5_6_5). "type": see gl_format.
+int gl_component_size(GLenum type)
+{
+    if (type == GL_UNSIGNED_BYTE)
+        return 1;
+    if (type == GL_UNSIGNED_SHORT)
+        return 2;
+    if (type == GL_FLOAT)
+        return 4;
+    return 0;
+}
+
+// Return how many separate color components a pixel format carries.
+// Plain, integer and luminance formats are recognized; others yield 0.
+// "format" is like in struct gl_format.
+int gl_format_components(GLenum format)
+{
+    if (format == GL_RED || format == GL_RED_INTEGER ||
+        format == GL_LUMINANCE)
+    {
+        return 1;
+    }
+    if (format == GL_RG || format == GL_RG_INTEGER ||
+        format == GL_LUMINANCE_ALPHA)
+    {
+        return 2;
+    }
+    if (format == GL_RGB || format == GL_RGB_INTEGER)
+        return 3;
+    if (format == GL_RGBA || format == GL_RGBA_INTEGER)
+        return 4;
+    // Not a format handled by this function.
+    return 0;
+}
+
+// Compute the size in bytes of one pixel for a format/type combination.
+// Parameter names like in struct gl_format.
+int gl_bytes_per_pixel(GLenum format, GLenum type)
+{
+    // Types with merged components have a fixed size, whatever "format" is.
+    int two_byte_packed = type == GL_UNSIGNED_SHORT_5_6_5 ||
+                          type == GL_UNSIGNED_SHORT_8_8_APPLE ||
+                          type == GL_UNSIGNED_SHORT_8_8_REV_APPLE;
+    if (type == GL_UNSIGNED_INT_2_10_10_10_REV)
+        return 4;
+    if (two_byte_packed)
+        return 2;
+    return gl_component_size(type) * gl_format_components(format);
+}
diff --git a/video/out/opengl/formats.h b/video/out/opengl/formats.h
new file mode 100644
index 0000000..f727a3b
--- /dev/null
+++ b/video/out/opengl/formats.h
@@ -0,0 +1,51 @@
+#ifndef MPGL_FORMATS_H_
+#define MPGL_FORMATS_H_
+
+#include "common.h"
+
+struct gl_format {
+    const char *name;           // symbolic name for user interaction/debugging
+    GLint internal_format;      // glTexImage argument, e.g. GL_RGB8
+    GLenum format;              // glTexImage argument, e.g. GL_RGB
+    GLenum type;                // component data type, e.g. GL_UNSIGNED_SHORT
+    int flags;                  // or-ed F_* flags (version and feature bits)
+};
+
+enum {
+    // --- gl_format.flags
+
+    // Version flags. If at least 1 flag matches, the format entry is considered
+    // supported on the current GL context.
+    F_GL2       = 1 << 0, // GL2.1-only
+    F_GL3       = 1 << 1, // GL3.0 or later
+    F_ES2       = 1 << 2, // ES2-only
+    F_ES3       = 1 << 3, // ES3.0 or later
+    F_ES32      = 1 << 4, // ES3.2 or later
+    F_EXT16     = 1 << 5, // ES with GL_EXT_texture_norm16
+    F_EXTF16    = 1 << 6, // ES3.0+ with GL_EXT_color_buffer_half_float
+    F_GL2F      = 1 << 7, // GL2.1-only with texture_rg + texture_float + FBOs
+    F_APPL      = 1 << 8, // GL_APPLE_rgb_422
+
+    // Feature flags. Set in addition to version flags; describe capabilities.
+    F_CR        = 1 << 16, // color-renderable
+    F_TF        = 1 << 17, // texture-filterable with GL_LINEAR
+    F_CF        = F_CR | F_TF, // both color-renderable and filterable
+    F_F16       = 1 << 18, // uses half-floats (16 bit) internally, even though
+                           // the format is still GL_FLOAT (32 bit)
+
+    // --- Component type classes, as returned by gl_format_type().
+    MPGL_TYPE_UNORM = RA_CTYPE_UNORM,   // normalized integer (fixed point) formats
+    MPGL_TYPE_UINT  = RA_CTYPE_UINT,    // full integer formats
+    MPGL_TYPE_FLOAT = RA_CTYPE_FLOAT,   // float formats (both full and half)
+};
+
+extern const struct gl_format gl_formats[];
+
+int gl_format_feature_flags(GL *gl);
+int gl_format_type(const struct gl_format *format);
+GLenum gl_integer_format_to_base(GLenum format);
+int gl_component_size(GLenum type);
+int gl_format_components(GLenum format);
+int gl_bytes_per_pixel(GLenum format, GLenum type);
+
+#endif
diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h
new file mode 100644
index 0000000..5c36718
--- /dev/null
+++ b/video/out/opengl/gl_headers.h
@@ -0,0 +1,799 @@
+/*
+ * Parts of OpenGL(ES) needed by the OpenGL renderer.
+ *
+ * This excludes function declarations.
+ *
+ * This header is based on:
+ * - Khronos GLES headers (MIT)
+ * - mpv or MPlayer code (LGPL 2.1 or later)
+ * - probably Mesa GL headers (MIT)
+ */
+
+#ifndef MPV_GL_HEADERS_H
+#define MPV_GL_HEADERS_H
+
+#include <stddef.h>
+#include <stdint.h>
+// Enable this to use system headers instead.
+#if 0
+#include <GL/gl.h>
+#include <GLES3/gl3.h>
+#endif
+
+#ifndef GLAPIENTRY // keep a definition made before this point
+#ifdef _WIN32
+#define GLAPIENTRY __stdcall
+#else // not Windows: no calling convention annotation
+#define GLAPIENTRY
+#endif
+#endif
+
+// Typedefs. This needs to work with system headers too (consider GLX), and
+// before C11, duplicated typedefs were an error. So check what an earlier
+// header (at least Mesa's) already declared, and only add what is missing.
+#ifdef GL_TRUE
+    // Some GL header is already there; old Mesa may stop at the GL 2.0 types.
+    #define MP_GET_GL_TYPES_2_0 0
+    #ifdef GL_VERSION_3_2
+        #define MP_GET_GL_TYPES_3_2 0
+    #else
+        #define MP_GET_GL_TYPES_3_2 1
+    #endif
+#else
+    // No GL header seen so far: declare all types ourselves.
+    #define MP_GET_GL_TYPES_2_0 1
+    #define MP_GET_GL_TYPES_3_2 1
+#endif
+
+#if MP_GET_GL_TYPES_2_0
+// GL_VERSION_1_0, GL_ES_VERSION_2_0
+typedef unsigned int GLbitfield;
+typedef unsigned char GLboolean;
+typedef unsigned int GLenum;
+typedef float GLfloat;
+typedef int GLint;
+typedef int GLsizei;
+typedef uint8_t GLubyte;
+typedef unsigned int GLuint;
+typedef void GLvoid;
+// GL 1.1 GL_VERSION_1_1, GL_ES_VERSION_2_0
+typedef float GLclampf;
+// GL 1.5 GL_VERSION_1_5, GL_ES_VERSION_2_0
+typedef intptr_t GLintptr;
+typedef ptrdiff_t GLsizeiptr;
+// GL 2.0 GL_VERSION_2_0, GL_ES_VERSION_2_0
+typedef int8_t GLbyte;
+typedef char GLchar;
+typedef short GLshort;
+typedef unsigned short GLushort;
+#endif
+
+#if MP_GET_GL_TYPES_3_2
+// GL 3.2 GL_VERSION_3_2, GL_ES_VERSION_2_0
+typedef int64_t GLint64;
+typedef struct __GLsync *GLsync;
+typedef uint64_t GLuint64;
+#endif
+
+// --- GL 1.1
+
+#define GL_BACK_LEFT                      0x0402
+#define GL_TEXTURE_1D                     0x0DE0
+#define GL_RGB16                          0x8054
+#define GL_RGB10                          0x8052
+#define GL_RGBA12                         0x805A
+#define GL_RGBA16                         0x805B
+#define GL_TEXTURE_RED_SIZE               0x805C
+#define GL_TEXTURE_GREEN_SIZE             0x805D
+#define GL_TEXTURE_BLUE_SIZE              0x805E
+#define GL_TEXTURE_ALPHA_SIZE             0x805F
+
+// --- GL 1.1 (removed from 3.0 core and not in GLES 2/3)
+
+#define GL_TEXTURE_LUMINANCE_SIZE         0x8060
+#define GL_LUMINANCE8                     0x8040
+#define GL_LUMINANCE8_ALPHA8              0x8045
+#define GL_LUMINANCE16                    0x8042
+#define GL_LUMINANCE16_ALPHA16            0x8048
+
+// --- GL 1.5
+
+#define GL_READ_ONLY                      0x88B8
+#define GL_WRITE_ONLY                     0x88B9
+#define GL_READ_WRITE                     0x88BA
+
+// --- GL 3.0
+
+#define GL_R16                            0x822A
+#define GL_RG16                           0x822C
+
+// --- GL 3.1
+
+#define GL_TEXTURE_RECTANGLE              0x84F5
+
+// --- GL 3.3 or GL_ARB_timer_query
+
+#define GL_TIME_ELAPSED                   0x88BF
+#define GL_TIMESTAMP                      0x8E28
+
+// --- GL 4.3 or GL_ARB_debug_output
+
+#define GL_DEBUG_SEVERITY_HIGH            0x9146
+#define GL_DEBUG_SEVERITY_MEDIUM          0x9147
+#define GL_DEBUG_SEVERITY_LOW             0x9148
+#define GL_DEBUG_SEVERITY_NOTIFICATION    0x826B
+
+// --- GL 4.4 or GL_ARB_buffer_storage
+
+#define GL_MAP_PERSISTENT_BIT             0x0040
+#define GL_MAP_COHERENT_BIT               0x0080
+#define GL_DYNAMIC_STORAGE_BIT            0x0100
+#define GL_CLIENT_STORAGE_BIT             0x0200
+
+// --- GL 4.2 or GL_ARB_image_load_store
+
+#define GL_TEXTURE_FETCH_BARRIER_BIT      0x00000008
+
+// --- GL 4.3 or GL_ARB_compute_shader
+
+#define GL_COMPUTE_SHADER                 0x91B9
+#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262
+#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB
+
+// --- GL 4.3 or GL_ARB_shader_storage_buffer_object
+
+#define GL_SHADER_STORAGE_BUFFER          0x90D2
+#define GL_SHADER_STORAGE_BARRIER_BIT     0x00002000
+
+// --- GL_NV_vdpau_interop
+
+#define GLvdpauSurfaceNV GLintptr
+#define GL_WRITE_DISCARD_NV               0x88BE
+
+// --- GL_OES_EGL_image_external, GL_NV_EGL_stream_consumer_external
+
+#define GL_TEXTURE_EXTERNAL_OES           0x8D65
+
+// --- GL_APPLE_rgb_422
+
+#define GL_RGB_422_APPLE                  0x8A1F
+#define GL_UNSIGNED_SHORT_8_8_APPLE       0x85BA
+#define GL_UNSIGNED_SHORT_8_8_REV_APPLE   0x85BB
+
+// --- GL_ANGLE_translated_shader_source
+
+#define GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE 0x93A0
+
+// ---- GLES 2
+
+#define GL_DEPTH_BUFFER_BIT               0x00000100
+#define GL_STENCIL_BUFFER_BIT             0x00000400
+#define GL_COLOR_BUFFER_BIT               0x00004000
+#define GL_FALSE                          0
+#define GL_TRUE                           1
+#define GL_POINTS                         0x0000
+#define GL_LINES                          0x0001
+#define GL_LINE_LOOP                      0x0002
+#define GL_LINE_STRIP                     0x0003
+#define GL_TRIANGLES                      0x0004
+#define GL_TRIANGLE_STRIP                 0x0005
+#define GL_TRIANGLE_FAN                   0x0006
+#define GL_ZERO                           0
+#define GL_ONE                            1
+#define GL_SRC_COLOR                      0x0300
+#define GL_ONE_MINUS_SRC_COLOR            0x0301
+#define GL_SRC_ALPHA                      0x0302
+#define GL_ONE_MINUS_SRC_ALPHA            0x0303
+#define GL_DST_ALPHA                      0x0304
+#define GL_ONE_MINUS_DST_ALPHA            0x0305
+#define GL_DST_COLOR                      0x0306
+#define GL_ONE_MINUS_DST_COLOR            0x0307
+#define GL_SRC_ALPHA_SATURATE             0x0308
+#define GL_FUNC_ADD                       0x8006
+#define GL_BLEND_EQUATION                 0x8009
+#define GL_BLEND_EQUATION_RGB             0x8009
+#define GL_BLEND_EQUATION_ALPHA           0x883D
+#define GL_FUNC_SUBTRACT                  0x800A
+#define GL_FUNC_REVERSE_SUBTRACT          0x800B
+#define GL_BLEND_DST_RGB                  0x80C8
+#define GL_BLEND_SRC_RGB                  0x80C9
+#define GL_BLEND_DST_ALPHA                0x80CA
+#define GL_BLEND_SRC_ALPHA                0x80CB
+#define GL_CONSTANT_COLOR                 0x8001
+#define GL_ONE_MINUS_CONSTANT_COLOR       0x8002
+#define GL_CONSTANT_ALPHA                 0x8003
+#define GL_ONE_MINUS_CONSTANT_ALPHA       0x8004
+#define GL_BLEND_COLOR                    0x8005
+#define GL_ARRAY_BUFFER                   0x8892
+#define GL_ELEMENT_ARRAY_BUFFER           0x8893
+#define GL_ARRAY_BUFFER_BINDING           0x8894
+#define GL_ELEMENT_ARRAY_BUFFER_BINDING   0x8895
+#define GL_STREAM_DRAW                    0x88E0
+#define GL_STATIC_DRAW                    0x88E4
+#define GL_DYNAMIC_DRAW                   0x88E8
+#define GL_BUFFER_SIZE                    0x8764
+#define GL_BUFFER_USAGE                   0x8765
+#define GL_CURRENT_VERTEX_ATTRIB          0x8626
+#define GL_FRONT                          0x0404
+#define GL_BACK                           0x0405
+#define GL_FRONT_AND_BACK                 0x0408
+#define GL_TEXTURE_2D                     0x0DE1
+#define GL_CULL_FACE                      0x0B44
+#define GL_BLEND                          0x0BE2
+#define GL_DITHER                         0x0BD0
+#define GL_STENCIL_TEST                   0x0B90
+#define GL_DEPTH_TEST                     0x0B71
+#define GL_SCISSOR_TEST                   0x0C11
+#define GL_POLYGON_OFFSET_FILL            0x8037
+#define GL_SAMPLE_ALPHA_TO_COVERAGE       0x809E
+#define GL_SAMPLE_COVERAGE                0x80A0
+#define GL_NO_ERROR                       0
+#define GL_INVALID_ENUM                   0x0500
+#define GL_INVALID_VALUE                  0x0501
+#define GL_INVALID_OPERATION              0x0502
+#define GL_OUT_OF_MEMORY                  0x0505
+#define GL_CW                             0x0900
+#define GL_CCW                            0x0901
+#define GL_LINE_WIDTH                     0x0B21
+#define GL_ALIASED_POINT_SIZE_RANGE       0x846D
+#define GL_ALIASED_LINE_WIDTH_RANGE       0x846E
+#define GL_CULL_FACE_MODE                 0x0B45
+#define GL_FRONT_FACE                     0x0B46
+#define GL_DEPTH_RANGE                    0x0B70
+#define GL_DEPTH_WRITEMASK                0x0B72
+#define GL_DEPTH_CLEAR_VALUE              0x0B73
+#define GL_DEPTH_FUNC                     0x0B74
+#define GL_STENCIL_CLEAR_VALUE            0x0B91
+#define GL_STENCIL_FUNC                   0x0B92
+#define GL_STENCIL_FAIL                   0x0B94
+#define GL_STENCIL_PASS_DEPTH_FAIL        0x0B95
+#define GL_STENCIL_PASS_DEPTH_PASS        0x0B96
+#define GL_STENCIL_REF                    0x0B97
+#define GL_STENCIL_VALUE_MASK             0x0B93
+#define GL_STENCIL_WRITEMASK              0x0B98
+#define GL_STENCIL_BACK_FUNC              0x8800
+#define GL_STENCIL_BACK_FAIL              0x8801
+#define GL_STENCIL_BACK_PASS_DEPTH_FAIL   0x8802
+#define GL_STENCIL_BACK_PASS_DEPTH_PASS   0x8803
+#define GL_STENCIL_BACK_REF               0x8CA3
+#define GL_STENCIL_BACK_VALUE_MASK        0x8CA4
+#define GL_STENCIL_BACK_WRITEMASK         0x8CA5
+#define GL_VIEWPORT                       0x0BA2
+#define GL_SCISSOR_BOX                    0x0C10
+#define GL_COLOR_CLEAR_VALUE              0x0C22
+#define GL_COLOR_WRITEMASK                0x0C23
+#define GL_UNPACK_ALIGNMENT               0x0CF5
+#define GL_PACK_ALIGNMENT                 0x0D05
+#define GL_MAX_TEXTURE_SIZE               0x0D33
+#define GL_MAX_VIEWPORT_DIMS              0x0D3A
+#define GL_SUBPIXEL_BITS                  0x0D50
+#define GL_RED_BITS                       0x0D52
+#define GL_GREEN_BITS                     0x0D53
+#define GL_BLUE_BITS                      0x0D54
+#define GL_ALPHA_BITS                     0x0D55
+#define GL_DEPTH_BITS                     0x0D56
+#define GL_STENCIL_BITS                   0x0D57
+#define GL_POLYGON_OFFSET_UNITS           0x2A00
+#define GL_POLYGON_OFFSET_FACTOR          0x8038
+#define GL_TEXTURE_BINDING_2D             0x8069
+#define GL_SAMPLE_BUFFERS                 0x80A8
+#define GL_SAMPLES                        0x80A9
+#define GL_SAMPLE_COVERAGE_VALUE          0x80AA
+#define GL_SAMPLE_COVERAGE_INVERT         0x80AB
+#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2
+#define GL_COMPRESSED_TEXTURE_FORMATS     0x86A3
+#define GL_DONT_CARE                      0x1100
+#define GL_FASTEST                        0x1101
+#define GL_NICEST                         0x1102
+#define GL_GENERATE_MIPMAP_HINT           0x8192
+#define GL_BYTE                           0x1400
+#define GL_UNSIGNED_BYTE                  0x1401
+#define GL_SHORT                          0x1402
+#define GL_UNSIGNED_SHORT                 0x1403
+#define GL_INT                            0x1404
+#define GL_UNSIGNED_INT                   0x1405
+#define GL_FLOAT                          0x1406
+#define GL_FIXED                          0x140C
+#define GL_DEPTH_COMPONENT                0x1902
+#define GL_ALPHA                          0x1906
+#define GL_RGB                            0x1907
+#define GL_RGBA                           0x1908
+#define GL_LUMINANCE                      0x1909
+#define GL_LUMINANCE_ALPHA                0x190A
+#define GL_UNSIGNED_SHORT_4_4_4_4         0x8033
+#define GL_UNSIGNED_SHORT_5_5_5_1         0x8034
+#define GL_UNSIGNED_SHORT_5_6_5           0x8363
+#define GL_FRAGMENT_SHADER                0x8B30
+#define GL_VERTEX_SHADER                  0x8B31
+#define GL_MAX_VERTEX_ATTRIBS             0x8869
+#define GL_MAX_VERTEX_UNIFORM_VECTORS     0x8DFB
+#define GL_MAX_VARYING_VECTORS            0x8DFC
+#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D
+#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C
+#define GL_MAX_TEXTURE_IMAGE_UNITS        0x8872
+#define GL_MAX_FRAGMENT_UNIFORM_VECTORS   0x8DFD
+#define GL_SHADER_TYPE                    0x8B4F
+#define GL_DELETE_STATUS                  0x8B80
+#define GL_LINK_STATUS                    0x8B82
+#define GL_VALIDATE_STATUS                0x8B83
+#define GL_ATTACHED_SHADERS               0x8B85
+#define GL_ACTIVE_UNIFORMS                0x8B86
+#define GL_ACTIVE_UNIFORM_MAX_LENGTH      0x8B87
+#define GL_ACTIVE_ATTRIBUTES              0x8B89
+#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH    0x8B8A
+#define GL_SHADING_LANGUAGE_VERSION       0x8B8C
+#define GL_CURRENT_PROGRAM                0x8B8D
+#define GL_NEVER                          0x0200
+#define GL_LESS                           0x0201
+#define GL_EQUAL                          0x0202
+#define GL_LEQUAL                         0x0203
+#define GL_GREATER                        0x0204
+#define GL_NOTEQUAL                       0x0205
+#define GL_GEQUAL                         0x0206
+#define GL_ALWAYS                         0x0207
+#define GL_KEEP                           0x1E00
+#define GL_REPLACE                        0x1E01
+#define GL_INCR                           0x1E02
+#define GL_DECR                           0x1E03
+#define GL_INVERT                         0x150A
+#define GL_INCR_WRAP                      0x8507
+#define GL_DECR_WRAP                      0x8508
+#define GL_VENDOR                         0x1F00
+#define GL_RENDERER                       0x1F01
+#define GL_VERSION                        0x1F02
+#define GL_EXTENSIONS                     0x1F03
+#define GL_NEAREST                        0x2600
+#define GL_LINEAR                         0x2601
+#define GL_NEAREST_MIPMAP_NEAREST         0x2700
+#define GL_LINEAR_MIPMAP_NEAREST          0x2701
+#define GL_NEAREST_MIPMAP_LINEAR          0x2702
+#define GL_LINEAR_MIPMAP_LINEAR           0x2703
+#define GL_TEXTURE_MAG_FILTER             0x2800
+#define GL_TEXTURE_MIN_FILTER             0x2801
+#define GL_TEXTURE_WRAP_S                 0x2802
+#define GL_TEXTURE_WRAP_T                 0x2803
+#define GL_TEXTURE                        0x1702
+#define GL_TEXTURE_CUBE_MAP               0x8513
+#define GL_TEXTURE_BINDING_CUBE_MAP       0x8514
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_X    0x8515
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X    0x8516
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y    0x8517
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y    0x8518
+#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z    0x8519
+#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z    0x851A
+#define GL_MAX_CUBE_MAP_TEXTURE_SIZE      0x851C
+#define GL_TEXTURE0                       0x84C0
+#define GL_TEXTURE1                       0x84C1
+#define GL_TEXTURE2                       0x84C2
+#define GL_TEXTURE3                       0x84C3
+#define GL_TEXTURE4                       0x84C4
+#define GL_TEXTURE5                       0x84C5
+#define GL_TEXTURE6                       0x84C6
+#define GL_TEXTURE7                       0x84C7
+#define GL_TEXTURE8                       0x84C8
+#define GL_TEXTURE9                       0x84C9
+#define GL_TEXTURE10                      0x84CA
+#define GL_TEXTURE11                      0x84CB
+#define GL_TEXTURE12                      0x84CC
+#define GL_TEXTURE13                      0x84CD
+#define GL_TEXTURE14                      0x84CE
+#define GL_TEXTURE15                      0x84CF
+#define GL_TEXTURE16                      0x84D0
+#define GL_TEXTURE17                      0x84D1
+#define GL_TEXTURE18                      0x84D2
+#define GL_TEXTURE19                      0x84D3
+#define GL_TEXTURE20                      0x84D4
+#define GL_TEXTURE21                      0x84D5
+#define GL_TEXTURE22                      0x84D6
+#define GL_TEXTURE23                      0x84D7
+#define GL_TEXTURE24                      0x84D8
+#define GL_TEXTURE25                      0x84D9
+#define GL_TEXTURE26                      0x84DA
+#define GL_TEXTURE27                      0x84DB
+#define GL_TEXTURE28                      0x84DC
+#define GL_TEXTURE29                      0x84DD
+#define GL_TEXTURE30                      0x84DE
+#define GL_TEXTURE31                      0x84DF
+#define GL_ACTIVE_TEXTURE                 0x84E0
+#define GL_REPEAT                         0x2901
+#define GL_CLAMP_TO_EDGE                  0x812F
+#define GL_MIRRORED_REPEAT                0x8370
+#define GL_FLOAT_VEC2                     0x8B50
+#define GL_FLOAT_VEC3                     0x8B51
+#define GL_FLOAT_VEC4                     0x8B52
+#define GL_INT_VEC2                       0x8B53
+#define GL_INT_VEC3                       0x8B54
+#define GL_INT_VEC4                       0x8B55
+#define GL_BOOL                           0x8B56
+#define GL_BOOL_VEC2                      0x8B57
+#define GL_BOOL_VEC3                      0x8B58
+#define GL_BOOL_VEC4                      0x8B59
+#define GL_FLOAT_MAT2                     0x8B5A
+#define GL_FLOAT_MAT3                     0x8B5B
+#define GL_FLOAT_MAT4                     0x8B5C
+#define GL_SAMPLER_2D                     0x8B5E
+#define GL_SAMPLER_CUBE                   0x8B60
+#define GL_VERTEX_ATTRIB_ARRAY_ENABLED    0x8622
+#define GL_VERTEX_ATTRIB_ARRAY_SIZE       0x8623
+#define GL_VERTEX_ATTRIB_ARRAY_STRIDE     0x8624
+#define GL_VERTEX_ATTRIB_ARRAY_TYPE       0x8625
+#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A
+#define GL_VERTEX_ATTRIB_ARRAY_POINTER    0x8645
+#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F
+#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A
+#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B
+#define GL_COMPILE_STATUS                 0x8B81
+#define GL_INFO_LOG_LENGTH                0x8B84
+#define GL_SHADER_SOURCE_LENGTH           0x8B88
+#define GL_SHADER_COMPILER                0x8DFA
+#define GL_SHADER_BINARY_FORMATS          0x8DF8
+#define GL_NUM_SHADER_BINARY_FORMATS      0x8DF9
+#define GL_LOW_FLOAT                      0x8DF0
+#define GL_MEDIUM_FLOAT                   0x8DF1
+#define GL_HIGH_FLOAT                     0x8DF2
+#define GL_LOW_INT                        0x8DF3
+#define GL_MEDIUM_INT                     0x8DF4
+#define GL_HIGH_INT                       0x8DF5
+#define GL_FRAMEBUFFER                    0x8D40
+#define GL_RENDERBUFFER                   0x8D41
+#define GL_RGBA4                          0x8056
+#define GL_RGB5_A1                        0x8057
+#define GL_RGB565                         0x8D62
+#define GL_DEPTH_COMPONENT16              0x81A5
+#define GL_STENCIL_INDEX8                 0x8D48
+#define GL_RENDERBUFFER_WIDTH             0x8D42
+#define GL_RENDERBUFFER_HEIGHT            0x8D43
+#define GL_RENDERBUFFER_INTERNAL_FORMAT   0x8D44
+#define GL_RENDERBUFFER_RED_SIZE          0x8D50
+#define GL_RENDERBUFFER_GREEN_SIZE        0x8D51
+#define GL_RENDERBUFFER_BLUE_SIZE         0x8D52
+#define GL_RENDERBUFFER_ALPHA_SIZE        0x8D53
+#define GL_RENDERBUFFER_DEPTH_SIZE        0x8D54
+#define GL_RENDERBUFFER_STENCIL_SIZE      0x8D55
+#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0
+#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3
+#define GL_COLOR_ATTACHMENT0              0x8CE0
+#define GL_DEPTH_ATTACHMENT               0x8D00
+#define GL_STENCIL_ATTACHMENT             0x8D20
+#define GL_NONE                           0
+#define GL_FRAMEBUFFER_COMPLETE           0x8CD5
+#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6
+#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7
+#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9
+#define GL_FRAMEBUFFER_UNSUPPORTED        0x8CDD
+#define GL_FRAMEBUFFER_BINDING            0x8CA6
+#define GL_RENDERBUFFER_BINDING           0x8CA7
+#define GL_MAX_RENDERBUFFER_SIZE          0x84E8
+#define GL_INVALID_FRAMEBUFFER_OPERATION  0x0506
+
+// ---- GLES 3
+
+#ifndef GL_READ_BUFFER // GL_READ_BUFFER is #defined just below, so this is skipped only if it was defined earlier
+typedef unsigned short GLhalf; // presumably 16-bit half-float storage (cf. GL_HALF_FLOAT) -- TODO confirm
+#endif
+
+#define GL_READ_BUFFER                    0x0C02
+#define GL_UNPACK_ROW_LENGTH              0x0CF2
+#define GL_UNPACK_SKIP_ROWS               0x0CF3
+#define GL_UNPACK_SKIP_PIXELS             0x0CF4
+#define GL_PACK_ROW_LENGTH                0x0D02
+#define GL_PACK_SKIP_ROWS                 0x0D03
+#define GL_PACK_SKIP_PIXELS               0x0D04
+#define GL_COLOR                          0x1800
+#define GL_DEPTH                          0x1801
+#define GL_STENCIL                        0x1802
+#define GL_RED                            0x1903
+#define GL_RGB8                           0x8051
+#define GL_RGBA8                          0x8058
+#define GL_RGB10_A2                       0x8059
+#define GL_TEXTURE_BINDING_3D             0x806A
+#define GL_UNPACK_SKIP_IMAGES             0x806D
+#define GL_UNPACK_IMAGE_HEIGHT            0x806E
+#define GL_TEXTURE_3D                     0x806F
+#define GL_TEXTURE_WRAP_R                 0x8072
+#define GL_MAX_3D_TEXTURE_SIZE            0x8073
+#define GL_UNSIGNED_INT_2_10_10_10_REV    0x8368
+#define GL_MAX_ELEMENTS_VERTICES          0x80E8
+#define GL_MAX_ELEMENTS_INDICES           0x80E9
+#define GL_TEXTURE_MIN_LOD                0x813A
+#define GL_TEXTURE_MAX_LOD                0x813B
+#define GL_TEXTURE_BASE_LEVEL             0x813C
+#define GL_TEXTURE_MAX_LEVEL              0x813D
+#define GL_MIN                            0x8007
+#define GL_MAX                            0x8008
+#define GL_DEPTH_COMPONENT24              0x81A6
+#define GL_MAX_TEXTURE_LOD_BIAS           0x84FD
+#define GL_TEXTURE_COMPARE_MODE           0x884C
+#define GL_TEXTURE_COMPARE_FUNC           0x884D
+#define GL_CURRENT_QUERY                  0x8865
+#define GL_QUERY_RESULT                   0x8866
+#define GL_QUERY_RESULT_AVAILABLE         0x8867
+#define GL_BUFFER_MAPPED                  0x88BC
+#define GL_BUFFER_MAP_POINTER             0x88BD
+#define GL_STREAM_READ                    0x88E1
+#define GL_STREAM_COPY                    0x88E2
+#define GL_STATIC_READ                    0x88E5
+#define GL_STATIC_COPY                    0x88E6
+#define GL_DYNAMIC_READ                   0x88E9
+#define GL_DYNAMIC_COPY                   0x88EA
+#define GL_MAX_DRAW_BUFFERS               0x8824
+#define GL_DRAW_BUFFER0                   0x8825
+#define GL_DRAW_BUFFER1                   0x8826
+#define GL_DRAW_BUFFER2                   0x8827
+#define GL_DRAW_BUFFER3                   0x8828
+#define GL_DRAW_BUFFER4                   0x8829
+#define GL_DRAW_BUFFER5                   0x882A
+#define GL_DRAW_BUFFER6                   0x882B
+#define GL_DRAW_BUFFER7                   0x882C
+#define GL_DRAW_BUFFER8                   0x882D
+#define GL_DRAW_BUFFER9                   0x882E
+#define GL_DRAW_BUFFER10                  0x882F
+#define GL_DRAW_BUFFER11                  0x8830
+#define GL_DRAW_BUFFER12                  0x8831
+#define GL_DRAW_BUFFER13                  0x8832
+#define GL_DRAW_BUFFER14                  0x8833
+#define GL_DRAW_BUFFER15                  0x8834
+#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49
+#define GL_MAX_VERTEX_UNIFORM_COMPONENTS  0x8B4A
+#define GL_SAMPLER_3D                     0x8B5F
+#define GL_SAMPLER_2D_SHADOW              0x8B62
+#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B
+#define GL_PIXEL_PACK_BUFFER              0x88EB
+#define GL_PIXEL_UNPACK_BUFFER            0x88EC
+#define GL_PIXEL_PACK_BUFFER_BINDING      0x88ED
+#define GL_PIXEL_UNPACK_BUFFER_BINDING    0x88EF
+#define GL_FLOAT_MAT2x3                   0x8B65
+#define GL_FLOAT_MAT2x4                   0x8B66
+#define GL_FLOAT_MAT3x2                   0x8B67
+#define GL_FLOAT_MAT3x4                   0x8B68
+#define GL_FLOAT_MAT4x2                   0x8B69
+#define GL_FLOAT_MAT4x3                   0x8B6A
+#define GL_SRGB                           0x8C40
+#define GL_SRGB8                          0x8C41
+#define GL_SRGB8_ALPHA8                   0x8C43
+#define GL_COMPARE_REF_TO_TEXTURE         0x884E
+#define GL_MAJOR_VERSION                  0x821B
+#define GL_MINOR_VERSION                  0x821C
+#define GL_NUM_EXTENSIONS                 0x821D
+#define GL_RGBA32F                        0x8814
+#define GL_RGB32F                         0x8815
+#define GL_RGBA16F                        0x881A
+#define GL_RGB16F                         0x881B
+#define GL_VERTEX_ATTRIB_ARRAY_INTEGER    0x88FD
+#define GL_MAX_ARRAY_TEXTURE_LAYERS       0x88FF
+#define GL_MIN_PROGRAM_TEXEL_OFFSET       0x8904
+#define GL_MAX_PROGRAM_TEXEL_OFFSET       0x8905
+#define GL_MAX_VARYING_COMPONENTS         0x8B4B
+#define GL_TEXTURE_2D_ARRAY               0x8C1A
+#define GL_TEXTURE_BINDING_2D_ARRAY       0x8C1D
+#define GL_R11F_G11F_B10F                 0x8C3A
+#define GL_UNSIGNED_INT_10F_11F_11F_REV   0x8C3B
+#define GL_RGB9_E5                        0x8C3D
+#define GL_UNSIGNED_INT_5_9_9_9_REV       0x8C3E
+#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76
+#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80
+#define GL_TRANSFORM_FEEDBACK_VARYINGS    0x8C83
+#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84
+#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85
+#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88
+#define GL_RASTERIZER_DISCARD             0x8C89
+#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A
+#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B
+#define GL_INTERLEAVED_ATTRIBS            0x8C8C
+#define GL_SEPARATE_ATTRIBS               0x8C8D
+#define GL_TRANSFORM_FEEDBACK_BUFFER      0x8C8E
+#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F
+#define GL_RGBA32UI                       0x8D70
+#define GL_RGB32UI                        0x8D71
+#define GL_RGBA16UI                       0x8D76
+#define GL_RGB16UI                        0x8D77
+#define GL_RGBA8UI                        0x8D7C
+#define GL_RGB8UI                         0x8D7D
+#define GL_RGBA32I                        0x8D82
+#define GL_RGB32I                         0x8D83
+#define GL_RGBA16I                        0x8D88
+#define GL_RGB16I                         0x8D89
+#define GL_RGBA8I                         0x8D8E
+#define GL_RGB8I                          0x8D8F
+#define GL_RED_INTEGER                    0x8D94
+#define GL_RGB_INTEGER                    0x8D98
+#define GL_RGBA_INTEGER                   0x8D99
+#define GL_SAMPLER_2D_ARRAY               0x8DC1
+#define GL_SAMPLER_2D_ARRAY_SHADOW        0x8DC4
+#define GL_SAMPLER_CUBE_SHADOW            0x8DC5
+#define GL_UNSIGNED_INT_VEC2              0x8DC6
+#define GL_UNSIGNED_INT_VEC3              0x8DC7
+#define GL_UNSIGNED_INT_VEC4              0x8DC8
+#define GL_INT_SAMPLER_2D                 0x8DCA
+#define GL_INT_SAMPLER_3D                 0x8DCB
+#define GL_INT_SAMPLER_CUBE               0x8DCC
+#define GL_INT_SAMPLER_2D_ARRAY           0x8DCF
+#define GL_UNSIGNED_INT_SAMPLER_2D        0x8DD2
+#define GL_UNSIGNED_INT_SAMPLER_3D        0x8DD3
+#define GL_UNSIGNED_INT_SAMPLER_CUBE      0x8DD4
+#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY  0x8DD7
+#define GL_BUFFER_ACCESS_FLAGS            0x911F
+#define GL_BUFFER_MAP_LENGTH              0x9120
+#define GL_BUFFER_MAP_OFFSET              0x9121
+#define GL_DEPTH_COMPONENT32F             0x8CAC
+#define GL_DEPTH32F_STENCIL8              0x8CAD
+#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD
+#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210
+#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211
+#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212
+#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213
+#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214
+#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215
+#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216
+#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217
+#define GL_FRAMEBUFFER_DEFAULT            0x8218
+#define GL_FRAMEBUFFER_UNDEFINED          0x8219
+#define GL_DEPTH_STENCIL_ATTACHMENT       0x821A
+#define GL_DEPTH_STENCIL                  0x84F9
+#define GL_UNSIGNED_INT_24_8              0x84FA
+#define GL_DEPTH24_STENCIL8               0x88F0
+#define GL_UNSIGNED_NORMALIZED            0x8C17
+#define GL_DRAW_FRAMEBUFFER_BINDING       0x8CA6 // same value as GL_FRAMEBUFFER_BINDING above
+#define GL_READ_FRAMEBUFFER               0x8CA8
+#define GL_DRAW_FRAMEBUFFER               0x8CA9
+#define GL_READ_FRAMEBUFFER_BINDING       0x8CAA
+#define GL_RENDERBUFFER_SAMPLES           0x8CAB
+#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4
+#define GL_MAX_COLOR_ATTACHMENTS          0x8CDF
+#define GL_COLOR_ATTACHMENT1              0x8CE1
+#define GL_COLOR_ATTACHMENT2              0x8CE2
+#define GL_COLOR_ATTACHMENT3              0x8CE3
+#define GL_COLOR_ATTACHMENT4              0x8CE4
+#define GL_COLOR_ATTACHMENT5              0x8CE5
+#define GL_COLOR_ATTACHMENT6              0x8CE6
+#define GL_COLOR_ATTACHMENT7              0x8CE7
+#define GL_COLOR_ATTACHMENT8              0x8CE8
+#define GL_COLOR_ATTACHMENT9              0x8CE9
+#define GL_COLOR_ATTACHMENT10             0x8CEA
+#define GL_COLOR_ATTACHMENT11             0x8CEB
+#define GL_COLOR_ATTACHMENT12             0x8CEC
+#define GL_COLOR_ATTACHMENT13             0x8CED
+#define GL_COLOR_ATTACHMENT14             0x8CEE
+#define GL_COLOR_ATTACHMENT15             0x8CEF
+#define GL_COLOR_ATTACHMENT16             0x8CF0
+#define GL_COLOR_ATTACHMENT17             0x8CF1
+#define GL_COLOR_ATTACHMENT18             0x8CF2
+#define GL_COLOR_ATTACHMENT19             0x8CF3
+#define GL_COLOR_ATTACHMENT20             0x8CF4
+#define GL_COLOR_ATTACHMENT21             0x8CF5
+#define GL_COLOR_ATTACHMENT22             0x8CF6
+#define GL_COLOR_ATTACHMENT23             0x8CF7
+#define GL_COLOR_ATTACHMENT24             0x8CF8
+#define GL_COLOR_ATTACHMENT25             0x8CF9
+#define GL_COLOR_ATTACHMENT26             0x8CFA
+#define GL_COLOR_ATTACHMENT27             0x8CFB
+#define GL_COLOR_ATTACHMENT28             0x8CFC
+#define GL_COLOR_ATTACHMENT29             0x8CFD
+#define GL_COLOR_ATTACHMENT30             0x8CFE
+#define GL_COLOR_ATTACHMENT31             0x8CFF
+#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56
+#define GL_MAX_SAMPLES                    0x8D57
+#define GL_HALF_FLOAT                     0x140B
+#define GL_MAP_READ_BIT                   0x0001
+#define GL_MAP_WRITE_BIT                  0x0002
+#define GL_MAP_INVALIDATE_RANGE_BIT       0x0004
+#define GL_MAP_INVALIDATE_BUFFER_BIT      0x0008
+#define GL_MAP_FLUSH_EXPLICIT_BIT         0x0010
+#define GL_MAP_UNSYNCHRONIZED_BIT         0x0020
+#define GL_RG                             0x8227
+#define GL_RG_INTEGER                     0x8228
+#define GL_R8                             0x8229
+#define GL_RG8                            0x822B
+#define GL_R16F                           0x822D
+#define GL_R32F                           0x822E
+#define GL_RG16F                          0x822F
+#define GL_RG32F                          0x8230
+#define GL_R8I                            0x8231
+#define GL_R8UI                           0x8232
+#define GL_R16I                           0x8233
+#define GL_R16UI                          0x8234
+#define GL_R32I                           0x8235
+#define GL_R32UI                          0x8236
+#define GL_RG8I                           0x8237
+#define GL_RG8UI                          0x8238
+#define GL_RG16I                          0x8239
+#define GL_RG16UI                         0x823A
+#define GL_RG32I                          0x823B
+#define GL_RG32UI                         0x823C
+#define GL_VERTEX_ARRAY_BINDING           0x85B5
+#define GL_R8_SNORM                       0x8F94
+#define GL_RG8_SNORM                      0x8F95
+#define GL_RGB8_SNORM                     0x8F96
+#define GL_RGBA8_SNORM                    0x8F97
+#define GL_SIGNED_NORMALIZED              0x8F9C
+#define GL_PRIMITIVE_RESTART_FIXED_INDEX  0x8D69
+#define GL_COPY_READ_BUFFER               0x8F36
+#define GL_COPY_WRITE_BUFFER              0x8F37
+#define GL_COPY_READ_BUFFER_BINDING       0x8F36 // same value as GL_COPY_READ_BUFFER
+#define GL_COPY_WRITE_BUFFER_BINDING      0x8F37 // same value as GL_COPY_WRITE_BUFFER
+#define GL_UNIFORM_BUFFER                 0x8A11
+#define GL_UNIFORM_BUFFER_BINDING         0x8A28
+#define GL_UNIFORM_BUFFER_START           0x8A29
+#define GL_UNIFORM_BUFFER_SIZE            0x8A2A
+#define GL_MAX_VERTEX_UNIFORM_BLOCKS      0x8A2B
+#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS    0x8A2D
+#define GL_MAX_COMBINED_UNIFORM_BLOCKS    0x8A2E
+#define GL_MAX_UNIFORM_BUFFER_BINDINGS    0x8A2F
+#define GL_MAX_UNIFORM_BLOCK_SIZE         0x8A30
+#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31
+#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33
+#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34
+#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35
+#define GL_ACTIVE_UNIFORM_BLOCKS          0x8A36
+#define GL_UNIFORM_TYPE                   0x8A37
+#define GL_UNIFORM_SIZE                   0x8A38
+#define GL_UNIFORM_NAME_LENGTH            0x8A39
+#define GL_UNIFORM_BLOCK_INDEX            0x8A3A
+#define GL_UNIFORM_OFFSET                 0x8A3B
+#define GL_UNIFORM_ARRAY_STRIDE           0x8A3C
+#define GL_UNIFORM_MATRIX_STRIDE          0x8A3D
+#define GL_UNIFORM_IS_ROW_MAJOR           0x8A3E
+#define GL_UNIFORM_BLOCK_BINDING          0x8A3F
+#define GL_UNIFORM_BLOCK_DATA_SIZE        0x8A40
+#define GL_UNIFORM_BLOCK_NAME_LENGTH      0x8A41
+#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS  0x8A42
+#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44
+#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46
+#define GL_INVALID_INDEX                  0xFFFFFFFFu
+#define GL_MAX_VERTEX_OUTPUT_COMPONENTS   0x9122
+#define GL_MAX_FRAGMENT_INPUT_COMPONENTS  0x9125
+#define GL_MAX_SERVER_WAIT_TIMEOUT        0x9111
+#define GL_OBJECT_TYPE                    0x9112
+#define GL_SYNC_CONDITION                 0x9113
+#define GL_SYNC_STATUS                    0x9114
+#define GL_SYNC_FLAGS                     0x9115
+#define GL_SYNC_FENCE                     0x9116
+#define GL_SYNC_GPU_COMMANDS_COMPLETE     0x9117
+#define GL_UNSIGNALED                     0x9118
+#define GL_SIGNALED                       0x9119
+#define GL_ALREADY_SIGNALED               0x911A
+#define GL_TIMEOUT_EXPIRED                0x911B
+#define GL_CONDITION_SATISFIED            0x911C
+#define GL_WAIT_FAILED                    0x911D
+#define GL_SYNC_FLUSH_COMMANDS_BIT        0x00000001
+#define GL_TIMEOUT_IGNORED                0xFFFFFFFFFFFFFFFFull
+#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR    0x88FE
+#define GL_ANY_SAMPLES_PASSED             0x8C2F
+#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A
+#define GL_SAMPLER_BINDING                0x8919
+#define GL_RGB10_A2UI                     0x906F
+#define GL_TEXTURE_SWIZZLE_R              0x8E42
+#define GL_TEXTURE_SWIZZLE_G              0x8E43
+#define GL_TEXTURE_SWIZZLE_B              0x8E44
+#define GL_TEXTURE_SWIZZLE_A              0x8E45
+#define GL_GREEN                          0x1904
+#define GL_BLUE                           0x1905
+#define GL_INT_2_10_10_10_REV             0x8D9F
+#define GL_TRANSFORM_FEEDBACK             0x8E22
+#define GL_TRANSFORM_FEEDBACK_PAUSED      0x8E23
+#define GL_TRANSFORM_FEEDBACK_ACTIVE      0x8E24
+#define GL_TRANSFORM_FEEDBACK_BINDING     0x8E25
+#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257
+#define GL_PROGRAM_BINARY_LENGTH          0x8741
+#define GL_NUM_PROGRAM_BINARY_FORMATS     0x87FE
+#define GL_PROGRAM_BINARY_FORMATS         0x87FF
+#define GL_COMPRESSED_R11_EAC             0x9270
+#define GL_COMPRESSED_SIGNED_R11_EAC      0x9271
+#define GL_COMPRESSED_RG11_EAC            0x9272
+#define GL_COMPRESSED_SIGNED_RG11_EAC     0x9273
+#define GL_COMPRESSED_RGB8_ETC2           0x9274
+#define GL_COMPRESSED_SRGB8_ETC2          0x9275
+#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276
+#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277
+#define GL_COMPRESSED_RGBA8_ETC2_EAC      0x9278
+#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279
+#define GL_TEXTURE_IMMUTABLE_FORMAT       0x912F
+#define GL_MAX_ELEMENT_INDEX              0x8D6B
+#define GL_NUM_SAMPLE_COUNTS              0x9380
+#define GL_TEXTURE_IMMUTABLE_LEVELS       0x82DF
+
+#endif
diff --git a/video/out/opengl/hwdec_d3d11egl.c b/video/out/opengl/hwdec_d3d11egl.c
new file mode 100644
index 0000000..c312091
--- /dev/null
+++ b/video/out/opengl/hwdec_d3d11egl.c
@@ -0,0 +1,363 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <windows.h>
+#include <d3d11.h>
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "angle_dynamic.h"
+
+#include "common/common.h"
+#include "osdep/timer.h"
+#include "osdep/windows_utils.h"
+#include "video/out/gpu/hwdec.h"
+#include "ra_gl.h"
+#include "video/hwdec.h"
+#include "video/d3d.h"
+
+#ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE
+#define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x33AB
+#endif
+
+struct priv_owner { // per-driver state (hw->priv), filled in by init()
+    struct mp_hwdec_ctx hwctx; // passed to hwdec_devices_add() once init() succeeds
+
+    ID3D11Device *d3d11_device; // device queried from ANGLE; init() takes a reference
+    EGLDisplay egl_display; // the display that was current when init() ran
+
+    // EGL_KHR_stream (all entry points below are resolved with eglGetProcAddress)
+    EGLStreamKHR (EGLAPIENTRY *CreateStreamKHR)(EGLDisplay dpy,
+                                                const EGLint *attrib_list);
+    EGLBoolean (EGLAPIENTRY *DestroyStreamKHR)(EGLDisplay dpy,
+                                               EGLStreamKHR stream);
+
+    // EGL_KHR_stream_consumer_gltexture (called from mapper_map/mapper_unmap)
+    EGLBoolean (EGLAPIENTRY *StreamConsumerAcquireKHR)
+                                        (EGLDisplay dpy, EGLStreamKHR stream);
+    EGLBoolean (EGLAPIENTRY *StreamConsumerReleaseKHR)
+                                        (EGLDisplay dpy, EGLStreamKHR stream);
+
+    // EGL_NV_stream_consumer_gltexture_yuv (called from mapper_init)
+    EGLBoolean (EGLAPIENTRY *StreamConsumerGLTextureExternalAttribsNV)
+                (EGLDisplay dpy, EGLStreamKHR stream, EGLAttrib *attrib_list);
+
+    // EGL_ANGLE_stream_producer_d3d_texture (mapper_init creates, mapper_map posts)
+    EGLBoolean (EGLAPIENTRY *CreateStreamProducerD3DTextureANGLE)
+            (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib *attrib_list);
+    EGLBoolean (EGLAPIENTRY *StreamPostD3DTextureANGLE)
+            (EGLDisplay dpy, EGLStreamKHR stream, void *texture,
+             const EGLAttrib *attrib_list);
+};
+
+struct priv { // per-mapper state (mapper->priv)
+    EGLStreamKHR egl_stream; // created in mapper_init(), destroyed in mapper_uninit()
+    GLuint gl_textures[2]; // GL_TEXTURE_EXTERNAL_OES textures, one per plane
+};
+
+// Reverts init(): unregister hwctx, free its av_device_ref, release the device.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    hwdec_devices_remove(hw->devs, &p->hwctx);
+    av_buffer_unref(&p->hwctx.av_device_ref); // reference created in init()
+    if (p->d3d11_device)
+        ID3D11Device_Release(p->d3d11_device);
+}
+
+// Driver init. Requires a current GL context running on ANGLE that offers the
+// EGLStream/D3D11 interop extensions; resolves the EGL entry points into *p,
+// takes a reference on ANGLE's ID3D11Device and registers it as hwdec device.
+// Returns 0 on success, -1 on failure. Failure paths do not release what was
+// already stored in *p (presumably left to uninit() -- TODO confirm).
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    if (!ra_is_gl(hw->ra_ctx->ra) || !angle_load())
+        return -1;
+
+    EGLDisplay egl_display = eglGetCurrentDisplay();
+    if (!egl_display || !eglGetCurrentContext())
+        return -1;
+
+    GL *gl = ra_gl_get(hw->ra_ctx->ra);
+    const char *exts = eglQueryString(egl_display, EGL_EXTENSIONS);
+
+    // Capability gate: any unmet requirement rejects the driver silently.
+    if (!gl_check_extension(exts, "EGL_ANGLE_d3d_share_handle_client_buffer"))
+        return -1;
+    if (!gl_check_extension(exts, "EGL_ANGLE_stream_producer_d3d_texture"))
+        return -1;
+    // GLES 2 contexts pass without the essl3 external-image extension.
+    if (!gl_check_extension(gl->extensions, "GL_OES_EGL_image_external_essl3") &&
+        gl->es != 200)
+        return -1;
+    if (!gl_check_extension(exts, "EGL_EXT_device_query"))
+        return -1;
+    if (!(gl->mpgl_caps & MPGL_CAP_TEX_RG))
+        return -1;
+
+    p->egl_display = egl_display;
+    p->CreateStreamKHR = (void *)eglGetProcAddress("eglCreateStreamKHR");
+    p->DestroyStreamKHR = (void *)eglGetProcAddress("eglDestroyStreamKHR");
+    p->StreamConsumerAcquireKHR =
+        (void *)eglGetProcAddress("eglStreamConsumerAcquireKHR");
+    p->StreamConsumerReleaseKHR =
+        (void *)eglGetProcAddress("eglStreamConsumerReleaseKHR");
+    p->StreamConsumerGLTextureExternalAttribsNV =
+        (void *)eglGetProcAddress("eglStreamConsumerGLTextureExternalAttribsNV");
+    p->CreateStreamProducerD3DTextureANGLE =
+        (void *)eglGetProcAddress("eglCreateStreamProducerD3DTextureANGLE");
+    p->StreamPostD3DTextureANGLE =
+        (void *)eglGetProcAddress("eglStreamPostD3DTextureANGLE");
+
+    if (!(p->CreateStreamKHR && p->DestroyStreamKHR &&
+          p->StreamConsumerAcquireKHR && p->StreamConsumerReleaseKHR &&
+          p->StreamConsumerGLTextureExternalAttribsNV &&
+          p->CreateStreamProducerD3DTextureANGLE &&
+          p->StreamPostD3DTextureANGLE)) {
+        MP_ERR(hw, "Failed to load some EGLStream functions.\n");
+        return -1;
+    }
+
+    // Extension list published via hw->glsl_extensions, by GLES version.
+    static const char *es2_exts[] = {"GL_NV_EGL_stream_consumer_external", 0};
+    static const char *es3_exts[] = {"GL_NV_EGL_stream_consumer_external",
+                                     "GL_OES_EGL_image_external_essl3", 0};
+    hw->glsl_extensions = gl->es != 200 ? es3_exts : es2_exts;
+
+    PFNEGLQUERYDISPLAYATTRIBEXTPROC query_display_attrib =
+        (void *)eglGetProcAddress("eglQueryDisplayAttribEXT");
+    PFNEGLQUERYDEVICEATTRIBEXTPROC query_device_attrib =
+        (void *)eglGetProcAddress("eglQueryDeviceAttribEXT");
+    if (!query_display_attrib || !query_device_attrib)
+        return -1;
+
+    // EGL display -> EGL device -> the D3D11 device behind it.
+    EGLAttrib egl_device = 0;
+    if (!query_display_attrib(egl_display, EGL_DEVICE_EXT, &egl_device))
+        return -1;
+    EGLAttrib d3d_attr = 0;
+    if (!query_device_attrib((EGLDeviceEXT)egl_device, EGL_D3D11_DEVICE_ANGLE,
+                             &d3d_attr)) {
+        MP_ERR(hw, "Could not get EGL_D3D11_DEVICE_ANGLE from ANGLE.\n");
+        return -1;
+    }
+
+    p->d3d11_device = (ID3D11Device *)d3d_attr;
+    if (!p->d3d11_device)
+        return -1;
+    ID3D11Device_AddRef(p->d3d11_device); // own reference, kept in *p
+
+    if (!d3d11_check_decoding(p->d3d11_device)) {
+        MP_VERBOSE(hw, "D3D11 video decoding not supported on this system.\n");
+        return -1;
+    }
+
+    // Switch on the device's multithread protection.
+    ID3D10Multithread *mt_iface;
+    HRESULT hr = ID3D11Device_QueryInterface(
+        p->d3d11_device, &IID_ID3D10Multithread, (void **)&mt_iface);
+    if (FAILED(hr)) {
+        MP_ERR(hw, "Failed to get Multithread interface: %s\n",
+               mp_HRESULT_to_str(hr));
+        return -1;
+    }
+    ID3D10Multithread_SetMultithreadProtected(mt_iface, TRUE);
+    ID3D10Multithread_Release(mt_iface);
+
+    static const int subfmts[] = {IMGFMT_NV12, IMGFMT_P010, 0};
+    p->hwctx = (struct mp_hwdec_ctx){
+        .hw_imgfmt = IMGFMT_D3D11,
+        .supported_formats = subfmts,
+        .driver_name = hw->driver->name,
+        .av_device_ref = d3d11_wrap_device_ref(p->d3d11_device),
+    };
+    if (!p->hwctx.av_device_ref) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+        return -1;
+    }
+
+    hwdec_devices_add(hw->devs, &p->hwctx);
+    return 0;
+}
+
+// Mapper teardown: destroy the EGL stream, if any, and delete both textures.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    struct priv_owner *owner = mapper->owner->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+    if (priv->egl_stream) {
+        owner->DestroyStreamKHR(owner->egl_display, priv->egl_stream);
+        priv->egl_stream = 0;
+    }
+    gl->DeleteTextures(2, priv->gl_textures);
+}
+
+// Builds the per-mapper EGL stream: one external-OES texture per plane becomes
+// the stream consumer, and the stream is prepared for D3D texture producers.
+// Sets mapper->dst_params. Returns 0 on success, -1 on failure.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    struct priv *priv = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+    struct ra_imgfmt_desc desc = {0};
+    ra_get_imgfmt_desc(mapper->ra, mapper->src_params.hw_subfmt, &desc);
+
+    // ANGLE hardcodes the list of accepted formats. This is a subset.
+    bool fmt_ok = mapper->src_params.hw_subfmt == IMGFMT_NV12 ||
+                  mapper->src_params.hw_subfmt == IMGFMT_P010;
+    if (!fmt_ok || desc.num_planes < 1 || desc.num_planes > 2) {
+        MP_FATAL(mapper, "Format not supported.\n");
+        return -1;
+    }
+
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt;
+    mapper->dst_params.hw_subfmt = 0;
+
+    // The texture units need to be bound during init only, and are free for
+    // use again after the initialization here is done.
+    const int first_unit = 0; // units [first_unit, first_unit + planes)
+    const int planes = desc.num_planes;
+    const int target = GL_TEXTURE_EXTERNAL_OES;
+
+    priv->egl_stream = owner->CreateStreamKHR(owner->egl_display,
+                                              (EGLint[]){EGL_NONE});
+    if (!priv->egl_stream)
+        goto fail;
+
+    // Two fixed pairs, one texture-unit pair per plane, then EGL_NONE.
+    EGLAttrib attrs[(2 + 2 + 1) * 2] = {
+        EGL_COLOR_BUFFER_TYPE,          EGL_YUV_BUFFER_EXT,
+        EGL_YUV_NUMBER_OF_PLANES_EXT,   planes,
+    };
+    EGLAttrib *next = &attrs[2 * 2];
+    for (int i = 0; i < planes; i++) {
+        gl->ActiveTexture(GL_TEXTURE0 + first_unit + i);
+        gl->GenTextures(1, &priv->gl_textures[i]);
+        gl->BindTexture(target, priv->gl_textures[i]);
+        gl->TexParameteri(target, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        gl->TexParameteri(target, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        gl->TexParameteri(target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        gl->TexParameteri(target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        *next++ = EGL_YUV_PLANE0_TEXTURE_UNIT_NV + i;
+        *next++ = first_unit + i;
+    }
+    *next = EGL_NONE;
+
+    if (!owner->StreamConsumerGLTextureExternalAttribsNV(
+            owner->egl_display, priv->egl_stream, attrs))
+        goto fail;
+    if (!owner->CreateStreamProducerD3DTextureANGLE(
+            owner->egl_display, priv->egl_stream, (EGLAttrib[]){EGL_NONE}))
+        goto fail;
+
+    for (int i = 0; i < planes; i++) {
+        gl->ActiveTexture(GL_TEXTURE0 + first_unit + i);
+        gl->BindTexture(target, 0);
+    }
+    gl->ActiveTexture(GL_TEXTURE0);
+    return 0;
+fail:
+    gl->ActiveTexture(GL_TEXTURE0);
+    MP_ERR(mapper, "Failed to create EGLStream\n");
+    return -1;
+}
+
+// Posts the frame's D3D11 texture to the EGL stream, acquires it, and wraps both
+// plane textures into mapper->tex[]. Returns 0 on success, -1 on failure.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *owner = mapper->owner->priv;
+    struct priv *priv = mapper->priv;
+
+    // planes[0] is read as the texture, planes[1] as its subresource index.
+    ID3D11Texture2D *d3d_tex = (void *)mapper->src->planes[0];
+    int d3d_subindex = (intptr_t)mapper->src->planes[1];
+    if (!d3d_tex)
+        return -1;
+
+    EGLAttrib attrs[] = {
+        EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE, d3d_subindex,
+        EGL_NONE,
+    };
+    if (!owner->StreamPostD3DTextureANGLE(owner->egl_display, priv->egl_stream,
+                                          (void *)d3d_tex, attrs)) {
+        // ANGLE changed the enum ID of this without warning at one point.
+        attrs[0] = attrs[0] == 0x33AB ? 0x3AAB : 0x33AB;
+        if (!owner->StreamPostD3DTextureANGLE(
+                owner->egl_display, priv->egl_stream, (void *)d3d_tex, attrs))
+            return -1;
+    }
+    if (!owner->StreamConsumerAcquireKHR(owner->egl_display, priv->egl_stream))
+        return -1;
+
+    D3D11_TEXTURE2D_DESC texdesc;
+    ID3D11Texture2D_GetDesc(d3d_tex, &texdesc);
+    for (int plane = 0; plane < 2; plane++) {
+        // Plane 1 halves both dimensions and passes 2 to the format lookup.
+        const int scale = plane ? 2 : 1;
+        struct ra_tex_params params = {
+            .dimensions = 2,
+            .w = texdesc.Width / scale,
+            .h = texdesc.Height / scale,
+            .d = 1,
+            .format = ra_find_unorm_format(mapper->ra, 1, scale),
+            .render_src = true,
+            .src_linear = true,
+            .external_oes = true,
+        };
+        if (!params.format)
+            return -1;
+        mapper->tex[plane] = ra_create_wrapped_tex(mapper->ra, &params,
+                                                   priv->gl_textures[plane]);
+        if (!mapper->tex[plane])
+            return -1;
+    }
+    return 0;
+}
+
+// Frees both wrapped textures, then calls StreamConsumerReleaseKHR if a stream exists.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    struct priv_owner *owner = mapper->owner->priv;
+    ra_tex_free(mapper->ra, &mapper->tex[0]);
+    ra_tex_free(mapper->ra, &mapper->tex[1]);
+    if (priv->egl_stream)
+        owner->StreamConsumerReleaseKHR(owner->egl_display, priv->egl_stream);
+}
+
+const struct ra_hwdec_driver ra_hwdec_d3d11egl = { // driver table, callbacks in lifecycle order
+    .name = "d3d11-egl",
+    .imgfmts = {IMGFMT_D3D11, 0},
+    .priv_size = sizeof(struct priv_owner),
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+        .uninit = mapper_uninit,
+    },
+};
diff --git a/video/out/opengl/hwdec_dxva2egl.c b/video/out/opengl/hwdec_dxva2egl.c
new file mode 100644
index 0000000..979ef59
--- /dev/null
+++ b/video/out/opengl/hwdec_dxva2egl.c
@@ -0,0 +1,384 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <windows.h>
+#include <d3d9.h>
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include "angle_dynamic.h"
+
+#include "common/common.h"
+#include "osdep/timer.h"
+#include "osdep/windows_utils.h"
+#include "video/out/gpu/hwdec.h"
+#include "ra_gl.h"
+#include "video/hwdec.h"
+#include "video/d3d.h"
+
+// Per-driver state, set up in init() and torn down in uninit().
+struct priv_owner {
+    struct mp_hwdec_ctx hwctx;      // registered via hwdec_devices_add()
+    IDirect3D9Ex       *d3d9ex;     // from Direct3DCreate9Ex()
+    IDirect3DDevice9Ex *device9ex;  // our own device, created in init()
+
+    EGLDisplay egl_display;         // eglGetCurrentDisplay() at init time
+    EGLConfig  egl_config;          // pbuffer config picked by eglChooseConfig()
+    EGLint     alpha;               // EGL_BIND_TO_TEXTURE_RGBA of egl_config;
+                                    // non-zero selects the RGBA/A8R8G8B8 paths
+};
+
+// Per-mapper state, created in mapper_init() and freed in mapper_uninit().
+struct priv {
+    IDirect3DDevice9Ex *device9ex; // (no own reference)
+    IDirect3DQuery9    *query9;    // event query used to wait for StretchRect
+    IDirect3DTexture9  *texture9;  // shared render-target texture
+    IDirect3DSurface9  *surface9;  // level 0 of texture9; StretchRect target
+
+    EGLDisplay egl_display;        // copied from the owner
+    EGLSurface egl_surface;        // pbuffer created from texture9's share handle
+
+    GLuint gl_texture;             // GL texture egl_surface gets bound to
+};
+
+// Tear down the owner state: unregister the hwdec device, drop the libav
+// device reference, then release the D3D9Ex device and the D3D9Ex object if
+// they were created.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *owner = hw->priv;
+
+    hwdec_devices_remove(hw->devs, &owner->hwctx);
+    av_buffer_unref(&owner->hwctx.av_device_ref);
+
+    if (owner->device9ex != NULL)
+        IDirect3DDevice9Ex_Release(owner->device9ex);
+
+    if (owner->d3d9ex != NULL)
+        IDirect3D9Ex_Release(owner->d3d9ex);
+}
+
+// Initialize the "dxva2-egl" interop.
+// Requires a GL RA running on ANGLE with a current EGL display/context that
+// exposes EGL_ANGLE_d3d_share_handle_client_buffer. Creates a private
+// Direct3D9Ex device, picks an EGL pbuffer config, test-creates a mapper and
+// finally registers the device for DXVA2 decoding.
+// Returns 0 on success, -1 on failure. Nothing is released on the failure
+// path here; the resources stored in hw->priv are released by uninit().
+static int init(struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+    HRESULT hr;
+
+    if (!ra_is_gl(hw->ra_ctx->ra))
+        return -1;
+    if (!angle_load())
+        return -1;
+
+    d3d_load_dlls();
+
+    EGLDisplay egl_display = eglGetCurrentDisplay();
+    if (!egl_display)
+        return -1;
+
+    if (!eglGetCurrentContext())
+        return -1;
+
+    // The share-handle extension is what lets us wrap a D3D9 texture as an
+    // EGL pbuffer in mapper_init().
+    const char *exts = eglQueryString(egl_display, EGL_EXTENSIONS);
+    if (!gl_check_extension(exts, "EGL_ANGLE_d3d_share_handle_client_buffer")) {
+        return -1;
+    }
+
+    p->egl_display = egl_display;
+
+    if (!d3d9_dll) {
+        MP_FATAL(hw, "Failed to load \"d3d9.dll\": %s\n",
+                 mp_LastError_to_str());
+        goto fail;
+    }
+
+    // Direct3DCreate9Ex is resolved at runtime from the loaded d3d9.dll.
+    HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D);
+    Direct3DCreate9Ex = (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex");
+    if (!Direct3DCreate9Ex) {
+        MP_FATAL(hw, "Direct3D 9Ex not supported\n");
+        goto fail;
+    }
+
+    hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &p->d3d9ex);
+    if (FAILED(hr)) {
+        MP_FATAL(hw, "Couldn't create Direct3D9Ex: %s\n",
+                 mp_HRESULT_to_str(hr));
+        goto fail;
+    }
+
+    // We must create our own Direct3D9Ex device. ANGLE can give us the device
+    // it's using, but that's probably a ID3D11Device.
+    // (copied from chromium dxva_video_decode_accelerator_win.cc)
+    D3DPRESENT_PARAMETERS present_params = {
+        .BackBufferWidth = 1,
+        .BackBufferHeight = 1,
+        .BackBufferFormat = D3DFMT_UNKNOWN,
+        .BackBufferCount = 1,
+        .SwapEffect = D3DSWAPEFFECT_DISCARD,
+        .hDeviceWindow = NULL,
+        .Windowed = TRUE,
+        .Flags = D3DPRESENTFLAG_VIDEO,
+        .FullScreen_RefreshRateInHz = 0,
+        .PresentationInterval = 0,
+    };
+    hr = IDirect3D9Ex_CreateDeviceEx(p->d3d9ex,
+                                     D3DADAPTER_DEFAULT,
+                                     D3DDEVTYPE_HAL,
+                                     NULL,
+                                     D3DCREATE_FPU_PRESERVE |
+                                     D3DCREATE_HARDWARE_VERTEXPROCESSING |
+                                     D3DCREATE_DISABLE_PSGP_THREADING |
+                                     D3DCREATE_MULTITHREADED,
+                                     &present_params,
+                                     NULL,
+                                     &p->device9ex);
+    if (FAILED(hr)) {
+        MP_FATAL(hw, "Failed to create Direct3D9Ex device: %s\n",
+                 mp_HRESULT_to_str(hr));
+        goto fail;
+    }
+
+    // Ask for a 32 bit pbuffer-capable config; only the first match is used.
+    EGLint attrs[] = {
+        EGL_BUFFER_SIZE, 32,
+        EGL_RED_SIZE, 8,
+        EGL_GREEN_SIZE, 8,
+        EGL_BLUE_SIZE, 8,
+        EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
+        EGL_ALPHA_SIZE, 0,
+        EGL_NONE
+    };
+    EGLint count;
+    if (!eglChooseConfig(p->egl_display, attrs, &p->egl_config, 1, &count) ||
+        !count) {
+        MP_ERR(hw, "Failed to get EGL surface configuration\n");
+        goto fail;
+    }
+
+    // Remember whether the config binds as RGBA; mapper_init() uses this to
+    // pick matching D3D and EGL texture formats.
+    if (!eglGetConfigAttrib(p->egl_display, p->egl_config,
+                            EGL_BIND_TO_TEXTURE_RGBA, &p->alpha)) {
+        MP_FATAL(hw, "Failed to query EGL surface alpha\n");
+        goto fail;
+    }
+
+    // Create and immediately free a mapper with dummy parameters, so that a
+    // setup in which mapping cannot work is rejected during init.
+    struct mp_image_params dummy_params = {
+        .imgfmt = IMGFMT_DXVA2,
+        .w = 256,
+        .h = 256,
+    };
+    struct ra_hwdec_mapper *mapper = ra_hwdec_mapper_create(hw, &dummy_params);
+    if (!mapper)
+        goto fail;
+    ra_hwdec_mapper_free(&mapper);
+
+    p->hwctx = (struct mp_hwdec_ctx){
+        .driver_name = hw->driver->name,
+        .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->device9ex),
+        .hw_imgfmt = IMGFMT_DXVA2,
+    };
+
+    if (!p->hwctx.av_device_ref) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+        goto fail;
+    }
+
+    hwdec_devices_add(hw->devs, &p->hwctx);
+
+    return 0;
+fail:
+    return -1;
+}
+
+// Free everything mapper_init() created: the wrapped RA texture, the GL
+// texture, the EGL pbuffer (unbinding it first), and the D3D9 objects.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    GL *gl = ra_gl_get(mapper->ra);
+    struct priv *p = mapper->priv;
+
+    ra_tex_free(mapper->ra, &mapper->tex[0]);
+    gl->DeleteTextures(1, &p->gl_texture);
+
+    if (p->egl_display) {
+        if (p->egl_surface) {
+            eglReleaseTexImage(p->egl_display, p->egl_surface,
+                               EGL_BACK_BUFFER);
+            eglDestroySurface(p->egl_display, p->egl_surface);
+        }
+    }
+
+    if (p->surface9 != NULL)
+        IDirect3DSurface9_Release(p->surface9);
+
+    if (p->texture9 != NULL)
+        IDirect3DTexture9_Release(p->texture9);
+
+    if (p->query9 != NULL)
+        IDirect3DQuery9_Release(p->query9);
+}
+
+// Create the per-mapping resources for frames of size mapper->src_params:
+// an event query, a shared D3D9 render-target texture, an EGL pbuffer that
+// wraps that texture through its share handle, and a GL texture exposed as
+// mapper->tex[0]. The mapped output format is reported as IMGFMT_RGB0.
+// Returns 0 on success, -1 on failure. Partially created resources are left
+// in mapper->priv, where mapper_uninit() releases them.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *p_owner = mapper->owner->priv;
+    struct priv *p = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+    HRESULT hr;
+
+    p->device9ex = p_owner->device9ex;
+    p->egl_display = p_owner->egl_display;
+
+    hr = IDirect3DDevice9_CreateQuery(p->device9ex, D3DQUERYTYPE_EVENT,
+                                      &p->query9);
+    if (FAILED(hr)) {
+        MP_FATAL(mapper, "Failed to create Direct3D query interface: %s\n",
+                 mp_HRESULT_to_str(hr));
+        goto fail;
+    }
+
+    // Test the query API
+    hr = IDirect3DQuery9_Issue(p->query9, D3DISSUE_END);
+    if (FAILED(hr)) {
+        MP_FATAL(mapper, "Failed to issue Direct3D END test query: %s\n",
+                 mp_HRESULT_to_str(hr));
+        goto fail;
+    }
+
+    // Passing a NULL-initialized handle makes D3D9Ex create a shareable
+    // texture and return its share handle.
+    HANDLE share_handle = NULL;
+    hr = IDirect3DDevice9Ex_CreateTexture(p->device9ex,
+                                          mapper->src_params.w,
+                                          mapper->src_params.h,
+                                          1, D3DUSAGE_RENDERTARGET,
+                                          p_owner->alpha ?
+                                            D3DFMT_A8R8G8B8 : D3DFMT_X8R8G8B8,
+                                          D3DPOOL_DEFAULT,
+                                          &p->texture9,
+                                          &share_handle);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to create Direct3D9 texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto fail;
+    }
+
+    // Level 0 of the texture is the StretchRect destination in mapper_map().
+    hr = IDirect3DTexture9_GetSurfaceLevel(p->texture9, 0, &p->surface9);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to get Direct3D9 surface from texture: %s\n",
+               mp_HRESULT_to_str(hr));
+        goto fail;
+    }
+
+    // The pbuffer's texture format follows the alpha setting of the EGL
+    // config chosen in init(), matching the D3D format picked above.
+    EGLint attrib_list[] = {
+        EGL_WIDTH, mapper->src_params.w,
+        EGL_HEIGHT, mapper->src_params.h,
+        EGL_TEXTURE_FORMAT, p_owner->alpha ? EGL_TEXTURE_RGBA : EGL_TEXTURE_RGB,
+        EGL_TEXTURE_TARGET, EGL_TEXTURE_2D,
+        EGL_NONE
+    };
+    p->egl_surface = eglCreatePbufferFromClientBuffer(
+        p->egl_display, EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE,
+        share_handle, p_owner->egl_config, attrib_list);
+    if (p->egl_surface == EGL_NO_SURFACE) {
+        MP_ERR(mapper, "Failed to create EGL surface\n");
+        goto fail;
+    }
+
+    gl->GenTextures(1, &p->gl_texture);
+    gl->BindTexture(GL_TEXTURE_2D, p->gl_texture);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    gl->BindTexture(GL_TEXTURE_2D, 0);
+
+    // Describe the GL texture to the RA layer; component count follows alpha.
+    struct ra_tex_params params = {
+        .dimensions = 2,
+        .w = mapper->src_params.w,
+        .h = mapper->src_params.h,
+        .d = 1,
+        .format = ra_find_unorm_format(mapper->ra, 1, p_owner->alpha ? 4 : 3),
+        .render_src = true,
+        .src_linear = true,
+    };
+    if (!params.format)
+        goto fail;
+
+    mapper->tex[0] = ra_create_wrapped_tex(mapper->ra, &params, p->gl_texture);
+    if (!mapper->tex[0])
+        goto fail;
+
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = IMGFMT_RGB0;
+    mapper->dst_params.hw_subfmt = 0;
+    return 0;
+fail:
+    return -1;
+}
+
+// Map the current frame (mapper->src): StretchRect the decoder surface into
+// the shared RGB surface, wait (bounded) for the GPU to finish, then bind the
+// EGL pbuffer to the GL texture.
+// Returns 0 on success, -1 if a Direct3D call fails. A flush timeout is only
+// logged; the frame is still bound in that case.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    HRESULT hr;
+    RECT rc = {0, 0, mapper->src->w, mapper->src->h};
+    // For DXVA2 images the IDirect3DSurface9 pointer is carried in planes[3].
+    IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)mapper->src->planes[3];
+    hr = IDirect3DDevice9Ex_StretchRect(p->device9ex,
+                                        hw_surface, &rc,
+                                        p->surface9, &rc,
+                                        D3DTEXF_NONE);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Direct3D RGB conversion failed: %s\n",
+               mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    hr = IDirect3DQuery9_Issue(p->query9, D3DISSUE_END);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed to issue Direct3D END query\n");
+        return -1;
+    }
+
+    // There doesn't appear to be an efficient way to do a blocking flush
+    // of the above StretchRect. Timeout of 8ms is required to reliably
+    // render 4k on Intel Haswell, Ivybridge and Cherry Trail Atom.
+    const int max_retries = 8;
+    const int64_t wait_ns = MP_TIME_MS_TO_NS(1);
+    int retries = 0;
+    while (true) {
+        hr = IDirect3DQuery9_GetData(p->query9, NULL, 0, D3DGETDATA_FLUSH);
+        if (FAILED(hr)) {
+            MP_ERR(mapper, "Failed to query Direct3D flush state\n");
+            return -1;
+        } else if (hr == S_FALSE) {
+            if (++retries > max_retries) {
+                // wait_ns is in nanoseconds; convert the total wait time
+                // back to milliseconds for the message.
+                MP_VERBOSE(mapper, "Failed to flush frame after %lld ms\n",
+                           (long long)(wait_ns * max_retries /
+                                       MP_TIME_MS_TO_NS(1)));
+                break;
+            }
+            mp_sleep_ns(wait_ns);
+        } else {
+            break;
+        }
+    }
+
+    gl->BindTexture(GL_TEXTURE_2D, p->gl_texture);
+    eglBindTexImage(p->egl_display, p->egl_surface, EGL_BACK_BUFFER);
+    gl->BindTexture(GL_TEXTURE_2D, 0);
+
+    return 0;
+}
+
+// Driver table for the "dxva2-egl" interop. It advertises IMGFMT_DXVA2; the
+// mapper has no unmap callback.
+const struct ra_hwdec_driver ra_hwdec_dxva2egl = {
+    .name = "dxva2-egl",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_DXVA2, 0},
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+    },
+};
diff --git a/video/out/opengl/hwdec_dxva2gldx.c b/video/out/opengl/hwdec_dxva2gldx.c
new file mode 100644
index 0000000..0172813
--- /dev/null
+++ b/video/out/opengl/hwdec_dxva2gldx.c
@@ -0,0 +1,247 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <d3d9.h>
+#include <assert.h>
+
+#include "common/common.h"
+#include "osdep/windows_utils.h"
+#include "video/out/gpu/hwdec.h"
+#include "ra_gl.h"
+#include "video/hwdec.h"
+#include "video/d3d.h"
+
+// for  WGL_ACCESS_READ_ONLY_NV
+#include <GL/wglext.h>
+
+// D3D format of the render target that is shared with OpenGL.
+#define SHARED_SURFACE_D3DFMT D3DFMT_X8R8G8B8
+
+// Per-driver state, set up in init() and torn down in uninit().
+struct priv_owner {
+    struct mp_hwdec_ctx hwctx;  // registered via hwdec_devices_add()
+    IDirect3DDevice9Ex *device; // from the RA native resources; init() AddRefs
+    HANDLE device_h;            // "dxinterop_device_HANDLE" native resource
+};
+
+// Per-mapper state, created in mapper_init() and freed in mapper_uninit().
+struct priv {
+    IDirect3DDevice9Ex *device; // copied from the owner
+    HANDLE device_h;            // copied from the owner
+    IDirect3DSurface9 *rtarget; // render target; StretchRect destination
+    HANDLE rtarget_h;           // handle from DXRegisterObjectNV for rtarget
+    GLuint texture;             // GL texture registered against rtarget
+};
+
+// Tear down the owner state: unregister the hwdec device, drop the libav
+// device reference and release our reference on the D3D9Ex device.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *owner = hw->priv;
+
+    hwdec_devices_remove(hw->devs, &owner->hwctx);
+    av_buffer_unref(&owner->hwctx.av_device_ref);
+
+    if (owner->device != NULL)
+        IDirect3DDevice9Ex_Release(owner->device);
+}
+
+// Initialize the "dxva2-dxinterop" driver: requires a GL RA with the
+// DXINTEROP capability, fetches the interop handle and D3D9Ex device from the
+// RA's native resources, and registers a DXVA2 hwdec device.
+// Returns 0 on success, -1 otherwise.
+static int init(struct ra_hwdec *hw)
+{
+    struct ra *ra = hw->ra_ctx->ra;
+    struct priv_owner *owner = hw->priv;
+
+    if (!ra_is_gl(ra))
+        return -1;
+
+    GL *gl = ra_gl_get(ra);
+    if ((gl->mpgl_caps & MPGL_CAP_DXINTEROP) == 0)
+        return -1;
+
+    // AMD drivers refuse to open more than one dxinterop HANDLE per D3D
+    // device, so reuse the one context_dxinterop already opened.
+    owner->device_h = ra_get_native_resource(ra, "dxinterop_device_HANDLE");
+    if (owner->device_h == NULL)
+        return -1;
+
+    // The D3D device itself is needed as well; keep our own reference.
+    owner->device = ra_get_native_resource(ra, "IDirect3DDevice9Ex");
+    if (owner->device == NULL)
+        return -1;
+    IDirect3DDevice9Ex_AddRef(owner->device);
+
+    owner->hwctx = (struct mp_hwdec_ctx){
+        .driver_name = hw->driver->name,
+        .av_device_ref =
+            d3d9_wrap_device_ref((IDirect3DDevice9 *)owner->device),
+        .hw_imgfmt = IMGFMT_DXVA2,
+    };
+    if (owner->hwctx.av_device_ref == NULL) {
+        MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n");
+        return -1;
+    }
+
+    hwdec_devices_add(hw->devs, &owner->hwctx);
+    return 0;
+}
+
+// Release the per-mapping resources: unlock and unregister the interop
+// object, delete the GL texture, release the D3D render target and free the
+// wrapped RA texture.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    GL *gl = ra_gl_get(mapper->ra);
+    struct priv *p = mapper->priv;
+
+    if (p->rtarget_h) {
+        // Unlocking needs the device handle; unregistering is tried anyway.
+        if (p->device_h &&
+            !gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h))
+        {
+            MP_ERR(mapper, "Failed unlocking texture for access by OpenGL: %s\n",
+                   mp_LastError_to_str());
+        }
+
+        if (gl->DXUnregisterObjectNV(p->device_h, p->rtarget_h)) {
+            p->rtarget_h = 0;
+        } else {
+            MP_ERR(mapper, "Failed to unregister Direct3D surface with OpenGL: %s\n",
+                   mp_LastError_to_str());
+        }
+    }
+
+    gl->DeleteTextures(1, &p->texture);
+    p->texture = 0;
+
+    if (p->rtarget != NULL) {
+        IDirect3DSurface9_Release(p->rtarget);
+        p->rtarget = NULL;
+    }
+
+    ra_tex_free(mapper->ra, &mapper->tex[0]);
+}
+
+// Create the per-mapping resources for frames of size mapper->src_params:
+// a D3D9 render target, a GL texture registered against it through the
+// DX interop functions (and locked for GL access), and the wrapping RA
+// texture in mapper->tex[0]. The mapped output format is reported as
+// IMGFMT_RGB0.
+// Returns 0 on success, -1 on failure. Partially created resources stay in
+// mapper->priv, where mapper_uninit() releases them.
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *p_owner = mapper->owner->priv;
+    struct priv *p = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+    HRESULT hr;
+
+    p->device = p_owner->device;
+    p->device_h = p_owner->device_h;
+
+    HANDLE share_handle = NULL;
+    hr = IDirect3DDevice9Ex_CreateRenderTarget(
+        p->device,
+        mapper->src_params.w, mapper->src_params.h,
+        SHARED_SURFACE_D3DFMT, D3DMULTISAMPLE_NONE, 0, FALSE,
+        &p->rtarget, &share_handle);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Failed creating offscreen Direct3D surface: %s\n",
+               mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    // Only tell GL about the share handle if D3D actually returned one.
+    if (share_handle &&
+        !gl->DXSetResourceShareHandleNV(p->rtarget, share_handle)) {
+        MP_ERR(mapper, "Failed setting Direct3D/OpenGL share handle for surface: %s\n",
+               mp_LastError_to_str());
+        return -1;
+    }
+
+    gl->GenTextures(1, &p->texture);
+    gl->BindTexture(GL_TEXTURE_2D, p->texture);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    gl->BindTexture(GL_TEXTURE_2D, 0);
+
+    // GL only ever reads from the surface; D3D does the writing.
+    p->rtarget_h = gl->DXRegisterObjectNV(p->device_h, p->rtarget, p->texture,
+                                          GL_TEXTURE_2D,
+                                          WGL_ACCESS_READ_ONLY_NV);
+    if (!p->rtarget_h) {
+        MP_ERR(mapper, "Failed to register Direct3D surface with OpenGL: %s\n",
+               mp_LastError_to_str());
+        return -1;
+    }
+
+    // The object is kept locked for GL between maps; mapper_map() unlocks it
+    // only around the StretchRect.
+    if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) {
+        MP_ERR(mapper, "Failed locking texture for access by OpenGL %s\n",
+               mp_LastError_to_str());
+        return -1;
+    }
+
+    struct ra_tex_params params = {
+        .dimensions = 2,
+        .w = mapper->src_params.w,
+        .h = mapper->src_params.h,
+        .d = 1,
+        .format = ra_find_unorm_format(mapper->ra, 1, 4),
+        .render_src = true,
+        .src_linear = true,
+    };
+    if (!params.format)
+        return -1;
+
+    mapper->tex[0] = ra_create_wrapped_tex(mapper->ra, &params, p->texture);
+    if (!mapper->tex[0])
+        return -1;
+
+    mapper->dst_params = mapper->src_params;
+    mapper->dst_params.imgfmt = IMGFMT_RGB0;
+    mapper->dst_params.hw_subfmt = 0;
+
+    return 0;
+}
+
+// Map the current frame (mapper->src): unlock the shared surface from GL,
+// StretchRect the decoder surface into it, then lock it for GL access again.
+// Returns 0 on success, -1 if any step fails.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+    HRESULT hr;
+
+    // The surface is locked for GL between maps; unlock it for the copy.
+    if (!gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h)) {
+        MP_ERR(mapper, "Failed unlocking texture for access by OpenGL: %s\n",
+               mp_LastError_to_str());
+        return -1;
+    }
+
+    // For DXVA2 images the IDirect3DSurface9 pointer is carried in planes[3].
+    IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)mapper->src->planes[3];
+    RECT rc = {0, 0, mapper->src->w, mapper->src->h};
+    hr = IDirect3DDevice9Ex_StretchRect(p->device,
+                                        hw_surface, &rc,
+                                        p->rtarget, &rc,
+                                        D3DTEXF_NONE);
+    if (FAILED(hr)) {
+        MP_ERR(mapper, "Direct3D RGB conversion failed: %s\n",
+               mp_HRESULT_to_str(hr));
+        return -1;
+    }
+
+    if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) {
+        MP_ERR(mapper, "Failed locking texture for access by OpenGL: %s\n",
+               mp_LastError_to_str());
+        return -1;
+    }
+
+    return 0;
+}
+
+// Driver table for the "dxva2-dxinterop" interop. It advertises IMGFMT_DXVA2;
+// the mapper has no unmap callback.
+const struct ra_hwdec_driver ra_hwdec_dxva2gldx = {
+    .name = "dxva2-dxinterop",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_DXVA2, 0},
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+    },
+};
diff --git a/video/out/opengl/hwdec_rpi.c b/video/out/opengl/hwdec_rpi.c
new file mode 100644
index 0000000..5362832
--- /dev/null
+++ b/video/out/opengl/hwdec_rpi.c
@@ -0,0 +1,384 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include <bcm_host.h>
+#include <interface/mmal/mmal.h>
+#include <interface/mmal/util/mmal_util.h>
+#include <interface/mmal/util/mmal_default_components.h>
+#include <interface/mmal/vc/mmal_vc_api.h>
+
+#include <libavutil/rational.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "video/mp_image.h"
+#include "video/out/gpu/hwdec.h"
+
+#include "common.h"
+
+// State of the rpi-overlay driver (one instance per ra_hwdec).
+struct priv {
+    struct mp_log *log;             // copied from hw->log in create()
+
+    struct mp_image_params params;  // params of the last frame seen by
+                                    // overlay_frame()
+
+    MMAL_COMPONENT_T *renderer;     // MMAL video renderer component
+    bool renderer_enabled;          // set by enable_renderer(), cleared by
+                                    // disable_renderer()
+
+    // for RAM input
+    MMAL_POOL_T *swpool;
+
+    struct mp_image *current_frame;
+
+    struct mp_rect src, dst;        // source crop / destination rectangle
+    int cur_window[4]; // raw user params
+};
+
+// Magic alignments (in pixels) expected by the MMAL internals.
+#define ALIGN_W 32
+#define ALIGN_H 16
+
+// Set up mpi as a view onto buffer, laid out as MMAL_ENCODING_I420 with the
+// dimensions from params aligned up to ALIGN_W/ALIGN_H.
+// buffer may be NULL, in which case all plane pointers are NULL and only the
+// size is computed.
+// Returns the number of bytes the three planes need.
+static size_t layout_buffer(struct mp_image *mpi, MMAL_BUFFER_HEADER_T *buffer,
+                            struct mp_image_params *params)
+{
+    assert(params->imgfmt == IMGFMT_420P);
+    mp_image_set_params(mpi, params);
+
+    const int aligned_w = MP_ALIGN_UP(params->w, ALIGN_W);
+    const int aligned_h = MP_ALIGN_UP(params->h, ALIGN_H);
+    uint8_t *base = buffer ? buffer->data : NULL;
+
+    size_t total = 0;
+    for (int plane = 0; plane < 3; plane++) {
+        // Plane 0 is full size; the other two are halved in both directions.
+        int shrink = plane == 0 ? 1 : 2;
+        mpi->planes[plane] = base ? base + total : NULL;
+        mpi->stride[plane] = aligned_w / shrink;
+        total += (size_t)(aligned_h / shrink * mpi->stride[plane]);
+    }
+    return total;
+}
+
+// Translate an mpv colorspace into the matching MMAL color space constant.
+// Anything other than BT.601, BT.709 and SMPTE 240M maps to "unknown".
+static MMAL_FOURCC_T map_csp(enum mp_csp csp)
+{
+    if (csp == MP_CSP_BT_601)
+        return MMAL_COLOR_SPACE_ITUR_BT601;
+    if (csp == MP_CSP_BT_709)
+        return MMAL_COLOR_SPACE_ITUR_BT709;
+    if (csp == MP_CSP_SMPTE_240M)
+        return MMAL_COLOR_SPACE_SMPTE240M;
+    return MMAL_COLOR_SPACE_UNKNOWN;
+}
+
+// Callback installed on the renderer's control port in enable_renderer().
+// The buffer's contents are not inspected; it is released right away.
+static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    mmal_buffer_header_release(buffer);
+}
+
+// Callback installed on the renderer's input port in enable_renderer().
+// overlay_frame() stores the owning mp_image in user_data before sending the
+// buffer; that image reference is freed here.
+static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    talloc_free(buffer->user_data);
+}
+
+// Stop the MMAL renderer if it is running (disable and flush both ports, then
+// disable the component) and always destroy the software buffer pool.
+static void disable_renderer(struct ra_hwdec *hw)
+{
+    struct priv *p = hw->priv;
+
+    if (p->renderer_enabled) {
+        MMAL_PORT_T *ctrl = p->renderer->control;
+        MMAL_PORT_T *in = p->renderer->input[0];
+
+        mmal_port_disable(ctrl);
+        mmal_port_disable(in);
+
+        mmal_port_flush(ctrl);
+        mmal_port_flush(in);
+
+        mmal_component_disable(p->renderer);
+    }
+
+    mmal_pool_destroy(p->swpool);
+    p->swpool = NULL;
+
+    p->renderer_enabled = false;
+}
+
+// Push the current display region (p->src, p->dst, rotation and the window
+// parameters from the "MPV_RPI_WINDOW" native resource) to the renderer's
+// input port.
+// check_window_only: assume params and dst/src rc are unchanged
+//                    (returns early if the window parameters did not change)
+static void update_overlay(struct ra_hwdec *hw, bool check_window_only)
+{
+    struct priv *p = hw->priv;
+    MMAL_PORT_T *input = p->renderer->input[0];
+    struct mp_rect src = p->src;
+    struct mp_rect dst = p->dst;
+
+    // Fall back to all-zero window parameters if the resource is not set.
+    int defs[4] = {0, 0, 0, 0};
+    int *z = ra_get_native_resource(hw->ra_ctx->ra, "MPV_RPI_WINDOW");
+    if (!z)
+        z = defs;
+
+    // As documented in the libmpv openglcb headers.
+    int display = z[0];
+    int layer = z[1];
+    int x = z[2];
+    int y = z[3];
+
+    if (check_window_only && memcmp(z, p->cur_window, sizeof(p->cur_window)) == 0)
+        return;
+
+    memcpy(p->cur_window, z, sizeof(p->cur_window));
+
+    // Indexed by rotation in 90 degree steps.
+    int rotate[] = {MMAL_DISPLAY_ROT0,
+                    MMAL_DISPLAY_ROT90,
+                    MMAL_DISPLAY_ROT180,
+                    MMAL_DISPLAY_ROT270};
+
+    int src_w = src.x1 - src.x0, src_h = src.y1 - src.y0,
+        dst_w = dst.x1 - dst.x0, dst_h = dst.y1 - dst.y0;
+    // Pixel aspect ratio implied by scaling src to dst, reduced so that both
+    // terms are at most 16000.
+    int p_x, p_y;
+    av_reduce(&p_x, &p_y, dst_w * src_h, src_w * dst_h, 16000);
+    MMAL_DISPLAYREGION_T dr = {
+        .hdr = { .id = MMAL_PARAMETER_DISPLAYREGION,
+                 .size = sizeof(MMAL_DISPLAYREGION_T), },
+        .src_rect = { .x = src.x0, .y = src.y0,
+                      .width = src_w, .height = src_h },
+        .dest_rect = { .x = dst.x0 + x, .y = dst.y0 + y,
+                       .width = dst_w, .height = dst_h },
+        .layer = layer - 1, // under the GL layer
+        .display_num = display,
+        .pixel_x = p_x,
+        .pixel_y = p_y,
+        .transform = rotate[p->params.rotate / 90],
+        .fullscreen = 0,
+        .set = MMAL_DISPLAY_SET_SRC_RECT | MMAL_DISPLAY_SET_DEST_RECT |
+               MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_NUM |
+               MMAL_DISPLAY_SET_PIXEL | MMAL_DISPLAY_SET_TRANSFORM |
+               MMAL_DISPLAY_SET_FULLSCREEN,
+    };
+
+    // For 90/270 degree rotation the source rectangle's axes are swapped.
+    if (p->params.rotate % 180 == 90) {
+        MPSWAP(int, dr.src_rect.x, dr.src_rect.y);
+        MPSWAP(int, dr.src_rect.width, dr.src_rect.height);
+    }
+
+    if (mmal_port_parameter_set(input, &dr.hdr))
+        MP_WARN(p, "could not set video rectangle\n");
+}
+
+// Configure and start the MMAL renderer for the format in p->params.
+// Does nothing if the renderer is already enabled. For non-opaque (software)
+// input, a buffer pool for uploads is allocated as well.
+// Returns 0 on success, -1 on failure (including when no format is set yet).
+static int enable_renderer(struct ra_hwdec *hw)
+{
+    struct priv *p = hw->priv;
+    MMAL_PORT_T *input = p->renderer->input[0];
+    struct mp_image_params *params = &p->params;
+
+    if (p->renderer_enabled)
+        return 0;
+
+    if (!params->imgfmt)
+        return -1;
+
+    // IMGFMT_MMAL frames are passed through as opaque buffers; everything
+    // else is uploaded as I420.
+    bool opaque = params->imgfmt == IMGFMT_MMAL;
+
+    input->format->encoding = opaque ? MMAL_ENCODING_OPAQUE : MMAL_ENCODING_I420;
+    input->format->es->video.width = MP_ALIGN_UP(params->w, ALIGN_W);
+    input->format->es->video.height = MP_ALIGN_UP(params->h, ALIGN_H);
+    input->format->es->video.crop = (MMAL_RECT_T){0, 0, params->w, params->h};
+    input->format->es->video.par = (MMAL_RATIONAL_T){params->p_w, params->p_h};
+    input->format->es->video.color_space = map_csp(params->color.space);
+
+    if (mmal_port_format_commit(input))
+        return -1;
+
+    input->buffer_num = MPMAX(input->buffer_num_min,
+                              input->buffer_num_recommended) + 3;
+    input->buffer_size = MPMAX(input->buffer_size_min,
+                               input->buffer_size_recommended);
+
+    if (!opaque) {
+        // Our own I420 layout must match the buffer size MMAL settled on,
+        // otherwise upload() would write a mismatching layout.
+        size_t size = layout_buffer(&(struct mp_image){0}, NULL, params);
+        if (input->buffer_size != size) {
+            MP_FATAL(hw, "We disagree with MMAL about buffer sizes.\n");
+            return -1;
+        }
+
+        p->swpool = mmal_pool_create(input->buffer_num, input->buffer_size);
+        if (!p->swpool) {
+            MP_FATAL(hw, "Could not allocate buffer pool.\n");
+            return -1;
+        }
+    }
+
+    update_overlay(hw, false);
+
+    // Set before enabling the ports, so that disable_renderer() also runs
+    // its port/component teardown if one of the steps below fails.
+    p->renderer_enabled = true;
+
+    if (mmal_port_enable(p->renderer->control, control_port_cb))
+        return -1;
+
+    if (mmal_port_enable(input, input_port_cb))
+        return -1;
+
+    if (mmal_component_enable(p->renderer)) {
+        MP_FATAL(hw, "Failed to enable video renderer.\n");
+        return -1;
+    }
+
+    return 0;
+}
+
+// Free callback for images created in upload(): arg is the
+// MMAL_BUFFER_HEADER_T backing the image, which gets released.
+static void free_mmal_buffer(void *arg)
+{
+    mmal_buffer_header_release(arg);
+}
+
+// Copy a software frame into a buffer taken from the upload pool.
+// Returns a new IMGFMT_MMAL image whose planes[3] points at the MMAL buffer
+// header (freeing the image releases the buffer), or NULL on failure.
+static struct mp_image *upload(struct ra_hwdec *hw, struct mp_image *hw_image)
+{
+    struct priv *p = hw->priv;
+
+    MMAL_BUFFER_HEADER_T *hdr = mmal_queue_wait(p->swpool->queue);
+    if (hdr == NULL) {
+        MP_ERR(hw, "Can't allocate buffer.\n");
+        return NULL;
+    }
+    mmal_buffer_header_reset(hdr);
+
+    struct mp_image *wrapper =
+        mp_image_new_custom_ref(NULL, hdr, free_mmal_buffer);
+    if (wrapper == NULL) {
+        mmal_buffer_header_release(hdr);
+        MP_ERR(hw, "Out of memory.\n");
+        return NULL;
+    }
+
+    mp_image_setfmt(wrapper, IMGFMT_MMAL);
+    wrapper->planes[3] = (void *)hdr;
+
+    // Temporary I420 view onto the buffer memory, used as the copy target.
+    struct mp_image view = {0};
+    hdr->length = layout_buffer(&view, hdr, &p->params);
+    mp_image_copy(&view, hw_image);
+
+    return wrapper;
+}
+
+// Show, reposition or hide a frame on the MMAL overlay.
+//  hw_image: frame to display; NULL disables the renderer
+//  src, dst: source and destination rectangles; stored and applied only on
+//            the reposition path below
+//  newframe: if false and a current frame is set, only the rectangles are
+//            updated and nothing is re-sent to the renderer
+// Returns 0 on success, -1 on failure.
+static int overlay_frame(struct ra_hwdec *hw, struct mp_image *hw_image,
+                         struct mp_rect *src, struct mp_rect *dst, bool newframe)
+{
+    struct priv *p = hw->priv;
+
+    // A change of image parameters requires reconfiguring the renderer.
+    if (hw_image && !mp_image_params_equal(&p->params, &hw_image->params)) {
+        p->params = hw_image->params;
+
+        disable_renderer(hw);
+        mp_image_unrefp(&p->current_frame);
+
+        if (enable_renderer(hw) < 0)
+            return -1;
+    }
+
+    if (hw_image && p->current_frame && !newframe) {
+        // Only touch the display region if either rectangle really changed.
+        if (!mp_rect_equals(&p->src, src) || !mp_rect_equals(&p->dst, dst)) {
+            p->src = *src;
+            p->dst = *dst;
+            update_overlay(hw, false);
+        }
+        return 0; // don't reupload
+    }
+
+    mp_image_unrefp(&p->current_frame);
+
+    if (!hw_image) {
+        disable_renderer(hw);
+        return 0;
+    }
+
+    if (enable_renderer(hw) < 0)
+        return -1;
+
+    update_overlay(hw, true);
+
+    // MMAL frames are referenced directly; anything else is copied into a
+    // pool buffer first.
+    struct mp_image *mpi = NULL;
+    if (hw_image->imgfmt == IMGFMT_MMAL) {
+        mpi = mp_image_new_ref(hw_image);
+    } else {
+        mpi = upload(hw, hw_image);
+    }
+
+    if (!mpi) {
+        disable_renderer(hw);
+        return -1;
+    }
+
+    MMAL_BUFFER_HEADER_T *ref = (void *)mpi->planes[3];
+
+    // Assume this field is free for use by us.
+    // (input_port_cb() frees the image through it.)
+    ref->user_data = mpi;
+
+    if (mmal_port_send_buffer(p->renderer->input[0], ref)) {
+        MP_ERR(hw, "could not queue picture!\n");
+        talloc_free(mpi);
+        return -1;
+    }
+
+    return 0;
+}
+
+// Driver uninit: stop the renderer, release the renderer component if it was
+// created, and deinitialize MMAL.
+static void destroy(struct ra_hwdec *hw)
+{
+    struct priv *p = hw->priv;
+
+    disable_renderer(hw);
+
+    if (p->renderer != NULL)
+        mmal_component_release(p->renderer);
+
+    mmal_vc_deinit();
+}
+
+// Driver init: bring up the Broadcom host library and MMAL, then create the
+// default video renderer component.
+// Returns 0 on success, -1 on failure.
+static int create(struct ra_hwdec *hw)
+{
+    struct priv *p = hw->priv;
+    p->log = hw->log;
+
+    bcm_host_init();
+
+    if (mmal_vc_init() != 0) {
+        MP_FATAL(hw, "Could not initialize MMAL.\n");
+        return -1;
+    }
+
+    if (mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER,
+                              &p->renderer) != 0)
+    {
+        MP_FATAL(hw, "Could not create MMAL renderer.\n");
+        mmal_vc_deinit();
+        return -1;
+    }
+
+    return 0;
+}
+
+// Driver table for the "rpi-overlay" driver. It takes IMGFMT_MMAL and
+// IMGFMT_420P frames and provides an overlay_frame callback rather than a
+// mapper.
+const struct ra_hwdec_driver ra_hwdec_rpi_overlay = {
+    .name = "rpi-overlay",
+    .priv_size = sizeof(struct priv),
+    .imgfmts = {IMGFMT_MMAL, IMGFMT_420P, 0},
+    .init = create,
+    .overlay_frame = overlay_frame,
+    .uninit = destroy,
+};
diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c
new file mode 100644
index 0000000..acdc703
--- /dev/null
+++ b/video/out/opengl/hwdec_vdpau.c
@@ -0,0 +1,251 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <assert.h>
+
+#include "video/out/gpu/hwdec.h"
+#include "ra_gl.h"
+#include "video/vdpau.h"
+#include "video/vdpau_mixer.h"
+
+// This is a GL_NV_vdpau_interop specification bug, and headers (unfortunately)
+// follow it. I'm not sure about the original nvidia headers.
+// The macro converts an integer VDPAU handle into the pointer-typed argument
+// the interop entry points are called with in this file (the device handle
+// for VDPAUInitNV, the output surface for VDPAURegisterOutputSurfaceNV).
+#define BRAINDEATH(x) ((void *)(uintptr_t)(x))
+
+// Per-driver state (ra_hwdec.priv): owns the VDPAU device context that is
+// created in init() and destroyed in uninit().
+struct priv_owner {
+    struct mp_vdpau_ctx *ctx; // NULL until init() created the device
+};
+
+// Per-mapper state (ra_hwdec_mapper.priv), set up by mapper_init().
+struct priv {
+    struct mp_vdpau_ctx *ctx;       // borrowed from the owner's priv_owner.ctx
+    GL *gl;                         // GL function table of the mapper's ra
+    uint64_t preemption_counter;    // passed to mp_vdpau_handle_preemption()
+    GLuint gl_texture;              // texture registered with the VDPAU surface
+    bool vdpgl_initialized;         // true after VDPAUInitNV; triggers VDPAUFiniNV
+    GLvdpauSurfaceNV vdpgl_surface; // interop handle; 0 if not registered
+    VdpOutputSurface vdp_surface;   // mixer render target; VDP_INVALID_HANDLE if none
+    struct mp_vdpau_mixer *mixer;   // created in mapper_init()
+    struct ra_imgfmt_desc direct_desc; // not referenced by the code in this file section
+    bool mapped;                    // true while vdpgl_surface is mapped into GL
+};
+
+// Driver init callback for "vdpau-gl".
+// Requires an X11 display, an OpenGL ra and the MPGL_CAP_VDPAU capability,
+// then creates the VDPAU device and registers it with the hwdec device list.
+// When only probing, a device that mp_vdpau_guess_if_emulated() flags is
+// rejected. Returns 0 on success, -1 otherwise.
+static int init(struct ra_hwdec *hw)
+{
+    struct ra *ra = hw->ra_ctx->ra;
+
+    Display *dpy = ra_get_native_resource(ra, "x11");
+    if (dpy == NULL)
+        return -1;
+    if (!ra_is_gl(ra))
+        return -1;
+
+    GL *gl = ra_gl_get(ra);
+    if ((gl->mpgl_caps & MPGL_CAP_VDPAU) == 0)
+        return -1;
+
+    struct priv_owner *owner = hw->priv;
+    owner->ctx = mp_vdpau_create_device_x11(hw->log, dpy, true);
+    if (owner->ctx == NULL)
+        return -1;
+
+    struct mp_vdpau_ctx *vdpau = owner->ctx;
+    if (mp_vdpau_handle_preemption(vdpau, NULL) < 1)
+        return -1;
+    if (hw->probing && mp_vdpau_guess_if_emulated(vdpau))
+        return -1;
+
+    vdpau->hwctx.driver_name = hw->driver->name;
+    vdpau->hwctx.hw_imgfmt = IMGFMT_VDPAU;
+    hwdec_devices_add(hw->devs, &vdpau->hwctx);
+    return 0;
+}
+
+// Driver uninit callback: unregisters the VDPAU device from the hwdec device
+// list (if a device exists) and destroys the VDPAU context.
+static void uninit(struct ra_hwdec *hw)
+{
+    struct priv_owner *owner = hw->priv;
+    struct mp_vdpau_ctx *vdpau = owner->ctx;
+
+    if (vdpau != NULL)
+        hwdec_devices_remove(hw->devs, &vdpau->hwctx);
+
+    mp_vdpau_destroy(vdpau);
+}
+
+// Undo mapper_map(): frees all wrapped ra textures and, if the interop
+// surface is currently mapped, unmaps it from GL.
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    GL *gl = priv->gl;
+
+    for (int plane = 0; plane < 4; plane++)
+        ra_tex_free(mapper->ra, &mapper->tex[plane]);
+
+    if (priv->mapped)
+        gl->VDPAUUnmapSurfacesNV(1, &priv->vdpgl_surface);
+
+    priv->mapped = false;
+}
+
+// Forget the VDPAU output surface and the mapped state without releasing
+// anything. mapper_map() calls this when mp_vdpau_handle_preemption() reports
+// a result below 1, before any teardown.
+static void mark_vdpau_objects_uninitialized(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+
+    priv->mapped = false;
+    priv->vdp_surface = VDP_INVALID_HANDLE;
+}
+
+// Release every GL and VDPAU object that mapper_init() created. The surface
+// must already be unmapped (asserted).
+//
+// All handles are reset once released. mapper_map() runs mapper_uninit()
+// followed by mapper_init(), and mapper_init() can fail before it regenerates
+// the texture; without the reset, a later mapper_uninit() would delete the
+// stale texture name a second time. The mixer is skipped if it was never
+// created.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    GL *gl = p->gl;
+    struct vdp_functions *vdp = &p->ctx->vdp;
+    VdpStatus vdp_st;
+
+    assert(!p->mapped);
+
+    if (p->vdpgl_surface)
+        gl->VDPAUUnregisterSurfaceNV(p->vdpgl_surface);
+    p->vdpgl_surface = 0;
+
+    gl->DeleteTextures(1, &p->gl_texture);
+    p->gl_texture = 0; // glDeleteTextures ignores the name 0
+
+    if (p->vdp_surface != VDP_INVALID_HANDLE) {
+        vdp_st = vdp->output_surface_destroy(p->vdp_surface);
+        CHECK_VDP_WARNING(mapper, "Error when calling vdp_output_surface_destroy");
+    }
+    p->vdp_surface = VDP_INVALID_HANDLE;
+
+    gl_check_error(gl, mapper->log, "Before uninitializing OpenGL interop");
+
+    if (p->vdpgl_initialized)
+        gl->VDPAUFiniNV();
+
+    p->vdpgl_initialized = false;
+
+    gl_check_error(gl, mapper->log, "After uninitializing OpenGL interop");
+
+    if (p->mixer)
+        mp_vdpau_mixer_destroy(p->mixer);
+    p->mixer = NULL;
+}
+
+// Set up the per-mapper VDPAU/OpenGL interop state: a video mixer, a GL
+// texture, a BGRA VDPAU output surface of the source size, and the interop
+// registration that ties the surface to the texture.
+// The mapper output is described as IMGFMT_RGB0 with no hw_subfmt.
+// Returns 0 on success, -1 on failure. Objects created before a failure are
+// left in place; they are released by mapper_uninit().
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv_owner *p_owner = mapper->owner->priv;
+    struct priv *p = mapper->priv;
+
+    p->gl = ra_gl_get(mapper->ra);
+    p->ctx = p_owner->ctx;
+
+    GL *gl = p->gl;
+    struct vdp_functions *vdp = &p->ctx->vdp;
+    VdpStatus vdp_st;
+
+    p->vdp_surface = VDP_INVALID_HANDLE;
+    p->mixer = mp_vdpau_mixer_create(p->ctx, mapper->log);
+    if (!p->mixer)
+        return -1;
+
+    // Start from the source parameters; the format fields are overridden below.
+    mapper->dst_params = mapper->src_params;
+
+    if (mp_vdpau_handle_preemption(p->ctx, &p->preemption_counter) < 0)
+        return -1;
+
+    gl->VDPAUInitNV(BRAINDEATH(p->ctx->vdp_device), p->ctx->get_proc_address);
+
+    // Remember that VDPAUFiniNV must be called on teardown.
+    p->vdpgl_initialized = true;
+
+    gl->GenTextures(1, &p->gl_texture);
+
+    gl->BindTexture(GL_TEXTURE_2D, p->gl_texture);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+    gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+    gl->BindTexture(GL_TEXTURE_2D, 0);
+
+    // Render target for the mixer, sized like the source video.
+    vdp_st = vdp->output_surface_create(p->ctx->vdp_device,
+                                        VDP_RGBA_FORMAT_B8G8R8A8,
+                                        mapper->src_params.w,
+                                        mapper->src_params.h,
+                                        &p->vdp_surface);
+    CHECK_VDP_ERROR(mapper, "Error when calling vdp_output_surface_create");
+
+    // Tie the VDPAU surface to the GL texture created above.
+    p->vdpgl_surface = gl->VDPAURegisterOutputSurfaceNV(BRAINDEATH(p->vdp_surface),
+                                                        GL_TEXTURE_2D,
+                                                        1, &p->gl_texture);
+    if (!p->vdpgl_surface)
+        return -1;
+
+    gl->VDPAUSurfaceAccessNV(p->vdpgl_surface, GL_READ_ONLY);
+
+    mapper->dst_params.imgfmt = IMGFMT_RGB0;
+    mapper->dst_params.hw_subfmt = 0;
+
+    gl_check_error(gl, mapper->log, "After initializing vdpau OpenGL interop");
+
+    return 0;
+}
+
+// Render the current source frame into the VDPAU output surface, map that
+// surface into GL and expose the backing texture as mapper->tex[0].
+// A negative preemption result aborts; a zero result triggers a complete
+// re-creation of the mapper state before continuing.
+// Returns 0 on success, -1 on failure.
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *priv = mapper->priv;
+    GL *gl = priv->gl;
+
+    int status = mp_vdpau_handle_preemption(priv->ctx,
+                                            &priv->preemption_counter);
+    if (status < 0) {
+        mark_vdpau_objects_uninitialized(mapper);
+        return -1;
+    }
+    if (status == 0) {
+        mark_vdpau_objects_uninitialized(mapper);
+        mapper_uninit(mapper);
+        if (mapper_init(mapper) < 0)
+            return -1;
+    }
+
+    if (priv->vdpgl_surface == 0)
+        return -1;
+
+    mp_vdpau_mixer_render(priv->mixer, NULL, priv->vdp_surface, NULL,
+                          mapper->src, NULL);
+
+    gl->VDPAUMapSurfacesNV(1, &priv->vdpgl_surface);
+    priv->mapped = true;
+
+    struct ra_tex_params tex_params = {
+        .dimensions = 2,
+        .w = mapper->src_params.w,
+        .h = mapper->src_params.h,
+        .d = 1,
+        .format = ra_find_unorm_format(mapper->ra, 1, 4),
+        .render_src = true,
+        .src_linear = true,
+    };
+    if (!tex_params.format)
+        return -1;
+
+    mapper->tex[0] =
+        ra_create_wrapped_tex(mapper->ra, &tex_params, priv->gl_texture);
+
+    return mapper->tex[0] ? 0 : -1;
+}
+
+// Driver table entry for the "vdpau-gl" hwdec. The driver-level state is a
+// struct priv_owner; each mapper instance gets its own struct priv.
+const struct ra_hwdec_driver ra_hwdec_vdpau = {
+    .name = "vdpau-gl",
+    .priv_size = sizeof(struct priv_owner),
+    .imgfmts = {IMGFMT_VDPAU, 0}, // 0-terminated list
+    .init = init,
+    .uninit = uninit,
+    .mapper = &(const struct ra_hwdec_mapper_driver){
+        .priv_size = sizeof(struct priv),
+        .init = mapper_init,
+        .uninit = mapper_uninit,
+        .map = mapper_map,
+        .unmap = mapper_unmap,
+    },
+};
diff --git a/video/out/opengl/libmpv_gl.c b/video/out/opengl/libmpv_gl.c
new file mode 100644
index 0000000..c297c13
--- /dev/null
+++ b/video/out/opengl/libmpv_gl.c
@@ -0,0 +1,114 @@
+#include "common.h"
+#include "context.h"
+#include "ra_gl.h"
+#include "options/m_config.h"
+#include "libmpv/render_gl.h"
+#include "video/out/gpu/libmpv_gpu.h"
+#include "video/out/gpu/ra.h"
+
+// Backend state for the libmpv OpenGL render context. Both members are
+// allocated in init() as talloc children of this struct.
+struct priv {
+    GL *gl;                // function table loaded through the user's get_proc_address
+    struct ra_ctx *ra_ctx; // blank ra_ctx used to drive ra_gl_ctx_*()
+};
+
+// Initialize the OpenGL backend of the libmpv render API.
+// Loads the GL function table through the caller-supplied get_proc_address
+// callback, sets up a blank ra_ctx on top of ra_gl_ctx, and applies the
+// configured debug option.
+// Returns 0 on success, MPV_ERROR_INVALID_PARAMETER if the OpenGL init params
+// are missing, or MPV_ERROR_UNSUPPORTED if no usable GL was found or the
+// ra_gl context could not be initialized.
+static int init(struct libmpv_gpu_context *ctx, mpv_render_param *params)
+{
+    struct priv *priv = talloc_zero(NULL, struct priv);
+    ctx->priv = priv;
+
+    mpv_opengl_init_params *gl_init =
+        get_mpv_render_param(params, MPV_RENDER_PARAM_OPENGL_INIT_PARAMS, NULL);
+    if (gl_init == NULL)
+        return MPV_ERROR_INVALID_PARAMETER;
+
+    GL *gl = talloc_zero(priv, GL);
+    priv->gl = gl;
+
+    mpgl_load_functions2(gl, gl_init->get_proc_address,
+                         gl_init->get_proc_address_ctx, NULL, ctx->log);
+    if (gl->version == 0 && gl->es == 0) {
+        MP_FATAL(ctx, "OpenGL not initialized.\n");
+        return MPV_ERROR_UNSUPPORTED;
+    }
+
+    // A blank ra_ctx is enough to reuse the ra_gl_ctx machinery.
+    struct ra_ctx *ra_ctx = talloc_zero(priv, struct ra_ctx);
+    priv->ra_ctx = ra_ctx;
+    ra_ctx->log = ctx->log;
+    ra_ctx->global = ctx->global;
+    ra_ctx->opts = (struct ra_ctx_opts) { .allow_sw = true };
+
+    // vo_libmpv behaves like one big external swapchain: the API user does
+    // the presentation and swapping. None of the swapchain callbacks are ever
+    // invoked, so an empty table is passed purely to tell ra_gl_ctx that its
+    // latency emulation is not wanted.
+    static const struct ra_swapchain_fns empty_swapchain_fns = {0};
+    struct ra_gl_ctx_params gl_params = {
+        .external_swapchain = &empty_swapchain_fns,
+    };
+
+    // The swap interval belongs to the API user; make sure it is never touched.
+    gl->SwapInterval = NULL;
+    if (!ra_gl_ctx_init(ra_ctx, gl, gl_params))
+        return MPV_ERROR_UNSUPPORTED;
+
+    struct ra_ctx_opts *cfg = mp_get_config_group(ctx, ctx->global, &ra_ctx_conf);
+    ra_ctx->opts.debug = cfg->debug;
+    gl->debug_context = cfg->debug;
+    ra_gl_set_debug(ra_ctx->ra, cfg->debug);
+    talloc_free(cfg);
+
+    ctx->ra_ctx = ra_ctx;
+
+    return 0;
+}
+
+// Wrap the framebuffer described by MPV_RENDER_PARAM_OPENGL_FBO as a ra_tex
+// and store it in *out.
+// Returns 0 on success, MPV_ERROR_INVALID_PARAMETER if the FBO parameter is
+// missing, or MPV_ERROR_UNSUPPORTED if a non-default FBO was requested but
+// the GL lacks MPGL_CAP_FB.
+static int wrap_fbo(struct libmpv_gpu_context *ctx, mpv_render_param *params,
+                    struct ra_tex **out)
+{
+    struct priv *priv = ctx->priv;
+
+    mpv_opengl_fbo *req =
+        get_mpv_render_param(params, MPV_RENDER_PARAM_OPENGL_FBO, NULL);
+    if (req == NULL)
+        return MPV_ERROR_INVALID_PARAMETER;
+
+    const bool have_fbo_support = (priv->gl->mpgl_caps & MPGL_CAP_FB) != 0;
+    if (req->fbo && !have_fbo_support) {
+        MP_FATAL(ctx, "Rendering to FBO requested, but no FBO extension found!\n");
+        return MPV_ERROR_UNSUPPORTED;
+    }
+
+    struct ra_swapchain *swapchain = priv->ra_ctx->swapchain;
+    struct ra_fbo frame;
+    ra_gl_ctx_resize(swapchain, req->w, req->h, req->fbo);
+    ra_gl_ctx_start_frame(swapchain, &frame);
+    *out = frame.tex;
+    return 0;
+}
+
+// Submit the finished frame to the ra_gl swapchain. A dummy vo_frame carries
+// the display_synced flag, which is the only field set here.
+static void done_frame(struct libmpv_gpu_context *ctx, bool ds)
+{
+    struct priv *priv = ctx->priv;
+    struct vo_frame frame = {.display_synced = ds};
+
+    ra_gl_ctx_submit_frame(priv->ra_ctx->swapchain, &frame);
+}
+
+// Tear down the ra_gl context if init() got far enough to create one.
+static void destroy(struct libmpv_gpu_context *ctx)
+{
+    struct priv *priv = ctx->priv;
+
+    if (priv->ra_ctx == NULL)
+        return;
+
+    ra_gl_ctx_uninit(priv->ra_ctx);
+}
+
+// Function table that plugs the OpenGL backend into the libmpv GPU render
+// API; it is identified by MPV_RENDER_API_TYPE_OPENGL.
+const struct libmpv_gpu_context_fns libmpv_gpu_context_gl = {
+    .api_name = MPV_RENDER_API_TYPE_OPENGL,
+    .init = init,
+    .wrap_fbo = wrap_fbo,
+    .done_frame = done_frame,
+    .destroy = destroy,
+};
diff --git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c
new file mode 100644
index 0000000..f535f1f
--- /dev/null
+++ b/video/out/opengl/ra_gl.c
@@ -0,0 +1,1208 @@
+#include <libavutil/intreadwrite.h>
+
+#include "formats.h"
+#include "utils.h"
+#include "ra_gl.h"
+
+// Forward declaration of the OpenGL ra function table. Its address is stored
+// in ra->fns by ra_init_gl() and compared against in ra_is_gl().
+static struct ra_fns ra_fns_gl;
+
+// For ra.priv
+// Backend-private state of an OpenGL ra instance; allocated in ra_init_gl().
+struct ra_gl {
+    GL *gl;            // GL function table passed to ra_create_gl()
+    bool debug_enable; // last value given to ra_gl_set_debug()
+    bool timer_active; // hack for GL_TIME_ELAPSED limitations
+};
+
+// For ra_tex.priv
+// Backend-private state of an OpenGL texture or wrapped framebuffer.
+struct ra_tex_gl {
+    struct ra_buf_pool pbo; // for ra.use_pbo
+    bool own_objects; // if true, texture/fbo are deleted in gl_tex_destroy()
+    GLenum target;    // GL texture target (e.g. GL_TEXTURE_2D)
+    GLuint texture; // 0 if no texture data associated
+    GLuint fbo; // 0 if no rendering requested, or if it is the default framebuffer
+    // These 3 fields can be 0 if unknown.
+    GLint internal_format;
+    GLenum format;
+    GLenum type;
+};
+
+// For ra_buf.priv
+// Backend-private state of an OpenGL buffer object.
+struct ra_buf_gl {
+    GLenum target; // binding point chosen from ra_buf_params.type
+    GLuint buffer; // GL buffer object name
+    GLsync fence;  // set after uploads from host-mapped buffers; may be unset
+};
+
+// For ra_renderpass.priv
+// Backend-private state of an OpenGL render pass.
+struct ra_renderpass_gl {
+    GLuint program; // GL program object name
+    // 1 entry for each ra_renderpass_params.inputs[] entry
+    GLint *uniform_loc;
+    int num_uniform_loc; // == ra_renderpass_params.num_inputs
+    struct gl_vao vao;
+};
+
+// (Init time only.)
+// Ask the driver for the real per-component bit size of a texture format and
+// store the answers in fmt->component_depth[]. A throw-away 64x64 texture of
+// the format is created for the query. Nothing happens if
+// glGetTexLevelParameteriv is unavailable, or for luminance formats on GLES.
+static void probe_real_size(GL *gl, struct ra_format *fmt)
+{
+    // RGBA queries come first; the luminance/alpha pair starts at index 4.
+    static const GLenum size_queries[] = {
+        GL_TEXTURE_RED_SIZE,
+        GL_TEXTURE_GREEN_SIZE,
+        GL_TEXTURE_BLUE_SIZE,
+        GL_TEXTURE_ALPHA_SIZE,
+        GL_TEXTURE_LUMINANCE_SIZE,
+        GL_TEXTURE_ALPHA_SIZE,
+    };
+    const struct gl_format *glfmt = fmt->priv;
+
+    if (!gl->GetTexLevelParameteriv)
+        return; // GLES
+
+    const bool luminance = glfmt->format == GL_LUMINANCE ||
+                           glfmt->format == GL_LUMINANCE_ALPHA;
+    if (luminance && gl->es)
+        return; // GLES doesn't provide GL_TEXTURE_LUMINANCE_SIZE.
+
+    const int first_query = luminance ? 4 : 0;
+
+    GLuint probe_tex;
+    gl->GenTextures(1, &probe_tex);
+    gl->BindTexture(GL_TEXTURE_2D, probe_tex);
+    gl->TexImage2D(GL_TEXTURE_2D, 0, glfmt->internal_format, 64, 64, 0,
+                   glfmt->format, glfmt->type, NULL);
+
+    for (int c = 0; c < fmt->num_components; c++) {
+        int query = first_query + c;
+        assert(query < MP_ARRAY_SIZE(size_queries));
+
+        GLint bits = -1;
+        gl->GetTexLevelParameteriv(GL_TEXTURE_2D, 0, size_queries[query],
+                                   &bits);
+        // A non-positive answer is recorded as 0.
+        if (bits < 0)
+            bits = 0;
+        fmt->component_depth[c] = bits;
+    }
+
+    gl->DeleteTextures(1, &probe_tex);
+}
+
+// Turn a blank struct ra into an OpenGL-backed instance.
+// Checks the minimum GL version, allocates the backend state, derives the RA
+// capability flags from the GL capabilities, builds ra->formats from the
+// gl_formats table and queries a few implementation limits.
+// Returns 0 on success, or -1 if the GL version is too old (in which case
+// nothing has been allocated yet).
+static int ra_init_gl(struct ra *ra, GL *gl)
+{
+    if (gl->version < 210 && gl->es < 200) {
+        MP_ERR(ra, "At least OpenGL 2.1 or OpenGL ES 2.0 required.\n");
+        return -1;
+    }
+
+    struct ra_gl *p = ra->priv = talloc_zero(NULL, struct ra_gl);
+    p->gl = gl;
+
+    // Sets the debug flag; the logger is only installed if gl->debug_context.
+    ra_gl_set_debug(ra, true);
+
+    ra->fns = &ra_fns_gl;
+    ra->glsl_version = gl->glsl_version;
+    ra->glsl_es = gl->es > 0;
+
+    // {RA capability, required MPGL capability bits}. A requirement of 0 is
+    // always satisfied, so those capabilities are set unconditionally.
+    static const int caps_map[][2] = {
+        {RA_CAP_DIRECT_UPLOAD,      0},
+        {RA_CAP_GLOBAL_UNIFORM,     0},
+        {RA_CAP_FRAGCOORD,          0},
+        {RA_CAP_TEX_1D,             MPGL_CAP_1D_TEX},
+        {RA_CAP_TEX_3D,             MPGL_CAP_3D_TEX},
+        {RA_CAP_COMPUTE,            MPGL_CAP_COMPUTE_SHADER},
+        {RA_CAP_NUM_GROUPS,         MPGL_CAP_COMPUTE_SHADER},
+        {RA_CAP_NESTED_ARRAY,       MPGL_CAP_NESTED_ARRAY},
+        {RA_CAP_SLOW_DR,            MPGL_CAP_SLOW_DR},
+    };
+
+    for (int i = 0; i < MP_ARRAY_SIZE(caps_map); i++) {
+        if ((gl->mpgl_caps & caps_map[i][1]) == caps_map[i][1])
+            ra->caps |= caps_map[i][0];
+    }
+
+    // Buffer-backed shader inputs additionally need glBindBufferBase.
+    if (gl->BindBufferBase) {
+        if (gl->mpgl_caps & MPGL_CAP_UBO)
+            ra->caps |= RA_CAP_BUF_RO;
+        if (gl->mpgl_caps & MPGL_CAP_SSBO)
+            ra->caps |= RA_CAP_BUF_RW;
+    }
+
+    // textureGather is only supported in GLSL 400+ / ES 310+
+    if (ra->glsl_version >= (ra->glsl_es ? 310 : 400))
+        ra->caps |= RA_CAP_GATHER;
+
+    if (gl->BlitFramebuffer)
+        ra->caps |= RA_CAP_BLIT;
+
+    // Disable compute shaders for GLSL < 420. This work-around is needed since
+    // some buggy OpenGL drivers expose compute shaders for lower GLSL versions,
+    // despite the spec requiring 420+.
+    if (ra->glsl_version < (ra->glsl_es ? 310 : 420)) {
+        ra->caps &= ~RA_CAP_COMPUTE;
+    }
+
+    // While we can handle compute shaders on GLES the spec (intentionally)
+    // does not support binding textures for writing, which all uses inside mpv
+    // would require. So disable it unconditionally anyway.
+    if (ra->glsl_es)
+        ra->caps &= ~RA_CAP_COMPUTE;
+
+    int gl_fmt_features = gl_format_feature_flags(gl);
+
+    // Register one ra_format per gl_formats entry that this GL supports.
+    for (int n = 0; gl_formats[n].internal_format; n++) {
+        const struct gl_format *gl_fmt = &gl_formats[n];
+
+        if (!(gl_fmt->flags & gl_fmt_features))
+            continue;
+
+        struct ra_format *fmt = talloc_zero(ra, struct ra_format);
+        *fmt = (struct ra_format){
+            .name           = gl_fmt->name,
+            .priv           = (void *)gl_fmt,
+            .ctype          = gl_format_type(gl_fmt),
+            .num_components = gl_format_components(gl_fmt->format),
+            .ordered        = gl_fmt->format != GL_RGB_422_APPLE,
+            .pixel_size     = gl_bytes_per_pixel(gl_fmt->format, gl_fmt->type),
+            .luminance_alpha = gl_fmt->format == GL_LUMINANCE_ALPHA,
+            .linear_filter  = gl_fmt->flags & F_TF,
+            .renderable     = (gl_fmt->flags & F_CR) &&
+                              (gl->mpgl_caps & MPGL_CAP_FB),
+            // TODO: Check whether it's a storable format
+            // https://www.khronos.org/opengl/wiki/Image_Load_Store
+            .storable       = true,
+        };
+
+        // Component size and depth in bits, derived from the GL data type.
+        int csize = gl_component_size(gl_fmt->type) * 8;
+        int depth = csize;
+
+        if (gl_fmt->flags & F_F16) {
+            depth = 16;
+            csize = 32; // always upload as GL_FLOAT (simpler for us)
+        }
+
+        for (int i = 0; i < fmt->num_components; i++) {
+            fmt->component_size[i] = csize;
+            fmt->component_depth[i] = depth;
+        }
+
+        // For non-8-bit UNORM formats, ask the driver for the real depth.
+        if (fmt->ctype == RA_CTYPE_UNORM && depth != 8)
+            probe_real_size(gl, fmt);
+
+        // Special formats for which OpenGL happens to have direct support.
+        if (strcmp(fmt->name, "rgb565") == 0) {
+            fmt->special_imgfmt = IMGFMT_RGB565;
+            struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc);
+            fmt->special_imgfmt_desc = desc;
+            desc->num_planes = 1;
+            desc->planes[0] = fmt;
+            for (int i = 0; i < 3; i++)
+                desc->components[0][i] = i + 1;
+            desc->chroma_w = desc->chroma_h = 1;
+        }
+        if (strcmp(fmt->name, "rgb10_a2") == 0) {
+            fmt->special_imgfmt = IMGFMT_RGB30;
+            struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc);
+            fmt->special_imgfmt_desc = desc;
+            desc->component_bits = 10;
+            desc->num_planes = 1;
+            desc->planes[0] = fmt;
+            // Component order is reversed: 3, 2, 1.
+            for (int i = 0; i < 3; i++)
+                desc->components[0][i] = 3 - i;
+            desc->chroma_w = desc->chroma_h = 1;
+        }
+        if (strcmp(fmt->name, "appleyp") == 0) {
+            fmt->special_imgfmt = IMGFMT_UYVY;
+            struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc);
+            fmt->special_imgfmt_desc = desc;
+            desc->num_planes = 1;
+            desc->planes[0] = fmt;
+            desc->components[0][0] = 3;
+            desc->components[0][1] = 1;
+            desc->components[0][2] = 2;
+            desc->chroma_w = desc->chroma_h = 1;
+        }
+
+        fmt->glsl_format = ra_fmt_glsl_format(fmt);
+
+        MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt);
+    }
+
+    // Implementation limits.
+    GLint ival;
+    gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &ival);
+    ra->max_texture_wh = ival;
+
+    if (ra->caps & RA_CAP_COMPUTE) {
+        gl->GetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &ival);
+        ra->max_shmem = ival;
+        gl->GetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &ival);
+        ra->max_compute_group_threads = ival;
+    }
+
+    gl->Disable(GL_DITHER);
+
+    if (!ra_find_unorm_format(ra, 2, 1))
+        MP_VERBOSE(ra, "16 bit UNORM textures not available.\n");
+
+    return 0;
+}
+
+// Allocate a new ra instance backed by the given GL function table.
+// Returns NULL (after freeing the instance) if ra_init_gl() fails.
+struct ra *ra_create_gl(GL *gl, struct mp_log *log)
+{
+    struct ra *ra = talloc_zero(NULL, struct ra);
+    ra->log = log;
+
+    if (ra_init_gl(ra, gl) >= 0)
+        return ra;
+
+    talloc_free(ra);
+    return NULL;
+}
+
+// Free the backend-private state that ra_init_gl() allocated.
+static void gl_destroy(struct ra *ra)
+{
+    struct ra_gl *priv = ra->priv;
+
+    talloc_free(priv);
+}
+
+// Record the debug flag and, if the GL context is a debug context, install
+// (enable) or remove (disable) the GL debug logger.
+void ra_gl_set_debug(struct ra *ra, bool enable)
+{
+    struct ra_gl *priv = ra->priv;
+    GL *gl = ra_gl_get(ra);
+
+    priv->debug_enable = enable;
+
+    if (!gl->debug_context)
+        return;
+
+    struct mp_log *logger = enable ? ra->log : NULL;
+    gl_set_debug_logger(gl, logger);
+}
+
+// Destroy a ra_tex: releases its PBO pool, deletes the GL framebuffer and
+// texture if this object owns them, and frees both the private and the
+// public struct.
+static void gl_tex_destroy(struct ra *ra, struct ra_tex *tex)
+{
+    GL *gl = ra_gl_get(ra);
+    struct ra_tex_gl *priv = tex->priv;
+
+    ra_buf_pool_uninit(ra, &priv->pbo);
+
+    // Wrapped GL objects are left alone; only owned ones are deleted.
+    if (priv->own_objects) {
+        if (priv->fbo != 0)
+            gl->DeleteFramebuffers(1, &priv->fbo);
+
+        gl->DeleteTextures(1, &priv->texture);
+    }
+
+    talloc_free(priv);
+    talloc_free(tex);
+}
+
+// Allocate a ra_tex plus its GL private data and fill in the format and
+// target fields from params, without creating any GL object.
+// Returns NULL if a downloadable texture is requested that is not 2D with a
+// renderable format.
+static struct ra_tex *gl_tex_create_blank(struct ra *ra,
+                                          const struct ra_tex_params *params)
+{
+    struct ra_tex *tex = talloc_zero(NULL, struct ra_tex);
+    tex->params = *params;
+    tex->params.initial_data = NULL;
+
+    struct ra_tex_gl *priv = talloc_zero(NULL, struct ra_tex_gl);
+    tex->priv = priv;
+
+    const struct gl_format *glfmt = params->format->priv;
+    priv->internal_format = glfmt->internal_format;
+    priv->format = glfmt->format;
+    priv->type = glfmt->type;
+
+    // Pick the default target from the dimensionality ...
+    switch (params->dimensions) {
+    case 1:
+        priv->target = GL_TEXTURE_1D;
+        break;
+    case 2:
+        priv->target = GL_TEXTURE_2D;
+        break;
+    case 3:
+        priv->target = GL_TEXTURE_3D;
+        break;
+    default:
+        MP_ASSERT_UNREACHABLE();
+    }
+
+    // ... then let the special 2D texture kinds override it.
+    if (params->non_normalized) {
+        assert(params->dimensions == 2);
+        priv->target = GL_TEXTURE_RECTANGLE;
+    }
+    if (params->external_oes) {
+        assert(params->dimensions == 2 && !params->non_normalized);
+        priv->target = GL_TEXTURE_EXTERNAL_OES;
+    }
+
+    const bool can_download = params->dimensions == 2 &&
+                              params->format->renderable;
+    if (params->downloadable && !can_download) {
+        gl_tex_destroy(ra, tex);
+        return NULL;
+    }
+
+    return tex;
+}
+
+// Create a GL texture owned by the returned ra_tex.
+// Sets filtering and wrapping from params, allocates storage (uploading
+// params->initial_data, if any), and creates an FBO when the texture is a
+// render/blit target or downloadable.
+// Returns NULL if the blank texture could not be created, if an FBO is needed
+// for a non-renderable format, or if the FBO is incomplete.
+static struct ra_tex *gl_tex_create(struct ra *ra,
+                                    const struct ra_tex_params *params)
+{
+    GL *gl = ra_gl_get(ra);
+    assert(!params->format->dummy_format);
+
+    struct ra_tex *tex = gl_tex_create_blank(ra, params);
+    if (!tex)
+        return NULL;
+    struct ra_tex_gl *tex_gl = tex->priv;
+
+    // From here on gl_tex_destroy() deletes the GL objects as well.
+    tex_gl->own_objects = true;
+
+    gl->GenTextures(1, &tex_gl->texture);
+    gl->BindTexture(tex_gl->target, tex_gl->texture);
+
+    GLint filter = params->src_linear ? GL_LINEAR : GL_NEAREST;
+    GLint wrap = params->src_repeat ? GL_REPEAT : GL_CLAMP_TO_EDGE;
+    gl->TexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter);
+    gl->TexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter);
+    gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap);
+    if (params->dimensions > 1)
+        gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap);
+    if (params->dimensions > 2)
+        gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap);
+
+    // Use byte alignment for the initial upload, then restore 4.
+    gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
+    switch (params->dimensions) {
+    case 1:
+        gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format, params->w,
+                       0, tex_gl->format, tex_gl->type, params->initial_data);
+        break;
+    case 2:
+        gl->TexImage2D(tex_gl->target, 0, tex_gl->internal_format, params->w,
+                       params->h, 0, tex_gl->format, tex_gl->type,
+                       params->initial_data);
+        break;
+    case 3:
+        gl->TexImage3D(tex_gl->target, 0, tex_gl->internal_format, params->w,
+                       params->h, params->d, 0, tex_gl->format, tex_gl->type,
+                       params->initial_data);
+        break;
+    }
+    gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4);
+
+    gl->BindTexture(tex_gl->target, 0);
+
+    gl_check_error(gl, ra->log, "after creating texture");
+
+    // Even blitting needs an FBO in OpenGL for strange reasons.
+    // Download is handled by reading from an FBO.
+    if (tex->params.render_dst || tex->params.blit_src ||
+        tex->params.blit_dst || tex->params.downloadable)
+    {
+        if (!tex->params.format->renderable) {
+            MP_ERR(ra, "Trying to create renderable texture with unsupported "
+                   "format.\n");
+            ra_tex_free(ra, &tex);
+            return NULL;
+        }
+
+        assert(gl->mpgl_caps & MPGL_CAP_FB);
+
+        // Attach the texture as the FBO's only color attachment and verify it.
+        gl->GenFramebuffers(1, &tex_gl->fbo);
+        gl->BindFramebuffer(GL_FRAMEBUFFER, tex_gl->fbo);
+        gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0,
+                                 GL_TEXTURE_2D, tex_gl->texture, 0);
+        GLenum err = gl->CheckFramebufferStatus(GL_FRAMEBUFFER);
+        gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+
+        if (err != GL_FRAMEBUFFER_COMPLETE) {
+            MP_ERR(ra, "Error: framebuffer completeness check failed (error=%d).\n",
+                   (int)err);
+            ra_tex_free(ra, &tex);
+            return NULL;
+        }
+
+
+        gl_check_error(gl, ra->log, "after creating framebuffer");
+    }
+
+    return tex;
+}
+
+// Wrap an already existing GL texture in a ra_tex.
+// The result is released with ra_tex_free(); since the wrapper does not own
+// the GL texture, freeing it leaves gl_texture untouched.
+// Not everything is supported for wrapped textures, e.g.
+// params->initial_data and render_dst are not honored.
+// Returns NULL if the blank texture could not be created.
+struct ra_tex *ra_create_wrapped_tex(struct ra *ra,
+                                     const struct ra_tex_params *params,
+                                     GLuint gl_texture)
+{
+    struct ra_tex *wrapper = gl_tex_create_blank(ra, params);
+
+    if (wrapper != NULL) {
+        struct ra_tex_gl *priv = wrapper->priv;
+        priv->texture = gl_texture;
+    }
+
+    return wrapper;
+}
+
+// Placeholder format assigned to wrapped framebuffers (see
+// ra_create_wrapped_fb()). It is flagged as a dummy format, which
+// gl_tex_create() refuses via an assert.
+static const struct ra_format fbo_dummy_format = {
+    .name = "unknown_fbo",
+    .priv = (void *)&(const struct gl_format){
+        .name = "unknown",
+        .format = GL_RGBA,
+        .flags = F_CR,
+    },
+    .renderable = true,
+    .dummy_format = true,
+};
+
+// Wrap an existing framebuffer object in a ra_tex of size w x h. Passing 0
+// as gl_fbo wraps the default framebuffer.
+// The result is released with ra_tex_free(); the wrapper does not own the
+// framebuffer, so gl_fbo itself is never deleted.
+struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h)
+{
+    // Both allocations are zero-filled, so only non-zero fields are set.
+    struct ra_tex *tex = talloc_zero(ra, struct ra_tex);
+    tex->params = (struct ra_tex_params){
+        .dimensions = 2,
+        .w = w,
+        .h = h,
+        .d = 1,
+        .format = &fbo_dummy_format,
+        .render_dst = true,
+        .blit_src = true,
+        .blit_dst = true,
+    };
+
+    struct ra_tex_gl *priv = talloc_zero(NULL, struct ra_tex_gl);
+    priv->fbo = gl_fbo;
+    priv->format = GL_RGBA;
+    tex->priv = priv;
+
+    return tex;
+}
+
+// Return the GL function table of an OpenGL ra instance.
+GL *ra_gl_get(struct ra *ra)
+{
+    const struct ra_gl *priv = ra->priv;
+
+    return priv->gl;
+}
+
+// Return the glTexImage arguments associated with the given format, as stored
+// in the gl_format entry that fmt->priv points to.
+void ra_gl_get_format(const struct ra_format *fmt, GLint *out_internal_format,
+                      GLenum *out_format, GLenum *out_type)
+{
+    const struct gl_format *entry = fmt->priv;
+
+    *out_internal_format = entry->internal_format;
+    *out_format          = entry->format;
+    *out_type            = entry->type;
+}
+
+// Return the GL texture name and texture target behind a ra_tex.
+void ra_gl_get_raw_tex(struct ra *ra, struct ra_tex *tex,
+                       GLuint *out_texture, GLenum *out_target)
+{
+    const struct ra_tex_gl *priv = tex->priv;
+
+    *out_texture = priv->texture;
+    *out_target  = priv->target;
+}
+
+// Tell whether a ra instance was created by ra_create_gl(), by checking that
+// its function table is the OpenGL one. This is the _only_ function in this
+// file that may be called on a ra instance of any type.
+bool ra_is_gl(struct ra *ra)
+{
+    if (ra->fns == &ra_fns_gl)
+        return true;
+
+    return false;
+}
+
+// Upload pixel data to a host-mutable texture, either from host memory
+// (params->src) or from a buffer object (params->buf at params->buf_offset);
+// the two sources are mutually exclusive.
+// If ra->use_pbo is set and no buffer was given, the upload is delegated to
+// ra_tex_upload_pbo(). Otherwise this function always returns true.
+static bool gl_tex_upload(struct ra *ra,
+                          const struct ra_tex_upload_params *params)
+{
+    GL *gl = ra_gl_get(ra);
+    struct ra_tex *tex = params->tex;
+    struct ra_buf *buf = params->buf;
+    struct ra_tex_gl *tex_gl = tex->priv;
+    struct ra_buf_gl *buf_gl = buf ? buf->priv : NULL;
+    assert(tex->params.host_mutable);
+    assert(!params->buf || !params->src);
+
+    if (ra->use_pbo && !params->buf)
+        return ra_tex_upload_pbo(ra, &tex_gl->pbo, params);
+
+    const void *src = params->src;
+    if (buf) {
+        // With an unpack buffer bound, the "pointer" is an offset into it.
+        gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer);
+        src = (void *)params->buf_offset;
+    }
+
+    gl->BindTexture(tex_gl->target, tex_gl->texture);
+    if (params->invalidate && gl->InvalidateTexImage)
+        gl->InvalidateTexImage(tex_gl->texture, 0);
+
+    switch (tex->params.dimensions) {
+    case 1:
+        gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format,
+                       tex->params.w, 0, tex_gl->format, tex_gl->type, src);
+        break;
+    case 2: {
+        // Upload the requested sub-rectangle, or the whole texture if none.
+        struct mp_rect rc = {0, 0, tex->params.w, tex->params.h};
+        if (params->rc)
+            rc = *params->rc;
+        gl_upload_tex(gl, tex_gl->target, tex_gl->format, tex_gl->type,
+                      src, params->stride, rc.x0, rc.y0, rc.x1 - rc.x0,
+                      rc.y1 - rc.y0);
+        break;
+    }
+    case 3:
+        gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1);
+        gl->TexImage3D(GL_TEXTURE_3D, 0, tex_gl->internal_format, tex->params.w,
+                       tex->params.h, tex->params.d, 0, tex_gl->format,
+                       tex_gl->type, src);
+        gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4);
+        break;
+    }
+
+    gl->BindTexture(tex_gl->target, 0);
+
+    if (buf) {
+        gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0);
+        if (buf->params.host_mapped) {
+            // Make sure the PBO is not reused until GL is done with it. If a
+            // previous operation is pending, "update" it by creating a new
+            // fence that will cover the previous operation as well.
+            gl->DeleteSync(buf_gl->fence);
+            buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0);
+        }
+    }
+
+    return true;
+}
+
+// Read back the full contents of a texture through its FBO into params->dst.
+// Returns false if the texture has no FBO; otherwise returns the result of
+// gl_read_fbo_contents().
+static bool gl_tex_download(struct ra *ra, struct ra_tex_download_params *params)
+{
+    struct ra_tex *tex = params->tex;
+    struct ra_tex_gl *priv = tex->priv;
+
+    if (priv->fbo == 0)
+        return false;
+
+    GL *gl = ra_gl_get(ra);
+    return gl_read_fbo_contents(gl, priv->fbo, 1, priv->format, priv->type,
+                                tex->params.w, tex->params.h, params->dst,
+                                params->stride);
+}
+
+// Destroy a buffer: deletes its fence (if any), unmaps it if it is still
+// mapped, deletes the GL buffer object and frees the structs.
+// A NULL buf is accepted and ignored.
+static void gl_buf_destroy(struct ra *ra, struct ra_buf *buf)
+{
+    if (buf == NULL)
+        return;
+
+    GL *gl = ra_gl_get(ra);
+    struct ra_buf_gl *priv = buf->priv;
+
+    if (priv->fence)
+        gl->DeleteSync(priv->fence);
+
+    // buf->data is only set for mapped buffers.
+    const bool is_mapped = buf->data != NULL;
+    if (is_mapped) {
+        gl->BindBuffer(priv->target, priv->buffer);
+        gl->UnmapBuffer(priv->target);
+        gl->BindBuffer(priv->target, 0);
+    }
+
+    gl->DeleteBuffers(1, &priv->buffer);
+
+    talloc_free(priv);
+    talloc_free(buf);
+}
+
+// Create a GL buffer object of params->size bytes for the requested usage
+// type, optionally filled with params->initial_data.
+//
+// Host-mapped buffers use immutable storage and are mapped persistently and
+// coherently for reading and writing; the mapping is stored in buf->data.
+// Other buffers are allocated with glBufferData and a usage hint that depends
+// on the buffer type.
+//
+// Returns NULL if a host-mapped buffer is requested but glBufferStorage is
+// unavailable, or if mapping the buffer fails.
+static struct ra_buf *gl_buf_create(struct ra *ra,
+                                    const struct ra_buf_params *params)
+{
+    GL *gl = ra_gl_get(ra);
+
+    if (params->host_mapped && !gl->BufferStorage)
+        return NULL;
+
+    struct ra_buf *buf = talloc_zero(NULL, struct ra_buf);
+    buf->params = *params;
+    buf->params.initial_data = NULL;
+
+    struct ra_buf_gl *buf_gl = buf->priv = talloc_zero(NULL, struct ra_buf_gl);
+    gl->GenBuffers(1, &buf_gl->buffer);
+
+    switch (params->type) {
+    case RA_BUF_TYPE_TEX_UPLOAD:     buf_gl->target = GL_PIXEL_UNPACK_BUFFER;   break;
+    case RA_BUF_TYPE_SHADER_STORAGE: buf_gl->target = GL_SHADER_STORAGE_BUFFER; break;
+    case RA_BUF_TYPE_UNIFORM:        buf_gl->target = GL_UNIFORM_BUFFER;        break;
+    default: abort();
+    }
+
+    // Keep a private copy of the target: on the mapping-failure path below,
+    // gl_buf_destroy() frees buf_gl before the final unbind, so buf_gl must
+    // not be dereferenced after that point.
+    const GLenum target = buf_gl->target;
+
+    gl->BindBuffer(target, buf_gl->buffer);
+
+    if (params->host_mapped) {
+        unsigned flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT |
+                         GL_MAP_READ_BIT | GL_MAP_WRITE_BIT;
+
+        unsigned storflags = flags;
+        if (params->type == RA_BUF_TYPE_TEX_UPLOAD)
+            storflags |= GL_CLIENT_STORAGE_BIT;
+
+        gl->BufferStorage(target, params->size, params->initial_data,
+                          storflags);
+        buf->data = gl->MapBufferRange(target, 0, params->size, flags);
+        if (!buf->data) {
+            gl_check_error(gl, ra->log, "mapping buffer");
+            gl_buf_destroy(ra, buf); // frees buf and buf_gl
+            buf = NULL;
+        }
+    } else {
+        GLenum hint;
+        switch (params->type) {
+        case RA_BUF_TYPE_TEX_UPLOAD:     hint = GL_STREAM_DRAW; break;
+        case RA_BUF_TYPE_SHADER_STORAGE: hint = GL_STREAM_COPY; break;
+        case RA_BUF_TYPE_UNIFORM:        hint = GL_STATIC_DRAW; break;
+        default: MP_ASSERT_UNREACHABLE();
+        }
+
+        gl->BufferData(target, params->size, params->initial_data, hint);
+    }
+
+    gl->BindBuffer(target, 0);
+    return buf;
+}
+
+// Replace size bytes of the buffer contents, starting at offset, with the
+// bytes at data. The buffer must have been created as host_mutable
+// (asserted).
+static void gl_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+                          const void *data, size_t size)
+{
+    GL *gl = ra_gl_get(ra);
+    struct ra_buf_gl *priv = buf->priv;
+
+    assert(buf->params.host_mutable);
+
+    // Bind only for the duration of the upload.
+    gl->BindBuffer(priv->target, priv->buffer);
+    gl->BufferSubData(priv->target, offset, size, data);
+    gl->BindBuffer(priv->target, 0);
+}
+
+// Return whether buf may be reused right now. For mapped buffers this is the
+// case when no fence is attached, or when the attached fence has already
+// been signaled (the fence is then deleted and cleared).
+static bool gl_buf_poll(struct ra *ra, struct ra_buf *buf)
+{
+    // Non-persistently mapped buffers are always implicitly reusable in OpenGL,
+    // the implementation will create more buffers under the hood if needed.
+    if (!buf->data)
+        return true;
+
+    GL *gl = ra_gl_get(ra);
+    struct ra_buf_gl *priv = buf->priv;
+
+    if (!priv->fence)
+        return true;
+
+    // No flags and a zero timeout: this only queries the fence state.
+    GLenum state = gl->ClientWaitSync(priv->fence, 0, 0);
+    if (state != GL_ALREADY_SIGNALED)
+        return false;
+
+    gl->DeleteSync(priv->fence);
+    priv->fence = NULL;
+    return true;
+}
+
+// Fill the scissor rectangle of dst with color (four components, passed to
+// glClearColor in order). dst must be a render target (asserted).
+static void gl_clear(struct ra *ra, struct ra_tex *dst, float color[4],
+                     struct mp_rect *scissor)
+{
+    GL *gl = ra_gl_get(ra);
+
+    assert(dst->params.render_dst);
+    struct ra_tex_gl *target = dst->priv;
+
+    gl->BindFramebuffer(GL_FRAMEBUFFER, target->fbo);
+
+    // The scissor test restricts the clear to the requested rectangle; it is
+    // switched off again right after the clear.
+    gl->Scissor(scissor->x0,
+                scissor->y0,
+                scissor->x1 - scissor->x0,
+                scissor->y1 - scissor->y0);
+    gl->Enable(GL_SCISSOR_TEST);
+
+    gl->ClearColor(color[0], color[1], color[2], color[3]);
+    gl->Clear(GL_COLOR_BUFFER_BIT);
+
+    gl->Disable(GL_SCISSOR_TEST);
+    gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+}
+
+// Copy the color contents of src_rc in src to dst_rc in dst, using nearest
+// filtering. src must be usable as blit source and dst as blit destination
+// (both asserted).
+static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+                    struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+    GL *gl = ra_gl_get(ra);
+
+    assert(src->params.blit_src);
+    assert(dst->params.blit_dst);
+
+    struct ra_tex_gl *from = src->priv;
+    struct ra_tex_gl *to = dst->priv;
+
+    gl->BindFramebuffer(GL_READ_FRAMEBUFFER, from->fbo);
+    gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, to->fbo);
+
+    gl->BlitFramebuffer(src_rc->x0, src_rc->y0,
+                        src_rc->x1, src_rc->y1,
+                        dst_rc->x0, dst_rc->y0,
+                        dst_rc->x1, dst_rc->y1,
+                        GL_COLOR_BUFFER_BIT, GL_NEAREST);
+
+    // Leave both framebuffer bindings reset.
+    gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0);
+    gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
+}
+
+// Return the binding namespace of a variable type. The namespace is simply
+// the numeric value of the type itself; ra is not consulted.
+static int gl_desc_namespace(struct ra *ra, enum ra_vartype type)
+{
+    int ns = type;
+    return ns;
+}
+
+// Destroy a render pass: delete its GL program, tear down its vertex array
+// state, then free the private data and the pass itself.
+static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass)
+{
+    GL *gl = ra_gl_get(ra);
+    struct ra_renderpass_gl *priv = pass->priv;
+
+    gl->DeleteProgram(priv->program);
+    gl_vao_uninit(&priv->vao);
+
+    talloc_free(priv);
+    talloc_free(pass);
+}
+
+// Human-readable name of a shader stage, used in log messages. Only vertex,
+// fragment and compute shaders are valid arguments.
+static const char *shader_typestr(GLenum type)
+{
+    if (type == GL_VERTEX_SHADER)
+        return "vertex";
+    if (type == GL_FRAGMENT_SHADER)
+        return "fragment";
+    if (type == GL_COMPUTE_SHADER)
+        return "compute";
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Compile source as a shader of the given type and attach it to program.
+//
+// The shader source and the compile log are logged at a level that depends
+// on the outcome: error level if compilation failed, verbose level if it
+// succeeded but produced a log, debug level otherwise. If the ANGLE
+// translated-source query is available and debug logging is enabled, the
+// translated source is logged as well.
+//
+// *ok is cleared if compilation failed; it is never set to true here, so one
+// flag can accumulate the result of several calls.
+static void compile_attach_shader(struct ra *ra, GLuint program,
+                                  GLenum type, const char *source, bool *ok)
+{
+    GL *gl = ra_gl_get(ra);
+
+    GLuint shader = gl->CreateShader(type);
+    gl->ShaderSource(shader, 1, &source, NULL);
+    gl->CompileShader(shader);
+    GLint status = 0;
+    gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status);
+    GLint log_length = 0;
+    gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length);
+
+    int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR;
+    const char *typestr = shader_typestr(type);
+    if (mp_msg_test(ra->log, pri)) {
+        MP_MSG(ra, pri, "%s shader source:\n", typestr);
+        mp_log_source(ra->log, pri, source);
+    }
+    if (log_length > 1) {
+        GLchar *logstr = talloc_zero_size(NULL, log_length + 1);
+        gl->GetShaderInfoLog(shader, log_length, NULL, logstr);
+        MP_MSG(ra, pri, "%s shader compile log (status=%d):\n%s\n",
+               typestr, status, logstr);
+        talloc_free(logstr);
+    }
+    if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(ra->log, MSGL_DEBUG)) {
+        GLint len = 0;
+        gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len);
+        if (len > 0) {
+            GLchar *sstr = talloc_zero_size(NULL, len + 1);
+            gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr);
+            MP_DBG(ra, "Translated shader:\n");
+            mp_log_source(ra->log, MSGL_DEBUG, sstr);
+            // The buffer has no talloc parent, so it must be released here.
+            talloc_free(sstr);
+        }
+    }
+
+    // The shader object is no longer referenced by name once it is attached.
+    gl->AttachShader(program, shader);
+    gl->DeleteShader(shader);
+
+    *ok &= status;
+}
+
+// Link program and log the link log. The log level is error if linking
+// failed, verbose if it succeeded with a non-trivial log, and debug
+// otherwise. *ok is cleared on failure and left alone on success.
+static void link_shader(struct ra *ra, GLuint program, bool *ok)
+{
+    GL *gl = ra_gl_get(ra);
+
+    gl->LinkProgram(program);
+
+    GLint status = 0;
+    GLint log_length = 0;
+    gl->GetProgramiv(program, GL_LINK_STATUS, &status);
+    gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length);
+
+    int pri = MSGL_ERR;
+    if (status)
+        pri = log_length > 1 ? MSGL_V : MSGL_DEBUG;
+
+    // Only fetch the log if it would actually be printed.
+    if (mp_msg_test(ra->log, pri)) {
+        GLchar *text = talloc_zero_size(NULL, log_length + 1);
+        gl->GetProgramInfoLog(program, log_length, NULL, text);
+        MP_MSG(ra, pri, "shader link log (status=%d): %s\n", status, text);
+        talloc_free(text);
+    }
+
+    *ok &= status;
+}
+
+// Build a program from GLSL source. Compute passes use p->compute_shader,
+// raster passes use p->vertex_shader plus p->frag_shader. For raster passes,
+// vertex attribute n is bound to location n. Returns the program name, or 0
+// if compiling any shader or linking failed.
+static GLuint compile_program(struct ra *ra, const struct ra_renderpass_params *p)
+{
+    GL *gl = ra_gl_get(ra);
+
+    GLuint program = gl->CreateProgram();
+    bool success = true;
+
+    switch (p->type) {
+    case RA_RENDERPASS_TYPE_COMPUTE:
+        compile_attach_shader(ra, program, GL_COMPUTE_SHADER,
+                              p->compute_shader, &success);
+        break;
+    case RA_RENDERPASS_TYPE_RASTER:
+        compile_attach_shader(ra, program, GL_VERTEX_SHADER,
+                              p->vertex_shader, &success);
+        compile_attach_shader(ra, program, GL_FRAGMENT_SHADER,
+                              p->frag_shader, &success);
+        // Attribute locations have to be assigned before linking.
+        for (int i = 0; i < p->num_vertex_attribs; i++)
+            gl->BindAttribLocation(program, i, p->vertex_attribs[i].name);
+        break;
+    default:
+        break;
+    }
+
+    link_shader(ra, program, &success);
+    if (success)
+        return program;
+
+    gl->DeleteProgram(program);
+    return 0;
+}
+
+// Obtain the GL program for a render pass, preferring a cached binary.
+//
+// The cache blob (p->cached_program on input, *out_cached_data on output)
+// consists of a 4-byte binary format value, read and written with
+// AV_RL32/AV_WL32, followed by the program binary itself.
+//
+// If a usable cached binary is supplied and loads successfully, it is used
+// and *out_cached_data is left untouched. Otherwise the program is compiled
+// from source; if the program binary can then be retrieved, *out_cached_data
+// is set to a newly allocated blob (talloc allocation without a parent).
+//
+// Returns the program name, or 0 if compilation failed.
+static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p,
+                           bstr *out_cached_data)
+{
+    GL *gl = ra_gl_get(ra);
+
+    GLuint program = 0;
+
+    if (gl->ProgramBinary && p->cached_program.len > 4) {
+        GLenum cached_format = AV_RL32(p->cached_program.start);
+        program = gl->CreateProgram();
+        gl_check_error(gl, ra->log, "before loading program");
+        gl->ProgramBinary(program, cached_format, p->cached_program.start + 4,
+                          p->cached_program.len - 4);
+        gl->GetError(); // discard potential useless error
+        GLint linked = 0;
+        gl->GetProgramiv(program, GL_LINK_STATUS, &linked);
+        if (linked) {
+            MP_DBG(ra, "Loading binary program succeeded.\n");
+        } else {
+            gl->DeleteProgram(program);
+            program = 0;
+        }
+    }
+
+    // The cached binary was accepted: nothing to compile or to re-cache.
+    if (program)
+        return program;
+
+    program = compile_program(ra, p);
+    if (!gl->GetProgramBinary || !program)
+        return program;
+
+    GLint binary_size = 0;
+    gl->GetProgramiv(program, GL_PROGRAM_BINARY_LENGTH, &binary_size);
+
+    // 4 extra bytes in front of the binary hold the binary format value.
+    uint8_t *blob = talloc_size(NULL, binary_size + 4);
+    GLsizei written = 0;
+    GLenum format = 0;
+    if (binary_size > 0)
+        gl->GetProgramBinary(program, binary_size, &written, &format, blob + 4);
+    AV_WL32(blob, format);
+
+    if (written) {
+        *out_cached_data = (bstr){blob, written + 4};
+    } else {
+        talloc_free(blob);
+    }
+
+    return program;
+}
+
+// Create a render pass from params.
+//
+// The pass keeps its own copy of the params. Its cached_program field does
+// not refer to the caller's data: it is set to the program binary produced
+// by load_program() (owned by the pass), or left empty if none was produced.
+// The uniform location of every input is looked up once and remembered, and
+// the vertex array state for the pass' vertex attributes is set up.
+//
+// Returns NULL if the program could not be loaded or compiled.
+static struct ra_renderpass *gl_renderpass_create(struct ra *ra,
+                                    const struct ra_renderpass_params *params)
+{
+    GL *gl = ra_gl_get(ra);
+
+    struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass);
+    pass->params = *ra_renderpass_params_copy(pass, params);
+    pass->params.cached_program = (bstr){0};
+
+    struct ra_renderpass_gl *priv = talloc_zero(NULL, struct ra_renderpass_gl);
+    pass->priv = priv;
+
+    bstr new_cache = {0};
+    priv->program = load_program(ra, params, &new_cache);
+    if (!priv->program) {
+        gl_renderpass_destroy(ra, pass);
+        return NULL;
+    }
+
+    // Tie the lifetime of the cache blob to the pass.
+    talloc_steal(pass, new_cache.start);
+    pass->params.cached_program = new_cache;
+
+    gl->UseProgram(priv->program);
+    for (int i = 0; i < params->num_inputs; i++) {
+        const struct ra_renderpass_input *in = &params->inputs[i];
+        GLint location = gl->GetUniformLocation(priv->program, in->name);
+        MP_TARRAY_APPEND(priv, priv->uniform_loc, priv->num_uniform_loc,
+                         location);
+
+        // For compatibility with older OpenGL, we need to explicitly update
+        // the texture/image unit bindings after creating the shader program,
+        // since specifying it directly requires GLSL 4.20+
+        if (in->type == RA_VARTYPE_TEX || in->type == RA_VARTYPE_IMG_W)
+            gl->Uniform1i(location, in->binding);
+    }
+    gl->UseProgram(0);
+
+    gl_vao_init(&priv->vao, gl, pass->params.vertex_stride,
+                pass->params.vertex_attribs, pass->params.num_vertex_attribs);
+
+    return pass;
+}
+
+// Translate a ra blend factor into the corresponding GL blend factor.
+// Values that are not handled map to 0.
+static GLenum map_blend(enum ra_blend blend)
+{
+    GLenum factor = 0;
+
+    switch (blend) {
+    case RA_BLEND_ZERO:
+        factor = GL_ZERO;
+        break;
+    case RA_BLEND_ONE:
+        factor = GL_ONE;
+        break;
+    case RA_BLEND_SRC_ALPHA:
+        factor = GL_SRC_ALPHA;
+        break;
+    case RA_BLEND_ONE_MINUS_SRC_ALPHA:
+        factor = GL_ONE_MINUS_SRC_ALPHA;
+        break;
+    default:
+        break;
+    }
+
+    return factor;
+}
+
+// Apply one input value of a render pass to the GL state.
+// Assumes program is current (gl->UseProgram(program)).
+//
+// val->index selects the input description in pass->params.inputs and the
+// uniform location looked up when the pass was created. What happens depends
+// on the input type:
+//  - int/float: the value at val->data is uploaded as a plain uniform
+//    (skipped if the uniform has no location, i.e. loc < 0)
+//  - texture/image: val->data points to a struct ra_tex pointer, which gets
+//    bound to the texture/image unit given by the input's binding
+//  - buffer: val->data points to a struct ra_buf pointer, which gets bound
+//    to the indexed binding point given by the input's binding
+static void update_uniform(struct ra *ra, struct ra_renderpass *pass,
+                           struct ra_renderpass_input_val *val)
+{
+    GL *gl = ra_gl_get(ra);
+    struct ra_renderpass_gl *pass_gl = pass->priv;
+
+    // Check the index before it is used to address any array.
+    assert(val->index >= 0 && val->index < pass_gl->num_uniform_loc);
+    struct ra_renderpass_input *input = &pass->params.inputs[val->index];
+    GLint loc = pass_gl->uniform_loc[val->index];
+
+    switch (input->type) {
+    case RA_VARTYPE_INT: {
+        assert(input->dim_v * input->dim_m == 1);
+        if (loc < 0)
+            break;
+        gl->Uniform1i(loc, *(int *)val->data);
+        break;
+    }
+    case RA_VARTYPE_FLOAT: {
+        float *f = val->data;
+        if (loc < 0)
+            break;
+        if (input->dim_m == 1) {
+            // Scalar or vector of dim_v components.
+            switch (input->dim_v) {
+            case 1: gl->Uniform1f(loc, f[0]); break;
+            case 2: gl->Uniform2f(loc, f[0], f[1]); break;
+            case 3: gl->Uniform3f(loc, f[0], f[1], f[2]); break;
+            case 4: gl->Uniform4f(loc, f[0], f[1], f[2], f[3]); break;
+            default: MP_ASSERT_UNREACHABLE();
+            }
+        } else if (input->dim_v == 2 && input->dim_m == 2) {
+            gl->UniformMatrix2fv(loc, 1, GL_FALSE, f);
+        } else if (input->dim_v == 3 && input->dim_m == 3) {
+            gl->UniformMatrix3fv(loc, 1, GL_FALSE, f);
+        } else {
+            // Only 2x2 and 3x3 matrices are supported.
+            MP_ASSERT_UNREACHABLE();
+        }
+        break;
+    }
+    case RA_VARTYPE_IMG_W: {
+        struct ra_tex *tex = *(struct ra_tex **)val->data;
+        struct ra_tex_gl *tex_gl = tex->priv;
+        assert(tex->params.storage_dst);
+        gl->BindImageTexture(input->binding, tex_gl->texture, 0, GL_FALSE, 0,
+                             GL_WRITE_ONLY, tex_gl->internal_format);
+        break;
+    }
+    case RA_VARTYPE_TEX: {
+        struct ra_tex *tex = *(struct ra_tex **)val->data;
+        struct ra_tex_gl *tex_gl = tex->priv;
+        assert(tex->params.render_src);
+        // This changes the active texture unit; callers reset it afterwards.
+        gl->ActiveTexture(GL_TEXTURE0 + input->binding);
+        gl->BindTexture(tex_gl->target, tex_gl->texture);
+        break;
+    }
+    case RA_VARTYPE_BUF_RO: // fall through
+    case RA_VARTYPE_BUF_RW: {
+        struct ra_buf *buf = *(struct ra_buf **)val->data;
+        struct ra_buf_gl *buf_gl = buf->priv;
+        gl->BindBufferBase(buf_gl->target, input->binding, buf_gl->buffer);
+        // SSBOs are not implicitly coherent in OpenGL
+        if (input->type == RA_VARTYPE_BUF_RW)
+            gl->MemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
+        break;
+    }
+    default:
+        MP_ASSERT_UNREACHABLE();
+    }
+}
+
+// Undo the binding that update_uniform() established for one input value.
+//
+// Textures are unbound from their texture unit, writable images from their
+// image unit, and read/write buffers from their shader storage binding
+// point. All other input types are left as they are.
+//
+// The capability asserted for each type matches the one update_uniform()
+// requires for it: render_src for textures, storage_dst for images.
+static void disable_binding(struct ra *ra, struct ra_renderpass *pass,
+                           struct ra_renderpass_input_val *val)
+{
+    GL *gl = ra_gl_get(ra);
+
+    struct ra_renderpass_input *input = &pass->params.inputs[val->index];
+
+    switch (input->type) {
+    case RA_VARTYPE_TEX: {
+        struct ra_tex *tex = *(struct ra_tex **)val->data;
+        struct ra_tex_gl *tex_gl = tex->priv;
+        assert(tex->params.render_src);
+        // This changes the active texture unit; callers reset it afterwards.
+        gl->ActiveTexture(GL_TEXTURE0 + input->binding);
+        gl->BindTexture(tex_gl->target, 0);
+        break;
+    }
+    case RA_VARTYPE_IMG_W: {
+        struct ra_tex *tex = *(struct ra_tex **)val->data;
+        struct ra_tex_gl *tex_gl = tex->priv;
+        assert(tex->params.storage_dst);
+        gl->BindImageTexture(input->binding, 0, 0, GL_FALSE, 0,
+                             GL_WRITE_ONLY, tex_gl->internal_format);
+        break;
+    }
+    case RA_VARTYPE_BUF_RW:
+        gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, 0);
+        break;
+    default:
+        // Nothing to undo for the remaining input types.
+        break;
+    }
+}
+
+// Execute a render pass.
+//
+// The pass' program is made current and all supplied input values are
+// applied. A raster pass then draws params->vertex_data as triangles into
+// params->target, restricted by the given viewport and scissor rectangle and
+// optionally blended; a compute pass dispatches params->compute_groups work
+// groups. Afterwards the input bindings, the active texture unit and the
+// current program are reset again.
+static void gl_renderpass_run(struct ra *ra,
+                              const struct ra_renderpass_run_params *params)
+{
+    GL *gl = ra_gl_get(ra);
+    struct ra_renderpass *pass = params->pass;
+    struct ra_renderpass_gl *priv = pass->priv;
+
+    gl->UseProgram(priv->program);
+
+    for (int i = 0; i < params->num_values; i++)
+        update_uniform(ra, pass, &params->values[i]);
+    gl->ActiveTexture(GL_TEXTURE0);
+
+    switch (pass->params.type) {
+    case RA_RENDERPASS_TYPE_RASTER: {
+        struct ra_tex_gl *dst_gl = params->target->priv;
+        assert(params->target->params.render_dst);
+        assert(params->target->params.format == pass->params.target_format);
+
+        gl->BindFramebuffer(GL_FRAMEBUFFER, dst_gl->fbo);
+        if (pass->params.invalidate_target && gl->InvalidateFramebuffer) {
+            // FBO 0 has no color attachment 0; GL_COLOR is used for it.
+            GLenum attachment = dst_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR;
+            gl->InvalidateFramebuffer(GL_FRAMEBUFFER, 1, &attachment);
+        }
+
+        gl->Viewport(params->viewport.x0, params->viewport.y0,
+                     mp_rect_w(params->viewport),
+                     mp_rect_h(params->viewport));
+        gl->Scissor(params->scissors.x0, params->scissors.y0,
+                    mp_rect_w(params->scissors),
+                    mp_rect_h(params->scissors));
+        gl->Enable(GL_SCISSOR_TEST);
+
+        if (pass->params.enable_blend) {
+            gl->BlendFuncSeparate(map_blend(pass->params.blend_src_rgb),
+                                  map_blend(pass->params.blend_dst_rgb),
+                                  map_blend(pass->params.blend_src_alpha),
+                                  map_blend(pass->params.blend_dst_alpha));
+            gl->Enable(GL_BLEND);
+        }
+
+        gl_vao_draw_data(&priv->vao, GL_TRIANGLES, params->vertex_data,
+                         params->vertex_count);
+
+        // Blending is switched off unconditionally, whether or not it was
+        // enabled above.
+        gl->Disable(GL_SCISSOR_TEST);
+        gl->Disable(GL_BLEND);
+        gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+        break;
+    }
+    case RA_RENDERPASS_TYPE_COMPUTE: {
+        gl->DispatchCompute(params->compute_groups[0],
+                            params->compute_groups[1],
+                            params->compute_groups[2]);
+
+        gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT);
+        break;
+    }
+    default:
+        MP_ASSERT_UNREACHABLE();
+    }
+
+    for (int i = 0; i < params->num_values; i++)
+        disable_binding(ra, pass, &params->values[i]);
+    gl->ActiveTexture(GL_TEXTURE0);
+
+    gl->UseProgram(0);
+}
+
+// Timers in GL use query objects, and are asynchronous. So pool a few of
+// these together. GL_QUERY_OBJECT_NUM should be large enough to avoid this
+// ever blocking. We can afford to throw query objects around, there's no
+// practical limit on them and their overhead is small.
+
+// Number of query objects pooled per timer.
+#define GL_QUERY_OBJECT_NUM 8
+
+// State of one timer. The query objects are used round-robin: each
+// gl_timer_start() begins the query at idx and then advances idx.
+struct gl_timer {
+    // Pool of query object names, generated in gl_timer_create().
+    GLuint query[GL_QUERY_OBJECT_NUM];
+    // Index of the query that the next gl_timer_start() will use; wraps
+    // around at GL_QUERY_OBJECT_NUM.
+    int idx;
+    // Result read in gl_timer_start() from the query that is about to be
+    // reused (0 if it held none); this is what gl_timer_stop() returns.
+    uint64_t result;
+    // True between a gl_timer_start() that began a query and the matching
+    // gl_timer_stop().
+    bool active;
+};
+
+// Allocate a timer together with its pool of query objects. Returns NULL if
+// the GL context provides no query objects.
+static ra_timer *gl_timer_create(struct ra *ra)
+{
+    GL *gl = ra_gl_get(ra);
+
+    if (gl->GenQueries) {
+        struct gl_timer *t = talloc_zero(NULL, struct gl_timer);
+        gl->GenQueries(GL_QUERY_OBJECT_NUM, t->query);
+        return (ra_timer *)t;
+    }
+
+    return NULL;
+}
+
+// Delete a timer's query objects and free the timer. NULL is accepted and
+// ignored.
+static void gl_timer_destroy(struct ra *ra, ra_timer *ratimer)
+{
+    if (ratimer) {
+        GL *gl = ra_gl_get(ra);
+        struct gl_timer *t = ratimer;
+
+        gl->DeleteQueries(GL_QUERY_OBJECT_NUM, t->query);
+        talloc_free(t);
+    }
+}
+
+// Begin a time measurement using the next query object of the timer's pool.
+// If that query object already holds a result from an earlier round, the
+// result is fetched first and stored for gl_timer_stop() to return.
+static void gl_timer_start(struct ra *ra, ra_timer *ratimer)
+{
+    struct ra_gl *p = ra->priv;
+    GL *gl = p->gl;
+    struct gl_timer *timer = ratimer;
+
+    // GL_TIME_ELAPSED queries cannot be nested. If another timer is already
+    // running, this call is silently ignored rather than raising a GL error.
+    if (p->timer_active)
+        return;
+
+    GLuint query = timer->query[timer->idx];
+
+    // If this query object already contains a result, we need to retrieve it
+    timer->result = 0;
+    if (gl->IsQuery(query))
+        gl->GetQueryObjectui64v(query, GL_QUERY_RESULT, &timer->result);
+
+    gl->BeginQuery(GL_TIME_ELAPSED, query);
+    timer->idx = (timer->idx + 1) % GL_QUERY_OBJECT_NUM;
+
+    timer->active = true;
+    p->timer_active = true;
+}
+
+// End the measurement begun by gl_timer_start() and return the result that
+// gl_timer_start() stored. Since results are fetched asynchronously, this is
+// the value of an earlier measurement, not of the one just ended. Returns 0
+// if the timer was not running.
+static uint64_t gl_timer_stop(struct ra *ra, ra_timer *ratimer)
+{
+    struct ra_gl *p = ra->priv;
+    GL *gl = p->gl;
+    struct gl_timer *timer = ratimer;
+
+    if (timer->active) {
+        gl->EndQuery(GL_TIME_ELAPSED);
+        timer->active = false;
+        p->timer_active = false;
+        return timer->result;
+    }
+
+    return 0;
+}
+
+// Debug hook: when debugging is enabled for this ra, report all pending GL
+// errors, labelled with msg. Does nothing otherwise.
+static void gl_debug_marker(struct ra *ra, const char *msg)
+{
+    struct ra_gl *p = ra->priv;
+
+    if (!p->debug_enable)
+        return;
+
+    gl_check_error(p->gl, ra->log, msg);
+}
+
+// Function table of the OpenGL backend: maps every ra_fns entry point to its
+// OpenGL implementation.
+static struct ra_fns ra_fns_gl = {
+    .destroy                = gl_destroy,
+    .tex_create             = gl_tex_create,
+    .tex_destroy            = gl_tex_destroy,
+    .tex_upload             = gl_tex_upload,
+    .tex_download           = gl_tex_download,
+    .buf_create             = gl_buf_create,
+    .buf_destroy            = gl_buf_destroy,
+    .buf_update             = gl_buf_update,
+    .buf_poll               = gl_buf_poll,
+    .clear                  = gl_clear,
+    .blit                   = gl_blit,
+    .uniform_layout         = std140_layout,
+    .desc_namespace         = gl_desc_namespace,
+    .renderpass_create      = gl_renderpass_create,
+    .renderpass_destroy     = gl_renderpass_destroy,
+    .renderpass_run         = gl_renderpass_run,
+    .timer_create           = gl_timer_create,
+    .timer_destroy          = gl_timer_destroy,
+    .timer_start            = gl_timer_start,
+    .timer_stop             = gl_timer_stop,
+    .debug_marker           = gl_debug_marker,
+};
diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h
new file mode 100644
index 0000000..9844977
--- /dev/null
+++ b/video/out/opengl/ra_gl.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include "common.h"
+#include "utils.h"
+
+// Entry points of the OpenGL ra backend.
+// NOTE(review): the notes below are inferred from names and signatures only;
+// confirm them against the implementations in ra_gl.c.
+
+// Presumably creates a ra instance on top of the given GL function table.
+struct ra *ra_create_gl(GL *gl, struct mp_log *log);
+// Presumably wraps an existing GL texture (gl_texture) in a ra_tex described
+// by params - TODO confirm ownership of the GL object.
+struct ra_tex *ra_create_wrapped_tex(struct ra *ra,
+                                     const struct ra_tex_params *params,
+                                     GLuint gl_texture);
+// Presumably wraps an existing GL framebuffer (gl_fbo) of size w x h in a
+// ra_tex - TODO confirm.
+struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h);
+// Returns the GL function table that belongs to ra.
+GL *ra_gl_get(struct ra *ra);
+// Presumably toggles the GL error checking done on debug markers - TODO
+// confirm.
+void ra_gl_set_debug(struct ra *ra, bool enable);
+// Presumably yields the GL internal format, format and type for fmt.
+void ra_gl_get_format(const struct ra_format *fmt, GLint *out_internal_format,
+                      GLenum *out_format, GLenum *out_type);
+// Presumably yields the GL texture name and target behind tex.
+void ra_gl_get_raw_tex(struct ra *ra, struct ra_tex *tex,
+                       GLuint *out_texture, GLenum *out_target);
+// Presumably tells whether ra is an instance of the OpenGL backend.
+bool ra_is_gl(struct ra *ra);
diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c
new file mode 100644
index 0000000..a551ce4
--- /dev/null
+++ b/video/out/opengl/utils.c
@@ -0,0 +1,282 @@
+/*
+ * This file is part of mpv.
+ * Parts based on MPlayer code by Reimar Döffinger.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stddef.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdarg.h>
+#include <assert.h>
+
+#include <libavutil/sha.h>
+#include <libavutil/intreadwrite.h>
+#include <libavutil/mem.h>
+
+#include "osdep/io.h"
+
+#include "common/common.h"
+#include "options/path.h"
+#include "stream/stream.h"
+#include "formats.h"
+#include "utils.h"
+
+// Symbolic name of a GL error code, without the GL_ prefix; "unknown" for
+// codes that are not handled. GLU offers gluErrorString for this, but GLU is
+// legacy OpenGL and not used here.
+static const char *gl_error_to_string(GLenum error)
+{
+    const char *name = "unknown";
+
+    switch (error) {
+    case GL_INVALID_ENUM:
+        name = "INVALID_ENUM";
+        break;
+    case GL_INVALID_VALUE:
+        name = "INVALID_VALUE";
+        break;
+    case GL_INVALID_OPERATION:
+        name = "INVALID_OPERATION";
+        break;
+    case GL_INVALID_FRAMEBUFFER_OPERATION:
+        name = "INVALID_FRAMEBUFFER_OPERATION";
+        break;
+    case GL_OUT_OF_MEMORY:
+        name = "OUT_OF_MEMORY";
+        break;
+    default:
+        break;
+    }
+
+    return name;
+}
+
+// Drain the GL error queue, logging every pending error at error level.
+// info is prepended to each message to tell where the check was made.
+void gl_check_error(GL *gl, struct mp_log *log, const char *info)
+{
+    GLenum err;
+
+    while ((err = gl->GetError()) != GL_NO_ERROR) {
+        mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info,
+               gl_error_to_string(err));
+    }
+}
+
+// Largest of 8, 4, 2 and 1 that divides stride evenly.
+static int get_alignment(int stride)
+{
+    for (int align = 8; align > 1; align /= 2) {
+        if (stride % align == 0)
+            return align;
+    }
+    return 1;
+}
+
+// Upload image data into the currently bound texture, taking care of the
+// source stride.
+//  target: texture target, usually GL_TEXTURE_2D
+//  format, type: texture parameters
+//  dataptr, stride: image data and its row pitch in bytes (must be > 0)
+//  x, y, w, h: part of the texture to upload to
+// Nothing is uploaded if w or h is not positive, or if the bytes per pixel
+// of format/type are reported as 0.
+void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
+                   const void *dataptr, int stride,
+                   int x, int y, int w, int h)
+{
+    const int bpp = gl_bytes_per_pixel(format, type);
+    const uint8_t *src = dataptr;
+    const int y_end = y + h;
+
+    if (w <= 0 || h <= 0 || !bpp)
+        return;
+    assert(stride > 0);
+
+    const bool has_row_length = gl->mpgl_caps & MPGL_CAP_ROW_LENGTH;
+
+    gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride));
+
+    // Number of rows passed to GL per call. Without row length support,
+    // padded rows have to be uploaded one at a time.
+    int rows = h;
+    if (has_row_length) {
+        // this is not always correct, but should work for MPlayer
+        gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp);
+    } else if (stride != bpp * w) {
+        rows = 1; // very inefficient, but at least it works
+    }
+
+    while (y + rows <= y_end) {
+        gl->TexSubImage2D(target, 0, x, y, w, rows, format, type, src);
+        src += stride * rows;
+        y += rows;
+    }
+    // Upload whatever is left if the rows did not divide evenly.
+    if (y < y_end)
+        gl->TexSubImage2D(target, 0, x, y, w, y_end - y, format, type, src);
+
+    // Restore the unpack state to its defaults.
+    if (has_row_length)
+        gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0);
+    gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4);
+}
+
+// Read the color contents of a framebuffer into memory, one row at a time.
+//  fbo: framebuffer to read from; 0 reads from the front buffer, which is
+//       not possible on GLES (false is returned in that case)
+//  dir: 1 stores framebuffer row y at destination row y; -1 stores it at
+//       destination row h - 1 - y, i.e. flips the image vertically
+//  format, type: pixel format of the data written to dst
+//  w, h: size of the region to read, starting at the framebuffer origin
+//  dst, dst_stride: destination memory and its row pitch in bytes; it must
+//       have room for h rows
+// Returns true if the read was performed.
+bool gl_read_fbo_contents(GL *gl, int fbo, int dir, GLenum format, GLenum type,
+                          int w, int h, uint8_t *dst, int dst_stride)
+{
+    assert(dir == 1 || dir == -1);
+    if (fbo == 0 && gl->es)
+        return false; // ES can't read from front buffer
+    gl->BindFramebuffer(GL_FRAMEBUFFER, fbo);
+    GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT;
+    gl->PixelStorei(GL_PACK_ALIGNMENT, 1);
+    gl->ReadBuffer(obj);
+    // reading by line allows flipping, and avoids stride-related trouble
+    for (int y = 0; y < h; y++) {
+        // When flipping, the first row read goes to the last destination
+        // row (h - 1), so that all writes stay within rows 0..h-1.
+        int dst_row = dir > 0 ? y : h - 1 - y;
+        gl->ReadPixels(0, y, w, 1, format, type,
+                       dst + (ptrdiff_t)dst_row * dst_stride);
+    }
+    gl->PixelStorei(GL_PACK_ALIGNMENT, 4);
+    gl->BindFramebuffer(GL_FRAMEBUFFER, 0);
+    return true;
+}
+
+// Enable one vertex attribute array per entry of the VAO description and set
+// up its layout (component count, type, stride and offset). Entry n is
+// assigned attribute location n. Only int, float and normalized unsigned
+// byte attributes are supported; anything else aborts.
+static void gl_vao_enable_attribs(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    for (int idx = 0; idx < vao->num_entries; idx++) {
+        const struct ra_renderpass_input *attr = &vao->entries[idx];
+        GLenum gl_type = 0;
+        bool normalize = false;
+
+        if (attr->type == RA_VARTYPE_INT) {
+            gl_type = GL_INT;
+        } else if (attr->type == RA_VARTYPE_FLOAT) {
+            gl_type = GL_FLOAT;
+        } else if (attr->type == RA_VARTYPE_BYTE_UNORM) {
+            gl_type = GL_UNSIGNED_BYTE;
+            normalize = true;
+        } else {
+            abort();
+        }
+
+        // Vertex attributes are vectors, never matrices.
+        assert(attr->dim_m == 1);
+
+        gl->EnableVertexAttribArray(idx);
+        gl->VertexAttribPointer(idx, attr->dim_v, gl_type, normalize,
+                                vao->stride, (void *)(intptr_t)attr->offset);
+    }
+}
+
+// Initialize vao for vertices of the given stride and attribute layout.
+// vao must be zero-initialized or previously uninitialized (asserted). The
+// entries array is referenced, not copied. A vertex buffer is always
+// created; a vertex array object is created only if the GL context supports
+// them.
+void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
+                 const struct ra_renderpass_input *entries,
+                 int num_entries)
+{
+    assert(!vao->vao);
+    assert(!vao->buffer);
+
+    *vao = (struct gl_vao){
+        .gl = gl,
+        .stride = stride,
+        .entries = entries,
+        .num_entries = num_entries,
+    };
+
+    gl->GenBuffers(1, &vao->buffer);
+
+    // Without VAO support the attributes are set up on every draw instead.
+    if (!gl->BindVertexArray)
+        return;
+
+    gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+
+    gl->GenVertexArrays(1, &vao->vao);
+    gl->BindVertexArray(vao->vao);
+    gl_vao_enable_attribs(vao);
+    gl->BindVertexArray(0);
+
+    gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+}
+
+// Release the GL objects of vao and reset it to all zeros. A vao that was
+// never initialized (no GL pointer set) is left untouched.
+void gl_vao_uninit(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (gl) {
+        if (gl->DeleteVertexArrays)
+            gl->DeleteVertexArrays(1, &vao->vao);
+        gl->DeleteBuffers(1, &vao->buffer);
+
+        *vao = (struct gl_vao){0};
+    }
+}
+
+// Make the vertex layout of vao current: bind the vertex array object if the
+// GL context supports them, otherwise set up the attribute arrays by hand.
+static void gl_vao_bind(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (!gl->BindVertexArray) {
+        // The attribute pointers refer to the buffer bound while they are
+        // set up, so the binding is only needed for that long.
+        gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+        gl_vao_enable_attribs(vao);
+        gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+        return;
+    }
+
+    gl->BindVertexArray(vao->vao);
+}
+
+// Counterpart of gl_vao_bind(): unbind the vertex array object, or disable
+// the attribute arrays again if vertex array objects are unsupported.
+static void gl_vao_unbind(struct gl_vao *vao)
+{
+    GL *gl = vao->gl;
+
+    if (!gl->BindVertexArray) {
+        for (int idx = 0; idx < vao->num_entries; idx++)
+            gl->DisableVertexAttribArray(idx);
+        return;
+    }
+
+    gl->BindVertexArray(0);
+}
+
+// Draw num vertices, laid out as described by the entries given to
+// gl_vao_init(), as primitives of type prim (usually GL_TRIANGLES).
+// If ptr is not NULL, the vertex data at ptr is uploaded first; if it is
+// NULL, the upload is skipped and the data of the previous call is drawn.
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num)
+{
+    GL *gl = vao->gl;
+
+    if (ptr != NULL) {
+        gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer);
+        gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW);
+        gl->BindBuffer(GL_ARRAY_BUFFER, 0);
+    }
+
+    gl_vao_bind(vao);
+    gl->DrawArrays(prim, 0, num);
+    gl_vao_unbind(vao);
+}
+
+// GL debug message callback: forwards message to the mp_log passed as
+// userParam, mapping the GL severity to a log level. Severities that are not
+// recognized are logged as errors.
+static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id,
+                                   GLenum severity, GLsizei length,
+                                   const GLchar *message, const void *userParam)
+{
+    // keep in mind that the debug callback can be asynchronous
+    struct mp_log *log = (void *)userParam;
+    int level;
+
+    switch (severity) {
+    case GL_DEBUG_SEVERITY_NOTIFICATION:
+        level = MSGL_V;
+        break;
+    case GL_DEBUG_SEVERITY_LOW:
+        level = MSGL_INFO;
+        break;
+    case GL_DEBUG_SEVERITY_MEDIUM:
+        level = MSGL_WARN;
+        break;
+    case GL_DEBUG_SEVERITY_HIGH:
+    default:
+        level = MSGL_ERR;
+        break;
+    }
+
+    mp_msg(log, level, "GL: %s\n", message);
+}
+
+// Route GL debug messages to log, or remove the debug callback again if log
+// is NULL. Does nothing if the GL context has no debug message support.
+void gl_set_debug_logger(GL *gl, struct mp_log *log)
+{
+    if (!gl->DebugMessageCallback)
+        return;
+
+    if (log) {
+        gl->DebugMessageCallback(gl_debug_cb, log);
+    } else {
+        gl->DebugMessageCallback(NULL, log);
+    }
+}
+
+// Given a GL combined extension string in extensions (names separated by
+// single spaces), find out whether ext is included in it. Basically, a word
+// search: ext only matches complete names, never a part of a longer name.
+// Returns false if either argument is NULL or if ext is empty.
+bool gl_check_extension(const char *extensions, const char *ext)
+{
+    if (!extensions || !ext)
+        return false;
+
+    // An empty name can never be found; without this check the search below
+    // would not advance.
+    size_t len = strlen(ext);
+    if (len == 0)
+        return false;
+
+    for (const char *cur = strstr(extensions, ext); cur;
+         cur = strstr(cur + len, ext))
+    {
+        bool starts_word = cur == extensions || cur[-1] == ' ';
+        bool ends_word = cur[len] == '\0' || cur[len] == ' ';
+        if (starts_word && ends_word)
+            return true;
+    }
+    return false;
+}
diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h
new file mode 100644
index 0000000..9bcadae
--- /dev/null
+++ b/video/out/opengl/utils.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of mpv.
+ * Parts based on MPlayer code by Reimar Döffinger.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_GL_UTILS_
+#define MP_GL_UTILS_
+
+#include <math.h>
+
+#include "video/out/gpu/utils.h"
+#include "common.h"
+
+struct mp_log;
+
+// Log all pending GL errors at error level, each prefixed with info.
+void gl_check_error(GL *gl, struct mp_log *log, const char *info);
+
+// Upload the w x h region at (x, y) of the currently bound texture from
+// dataptr, whose rows are stride bytes apart.
+void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type,
+                   const void *dataptr, int stride,
+                   int x, int y, int w, int h);
+
+// Read w x h pixels of framebuffer fbo into dst, row by row. dir must be 1
+// or -1 and selects the order in which the rows are stored. Returns false if
+// the framebuffer cannot be read (front buffer on GLES).
+bool gl_read_fbo_contents(GL *gl, int fbo, int dir, GLenum format, GLenum type,
+                          int w, int h, uint8_t *dst, int dst_stride);
+
+// Vertex buffer plus the description of the vertex layout stored in it.
+// Set up with gl_vao_init(), released with gl_vao_uninit().
+struct gl_vao {
+    GL *gl;         // GL function table; NULL while not initialized
+    GLuint vao;     // the VAO object, or 0 if unsupported by driver
+    GLuint buffer;  // GL_ARRAY_BUFFER used for the data
+    int stride;     // size of each element (interleaved elements are assumed)
+    // Vertex attribute descriptions; entry n is used for attribute location
+    // n. The array is referenced, not copied.
+    const struct ra_renderpass_input *entries;
+    int num_entries; // number of elements in entries
+};
+
+// Initialize vao (which must be zeroed) for vertices of the given stride
+// and attribute layout. entries is referenced, not copied.
+void gl_vao_init(struct gl_vao *vao, GL *gl, int stride,
+                 const struct ra_renderpass_input *entries,
+                 int num_entries);
+// Release the GL objects of vao and zero it. Safe on a zeroed vao.
+void gl_vao_uninit(struct gl_vao *vao);
+// Draw num vertices as primitives of type prim. If ptr is NULL, the vertex
+// data uploaded by the previous call is reused.
+void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num);
+
+// Send GL debug messages to log; a NULL log removes the callback again.
+void gl_set_debug_logger(GL *gl, struct mp_log *log);
+
+// Whole-word search for ext in the space-separated list extensions.
+bool gl_check_extension(const char *extensions, const char *ext);
+
+#endif
diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c
new file mode 100644
index 0000000..6259651
--- /dev/null
+++ b/video/out/placebo/ra_pl.c
@@ -0,0 +1,677 @@
+#include "common/common.h"
+#include "common/msg.h"
+
+#include "ra_pl.h"
+#include "utils.h"
+
+// Private state of a libplacebo-backed ra, stored in ra->priv.
+struct ra_pl {
+    pl_gpu gpu; // GPU handle passed to ra_create_pl()
+    struct ra_timer_pl *active_timer; // set by timer_start_pl(), NULL otherwise
+};
+
+// Fetch the libplacebo GPU handle kept in this ra's private data.
+static inline pl_gpu get_gpu(const struct ra *ra)
+{
+    const struct ra_pl *priv = ra->priv;
+
+    return priv->gpu;
+}
+
+static struct ra_fns ra_fns_pl;
+
+// Return the pl_gpu behind `ra`, or NULL when `ra` does not use the
+// libplacebo function table (i.e. it was not created by ra_create_pl()).
+pl_gpu ra_pl_get(const struct ra *ra)
+{
+    if (ra->fns != &ra_fns_pl)
+        return NULL;
+
+    return get_gpu(ra);
+}
+
+static pl_timer get_active_timer(const struct ra *ra);
+
+// Create a ra instance that forwards all operations to libplacebo's `gpu`.
+//
+// gpu: must be non-NULL (asserted). It is only referenced: destroy_ra_pl()
+//      frees the wrapper without destroying the pl_gpu.
+// log: stored as ra->log.
+//
+// Capabilities, limits and the list of usable texture formats are derived
+// from the GLSL features, limits and formats reported by the GPU.
+struct ra *ra_create_pl(pl_gpu gpu, struct mp_log *log)
+{
+    assert(gpu);
+
+    struct ra *ra = talloc_zero(NULL, struct ra);
+    ra->log = log;
+    ra->fns = &ra_fns_pl;
+
+    struct ra_pl *p = ra->priv = talloc_zero(ra, struct ra_pl);
+    p->gpu = gpu;
+
+    ra->glsl_version = gpu->glsl.version;
+    ra->glsl_vulkan = gpu->glsl.vulkan;
+    ra->glsl_es = gpu->glsl.gles;
+
+    ra->caps = RA_CAP_DIRECT_UPLOAD | RA_CAP_NESTED_ARRAY | RA_CAP_FRAGCOORD;
+
+    if (gpu->glsl.compute)
+        ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS;
+    if (gpu->limits.compute_queues > gpu->limits.fragment_queues)
+        ra->caps |= RA_CAP_PARALLEL_COMPUTE;
+    if (gpu->limits.max_variable_comps)
+        ra->caps |= RA_CAP_GLOBAL_UNIFORM;
+    if (!gpu->limits.host_cached)
+        ra->caps |= RA_CAP_SLOW_DR;
+
+    if (gpu->limits.max_tex_1d_dim)
+        ra->caps |= RA_CAP_TEX_1D;
+    if (gpu->limits.max_tex_3d_dim)
+        ra->caps |= RA_CAP_TEX_3D;
+    if (gpu->limits.max_ubo_size)
+        ra->caps |= RA_CAP_BUF_RO;
+    if (gpu->limits.max_ssbo_size)
+        ra->caps |= RA_CAP_BUF_RW;
+    if (gpu->glsl.min_gather_offset && gpu->glsl.max_gather_offset)
+        ra->caps |= RA_CAP_GATHER;
+
+    // Semi-hack: assume all textures are blittable if r8 is. If the GPU does
+    // not expose an "r8" format at all, blitting is simply not advertised
+    // instead of dereferencing a NULL format.
+    pl_fmt r8 = pl_find_named_fmt(gpu, "r8");
+    if (r8 && (r8->caps & PL_FMT_CAP_BLITTABLE))
+        ra->caps |= RA_CAP_BLIT;
+
+    ra->max_texture_wh = gpu->limits.max_tex_2d_dim;
+    ra->max_pushc_size = gpu->limits.max_pushc_size;
+    ra->max_compute_group_threads = gpu->glsl.max_group_threads;
+    ra->max_shmem = gpu->glsl.max_shmem_size;
+
+    // Set up format wrappers
+    for (int i = 0; i < gpu->num_formats; i++) {
+        pl_fmt plfmt = gpu->formats[i];
+        static const enum ra_ctype fmt_type_map[PL_FMT_TYPE_COUNT] = {
+            [PL_FMT_UNORM]  = RA_CTYPE_UNORM,
+            [PL_FMT_UINT]   = RA_CTYPE_UINT,
+            [PL_FMT_FLOAT]  = RA_CTYPE_FLOAT,
+        };
+
+        // Skip formats whose type has no entry in the map above (zero) as
+        // well as formats that cannot be sampled from.
+        enum ra_ctype type = fmt_type_map[plfmt->type];
+        if (!type || !(plfmt->caps & PL_FMT_CAP_SAMPLEABLE))
+            continue;
+
+        // The wrapper keeps the pl_fmt in .priv; map_fmt() relies on this to
+        // translate back from pl_fmt to ra_format.
+        struct ra_format *rafmt = talloc_zero(ra, struct ra_format);
+        *rafmt = (struct ra_format) {
+            .name = plfmt->name,
+            .priv = (void *) plfmt,
+            .ctype = type,
+            .ordered = pl_fmt_is_ordered(plfmt),
+            .num_components = plfmt->num_components,
+            .pixel_size = plfmt->texel_size,
+            .linear_filter = plfmt->caps & PL_FMT_CAP_LINEAR,
+            .renderable = plfmt->caps & PL_FMT_CAP_RENDERABLE,
+            .storable = plfmt->caps & PL_FMT_CAP_STORABLE,
+            .glsl_format = plfmt->glsl_format,
+        };
+
+        for (int c = 0; c < plfmt->num_components; c++) {
+            rafmt->component_size[c] = plfmt->host_bits[c];
+            rafmt->component_depth[c] = plfmt->component_depth[c];
+        }
+
+        MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, rafmt);
+    }
+
+    return ra;
+}
+
+// Free the ra wrapper. The private state and format wrappers allocated in
+// ra_create_pl() are talloc children of `ra` and are released with it. The
+// wrapped pl_gpu is not destroyed here.
+static void destroy_ra_pl(struct ra *ra)
+{
+    talloc_free(ra);
+}
+
+// Find the ra_format wrapper whose priv pointer is `plfmt`.
+// Logs an error and returns NULL if there is none.
+static struct ra_format *map_fmt(struct ra *ra, pl_fmt plfmt)
+{
+    struct ra_format *found = NULL;
+
+    for (int n = 0; n < ra->num_formats && !found; n++) {
+        if (ra->formats[n]->priv == plfmt)
+            found = ra->formats[n];
+    }
+
+    if (!found)
+        MP_ERR(ra, "Failed mapping pl_fmt '%s' to ra_fmt?\n", plfmt->name);
+
+    return found;
+}
+
+// Fill `out_tex` with a description of `pltex`, storing the pl_tex itself in
+// out_tex->priv. Returns false (leaving `out_tex` untouched) if `pltex` is
+// NULL, and false (after filling `out_tex`) if the texture's format has no
+// ra_format wrapper.
+bool mppl_wrap_tex(struct ra *ra, pl_tex pltex, struct ra_tex *out_tex)
+{
+    if (!pltex)
+        return false;
+
+    const struct pl_tex_params *src = &pltex->params;
+    struct ra_format *fmt = map_fmt(ra, src->format);
+
+    *out_tex = (struct ra_tex) {
+        .params = {
+            .dimensions = pl_tex_params_dimension(*src),
+            .w = src->w,
+            .h = src->h,
+            .d = src->d,
+            .format = fmt,
+            .render_src = src->sampleable,
+            .render_dst = src->renderable,
+            .storage_dst = src->storable,
+            .blit_src = src->blit_src,
+            .blit_dst = src->blit_dst,
+            .host_mutable = src->host_writable,
+            .downloadable = src->host_readable,
+            // These don't exist upstream, so just pick something reasonable
+            .src_linear = src->format->caps & PL_FMT_CAP_LINEAR,
+            .src_repeat = false,
+        },
+        .priv = (void *) pltex,
+    };
+
+    return fmt != NULL;
+}
+
+// Create a libplacebo texture matching `params` and wrap it in a newly
+// allocated ra_tex. Returns NULL if creation or wrapping fails.
+static struct ra_tex *tex_create_pl(struct ra *ra,
+                                    const struct ra_tex_params *params)
+{
+    pl_gpu gpu = get_gpu(ra);
+
+    // Height/depth are only passed on for textures of sufficient dimension
+    struct pl_tex_params pl_params = {
+        .w = params->w,
+        .h = params->dimensions >= 2 ? params->h : 0,
+        .d = params->dimensions >= 3 ? params->d : 0,
+        .format = params->format->priv,
+        .sampleable = params->render_src,
+        .renderable = params->render_dst,
+        .storable = params->storage_dst,
+        .blit_src = params->blit_src,
+        .blit_dst = params->blit_dst || params->render_dst,
+        .host_writable = params->host_mutable,
+        .host_readable = params->downloadable,
+        .initial_data = params->initial_data,
+    };
+    pl_tex pltex = pl_tex_create(gpu, &pl_params);
+
+    struct ra_tex *ratex = talloc_ptrtype(NULL, ratex);
+    if (mppl_wrap_tex(ra, pltex, ratex)) {
+        // Keep track of these, so we can correctly bind them later
+        ratex->params.src_repeat = params->src_repeat;
+        ratex->params.src_linear = params->src_linear;
+        return ratex;
+    }
+
+    pl_tex_destroy(gpu, &pltex);
+    talloc_free(ratex);
+    return NULL;
+}
+
+// Destroy the wrapped pl_tex and free the ra_tex. NULL is a no-op.
+static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex)
+{
+    if (tex) {
+        pl_tex_destroy(get_gpu(ra), (pl_tex *) &tex->priv);
+        talloc_free(tex);
+    }
+}
+
+// Upload data to a texture, either from host memory (params->src) or from a
+// buffer (params->buf at params->buf_offset). Returns whether the upload
+// succeeded.
+static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params)
+{
+    pl_gpu gpu = get_gpu(ra);
+    pl_tex tex = params->tex->priv;
+    struct pl_tex_transfer_params pl_params = {
+        .tex = tex,
+        .buf = params->buf ? params->buf->priv : NULL,
+        .buf_offset = params->buf_offset,
+        .ptr = (void *) params->src,
+        .timer = get_active_timer(ra),
+    };
+
+    // The sub-rectangle and row pitch are only forwarded for 2D textures;
+    // other dimensions use the zero-initialized defaults from above.
+    if (params->tex->params.dimensions == 2) {
+        if (params->rc) {
+            pl_params.rc = (struct pl_rect3d) {
+                .x0 = params->rc->x0, .x1 = params->rc->x1,
+                .y0 = params->rc->y0, .y1 = params->rc->y1,
+            };
+        }
+
+        pl_params.row_pitch = params->stride;
+    }
+
+    return pl_tex_upload(gpu, &pl_params);
+}
+
+// Read back a texture into params->dst using params->stride as row pitch.
+// Returns whether the download succeeded.
+static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params)
+{
+    struct pl_tex_transfer_params xfer = {
+        .tex = params->tex->priv,
+        .ptr = params->dst,
+        .timer = get_active_timer(ra),
+        .row_pitch = params->stride,
+    };
+
+    pl_gpu gpu = get_gpu(ra);
+    bool ok = pl_tex_download(gpu, &xfer);
+    return ok;
+}
+
+// Create a libplacebo buffer matching `params` and wrap it in a newly
+// allocated ra_buf. Returns NULL if the buffer could not be created.
+static struct ra_buf *buf_create_pl(struct ra *ra,
+                                    const struct ra_buf_params *params)
+{
+    struct pl_buf_params pl_params = {
+        .size = params->size,
+        .uniform = params->type == RA_BUF_TYPE_UNIFORM,
+        .storable = params->type == RA_BUF_TYPE_SHADER_STORAGE,
+        .host_mapped = params->host_mapped,
+        .host_writable = params->host_mutable,
+        .initial_data = params->initial_data,
+    };
+
+    pl_buf plbuf = pl_buf_create(get_gpu(ra), &pl_params);
+    if (!plbuf)
+        return NULL;
+
+    struct ra_buf *rabuf = talloc_ptrtype(NULL, rabuf);
+    *rabuf = (struct ra_buf) {
+        .params = *params,
+        .data = plbuf->data,
+        .priv = (void *) plbuf,
+    };
+
+    // The stored copy of the params must not keep the caller's data pointer
+    rabuf->params.initial_data = NULL;
+    return rabuf;
+}
+
+// Destroy the wrapped pl_buf and free the ra_buf. NULL is a no-op.
+static void buf_destroy_pl(struct ra *ra, struct ra_buf *buf)
+{
+    if (buf) {
+        pl_buf_destroy(get_gpu(ra), (pl_buf *) &buf->priv);
+        talloc_free(buf);
+    }
+}
+
+// Write `size` bytes from `data` into the buffer at byte position `offset`.
+static void buf_update_pl(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset,
+                          const void *data, size_t size)
+{
+    pl_gpu gpu = get_gpu(ra);
+    pl_buf plbuf = buf->priv;
+
+    pl_buf_write(gpu, plbuf, offset, data, size);
+}
+
+// Returns the negation of pl_buf_poll() with a zero timeout.
+// NOTE(review): presumably pl_buf_poll() reports "still in use", making this
+// return true once the buffer is idle - confirm against libplacebo docs.
+static bool buf_poll_pl(struct ra *ra, struct ra_buf *buf)
+{
+    bool polled = pl_buf_poll(get_gpu(ra), buf->priv, 0);
+
+    return !polled;
+}
+
+// Clear the whole of `dst` to `color`. The `scissor` argument is currently
+// ignored.
+static void clear_pl(struct ra *ra, struct ra_tex *dst, float color[4],
+                     struct mp_rect *scissor)
+{
+    pl_gpu gpu = get_gpu(ra);
+    pl_tex target = dst->priv;
+
+    // TODO: implement scissor clearing by blitting a 1x1 tex instead
+    pl_tex_clear(gpu, target, color);
+}
+
+// Convert an optional mp_rect into a pl_rect3d with every coordinate clamped
+// to [0, w] x [0, h]. A NULL rect yields an all-zero rectangle.
+static struct pl_rect3d clamp_rect_pl(const struct mp_rect *rc, int w, int h)
+{
+    struct pl_rect3d out = {0};
+
+    if (rc) {
+        out.x0 = MPMIN(MPMAX(rc->x0, 0), w);
+        out.y0 = MPMIN(MPMAX(rc->y0, 0), h);
+        out.x1 = MPMIN(MPMAX(rc->x1, 0), w);
+        out.y1 = MPMIN(MPMAX(rc->y1, 0), h);
+    }
+
+    return out;
+}
+
+// Blit `src_rc` of `src` onto `dst_rc` of `dst`. Both rectangles are clamped
+// to their texture's size; the filter follows the source's src_linear flag.
+static void blit_pl(struct ra *ra, struct ra_tex *dst, struct ra_tex *src,
+                    struct mp_rect *dst_rc, struct mp_rect *src_rc)
+{
+    struct pl_tex_blit_params blit = {
+        .src = src->priv,
+        .dst = dst->priv,
+        .src_rc = clamp_rect_pl(src_rc, src->params.w, src->params.h),
+        .dst_rc = clamp_rect_pl(dst_rc, dst->params.w, dst->params.h),
+        .sample_mode = src->params.src_linear ? PL_TEX_SAMPLE_LINEAR
+                                              : PL_TEX_SAMPLE_NEAREST,
+    };
+
+    pl_tex_blit(get_gpu(ra), &blit);
+}
+
+// Lookup tables indexed by enum ra_vartype. Entries that are not listed are
+// zero-initialized; the render pass code tests var_type[] and desc_type[]
+// entries for non-zero to tell plain variables apart from descriptors.
+
+// ra_vartype -> libplacebo variable type (plain shader variables)
+static const enum pl_var_type var_type[RA_VARTYPE_COUNT] = {
+    [RA_VARTYPE_INT]    = PL_VAR_SINT,
+    [RA_VARTYPE_FLOAT]  = PL_VAR_FLOAT,
+};
+
+// ra_vartype -> libplacebo descriptor type (textures, images, buffers)
+static const enum pl_desc_type desc_type[RA_VARTYPE_COUNT] = {
+    [RA_VARTYPE_TEX]    = PL_DESC_SAMPLED_TEX,
+    [RA_VARTYPE_IMG_W]  = PL_DESC_STORAGE_IMG,
+    [RA_VARTYPE_BUF_RO] = PL_DESC_BUF_UNIFORM,
+    [RA_VARTYPE_BUF_RW] = PL_DESC_BUF_STORAGE,
+};
+
+// ra_vartype -> libplacebo format type, used to find vertex attribute formats
+static const enum pl_fmt_type fmt_type[RA_VARTYPE_COUNT] = {
+    [RA_VARTYPE_INT]        = PL_FMT_SINT,
+    [RA_VARTYPE_FLOAT]      = PL_FMT_FLOAT,
+    [RA_VARTYPE_BYTE_UNORM] = PL_FMT_UNORM,
+};
+
+// ra_vartype -> size in bytes of one component on the host
+static const size_t var_size[RA_VARTYPE_COUNT] = {
+    [RA_VARTYPE_INT]        = sizeof(int),
+    [RA_VARTYPE_FLOAT]      = sizeof(float),
+    [RA_VARTYPE_BYTE_UNORM] = sizeof(uint8_t),
+};
+
+// Report alignment, stride and size of `inp` when placed in a uniform buffer.
+static struct ra_layout uniform_layout_pl(struct ra_renderpass_input *inp)
+{
+    struct pl_var var = {
+        .name = inp->name,
+        .type = var_type[inp->type],
+        .dim_v = inp->dim_v,
+        .dim_m = inp->dim_m,
+        .dim_a = 1,
+    };
+
+    // To get the alignment requirements, we try laying this out with
+    // an offset of 1 and then see where it ends up. This will always be
+    // the minimum alignment requirement.
+    struct pl_var_layout layout = pl_buf_uniform_layout(1, &var);
+
+    struct ra_layout result = {
+        .align = layout.offset,
+        .stride = layout.stride,
+        .size = layout.size,
+    };
+    return result;
+}
+
+// Report alignment, stride and size of `inp` when placed in the push
+// constant block. The layout is queried at offset 1, so the resulting offset
+// is used as the alignment.
+static struct ra_layout push_constant_layout_pl(struct ra_renderpass_input *inp)
+{
+    struct pl_var var = {
+        .name = inp->name,
+        .type = var_type[inp->type],
+        .dim_v = inp->dim_v,
+        .dim_m = inp->dim_m,
+        .dim_a = 1,
+    };
+
+    struct pl_var_layout layout = pl_push_constant_layout(1, &var);
+
+    struct ra_layout result = {
+        .align = layout.offset,
+        .stride = layout.stride,
+        .size = layout.size,
+    };
+    return result;
+}
+
+// Return libplacebo's descriptor namespace for the given input type.
+static int desc_namespace_pl(struct ra *ra, enum ra_vartype type)
+{
+    enum pl_desc_type pl_type = desc_type[type];
+
+    return pl_desc_namespace(get_gpu(ra), pl_type);
+}
+
+// Per-renderpass private state, stored in ra_renderpass.priv.
+struct pass_priv {
+    pl_pass pass;
+    // Maps an index into ra_renderpass_params.inputs[] to the position of that
+    // input in the pass's variable list or descriptor list (whichever applies)
+    uint16_t *inp_index; // index translation map
+    // Space to hold the descriptor bindings and variable updates
+    struct pl_desc_binding *binds;
+    struct pl_var_update *varups;
+    int num_varups; // reset at the start of every renderpass_run_pl() call
+};
+
+// Create a render pass (raster or compute) from `params`.
+//
+// Inputs are split into libplacebo variables and descriptors; the position of
+// each input inside its respective list is recorded in pass_priv.inp_index so
+// renderpass_run_pl() can translate input indices later.
+//
+// Returns a new ra_renderpass owning its private state, or NULL on failure
+// (a vertex attribute without matching format, or pl_pass_create() failing).
+static struct ra_renderpass *renderpass_create_pl(struct ra *ra,
+                                    const struct ra_renderpass_params *params)
+{
+    void *tmp = talloc_new(NULL);
+    pl_gpu gpu = get_gpu(ra);
+    struct ra_renderpass *pass = NULL;
+
+    static const enum pl_pass_type pass_type[] = {
+        [RA_RENDERPASS_TYPE_RASTER]  = PL_PASS_RASTER,
+        [RA_RENDERPASS_TYPE_COMPUTE] = PL_PASS_COMPUTE,
+    };
+
+    struct pl_var *vars = NULL;
+    struct pl_desc *descs = NULL;
+    int num_vars = 0, num_descs = 0;
+
+    // Must be zero-initialized: renderpass_run_pl() grows priv->varups with
+    // MP_TARRAY_APPEND, which needs a valid (or NULL) starting pointer.
+    struct pass_priv *priv = talloc_zero(tmp, struct pass_priv);
+    priv->inp_index = talloc_zero_array(priv, uint16_t, params->num_inputs);
+
+    for (int i = 0; i < params->num_inputs; i++) {
+        const struct ra_renderpass_input *inp = &params->inputs[i];
+        if (var_type[inp->type]) {
+            priv->inp_index[i] = num_vars;
+            MP_TARRAY_APPEND(tmp, vars, num_vars, (struct pl_var) {
+                .name = inp->name,
+                .type = var_type[inp->type],
+                .dim_v = inp->dim_v,
+                .dim_m = inp->dim_m,
+                .dim_a = 1,
+            });
+        } else if (desc_type[inp->type]) {
+            priv->inp_index[i] = num_descs;
+            MP_TARRAY_APPEND(tmp, descs, num_descs, (struct pl_desc) {
+                .name = inp->name,
+                .type = desc_type[inp->type],
+                .binding = inp->binding,
+                .access = inp->type == RA_VARTYPE_IMG_W ? PL_DESC_ACCESS_WRITEONLY
+                        : inp->type == RA_VARTYPE_BUF_RW ? PL_DESC_ACCESS_READWRITE
+                        : PL_DESC_ACCESS_READONLY,
+            });
+        }
+    }
+
+    // Allocate space to store the bindings map persistently
+    priv->binds = talloc_zero_array(priv, struct pl_desc_binding, num_descs);
+
+    struct pl_pass_params pl_params = {
+        .type = pass_type[params->type],
+        .variables = vars,
+        .num_variables = num_vars,
+        .descriptors = descs,
+        .num_descriptors = num_descs,
+        .push_constants_size = params->push_constants_size,
+        .glsl_shader = params->type == RA_RENDERPASS_TYPE_COMPUTE
+                            ? params->compute_shader
+                            : params->frag_shader,
+    };
+
+    // Declared at function scope because pl_params may point at it until
+    // pl_pass_create() is called below.
+    struct pl_blend_params blend_params;
+
+    if (params->type == RA_RENDERPASS_TYPE_RASTER) {
+        pl_params.vertex_shader = params->vertex_shader;
+        pl_params.vertex_type = PL_PRIM_TRIANGLE_LIST;
+        pl_params.vertex_stride = params->vertex_stride;
+        pl_params.load_target = !params->invalidate_target;
+        pl_params.target_format = params->target_format->priv;
+
+        if (params->enable_blend) {
+            pl_params.blend_params = &blend_params;
+            blend_params = (struct pl_blend_params) {
+                // Same enum order as ra_blend
+                .src_rgb = (enum pl_blend_mode) params->blend_src_rgb,
+                .dst_rgb = (enum pl_blend_mode) params->blend_dst_rgb,
+                .src_alpha = (enum pl_blend_mode) params->blend_src_alpha,
+                .dst_alpha = (enum pl_blend_mode) params->blend_dst_alpha,
+            };
+        }
+
+        for (int i = 0; i < params->num_vertex_attribs; i++) {
+            const struct ra_renderpass_input *inp = &params->vertex_attribs[i];
+            struct pl_vertex_attrib attrib = {
+                .name = inp->name,
+                .offset = inp->offset,
+                .location = i,
+                .fmt = pl_find_fmt(gpu, fmt_type[inp->type], inp->dim_v, 0,
+                                   var_size[inp->type] * 8, PL_FMT_CAP_VERTEX),
+            };
+
+            if (!attrib.fmt) {
+                MP_ERR(ra, "Failed mapping vertex attrib '%s' to pl_fmt?\n",
+                       inp->name);
+                goto error;
+            }
+
+            MP_TARRAY_APPEND(tmp, pl_params.vertex_attribs,
+                             pl_params.num_vertex_attribs, attrib);
+        }
+    }
+
+    priv->pass = pl_pass_create(gpu, &pl_params);
+    if (!priv->pass)
+        goto error;
+
+    // On success, priv is re-parented to the pass so that freeing `tmp`
+    // below only releases the temporary arrays.
+    pass = talloc_ptrtype(NULL, pass);
+    *pass = (struct ra_renderpass) {
+        .params = *ra_renderpass_params_copy(pass, params),
+        .priv = talloc_steal(pass, priv),
+    };
+
+    // fall through
+error:
+    talloc_free(tmp);
+    return pass;
+}
+
+// Destroy the libplacebo pass and free the ra_renderpass. NULL is a no-op.
+static void renderpass_destroy_pl(struct ra *ra, struct ra_renderpass *pass)
+{
+    if (pass) {
+        struct pass_priv *pp = pass->priv;
+        pl_pass_destroy(get_gpu(ra), (pl_pass *) &pp->pass);
+        talloc_free(pass);
+    }
+}
+
+// Execute a render pass: translate the supplied input values into libplacebo
+// variable updates and descriptor bindings, then dispatch the raster or
+// compute pass.
+static void renderpass_run_pl(struct ra *ra,
+                              const struct ra_renderpass_run_params *params)
+{
+    struct pass_priv *p = params->pass->priv;
+    p->num_varups = 0;
+
+    for (int i = 0; i < params->num_values; i++) {
+        const struct ra_renderpass_input_val *val = &params->values[i];
+        // Each value names its input through val->index (the same index used
+        // for inp_index below); values[] is not required to line up with
+        // inputs[], so the input must not be looked up by position `i`.
+        const struct ra_renderpass_input *inp =
+            &params->pass->params.inputs[val->index];
+        if (var_type[inp->type]) {
+            MP_TARRAY_APPEND(p, p->varups, p->num_varups, (struct pl_var_update) {
+                .index = p->inp_index[val->index],
+                .data = val->data,
+            });
+        } else {
+            // Zero-initialize so that fields a case does not set (the
+            // sampling modes for buffers) are never copied uninitialized.
+            struct pl_desc_binding bind = {0};
+            switch (inp->type) {
+            case RA_VARTYPE_TEX:
+            case RA_VARTYPE_IMG_W: {
+                struct ra_tex *tex = *((struct ra_tex **) val->data);
+                bind.object = tex->priv;
+                bind.sample_mode = tex->params.src_linear ? PL_TEX_SAMPLE_LINEAR
+                                                          : PL_TEX_SAMPLE_NEAREST;
+                bind.address_mode = tex->params.src_repeat ? PL_TEX_ADDRESS_REPEAT
+                                                           : PL_TEX_ADDRESS_CLAMP;
+                break;
+            }
+            case RA_VARTYPE_BUF_RO:
+            case RA_VARTYPE_BUF_RW:
+                bind.object = (* (struct ra_buf **) val->data)->priv;
+                break;
+            default: MP_ASSERT_UNREACHABLE();
+            }
+
+            p->binds[p->inp_index[val->index]] = bind;
+        }
+    }
+
+    struct pl_pass_run_params pl_params = {
+        .pass = p->pass,
+        .var_updates = p->varups,
+        .num_var_updates = p->num_varups,
+        .desc_bindings = p->binds,
+        .push_constants = params->push_constants,
+        .timer = get_active_timer(ra),
+    };
+
+    // Raster passes need a target and vertex data; compute passes only need
+    // the dispatch dimensions.
+    if (p->pass->params.type == PL_PASS_RASTER) {
+        pl_params.target = params->target->priv;
+        pl_params.viewport = mp_rect2d_to_pl(params->viewport);
+        pl_params.scissors = mp_rect2d_to_pl(params->scissors);
+        pl_params.vertex_data = params->vertex_data;
+        pl_params.vertex_count = params->vertex_count;
+    } else {
+        for (int i = 0; i < MP_ARRAY_SIZE(pl_params.compute_groups); i++)
+            pl_params.compute_groups[i] = params->compute_groups[i];
+    }
+
+    pl_pass_run(get_gpu(ra), &pl_params);
+}
+
+// A ra timer backed by a growable pool of libplacebo timers.
+struct ra_timer_pl {
+    // Because libplacebo only supports one operation per timer, we need
+    // to use multiple pl_timers to sum up multiple passes/transfers
+    pl_timer *timers;
+    int num_timers; // number of pl_timers allocated in `timers`
+    int idx_timers; // number of pl_timers handed out since the last start
+};
+
+// Allocate an empty timer as a talloc child of `ra`; the underlying
+// pl_timers are created on demand by get_active_timer().
+static ra_timer *timer_create_pl(struct ra *ra)
+{
+    return talloc_zero(ra, struct ra_timer_pl);
+}
+
+// Destroy all pl_timers owned by `timer` and free it. NULL is a no-op, like
+// the other destroy callbacks in this file.
+static void timer_destroy_pl(struct ra *ra, ra_timer *timer)
+{
+    struct ra_timer_pl *t = timer;
+    if (!t)
+        return;
+
+    // Never leave a dangling pointer behind if the timer is destroyed while
+    // it is still running; get_active_timer() would otherwise use freed memory.
+    struct ra_pl *p = ra->priv;
+    if (p->active_timer == t)
+        p->active_timer = NULL;
+
+    pl_gpu gpu = get_gpu(ra);
+    for (int i = 0; i < t->num_timers; i++)
+        pl_timer_destroy(gpu, &t->timers[i]);
+
+    talloc_free(t);
+}
+
+// Make `timer` the active timer and rewind its pool of pl_timers. Does
+// nothing if another timer is already active.
+static void timer_start_pl(struct ra *ra, ra_timer *timer)
+{
+    struct ra_pl *p = ra->priv;
+
+    // There's nothing easy we can do in this case, since libplacebo only
+    // supports one timer object per operation; so just ignore "inner" timers
+    // when the user is nesting different timer queries
+    if (!p->active_timer) {
+        struct ra_timer_pl *t = timer;
+        t->idx_timers = 0;
+        p->active_timer = t;
+    }
+}
+
+// Stop `timer` and return the sum of pl_timer_query() over every pl_timer
+// handed out since it was started. Returns 0 if `timer` is not the active
+// timer (e.g. it was an ignored nested timer).
+static uint64_t timer_stop_pl(struct ra *ra, ra_timer *timer)
+{
+    struct ra_pl *p = ra->priv;
+    struct ra_timer_pl *t = timer;
+    uint64_t total = 0;
+
+    if (p->active_timer == t) {
+        p->active_timer = NULL;
+
+        // Sum up all of the active results
+        for (int n = 0; n < t->idx_timers; n++)
+            total += pl_timer_query(p->gpu, t->timers[n]);
+    }
+
+    return total;
+}
+
+// Hand out the next unused pl_timer of the active ra timer, creating a new
+// one if the pool is exhausted. Returns NULL if no timer is active.
+static pl_timer get_active_timer(const struct ra *ra)
+{
+    struct ra_pl *p = ra->priv;
+    struct ra_timer_pl *t = p->active_timer;
+    if (!t)
+        return NULL;
+
+    if (t->idx_timers == t->num_timers) {
+        pl_timer fresh = pl_timer_create(p->gpu);
+        MP_TARRAY_APPEND(t, t->timers, t->num_timers, fresh);
+    }
+
+    pl_timer next = t->timers[t->idx_timers];
+    t->idx_timers++;
+    return next;
+}
+
+// Function table of the libplacebo backend. Its address also identifies
+// libplacebo-backed ra instances (see ra_pl_get()).
+static struct ra_fns ra_fns_pl = {
+    .destroy                = destroy_ra_pl,
+    .tex_create             = tex_create_pl,
+    .tex_destroy            = tex_destroy_pl,
+    .tex_upload             = tex_upload_pl,
+    .tex_download           = tex_download_pl,
+    .buf_create             = buf_create_pl,
+    .buf_destroy            = buf_destroy_pl,
+    .buf_update             = buf_update_pl,
+    .buf_poll               = buf_poll_pl,
+    .clear                  = clear_pl,
+    .blit                   = blit_pl,
+    .uniform_layout         = uniform_layout_pl,
+    .push_constant_layout   = push_constant_layout_pl,
+    .desc_namespace         = desc_namespace_pl,
+    .renderpass_create      = renderpass_create_pl,
+    .renderpass_destroy     = renderpass_destroy_pl,
+    .renderpass_run         = renderpass_run_pl,
+    .timer_create           = timer_create_pl,
+    .timer_destroy          = timer_destroy_pl,
+    .timer_start            = timer_start_pl,
+    .timer_stop             = timer_stop_pl,
+};
+
diff --git a/video/out/placebo/ra_pl.h b/video/out/placebo/ra_pl.h
new file mode 100644
index 0000000..1290c9c
--- /dev/null
+++ b/video/out/placebo/ra_pl.h
@@ -0,0 +1,16 @@
+#pragma once
+
+#include "video/out/gpu/ra.h"
+#include <libplacebo/gpu.h>
+
+struct ra *ra_create_pl(pl_gpu gpu, struct mp_log *log);
+
+pl_gpu ra_pl_get(const struct ra *ra);
+
+// Return the priv pointer of `format` as a pl_fmt. Only meaningful for
+// formats belonging to a ra created by ra_create_pl().
+static inline pl_fmt ra_pl_fmt_get(const struct ra_format *format)
+{
+    return format->priv;
+}
+
+// Wrap a pl_tex into a ra_tex struct, returns if successful
+bool mppl_wrap_tex(struct ra *ra, pl_tex pltex, struct ra_tex *out_tex);
diff --git a/video/out/placebo/utils.c b/video/out/placebo/utils.c
new file mode 100644
index 0000000..1209b72
--- /dev/null
+++ b/video/out/placebo/utils.c
@@ -0,0 +1,263 @@
+#include "common/common.h"
+#include "utils.h"
+
+#include <libplacebo/utils/dolbyvision.h>
+
+// libplacebo log level -> mpv message level. Note that libplacebo's INFO is
+// demoted to mpv's verbose level.
+static const int pl_log_to_msg_lev[PL_LOG_ALL+1] = {
+    [PL_LOG_FATAL] = MSGL_FATAL,
+    [PL_LOG_ERR]   = MSGL_ERR,
+    [PL_LOG_WARN]  = MSGL_WARN,
+    [PL_LOG_INFO]  = MSGL_V,
+    [PL_LOG_DEBUG] = MSGL_DEBUG,
+    [PL_LOG_TRACE] = MSGL_TRACE,
+};
+
+// mpv message level -> libplacebo log level (the inverse of the table above):
+// mpv's INFO and STATUS levels only enable libplacebo messages up to WARN.
+static const enum pl_log_level msg_lev_to_pl_log[MSGL_MAX+1] = {
+    [MSGL_FATAL]   = PL_LOG_FATAL,
+    [MSGL_ERR]     = PL_LOG_ERR,
+    [MSGL_WARN]    = PL_LOG_WARN,
+    [MSGL_INFO]    = PL_LOG_WARN,
+    [MSGL_STATUS]  = PL_LOG_WARN,
+    [MSGL_V]       = PL_LOG_INFO,
+    [MSGL_DEBUG]   = PL_LOG_DEBUG,
+    [MSGL_TRACE]   = PL_LOG_TRACE,
+    [MSGL_MAX]     = PL_LOG_ALL,
+};
+
+// Translates log levels while probing: fatal errors, errors and warnings are
+// demoted to PL_LOG_INFO, every other level is returned unchanged.
+static enum pl_log_level probing_map(enum pl_log_level level)
+{
+    switch (level) {
+    case PL_LOG_FATAL:
+    case PL_LOG_ERR:
+    case PL_LOG_WARN:
+        return PL_LOG_INFO;
+
+    default:
+        return level;
+    }
+}
+
+// libplacebo log callback: forward `msg` to the mp_log passed as `priv`,
+// at the mpv level corresponding to `level`.
+static void log_cb(void *priv, enum pl_log_level level, const char *msg)
+{
+    struct mp_log *mplog = priv;
+    int msgl = pl_log_to_msg_lev[level];
+
+    mp_msg(mplog, msgl, "%s\n", msg);
+}
+
+// Like log_cb(), but passes `level` through probing_map() first, so that
+// warnings and errors are reported at a lower level.
+static void log_cb_probing(void *priv, enum pl_log_level level, const char *msg)
+{
+    struct mp_log *mplog = priv;
+    int msgl = pl_log_to_msg_lev[probing_map(level)];
+
+    mp_msg(mplog, msgl, "%s\n", msg);
+}
+
+// Create a pl_log that forwards to a new "libplacebo" child of `log`
+// (allocated under `tactx`), using the log level currently set for `log`.
+pl_log mppl_log_create(void *tactx, struct mp_log *log)
+{
+    struct pl_log_params params = {
+        .log_cb     = log_cb,
+        .log_level  = msg_lev_to_pl_log[mp_msg_level(log)],
+        .log_priv   = mp_log_new(tactx, log, "libplacebo"),
+    };
+
+    return pl_log_create(PL_API_VER, &params);
+}
+
+// Switch `log` between the regular callback and the probing callback, which
+// demotes warnings/errors. All other log parameters are kept.
+void mppl_log_set_probing(pl_log log, bool probing)
+{
+    struct pl_log_params updated = log->params;
+
+    if (probing) {
+        updated.log_cb = log_cb_probing;
+    } else {
+        updated.log_cb = log_cb;
+    }
+
+    pl_log_update(log, &updated);
+}
+
+// Translate mpv color primaries to the libplacebo enum. The switch has no
+// default case, so every enumerator is listed explicitly; falling out of it
+// is treated as unreachable.
+enum pl_color_primaries mp_prim_to_pl(enum mp_csp_prim prim)
+{
+    switch (prim) {
+    case MP_CSP_PRIM_AUTO:          return PL_COLOR_PRIM_UNKNOWN;
+    case MP_CSP_PRIM_BT_601_525:    return PL_COLOR_PRIM_BT_601_525;
+    case MP_CSP_PRIM_BT_601_625:    return PL_COLOR_PRIM_BT_601_625;
+    case MP_CSP_PRIM_BT_709:        return PL_COLOR_PRIM_BT_709;
+    case MP_CSP_PRIM_BT_2020:       return PL_COLOR_PRIM_BT_2020;
+    case MP_CSP_PRIM_BT_470M:       return PL_COLOR_PRIM_BT_470M;
+    case MP_CSP_PRIM_APPLE:         return PL_COLOR_PRIM_APPLE;
+    case MP_CSP_PRIM_ADOBE:         return PL_COLOR_PRIM_ADOBE;
+    case MP_CSP_PRIM_PRO_PHOTO:     return PL_COLOR_PRIM_PRO_PHOTO;
+    case MP_CSP_PRIM_CIE_1931:      return PL_COLOR_PRIM_CIE_1931;
+    case MP_CSP_PRIM_DCI_P3:        return PL_COLOR_PRIM_DCI_P3;
+    case MP_CSP_PRIM_DISPLAY_P3:    return PL_COLOR_PRIM_DISPLAY_P3;
+    case MP_CSP_PRIM_V_GAMUT:       return PL_COLOR_PRIM_V_GAMUT;
+    case MP_CSP_PRIM_S_GAMUT:       return PL_COLOR_PRIM_S_GAMUT;
+    case MP_CSP_PRIM_EBU_3213:      return PL_COLOR_PRIM_EBU_3213;
+    case MP_CSP_PRIM_FILM_C:        return PL_COLOR_PRIM_FILM_C;
+    case MP_CSP_PRIM_ACES_AP0:      return PL_COLOR_PRIM_ACES_AP0;
+    case MP_CSP_PRIM_ACES_AP1:      return PL_COLOR_PRIM_ACES_AP1;
+    case MP_CSP_PRIM_COUNT:         return PL_COLOR_PRIM_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate libplacebo color primaries back to the mpv enum (inverse of
+// mp_prim_to_pl()). Falling out of the switch is treated as unreachable.
+enum mp_csp_prim mp_prim_from_pl(enum pl_color_primaries prim)
+{
+    switch (prim){
+    case PL_COLOR_PRIM_UNKNOWN:     return MP_CSP_PRIM_AUTO;
+    case PL_COLOR_PRIM_BT_601_525:  return MP_CSP_PRIM_BT_601_525;
+    case PL_COLOR_PRIM_BT_601_625:  return MP_CSP_PRIM_BT_601_625;
+    case PL_COLOR_PRIM_BT_709:      return MP_CSP_PRIM_BT_709;
+    case PL_COLOR_PRIM_BT_2020:     return MP_CSP_PRIM_BT_2020;
+    case PL_COLOR_PRIM_BT_470M:     return MP_CSP_PRIM_BT_470M;
+    case PL_COLOR_PRIM_APPLE:       return MP_CSP_PRIM_APPLE;
+    case PL_COLOR_PRIM_ADOBE:       return MP_CSP_PRIM_ADOBE;
+    case PL_COLOR_PRIM_PRO_PHOTO:   return MP_CSP_PRIM_PRO_PHOTO;
+    case PL_COLOR_PRIM_CIE_1931:    return MP_CSP_PRIM_CIE_1931;
+    case PL_COLOR_PRIM_DCI_P3:      return MP_CSP_PRIM_DCI_P3;
+    case PL_COLOR_PRIM_DISPLAY_P3:  return MP_CSP_PRIM_DISPLAY_P3;
+    case PL_COLOR_PRIM_V_GAMUT:     return MP_CSP_PRIM_V_GAMUT;
+    case PL_COLOR_PRIM_S_GAMUT:     return MP_CSP_PRIM_S_GAMUT;
+    case PL_COLOR_PRIM_EBU_3213:    return MP_CSP_PRIM_EBU_3213;
+    case PL_COLOR_PRIM_FILM_C:      return MP_CSP_PRIM_FILM_C;
+    case PL_COLOR_PRIM_ACES_AP0:    return MP_CSP_PRIM_ACES_AP0;
+    case PL_COLOR_PRIM_ACES_AP1:    return MP_CSP_PRIM_ACES_AP1;
+    case PL_COLOR_PRIM_COUNT:       return MP_CSP_PRIM_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate an mpv transfer characteristic to the libplacebo enum. Every
+// enumerator is listed explicitly; falling out of the switch is treated as
+// unreachable.
+enum pl_color_transfer mp_trc_to_pl(enum mp_csp_trc trc)
+{
+    switch (trc) {
+    case MP_CSP_TRC_AUTO:           return PL_COLOR_TRC_UNKNOWN;
+    case MP_CSP_TRC_BT_1886:        return PL_COLOR_TRC_BT_1886;
+    case MP_CSP_TRC_SRGB:           return PL_COLOR_TRC_SRGB;
+    case MP_CSP_TRC_LINEAR:         return PL_COLOR_TRC_LINEAR;
+    case MP_CSP_TRC_GAMMA18:        return PL_COLOR_TRC_GAMMA18;
+    case MP_CSP_TRC_GAMMA20:        return PL_COLOR_TRC_GAMMA20;
+    case MP_CSP_TRC_GAMMA22:        return PL_COLOR_TRC_GAMMA22;
+    case MP_CSP_TRC_GAMMA24:        return PL_COLOR_TRC_GAMMA24;
+    case MP_CSP_TRC_GAMMA26:        return PL_COLOR_TRC_GAMMA26;
+    case MP_CSP_TRC_GAMMA28:        return PL_COLOR_TRC_GAMMA28;
+    case MP_CSP_TRC_PRO_PHOTO:      return PL_COLOR_TRC_PRO_PHOTO;
+    case MP_CSP_TRC_PQ:             return PL_COLOR_TRC_PQ;
+    case MP_CSP_TRC_HLG:            return PL_COLOR_TRC_HLG;
+    case MP_CSP_TRC_V_LOG:          return PL_COLOR_TRC_V_LOG;
+    case MP_CSP_TRC_S_LOG1:         return PL_COLOR_TRC_S_LOG1;
+    case MP_CSP_TRC_S_LOG2:         return PL_COLOR_TRC_S_LOG2;
+    case MP_CSP_TRC_ST428:          return PL_COLOR_TRC_ST428;
+    case MP_CSP_TRC_COUNT:          return PL_COLOR_TRC_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate a libplacebo transfer characteristic back to the mpv enum
+// (inverse of mp_trc_to_pl()). Falling out of the switch is treated as
+// unreachable.
+enum mp_csp_trc mp_trc_from_pl(enum pl_color_transfer trc)
+{
+    switch (trc) {
+    case PL_COLOR_TRC_UNKNOWN:      return MP_CSP_TRC_AUTO;
+    case PL_COLOR_TRC_BT_1886:      return MP_CSP_TRC_BT_1886;
+    case PL_COLOR_TRC_SRGB:         return MP_CSP_TRC_SRGB;
+    case PL_COLOR_TRC_LINEAR:       return MP_CSP_TRC_LINEAR;
+    case PL_COLOR_TRC_GAMMA18:      return MP_CSP_TRC_GAMMA18;
+    case PL_COLOR_TRC_GAMMA20:      return MP_CSP_TRC_GAMMA20;
+    case PL_COLOR_TRC_GAMMA22:      return MP_CSP_TRC_GAMMA22;
+    case PL_COLOR_TRC_GAMMA24:      return MP_CSP_TRC_GAMMA24;
+    case PL_COLOR_TRC_GAMMA26:      return MP_CSP_TRC_GAMMA26;
+    case PL_COLOR_TRC_GAMMA28:      return MP_CSP_TRC_GAMMA28;
+    case PL_COLOR_TRC_PRO_PHOTO:    return MP_CSP_TRC_PRO_PHOTO;
+    case PL_COLOR_TRC_PQ:           return MP_CSP_TRC_PQ;
+    case PL_COLOR_TRC_HLG:          return MP_CSP_TRC_HLG;
+    case PL_COLOR_TRC_V_LOG:        return MP_CSP_TRC_V_LOG;
+    case PL_COLOR_TRC_S_LOG1:       return MP_CSP_TRC_S_LOG1;
+    case PL_COLOR_TRC_S_LOG2:       return MP_CSP_TRC_S_LOG2;
+    case PL_COLOR_TRC_ST428:        return MP_CSP_TRC_ST428;
+    case PL_COLOR_TRC_COUNT:        return MP_CSP_TRC_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate an mpv colorspace (color system) to the libplacebo enum. Every
+// enumerator is listed explicitly; falling out of the switch is treated as
+// unreachable.
+enum pl_color_system mp_csp_to_pl(enum mp_csp csp)
+{
+    switch (csp) {
+    case MP_CSP_AUTO:               return PL_COLOR_SYSTEM_UNKNOWN;
+    case MP_CSP_BT_601:             return PL_COLOR_SYSTEM_BT_601;
+    case MP_CSP_BT_709:             return PL_COLOR_SYSTEM_BT_709;
+    case MP_CSP_SMPTE_240M:         return PL_COLOR_SYSTEM_SMPTE_240M;
+    case MP_CSP_BT_2020_NC:         return PL_COLOR_SYSTEM_BT_2020_NC;
+    case MP_CSP_BT_2020_C:          return PL_COLOR_SYSTEM_BT_2020_C;
+    case MP_CSP_RGB:                return PL_COLOR_SYSTEM_RGB;
+    case MP_CSP_XYZ:                return PL_COLOR_SYSTEM_XYZ;
+    case MP_CSP_YCGCO:              return PL_COLOR_SYSTEM_YCGCO;
+    case MP_CSP_COUNT:              return PL_COLOR_SYSTEM_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate mpv color levels (TV/limited vs. PC/full range) to the
+// libplacebo enum. Falling out of the switch is treated as unreachable.
+enum pl_color_levels mp_levels_to_pl(enum mp_csp_levels levels)
+{
+    switch (levels) {
+    case MP_CSP_LEVELS_AUTO:        return PL_COLOR_LEVELS_UNKNOWN;
+    case MP_CSP_LEVELS_TV:          return PL_COLOR_LEVELS_TV;
+    case MP_CSP_LEVELS_PC:          return PL_COLOR_LEVELS_PC;
+    case MP_CSP_LEVELS_COUNT:       return PL_COLOR_LEVELS_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate libplacebo color levels back to the mpv enum (inverse of
+// mp_levels_to_pl()). Falling out of the switch is treated as unreachable.
+enum mp_csp_levels mp_levels_from_pl(enum pl_color_levels levels)
+{
+    switch (levels){
+    case PL_COLOR_LEVELS_UNKNOWN:   return MP_CSP_LEVELS_AUTO;
+    case PL_COLOR_LEVELS_TV:        return MP_CSP_LEVELS_TV;
+    case PL_COLOR_LEVELS_PC:        return MP_CSP_LEVELS_PC;
+    case PL_COLOR_LEVELS_COUNT:     return MP_CSP_LEVELS_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate an mpv alpha type to the libplacebo alpha mode; mpv's "straight"
+// alpha corresponds to libplacebo's "independent" mode. Falling out of the
+// switch is treated as unreachable.
+enum pl_alpha_mode mp_alpha_to_pl(enum mp_alpha_type alpha)
+{
+    switch (alpha) {
+    case MP_ALPHA_AUTO:             return PL_ALPHA_UNKNOWN;
+    case MP_ALPHA_STRAIGHT:         return PL_ALPHA_INDEPENDENT;
+    case MP_ALPHA_PREMUL:           return PL_ALPHA_PREMULTIPLIED;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Translate an mpv chroma sample location to the libplacebo enum. Falling
+// out of the switch is treated as unreachable.
+enum pl_chroma_location mp_chroma_to_pl(enum mp_chroma_location chroma)
+{
+    switch (chroma) {
+    case MP_CHROMA_AUTO:            return PL_CHROMA_UNKNOWN;
+    case MP_CHROMA_TOPLEFT:         return PL_CHROMA_TOP_LEFT;
+    case MP_CHROMA_LEFT:            return PL_CHROMA_LEFT;
+    case MP_CHROMA_CENTER:          return PL_CHROMA_CENTER;
+    case MP_CHROMA_COUNT:           return PL_CHROMA_COUNT;
+    }
+
+    MP_ASSERT_UNREACHABLE();
+}
+
+// Attach the Dolby Vision metadata carried by `mpi` (if any) to `frame`.
+// Compiles to a no-op unless PL_HAVE_LAV_DOLBY_VISION is defined; the raw
+// RPU buffer is additionally parsed only when PL_HAVE_LIBDOVI is defined.
+void mp_map_dovi_metadata_to_pl(struct mp_image *mpi,
+                                struct pl_frame *frame)
+{
+#ifdef PL_HAVE_LAV_DOLBY_VISION
+    if (mpi->dovi) {
+        const AVDOVIMetadata *md = (AVDOVIMetadata *) mpi->dovi->data;
+
+        // Only automatically map DoVi RPUs that don't require an EL
+        if (av_dovi_get_header(md)->disable_residual_flag) {
+            // Allocated as a talloc child of the image
+            struct pl_dovi_metadata *mapped = talloc_ptrtype(mpi, mapped);
+            pl_frame_map_avdovi_metadata(frame, mapped, md);
+        }
+    }
+
+#if defined(PL_HAVE_LIBDOVI)
+    if (mpi->dovi_buf) {
+        pl_hdr_metadata_from_dovi_rpu(&frame->color.hdr, mpi->dovi_buf->data,
+                                      mpi->dovi_buf->size);
+    }
+#endif
+
+#endif // PL_HAVE_LAV_DOLBY_VISION
+}
diff --git a/video/out/placebo/utils.h b/video/out/placebo/utils.h
new file mode 100644
index 0000000..bf780a8
--- /dev/null
+++ b/video/out/placebo/utils.h
@@ -0,0 +1,41 @@
+#pragma once
+
+#include "config.h"
+#include "common/common.h"
+#include "common/msg.h"
+#include "video/csputils.h"
+#include "video/mp_image.h"
+
+#include <libavutil/buffer.h>
+
+#include <libplacebo/common.h>
+#include <libplacebo/log.h>
+#include <libplacebo/colorspace.h>
+#include <libplacebo/renderer.h>
+#include <libplacebo/utils/libav.h>
+
+pl_log mppl_log_create(void *tactx, struct mp_log *log);
+void mppl_log_set_probing(pl_log log, bool probing);
+
+// Convert an mp_rect into a libplacebo pl_rect2d, copying all four edges.
+static inline struct pl_rect2d mp_rect2d_to_pl(struct mp_rect rc)
+{
+    struct pl_rect2d out = {
+        .x0 = rc.x0, .y0 = rc.y0,
+        .x1 = rc.x1, .y1 = rc.y1,
+    };
+    return out;
+}
+
+enum pl_color_primaries mp_prim_to_pl(enum mp_csp_prim prim);
+enum mp_csp_prim mp_prim_from_pl(enum pl_color_primaries prim);
+enum pl_color_transfer mp_trc_to_pl(enum mp_csp_trc trc);
+enum mp_csp_trc mp_trc_from_pl(enum pl_color_transfer trc);
+enum pl_color_system mp_csp_to_pl(enum mp_csp csp);
+enum pl_color_levels mp_levels_to_pl(enum mp_csp_levels levels);
+enum mp_csp_levels mp_levels_from_pl(enum pl_color_levels levels);
+enum pl_alpha_mode mp_alpha_to_pl(enum mp_alpha_type alpha);
+enum pl_chroma_location mp_chroma_to_pl(enum mp_chroma_location chroma);
+
+void mp_map_dovi_metadata_to_pl(struct mp_image *mpi,
+                                struct pl_frame *frame);
diff --git a/video/out/present_sync.c b/video/out/present_sync.c
new file mode 100644
index 0000000..a3b1089
--- /dev/null
+++ b/video/out/present_sync.c
@@ -0,0 +1,126 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <time.h>
+
+#include "misc/linked_list.h"
+#include "mpv_talloc.h"
+#include "osdep/timer.h"
+#include "present_sync.h"
+
+/* General nonsense about this mechanism.
+ *
+ * This requires that the caller has access to two related values:
+ * (ust, msc): clock time and incrementing counter of last vsync (this is
+ *             increased continuously, even if we don't swap)
+ *
+ * Note that this concept originates from the GLX_OML_sync_control extension
+ * which includes another parameter: sbc (swap counter of frame that was
+ * last displayed). Both the xorg present extension and wayland's
+ * presentation-time protocol do not include sbc values so they are omitted
+ * from this mechanism. mpv does not need to keep track of sbc calls and can
+ * have reliable presentation without it.
+ */
+
+// Hand the first entry with a recorded display time to the VO, then recycle it.
+void present_sync_get_info(struct mp_present *present, struct vo_vsync_info *info)
+{
+    struct mp_present_entry *entry = present->head;
+    // Skip entries whose queue_display_time is still zero.
+    while (entry && !entry->queue_display_time)
+        entry = entry->list_node.next;
+    if (!entry)
+        return;
+
+    info->vsync_duration = entry->vsync_duration;
+    info->skipped_vsyncs = entry->skipped_vsyncs;
+    info->last_queue_display_time = entry->queue_display_time;
+
+    // Unlink the entry, wipe all of its fields, and re-queue it at the tail
+    // of the list.
+    LL_REMOVE(list_node, present, entry);
+    *entry = (struct mp_present_entry){0};
+    LL_APPEND(list_node, present, entry);
+}
+
+struct mp_present *mp_present_initialize(void *talloc_ctx, struct mp_vo_opts *opts, int entries)
+{
+    struct mp_present *present = talloc_zero(talloc_ctx, struct mp_present);
+    present->opts = opts;
+    for (int n = entries; n > 0; n--) { // zeroed entries, talloc children of present
+        struct mp_present_entry *node = talloc_zero(present, struct mp_present_entry);
+        LL_APPEND(list_node, present, node);
+    }
+    return present;
+}
+
+// Compute presentation statistics for the first entry that has none yet.
+void present_sync_swap(struct mp_present *present)
+{
+    struct mp_present_entry *entry = present->head;
+    while (entry && entry->queue_display_time)
+        entry = entry->list_node.next;
+    if (!entry)
+        return;
+
+    struct mp_present_entry *prev = entry->list_node.prev;
+    int64_t ust = entry->ust;
+    int64_t msc = entry->msc;
+    int64_t last_ust = prev ? prev->ust : 0;
+    int64_t last_msc = prev ? prev->msc : 0;
+
+    // Avoid attempting to use any presentation statistics if the ust is 0 or has
+    // not actually updated (i.e. the last_ust is equal to ust).
+    if (!ust || ust == last_ust) {
+        entry->skipped_vsyncs = -1;
+        entry->vsync_duration = -1;
+        entry->queue_display_time = -1;
+        return;
+    }
+
+    entry->skipped_vsyncs = 0;
+    // ust is known to be non-zero here, so only msc needs the zero check.
+    int64_t ust_passed = ust - last_ust;
+    int64_t msc_passed = msc ? msc - last_msc : 0;
+    if (msc_passed && ust_passed)
+        entry->vsync_duration = ust_passed / msc_passed;
+
+    struct timespec ts;
+    if (clock_gettime(CLOCK_MONOTONIC, &ts))
+        return;
+
+    // Treat ust as a CLOCK_MONOTONIC value and rebase it onto mp_time_ns().
+    int64_t now_monotonic = MP_TIME_S_TO_NS(ts.tv_sec) + ts.tv_nsec;
+    entry->queue_display_time = mp_time_ns() - (now_monotonic - ust);
+}
+
+// Store ust/msc in the first entry that has no ust yet, or in entry number
+// swapchain_depth (counting from 1) if that position is reached first.
+void present_sync_update_values(struct mp_present *present, int64_t ust,
+                                int64_t msc)
+{
+    struct mp_present_entry *entry = present->head;
+    for (int pos = 1; entry; pos++, entry = entry->list_node.next) {
+        if (!entry->ust || pos == present->opts->swapchain_depth)
+            break;
+    }
+    if (!entry)
+        return;
+
+    entry->ust = ust;
+    entry->msc = msc;
+}
diff --git a/video/out/present_sync.h b/video/out/present_sync.h
new file mode 100644
index 0000000..ba6d0b3
--- /dev/null
+++ b/video/out/present_sync.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of mpv video player.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_PRESENT_SYNC_H
+#define MP_PRESENT_SYNC_H
+
+#include <stdbool.h>
+#include <stdint.h>
+#include "vo.h"
+
+/* Generic helpers for obtaining presentation feedback from
+ * backend APIs. This requires ust/msc values. */
+
+struct mp_present_entry {
+    int64_t ust;                 // clock time of the last vsync (from backend)
+    int64_t msc;                 // vsync counter (from backend)
+    int64_t vsync_duration;      // ust delta / msc delta; -1 if unusable
+    int64_t skipped_vsyncs;      // 0 when stats are valid; -1 if unusable
+    int64_t queue_display_time;  // 0 = not computed yet; -1 = unusable
+    // Intrusive list links, used through the LL_* macros.
+    struct {
+        struct mp_present_entry *next, *prev;
+    } list_node;
+};
+
+struct mp_present {
+    struct mp_present_entry *head, *tail;  // list of entries (LL_* macros)
+    struct mp_vo_opts *opts;               // read for swapchain_depth
+};
+
+struct mp_present *mp_present_initialize(void *talloc_ctx, struct mp_vo_opts *opts, int entries);
+
+// Used during the get_vsync call to deliver the presentation statistics to the VO.
+void present_sync_get_info(struct mp_present *present, struct vo_vsync_info *info);
+
+// Called after every buffer swap to update presentation statistics.
+void present_sync_swap(struct mp_present *present);
+
+// Called anytime the backend delivers new ust/msc values.
+void present_sync_update_values(struct mp_present *present, int64_t ust,
+                                int64_t msc);
+
+#endif /* MP_PRESENT_SYNC_H */
diff --git a/video/out/vo.c b/video/out/vo.c
new file mode 100644
index 0000000..50129fb
--- /dev/null
+++ b/video/out/vo.c
@@ -0,0 +1,1441 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <math.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpv_talloc.h"
+
+#include "config.h"
+#include "osdep/timer.h"
+#include "osdep/threads.h"
+#include "misc/dispatch.h"
+#include "misc/rendezvous.h"
+#include "options/options.h"
+#include "misc/bstr.h"
+#include "vo.h"
+#include "aspect.h"
+#include "dr_helper.h"
+#include "input/input.h"
+#include "options/m_config.h"
+#include "common/msg.h"
+#include "common/global.h"
+#include "common/stats.h"
+#include "video/hwdec.h"
+#include "video/mp_image.h"
+#include "sub/osd.h"
+#include "osdep/io.h"
+#include "osdep/threads.h"
+
+extern const struct vo_driver video_out_mediacodec_embed;
+extern const struct vo_driver video_out_x11;
+extern const struct vo_driver video_out_vdpau;
+extern const struct vo_driver video_out_xv;
+extern const struct vo_driver video_out_gpu;
+extern const struct vo_driver video_out_gpu_next;
+extern const struct vo_driver video_out_libmpv;
+extern const struct vo_driver video_out_null;
+extern const struct vo_driver video_out_image;
+extern const struct vo_driver video_out_lavc;
+extern const struct vo_driver video_out_caca;
+extern const struct vo_driver video_out_drm;
+extern const struct vo_driver video_out_direct3d;
+extern const struct vo_driver video_out_sdl;
+extern const struct vo_driver video_out_vaapi;
+extern const struct vo_driver video_out_dmabuf_wayland;
+extern const struct vo_driver video_out_wlshm;
+extern const struct vo_driver video_out_rpi;
+extern const struct vo_driver video_out_tct;
+extern const struct vo_driver video_out_sixel;
+extern const struct vo_driver video_out_kitty;
+
+static const struct vo_driver *const video_out_drivers[] =
+{
+    &video_out_libmpv,
+#if HAVE_ANDROID
+    &video_out_mediacodec_embed,
+#endif
+    &video_out_gpu,
+    &video_out_gpu_next,
+#if HAVE_VDPAU
+    &video_out_vdpau,
+#endif
+#if HAVE_DIRECT3D
+    &video_out_direct3d,
+#endif
+#if HAVE_WAYLAND && HAVE_MEMFD_CREATE
+    &video_out_wlshm,
+#endif
+#if HAVE_XV
+    &video_out_xv,
+#endif
+#if HAVE_SDL2_VIDEO
+    &video_out_sdl,
+#endif
+#if HAVE_DMABUF_WAYLAND
+    &video_out_dmabuf_wayland,
+#endif
+#if HAVE_VAAPI_X11 && HAVE_GPL
+    &video_out_vaapi,
+#endif
+#if HAVE_X11
+    &video_out_x11,
+#endif
+    &video_out_null,
+    // should not be auto-selected (autoprobing stops at video_out_null)
+    &video_out_image,
+    &video_out_tct,
+#if HAVE_CACA
+    &video_out_caca,
+#endif
+#if HAVE_DRM
+    &video_out_drm,
+#endif
+#if HAVE_RPI_MMAL
+    &video_out_rpi,
+#endif
+#if HAVE_SIXEL
+    &video_out_sixel,
+#endif
+    &video_out_kitty,
+    &video_out_lavc,
+};
+
+struct vo_internal {
+    mp_thread thread;
+    struct mp_dispatch_queue *dispatch;
+    struct dr_helper *dr_helper;
+
+    // --- The following fields are protected by lock
+    mp_mutex lock;
+    mp_cond wakeup;
+
+    bool need_wakeup;               // set by wakeup_locked(), cleared by wait_vo()
+    bool terminate;                 // set by terminate_vo()
+
+    bool hasframe;
+    bool hasframe_rendered;
+    bool request_redraw;            // redraw request from player to VO
+    bool want_redraw;               // redraw request from VO to player
+    bool send_reset;                // send VOCTRL_RESET
+    bool paused;
+    int queued_events;              // event mask for the user
+    int internal_events;            // event mask for us
+
+    double nominal_vsync_interval;  // 1e9 / display_fps, or 0 if fps unknown
+
+    double vsync_interval;          // interval in use: nominal or estimated
+    int64_t *vsync_samples;         // vsync time deltas, newest at index 0
+    int num_vsync_samples;
+    int64_t num_total_vsync_samples; // samples ever recorded (not capped)
+    int64_t prev_vsync;             // display time of the previous swap
+    double base_vsync;
+    int drop_point;
+    double estimated_vsync_interval; // mean of vsync_samples
+    double estimated_vsync_jitter;  // stddev / vsync_interval; -1 initially
+    bool expecting_vsync;
+    int64_t num_successive_vsyncs;
+
+    int64_t flip_queue_offset; // queue flip events at most this much in advance
+    int64_t timing_offset;     // same (but from options; not VO configured)
+
+    int64_t delayed_count;          // incremented by vsync_skip_detection()
+    int64_t drop_count;
+    bool dropped_frame;             // the previous frame was dropped
+
+    struct vo_frame *current_frame; // last frame queued to the VO
+
+    int64_t wakeup_pts;             // time at which to pull frame from decoder
+
+    bool rendering;                 // true if an image is being rendered
+    struct vo_frame *frame_queued;  // should be drawn next
+    int req_frames;                 // VO's requested value of num_frames
+    uint64_t current_frame_id;
+
+    double display_fps;             // override option if > 0, else reported
+    double reported_display_fps;    // from VOCTRL_GET_DISPLAY_FPS
+
+    struct stats_ctx *stats;
+};
+
+extern const struct m_sub_options gl_video_conf;
+
+static void forget_frames(struct vo *vo);
+static MP_THREAD_VOID vo_thread(void *ptr);
+
+// Fill *dst with the description of driver `index`; false if out of range.
+static bool get_desc(struct m_obj_desc *dst, int index)
+{
+    if (index >= MP_ARRAY_SIZE(video_out_drivers))
+        return false;
+    const struct vo_driver *drv = video_out_drivers[index];
+    *dst = (struct m_obj_desc) {
+        .name = drv->name, .description = drv->description,
+        .priv_size = drv->priv_size,
+        .priv_defaults = drv->priv_defaults,
+        .options = drv->options,
+        .options_prefix = drv->options_prefix,
+        .global_opts = drv->global_opts,
+        .hidden = drv->encode,
+        .p = drv,
+    };
+    return true;
+}
+
+// For the vo option: driver descriptions via get_desc, plus name aliases.
+const struct m_obj_list vo_obj_list = {
+    .get_desc = get_desc,
+    .description = "video outputs",
+    .aliases = {
+        {"gl", "gpu"},
+        {"direct3d_shaders", "direct3d"},
+        {"opengl", "gpu"},
+        {"opengl-cb", "libmpv"},
+        {0}
+    },
+    .allow_trailer = true,
+    .disallow_positional_parameters = true,
+    .use_global_options = true,
+};
+
+static void dispatch_wakeup_cb(void *ptr)
+{
+    // Dispatch queue wakeup hook; ptr is the struct vo registered with it.
+    vo_wakeup(ptr);
+}
+
+// Initialize or update options from vo->opts
+static void read_opts(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    // Cache the timing offset, converted from seconds to nanoseconds.
+    mp_mutex_lock(&in->lock);
+    in->timing_offset = (uint64_t)(MP_TIME_S_TO_NS(vo->opts->timing_offset));
+    mp_mutex_unlock(&in->lock);
+}
+
+// Re-read any changed option caches and forward the changes to the driver.
+static void update_opts(void *p)
+{
+    struct vo *vo = p;
+    if (m_config_cache_update(vo->opts_cache)) {
+        read_opts(vo);
+
+        if (vo->driver->control) {
+            vo->driver->control(vo, VOCTRL_VO_OPTS_CHANGED, NULL);
+            // "Legacy" update of video position related options.
+            // Unlike VOCTRL_VO_OPTS_CHANGED, often not propagated to backends.
+            vo->driver->control(vo, VOCTRL_SET_PANSCAN, NULL);
+        }
+    }
+
+    if (vo->gl_opts_cache && m_config_cache_update(vo->gl_opts_cache)) {
+        // "Legacy" update of video GL renderer related options.
+        if (vo->driver->control)
+            vo->driver->control(vo, VOCTRL_UPDATE_RENDER_OPTS, NULL);
+    }
+
+    if (m_config_cache_update(vo->eq_opts_cache)) {
+        // "Legacy" update of video equalizer related options.
+        if (vo->driver->control)
+            vo->driver->control(vo, VOCTRL_SET_EQUALIZER, NULL);
+    }
+}
+
+// Free the VO and its option caches. Does not include thread- and VO uninit.
+static void dealloc_vo(struct vo *vo)
+{
+    forget_frames(vo); // implicitly synchronized
+
+    // These must be free'd before vo->in->dispatch.
+    talloc_free(vo->opts_cache);
+    talloc_free(vo->gl_opts_cache);
+    talloc_free(vo->eq_opts_cache);
+    // vo->in is a talloc child of vo; destroy its primitives before freeing vo.
+    mp_mutex_destroy(&vo->in->lock);
+    mp_cond_destroy(&vo->in->wakeup);
+    talloc_free(vo);
+}
+
+// Create and start the VO named `name`; returns NULL if any step fails.
+static struct vo *vo_create(bool probing, struct mpv_global *global,
+                            struct vo_extra *ex, char *name)
+{
+    assert(ex->wakeup_cb);
+    struct mp_log *log = mp_log_new(NULL, global->log, "vo");
+    struct m_obj_desc desc;
+    if (!m_obj_list_find(&desc, &vo_obj_list, bstr0(name))) {
+        mp_msg(log, MSGL_ERR, "Video output %s not found!\n", name);
+        talloc_free(log);
+        return NULL;
+    }
+    struct vo *vo = talloc_ptrtype(NULL, vo);
+    *vo = (struct vo) {
+        .log = mp_log_new(vo, log, name),
+        .driver = desc.p,
+        .global = global,
+        .encode_lavc_ctx = ex->encode_lavc_ctx,
+        .input_ctx = ex->input_ctx,
+        .osd = ex->osd,
+        .monitor_par = 1,
+        .extra = *ex,
+        .probing = probing,
+        .in = talloc(vo, struct vo_internal),
+    };
+    talloc_steal(vo, log); // the parent log is now freed together with vo
+    *vo->in = (struct vo_internal) {
+        .dispatch = mp_dispatch_create(vo),
+        .req_frames = 1,
+        .estimated_vsync_jitter = -1,
+        .stats = stats_ctx_create(vo, global, "vo"),
+    };
+    mp_dispatch_set_wakeup_fn(vo->in->dispatch, dispatch_wakeup_cb, vo);
+    mp_mutex_init(&vo->in->lock);
+    mp_cond_init(&vo->in->wakeup);
+
+    vo->opts_cache = m_config_cache_alloc(NULL, global, &vo_sub_opts);
+    vo->opts = vo->opts_cache->opts;
+
+    m_config_cache_set_dispatch_change_cb(vo->opts_cache, vo->in->dispatch,
+                                          update_opts, vo);
+
+    vo->gl_opts_cache = m_config_cache_alloc(NULL, global, &gl_video_conf);
+    m_config_cache_set_dispatch_change_cb(vo->gl_opts_cache, vo->in->dispatch,
+                                          update_opts, vo);
+
+    vo->eq_opts_cache = m_config_cache_alloc(NULL, global, &mp_csp_equalizer_conf);
+    m_config_cache_set_dispatch_change_cb(vo->eq_opts_cache, vo->in->dispatch,
+                                          update_opts, vo);
+
+    mp_input_set_mouse_transform(vo->input_ctx, NULL, NULL);
+    if (vo->driver->encode != !!vo->encode_lavc_ctx)
+        goto error; // encode driver without encode context, or vice versa
+    vo->priv = m_config_group_from_desc(vo, vo->log, global, &desc, name);
+    if (!vo->priv)
+        goto error;
+    // Start the VO thread; a negative rendezvous value is treated as failure.
+    if (mp_thread_create(&vo->in->thread, vo_thread, vo))
+        goto error;
+    if (mp_rendezvous(vo, 0) < 0) { // init barrier
+        mp_thread_join(vo->in->thread);
+        goto error;
+    }
+    return vo;
+
+error:
+    dealloc_vo(vo);
+    return NULL;
+}
+
+// Create the first VO that initializes successfully: the configured driver
+// list is tried first; without one (or on an empty name) drivers are probed.
+struct vo *init_best_video_out(struct mpv_global *global, struct vo_extra *ex)
+{
+    struct mp_vo_opts *opts = mp_get_config_group(NULL, global, &vo_sub_opts);
+    struct m_obj_settings *vo_list = opts->video_driver_list;
+    struct vo *vo = NULL;
+    // first try the preferred drivers, with their optional subdevice param:
+    if (vo_list && vo_list[0].name) {
+        for (int n = 0; vo_list[n].name; n++) {
+            // Something like "-vo name," allows fallback to autoprobing.
+            if (!vo_list[n].name[0])
+                goto autoprobe;
+            // The probing flag is set for every entry except the last one.
+            vo = vo_create(!!vo_list[n + 1].name, global, ex, vo_list[n].name);
+            if (vo)
+                goto done;
+        }
+        goto done;
+    }
+autoprobe:
+    // now try the rest...
+    for (int i = 0; !vo && i < MP_ARRAY_SIZE(video_out_drivers); i++) {
+        const struct vo_driver *driver = video_out_drivers[i];
+        if (driver == &video_out_null)
+            break;
+        vo = vo_create(true, global, ex, (char *)driver->name);
+    }
+done:
+    talloc_free(opts);
+    return vo;
+}
+
+// Dispatched by vo_destroy(); raises the terminate flag of the VO passed in p.
+static void terminate_vo(void *p)
+{
+    struct vo *vo = p;
+    vo->in->terminate = true;
+}
+
+// Request termination, join the VO thread, then free the VO.
+void vo_destroy(struct vo *vo)
+{
+    mp_dispatch_run(vo->in->dispatch, terminate_vo, vo);
+    mp_thread_join(vo->in->thread);
+    dealloc_vo(vo);
+}
+
+// Wake up the playloop (via the vo_extra callback) to queue new video frames etc.
+static void wakeup_core(struct vo *vo)
+{
+    vo->extra.wakeup_cb(vo->extra.wakeup_ctx);
+}
+
+// Forget vsync timing state on discontinuities such as seeking.
+// The caller must hold the lock.
+static void reset_vsync_timings(struct vo *vo)
+{
+    struct vo_internal *state = vo->in;
+    state->num_successive_vsyncs = 0;
+    state->expecting_vsync = false;
+    state->base_vsync = 0;
+    state->drop_point = 0;
+}
+
+// Root-mean-square deviation of the recorded vsync samples from ref_vsync.
+static double vsync_stddef(struct vo *vo, double ref_vsync)
+{
+    double sum_sq = 0;
+    for (int n = 0; n < vo->in->num_vsync_samples; n++) {
+        double delta = vo->in->vsync_samples[n] - ref_vsync;
+        sum_sq += delta * delta;
+    }
+    return sqrt(sum_sq / vo->in->num_vsync_samples);
+}
+
+#define MAX_VSYNC_SAMPLES 1000
+#define DELAY_VSYNC_SAMPLES 10
+
+// Check if we should switch to measured average display FPS if it seems
+// "better" than the system-reported one. (Note that small differences are
+// handled as drift instead.)
+static void check_estimated_display_fps(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    // Only trust the estimate with enough samples and a 20-400 Hz interval.
+    bool use_estimated = false;
+    if (in->num_total_vsync_samples >= MAX_VSYNC_SAMPLES / 2 &&
+        in->estimated_vsync_interval <= 1e9 / 20.0 &&
+        in->estimated_vsync_interval >= 1e9 / 400.0)
+    {
+        for (int n = 0; n < in->num_vsync_samples; n++) {
+            if (fabs(in->vsync_samples[n] - in->estimated_vsync_interval)
+                >= in->estimated_vsync_interval / 4)
+                goto done;
+        }
+        double mjitter = vsync_stddef(vo, in->estimated_vsync_interval);
+        double njitter = vsync_stddef(vo, in->nominal_vsync_interval);
+        if (mjitter * 1.01 < njitter)
+            use_estimated = true;
+        done: ;
+    }
+    if (use_estimated == (fabs(in->vsync_interval - in->nominal_vsync_interval) < 1e9)) {
+        if (use_estimated) {
+            MP_TRACE(vo, "adjusting display FPS to a value closer to %.3f Hz\n",
+                       1e9 / in->estimated_vsync_interval);
+        } else {
+            MP_TRACE(vo, "switching back to assuming display fps = %.3f Hz\n",
+                       1e9 / in->nominal_vsync_interval);
+        }
+    }
+    in->vsync_interval = use_estimated ? in->estimated_vsync_interval
+                                       : in->nominal_vsync_interval;
+}
+
+// Attempt to detect vsyncs delayed/skipped by the driver. This tries to deal
+// with strong jitter too, because some drivers have crap vsync timing.
+static void vsync_skip_detection(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    // Walk backwards: t_r steps by measured samples, t_e by vsync_interval.
+    int window = 4;
+    double t_r = in->prev_vsync, t_e = in->base_vsync, diff = 0.0, desync_early = 0.0;
+    for (int n = 0; n < in->drop_point; n++) {
+        diff += t_r - t_e;
+        t_r -= in->vsync_samples[n];
+        t_e -= in->vsync_interval;
+        if (n == window + 1)
+            desync_early = diff / window;
+    }
+    double desync = diff / in->num_vsync_samples;
+    if (in->drop_point > window * 2 &&
+        fabs(desync - desync_early) >= in->vsync_interval * 3 / 4)
+    {
+        // Assume a drop. An underflow can technically speaking not be a drop
+        // (it's up to the driver what this is supposed to mean), but no reason
+        // to treat it differently.
+        in->base_vsync = in->prev_vsync;
+        in->delayed_count += 1;
+        in->drop_point = 0;
+        MP_STATS(vo, "vo-delayed");
+    }
+    if (in->drop_point > 10)
+        in->base_vsync += desync / 10;  // smooth out drift
+}
+
+// Update vsync statistics from the latest swap. Always called locked.
+static void update_vsync_timing_after_swap(struct vo *vo,
+                                           struct vo_vsync_info *vsync)
+{
+    struct vo_internal *in = vo->in;
+
+    int64_t vsync_time = vsync->last_queue_display_time;
+    int64_t prev_vsync = in->prev_vsync;
+    in->prev_vsync = vsync_time;
+
+    if (!in->expecting_vsync) {
+        reset_vsync_timings(vo);
+        return;
+    }
+    // Skip the first DELAY_VSYNC_SAMPLES vsyncs after a timing reset.
+    in->num_successive_vsyncs++;
+    if (in->num_successive_vsyncs <= DELAY_VSYNC_SAMPLES)
+        return;
+    // Reject missing or non-increasing timestamps and forget the previous one.
+    if (vsync_time <= 0 || vsync_time <= prev_vsync) {
+        in->prev_vsync = 0;
+        return;
+    }
+
+    if (prev_vsync <= 0)
+        return;
+    // Insert the new delta at index 0; when full, the oldest sample is dropped.
+    if (in->num_vsync_samples >= MAX_VSYNC_SAMPLES)
+        in->num_vsync_samples -= 1;
+    MP_TARRAY_INSERT_AT(in, in->vsync_samples, in->num_vsync_samples, 0,
+                        vsync_time - prev_vsync);
+    in->drop_point = MPMIN(in->drop_point + 1, in->num_vsync_samples);
+    in->num_total_vsync_samples += 1;
+    if (in->base_vsync) {
+        in->base_vsync += in->vsync_interval;
+    } else {
+        in->base_vsync = vsync_time;
+    }
+    // Recompute the mean interval and the jitter relative to vsync_interval.
+    double avg = 0;
+    for (int n = 0; n < in->num_vsync_samples; n++) {
+        assert(in->vsync_samples[n] > 0);
+        avg += in->vsync_samples[n];
+    }
+    in->estimated_vsync_interval = avg / in->num_vsync_samples;
+    in->estimated_vsync_jitter =
+        vsync_stddef(vo, in->vsync_interval) / in->vsync_interval;
+
+    check_estimated_display_fps(vo);
+    vsync_skip_detection(vo);
+
+    MP_STATS(vo, "value %f jitter", in->estimated_vsync_jitter);
+    MP_STATS(vo, "value %f vsync-diff", MP_TIME_NS_TO_S(in->vsync_samples[0]));
+}
+
+// Refresh the display FPS used for sync. To be called from VO thread only.
+static void update_display_fps(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    mp_mutex_lock(&in->lock);
+    if (in->internal_events & VO_EVENT_WIN_STATE) {
+        in->internal_events &= ~(unsigned)VO_EVENT_WIN_STATE;
+        // Query the driver without holding the lock.
+        mp_mutex_unlock(&in->lock);
+
+        double fps = 0;
+        vo->driver->control(vo, VOCTRL_GET_DISPLAY_FPS, &fps);
+
+        mp_mutex_lock(&in->lock);
+
+        in->reported_display_fps = fps;
+    }
+    // A positive display_fps_override takes precedence over the reported value.
+    double display_fps = vo->opts->display_fps_override;
+    if (display_fps <= 0)
+        display_fps = in->reported_display_fps;
+
+    if (in->display_fps != display_fps) {
+        in->nominal_vsync_interval =  display_fps > 0 ? 1e9 / display_fps : 0;
+        in->vsync_interval = MPMAX(in->nominal_vsync_interval, 1);
+        in->display_fps = display_fps;
+
+        MP_VERBOSE(vo, "Assuming %f FPS for display sync.\n", display_fps);
+
+        // make sure to update the player
+        in->queued_events |= VO_EVENT_WIN_STATE;
+        wakeup_core(vo);
+    }
+
+    mp_mutex_unlock(&in->lock);
+}
+
+// Warn when the video is rotated but the driver lacks matching support.
+static void check_vo_caps(struct vo *vo)
+{
+    int rot = vo->params->rotate;
+    bool right_angle = rot % 90 == 0;
+    bool supported = right_angle && (vo->driver->caps & VO_CAP_ROTATE90);
+    if (rot && !supported) {
+        MP_WARN(vo, "Video is flagged as rotated by %d degrees, but the "
+                "video output does not support this.\n", rot);
+    }
+}
+
+// Dispatch callback: p is {vo, mp_image *, int *ret}; reconfigures the driver.
+static void run_reconfig(void *p)
+{
+    void **pp = p;
+    struct vo *vo = pp[0];
+    struct mp_image *img = pp[1];
+    int *ret = pp[2];
+    struct mp_image_params *params = &img->params;
+
+    struct vo_internal *in = vo->in;
+
+    MP_VERBOSE(vo, "reconfig to %s\n", mp_image_params_to_str(params));
+
+    update_opts(vo);
+
+    mp_image_params_get_dsize(params, &vo->dwidth, &vo->dheight);
+
+    talloc_free(vo->params);
+    vo->params = talloc_dup(vo, params);
+    // Use the image-based reconfig2() entry point when the driver has one.
+    if (vo->driver->reconfig2) {
+        *ret = vo->driver->reconfig2(vo, img);
+    } else {
+        *ret = vo->driver->reconfig(vo, vo->params);
+    }
+    vo->config_ok = *ret >= 0;
+    if (vo->config_ok) {
+        check_vo_caps(vo);
+    } else {
+        talloc_free(vo->params);
+        vo->params = NULL;
+    }
+    // Frames kept from before the reconfig are discarded in either case.
+    mp_mutex_lock(&in->lock);
+    talloc_free(in->current_frame);
+    in->current_frame = NULL;
+    forget_frames(vo);
+    reset_vsync_timings(vo);
+    mp_mutex_unlock(&in->lock);
+
+    update_display_fps(vo);
+}
+
+int vo_reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    int ret;
+    struct mp_image dummy = {0};    // zeroed image that only carries params
+    mp_image_set_params(&dummy, params);
+    void *p[] = {vo, &dummy, &ret}; // unpacked by run_reconfig()
+    mp_dispatch_run(vo->in->dispatch, run_reconfig, p); // run_reconfig() writes ret
+    return ret;
+}
+
+int vo_reconfig2(struct vo *vo, struct mp_image *img)
+{
+    int ret;
+    void *p[] = {vo, img, &ret}; // unpacked by run_reconfig()
+    mp_dispatch_run(vo->in->dispatch, run_reconfig, p); // run_reconfig() writes ret
+    return ret;
+}
+
+// Dispatch callback: p is {vo, request, data, int *ret}; ret may be NULL.
+static void run_control(void *p)
+{
+    void **args = p;
+    struct vo *vo = args[0];
+    int request = (intptr_t)args[1];
+    update_opts(vo);
+    int ret = vo->driver->control(vo, request, args[2]);
+    if (args[3])
+        *(int *)args[3] = ret;
+}
+
+int vo_control(struct vo *vo, int request, void *data)
+{
+    int ret;
+    void *p[] = {vo, (void *)(intptr_t)request, data, &ret}; // see run_control()
+    mp_dispatch_run(vo->in->dispatch, run_control, p); // run_control() writes ret
+    return ret;
+}
+
+// Run vo_control() without waiting for a reply.
+// (Only works for some VOCTRLs.)
+void vo_control_async(struct vo *vo, int request, void *data)
+{
+    void *p[4] = {vo, (void *)(intptr_t)request, NULL, NULL};
+    void **d = talloc_memdup(NULL, p, sizeof(p));
+    // d[2] stays NULL unless the request's data is copied below (as child of d).
+    switch (request) {
+    case VOCTRL_UPDATE_PLAYBACK_STATE:
+        d[2] = talloc_dup(d, (struct voctrl_playback_state *)data);
+        break;
+    case VOCTRL_KILL_SCREENSAVER:
+    case VOCTRL_RESTORE_SCREENSAVER:
+        break;
+    default:
+        abort(); // requires explicit support
+    }
+
+    mp_dispatch_enqueue_autofree(vo->in->dispatch, run_control, d);
+}
+
+// Drop the queued frame and reset frame statistics. Must be called locked.
+static void forget_frames(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    in->hasframe = false;
+    in->hasframe_rendered = false;
+    in->drop_count = 0;
+    in->delayed_count = 0;
+    talloc_free(in->frame_queued);
+    in->frame_queued = NULL;
+    in->current_frame_id += VO_MAX_REQ_FRAMES + 1;
+    // don't unref current_frame; we always want to be able to redraw it
+    if (in->current_frame) {
+        in->current_frame->num_vsyncs = 0; // but reset future repeats
+        in->current_frame->display_synced = false; // mark discontinuity
+    }
+}
+
+// VOs which have no special requirements on UI event loops etc. can set the
+// vo_driver.wait_events callback to this (and leave vo_driver.wakeup unset).
+// This function must not be used or called for other purposes.
+void vo_wait_default(struct vo *vo, int64_t until_time)
+{
+    struct vo_internal *in = vo->in;
+    // Do not block at all if a wakeup has already been requested.
+    mp_mutex_lock(&in->lock);
+    if (!in->need_wakeup)
+        mp_cond_timedwait_until(&in->wakeup, &in->lock, until_time);
+    mp_mutex_unlock(&in->lock);
+}
+
+// Wait for events or until_time, then clear need_wakeup. Called unlocked.
+static void wait_vo(struct vo *vo, int64_t until_time)
+{
+    struct vo_internal *in = vo->in;
+    // Use the driver's own wait hook if it has one, else the default wait.
+    if (vo->driver->wait_events) {
+        vo->driver->wait_events(vo, until_time);
+    } else {
+        vo_wait_default(vo, until_time);
+    }
+    mp_mutex_lock(&in->lock);
+    in->need_wakeup = false;
+    mp_mutex_unlock(&in->lock);
+}
+
+// Signal the wakeup condition and the driver's wakeup hook; sets need_wakeup.
+static void wakeup_locked(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    mp_cond_broadcast(&in->wakeup);
+    if (vo->driver->wakeup)
+        vo->driver->wakeup(vo);
+    in->need_wakeup = true;
+}
+
+// Wake up the VO thread, and make it check for new events with
+// VOCTRL_CHECK_EVENTS. To be used by threaded VO backends.
+void vo_wakeup(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    // wakeup_locked() is called with in->lock held.
+    mp_mutex_lock(&in->lock);
+    wakeup_locked(vo);
+    mp_mutex_unlock(&in->lock);
+}
+
+// Whether vo_queue_frame() can be called. If the VO is not ready yet, the
+// function will return false, and the VO will call the wakeup callback once
+// it's ready.
+// next_pts is the exact time when the next frame should be displayed. If the
+// VO is ready, but the time is too "early", return false, and call the wakeup
+// callback once the time is right.
+// If next_pts is negative, disable any timing and draw the frame as fast as
+// possible.
+bool vo_is_ready_for_frame(struct vo *vo, int64_t next_pts)
+{
+    struct vo_internal *in = vo->in;
+    mp_mutex_lock(&in->lock);
+    bool blocked = vo->driver->initially_blocked &&
+                   !(in->internal_events & VO_EVENT_INITIAL_UNBLOCK);
+    bool r = vo->config_ok && !in->frame_queued && !blocked &&
+             (!in->current_frame || in->current_frame->num_vsyncs < 1);
+    if (r && next_pts >= 0) {
+        // Don't show the frame too early - it would basically freeze the
+        // display by disallowing OSD redrawing or VO interaction.
+        // Actually render the frame at earliest the given offset before target
+        // time.
+        next_pts -= in->timing_offset;
+        next_pts -= in->flip_queue_offset;
+        int64_t now = mp_time_ns();
+        if (next_pts > now)
+            r = false;
+        if (!in->wakeup_pts || next_pts < in->wakeup_pts) { // keep the earliest
+            in->wakeup_pts = next_pts;
+            // If we have to wait, update the vo thread's timer.
+            if (!r)
+                wakeup_locked(vo);
+        }
+    }
+    mp_mutex_unlock(&in->lock);
+    return r;
+}
+
+// Queue frame for display by the VO thread; ownership passes to the vo.
+// vo_is_ready_for_frame() must have returned true before this call.
+void vo_queue_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    assert(vo->config_ok && !state->frame_queued &&
+           (!state->current_frame || state->current_frame->num_vsyncs < 1));
+    state->hasframe = true;
+    frame->frame_id = ++(state->current_frame_id);
+    state->frame_queued = frame;
+    state->wakeup_pts = 0; // display-synced frames do not arm the timer
+    if (!frame->display_synced)
+        state->wakeup_pts = frame->pts + MPMAX(frame->duration, 0);
+    wakeup_locked(vo);
+    mp_mutex_unlock(&state->lock);
+}
+
+// Block until no frame is queued and none is being rendered. Returns at once
+// if that is already the case.
+void vo_wait_frame(struct vo *vo)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    while (state->rendering || state->frame_queued) // woken by render_frame()
+        mp_cond_wait(&state->wakeup, &state->lock);
+    mp_mutex_unlock(&state->lock);
+}
+
+// Wait until realtime is >= target, or until live resizing is flagged, or
+// until the timed wait on in->wakeup returns non-zero. Called without lock.
+static void wait_until(struct vo *vo, int64_t target)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    while (mp_time_ns() < target) {
+        bool resizing = state->queued_events & VO_EVENT_LIVE_RESIZING;
+        if (resizing ||
+            mp_cond_timedwait_until(&state->wakeup, &state->lock, target))
+            break;
+    }
+    mp_mutex_unlock(&state->lock);
+}
+
+static bool render_frame(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    struct vo_frame *frame = NULL;
+    bool more_frames = false;
+    // Returns true if display-synced work remains (see the end of the function).
+    update_display_fps(vo);
+
+    mp_mutex_lock(&in->lock);
+
+    if (in->frame_queued) {
+        talloc_free(in->current_frame); // the queued frame replaces it
+        in->current_frame = in->frame_queued;
+        in->frame_queued = NULL;
+    } else if (in->paused || !in->current_frame || !in->hasframe ||
+               (in->current_frame->display_synced && in->current_frame->num_vsyncs < 1) ||
+               !in->current_frame->display_synced)
+    {
+        goto done; // only unpaused display-synced frames with vsyncs left repeat
+    }
+
+    frame = vo_frame_ref(in->current_frame);
+    assert(frame);
+
+    if (frame->display_synced) {
+        frame->pts = 0; // 0 means "present immediately" (see struct vo_frame)
+        frame->duration = -1; // negative: the drop threshold below is skipped
+    }
+
+    int64_t now = mp_time_ns();
+    int64_t pts = frame->pts;
+    int64_t duration = frame->duration;
+    int64_t end_time = pts + duration;
+
+    // Time at which we should flip_page on the VO.
+    int64_t target = frame->display_synced ? 0 : pts - in->flip_queue_offset;
+
+    // "normal" strict drop threshold.
+    in->dropped_frame = duration >= 0 && end_time < now;
+    // Each of the following conditions can only veto the drop decided above.
+    in->dropped_frame &= !frame->display_synced;
+    in->dropped_frame &= !(vo->driver->caps & VO_CAP_FRAMEDROP);
+    in->dropped_frame &= frame->can_drop;
+    // Even if we're hopelessly behind, rather degrade to 10 FPS playback,
+    // instead of just freezing the display forever.
+    in->dropped_frame &= now - in->prev_vsync < MP_TIME_MS_TO_NS(100);
+    in->dropped_frame &= in->hasframe_rendered;
+
+    // Setup parameters for the next time this frame is drawn. ("frame" is the
+    // frame currently drawn, while in->current_frame is the potentially next.)
+    in->current_frame->repeat = true;
+    if (frame->display_synced) {
+        // Increment the offset only if it's not the last vsync. The current_frame
+        // can still be reused. This is mostly important for redraws that might
+        // overshoot the target vsync point.
+        if (in->current_frame->num_vsyncs > 1) {
+            in->current_frame->vsync_offset += in->current_frame->vsync_interval;
+            in->current_frame->ideal_frame_vsync += in->current_frame->ideal_frame_vsync_duration;
+        }
+        in->dropped_frame |= in->current_frame->num_vsyncs < 1;
+    }
+    if (in->current_frame->num_vsyncs > 0)
+        in->current_frame->num_vsyncs -= 1; // one vsync is consumed by this pass
+
+    // Always render when paused (it's typically the last frame for a while).
+    in->dropped_frame &= !in->paused;
+
+    bool use_vsync = in->current_frame->display_synced && !in->paused;
+    if (use_vsync && !in->expecting_vsync) // first DS frame in a row
+        in->prev_vsync = now;
+    in->expecting_vsync = use_vsync;
+
+    // Store the initial value before we unlock.
+    bool request_redraw = in->request_redraw;
+
+    if (in->dropped_frame) {
+        in->drop_count += 1;
+    } else {
+        in->rendering = true;
+        in->hasframe_rendered = true;
+        int64_t prev_drop_count = vo->in->drop_count;
+        // Can the core queue new video now? Non-display-sync uses a separate
+        // timer instead, but possibly benefits from preparing a frame early.
+        bool can_queue = !in->frame_queued &&
+            (in->current_frame->num_vsyncs < 1 || !use_vsync);
+        mp_mutex_unlock(&in->lock); // draw/wait/flip below run without the lock
+
+        if (can_queue)
+            wakeup_core(vo);
+
+        stats_time_start(in->stats, "video-draw");
+
+        vo->driver->draw_frame(vo, frame);
+
+        stats_time_end(in->stats, "video-draw");
+
+        wait_until(vo, target); // returns at once if target is 0 (display sync)
+
+        stats_time_start(in->stats, "video-flip");
+
+        vo->driver->flip_page(vo);
+
+        struct vo_vsync_info vsync = {
+            .last_queue_display_time = -1,
+            .skipped_vsyncs = -1,
+        };
+        if (vo->driver->get_vsync)
+            vo->driver->get_vsync(vo, &vsync);
+
+        // Make up some crap if presentation feedback is missing.
+        if (vsync.last_queue_display_time <= 0)
+            vsync.last_queue_display_time = mp_time_ns();
+
+        stats_time_end(in->stats, "video-flip");
+
+        mp_mutex_lock(&in->lock);
+        in->dropped_frame = prev_drop_count < vo->in->drop_count; // grew meanwhile?
+        in->rendering = false;
+
+        update_vsync_timing_after_swap(vo, &vsync);
+    }
+
+    if (vo->driver->caps & VO_CAP_NORETAIN) { // such VOs must not keep the frame
+        talloc_free(in->current_frame);
+        in->current_frame = NULL;
+    }
+
+    if (in->dropped_frame) {
+        MP_STATS(vo, "drop-vo");
+    } else {
+        // If the initial redraw request was true or mpv is still playing,
+        // then we can clear it here since we just performed a redraw, or the
+        // next loop will draw what we need. However if there initially is
+        // no redraw request, then something can change this (i.e. the OSD)
+        // while the vo was unlocked. If we are paused, don't touch
+        // in->request_redraw in that case.
+        if (request_redraw || !in->paused)
+            in->request_redraw = false;
+    }
+
+    if (in->current_frame && in->current_frame->num_vsyncs &&
+        in->current_frame->display_synced)
+        more_frames = true; // the current frame still has vsyncs to cover
+
+    if (in->frame_queued && in->frame_queued->display_synced)
+        more_frames = true; // a display-synced frame was queued meanwhile
+
+    mp_cond_broadcast(&in->wakeup); // for vo_wait_frame()
+    wakeup_core(vo);
+
+done:
+    if (!vo->driver->frame_owner) // otherwise the VO frees frames itself
+        talloc_free(frame); // frame is still NULL if we jumped here early
+    mp_mutex_unlock(&in->lock);
+
+    return more_frames;
+}
+
+static void do_redraw(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    // Redraw the current frame (or an empty dummy frame) as a still image.
+    if (!vo->config_ok || (vo->driver->caps & VO_CAP_NORETAIN))
+        return;
+
+    mp_mutex_lock(&in->lock);
+    in->request_redraw = false;
+    bool full_redraw = in->dropped_frame;
+    struct vo_frame *frame = NULL;
+    if (!vo->driver->untimed)
+        frame = vo_frame_ref(in->current_frame); // NULL if there is no frame
+    if (frame)
+        in->dropped_frame = false;
+    struct vo_frame dummy = {0};
+    if (!frame)
+        frame = &dummy; // draw with an all-zero frame (current == NULL)
+    frame->redraw = !full_redraw; // unconditionally redraw if it was dropped
+    frame->repeat = false;
+    frame->still = true;
+    frame->pts = 0;
+    frame->duration = -1;
+    mp_mutex_unlock(&in->lock);
+
+    vo->driver->draw_frame(vo, frame);
+    vo->driver->flip_page(vo);
+
+    if (frame != &dummy && !vo->driver->frame_owner) // dummy lives on the stack
+        talloc_free(frame);
+}
+
+static struct mp_image *get_image_vo(void *ctx, int imgfmt, int w, int h,
+                                     int stride_align, int flags)
+{
+    struct vo *owner = ctx; // ctx is the vo passed to dr_helper_create()
+    return owner->driver->get_image(owner, imgfmt, w, h, stride_align, flags);
+}
+
+static MP_THREAD_VOID vo_thread(void *ptr)
+{
+    struct vo *vo = ptr;
+    struct vo_internal *in = vo->in;
+    bool vo_paused = false; // pause state last sent to the driver
+
+    mp_thread_set_name("vo");
+
+    if (vo->driver->get_image) {
+        in->dr_helper = dr_helper_create(in->dispatch, get_image_vo, vo);
+        dr_helper_acquire_thread(in->dr_helper);
+    }
+
+    int r = vo->driver->preinit(vo) ? -1 : 0; // preinit: non-zero means error
+    mp_rendezvous(vo, r); // init barrier
+    if (r < 0)
+        goto done;
+
+    read_opts(vo);
+    update_display_fps(vo);
+    vo_event(vo, VO_EVENT_WIN_STATE);
+
+    while (1) {
+        mp_dispatch_queue_process(vo->in->dispatch, 0);
+        if (in->terminate)
+            break;
+        stats_event(in->stats, "iterations");
+        vo->driver->control(vo, VOCTRL_CHECK_EVENTS, NULL);
+        bool working = render_frame(vo);
+        int64_t now = mp_time_ns();
+        int64_t wait_until = now + MP_TIME_S_TO_NS(working ? 0 : 1000);
+
+        mp_mutex_lock(&in->lock);
+        if (in->wakeup_pts) {
+            if (in->wakeup_pts > now) {
+                wait_until = MPMIN(wait_until, in->wakeup_pts);
+            } else {
+                in->wakeup_pts = 0; // timer expired: disarm and notify the core
+                wakeup_core(vo);
+            }
+        }
+        if (vo->want_redraw && !in->want_redraw) {
+            in->want_redraw = true; // reported to callers of vo_want_redraw()
+            wakeup_core(vo);
+        }
+        vo->want_redraw = false;
+        bool redraw = in->request_redraw;
+        bool send_reset = in->send_reset;
+        in->send_reset = false;
+        bool send_pause = in->paused != vo_paused;
+        vo_paused = in->paused;
+        mp_mutex_unlock(&in->lock);
+
+        if (send_reset) // requested by vo_seek_reset()
+            vo->driver->control(vo, VOCTRL_RESET, NULL);
+        if (send_pause)
+            vo->driver->control(vo, vo_paused ? VOCTRL_PAUSE : VOCTRL_RESUME, NULL);
+        if (wait_until > now && redraw) {
+            do_redraw(vo); // now is a good time
+            continue;
+        }
+        if (vo->want_redraw) // might have been set by VOCTRLs
+            wait_until = 0;
+
+        if (wait_until <= now)
+            continue;
+
+        wait_vo(vo, wait_until);
+    }
+    forget_frames(vo); // implicitly synchronized
+    talloc_free(in->current_frame);
+    in->current_frame = NULL;
+    vo->driver->uninit(vo);
+done:
+    TA_FREEP(&in->dr_helper);
+    MP_THREAD_RETURN();
+}
+
+void vo_set_paused(struct vo *vo, bool paused)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    if (paused != state->paused) {
+        state->paused = paused;
+        if (paused && state->dropped_frame) { // last frame was dropped: redraw
+            state->request_redraw = true;
+            wakeup_core(vo);
+        }
+        reset_vsync_timings(vo);
+        wakeup_locked(vo);
+    }
+    mp_mutex_unlock(&state->lock);
+}
+
+int64_t vo_get_drop_count(struct vo *vo) // locked read of in->drop_count
+{
+    mp_mutex_lock(&vo->in->lock);
+    int64_t dropped = vo->in->drop_count;
+    mp_mutex_unlock(&vo->in->lock);
+    return dropped;
+}
+
+void vo_increment_drop_count(struct vo *vo, int64_t n) // locked add of n
+{
+    mp_mutex_lock(&vo->in->lock);
+    vo->in->drop_count = vo->in->drop_count + n;
+    mp_mutex_unlock(&vo->in->lock);
+}
+
+// Ask the VO thread for a redraw; does nothing if one is already requested.
+void vo_redraw(struct vo *vo)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    if (!state->request_redraw) {
+        state->want_redraw = false;
+        state->request_redraw = true; // picked up by the vo_thread() loop
+        wakeup_locked(vo);
+    }
+    mp_mutex_unlock(&state->lock);
+}
+
+bool vo_want_redraw(struct vo *vo) // locked read of in->want_redraw
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    bool wanted = state->want_redraw;
+    mp_mutex_unlock(&state->lock);
+    return wanted;
+}
+
+void vo_seek_reset(struct vo *vo)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    forget_frames(vo);
+    reset_vsync_timings(vo);
+    state->send_reset = true; // vo_thread() then issues VOCTRL_RESET
+    wakeup_locked(vo);
+    mp_mutex_unlock(&state->lock);
+}
+
+// Return true if there is still a frame being displayed (or queued).
+// If this returns true, a wakeup some time in the future is guaranteed.
+bool vo_still_displaying(struct vo *vo)
+{
+    struct vo_internal *in = vo->in;
+    mp_mutex_lock(&in->lock); // hasframe is written under this lock as well
+    bool working = (in->rendering || in->frame_queued) && in->hasframe;
+    mp_mutex_unlock(&in->lock);
+    return working;
+}
+
+// Whether at least 1 frame was queued or rendered since last seek or reconfig.
+bool vo_has_frame(struct vo *vo)
+{
+    return vo->in->hasframe; // NOTE(review): read without in->lock - confirm OK
+}
+
+static void run_query_format(void *p)
+{
+    void **args = p; // {vo, list}, as packed by vo_query_formats()
+    struct vo *vo = args[0];
+    uint8_t *out = args[1];
+    for (int idx = 0; idx < IMGFMT_END - IMGFMT_START; idx++)
+        out[idx] = vo->driver->query_format(vo, IMGFMT_START + idx);
+}
+
+// Fill list (allocated as uint8_t[IMGFMT_END - IMGFMT_START]) with the driver's
+// query_format() result per image format, run via mp_dispatch_run().
+void vo_query_formats(struct vo *vo, uint8_t *list)
+{
+    void *args[2] = {vo, list};
+    mp_dispatch_run(vo->in->dispatch, run_query_format, args);
+}
+
+// Compute the source and destination rectangles for a correctly scaled
+// picture, including pan-scan. All outputs are zeroed if vo->params is unset.
+// out_src: visible part of the video
+// out_dst: area of screen covered by the video source rectangle
+// out_osd: OSD size, OSD margins, etc.
+// Must be called from the VO thread only.
+void vo_get_src_dst_rects(struct vo *vo, struct mp_rect *out_src,
+                          struct mp_rect *out_dst, struct mp_osd_res *out_osd)
+{
+    if (vo->params) {
+        mp_get_src_dst_rects(vo->log, vo->opts, vo->driver->caps, vo->params,
+                             vo->dwidth, vo->dheight, vo->monitor_par,
+                             out_src, out_dst, out_osd);
+        return;
+    }
+    *out_src = *out_dst = (struct mp_rect){0};
+    *out_osd = (struct mp_osd_res){0};
+}
+
+// Set how much earlier than the frame's target time flip_page is called
+// (offset_ns, in nanoseconds; for vo_vdpau, which does its own timing), and
+// how many vo_frame.frames the VO requests (num_req_frames, clamped to
+// 1..VO_MAX_REQ_FRAMES; for vo_gpu interpolation).
+void vo_set_queue_params(struct vo *vo, int64_t offset_ns, int num_req_frames)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    state->req_frames = MPCLAMP(num_req_frames, 1, VO_MAX_REQ_FRAMES);
+    state->flip_queue_offset = offset_ns;
+    mp_mutex_unlock(&state->lock);
+}
+
+int vo_get_num_req_frames(struct vo *vo) // locked read of in->req_frames
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    int count = state->req_frames;
+    mp_mutex_unlock(&state->lock);
+    return count;
+}
+
+double vo_get_vsync_interval(struct vo *vo) // -1 unless the interval is > 1
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    double interval = state->vsync_interval > 1 ? state->vsync_interval : -1;
+    mp_mutex_unlock(&state->lock);
+    return interval;
+}
+
+double vo_get_estimated_vsync_interval(struct vo *vo) // locked read
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    double estimate = state->estimated_vsync_interval;
+    mp_mutex_unlock(&state->lock);
+    return estimate;
+}
+
+double vo_get_estimated_vsync_jitter(struct vo *vo) // locked read
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    double jitter = state->estimated_vsync_jitter;
+    mp_mutex_unlock(&state->lock);
+    return jitter;
+}
+
+// Get the time in seconds after which the currently rendering frame will end.
+// Returns positive values if the frame is yet to be finished, negative values
+// if it already finished.
+// This can only be called while no new frame is queued (after
+// vo_is_ready_for_frame). Returns 0 for non-display synced frames, or if
+// there is no current frame or no usable vsync timing base.
+double vo_get_delay(struct vo *vo)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    assert (!state->frame_queued);
+    int64_t end = 0;
+    if (state->base_vsync && state->vsync_interval > 1 &&
+        state->current_frame && state->current_frame->display_synced)
+    {
+        // Remaining vsyncs, plus one while the frame is still being rendered.
+        int left = state->current_frame->num_vsyncs + !!state->rendering;
+        end = state->base_vsync + left * state->vsync_interval;
+    }
+    mp_mutex_unlock(&state->lock);
+    return end ? MP_TIME_NS_TO_S(end - mp_time_ns()) : 0;
+}
+
+void vo_discard_timing_info(struct vo *vo) // locked reset_vsync_timings()
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    reset_vsync_timings(vo);
+    mp_mutex_unlock(&state->lock);
+}
+
+int64_t vo_get_delayed_count(struct vo *vo) // locked read of in->delayed_count
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    int64_t delayed = state->delayed_count;
+    mp_mutex_unlock(&state->lock);
+    return delayed;
+}
+
+double vo_get_display_fps(struct vo *vo) // locked read of in->display_fps
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    double fps = state->display_fps;
+    mp_mutex_unlock(&state->lock);
+    return fps;
+}
+
+// Set specific event flags, and wake up the playback core if a flag from
+// VO_EVENTS_USER gets newly set. vo_query_and_reset_events() retrieves them.
+void vo_event(struct vo *vo, int event)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    if (event & VO_EVENTS_USER & ~state->queued_events)
+        wakeup_core(vo);
+    if (event)
+        wakeup_locked(vo);
+    state->queued_events |= event;
+    state->internal_events |= event;
+    mp_mutex_unlock(&state->lock);
+}
+
+// Return the event flags set with vo_event() that are also contained in the
+// events mask, and clear exactly those flags from the queued set.
+int vo_query_and_reset_events(struct vo *vo, int events)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    int hits = state->queued_events & events;
+    state->queued_events &= ~(unsigned)hits;
+    mp_mutex_unlock(&state->lock);
+    return hits;
+}
+
+struct mp_image *vo_get_current_frame(struct vo *vo)
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    // New reference to the current frame's image; NULL without a frame.
+    struct vo_frame *cur = state->current_frame;
+    struct mp_image *img = cur ? mp_image_new_ref(cur->current) : NULL;
+    mp_mutex_unlock(&state->lock);
+    return img;
+}
+
+struct vo_frame *vo_get_current_vo_frame(struct vo *vo) // new ref, or NULL
+{
+    struct vo_internal *state = vo->in;
+    mp_mutex_lock(&state->lock);
+    struct vo_frame *ref = vo_frame_ref(state->current_frame);
+    mp_mutex_unlock(&state->lock);
+    return ref;
+}
+
+struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h,
+                              int stride_align, int flags)
+{
+    if (vo->driver->get_image_ts) // the driver's get_image_ts hook comes first
+        return vo->driver->get_image_ts(vo, imgfmt, w, h, stride_align, flags);
+    else if (vo->in->dr_helper) // created in vo_thread() if get_image is set
+        return dr_helper_get_image(vo->in->dr_helper, imgfmt, w, h, stride_align, flags);
+    return NULL;
+}
+
+static void destroy_frame(void *p)
+{
+    struct vo_frame *dying = p; // talloc destructor installed by vo_frame_ref()
+    for (int i = 0; i < dying->num_frames; i++)
+        talloc_free(dying->frames[i]);
+}
+
+// Create a new reference to frame (NULL yields NULL). Each entry of frames[]
+// becomes a new image reference, and current is set to the new frames[0].
+// talloc_free() on the result unrefs those images via destroy_frame().
+struct vo_frame *vo_frame_ref(struct vo_frame *frame)
+{
+    if (!frame)
+        return NULL;
+    // Shallow copy first, then replace every image pointer by a new ref.
+    struct vo_frame *copy = talloc_ptrtype(NULL, copy);
+    talloc_set_destructor(copy, destroy_frame);
+    *copy = *frame;
+    for (int i = 0; i < frame->num_frames; i++)
+        copy->frames[i] = mp_image_new_ref(frame->frames[i]);
+    copy->current = copy->num_frames ? copy->frames[0] : NULL;
+    return copy;
+}
+
+/*
+ * Look up an integer key in a table whose last entry has 0 as its "from" key.
+ * param: key key to search for
+ * returns the "to" value of the first entry matching key, or the "to" value
+ *         of the terminating entry if there is no match.
+ */
+int lookup_keymap_table(const struct mp_keymap *map, int key)
+{
+    const struct mp_keymap *entry = map;
+    for (; entry->from && entry->from != key; entry++) {}
+    return entry->to;
+}
+
+struct mp_image_params vo_get_current_params(struct vo *vo)
+{
+    struct mp_image_params snapshot = {0}; // stays zeroed without vo->params
+    mp_mutex_lock(&vo->in->lock);
+    if (vo->params)
+        snapshot = *vo->params;
+    mp_mutex_unlock(&vo->in->lock);
+    return snapshot;
+}
diff --git a/video/out/vo.h b/video/out/vo.h
new file mode 100644
index 0000000..e38dcf8
--- /dev/null
+++ b/video/out/vo.h
@@ -0,0 +1,544 @@
+/*
+ * Copyright (C) Aaron Holtzman - Aug 1999
+ *
+ * Strongly modified, most parts rewritten: A'rpi/ESP-team - 2000-2001
+ * (C) MPlayer developers
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_VIDEO_OUT_H
+#define MPLAYER_VIDEO_OUT_H
+
+#include <inttypes.h>
+#include <stdbool.h>
+
+#include "video/img_format.h"
+#include "common/common.h"
+#include "options/options.h"
+
+enum { // event flags for vo_event() / vo_query_and_reset_events()
+    // VO needs to redraw
+    VO_EVENT_EXPOSE                     = 1 << 0,
+    // VO needs to update state to a new window size
+    VO_EVENT_RESIZE                     = 1 << 1,
+    // The ICC profile needs to be reloaded
+    VO_EVENT_ICC_PROFILE_CHANGED        = 1 << 2,
+    // Some other window state changed (position, window state, fps)
+    VO_EVENT_WIN_STATE                  = 1 << 3,
+    // The ambient light conditions changed and need to be reloaded
+    VO_EVENT_AMBIENT_LIGHTING_CHANGED   = 1 << 4,
+    // Special mechanism for making resizing with Cocoa react faster
+    VO_EVENT_LIVE_RESIZING              = 1 << 5,
+    // For VOCTRL_GET_HIDPI_SCALE changes.
+    VO_EVENT_DPI                        = 1 << 6,
+    // Special thing for encode mode (vo_driver.initially_blocked).
+    // Part of VO_EVENTS_USER to make vo_is_ready_for_frame() work properly.
+    VO_EVENT_INITIAL_UNBLOCK            = 1 << 7,
+    VO_EVENT_FOCUS                      = 1 << 8, // focus changed? TODO confirm
+
+    // Events the player core cares about; vo_event() wakes the core for these.
+    VO_EVENTS_USER = VO_EVENT_RESIZE | VO_EVENT_WIN_STATE | VO_EVENT_DPI |
+                     VO_EVENT_INITIAL_UNBLOCK | VO_EVENT_FOCUS,
+};
+
+enum mp_voctrl { // request codes for vo_driver.control()
+    /* signal a device reset (sent by the VO thread after vo_seek_reset()) */
+    VOCTRL_RESET = 1,
+    /* Handle input and redraw events; sent on every VO thread iteration */
+    VOCTRL_CHECK_EVENTS,
+    /* signal a device pause (sent when the paused state changes) */
+    VOCTRL_PAUSE,
+    /* start/resume playback */
+    VOCTRL_RESUME,
+
+    VOCTRL_SET_PANSCAN,
+    VOCTRL_SET_EQUALIZER,
+
+    // Triggered by any change to mp_vo_opts. This is for convenience. In theory,
+    // you could install your own listener.
+    VOCTRL_VO_OPTS_CHANGED,
+
+    /* private to vo_gpu */
+    VOCTRL_LOAD_HWDEC_API,
+
+    // Only used internally in vo_libmpv
+    VOCTRL_PREINIT,
+    VOCTRL_UNINIT,
+    VOCTRL_RECONFIG,
+
+    VOCTRL_UPDATE_WINDOW_TITLE,         // char*
+    VOCTRL_UPDATE_PLAYBACK_STATE,       // struct voctrl_playback_state*
+
+    VOCTRL_PERFORMANCE_DATA,            // struct voctrl_performance_data*
+
+    VOCTRL_SET_CURSOR_VISIBILITY,       // bool*
+
+    VOCTRL_CONTENT_TYPE,                // enum mp_content_type*
+
+    VOCTRL_KILL_SCREENSAVER,
+    VOCTRL_RESTORE_SCREENSAVER,
+
+    // Return or set window size (not-fullscreen mode only - if fullscreened,
+    // these must access the not-fullscreened window size only).
+    VOCTRL_GET_UNFS_WINDOW_SIZE,        // int[2] (w/h)
+    VOCTRL_SET_UNFS_WINDOW_SIZE,        // int[2] (w/h)
+
+    VOCTRL_GET_FOCUSED,                 // bool*
+
+    // char *** (NULL terminated array compatible with CONF_TYPE_STRING_LIST)
+    // names for displays the window is on
+    VOCTRL_GET_DISPLAY_NAMES,
+
+    // Retrieve window contents. (Normal screenshots use vo_get_current_frame().)
+    // Deprecated for VOCTRL_SCREENSHOT with corresponding flags.
+    VOCTRL_SCREENSHOT_WIN,              // struct mp_image**
+
+    // A normal screenshot - VOs can react to this if vo_get_current_frame() is
+    // not sufficient.
+    VOCTRL_SCREENSHOT,                  // struct voctrl_screenshot*
+
+    VOCTRL_UPDATE_RENDER_OPTS,
+
+    VOCTRL_GET_ICC_PROFILE,             // bstr*
+    VOCTRL_GET_AMBIENT_LUX,             // int*
+    VOCTRL_GET_DISPLAY_FPS,             // double*
+    VOCTRL_GET_HIDPI_SCALE,             // double*
+    VOCTRL_GET_DISPLAY_RES,             // int[2]
+    VOCTRL_GET_WINDOW_ID,               // int64_t*
+
+    /* private to vo_gpu and vo_gpu_next */
+    VOCTRL_EXTERNAL_RESIZE,
+};
+
+// Kind of content currently playing; the data type of VOCTRL_CONTENT_TYPE.
+enum mp_content_type {
+    MP_CONTENT_NONE, // used for force-window
+    MP_CONTENT_IMAGE,
+    MP_CONTENT_VIDEO,
+};
+
+#define VO_TRUE         true
+#define VO_FALSE        false
+#define VO_ERROR        (-1)
+#define VO_NOTAVAIL     (-2)
+#define VO_NOTIMPL      (-3)
+
+// Data passed with VOCTRL_UPDATE_PLAYBACK_STATE.
+struct voctrl_playback_state {
+    bool taskbar_progress;
+    bool playing;
+    bool paused;
+    int percent_pos; // NOTE(review): presumably 0..100 - confirm with callers
+};
+
+// VOCTRL_PERFORMANCE_DATA
+#define VO_PERF_SAMPLE_COUNT 256
+
+struct mp_pass_perf { // timing statistics of a single pass
+    // times are all in nanoseconds
+    uint64_t last, avg, peak;
+    uint64_t samples[VO_PERF_SAMPLE_COUNT];
+    uint64_t count; // NOTE(review): presumably valid entries in samples[]
+};
+
+#define VO_PASS_PERF_MAX 64
+#define VO_PASS_DESC_MAX_LEN 128
+
+struct mp_frame_perf { // per-pass statistics and descriptions for one frame
+    int count; // NOTE(review): presumably used entries of perf[]/desc[]
+    struct mp_pass_perf perf[VO_PASS_PERF_MAX];
+    char desc[VO_PASS_PERF_MAX][VO_PASS_DESC_MAX_LEN];
+};
+
+struct voctrl_performance_data { // data passed with VOCTRL_PERFORMANCE_DATA
+    struct mp_frame_perf fresh, redraw;
+};
+
+struct voctrl_screenshot { // data passed with VOCTRL_SCREENSHOT
+    bool scaled, subs, osd, high_bit_depth, native_csp;
+    struct mp_image *res; // NOTE(review): presumably the resulting image
+};
+
+enum { // VO_CAP_* bits for vo_driver.caps
+    // VO does handle mp_image_params.rotate in 90 degree steps
+    VO_CAP_ROTATE90     = 1 << 0,
+    // VO does framedrop itself (vo_vdpau). Untimed/encoding VOs never drop.
+    VO_CAP_FRAMEDROP    = 1 << 1,
+    // VO does not allow frames to be retained (vo_mediacodec_embed).
+    VO_CAP_NORETAIN     = 1 << 2, // vo.c frees the current frame after drawing
+    // VO supports applying film grain
+    VO_CAP_FILM_GRAIN   = 1 << 3,
+};
+
+enum { // NOTE(review): presumably bits for the get_image() flags argument
+    // Require DR buffers to be host-cached (i.e. fast readback)
+    VO_DR_FLAG_HOST_CACHED = 1 << 0,
+};
+
+#define VO_MAX_REQ_FRAMES 10
+#define VO_MAX_SWAPCHAIN_DEPTH 8
+
+struct vo;
+struct osd_state;
+struct mp_image;
+struct mp_image_params;
+
+struct vo_extra {
+    struct input_ctx *input_ctx;
+    struct osd_state *osd;
+    struct encode_lavc_context *encode_lavc_ctx;
+    void (*wakeup_cb)(void *ctx); // NOTE(review): presumably wakes the core
+    void *wakeup_ctx; // NOTE(review): presumably the ctx given to wakeup_cb
+};
+
+struct vo_frame {
+    // If > 0, realtime when frame should be shown, in mp_time_ns() units.
+    // If 0, present immediately.
+    int64_t pts;
+    // Approximate frame duration, in ns. Negative if there is no duration.
+    int duration; // NOTE(review): a 32-bit int covers only ~2.1 s of ns
+    // Realtime of estimated distance between 2 vsync events.
+    double vsync_interval;
+    // "ideal" display time within the vsync
+    double vsync_offset;
+    // "ideal" frame duration (can be different from num_vsyncs*vsync_interval
+    // up to a vsync) - valid for the entire frame, i.e. not changed for repeats
+    double ideal_frame_duration;
+    // "ideal" frame vsync point relative to the pts
+    double ideal_frame_vsync;
+    // "ideal" frame duration relative to the pts
+    double ideal_frame_vsync_duration;
+    // how often the frame will be repeated (does not include OSD redraws)
+    int num_vsyncs;
+    // Set if the current frame is repeated from the previous. It's guaranteed
+    // that the current is the same as the previous one, even if the image
+    // pointer is different.
+    // The repeat flag is set if exactly the same frame should be rendered
+    // again (and the OSD does not need to be redrawn).
+    // A repeat frame can be redrawn, in which case repeat==redraw==true, and
+    // OSD should be updated.
+    bool redraw, repeat;
+    // The frame is not in movement - e.g. redrawing while paused.
+    bool still;
+    // Frames are output as fast as possible, with implied vsync blocking.
+    bool display_synced;
+    // Dropping the frame is allowed if the VO is behind.
+    bool can_drop;
+    // The current frame to be drawn.
+    // Warning: When OSD should be redrawn in --force-window --idle mode, this
+    //          can be NULL. The VO should draw a black background, OSD on top.
+    struct mp_image *current;
+    // List of future images, starting with the current one. This does not
+    // care about repeated frames - it simply contains the next real frames.
+    // vo_set_queue_params() sets how many future frames this should include.
+    // The actual number of frames delivered to the VO can be lower.
+    // frames[0] is current, frames[1] is the next frame.
+    // Note that some future frames may never be sent as current frame to the
+    // VO if frames are dropped.
+    int num_frames;
+    struct mp_image *frames[VO_MAX_REQ_FRAMES];
+    // Speed unadjusted, approximate frame duration inferred from past frames
+    double approx_duration;
+    // ID for frames[0] (== current). If current==NULL, the number is
+    // meaningless. Otherwise, it's a unique ID for the frame. The ID for
+    // a frame is guaranteed not to change (instant redraws will use the same
+    // ID). frames[n] has the ID frame_id+n, with the guarantee that frame
+    // drops or reconfigs will keep the guarantee.
+    // The ID is never 0 (unless num_frames==0). IDs are strictly monotonic.
+    uint64_t frame_id;
+};
+
+// Presentation feedback. See get_vsync() for how backends should fill this
+// struct.
+struct vo_vsync_info {
+    // mp_time_ns() timestamp at which the last queued frame will likely be
+    // displayed (this is in the future, unless the frame is instantly output).
+    // 0 or lower if unset or unsupported.
+    // This implies the latency of the output.
+    int64_t last_queue_display_time;
+
+    // Time between 2 vsync events in nanoseconds. This should be the difference
+    // of 2 times sampled from the same reference point (it should not be the
+    // difference between e.g. the end of scanout and the start of the next one;
+    // it must be continuous).
+    // -1 if unsupported.
+    //  0 if supported, but no value available yet. It is assumed that the value
+    //    becomes available after enough swap_buffers() calls were done.
+    // >0 values are taken for granted. Very bad things will happen if it's
+    //    inaccurate.
+    int64_t vsync_duration;
+
+    // Number of skipped physical vsyncs at some point in time. Typically, this
+    // value is some time in the past by an offset that equals to the latency.
+    // This value is reset and newly sampled at every swap_buffers() call.
+    // This can be used to detect delayed frames iff you try to call
+    // swap_buffers() for every physical vsync.
+    // -1 if unset or unsupported.
+    int64_t skipped_vsyncs;
+};
+
+// Static description of one video output backend: a few capability flags,
+// the callback table the VO core invokes, and the description of the
+// backend's private data and options. Backends define one const instance
+// of this struct each (e.g. video_out_caca in vo_caca.c).
+struct vo_driver {
+    // Encoding functionality, which can be invoked via --o only.
+    bool encode;
+
+    // This requires waiting for a VO_EVENT_INITIAL_UNBLOCK event before the
+    // first frame can be sent. Doing vo_reconfig*() calls is allowed though.
+    // Encode mode uses this, the core uses vo_is_ready_for_frame() to
+    // implicitly check for this.
+    bool initially_blocked;
+
+    // VO_CAP_* bits
+    int caps;
+
+    // Disable video timing, push frames as quickly as possible, never redraw.
+    bool untimed;
+
+    // The VO is responsible for freeing frames.
+    bool frame_owner;
+
+    // Short identifier of the driver (e.g. "caca") and a human readable
+    // description of it (e.g. "libcaca").
+    const char *name;
+    const char *description;
+
+    /*
+     * Driver initialization, e.g. vo_caca.c creates its canvas and display
+     * here.
+     *   returns: zero on successful initialization, non-zero on error.
+     */
+    int (*preinit)(struct vo *vo);
+
+    /*
+     * Whether the given image format is supported and config() will succeed.
+     * format: one of IMGFMT_*
+     * returns: 0 on not supported, otherwise 1
+     */
+    int (*query_format)(struct vo *vo, int format);
+
+    /*
+     * Initialize or reconfigure the display driver.
+     *   params: video parameters, like pixel format and frame size
+     * returns: < 0 on error, >= 0 on success
+     */
+    int (*reconfig)(struct vo *vo, struct mp_image_params *params);
+
+    /*
+     * Like reconfig(), but provides the whole mp_image for which the change is
+     * required. (The image doesn't have to have real data.)
+     */
+    int (*reconfig2)(struct vo *vo, struct mp_image *img);
+
+    /*
+     * Control interface. request selects the operation (VOCTRL_* values, see
+     * the control() implementations of the drivers), data is its
+     * request-specific argument. Drivers return VO_NOTIMPL for requests they
+     * do not handle.
+     */
+    int (*control)(struct vo *vo, uint32_t request, void *data);
+
+    /*
+     * lavc callback for direct rendering
+     *
+     * Optional. To make implementation easier, the callback is always run on
+     * the VO thread. The returned mp_image's destructor callback is also called
+     * on the VO thread, even if it's actually unref'ed from another thread.
+     *
+     * The last reference to an image is supposed to be destroyed before
+     * ->uninit is called. Note that this does not strictly hold: libmpv
+     * screenshots can hold the reference longer.
+     *
+     * The allocated image - or a part of it, can be passed to draw_frame(). The
+     * point of this mechanism is that the decoder directly renders to GPU
+     * staging memory, to avoid a memcpy on frame upload. But this is not a
+     * guarantee. A filter could change the data pointers or return a newly
+     * allocated image. It's even possible that only 1 plane uses the buffer
+     * allocated by the get_image function. The VO has to check for this.
+     *
+     * stride_align is always a value >=1. The stride values of the returned
+     * image must be divisible by this value. This may be a non power of two.
+     *
+     * flags is a combination of VO_DR_FLAG_* flags.
+     *
+     * Currently, the returned image must have exactly 1 AVBufferRef set, for
+     * internal implementation simplicity.
+     *
+     * returns: an allocated, refcounted image; if NULL is returned, the caller
+     * will silently fall back to a default allocator
+     */
+    struct mp_image *(*get_image)(struct vo *vo, int imgfmt, int w, int h,
+                                  int stride_align, int flags);
+
+    /*
+     * Thread-safe variant of get_image. Set at most one of these callbacks.
+     * This excludes _all_ synchronization magic. The only guarantee is that
+     * vo_driver.uninit is not called before this function returns.
+     */
+    struct mp_image *(*get_image_ts)(struct vo *vo, int imgfmt, int w, int h,
+                                     int stride_align, int flags);
+
+    /* Render the given frame. Note that this is also called when repeating
+     * or redrawing frames.
+     *
+     * frame is freed by the caller if the callee did not assume ownership
+     * of the frames, but in any case the callee can still modify the
+     * contained data and references.
+     */
+    void (*draw_frame)(struct vo *vo, struct vo_frame *frame);
+
+    /*
+     * Blit/Flip buffer to the screen. Must be called after each frame!
+     */
+    void (*flip_page)(struct vo *vo);
+
+    /*
+     * Return presentation feedback. The implementation should not touch fields
+     * it doesn't support; the info fields are preinitialized to neutral values.
+     * Usually called once after flip_page(), but can be called any time.
+     * The values returned by this are always relative to the last flip_page()
+     * call.
+     */
+    void (*get_vsync)(struct vo *vo, struct vo_vsync_info *info);
+
+    /* These optional callbacks can be provided if the GUI framework used by
+     * the VO requires entering a message loop for receiving events and does
+     * not call vo_wakeup() from a separate thread when there are new events.
+     *
+     * wait_events() will wait for new events, until the timeout expires, or the
+     * function is interrupted. wakeup() is used to possibly interrupt the
+     * event loop (wakeup() itself must be thread-safe, and not call any other
+     * VO functions; it's the only vo_driver function with this requirement).
+     * wakeup() should behave like a binary semaphore; if wait_events() is not
+     * being called while wakeup() is, the next wait_events() call should exit
+     * immediately.
+     */
+    void (*wakeup)(struct vo *vo);
+    void (*wait_events)(struct vo *vo, int64_t until_time_ns);
+
+    /*
+     * Closes driver. Should restore the original state of the system.
+     */
+    void (*uninit)(struct vo *vo);
+
+    // Size of private struct for automatic allocation (0 doesn't allocate)
+    int priv_size;
+
+    // If not NULL, it's copied into the newly allocated private struct.
+    const void *priv_defaults;
+
+    // List of options to parse into priv struct (requires priv_size to be set)
+    // This will register them as global options (with options_prefix), and
+    // copy the current value at VO creation time to the priv struct.
+    const struct m_option *options;
+
+    // All options in the above array are prefixed with this string. (It's just
+    // for convenience and makes no difference in semantics.)
+    const char *options_prefix;
+
+    // Registers global options that go to a separate options struct.
+    const struct m_sub_options *global_opts;
+};
+
+// State of one video output instance. It ties the selected driver (and the
+// driver's private data) to the shared player objects it works with.
+struct vo {
+    const struct vo_driver *driver; // backend in use (callback table)
+    struct mp_log *log; // Using e.g. "[vo/vdpau]" as prefix
+    void *priv;         // driver-private data (see vo_driver.priv_size)
+    struct mpv_global *global;
+    // Windowing/platform backend states.
+    // NOTE(review): presumably only the one matching the active backend is
+    // set - confirm against the backends.
+    struct vo_x11_state *x11;
+    struct vo_w32_state *w32;
+    struct vo_wayland_state *wl;
+    struct vo_android_state *android;
+    struct vo_drm_state *drm;
+    struct mp_hwdec_devices *hwdec_devs;
+    struct input_ctx *input_ctx; // drivers feed key/mouse input into this
+    struct osd_state *osd;
+    struct encode_lavc_context *encode_lavc_ctx;
+    struct vo_internal *in; // opaque here; only a forward-declared type
+    struct vo_extra extra;
+
+    // --- The following fields are generally only changed during initialization.
+
+    bool probing;
+
+    // --- The following fields are only changed with vo_reconfig(), and can
+    //     be accessed unsynchronized (read-only).
+
+    int config_ok;      // Last config call was successful?
+    struct mp_image_params *params; // Configured parameters (as in vo_reconfig)
+
+    // --- The following fields can be accessed only by the VO thread, or from
+    //     anywhere _if_ the VO thread is suspended (use vo->dispatch).
+
+    struct m_config_cache *opts_cache; // cache for ->opts
+    struct mp_vo_opts *opts;
+    struct m_config_cache *gl_opts_cache;
+    struct m_config_cache *eq_opts_cache;
+
+    bool want_redraw;   // redraw as soon as possible
+
+    // current window state
+    int dwidth;
+    int dheight;
+    float monitor_par;
+};
+
+// --- VO API used by the rest of the player. Only the prototypes are part of
+//     this header; the notes below are limited to what can be derived from
+//     the declarations and the vo_driver documentation above.
+
+struct mpv_global;
+struct vo *init_best_video_out(struct mpv_global *global, struct vo_extra *ex);
+// NOTE(review): presumably these end up in vo_driver.reconfig/reconfig2
+// (< 0 on error, >= 0 on success) - confirm in vo.c.
+int vo_reconfig(struct vo *vo, struct mp_image_params *p);
+int vo_reconfig2(struct vo *vo, struct mp_image *img);
+
+// Note that request is an int here, while vo_driver.control takes uint32_t.
+int vo_control(struct vo *vo, int request, void *data);
+void vo_control_async(struct vo *vo, int request, void *data);
+// Also covers vo_driver.initially_blocked (see the comment on that field).
+bool vo_is_ready_for_frame(struct vo *vo, int64_t next_pts);
+void vo_queue_frame(struct vo *vo, struct vo_frame *frame);
+void vo_wait_frame(struct vo *vo);
+bool vo_still_displaying(struct vo *vo);
+bool vo_has_frame(struct vo *vo);
+void vo_redraw(struct vo *vo);
+bool vo_want_redraw(struct vo *vo);
+void vo_seek_reset(struct vo *vo);
+void vo_destroy(struct vo *vo);
+void vo_set_paused(struct vo *vo, bool paused);
+int64_t vo_get_drop_count(struct vo *vo);
+void vo_increment_drop_count(struct vo *vo, int64_t n);
+int64_t vo_get_delayed_count(struct vo *vo);
+void vo_query_formats(struct vo *vo, uint8_t *list);
+void vo_event(struct vo *vo, int event);
+int vo_query_and_reset_events(struct vo *vo, int events);
+struct mp_image *vo_get_current_frame(struct vo *vo);
+void vo_set_queue_params(struct vo *vo, int64_t offset_ns, int num_req_frames);
+int vo_get_num_req_frames(struct vo *vo);
+double vo_get_vsync_interval(struct vo *vo);
+double vo_get_estimated_vsync_interval(struct vo *vo);
+double vo_get_estimated_vsync_jitter(struct vo *vo);
+double vo_get_display_fps(struct vo *vo);
+double vo_get_delay(struct vo *vo);
+void vo_discard_timing_info(struct vo *vo);
+struct vo_frame *vo_get_current_vo_frame(struct vo *vo);
+// Same parameters as vo_driver.get_image; see there for their meaning.
+struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h,
+                              int stride_align, int flags);
+
+// vo_wakeup() is what backends are expected to call (possibly from another
+// thread) when new events arrived; see vo_driver.wakeup/wait_events.
+void vo_wakeup(struct vo *vo);
+void vo_wait_default(struct vo *vo, int64_t until_time);
+
+// One entry of a key translation table: maps the key code "from" (as
+// delivered by a backend) to the key code "to".
+// NOTE(review): vo_caca.c terminates its table with a {0, 0} entry and
+// treats a 0 result of lookup_keymap_table() as "no mapping found" - confirm
+// against the implementation.
+struct mp_keymap {
+  int from;
+  int to;
+};
+int lookup_keymap_table(const struct mp_keymap *map, int key);
+
+struct mp_osd_res;
+// Writes the source and destination video rectangles as well as the OSD
+// resolution info to the out_* parameters (see e.g. calc_fs_rect() in
+// vo_direct3d.c for a user).
+void vo_get_src_dst_rects(struct vo *vo, struct mp_rect *out_src,
+                          struct mp_rect *out_dst, struct mp_osd_res *out_osd);
+
+// NOTE(review): presumably returns a new reference to the given frame -
+// confirm in vo.c.
+struct vo_frame *vo_frame_ref(struct vo_frame *frame);
+
+// Returns the parameters by value (not a pointer into the VO state).
+struct mp_image_params vo_get_current_params(struct vo *vo);
+
+#endif /* MPLAYER_VIDEO_OUT_H */
diff --git a/video/out/vo_caca.c b/video/out/vo_caca.c
new file mode 100644
index 0000000..0625de0
--- /dev/null
+++ b/video/out/vo_caca.c
@@ -0,0 +1,314 @@
+/*
+ * video output driver for libcaca
+ *
+ * by Pigeon <pigeon@pigeond.net>
+ *
+ * Some functions/codes/ideas are from x11 and aalib vo
+ *
+ * TODO: support draw_alpha?
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <assert.h>
+#include <caca.h>
+
+#include "config.h"
+#include "vo.h"
+#include "video/mp_image.h"
+
+#include "input/keycodes.h"
+#include "input/input.h"
+#include "common/msg.h"
+#include "input/input.h"
+
+#include "config.h"
+#if !HAVE_GPL
+#error GPL only
+#endif
+
+// Per-instance state of the caca VO (allocated by the core according to
+// video_out_caca.priv_size).
+struct priv {
+    caca_canvas_t  *canvas;         // created in preinit(), freed in uninit()
+    caca_display_t *display;        // display attached to canvas
+    caca_dither_t  *dither;         // (re)created by resize()
+    uint8_t        *dither_buffer;  // tightly packed copy of the current frame
+    // Currently selected dither settings, as passed to the
+    // caca_set_dither_*() functions. Set to defaults in preinit() and
+    // cycled through by key presses in check_events().
+    const char     *dither_antialias;
+    const char     *dither_charset;
+    const char     *dither_color;
+    const char     *dither_algo;
+
+    /* image infos (copied from the mp_image_params in reconfig()) */
+    int image_format;
+    int image_width;
+    int image_height;
+
+    // canvas size as queried from libcaca in resize()
+    int screen_w, screen_h;
+};
+
+/* We want 24bpp always for now */
+// Pixel layout handed to caca_create_dither(): bits per pixel, bytes per
+// pixel (used for the pitch and the buffer size), and the channel bit masks.
+// The only input format accepted by query_format() is IMGFMT_BGR24.
+static const unsigned int bpp   = 24;
+static const unsigned int depth = 3;
+static const unsigned int rmask = 0xff0000;
+static const unsigned int gmask = 0x00ff00;
+static const unsigned int bmask = 0x0000ff;
+static const unsigned int amask = 0;    // no alpha channel
+
+/*
+ * Query the current canvas size and (re)create the dither object and the
+ * frame staging buffer for the current image size. Called on reconfig and
+ * when libcaca reports that the terminal was resized.
+ *
+ * returns: 0 on success, -1 if the dither object could not be created. In
+ *          the failure case priv->dither and priv->dither_buffer are NULL.
+ */
+static int resize(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    priv->screen_w = caca_get_canvas_width(priv->canvas);
+    priv->screen_h = caca_get_canvas_height(priv->canvas);
+
+    // Reset the pointers immediately after freeing. Otherwise a failure
+    // below would leave a dangling dither_buffer behind, which uninit() or
+    // the next resize() would free a second time.
+    caca_free_dither(priv->dither);
+    priv->dither = NULL;
+    talloc_free(priv->dither_buffer);
+    priv->dither_buffer = NULL;
+
+    priv->dither = caca_create_dither(bpp, priv->image_width, priv->image_height,
+                                depth * priv->image_width,
+                                rmask, gmask, bmask, amask);
+    if (priv->dither == NULL) {
+        MP_FATAL(vo, "caca_create_dither failed!\n");
+        return -1;
+    }
+
+    // Do the size computation in size_t, so it can't wrap around in 32 bit
+    // unsigned arithmetic for large images.
+    size_t buffer_size =
+        (size_t)depth * priv->image_width * priv->image_height;
+    priv->dither_buffer = talloc_array(NULL, uint8_t, buffer_size);
+
+    /* Default libcaca features */
+    caca_set_dither_antialias(priv->dither, priv->dither_antialias);
+    caca_set_dither_charset(priv->dither, priv->dither_charset);
+    caca_set_dither_color(priv->dither, priv->dither_color);
+    caca_set_dither_algorithm(priv->dither, priv->dither_algo);
+
+    return 0;
+}
+
+// vo_driver.reconfig: remember the new video parameters and rebuild the
+// dither state for them. Returns the result of resize().
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *p = vo->priv;
+
+    p->image_format = params->imgfmt;
+    p->image_width  = params->w;
+    p->image_height = params->h;
+
+    int ret = resize(vo);
+    return ret;
+}
+
+// vo_driver.draw_frame: copy the current image into the packed staging
+// buffer and let libcaca dither it onto the whole canvas. The result only
+// becomes visible with the next flip_page().
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *priv = vo->priv;
+    struct mp_image *mpi = frame->current;
+    if (!mpi)
+        return;
+
+    // resize() leaves priv->dither == NULL if caca_create_dither() fails,
+    // and check_events() ignores that failure on terminal resize. There is
+    // no valid dither object or buffer to draw with in that case.
+    if (!priv->dither || !priv->dither_buffer)
+        return;
+
+    // The staging buffer has no padding: one line is exactly width * depth
+    // bytes, while the source image uses its own stride.
+    const int line_bytes = priv->image_width * depth;
+    memcpy_pic(priv->dither_buffer, mpi->planes[0], line_bytes,
+               priv->image_height, line_bytes, mpi->stride[0]);
+    caca_dither_bitmap(priv->canvas, 0, 0, priv->screen_w, priv->screen_h,
+                       priv->dither, priv->dither_buffer);
+}
+
+// vo_driver.flip_page: make the canvas contents visible on the display.
+static void flip_page(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    caca_display_t *dp = p->display;
+
+    caca_refresh_display(dp);
+}
+
+/*
+ * Advance *str to the entry following it in list, wrapping around at the end.
+ *
+ * list: NULL-terminated array of (name, description) string pairs, as
+ *       returned by the caca_get_dither_*_list() functions
+ * str:  in: currently selected name; out: name of the next entry. If the
+ *       current name is NULL or not found in list, the first entry is
+ *       selected.
+ * msg:  out: description belonging to the newly selected name
+ *
+ * If list is NULL or empty, *str and *msg are left untouched (there is
+ * nothing that could be selected).
+ */
+static void set_next_str(const char * const *list, const char **str,
+                         const char **msg)
+{
+    if (!list || !list[0])
+        return;
+
+    // Fallback: first entry
+    int next = 0;
+
+    if (*str) {
+        for (int ind = 0; list[ind]; ind += 2) {
+            if (strcmp(list[ind], *str) == 0) {
+                // list[ind + 2] is either the next name or the terminator;
+                // in the latter case keep the wrap-around to the first entry.
+                if (list[ind + 2])
+                    next = ind + 2;
+                break;
+            }
+        }
+    }
+
+    *str = list[next];
+    *msg = list[next + 1];
+}
+
+// Translation of libcaca special key codes (CACA_KEY_*) to mpv key codes
+// (MP_KEY_*). Looked up with lookup_keymap_table() in check_events(); the
+// table ends with a {0, 0} entry. Keys without an entry here are handled
+// separately in check_events().
+static const struct mp_keymap keysym_map[] = {
+    {CACA_KEY_RETURN, MP_KEY_ENTER}, {CACA_KEY_ESCAPE, MP_KEY_ESC},
+    {CACA_KEY_UP, MP_KEY_UP}, {CACA_KEY_DOWN, MP_KEY_DOWN},
+    {CACA_KEY_LEFT, MP_KEY_LEFT}, {CACA_KEY_RIGHT, MP_KEY_RIGHT},
+    {CACA_KEY_PAGEUP, MP_KEY_PAGE_UP}, {CACA_KEY_PAGEDOWN, MP_KEY_PAGE_DOWN},
+    {CACA_KEY_HOME, MP_KEY_HOME}, {CACA_KEY_END, MP_KEY_END},
+    {CACA_KEY_INSERT, MP_KEY_INSERT}, {CACA_KEY_DELETE, MP_KEY_DELETE},
+    {CACA_KEY_BACKSPACE, MP_KEY_BACKSPACE}, {CACA_KEY_TAB, MP_KEY_TAB},
+    {CACA_KEY_PAUSE, MP_KEY_PAUSE},
+    // function keys
+    {CACA_KEY_F1, MP_KEY_F+1}, {CACA_KEY_F2, MP_KEY_F+2},
+    {CACA_KEY_F3, MP_KEY_F+3}, {CACA_KEY_F4, MP_KEY_F+4},
+    {CACA_KEY_F5, MP_KEY_F+5}, {CACA_KEY_F6, MP_KEY_F+6},
+    {CACA_KEY_F7, MP_KEY_F+7}, {CACA_KEY_F8, MP_KEY_F+8},
+    {CACA_KEY_F9, MP_KEY_F+9}, {CACA_KEY_F10, MP_KEY_F+10},
+    {CACA_KEY_F11, MP_KEY_F+11}, {CACA_KEY_F12, MP_KEY_F+12},
+    {CACA_KEY_F13, MP_KEY_F+13}, {CACA_KEY_F14, MP_KEY_F+14},
+    {CACA_KEY_F15, MP_KEY_F+15},
+    {0, 0}
+};
+
+// Handle a single key press reported by libcaca.
+// - keys listed in keysym_map are forwarded as their mpv key code
+// - d/a/h/c (either case) switch to the next dither algorithm, antialias
+//   method, charset and color mode respectively
+// - any other key code up to 255 is forwarded unchanged
+static void handle_key_press(struct vo *vo, int key)
+{
+    struct priv *p = vo->priv;
+    // Receives the description of the newly selected setting (not used).
+    const char *msg_name;
+
+    int mpkey = lookup_keymap_table(keysym_map, key);
+    if (mpkey) {
+        mp_input_put_key(vo->input_ctx, mpkey);
+        return;
+    }
+
+    if (key == 'd' || key == 'D') {
+        /* Toggle dithering algorithm */
+        set_next_str(caca_get_dither_algorithm_list(p->dither),
+                     &p->dither_algo, &msg_name);
+        caca_set_dither_algorithm(p->dither, p->dither_algo);
+    } else if (key == 'a' || key == 'A') {
+        /* Toggle antialiasing method */
+        set_next_str(caca_get_dither_antialias_list(p->dither),
+                     &p->dither_antialias, &msg_name);
+        caca_set_dither_antialias(p->dither, p->dither_antialias);
+    } else if (key == 'h' || key == 'H') {
+        /* Toggle charset method */
+        set_next_str(caca_get_dither_charset_list(p->dither),
+                     &p->dither_charset, &msg_name);
+        caca_set_dither_charset(p->dither, p->dither_charset);
+    } else if (key == 'c' || key == 'C') {
+        /* Toggle color method */
+        set_next_str(caca_get_dither_color_list(p->dither),
+                     &p->dither_color, &msg_name);
+        caca_set_dither_color(p->dither, p->dither_color);
+    } else if (key <= 255) {
+        mp_input_put_key(vo->input_ctx, key);
+    }
+}
+
+// Drain the libcaca event queue (without blocking) and translate the events
+// into resizes and mpv input events.
+static void check_events(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    caca_event_t ev;
+
+    while (caca_get_event(p->display, CACA_EVENT_ANY, &ev, 0)) {
+        switch (ev.type) {
+        case CACA_EVENT_RESIZE:
+            caca_refresh_display(p->display);
+            resize(vo);
+            break;
+        case CACA_EVENT_QUIT:
+            mp_input_put_key(vo->input_ctx, MP_KEY_CLOSE_WIN);
+            break;
+        case CACA_EVENT_MOUSE_MOTION:
+            mp_input_set_mouse_pos(vo->input_ctx, ev.data.mouse.x,
+                                   ev.data.mouse.y);
+            break;
+        case CACA_EVENT_MOUSE_PRESS: {
+            int btn = MP_MBTN_BASE + ev.data.mouse.button - 1;
+            mp_input_put_key(vo->input_ctx, btn | MP_KEY_STATE_DOWN);
+            break;
+        }
+        case CACA_EVENT_MOUSE_RELEASE: {
+            int btn = MP_MBTN_BASE + ev.data.mouse.button - 1;
+            mp_input_put_key(vo->input_ctx, btn | MP_KEY_STATE_UP);
+            break;
+        }
+        case CACA_EVENT_KEY_PRESS:
+            handle_key_press(vo, ev.data.key.ch);
+            break;
+        default:
+            break;
+        }
+    }
+}
+
+// vo_driver.uninit: release everything created by resize() and preinit().
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    // per-video state
+    talloc_free(p->dither_buffer);
+    p->dither_buffer = NULL;
+    caca_free_dither(p->dither);
+    p->dither = NULL;
+
+    // the display is attached to the canvas, so it goes first
+    caca_free_display(p->display);
+    caca_free_canvas(p->canvas);
+}
+
+// vo_driver.preinit: select the default dither settings, then create the
+// libcaca canvas and a display for it.
+// returns: 0 on success, ENOSYS if the canvas or the display can't be created
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    p->dither_algo      = "none";
+    p->dither_color     = "default";
+    p->dither_charset   = "default";
+    p->dither_antialias = "default";
+
+    p->canvas = caca_create_canvas(0, 0);
+    if (!p->canvas) {
+        MP_ERR(vo, "failed to create canvas\n");
+        return ENOSYS;
+    }
+
+    p->display = caca_create_display(p->canvas);
+    if (p->display)
+        return 0;
+
+    // No display: undo the canvas creation before bailing out.
+    MP_ERR(vo, "failed to create display\n");
+    caca_free_canvas(p->canvas);
+    return ENOSYS;
+}
+
+// vo_driver.query_format: packed 24 bit BGR is the only supported input.
+static int query_format(struct vo *vo, int format)
+{
+    if (format == IMGFMT_BGR24)
+        return 1;
+    return 0;
+}
+
+// vo_driver.control: handles event polling and window title updates; every
+// other request is reported as not implemented.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct priv *p = vo->priv;
+
+    if (request == VOCTRL_CHECK_EVENTS) {
+        check_events(vo);
+        return VO_TRUE;
+    }
+
+    if (request == VOCTRL_UPDATE_WINDOW_TITLE) {
+        // data carries the new title string for this request
+        caca_set_display_title(p->display, (char *)data);
+        return VO_TRUE;
+    }
+
+    return VO_NOTIMPL;
+}
+
+// Driver entry for the libcaca VO. Callbacks that are not listed here (e.g.
+// get_image, get_vsync, wakeup/wait_events) are left unset (NULL).
+const struct vo_driver video_out_caca = {
+    .name = "caca",
+    .description = "libcaca",
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .uninit = uninit,
+    // struct priv is allocated by the core and made available as vo->priv
+    .priv_size = sizeof(struct priv),
+};
diff --git a/video/out/vo_direct3d.c b/video/out/vo_direct3d.c
new file mode 100644
index 0000000..16936bb
--- /dev/null
+++ b/video/out/vo_direct3d.c
@@ -0,0 +1,1247 @@
+/*
+ * Copyright (c) 2008 Georgi Petrov (gogothebee) <gogothebee@gmail.com>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <stdbool.h>
+#include <assert.h>
+#include <d3d9.h>
+#include <inttypes.h>
+#include <limits.h>
+#include "config.h"
+#include "options/options.h"
+#include "options/m_option.h"
+#include "sub/draw_bmp.h"
+#include "mpv_talloc.h"
+#include "vo.h"
+#include "video/csputils.h"
+#include "video/mp_image.h"
+#include "video/img_format.h"
+#include "common/msg.h"
+#include "common/common.h"
+#include "w32_common.h"
+#include "sub/osd.h"
+
+#include "config.h"
+#if !HAVE_GPL
+#error GPL only
+#endif
+
+// Direct3D device type to use: the hardware (HAL) device. The commented-out
+// alternative below selects D3DDEVTYPE_REF instead.
+#define DEVTYPE D3DDEVTYPE_HAL
+//#define DEVTYPE D3DDEVTYPE_REF
+
+// Flexible vertex format matching vertex_osd: a position plus one set of
+// texture coordinates.
+#define D3DFVF_OSD_VERTEX (D3DFVF_XYZ | D3DFVF_TEX1)
+
+// Vertex as used for OSD rendering; the field layout corresponds to
+// D3DFVF_OSD_VERTEX (D3DFVF_XYZ | D3DFVF_TEX1).
+typedef struct {
+    float x, y, z;      // position
+    float tu, tv;       // texture coordinates
+} vertex_osd;
+
+// A texture that can be written by the CPU and used for rendering. Depending
+// on the device and options this is a single texture, or a lockable system
+// texture plus a device texture it is copied to (see d3dtex_allocate() and
+// d3dtex_update()).
+struct d3dtex {
+    // user-requested size
+    int w, h;
+    // allocated texture size (>= requested size, see d3d_fix_texture_size();
+    // reset to 0 by d3dtex_release())
+    int tex_w, tex_h;
+    // D3DPOOL_SYSTEMMEM (or others) texture:
+    // - can be locked in order to write (and even read) data
+    // - can _not_ (probably) be used as texture for rendering
+    // This is always non-NULL if d3dtex_allocate succeeds.
+    IDirect3DTexture9 *system;
+    // D3DPOOL_DEFAULT texture:
+    // - can't be locked (probably)
+    // - must be used for rendering
+    // This can be NULL if the system one can be both locked and mapped.
+    IDirect3DTexture9 *device;
+};
+
+// Sizes osd_vertices below: 6 vertices are reserved per OSD rectangle.
+#define MAX_OSD_RECTS 64
+
+/* Private state of the direct3d VO (kept in one struct instead of global
+ * variables). The number of fields is intentionally kept low.
+ */
+typedef struct d3d_priv {
+    struct mp_log *log;     // used by the MP_ERR(priv, ...) style log macros
+
+    // If set, texture sizes are not rounded up to a multiple of 16
+    // (see d3d_fix_texture_size()).
+    bool opt_disable_texture_align;
+    // debugging
+    bool opt_force_power_of_2;
+    // Memory pool selection for textures, evaluated in d3dtex_allocate():
+    // 1 = managed, 2 = default, 3 = default + shadow texture,
+    // 4 = scratch + shadow texture, anything else = system memory (with a
+    // shadow texture if the device can't texture from system memory).
+    int opt_texture_memory;
+    bool opt_swap_discard;
+    bool opt_exact_backbuffer;
+
+    struct vo *vo;          // the VO this state belongs to
+
+    bool have_image;
+    double osd_pts;
+
+    D3DLOCKED_RECT locked_rect; /**< The locked offscreen surface */
+    RECT fs_movie_rect;         /**< Rect (upscaled) of the movie when displayed
+                                in fullscreen */
+    RECT fs_panscan_rect;       /**< PanScan source surface cropping in
+                                fullscreen */
+    int src_width;              /**< Source (movie) width */
+    int src_height;             /**< Source (movie) height */
+    struct mp_osd_res osd_res;  // filled by vo_get_src_dst_rects()
+    int image_format;           /**< mplayer image format */
+    struct mp_image_params params;
+
+    D3DFORMAT movie_src_fmt;        /**< Movie colorspace format (depends on
+                                    the movie's codec) */
+    D3DFORMAT desktop_fmt;          /**< Desktop (screen) colorspace format.
+                                    Usually XRGB */
+
+    HANDLE d3d9_dll;                /**< d3d9 Library HANDLE */
+    IDirect3D9 * (WINAPI *pDirect3DCreate9)(UINT); /**< pointer to Direct3DCreate9 function */
+
+    LPDIRECT3D9        d3d_handle;  /**< Direct3D Handle */
+    LPDIRECT3DDEVICE9  d3d_device;  /**< The Direct3D Adapter */
+    bool d3d_in_scene;              /**< BeginScene was called, EndScene not */
+    IDirect3DSurface9 *d3d_surface; /**< Offscreen Direct3D Surface. MPlayer
+                                    renders inside it. Uses colorspace
+                                    priv->movie_src_fmt */
+    IDirect3DSurface9 *d3d_backbuf; /**< Video card's back buffer (used to
+                                    display next frame) */
+    int cur_backbuf_width;          /**< Current backbuffer width */
+    int cur_backbuf_height;         /**< Current backbuffer height */
+    int device_caps_power2_only;    /**< 1 = texture sizes have to be power 2
+                                    0 = texture sizes can be anything */
+    int device_caps_square_only;    /**< 1 = textures have to be square
+                                    0 = textures do not have to be square */
+    int device_texture_sys;         /**< 1 = device can texture from system memory
+                                    0 = device requires shadow */
+    int max_texture_width;          /**< from the device capabilities */
+    int max_texture_height;         /**< from the device capabilities */
+
+    D3DMATRIX d3d_colormatrix;
+
+    // OSD rendering state
+    struct mp_draw_sub_cache *osd_cache;
+    struct d3dtex osd_texture;
+    int osd_num_vertices;
+    vertex_osd osd_vertices[MAX_OSD_RECTS * 6];
+} d3d_priv;
+
+// One mapping between an mpv image format and a Direct3D surface format.
+struct fmt_entry {
+    const unsigned int  mplayer_fmt;   /**< Given by MPlayer */
+    const D3DFORMAT     fourcc;        /**< Required by D3D's test function */
+};
+
+/* Map table from reported MPlayer format to the required
+   fourcc. This is needed to perform the format query.
+   An image format can be listed more than once (with different D3D
+   formats). The table is terminated by a zeroed entry. */
+
+static const struct fmt_entry fmt_table[] = {
+    // planar YUV
+    {IMGFMT_420P,  MAKEFOURCC('Y','V','1','2')},
+    {IMGFMT_420P,  MAKEFOURCC('I','4','2','0')},
+    {IMGFMT_420P,  MAKEFOURCC('I','Y','U','V')},
+    {IMGFMT_NV12,  MAKEFOURCC('N','V','1','2')},
+    // packed YUV
+    {IMGFMT_UYVY,  D3DFMT_UYVY},
+    // packed RGB
+    {IMGFMT_BGR0, D3DFMT_X8R8G8B8},
+    {IMGFMT_RGB0, D3DFMT_X8B8G8R8},
+    {IMGFMT_BGR24, D3DFMT_R8G8B8}, //untested
+    {IMGFMT_RGB565, D3DFMT_R5G6B5},
+    {0},
+};
+
+
+// Forward declarations of functions that are defined further down in this
+// file.
+static bool resize_d3d(d3d_priv *priv);
+static void uninit(struct vo *vo);
+static void flip_page(struct vo *vo);
+static mp_image_t *get_window_screenshot(d3d_priv *priv);
+static void draw_osd(struct vo *vo);
+static bool change_d3d_backbuffer(d3d_priv *priv);
+
+// Set *m to the 4x4 identity matrix.
+static void d3d_matrix_identity(D3DMATRIX *m)
+{
+    memset(m, 0, sizeof(*m));
+
+    // ones on the diagonal
+    m->_11 = 1.0f;
+    m->_22 = 1.0f;
+    m->_33 = 1.0f;
+    m->_44 = 1.0f;
+}
+
+// Set *m to a 2D orthographic projection that maps the x range [left, right]
+// and the y range [bottom, top] to [-1, 1] each. z is passed through
+// unchanged.
+static void d3d_matrix_ortho(D3DMATRIX *m, float left, float right,
+                             float bottom, float top)
+{
+    // Start with identity: _33, _43 and _44 already have their final values
+    // (1, 0 and 1) after this.
+    d3d_matrix_identity(m);
+
+    // scale
+    m->_11 = 2.0f / (right - left);
+    m->_22 = 2.0f / (top - bottom);
+
+    // translation
+    m->_41 = -(right + left) / (right - left);
+    m->_42 = -(top + bottom) / (top - bottom);
+}
+
+/****************************************************************************
+ *                                                                          *
+ *                                                                          *
+ *                                                                          *
+ * Direct3D specific implementation functions                               *
+ *                                                                          *
+ *                                                                          *
+ *                                                                          *
+ ****************************************************************************/
+
+// Make sure the device is inside a BeginScene/EndScene pair. Does nothing if
+// BeginScene was already called.
+// returns: false if BeginScene failed, true otherwise
+static bool d3d_begin_scene(d3d_priv *priv)
+{
+    if (priv->d3d_in_scene)
+        return true;
+
+    HRESULT hr = IDirect3DDevice9_BeginScene(priv->d3d_device);
+    if (FAILED(hr)) {
+        MP_ERR(priv, "BeginScene failed.\n");
+        return false;
+    }
+
+    priv->d3d_in_scene = true;
+    return true;
+}
+
+/** @brief Update the movie (destination) and panscan (source) rectangles.
+ *  Both are taken from vo_get_src_dst_rects(), which also fills
+ *  priv->osd_res.
+ */
+static void calc_fs_rect(d3d_priv *priv)
+{
+    struct mp_rect src, dst;
+    vo_get_src_dst_rects(priv->vo, &src, &dst, &priv->osd_res);
+
+    // where the video ends up on screen
+    priv->fs_movie_rect = (RECT){
+        .left   = dst.x0,
+        .top    = dst.y0,
+        .right  = dst.x1,
+        .bottom = dst.y1,
+    };
+
+    // which part of the source surface is shown
+    priv->fs_panscan_rect = (RECT){
+        .left   = src.x0,
+        .top    = src.y0,
+        .right  = src.x1,
+        .bottom = src.y1,
+    };
+}
+
+// Turn the requested texture size in *width/*height into a size the D3D
+// device can handle. The size is never decreased, and the result is at least
+// 1x1.
+static void d3d_fix_texture_size(d3d_priv *priv, int *width, int *height)
+{
+    const int req_w = *width;
+    const int req_h = *height;
+
+    // 0-sized (or smaller) textures only cause nasty special cases
+    int w = MPMAX(req_w, 1);
+    int h = MPMAX(req_h, 1);
+
+    if (priv->device_caps_power2_only) {
+        // smallest powers of 2 that hold the requested size
+        for (w = 1; w < req_w; w <<= 1) {}
+        for (h = 1; h < req_h; h <<= 1) {}
+    }
+
+    if (priv->device_caps_square_only) {
+        /* device only supports square textures */
+        int side = MPMAX(w, h);
+        w = side;
+        h = side;
+    }
+
+    if (!priv->opt_disable_texture_align) {
+        // better round up to a multiple of 16
+        w = (w + 15) & ~15;
+        h = (h + 15) & ~15;
+    }
+
+    *width = w;
+    *height = h;
+}
+
+// Release the D3D textures of tex (if any) and reset the allocated size.
+// The user-requested size (tex->w/h) is left alone. Safe to call on a
+// texture that was already released.
+static void d3dtex_release(d3d_priv *priv, struct d3dtex *tex)
+{
+    if (tex->system) {
+        IDirect3DTexture9_Release(tex->system);
+        tex->system = NULL;
+    }
+
+    if (tex->device) {
+        IDirect3DTexture9_Release(tex->device);
+        tex->device = NULL;
+    }
+
+    tex->tex_w = 0;
+    tex->tex_h = 0;
+}
+
+// (Re)allocate tex for a user size of w x h and the given format. Any
+// previous textures in tex are released first. The allocated size can be
+// larger than requested (see d3d_fix_texture_size()).
+// returns: true on success; false on failure, in which case tex holds no
+//          textures
+static bool d3dtex_allocate(d3d_priv *priv, struct d3dtex *tex, D3DFORMAT fmt,
+                            int w, int h)
+{
+    d3dtex_release(priv, tex);
+
+    tex->w = w;
+    tex->h = h;
+
+    int alloc_w = w;
+    int alloc_h = h;
+    d3d_fix_texture_size(priv, &alloc_w, &alloc_h);
+
+    // Default: lockable system memory texture, plus a shadow texture on the
+    // device if the device can't texture from system memory directly.
+    int pool = D3DPOOL_SYSTEMMEM;
+    bool want_shadow = !priv->device_texture_sys;
+
+    // The texture memory option overrides both choices.
+    const int mode = priv->opt_texture_memory;
+    if (mode == 1) {
+        pool = D3DPOOL_MANAGED;
+        want_shadow = false;
+    } else if (mode == 2) {
+        pool = D3DPOOL_DEFAULT;
+        want_shadow = false;
+    } else if (mode == 3) {
+        pool = D3DPOOL_DEFAULT;
+        want_shadow = true;
+    } else if (mode == 4) {
+        pool = D3DPOOL_SCRATCH;
+        want_shadow = true;
+    }
+
+    HRESULT hr = IDirect3DDevice9_CreateTexture(priv->d3d_device,
+        alloc_w, alloc_h, 1, D3DUSAGE_DYNAMIC, fmt, pool, &tex->system, NULL);
+    if (FAILED(hr)) {
+        MP_ERR(priv, "Allocating %dx%d texture in system RAM failed.\n", w, h);
+        d3dtex_release(priv, tex);
+        return false;
+    }
+
+    if (want_shadow) {
+        hr = IDirect3DDevice9_CreateTexture(priv->d3d_device,
+            alloc_w, alloc_h, 1, D3DUSAGE_DYNAMIC, fmt, D3DPOOL_DEFAULT,
+            &tex->device, NULL);
+        if (FAILED(hr)) {
+            MP_ERR(priv, "Allocating %dx%d texture in video RAM failed.\n", w, h);
+            d3dtex_release(priv, tex);
+            return false;
+        }
+    }
+
+    tex->tex_w = alloc_w;
+    tex->tex_h = alloc_h;
+    return true;
+}
+
+// Return the texture that has to be bound for rendering: the device texture
+// if there is one, the system texture otherwise.
+static IDirect3DBaseTexture9 *d3dtex_get_render_texture(d3d_priv *priv,
+                                                        struct d3dtex *tex)
+{
+    if (tex->device)
+        return (IDirect3DBaseTexture9 *)tex->device;
+    return (IDirect3DBaseTexture9 *)tex->system;
+}
+
+// Upload the contents of the system texture to the device texture. Nothing
+// has to be done (and true is returned) if tex has no device texture.
+// returns: false if the upload failed
+static bool d3dtex_update(d3d_priv *priv, struct d3dtex *tex)
+{
+    if (!tex->device)
+        return true;
+
+    IDirect3DBaseTexture9 *src = (IDirect3DBaseTexture9 *)tex->system;
+    IDirect3DBaseTexture9 *dst = (IDirect3DBaseTexture9 *)tex->device;
+    HRESULT hr = IDirect3DDevice9_UpdateTexture(priv->d3d_device, src, dst);
+    return !FAILED(hr);
+}
+
+// Unlock the video surface if it is currently locked. The cached lock is
+// forgotten even when unlocking fails.
+static void d3d_unlock_video_objects(d3d_priv *priv)
+{
+    bool locked = priv->locked_rect.pBits != NULL;
+
+    if (locked && FAILED(IDirect3DSurface9_UnlockRect(priv->d3d_surface)))
+        MP_VERBOSE(priv, "Unlocking video objects failed.\n");
+
+    priv->locked_rect.pBits = NULL;
+}
+
+// Release the video surface, unlocking it first if necessary.
+static void d3d_destroy_video_objects(d3d_priv *priv)
+{
+    d3d_unlock_video_objects(priv);
+
+    if (priv->d3d_surface) {
+        IDirect3DSurface9_Release(priv->d3d_surface);
+        priv->d3d_surface = NULL;
+    }
+}
+
+/** @brief Release the video surface, the OSD texture and the backbuffer
+ *  reference, and mark the device as not being inside a scene.
+ */
+static void destroy_d3d_surfaces(d3d_priv *priv)
+{
+    MP_VERBOSE(priv, "destroy_d3d_surfaces called.\n");
+
+    d3d_destroy_video_objects(priv);
+    d3dtex_release(priv, &priv->osd_texture);
+
+    if (priv->d3d_backbuf) {
+        IDirect3DSurface9_Release(priv->d3d_backbuf);
+        priv->d3d_backbuf = NULL;
+    }
+
+    priv->d3d_in_scene = false;
+}
+
+// Create the offscreen video surface (src_width x src_height, format
+// movie_src_fmt) unless it already exists. Requires a configured image format.
+static bool d3d_configure_video_objects(d3d_priv *priv)
+{
+    assert(priv->image_format != 0);
+
+    // Nothing to do if the surface is still around.
+    if (priv->d3d_surface)
+        return true;
+
+    if (FAILED(IDirect3DDevice9_CreateOffscreenPlainSurface(
+            priv->d3d_device, priv->src_width, priv->src_height,
+            priv->movie_src_fmt, D3DPOOL_DEFAULT, &priv->d3d_surface, NULL)))
+    {
+        MP_ERR(priv, "Allocating offscreen surface failed.\n");
+        return false;
+    }
+
+    return true;
+}
+
+// Make sure the D3D objects needed for rendering exist and apply the default
+// render state. Objects that already exist are reused, so the work done here
+// ranges from a full initialization to almost nothing.
+static bool create_d3d_surfaces(d3d_priv *priv)
+{
+    MP_VERBOSE(priv, "create_d3d_surfaces called.\n");
+
+    if (!priv->d3d_backbuf) {
+        if (FAILED(IDirect3DDevice9_GetBackBuffer(priv->d3d_device, 0, 0,
+                                                  D3DBACKBUFFER_TYPE_MONO,
+                                                  &priv->d3d_backbuf)))
+        {
+            MP_ERR(priv, "Allocating backbuffer failed.\n");
+            return false;
+        }
+    }
+
+    if (!d3d_configure_video_objects(priv))
+        return false;
+
+    // Default render state as {state, value} pairs, applied in this order.
+    const DWORD render_states[][2] = {
+        {D3DRS_SRCBLEND,  D3DBLEND_SRCALPHA},
+        {D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA},
+        {D3DRS_ALPHAFUNC, D3DCMP_GREATER},
+        {D3DRS_ALPHAREF,  0x0},
+        {D3DRS_LIGHTING,  FALSE},
+    };
+    int num_states = sizeof(render_states) / sizeof(render_states[0]);
+    for (int i = 0; i < num_states; i++) {
+        IDirect3DDevice9_SetRenderState(priv->d3d_device,
+                                        render_states[i][0],
+                                        render_states[i][1]);
+    }
+
+    // we use up to 3 samplers for up to 3 YUV planes
+    // TODO
+    /*
+    for (int n = 0; n < 3; n++) {
+        IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_MINFILTER,
+                                         D3DTEXF_LINEAR);
+        IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_MAGFILTER,
+                                         D3DTEXF_LINEAR);
+        IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_ADDRESSU,
+                                         D3DTADDRESS_CLAMP);
+        IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_ADDRESSV,
+                                         D3DTADDRESS_CLAMP);
+    }
+    */
+
+    return true;
+}
+
+// Create the IDirect3D9 object, read the display mode and device caps, check
+// that the OSD texture format is usable and create the device through
+// change_d3d_backbuffer(). Returns false at the first step that fails;
+// objects created up to that point are not released here.
+static bool init_d3d(d3d_priv *priv)
+{
+    priv->d3d_handle = priv->pDirect3DCreate9(D3D_SDK_VERSION);
+    if (!priv->d3d_handle) {
+        MP_ERR(priv, "Initializing Direct3D failed.\n");
+        return false;
+    }
+
+    D3DDISPLAYMODE disp_mode;
+    if (FAILED(IDirect3D9_GetAdapterDisplayMode(priv->d3d_handle,
+                                                D3DADAPTER_DEFAULT,
+                                                &disp_mode)))
+    {
+        MP_ERR(priv, "Reading display mode failed.\n");
+        return false;
+    }
+
+    // The backbuffer starts out with the format and size of the display mode.
+    priv->desktop_fmt = disp_mode.Format;
+    priv->cur_backbuf_width = disp_mode.Width;
+    priv->cur_backbuf_height = disp_mode.Height;
+
+    MP_VERBOSE(priv, "Setting backbuffer dimensions to (%dx%d).\n",
+               disp_mode.Width, disp_mode.Height);
+
+    D3DCAPS9 disp_caps;
+    if (FAILED(IDirect3D9_GetDeviceCaps(priv->d3d_handle,
+                                        D3DADAPTER_DEFAULT,
+                                        DEVTYPE,
+                                        &disp_caps)))
+    {
+        MP_ERR(priv, "Reading display capabilities failed.\n");
+        return false;
+    }
+
+    /* Store relevant information regarding the caps of the device */
+    DWORD texture_caps = disp_caps.TextureCaps;
+    DWORD dev_caps = disp_caps.DevCaps;
+    priv->max_texture_width = disp_caps.MaxTextureWidth;
+    priv->max_texture_height = disp_caps.MaxTextureHeight;
+    priv->device_texture_sys = dev_caps & D3DDEVCAPS_TEXTURESYSTEMMEMORY;
+    priv->device_caps_square_only = texture_caps & D3DPTEXTURECAPS_SQUAREONLY;
+    priv->device_caps_power2_only =
+        (texture_caps & D3DPTEXTURECAPS_POW2) &&
+        !(texture_caps & D3DPTEXTURECAPS_NONPOW2CONDITIONAL);
+
+    // The user can force power-of-2 textures regardless of the caps.
+    if (priv->opt_force_power_of_2)
+        priv->device_caps_power2_only = 1;
+
+    // The OSD is uploaded as a dynamic, filterable A8R8G8B8 texture.
+    if (FAILED(IDirect3D9_CheckDeviceFormat(priv->d3d_handle,
+                                            D3DADAPTER_DEFAULT,
+                                            DEVTYPE,
+                                            priv->desktop_fmt,
+                                            D3DUSAGE_DYNAMIC |
+                                                D3DUSAGE_QUERY_FILTER,
+                                            D3DRTYPE_TEXTURE,
+                                            D3DFMT_A8R8G8B8)))
+    {
+        MP_ERR(priv, "OSD texture format not supported.\n");
+        return false;
+    }
+
+    if (!change_d3d_backbuffer(priv))
+        return false;
+
+    MP_VERBOSE(priv, "device_caps_power2_only %d, device_caps_square_only %d\n"
+               "device_texture_sys %d\n"
+               "max_texture_width %d, max_texture_height %d\n",
+               priv->device_caps_power2_only, priv->device_caps_square_only,
+               priv->device_texture_sys, priv->max_texture_width,
+               priv->max_texture_height);
+
+    return true;
+}
+
+/** @brief Fill in the D3D presentation parameters for a windowed device that
+ *  renders into the VO window with the current backbuffer size.
+ */
+static void fill_d3d_presentparams(d3d_priv *priv,
+                                   D3DPRESENT_PARAMETERS *present_params)
+{
+    D3DPRESENT_PARAMETERS *pp = present_params;
+
+    // Everything not set explicitly below stays zero.
+    memset(pp, 0, sizeof(D3DPRESENT_PARAMETERS));
+
+    // Target window and presentation behavior.
+    pp->hDeviceWindow = vo_w32_hwnd(priv->vo);
+    pp->Windowed = TRUE;
+    pp->Flags = D3DPRESENTFLAG_VIDEO;
+    pp->PresentationInterval = D3DPRESENT_INTERVAL_ONE;
+    if (priv->opt_swap_discard)
+        pp->SwapEffect = D3DSWAPEFFECT_DISCARD;
+    else
+        pp->SwapEffect = D3DSWAPEFFECT_COPY;
+
+    // Backbuffer layout.
+    pp->BackBufferFormat = priv->desktop_fmt;
+    pp->BackBufferWidth = priv->cur_backbuf_width;
+    pp->BackBufferHeight = priv->cur_backbuf_height;
+    pp->BackBufferCount = 1;
+    pp->MultiSampleType = D3DMULTISAMPLE_NONE;
+    pp->EnableAutoDepthStencil = FALSE;
+}
+
+
+// Decide the backbuffer size, then create the D3D device if there is none
+// yet, or reset the existing one so that it picks up the new size.
+static bool change_d3d_backbuffer(d3d_priv *priv)
+{
+    int window_w = priv->vo->dwidth;
+    int window_h = priv->vo->dheight;
+
+    if (priv->opt_exact_backbuffer) {
+        // The backbuffer always has exactly the window size.
+        priv->cur_backbuf_width = window_w;
+        priv->cur_backbuf_height = window_h;
+    } else {
+        // Never shrink; grow only in the dimension that is too small.
+        if (priv->cur_backbuf_width < window_w)
+            priv->cur_backbuf_width = window_w;
+        if (priv->cur_backbuf_height < window_h)
+            priv->cur_backbuf_height = window_h;
+    }
+
+    // fill_d3d_presentparams() reads the backbuffer size decided above.
+    D3DPRESENT_PARAMETERS present_params;
+    fill_d3d_presentparams(priv, &present_params);
+
+    if (priv->d3d_device) {
+        if (FAILED(IDirect3DDevice9_Reset(priv->d3d_device, &present_params))) {
+            MP_ERR(priv, "Resetting Direct3D device failed.\n");
+            return false;
+        }
+    } else {
+        if (FAILED(IDirect3D9_CreateDevice(priv->d3d_handle,
+                                           D3DADAPTER_DEFAULT,
+                                           DEVTYPE, vo_w32_hwnd(priv->vo),
+                                           D3DCREATE_SOFTWARE_VERTEXPROCESSING
+                                           | D3DCREATE_FPU_PRESERVE
+                                           | D3DCREATE_MULTITHREADED,
+                                           &present_params, &priv->d3d_device)))
+        {
+            MP_VERBOSE(priv, "Creating Direct3D device failed.\n");
+            return false;
+        }
+    }
+
+    MP_VERBOSE(priv, "New backbuffer (%dx%d), VO (%dx%d)\n",
+               present_params.BackBufferWidth, present_params.BackBufferHeight,
+               window_w, window_h);
+
+    return true;
+}
+
+// Release all surfaces, then the device, then the IDirect3D9 object.
+static void destroy_d3d(d3d_priv *priv)
+{
+    destroy_d3d_surfaces(priv);
+
+    if (priv->d3d_device) {
+        IDirect3DDevice9_Release(priv->d3d_device);
+        priv->d3d_device = NULL;
+    }
+
+    if (priv->d3d_handle) {
+        MP_VERBOSE(priv, "Stopping Direct3D.\n");
+        IDirect3D9_Release(priv->d3d_handle);
+        priv->d3d_handle = NULL;
+    }
+}
+
+/** @brief Tear down and rebuild the whole Direct3D state. Used when
+ *  presenting fails, i.e. the video adapter is uncooperative ("lost" device).
+ *  @return 1 on success, 0 on failure
+ */
+static int reconfigure_d3d(d3d_priv *priv)
+{
+    MP_VERBOSE(priv, "reconfigure_d3d called.\n");
+
+    // Complete destruction of the D3D state, including the d3d_handle.
+    // Note: this could probably be skipped, since resize_d3d() recreates the
+    // device when d3d_device is NULL. It is unclear why the d3d_handle is
+    // released and recreated as well.
+    destroy_d3d(priv);
+
+    // Recreate the handle and device, then everything that depends on them.
+    bool ok = init_d3d(priv) && resize_d3d(priv);
+    return ok ? 1 : 0;
+}
+
+// Adapt the Direct3D state to the current window size. Also the entry point
+// for major (re)initialization: it creates the device and surfaces when they
+// are missing. Returns success early if no image format is configured yet.
+static bool resize_d3d(d3d_priv *priv)
+{
+    int win_w = priv->vo->dwidth;
+    int win_h = priv->vo->dheight;
+
+    D3DVIEWPORT9 vp = {
+        .X = 0,
+        .Y = 0,
+        .Width = win_w,
+        .Height = win_h,
+        .MinZ = 0,
+        .MaxZ = 1,
+    };
+
+    MP_VERBOSE(priv, "resize_d3d %dx%d called.\n", win_w, win_h);
+
+    // With exact-backbuffer any size difference forces a new backbuffer;
+    // otherwise only a window that outgrew the backbuffer does.
+    bool backbuf_resize;
+    if (priv->opt_exact_backbuffer) {
+        backbuf_resize = win_w != priv->cur_backbuf_width ||
+                         win_h != priv->cur_backbuf_height;
+    } else {
+        backbuf_resize = win_w > priv->cur_backbuf_width ||
+                         win_h > priv->cur_backbuf_height;
+    }
+
+    if (backbuf_resize || !priv->d3d_device) {
+        destroy_d3d_surfaces(priv);
+        if (!change_d3d_backbuffer(priv))
+            return false;
+    }
+
+    if (!priv->d3d_device || !priv->image_format)
+        return true;
+
+    if (!create_d3d_surfaces(priv))
+        return false;
+
+    if (FAILED(IDirect3DDevice9_SetViewport(priv->d3d_device, &vp))) {
+        MP_ERR(priv, "Setting viewport failed.\n");
+        return false;
+    }
+
+    // View transform chosen so that screen coordinates map to D3D ones.
+    D3DMATRIX view;
+    d3d_matrix_ortho(&view, 0.5f, vp.Width + 0.5f, vp.Height + 0.5f, 0.5f);
+    IDirect3DDevice9_SetTransform(priv->d3d_device, D3DTS_VIEW, &view);
+
+    calc_fs_rect(priv);
+    priv->vo->want_redraw = true;
+
+    return true;
+}
+
+/** @brief Uninitialize Direct3D by calling destroy_d3d(); the window itself
+ *  is not touched here (uninit() calls vo_w32_uninit() separately).
+ */
+static void uninit_d3d(d3d_priv *priv)
+{
+    MP_VERBOSE(priv, "uninit_d3d called.\n");
+
+    destroy_d3d(priv);
+}
+
+// Render one frame into the backbuffer: clear it, stretch the video surface
+// onto it if an image is available, then draw the OSD on top.
+// Returns VO_TRUE on success (or when there is no device), VO_ERROR otherwise.
+static uint32_t d3d_draw_frame(d3d_priv *priv)
+{
+    if (!priv->d3d_device)
+        return VO_TRUE;
+
+    if (!d3d_begin_scene(priv))
+        return VO_ERROR;
+
+    IDirect3DDevice9_Clear(priv->d3d_device, 0, NULL, D3DCLEAR_TARGET, 0, 0, 0);
+
+    if (priv->have_image) {
+        RECT dst = priv->fs_movie_rect;
+        RECT src = priv->fs_panscan_rect;
+
+        // Clear the lowest bit of every source coordinate (make them even).
+        src.left   &= ~(ULONG)1;
+        src.top    &= ~(ULONG)1;
+        src.right  &= ~(ULONG)1;
+        src.bottom &= ~(ULONG)1;
+
+        if (FAILED(IDirect3DDevice9_StretchRect(priv->d3d_device,
+                                                priv->d3d_surface, &src,
+                                                priv->d3d_backbuf, &dst,
+                                                D3DTEXF_LINEAR)))
+        {
+            MP_ERR(priv, "Copying frame to the backbuffer failed.\n");
+            return VO_ERROR;
+        }
+    }
+
+    draw_osd(priv->vo);
+
+    return VO_TRUE;
+}
+
+// Look movie_fmt up in fmt_table and ask D3D whether that format can be
+// converted to the desktop format. Returns the matching D3D format (fourcc),
+// or 0 if the format is unknown or the conversion is not supported.
+static D3DFORMAT check_format(d3d_priv *priv, uint32_t movie_fmt)
+{
+    for (const struct fmt_entry *e = &fmt_table[0]; e->mplayer_fmt; e++) {
+        if (e->mplayer_fmt != movie_fmt)
+            continue;
+
+        /* Test conversion from the movie colorspace to the display's
+         * target colorspace. */
+        HRESULT res = IDirect3D9_CheckDeviceFormatConversion(priv->d3d_handle,
+                                                             D3DADAPTER_DEFAULT,
+                                                             DEVTYPE,
+                                                             e->fourcc,
+                                                             priv->desktop_fmt);
+        if (FAILED(res)) {
+            MP_VERBOSE(priv, "Rejected image format: %s\n",
+                       vo_format_name(e->mplayer_fmt));
+            return 0;
+        }
+
+        MP_DBG(priv, "Accepted image format: %s\n",
+               vo_format_name(e->mplayer_fmt));
+
+        return e->fourcc;
+    }
+
+    return 0;
+}
+
+// Check whether image format fmt can be rendered (via StretchRect).
+// With initialize == false this is a pure query. With initialize == true the
+// format is additionally stored in priv as the active one.
+// Returns false if the format is not usable.
+static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize)
+{
+    int d3dfmt = check_format(priv, fmt);
+    if (!d3dfmt)
+        return false;
+
+    MP_VERBOSE(priv, "Accepted rendering methods for "
+           "format='%s': StretchRect=%#x.\n",
+           vo_format_name(fmt), d3dfmt);
+
+    if (initialize) {
+        // Nothing can fail from here on.
+        priv->image_format = fmt;
+        priv->movie_src_fmt = d3dfmt;
+    }
+
+    return true;
+}
+
+/** @brief Query whether the movie image format is supported by the HW.
+ *  @return 0 if it is not, 1 if it is (device capabilities are not probed
+ *          any further).
+ */
+static int query_format(struct vo *vo, int movie_fmt)
+{
+    d3d_priv *priv = vo->priv;
+
+    return init_rendering_mode(priv, movie_fmt, false) ? 1 : 0;
+}
+
+/****************************************************************************
+ *                                                                          *
+ *                                                                          *
+ *                                                                          *
+ * libvo Control / Callback functions                                       *
+ *                                                                          *
+ *                                                                          *
+ *                                                                          *
+ ****************************************************************************/
+
+
+/** @brief libvo Callback: Preinitialize the video card.
+ *  Loads d3d9.dll, sets up the window and initializes Direct3D just far
+ *  enough to be queried about supported formats. On any failure everything
+ *  is torn down again through uninit().
+ *
+ *  @return 0 on success, -1 on failure
+ */
+
+static int preinit(struct vo *vo)
+{
+    d3d_priv *priv = vo->priv;
+    priv->vo = vo;
+    priv->log = vo->log;
+
+    // d3d9.dll is loaded at runtime rather than linked against.
+    priv->d3d9_dll = LoadLibraryA("d3d9.dll");
+    if (!priv->d3d9_dll) {
+        MP_ERR(priv, "Unable to dynamically load d3d9.dll\n");
+        goto fail;
+    }
+
+    void *create_fn = (void *)GetProcAddress(priv->d3d9_dll, "Direct3DCreate9");
+    priv->pDirect3DCreate9 = create_fn;
+    if (!priv->pDirect3DCreate9) {
+        MP_ERR(priv, "Unable to find entry point of Direct3DCreate9\n");
+        goto fail;
+    }
+
+    /* w32_common framework call. Configures the window on the screen, gets
+     * the fullscreen dimensions and does other useful stuff.
+     */
+    if (!vo_w32_init(vo)) {
+        MP_VERBOSE(priv, "Configuring onscreen window failed.\n");
+        goto fail;
+    }
+
+    if (!init_d3d(priv))
+        goto fail;
+
+    return 0;
+
+fail:
+    uninit(vo);
+    return -1;
+}
+
+/** @brief libvo Callback: Handle control requests.
+ *  Panscan changes and window screenshots are handled here; everything else
+ *  is forwarded to vo_w32_control(), whose events are then processed.
+ *  @return VO_TRUE for requests handled here, otherwise whatever
+ *          vo_w32_control() returns
+ */
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    d3d_priv *priv = vo->priv;
+
+    if (request == VOCTRL_SET_PANSCAN) {
+        calc_fs_rect(priv);
+        priv->vo->want_redraw = true;
+        return VO_TRUE;
+    }
+
+    if (request == VOCTRL_SCREENSHOT_WIN) {
+        struct mp_image **out = data;
+        *out = get_window_screenshot(priv);
+        return VO_TRUE;
+    }
+
+    int events = 0;
+    int ret = vo_w32_control(vo, &events, request, data);
+
+    if (events & VO_EVENT_RESIZE)
+        resize_d3d(priv);
+
+    if (events & VO_EVENT_EXPOSE)
+        vo->want_redraw = true;
+
+    vo_event(vo, events);
+
+    return ret;
+}
+
+// libvo callback: (re)configure the VO for new image parameters.
+// If the image format or size changed, the video surface is dropped and the
+// rendering mode is set up for the new format before the D3D state is brought
+// up to date with resize_d3d().
+// Returns 0 on success, VO_ERROR if the format can't be rendered or the D3D
+// state can't be set up.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    d3d_priv *priv = vo->priv;
+
+    priv->have_image = false;
+
+    vo_w32_config(vo);
+
+    if ((priv->image_format != params->imgfmt)
+        || (priv->src_width != params->w)
+        || (priv->src_height != params->h))
+    {
+        d3d_destroy_video_objects(priv);
+
+        // Fail before touching the stored size/params: continuing with the
+        // previous image_format/movie_src_fmt would create a surface that
+        // doesn't match the frames that are going to be drawn.
+        if (!init_rendering_mode(priv, params->imgfmt, true)) {
+            MP_ERR(priv, "Image format not supported.\n");
+            return VO_ERROR;
+        }
+
+        priv->src_width = params->w;
+        priv->src_height = params->h;
+        priv->params = *params;
+    }
+
+    if (!resize_d3d(priv))
+        return VO_ERROR;
+
+    return 0; /* Success */
+}
+
+/** @brief libvo Callback: End the current scene and present the frame that
+ *         was drawn. If there is no device or presenting fails, try to
+ *         reinitialize Direct3D from scratch.
+ */
+static void flip_page(struct vo *vo)
+{
+    d3d_priv *priv = vo->priv;
+
+    bool scene_open = priv->d3d_device && priv->d3d_in_scene;
+    if (scene_open && FAILED(IDirect3DDevice9_EndScene(priv->d3d_device)))
+        MP_ERR(priv, "EndScene failed.\n");
+    priv->d3d_in_scene = false;
+
+    RECT rect = {0, 0, vo->dwidth, vo->dheight};
+    bool presented =
+        priv->d3d_device &&
+        !FAILED(IDirect3DDevice9_Present(priv->d3d_device, &rect, 0, 0, 0));
+    if (presented)
+        return;
+
+    MP_VERBOSE(priv, "Trying to reinitialize uncooperative video adapter.\n");
+    if (reconfigure_d3d(priv))
+        MP_VERBOSE(priv, "Video adapter reinitialized.\n");
+    else
+        MP_VERBOSE(priv, "Reinitialization failed.\n");
+}
+
+/** @brief libvo Callback: Tear down Direct3D, the w32 window state and
+ *         finally unload d3d9.dll (if it was loaded).
+ */
+static void uninit(struct vo *vo)
+{
+    d3d_priv *priv = vo->priv;
+
+    MP_VERBOSE(priv, "uninit called.\n");
+
+    uninit_d3d(priv);
+    vo_w32_uninit(vo);
+
+    if (priv->d3d9_dll) {
+        FreeLibrary(priv->d3d9_dll);
+        priv->d3d9_dll = NULL;
+    }
+}
+
+// Lock the video surface and make *out describe it as an mp_image of the
+// current source size and image format (planes/strides point into the locked
+// surface memory). An existing lock is reused.
+// Returns false if there is no device or no video surface, or if locking
+// fails. On success d3d_unlock_video_objects() must be called to unlock the
+// buffers again.
+static bool get_video_buffer(d3d_priv *priv, struct mp_image *out)
+{
+    *out = (struct mp_image) {0};
+    mp_image_set_size(out, priv->src_width, priv->src_height);
+    mp_image_setfmt(out, priv->image_format);
+
+    // The surface can be missing although the device exists (e.g. when
+    // creating the surfaces failed); LockRect would dereference NULL then.
+    if (!priv->d3d_device || !priv->d3d_surface)
+        return false;
+
+    if (!priv->locked_rect.pBits) {
+        if (FAILED(IDirect3DSurface9_LockRect(priv->d3d_surface,
+                                              &priv->locked_rect, NULL, 0)))
+        {
+            MP_ERR(priv, "Surface lock failed.\n");
+            return false;
+        }
+    }
+
+    uint8_t *base = priv->locked_rect.pBits;
+    size_t stride = priv->locked_rect.Pitch;
+
+    out->planes[0] = base;
+    out->stride[0] = stride;
+
+    if (out->num_planes == 2) {
+        // NV12, NV21: the second plane directly follows the first one and
+        // uses the same stride.
+        out->planes[1] = base + stride * out->h;
+        out->stride[1] = stride;
+    }
+
+    if (out->num_planes == 3) {
+        // Two chroma planes follow the first plane, each with half its
+        // stride. For the YV12 fourcc the two are stored in swapped order.
+        bool swap = priv->movie_src_fmt == MAKEFOURCC('Y','V','1','2');
+
+        size_t uv_stride = stride / 2;
+        uint8_t *u = base + out->h * stride;
+        uint8_t *v = u + (out->h / 2) * uv_stride;
+
+        out->planes[1] = swap ? v : u;
+        out->planes[2] = swap ? u : v;
+
+        out->stride[1] = out->stride[2] = uv_stride;
+    }
+
+    return true;
+}
+
+// libvo callback: copy the frame's current image into the video surface and
+// render it (plus OSD) into the backbuffer. Does nothing if there is no
+// device, no image in the frame, or the video surface can't be locked.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    d3d_priv *priv = vo->priv;
+    if (!priv->d3d_device)
+        return;
+
+    // Check for an image before locking anything, so that a frame without
+    // one doesn't leave the video surface locked.
+    if (!frame->current)
+        return;
+
+    struct mp_image buffer;
+    if (!get_video_buffer(priv, &buffer))
+        return;
+
+    mp_image_copy(&buffer, frame->current);
+
+    d3d_unlock_video_objects(priv);
+
+    priv->have_image = true;
+    priv->osd_pts = frame->current->pts;
+
+    d3d_draw_frame(priv);
+}
+
+// Take a screenshot of the visible part of the VO window by copying the
+// front buffer into a system memory surface and cutting out the window's
+// client area (clipped against the screen).
+// Returns a newly allocated IMGFMT_BGR0 image, or NULL on failure (including
+// when there is no D3D device or the window is not visible at all).
+static mp_image_t *get_window_screenshot(d3d_priv *priv)
+{
+    D3DDISPLAYMODE mode;
+    mp_image_t *image = NULL;
+    RECT window_rc;
+    RECT screen_rc;
+    RECT visible;
+    POINT pt;
+    D3DLOCKED_RECT locked_rect;
+    int width, height;
+    IDirect3DSurface9 *surface = NULL;
+
+    // The device can be gone (e.g. after a failed reinitialization), while
+    // screenshot requests can still arrive through control().
+    if (!priv->d3d_device) {
+        MP_ERR(priv, "No Direct3D device available.\n");
+        goto error_exit;
+    }
+
+    if (FAILED(IDirect3DDevice9_GetDisplayMode(priv->d3d_device, 0, &mode))) {
+        MP_ERR(priv, "GetDisplayMode failed.\n");
+        goto error_exit;
+    }
+
+    // The front buffer copy covers the whole display mode.
+    if (FAILED(IDirect3DDevice9_CreateOffscreenPlainSurface(priv->d3d_device,
+        mode.Width, mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface,
+        NULL)))
+    {
+        MP_ERR(priv, "Couldn't create surface.\n");
+        goto error_exit;
+    }
+
+    if (FAILED(IDirect3DDevice9_GetFrontBufferData(priv->d3d_device, 0,
+        surface)))
+    {
+        MP_ERR(priv, "Couldn't copy frontbuffer.\n");
+        goto error_exit;
+    }
+
+    // Client area of the window in screen coordinates.
+    GetClientRect(vo_w32_hwnd(priv->vo), &window_rc);
+    pt = (POINT) { 0, 0 };
+    ClientToScreen(vo_w32_hwnd(priv->vo), &pt);
+    window_rc.left = pt.x;
+    window_rc.top = pt.y;
+    window_rc.right += window_rc.left;
+    window_rc.bottom += window_rc.top;
+
+    screen_rc = (RECT) { 0, 0, mode.Width, mode.Height };
+
+    if (!IntersectRect(&visible, &screen_rc, &window_rc))
+        goto error_exit;
+    width = visible.right - visible.left;
+    height = visible.bottom - visible.top;
+    if (width < 1 || height < 1)
+        goto error_exit;
+
+    image = mp_image_alloc(IMGFMT_BGR0, width, height);
+    if (!image)
+        goto error_exit;
+
+    // locked_rect is only valid if the lock succeeds; never read it otherwise.
+    if (FAILED(IDirect3DSurface9_LockRect(surface, &locked_rect, NULL, 0))) {
+        MP_ERR(priv, "Couldn't lock surface.\n");
+        goto error_exit;
+    }
+
+    // 4 bytes per pixel in both the surface and the image.
+    memcpy_pic(image->planes[0], (char*)locked_rect.pBits + visible.top *
+               locked_rect.Pitch + visible.left * 4, width * 4, height,
+               image->stride[0], locked_rect.Pitch);
+
+    IDirect3DSurface9_UnlockRect(surface);
+    IDirect3DSurface9_Release(surface);
+
+    return image;
+
+error_exit:
+    talloc_free(image);
+    if (surface)
+        IDirect3DSurface9_Release(surface);
+    return NULL;
+}
+
+// Render the current OSD, upload the parts that changed into the OSD texture
+// and rebuild the vertex list used by draw_osd().
+// If OSD rendering fails, or nothing changed while a texture exists, the
+// previous vertex list is kept. If a later step fails, osd_num_vertices stays
+// 0, so nothing is drawn.
+static void update_osd(d3d_priv *priv)
+{
+    struct d3dtex *tex = &priv->osd_texture;
+
+    if (!priv->osd_cache)
+        priv->osd_cache = mp_draw_sub_alloc(priv, priv->vo->global);
+
+    struct sub_bitmap_list *sbs = osd_render(priv->vo->osd, priv->osd_res,
+                                             priv->osd_pts, 0, mp_draw_sub_formats);
+
+    // "active" rects are drawn, "modified" rects need to be re-uploaded.
+    struct mp_rect active[MAX_OSD_RECTS];
+    struct mp_rect modified[64];
+    int num_active = 0;
+    int num_modified = 0;
+
+    struct mp_image *osd = mp_draw_sub_overlay(priv->osd_cache, sbs,
+                    active, MP_ARRAY_SIZE(active), &num_active,
+                    modified, MP_ARRAY_SIZE(modified), &num_modified);
+
+    talloc_free(sbs);
+
+    if (!osd) {
+        MP_ERR(priv, "Failed to render OSD.\n");
+        return;
+    }
+
+    // Nothing changed and the texture still exists: keep everything as is.
+    if (num_modified == 0 && tex->system)
+        return;
+
+    priv->osd_num_vertices = 0;
+
+    // Grow the texture if the OSD image no longer fits.
+    if (osd->w > tex->tex_w || osd->h > tex->tex_h) {
+        int new_w = osd->w;
+        int new_h = osd->h;
+        d3d_fix_texture_size(priv, &new_w, &new_h);
+
+        MP_DBG(priv, "reallocate OSD surface to %dx%d.\n", new_w, new_h);
+
+        d3dtex_release(priv, tex);
+        if (!d3dtex_allocate(priv, tex, D3DFMT_A8R8G8B8, new_w, new_h))
+            return;
+    }
+
+    // Lazy: the whole texture is marked dirty. The bounding rect or several
+    // lock calls could/should be used instead; the previous approach (fully
+    // packed texture) was more efficient.
+    RECT dirty_rc = { 0, 0, tex->w, tex->h };
+
+    D3DLOCKED_RECT locked_rect;
+
+    if (FAILED(IDirect3DTexture9_LockRect(tex->system, 0, &locked_rect,
+                                          &dirty_rc, 0)))
+    {
+        MP_ERR(priv, "OSD texture lock failed.\n");
+        return;
+    }
+
+    // Copy every modified rect to the same position in the texture
+    // (4 bytes per pixel).
+    for (int i = 0; i < num_modified; i++) {
+        struct mp_rect rc = modified[i];
+        void *src = mp_image_pixel_ptr(osd, 0, rc.x0, rc.y0);
+        void *dst = (char *)locked_rect.pBits + locked_rect.Pitch * rc.y0 +
+                    rc.x0 * 4;
+        memcpy_pic(dst, src, mp_rect_w(rc) * 4, mp_rect_h(rc),
+                   locked_rect.Pitch, osd->stride[0]);
+    }
+
+    if (FAILED(IDirect3DTexture9_UnlockRect(tex->system, 0))) {
+        MP_ERR(priv, "OSD texture unlock failed.\n");
+        return;
+    }
+
+    if (!d3dtex_update(priv, tex))
+        return;
+
+    // One quad per active rect; a quad is 2 triangles, i.e. 6 vertices.
+    priv->osd_num_vertices = num_active * 6;
+
+    float tex_w = tex->tex_w;
+    float tex_h = tex->tex_h;
+
+    for (int i = 0; i < num_active; i++) {
+        struct mp_rect rc = active[i];
+
+        // Texture coordinates normalized to the allocated texture size.
+        float tx0 = rc.x0 / tex_w;
+        float ty0 = rc.y0 / tex_h;
+        float tx1 = rc.x1 / tex_w;
+        float ty1 = rc.y1 / tex_h;
+
+        vertex_osd *quad = &priv->osd_vertices[i * 6];
+        quad[0] = (vertex_osd) { rc.x0, rc.y0, 0, tx0, ty0 };
+        quad[1] = (vertex_osd) { rc.x1, rc.y0, 0, tx1, ty0 };
+        quad[2] = (vertex_osd) { rc.x0, rc.y1, 0, tx0, ty1 };
+        quad[3] = (vertex_osd) { rc.x1, rc.y1, 0, tx1, ty1 };
+        quad[4] = quad[2];
+        quad[5] = quad[1];
+    }
+}
+
+// Update the OSD texture/vertices and draw the OSD quads with alpha blending
+// on top of whatever is in the render target. The render state and texture
+// binding changed here are restored afterwards.
+static void draw_osd(struct vo *vo)
+{
+    d3d_priv *priv = vo->priv;
+    if (!priv->d3d_device)
+        return;
+
+    update_osd(priv);
+
+    // No OSD quads, nothing to draw.
+    if (!priv->osd_num_vertices)
+        return;
+
+    d3d_begin_scene(priv);
+
+    IDirect3DDevice9 *dev = priv->d3d_device;
+    IDirect3DBaseTexture9 *osd_tex =
+        d3dtex_get_render_texture(priv, &priv->osd_texture);
+
+    // Set up blending and bind the OSD texture.
+    IDirect3DDevice9_SetRenderState(dev, D3DRS_ALPHABLENDENABLE, TRUE);
+    IDirect3DDevice9_SetTexture(dev, 0, osd_tex);
+    IDirect3DDevice9_SetRenderState(dev, D3DRS_SRCBLEND, D3DBLEND_ONE);
+
+    // Triangle list: 3 vertices per primitive.
+    IDirect3DDevice9_SetFVF(dev, D3DFVF_OSD_VERTEX);
+    IDirect3DDevice9_DrawPrimitiveUP(dev, D3DPT_TRIANGLELIST,
+                                     priv->osd_num_vertices / 3,
+                                     priv->osd_vertices, sizeof(vertex_osd));
+
+    // Back to the default state.
+    IDirect3DDevice9_SetRenderState(dev, D3DRS_SRCBLEND, D3DBLEND_SRCALPHA);
+    IDirect3DDevice9_SetTexture(dev, 0, NULL);
+    IDirect3DDevice9_SetRenderState(dev, D3DRS_ALPHABLENDENABLE, FALSE);
+}
+
+#define OPT_BASE_STRUCT d3d_priv
+
+static const struct m_option opts[] = { /* sub-options of this VO */
+    {"force-power-of-2", OPT_BOOL(opt_force_power_of_2)}, /* init_d3d() forces device_caps_power2_only */
+    {"disable-texture-align", OPT_BOOL(opt_disable_texture_align)},
+    {"texture-memory", OPT_CHOICE(opt_texture_memory, /* pool choice, see d3dtex_allocate() */
+        {"default", 0},             /* D3DPOOL_SYSTEMMEM; shadow depends on device caps */
+        {"managed", 1},             /* D3DPOOL_MANAGED, no shadow texture */
+        {"default-pool", 2},        /* D3DPOOL_DEFAULT, no shadow texture */
+        {"default-pool-shadow", 3}, /* D3DPOOL_DEFAULT plus shadow texture */
+        {"scratch", 4})},           /* D3DPOOL_SCRATCH plus shadow texture */
+    {"swap-discard", OPT_BOOL(opt_swap_discard)}, /* D3DSWAPEFFECT_DISCARD instead of _COPY */
+    {"exact-backbuffer", OPT_BOOL(opt_exact_backbuffer)}, /* backbuffer always equals window size */
+    {0}
+};
+
+const struct vo_driver video_out_direct3d = { /* driver entry for the Direct3D 9 VO */
+    .description = "Direct3D 9 Renderer",
+    .name = "direct3d",
+    .preinit = preinit,           /* loads d3d9.dll, inits window and D3D */
+    .query_format = query_format, /* 1 if the format can be rendered, else 0 */
+    .reconfig = reconfig,         /* new image parameters */
+    .control = control,           /* panscan, screenshots, w32 events */
+    .draw_frame = draw_frame,     /* uploads the image and renders video + OSD */
+    .flip_page = flip_page,       /* ends the scene and presents */
+    .uninit = uninit,             /* releases D3D, the window and d3d9.dll */
+    .priv_size = sizeof(d3d_priv),
+    .options = opts,
+    .options_prefix = "vo-direct3d",
+};
diff --git a/video/out/vo_dmabuf_wayland.c b/video/out/vo_dmabuf_wayland.c
new file mode 100644
index 0000000..e04ff5d
--- /dev/null
+++ b/video/out/vo_dmabuf_wayland.c
@@ -0,0 +1,872 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libavutil/hwcontext_drm.h>
+#include <sys/mman.h>
+#include <unistd.h>
+#include "config.h"
+
+#if HAVE_VAAPI
+#include <va/va_drmcommon.h>
+#endif
+
+#include "common/global.h"
+#include "gpu/hwdec.h"
+#include "gpu/video.h"
+#include "mpv_talloc.h"
+#include "present_sync.h"
+#include "sub/draw_bmp.h"
+#include "video/fmt-conversion.h"
+#include "video/mp_image.h"
+#include "vo.h"
+#include "wayland_common.h"
+#include "wldmabuf/ra_wldmabuf.h"
+
+#if HAVE_VAAPI
+#include "video/vaapi.h"
+#endif
+
+// Generated from wayland-protocols
+#include "linux-dmabuf-unstable-v1.h"
+#include "viewporter.h"
+
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+#include "single-pixel-buffer-v1.h"
+#endif
+
+// We need at least enough buffers to avoid a
+// flickering artifact in certain formats.
+// buffer_check() refuses to reuse a buffer until this many are in the list.
+#define WL_BUFFERS_WANTED 15
+
+// Which hardware decoding interop the VO imports frames from. Selected once
+// in preinit().
+enum hwdec_type {
+    HWDEC_NONE,      // nothing usable was found; preinit() fails in this state
+    HWDEC_VAAPI,     // the RA exposes a "VADisplay" native resource
+    HWDEC_DRMPRIME,  // a loaded hwdec driver is named "drmprime"
+};
+
+// A video wl_buffer imported from a hardware surface, kept in
+// priv.buffer_list.
+struct buffer {
+    struct vo *vo;              // owning VO
+    struct wl_buffer *buffer;   // created from the dmabuf params in buffer_create()
+    struct wl_list link;        // node in priv.buffer_list
+    struct vo_frame *frame;     // frame kept alive by this buffer; NULL once released
+
+    uint32_t drm_format;        // set by the importer; 0 is treated as "import failed"
+    uintptr_t id;               // surface id used by buffer_check() to match frames
+};
+
+// A wl_buffer carved out of the OSD shm pool, kept in priv.osd_buffer_list.
+struct osd_buffer {
+    struct vo *vo;              // owning VO
+    struct wl_buffer *buffer;   // created from priv.osd_shm_pool
+    struct wl_list link;        // node in priv.osd_buffer_list
+    struct mp_image image;      // planes[0]/stride[0] point at the pool's mapping
+    size_t size;                // height * stride in bytes; length passed to munmap()
+};
+
+// Private state of the dmabuf-wayland VO.
+struct priv {
+    struct mp_log *log;                     // copied from vo->log in preinit()
+    struct mp_rect src;                     // last source rect sent to the video viewport
+    struct mpv_global *global;              // copied from vo->global in preinit()
+
+    struct ra_ctx *ctx;                     // "wldmabuf" context created in preinit()
+    struct ra_hwdec_ctx hwdec_ctx;          // hwdec interops loaded in preinit()
+
+    struct wl_shm_pool *solid_buffer_pool;  // only set when solid_buffer is shm-backed
+    struct wl_buffer *solid_buffer;         // single-pixel buffer attached to the main surface
+    struct wl_list buffer_list;             // list of struct buffer
+    struct wl_list osd_buffer_list;         // list of struct osd_buffer
+
+    // OSD shm pool and its current mapping/geometry; replaced by create_shm_pool().
+    struct wl_shm_pool *osd_shm_pool;
+    uint8_t *osd_shm_data;
+    int osd_shm_width;
+    int osd_shm_stride;
+    int osd_shm_height;
+    bool osd_surface_is_mapped;             // a buffer is currently attached to the OSD surface
+    bool osd_surface_has_contents;          // last draw_osd() reported active OSD rects
+
+    struct osd_buffer *osd_buffer;          // NOTE(review): not referenced in this file — confirm it is needed
+    struct mp_draw_sub_cache *osd_cache;    // allocated lazily in draw_osd()
+    struct mp_osd_res screen_osd_res;       // OSD resolution computed in resize()
+
+    bool destroy_buffers;                   // set on VOCTRL_RESET, handled at the next draw_frame()
+    bool force_window;                      // set in reconfig() when there is no video to show
+    enum hwdec_type hwdec_type;
+    uint32_t drm_format;                    // format probed by drm_format_check()
+    uint64_t drm_modifier;                  // modifier probed by drm_format_check()
+};
+
+// wl_buffer release handler for video buffers: drops the frame the buffer was
+// holding on to, if there is one.
+static void buffer_handle_release(void *data, struct wl_buffer *wl_buffer)
+{
+    struct buffer *released = data;
+
+    if (!released->frame)
+        return;
+
+    talloc_free(released->frame);
+    released->frame = NULL;
+}
+
+// Listener installed on every video wl_buffer.
+static const struct wl_buffer_listener buffer_listener = {
+    .release = buffer_handle_release,
+};
+
+// wl_buffer release handler for OSD buffers: unlinks the buffer from the OSD
+// list, destroys its wl_buffer and frees the bookkeeping struct.
+static void osd_buffer_handle_release(void *data, struct wl_buffer *wl_buffer)
+{
+    struct osd_buffer *released = data;
+    struct wl_buffer *proxy = released->buffer;
+
+    wl_list_remove(&released->link);
+    if (proxy) {
+        wl_buffer_destroy(proxy);
+        released->buffer = NULL;
+    }
+    talloc_free(released);
+}
+
+// Listener installed on every OSD wl_buffer.
+static const struct wl_buffer_listener osd_buffer_listener = {
+    .release = osd_buffer_handle_release,
+};
+
+#if HAVE_VAAPI
+// Closes the file descriptor of every object listed in an exported surface
+// descriptor. A zero-initialized descriptor has no objects, so nothing is
+// closed for it.
+static void close_file_descriptors(VADRMPRIMESurfaceDescriptor desc)
+{
+    for (int obj = 0; obj < desc.num_objects; ++obj) {
+        int fd = desc.objects[obj].fd;
+        close(fd);
+    }
+}
+#endif
+
+// Returns the VA surface id of src widened to uintptr_t, or 0 when VAAPI
+// support is not compiled in.
+static uintptr_t vaapi_surface_id(struct mp_image *src)
+{
+#if HAVE_VAAPI
+    return (uintptr_t)va_surface_id(src);
+#else
+    return 0;
+#endif
+}
+
+// Probes the DRM format and modifier of a VAAPI frame by exporting its
+// surface, and stores them in p->drm_format / p->drm_modifier.
+// Returns true on success; false if the export failed or VAAPI support is
+// not compiled in.
+static bool vaapi_drm_format(struct vo *vo, struct mp_image *src)
+{
+    bool format = false;
+#if HAVE_VAAPI
+    struct priv *p = vo->priv;
+    VADRMPRIMESurfaceDescriptor desc = {0};
+
+    uintptr_t id = vaapi_surface_id(src);
+    VADisplay display = ra_get_native_resource(p->ctx->ra, "VADisplay");
+    VAStatus status = vaExportSurfaceHandle(display, id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
+                                            VA_EXPORT_SURFACE_COMPOSED_LAYERS | VA_EXPORT_SURFACE_READ_ONLY, &desc);
+
+    // NOTE(review): CHECK_VA_STATUS is not handed `status`; presumably it
+    // reads the local by that name — confirm before renaming the variable.
+    if (!CHECK_VA_STATUS(vo, "vaExportSurfaceHandle()")) {
+        /* invalid surface warning => composed layers not supported */
+        if (status == VA_STATUS_ERROR_INVALID_SURFACE)
+            MP_VERBOSE(vo, "vaExportSurfaceHandle: composed layers not supported.\n");
+        goto done;
+    }
+    // Only the first layer/object is inspected.
+    p->drm_format = desc.layers[0].drm_format;
+    p->drm_modifier = desc.objects[0].drm_format_modifier;
+    format = true;
+done:
+    // The exported fds were only needed for probing; desc is zero-initialized,
+    // so this closes nothing unless the export filled it in.
+    close_file_descriptors(desc);
+#endif
+    return format;
+}
+
+// Exports the VA surface behind src as a composed-layer DRM PRIME descriptor
+// and adds each of its planes to params.
+//
+// buf->id is always set to the surface id. buf->drm_format is set to the
+// layer's format on success; it is left untouched when the export fails and
+// reset to 0 when the format/modifier pair is not supported, so callers can
+// use a zero drm_format as the failure indicator.
+// Without VAAPI support compiled in this function does nothing.
+static void vaapi_dmabuf_importer(struct buffer *buf, struct mp_image *src,
+                                  struct zwp_linux_buffer_params_v1 *params)
+{
+#if HAVE_VAAPI
+    struct vo *vo = buf->vo;
+    struct priv *p = vo->priv;
+    VADRMPRIMESurfaceDescriptor desc = {0};
+    VADisplay display = ra_get_native_resource(p->ctx->ra, "VADisplay");
+
+    /* composed has single layer */
+    int layer_no = 0;
+    buf->id = vaapi_surface_id(src);
+    VAStatus status = vaExportSurfaceHandle(display, buf->id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2,
+                                            VA_EXPORT_SURFACE_COMPOSED_LAYERS | VA_EXPORT_SURFACE_READ_ONLY, &desc);
+
+    // NOTE(review): CHECK_VA_STATUS is not handed `status`; presumably it
+    // reads the local by that name — confirm before renaming the variable.
+    if (!CHECK_VA_STATUS(vo, "vaExportSurfaceHandle()")) {
+        /* invalid surface warning => composed layers not supported */
+        if (status == VA_STATUS_ERROR_INVALID_SURFACE)
+            MP_VERBOSE(vo, "vaExportSurfaceHandle: composed layers not supported.\n");
+        goto done;
+    }
+    buf->drm_format = desc.layers[layer_no].drm_format;
+    if (!ra_compatible_format(p->ctx->ra, buf->drm_format, desc.objects[0].drm_format_modifier)) {
+        // The modifier is 64 bits wide; "%lx" only matches it where long is
+        // 64-bit, so print it through an explicit unsigned long long.
+        MP_VERBOSE(vo, "%s(%016llx) is not supported.\n",
+                   mp_tag_str(buf->drm_format),
+                   (unsigned long long)desc.objects[0].drm_format_modifier);
+        buf->drm_format = 0;
+        goto done;
+    }
+    for (int plane_no = 0; plane_no < desc.layers[layer_no].num_planes; ++plane_no) {
+        int object = desc.layers[layer_no].object_index[plane_no];
+        uint64_t modifier = desc.objects[object].drm_format_modifier;
+        // The protocol takes the modifier split into high and low 32 bits.
+        zwp_linux_buffer_params_v1_add(params, desc.objects[object].fd, plane_no, desc.layers[layer_no].offset[plane_no],
+                                       desc.layers[layer_no].pitch[plane_no], modifier >> 32, modifier & 0xffffffff);
+    }
+
+done:
+    // Our copies of the exported fds are closed on every path.
+    close_file_descriptors(desc);
+#endif
+}
+
+// Uses the fd of the first DRM object of the frame as its surface id.
+// The descriptor in src->planes[0] must not be NULL.
+static uintptr_t drmprime_surface_id(struct mp_image *src)
+{
+    const AVDRMFrameDescriptor *frame_desc = (AVDRMFrameDescriptor *)src->planes[0];
+
+    return (uintptr_t)frame_desc->objects[0].fd;
+}
+
+// Reads the DRM format and modifier of a drmprime frame into p->drm_format /
+// p->drm_modifier. Only the first layer and its first plane are consulted.
+// Returns false when the frame carries no descriptor.
+static bool drmprime_drm_format(struct vo *vo, struct mp_image *src)
+{
+    struct priv *p = vo->priv;
+    const AVDRMFrameDescriptor *frame_desc = (AVDRMFrameDescriptor *)src->planes[0];
+
+    if (frame_desc == NULL)
+        return false;
+
+    const AVDRMLayerDescriptor *first_layer = &frame_desc->layers[0];
+    const AVDRMObjectDescriptor *backing =
+        &frame_desc->objects[first_layer->planes[0].object_index];
+
+    p->drm_format = first_layer->format;
+    p->drm_modifier = backing->format_modifier;
+    return true;
+}
+
+// Adds every plane of a drmprime frame to params and records the frame's
+// surface id and DRM format in buf. Does nothing (leaving buf->drm_format
+// untouched) when the frame carries no descriptor.
+//
+// NOTE(review): plane indices restart at 0 for each layer and buf->drm_format
+// ends up being the last layer's format, so this presumably expects
+// single-layer descriptors — confirm for multi-layer frames.
+static void drmprime_dmabuf_importer(struct buffer *buf, struct mp_image *src,
+                                     struct zwp_linux_buffer_params_v1 *params)
+{
+    const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->planes[0];
+    if (!desc)
+        return;
+
+    buf->id = drmprime_surface_id(src);
+    for (int layer_no = 0; layer_no < desc->nb_layers; layer_no++) {
+        const AVDRMLayerDescriptor *layer = &desc->layers[layer_no];
+
+        buf->drm_format = layer->format;
+        for (int plane_no = 0; plane_no < layer->nb_planes; ++plane_no) {
+            const AVDRMPlaneDescriptor *plane = &layer->planes[plane_no];
+            const AVDRMObjectDescriptor *object = &desc->objects[plane->object_index];
+            uint64_t modifier = object->format_modifier;
+
+            // The protocol takes the modifier split into high and low 32 bits.
+            zwp_linux_buffer_params_v1_add(params, object->fd, plane_no, plane->offset,
+                                           plane->pitch, modifier >> 32, modifier & 0xffffffff);
+        }
+    }
+}
+
+// Returns the id identifying the hardware surface behind src for the active
+// hwdec type, or 0 when no hwdec type is selected.
+// The id is unsigned, matching the per-hwdec helpers and struct buffer.id
+// that it is compared against.
+static uintptr_t surface_id(struct vo *vo, struct mp_image *src)
+{
+    struct priv *p = vo->priv;
+    switch(p->hwdec_type) {
+    case HWDEC_VAAPI:
+        return vaapi_surface_id(src);
+    case HWDEC_DRMPRIME:
+        return drmprime_surface_id(src);
+    default:
+        return 0;
+    }
+}
+
+// Probes the DRM format/modifier of src using the prober of the active hwdec
+// type. Returns false if probing fails or no hwdec type is selected.
+static bool drm_format_check(struct vo *vo, struct mp_image *src)
+{
+    struct priv *p = vo->priv;
+
+    if (p->hwdec_type == HWDEC_VAAPI)
+        return vaapi_drm_format(vo, src);
+    if (p->hwdec_type == HWDEC_DRMPRIME)
+        return drmprime_drm_format(vo, src);
+    return false;
+}
+
+// Looks for an existing buffer that wraps the same surface as frame->current.
+// On a match the buffer's previous frame (if any) is freed, frame takes its
+// place and the buffer is returned. Returns NULL when nothing matches, or
+// while fewer than WL_BUFFERS_WANTED buffers exist so that the caller keeps
+// creating new ones.
+static struct buffer *buffer_check(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+
+    /* Make more buffers if we're not at the desired amount yet. */
+    if (wl_list_length(&p->buffer_list) < WL_BUFFERS_WANTED)
+        return NULL;
+
+    uintptr_t wanted = surface_id(vo, frame->current);
+    struct buffer *candidate;
+    wl_list_for_each(candidate, &p->buffer_list, link) {
+        if (candidate->id != wanted)
+            continue;
+
+        if (candidate->frame)
+            talloc_free(candidate->frame);
+        candidate->frame = frame;
+        return candidate;
+    }
+
+    return NULL;
+}
+
+// Creates a new video buffer for frame: imports the frame's planes into a
+// dmabuf params object, creates the wl_buffer from it and links the result
+// into p->buffer_list.
+// The new buffer takes ownership of frame. If the import fails (drm_format
+// still 0), frame is freed here and NULL is returned.
+static struct buffer *buffer_create(struct vo *vo, struct vo_frame *frame)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    struct priv *p = vo->priv;
+
+    struct buffer *buf = talloc_zero(vo, struct buffer);
+    buf->vo = vo;
+    buf->frame = frame;
+
+    struct mp_image *image = buf->frame->current;
+    struct zwp_linux_buffer_params_v1 *params = zwp_linux_dmabuf_v1_create_params(wl->dmabuf);
+
+    // The importers fill in buf->id and buf->drm_format and add the planes.
+    switch(p->hwdec_type) {
+    case HWDEC_VAAPI:
+        vaapi_dmabuf_importer(buf, image, params);
+        break;
+    case HWDEC_DRMPRIME:
+        drmprime_dmabuf_importer(buf, image, params);
+        break;
+    }
+
+    // buf was zero-allocated, so a zero format means the importer gave up.
+    if (!buf->drm_format) {
+        talloc_free(buf->frame);
+        talloc_free(buf);
+        zwp_linux_buffer_params_v1_destroy(params);
+        return NULL;
+    }
+
+    buf->buffer = zwp_linux_buffer_params_v1_create_immed(params, image->params.w, image->params.h,
+                                                          buf->drm_format, 0);
+    zwp_linux_buffer_params_v1_destroy(params);
+    // NOTE(review): the create_immed result is used without a NULL check.
+    wl_buffer_add_listener(buf->buffer, &buffer_listener, buf);
+    wl_list_insert(&p->buffer_list, &buf->link);
+    return buf;
+}
+
+// Returns a buffer holding frame: an existing one wrapping the same surface
+// when buffer_check() finds it, otherwise a newly created one (which may be
+// NULL if creation fails).
+static struct buffer *buffer_get(struct vo *vo, struct vo_frame *frame)
+{
+    /* Reuse existing buffer if possible. */
+    struct buffer *reused = buffer_check(vo, frame);
+
+    return reused ? reused : buffer_create(vo, frame);
+}
+
+// Tears down every video buffer: unlinks it, frees the frame it holds,
+// destroys its wl_buffer and frees the struct. Also clears the pending
+// destroy_buffers request.
+static void destroy_buffers(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    struct buffer *cur, *next;
+
+    p->destroy_buffers = false;
+    wl_list_for_each_safe(cur, next, &p->buffer_list, link) {
+        wl_list_remove(&cur->link);
+
+        if (cur->frame)
+            talloc_free(cur->frame);
+        if (cur->buffer)
+            wl_buffer_destroy(cur->buffer);
+
+        // The struct goes away right here, so its fields need no resetting.
+        talloc_free(cur);
+    }
+}
+
+// Detaches the OSD surface and tears down every OSD buffer: unlinks it,
+// unmaps the shm mapping its image points at, destroys its wl_buffer and
+// frees the bookkeeping struct. Does nothing if the wayland state is gone.
+static void destroy_osd_buffers(struct vo *vo)
+{
+    if (!vo->wl)
+        return;
+
+    // Remove any existing buffer before we destroy them.
+    wl_surface_attach(vo->wl->osd_surface, NULL, 0, 0);
+    wl_surface_commit(vo->wl->osd_surface);
+
+    struct priv *p = vo->priv;
+    struct osd_buffer *osd_buf, *tmp;
+    wl_list_for_each_safe(osd_buf, tmp, &p->osd_buffer_list, link) {
+        wl_list_remove(&osd_buf->link);
+        munmap(osd_buf->image.planes[0], osd_buf->size);
+        if (osd_buf->buffer) {
+            wl_buffer_destroy(osd_buf->buffer);
+            osd_buf->buffer = NULL;
+        }
+        // Once unlinked and with its wl_buffer (and thus its listener) gone,
+        // nothing references the struct any more. Without this it would stay
+        // allocated on the VO's talloc context after every resize.
+        talloc_free(osd_buf);
+    }
+}
+
+// Returns the first entry of the OSD buffer list, or NULL if it is empty.
+static struct osd_buffer *osd_buffer_check(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    struct osd_buffer *first = NULL;
+
+    if (!wl_list_empty(&p->osd_buffer_list))
+        first = wl_container_of(p->osd_buffer_list.next, first, link);
+
+    return first;
+}
+
+// Creates an ARGB8888 OSD buffer covering the whole OSD shm pool (offset 0)
+// and links it into p->osd_buffer_list. Its image points straight at the
+// pool's mapping.
+// Returns NULL if there is no pool yet or the wl_buffer cannot be created.
+static struct osd_buffer *osd_buffer_create(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    // The pool only exists after a successful create_shm_pool(); asking a
+    // NULL pool for a buffer would dereference a NULL proxy.
+    if (!p->osd_shm_pool)
+        return NULL;
+
+    struct osd_buffer *osd_buf = talloc_zero(vo, struct osd_buffer);
+
+    osd_buf->vo = vo;
+    osd_buf->size = p->osd_shm_height * p->osd_shm_stride;
+    mp_image_set_size(&osd_buf->image, p->osd_shm_width, p->osd_shm_height);
+    osd_buf->image.planes[0] = p->osd_shm_data;
+    osd_buf->image.stride[0] = p->osd_shm_stride;
+    osd_buf->buffer = wl_shm_pool_create_buffer(p->osd_shm_pool, 0,
+                                                p->osd_shm_width, p->osd_shm_height,
+                                                p->osd_shm_stride, WL_SHM_FORMAT_ARGB8888);
+
+    if (!osd_buf->buffer) {
+        talloc_free(osd_buf);
+        return NULL;
+    }
+
+    wl_list_insert(&p->osd_buffer_list, &osd_buf->link);
+    wl_buffer_add_listener(osd_buf->buffer, &osd_buffer_listener, osd_buf);
+    return osd_buf;
+}
+
+// Returns the existing OSD buffer if the list has one, otherwise tries to
+// create a new one (which may yield NULL).
+static struct osd_buffer *osd_buffer_get(struct vo *vo)
+{
+    struct osd_buffer *existing = osd_buffer_check(vo);
+
+    return existing ? existing : osd_buffer_create(vo);
+}
+
+// (Re)creates the shm pool backing the OSD surface at the current window
+// size (4 bytes per pixel, stride aligned to 16 bytes).
+// On success the old OSD buffers, pool and mapping are released and the
+// osd_shm_* fields describe the new pool. On failure the previous state is
+// left untouched.
+static void create_shm_pool(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    struct priv *p = vo->priv;
+
+    int stride = MP_ALIGN_UP(vo->dwidth * 4, 16);
+    // Widen before multiplying so the byte count is not computed in int.
+    size_t size = (size_t)vo->dheight * stride;
+    int fd = vo_wayland_allocate_memfd(vo, size);
+    if (fd < 0)
+        return;
+    uint8_t *data = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (data == MAP_FAILED)
+        goto error1;
+    struct wl_shm_pool *pool = wl_shm_create_pool(wl->shm, fd, size);
+    if (!pool)
+        goto error2;
+    close(fd);
+
+    // destroy_osd_buffers() unmaps the previous mapping only through a listed
+    // osd_buffer. When the list is empty (no buffer was created yet, or the
+    // compositor already released it) nobody else would unmap it, so remember
+    // it and do that here.
+    uint8_t *orphaned = wl_list_empty(&p->osd_buffer_list) ? p->osd_shm_data : NULL;
+    size_t orphaned_size = (size_t)p->osd_shm_height * p->osd_shm_stride;
+
+    destroy_osd_buffers(vo);
+    if (orphaned)
+        munmap(orphaned, orphaned_size);
+
+    if (p->osd_shm_pool)
+        wl_shm_pool_destroy(p->osd_shm_pool);
+    p->osd_shm_pool = pool;
+    p->osd_shm_width = vo->dwidth;
+    p->osd_shm_height = vo->dheight;
+    p->osd_shm_stride = stride;
+    p->osd_shm_data = data;
+    return;
+
+error2:
+    munmap(data, size);
+error1:
+    close(fd);
+}
+
+// Sets the source (crop) rectangle of the video viewport to src and
+// remembers it in p->src, so an unchanged rectangle is not sent again.
+// Does nothing in force-window mode or when there is no video viewport.
+static void set_viewport_source(struct vo *vo, struct mp_rect src)
+{
+    struct priv *p = vo->priv;
+    struct vo_wayland_state *wl = vo->wl;
+
+    if (p->force_window)
+        return;
+
+    if (wl->video_viewport && !mp_rect_equals(&p->src, &src)) {
+        // The request takes wl_fixed_t values; use the protocol's own
+        // converter rather than shifting by hand, which is undefined for
+        // negative values.
+        wp_viewport_set_source(wl->video_viewport,
+                               wl_fixed_from_int(src.x0),
+                               wl_fixed_from_int(src.y0),
+                               wl_fixed_from_int(mp_rect_w(src)),
+                               wl_fixed_from_int(mp_rect_h(src)));
+        p->src = src;
+    }
+}
+
+// Recomputes all geometry after a window size or panscan change: takes the
+// window size from wl->geometry, recreates the OSD shm pool and updates the
+// destination size/position of the main, video and OSD viewports and
+// subsurfaces. Does nothing while the window geometry is empty.
+static void resize(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    struct priv *p = vo->priv;
+
+    struct mp_rect src;
+    struct mp_rect dst;
+    struct mp_vo_opts *vo_opts = wl->vo_opts;
+
+    const int width = mp_rect_w(wl->geometry);
+    const int height = mp_rect_h(wl->geometry);
+
+    if (width == 0 || height == 0)
+        return;
+
+    vo_wayland_set_opaque_region(wl, false);
+    vo->dwidth = width;
+    vo->dheight = height;
+
+    // The OSD pool is sized from vo->dwidth/dheight, so this must come after
+    // they are updated.
+    create_shm_pool(vo);
+
+    // top level viewport is calculated with pan set to zero
+    vo->opts->pan_x = 0;
+    vo->opts->pan_y = 0;
+    vo_get_src_dst_rects(vo, &src, &dst, &p->screen_osd_res);
+    int window_w = p->screen_osd_res.ml + p->screen_osd_res.mr + mp_rect_w(dst);
+    int window_h = p->screen_osd_res.mt + p->screen_osd_res.mb + mp_rect_h(dst);
+    wp_viewport_set_destination(wl->viewport, window_w, window_h);
+
+    // Now restore pan (from the wayland state's option copy) for the video
+    // viewport calculation.
+    vo->opts->pan_x = vo_opts->pan_x;
+    vo->opts->pan_y = vo_opts->pan_y;
+    vo_get_src_dst_rects(vo, &src, &dst, &p->screen_osd_res);
+    wp_viewport_set_destination(wl->video_viewport, mp_rect_w(dst), mp_rect_h(dst));
+    wl_subsurface_set_position(wl->video_subsurface, dst.x0, dst.y0);
+    wp_viewport_set_destination(wl->osd_viewport, vo->dwidth, vo->dheight);
+    // The OSD subsurface is offset by the negated video position.
+    wl_subsurface_set_position(wl->osd_subsurface, 0 - dst.x0, 0 - dst.y0);
+    set_viewport_source(vo, src);
+}
+
+// Renders the OSD for timestamp pts and copies the modified rectangles into
+// cur (the image of an OSD buffer).
+// Updates p->osd_surface_has_contents from the number of active rectangles.
+// Returns true only if something was copied into cur; false if there was
+// nothing to render or nothing changed.
+static bool draw_osd(struct vo *vo, struct mp_image *cur, double pts)
+{
+    struct priv *p = vo->priv;
+    struct mp_osd_res *res = &p->screen_osd_res;
+    bool draw = false;
+
+    struct sub_bitmap_list *sbs = osd_render(vo->osd, *res, pts, 0, mp_draw_sub_formats);
+
+    if (!sbs)
+        return draw;
+
+    struct mp_rect act_rc[1], mod_rc[64];
+    int num_act_rc = 0, num_mod_rc = 0;
+
+    // The draw cache is created on first use and parented to the priv struct.
+    if (!p->osd_cache)
+        p->osd_cache = mp_draw_sub_alloc(p, vo->global);
+
+    struct mp_image *osd = mp_draw_sub_overlay(p->osd_cache, sbs, act_rc,
+                                               MP_ARRAY_SIZE(act_rc), &num_act_rc,
+                                               mod_rc, MP_ARRAY_SIZE(mod_rc), &num_mod_rc);
+
+    p->osd_surface_has_contents = num_act_rc > 0;
+
+    if (!osd || !num_mod_rc)
+        goto done;
+
+    for (int n = 0; n < num_mod_rc; n++) {
+        struct mp_rect rc = mod_rc[n];
+
+        int rw = mp_rect_w(rc);
+        int rh = mp_rect_h(rc);
+
+        // 4 bytes per pixel, matching the ARGB8888 buffers that
+        // osd_buffer_create() hands out.
+        void *src = mp_image_pixel_ptr(osd, 0, rc.x0, rc.y0);
+        void *dst = cur->planes[0] + rc.x0 * 4 + rc.y0 * cur->stride[0];
+
+        memcpy_pic(dst, src, rw * 4, rh, cur->stride[0], osd->stride[0]);
+    }
+
+    draw = true;
+done:
+    talloc_free(sbs);
+    return draw;
+}
+
+// Prepares the next presentation: attaches a wl_buffer for the frame's image
+// to the video surface and updates the OSD surface. Nothing is committed
+// here; flip_page() does that.
+// A frame that carries an image is handed over to a buffer, or freed if no
+// buffer could be obtained or the window is not visible.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+    struct vo_wayland_state *wl = vo->wl;
+    struct buffer *buf;
+    struct osd_buffer *osd_buf;
+    double pts;
+
+    if (!vo_wayland_check_visible(vo)) {
+        // NOTE(review): frames without an image are deliberately not freed
+        // here; presumably the caller still owns those — confirm.
+        if (frame->current)
+            talloc_free(frame);
+        return;
+    }
+
+    // Deferred request from VOCTRL_RESET.
+    if (p->destroy_buffers)
+        destroy_buffers(vo);
+
+    // Reuse the solid buffer so the osd can be visible
+    if (p->force_window) {
+        wl_surface_attach(wl->video_surface, p->solid_buffer, 0, 0);
+        wl_surface_damage_buffer(wl->video_surface, 0, 0, 1, 1);
+    }
+
+    pts = frame->current ? frame->current->pts : 0;
+    if (frame->current) {
+        // buffer_get() takes over the frame; on failure it has already
+        // freed it and returns NULL.
+        buf = buffer_get(vo, frame);
+
+        if (buf && buf->frame) {
+            struct mp_image *image = buf->frame->current;
+            wl_surface_attach(wl->video_surface, buf->buffer, 0, 0);
+            wl_surface_damage_buffer(wl->video_surface, 0, 0, image->w,
+                                     image->h);
+
+        }
+    }
+
+    osd_buf = osd_buffer_get(vo);
+    if (osd_buf && osd_buf->buffer) {
+        if (draw_osd(vo, &osd_buf->image, pts) && p->osd_surface_has_contents) {
+            // The OSD changed and has visible content: show the buffer.
+            wl_surface_attach(wl->osd_surface, osd_buf->buffer, 0, 0);
+            wl_surface_damage_buffer(wl->osd_surface, 0, 0, osd_buf->image.w,
+                                     osd_buf->image.h);
+            p->osd_surface_is_mapped = true;
+        } else if (!p->osd_surface_has_contents && p->osd_surface_is_mapped) {
+            // The OSD became empty: detach the buffer from the surface.
+            wl_surface_attach(wl->osd_surface, NULL, 0, 0);
+            p->osd_surface_is_mapped = false;
+        }
+    }
+}
+
+// Commits the video, OSD and main surfaces (in that order), then waits for
+// the frame callback unless vsync is disabled and records the swap for
+// presentation feedback when that is in use.
+static void flip_page(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    struct wl_surface *commit_order[] = {
+        wl->video_surface,
+        wl->osd_surface,
+        wl->surface,
+    };
+
+    for (int n = 0; n < MP_ARRAY_SIZE(commit_order); n++)
+        wl_surface_commit(commit_order[n]);
+
+    if (!wl->opts->disable_vsync)
+        vo_wayland_wait_frame(wl);
+
+    if (wl->use_present)
+        present_sync_swap(wl->present);
+}
+
+// Fills info from the presentation feedback state; leaves it untouched when
+// presentation feedback is not in use.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    struct vo_wayland_state *wl = vo->wl;
+
+    if (!wl->use_present)
+        return;
+
+    present_sync_get_info(wl->present, info);
+}
+
+// True for the hardware image formats this VO can display (DRM PRIME and
+// VAAPI), false for everything else.
+static bool is_supported_fmt(int fmt)
+{
+    switch (fmt) {
+    case IMGFMT_DRMPRIME:
+    case IMGFMT_VAAPI:
+        return true;
+    default:
+        return false;
+    }
+}
+
+// VO callback: returns 1 if format can be displayed by this VO, 0 otherwise.
+static int query_format(struct vo *vo, int format)
+{
+    return is_supported_fmt(format) ? 1 : 0;
+}
+
+// VO callback for new video parameters.
+// In force-window mode only the window is (re)configured. Otherwise the DRM
+// format/modifier of img is probed first and VO_ERROR is returned if it
+// cannot be determined or the compositor does not support it.
+// On success the buffer transform is set from the video rotation, all
+// existing video buffers are destroyed and 0 is returned.
+static int reconfig(struct vo *vo, struct mp_image *img)
+{
+    struct priv *p = vo->priv;
+
+    if (img->params.force_window) {
+        p->force_window = true;
+        goto done;
+    }
+
+    if (!drm_format_check(vo, img)) {
+        MP_ERR(vo, "Unable to get drm format from hardware decoding!\n");
+        return VO_ERROR;
+    }
+
+    if (!ra_compatible_format(p->ctx->ra, p->drm_format, p->drm_modifier)) {
+        // The modifier is 64 bits wide; "%lx" only matches it where long is
+        // 64-bit, so print it through an explicit unsigned long long.
+        MP_ERR(vo, "Format '%s' with modifier '(%016llx)' is not supported by"
+               " the compositor.\n", mp_tag_str(p->drm_format),
+               (unsigned long long)p->drm_modifier);
+        return VO_ERROR;
+    }
+
+    p->force_window = false;
+done:
+    if (!vo_wayland_reconfig(vo))
+        return VO_ERROR;
+
+    // mpv rotates clockwise but the wayland spec has counter-clockwise rotations
+    // swap 1 and 3 to match mpv's direction
+    int transform = (360 - img->params.rotate) % 360 / 90;
+    wl_surface_set_buffer_transform(vo->wl->video_surface, transform);
+
+    // Immediately destroy all buffers if params change.
+    destroy_buffers(vo);
+    return 0;
+}
+
+// VO control callback. VOCTRL_RESET schedules destruction of the video
+// buffers and VOCTRL_SET_PANSCAN recomputes the geometry; everything else is
+// forwarded to the common wayland code, whose resize/expose events are acted
+// upon before being passed on.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct priv *p = vo->priv;
+
+    if (request == VOCTRL_RESET) {
+        p->destroy_buffers = true;
+        return VO_TRUE;
+    }
+    if (request == VOCTRL_SET_PANSCAN) {
+        resize(vo);
+        return VO_TRUE;
+    }
+
+    int pending = 0;
+    int result = vo_wayland_control(vo, &pending, request, data);
+
+    if (pending & VO_EVENT_RESIZE)
+        resize(vo);
+    if (pending & VO_EVENT_EXPOSE)
+        vo->want_redraw = true;
+    vo_event(vo, pending);
+
+    return result;
+}
+
+// Releases everything preinit() and later calls set up. Also used as the
+// error path of preinit(), so every step has to cope with state that was
+// never initialized.
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    // Buffers go first, then the pools/buffers they were created from.
+    destroy_buffers(vo);
+    destroy_osd_buffers(vo);
+    if (p->osd_shm_pool)
+        wl_shm_pool_destroy(p->osd_shm_pool);
+    if (p->solid_buffer_pool)
+        wl_shm_pool_destroy(p->solid_buffer_pool);
+    if (p->solid_buffer)
+        wl_buffer_destroy(p->solid_buffer);
+    ra_hwdec_ctx_uninit(&p->hwdec_ctx);
+    if (vo->hwdec_devs) {
+        hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL);
+        hwdec_devices_destroy(vo->hwdec_devs);
+    }
+
+    // The wayland state and the RA context are torn down last.
+    vo_wayland_uninit(vo);
+    ra_ctx_destroy(&p->ctx);
+}
+
+// VO callback: sets up the "wldmabuf" RA context, verifies that the
+// compositor offers the required protocols, creates the solid background
+// buffer and loads the hardware decoding interops.
+// Returns 0 on success. On any failure uninit() is run and -1 is returned.
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    p->log = vo->log;
+    p->global = vo->global;
+    p->ctx = ra_ctx_create_by_name(vo, "wldmabuf");
+    // The lists are initialized before the first possible jump to err,
+    // because uninit() walks them.
+    wl_list_init(&p->buffer_list);
+    wl_list_init(&p->osd_buffer_list);
+    if (!p->ctx)
+       goto err;
+
+    assert(p->ctx->ra);
+
+    if (!vo->wl->dmabuf || !vo->wl->dmabuf_feedback) {
+        MP_FATAL(vo->wl, "Compositor doesn't support the %s (ver. 4) protocol!\n",
+                 zwp_linux_dmabuf_v1_interface.name);
+        goto err;
+    }
+
+    if (!vo->wl->shm) {
+        MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n",
+                 wl_shm_interface.name);
+        goto err;
+    }
+
+    if (!vo->wl->video_subsurface) {
+        MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n",
+                 wl_subcompositor_interface.name);
+        goto err;
+    }
+
+    if (!vo->wl->viewport) {
+        MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n",
+                 wp_viewporter_interface.name);
+        goto err;
+    }
+
+    // Create the solid buffer: through the single-pixel-buffer protocol when
+    // the compositor has it, otherwise from a 1x1 shm buffer.
+    if (vo->wl->single_pixel_manager) {
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+        p->solid_buffer = wp_single_pixel_buffer_manager_v1_create_u32_rgba_buffer(
+            vo->wl->single_pixel_manager, 0, 0, 0, UINT32_MAX); /* R, G, B, A */
+#endif
+    } else {
+        int width = 1;
+        int height = 1;
+        int stride = MP_ALIGN_UP(width * 4, 16);
+        int fd = vo_wayland_allocate_memfd(vo, stride);
+        if (fd < 0)
+            goto err;
+        p->solid_buffer_pool = wl_shm_create_pool(vo->wl->shm, fd, height * stride);
+        close(fd);
+        if (!p->solid_buffer_pool)
+            goto err;
+        p->solid_buffer = wl_shm_pool_create_buffer(
+            p->solid_buffer_pool, 0, width, height, stride, WL_SHM_FORMAT_XRGB8888);
+    }
+    if (!p->solid_buffer)
+        goto err;
+
+    wl_surface_attach(vo->wl->surface, p->solid_buffer, 0, 0);
+
+    vo->hwdec_devs = hwdec_devices_create();
+    p->hwdec_ctx = (struct ra_hwdec_ctx) {
+        .log = p->log,
+        .global = p->global,
+        .ra_ctx = p->ctx,
+    };
+    ra_hwdec_ctx_init(&p->hwdec_ctx, vo->hwdec_devs, NULL, true);
+
+    // Loop through hardware accelerated formats and only request known
+    // supported formats.
+    for (int i = IMGFMT_VDPAU_OUTPUT; i < IMGFMT_AVPIXFMT_START; ++i) {
+        if (is_supported_fmt(i)) {
+            struct hwdec_imgfmt_request params = {
+                .imgfmt = i,
+                .probing = false,
+            };
+            ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, vo->hwdec_devs, &params);
+        }
+    }
+
+    // Pick the hwdec type. A "VADisplay" native resource on the RA selects
+    // VAAPI regardless of which interop is being looked at; otherwise an
+    // interop named "drmprime" selects DRM PRIME.
+    for (int i = 0; i < p->hwdec_ctx.num_hwdecs; i++) {
+        struct ra_hwdec *hw = p->hwdec_ctx.hwdecs[i];
+        if (ra_get_native_resource(p->ctx->ra, "VADisplay")) {
+            p->hwdec_type = HWDEC_VAAPI;
+        } else if (strcmp(hw->driver->name, "drmprime") == 0) {
+            p->hwdec_type = HWDEC_DRMPRIME;
+        }
+    }
+
+    if (p->hwdec_type == HWDEC_NONE) {
+        MP_ERR(vo, "No valid hardware decoding driver could be loaded!\n");
+        goto err;
+    }
+
+    p->src = (struct mp_rect){0, 0, 0, 0};
+    return 0;
+
+err:
+    uninit(vo);
+    return -1;
+}
+
+// Driver descriptor for the "dmabuf-wayland" VO.
+const struct vo_driver video_out_dmabuf_wayland = {
+    .description = "Wayland dmabuf video output",
+    .name = "dmabuf-wayland",
+    .caps = VO_CAP_ROTATE90,
+    // draw_frame() keeps or frees the frames passed to it itself.
+    .frame_owner = true,
+    .preinit = preinit,
+    .query_format = query_format,
+    // reconfig() takes the whole mp_image rather than just its parameters.
+    .reconfig2 = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+    .wakeup = vo_wayland_wakeup,
+    .wait_events = vo_wayland_wait_events,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+};
diff --git a/video/out/vo_drm.c b/video/out/vo_drm.c
new file mode 100644
index 0000000..aae73f7
--- /dev/null
+++ b/video/out/vo_drm.c
@@ -0,0 +1,458 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <stdbool.h>
+#include <sys/mman.h>
+#include <poll.h>
+#include <unistd.h>
+
+#include <drm_fourcc.h>
+#include <libswscale/swscale.h>
+
+#include "common/msg.h"
+#include "drm_atomic.h"
+#include "drm_common.h"
+#include "osdep/timer.h"
+#include "sub/osd.h"
+#include "video/fmt-conversion.h"
+#include "video/mp_image.h"
+#include "video/out/present_sync.h"
+#include "video/sws_utils.h"
+#include "vo.h"
+
+// mpv image formats that the software scaler renders into for each supported
+// DRM framebuffer format.
+#define IMGFMT_XRGB8888 IMGFMT_BGR0
+// For XRGB2101010 the image is rendered as planar 10-bit GBR in host byte
+// order and packed into the framebuffer layout by draw_image().
+#if BYTE_ORDER == BIG_ENDIAN
+#define IMGFMT_XRGB2101010 pixfmt2imgfmt(AV_PIX_FMT_GBRP10BE)
+#else
+#define IMGFMT_XRGB2101010 pixfmt2imgfmt(AV_PIX_FMT_GBRP10LE)
+#endif
+
+// Both supported framebuffer formats use 32 bits per pixel.
+#define BYTES_PER_PIXEL 4
+#define BITS_PER_PIXEL 32
+
+// Entry of priv.fb_queue; refers to the framebuffer it was drawn into.
+struct drm_frame {
+    struct framebuffer *fb;
+};
+
+// Private state of the drm VO.
+struct priv {
+    struct drm_frame **fb_queue;        // NOTE(review): presumably frames queued for page flipping — confirm
+    unsigned int fb_queue_len;
+
+    uint32_t drm_format;                // DRM fourcc chosen in setup_framebuffer()
+    enum mp_imgfmt imgfmt;              // matching mpv image format for the scaler
+
+    struct mp_image *last_input;        // dropped on reconfig()
+    struct mp_image *cur_frame;         // full-screen sized render target
+    struct mp_image *cur_frame_cropped; // view of cur_frame cropped to dst
+    struct mp_rect src;                 // source rect within the video
+    struct mp_rect dst;                 // destination rect within the screen
+    struct mp_osd_res osd;
+    struct mp_sws_context *sws;         // software scaler from video params to cur_frame_cropped
+
+    struct framebuffer **bufs;          // framebuffers cycled through by get_new_fb()
+    int front_buf;                      // index into bufs of the most recently handed out buffer
+    int buf_count;                      // number of entries in bufs
+};
+
+// Releases the kernel and mapping resources of fb (memory mapping,
+// framebuffer object, dumb buffer), skipping whichever was never created.
+// The struct itself is not freed. A NULL fb is ignored.
+static void destroy_framebuffer(int fd, struct framebuffer *fb)
+{
+    if (fb == NULL)
+        return;
+
+    if (fb->map)
+        munmap(fb->map, fb->size);
+
+    if (fb->id)
+        drmModeRmFB(fd, fb->id);
+
+    if (!fb->handle)
+        return;
+
+    struct drm_mode_destroy_dumb destroy_req = {
+        .handle = fb->handle,
+    };
+    drmIoctl(fd, DRM_IOCTL_MODE_DESTROY_DUMB, &destroy_req);
+}
+
+// Allocates one dumb-buffer backed framebuffer sized to the current display
+// mode, registers it as a DRM framebuffer object and maps it into memory,
+// cleared to zero.
+// As a side effect p->drm_format and p->imgfmt are (re)selected from the
+// drm-format option.
+// Returns the new framebuffer, or NULL on failure after everything created
+// so far has been released again.
+static struct framebuffer *setup_framebuffer(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    struct vo_drm_state *drm = vo->drm;
+
+    struct framebuffer *fb = talloc_zero(drm, struct framebuffer);
+    fb->width = drm->mode.mode.hdisplay;
+    fb->height = drm->mode.mode.vdisplay;
+    fb->fd = drm->fd;
+    fb->handle = 0;
+
+    // create dumb buffer
+    struct drm_mode_create_dumb creq = {
+        .width = fb->width,
+        .height = fb->height,
+        .bpp = BITS_PER_PIXEL,
+    };
+
+    if (drmIoctl(drm->fd, DRM_IOCTL_MODE_CREATE_DUMB, &creq) < 0) {
+        MP_ERR(vo, "Cannot create dumb buffer: %s\n", mp_strerror(errno));
+        goto err;
+    }
+
+    fb->stride = creq.pitch;
+    fb->size = creq.size;
+    fb->handle = creq.handle;
+
+    // select format
+    if (drm->opts->drm_format == DRM_OPTS_FORMAT_XRGB2101010) {
+        p->drm_format = DRM_FORMAT_XRGB2101010;
+        p->imgfmt = IMGFMT_XRGB2101010;
+    } else {
+        p->drm_format = DRM_FORMAT_XRGB8888;
+        p->imgfmt = IMGFMT_XRGB8888;
+    }
+
+    // create framebuffer object for the dumb-buffer
+    int ret = drmModeAddFB2(fb->fd, fb->width, fb->height,
+                            p->drm_format,
+                            (uint32_t[4]){fb->handle, 0, 0, 0},
+                            (uint32_t[4]){fb->stride, 0, 0, 0},
+                            (uint32_t[4]){0, 0, 0, 0},
+                            &fb->id, 0);
+    if (ret) {
+        MP_ERR(vo, "Cannot create framebuffer: %s\n", mp_strerror(errno));
+        goto err;
+    }
+
+    // prepare buffer for memory mapping
+    struct drm_mode_map_dumb mreq = {
+        .handle = fb->handle,
+    };
+    if (drmIoctl(drm->fd, DRM_IOCTL_MODE_MAP_DUMB, &mreq)) {
+        MP_ERR(vo, "Cannot map dumb buffer: %s\n", mp_strerror(errno));
+        goto err;
+    }
+
+    // perform actual memory mapping
+    fb->map = mmap(0, fb->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+                    drm->fd, mreq.offset);
+    if (fb->map == MAP_FAILED) {
+        MP_ERR(vo, "Cannot map dumb buffer: %s\n", mp_strerror(errno));
+        // destroy_framebuffer() treats any non-NULL map as a live mapping,
+        // so do not leave MAP_FAILED behind for it to munmap().
+        fb->map = NULL;
+        goto err;
+    }
+
+    memset(fb->map, 0, fb->size);
+    return fb;
+
+err:
+    destroy_framebuffer(drm->fd, fb);
+    // destroy_framebuffer() leaves the struct itself allocated; nothing else
+    // references it yet, so release it instead of leaving it on the drm
+    // context.
+    talloc_free(fb);
+    return NULL;
+}
+
+// VO callback for new video parameters: recomputes the source/destination
+// rectangles for the framebuffer size, sets up the software scaler to convert
+// from params to the framebuffer's image format at the destination size, and
+// reallocates the full-size render target plus its cropped view.
+// Returns 0 on success, -1 if the scaler cannot be initialized.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *p = vo->priv;
+    struct vo_drm_state *drm = vo->drm;
+
+    // The "window" is always the whole framebuffer.
+    vo->dwidth =drm->fb->width;
+    vo->dheight = drm->fb->height;
+    vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd);
+
+    int w = p->dst.x1 - p->dst.x0;
+    int h = p->dst.y1 - p->dst.y0;
+
+    p->sws->src = *params;
+    p->sws->dst = (struct mp_image_params) {
+        .imgfmt = p->imgfmt,
+        .w = w,
+        .h = h,
+        .p_w = 1,
+        .p_h = 1,
+    };
+
+    talloc_free(p->cur_frame);
+    // NOTE(review): the result of mp_image_alloc() is used without a NULL
+    // check.
+    p->cur_frame = mp_image_alloc(p->imgfmt, drm->fb->width, drm->fb->height);
+    mp_image_params_guess_csp(&p->sws->dst);
+    mp_image_set_params(p->cur_frame, &p->sws->dst);
+    // set_params applied the (smaller) scaler output size; restore the full
+    // framebuffer size.
+    mp_image_set_size(p->cur_frame, drm->fb->width, drm->fb->height);
+
+    // The cropped view shares cur_frame's data and covers only dst.
+    talloc_free(p->cur_frame_cropped);
+    p->cur_frame_cropped = mp_image_new_dummy_ref(p->cur_frame);
+    mp_image_crop_rc(p->cur_frame_cropped, p->dst);
+
+    talloc_free(p->last_input);
+    p->last_input = NULL;
+
+    if (mp_sws_reinit(p->sws) < 0)
+        return -1;
+
+    vo->want_redraw = true;
+    return 0;
+}
+
+// Advance to the next buffer in the ring and return it as the new draw target.
+static struct framebuffer *get_new_fb(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    // Step front_buf forward by one slot, wrapping around at buf_count.
+    p->front_buf = (p->front_buf + 1) % p->buf_count;
+    return p->bufs[p->front_buf];
+}
+
+// Render mpi (or a blank frame if mpi is NULL) plus OSD into p->cur_frame and copy
+// the result into the mapped framebuffer buf. mpi is then kept as p->last_input.
+static void draw_image(struct vo *vo, mp_image_t *mpi, struct framebuffer *buf)
+{
+    struct priv *p = vo->priv;
+    struct vo_drm_state *drm = vo->drm;
+
+    if (drm->active && buf != NULL) {
+        if (mpi) {
+            struct mp_image src = *mpi;
+            struct mp_rect src_rc = p->src;
+            src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, mpi->fmt.align_x);
+            src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, mpi->fmt.align_y);
+            mp_image_crop_rc(&src, src_rc);
+            // Clear the four border strips around the destination rectangle.
+            mp_image_clear(p->cur_frame, 0, 0, p->cur_frame->w, p->dst.y0);
+            mp_image_clear(p->cur_frame, 0, p->dst.y1, p->cur_frame->w, p->cur_frame->h);
+            mp_image_clear(p->cur_frame, 0, p->dst.y0, p->dst.x0, p->dst.y1);
+            mp_image_clear(p->cur_frame, p->dst.x1, p->dst.y0, p->cur_frame->w, p->dst.y1);
+            mp_sws_scale(p->sws, p->cur_frame_cropped, &src);
+            osd_draw_on_image(vo->osd, p->osd, src.pts, 0, p->cur_frame);
+        } else {
+            mp_image_clear(p->cur_frame, 0, 0, p->cur_frame->w, p->cur_frame->h);
+            osd_draw_on_image(vo->osd, p->osd, 0, 0, p->cur_frame);
+        }
+
+        if (p->drm_format == DRM_FORMAT_XRGB2101010) {
+            // Pack GBRP10 image into XRGB2101010 for DRM
+            const int w = p->cur_frame->w;
+            const int h = p->cur_frame->h;
+            const int g_padding = p->cur_frame->stride[0]/sizeof(uint16_t) - w;
+            const int b_padding = p->cur_frame->stride[1]/sizeof(uint16_t) - w;
+            const int r_padding = p->cur_frame->stride[2]/sizeof(uint16_t) - w;
+            const int fbuf_padding = buf->stride/sizeof(uint32_t) - w;
+            uint16_t *g_ptr = (uint16_t*)p->cur_frame->planes[0];
+            uint16_t *b_ptr = (uint16_t*)p->cur_frame->planes[1];
+            uint16_t *r_ptr = (uint16_t*)p->cur_frame->planes[2];
+            uint32_t *fbuf_ptr = (uint32_t*)buf->map;
+            for (int y = 0; y < h; ++y) {
+                for (int x = 0; x < w; ++x) {
+                    // Widen first: a uint16_t would otherwise be shifted as a signed int.
+                    *fbuf_ptr++ = ((uint32_t)*r_ptr++ << 20) |
+                                  ((uint32_t)*g_ptr++ << 10) | (uint32_t)*b_ptr++;
+                }
+                g_ptr += g_padding;
+                b_ptr += b_padding;
+                r_ptr += r_padding;
+                fbuf_ptr += fbuf_padding;
+            }
+        } else { // p->drm_format == DRM_FORMAT_XRGB8888
+            memcpy_pic(buf->map, p->cur_frame->planes[0],
+                       p->cur_frame->w * BYTES_PER_PIXEL, p->cur_frame->h,
+                       buf->stride, p->cur_frame->stride[0]);
+        }
+    }
+
+    if (mpi != p->last_input) {
+        talloc_free(p->last_input);
+        p->last_input = mpi;
+    }
+}
+
+// Wrap fb in a newly allocated drm_frame and append it to the flip queue.
+static void enqueue_frame(struct vo *vo, struct framebuffer *fb)
+{
+    struct priv *p = vo->priv;
+    struct drm_frame *entry = talloc(p, struct drm_frame);
+    entry->fb = fb;
+    MP_TARRAY_APPEND(p, p->fb_queue, p->fb_queue_len, entry);
+}
+
+static void dequeue_frame(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    // Free the oldest queue entry and remove its slot; the queue must be non-empty.
+    talloc_free(p->fb_queue[0]);
+    MP_TARRAY_REMOVE_AT(p->fb_queue, p->fb_queue_len, 0);
+}
+
+// Drop the oldest queued frame, if there is one.
+static void swapchain_step(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    if (p->fb_queue_len <= 0)
+        return;
+    dequeue_frame(vo);
+}
+
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct vo_drm_state *drm = vo->drm;
+    struct priv *p = vo->priv;
+    // Render the frame into a framebuffer (unless it is a pure repeat) and queue it.
+    if (!drm->active)
+        return;
+
+    drm->still = frame->still;
+
+    // we redraw the entire image when OSD needs to be redrawn
+    struct framebuffer *fb =  p->bufs[p->front_buf];
+    const bool repeat = frame->repeat && !frame->redraw;
+    if (!repeat) {
+        fb = get_new_fb(vo);
+        draw_image(vo, mp_image_new_ref(frame->current), fb);
+    }
+    // A repeated frame re-queues the current front buffer without redrawing it.
+    enqueue_frame(vo, fb);
+}
+
+static void queue_flip(struct vo *vo, struct drm_frame *frame)
+{
+    struct vo_drm_state *drm = vo->drm;
+    // Make frame's framebuffer current and request a page flip with a completion event.
+    drm->fb = frame->fb;
+
+    int ret = drmModePageFlip(drm->fd, drm->crtc_id,
+                              drm->fb->id, DRM_MODE_PAGE_FLIP_EVENT, drm);
+    if (ret)
+        MP_WARN(vo, "Failed to queue page flip: %s\n", mp_strerror(errno));
+    drm->waiting_for_flip = !ret; // only wait for an event if the flip was queued
+}
+
+static void flip_page(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    struct vo_drm_state *drm = vo->drm;
+    const bool drain = drm->paused || drm->still;
+
+    if (!drm->active)
+        return;
+    // Flip queued frames until the queue fits swapchain_depth (or fully when draining).
+    while (drain || p->fb_queue_len > vo->opts->swapchain_depth) {
+        if (drm->waiting_for_flip) {
+            vo_drm_wait_on_flip(vo->drm);
+            swapchain_step(vo);
+        }
+        if (p->fb_queue_len <= 1)
+            break;
+        if (!p->fb_queue[1] || !p->fb_queue[1]->fb) {
+            MP_ERR(vo, "Hole in swapchain?\n");
+            swapchain_step(vo);
+            continue;
+        }
+        queue_flip(vo, p->fb_queue[1]); // NOTE(review): a failed flip is retried, queue does not advance
+    }
+}
+
+// Fill info from the DRM state's presentation-sync data.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    present_sync_get_info(vo->drm->present, info);
+}
+
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    // NOTE(review): p->bufs are not destroyed here; confirm vo_drm_uninit() covers them.
+    vo_drm_uninit(vo);
+    // Drop every frame that is still waiting in the flip queue.
+    while (p->fb_queue_len > 0) {
+        swapchain_step(vo);
+    }
+
+    talloc_free(p->last_input);
+    talloc_free(p->cur_frame);
+    talloc_free(p->cur_frame_cropped);
+}
+
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    // Bring up DRM, allocate the framebuffers, acquire the CRTC and set up the scaler.
+    if (!vo_drm_init(vo))
+        goto err;
+
+    struct vo_drm_state *drm = vo->drm;
+    p->buf_count = vo->opts->swapchain_depth + 1; // one buffer more than the swapchain depth
+    p->bufs = talloc_zero_array(p, struct framebuffer *, p->buf_count);
+
+    p->front_buf = 0;
+    for (int i = 0; i < p->buf_count; i++) {
+        p->bufs[i] = setup_framebuffer(vo);
+        if (!p->bufs[i])
+            goto err;
+    }
+    drm->fb = p->bufs[0];
+    // The output size is taken from the first framebuffer.
+    vo->drm->width = vo->drm->fb->width;
+    vo->drm->height = vo->drm->fb->height;
+
+    if (!vo_drm_acquire_crtc(vo->drm)) {
+        MP_ERR(vo, "Failed to set CRTC for connector %u: %s\n",
+               vo->drm->connector->connector_id, mp_strerror(errno));
+        goto err;
+    }
+
+    vo_drm_set_monitor_par(vo);
+    p->sws = mp_sws_alloc(vo);
+    p->sws->log = vo->log;
+    mp_sws_enable_cmdline_opts(p->sws, vo->global);
+    return 0;
+
+err:
+    uninit(vo); // every failure path tears down through the regular uninit()
+    return -1;
+}
+
+static int query_format(struct vo *vo, int format)
+{ // Accept any image format that libswscale supports as an input.
+    return sws_isSupportedInput(imgfmt2pixfmt(format));
+}
+
+// Handle panscan changes locally; forward every other request to vo_drm_control().
+static int control(struct vo *vo, uint32_t request, void *arg)
+{
+    if (request == VOCTRL_SET_PANSCAN) {
+        // Recompute rectangles and scaler setup, but only once configured.
+        if (vo->config_ok)
+            reconfig(vo, vo->params);
+        return VO_TRUE;
+    }
+    int pending = 0;
+    int result = vo_drm_control(vo, &pending, request, arg);
+    vo_event(vo, pending);
+    return result;
+}
+
+const struct vo_driver video_out_drm = { // VO driver table for the "drm" output
+    .name = "drm",
+    .description = "Direct Rendering Manager (software scaling)",
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+    .uninit = uninit,
+    .wait_events = vo_drm_wait_events, // vo_drm_* helpers rather than local callbacks
+    .wakeup = vo_drm_wakeup,
+    .priv_size = sizeof(struct priv),
+};
diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c
new file mode 100644
index 0000000..c02e6e7
--- /dev/null
+++ b/video/out/vo_gpu.c
@@ -0,0 +1,336 @@
+/*
+ * Based on vo_gl.c by Reimar Doeffinger.
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include <libavutil/common.h>
+
+#include "mpv_talloc.h"
+#include "common/common.h"
+#include "misc/bstr.h"
+#include "common/msg.h"
+#include "common/global.h"
+#include "options/m_config.h"
+#include "vo.h"
+#include "video/mp_image.h"
+#include "sub/osd.h"
+
+#include "gpu/context.h"
+#include "gpu/hwdec.h"
+#include "gpu/video.h"
+
+struct gpu_priv {
+    struct mp_log *log;         // set to vo->log in preinit()
+    struct ra_ctx *ctx;         // created in preinit(), destroyed in uninit()
+
+    char *context_name;         // not referenced elsewhere in this file
+    char *context_type;         // not referenced elsewhere in this file
+    struct gl_video *renderer;  // created by gl_video_init() in preinit()
+
+    int events;                 // event bits from ICC/lux queries, flushed in control()
+};
+static void resize(struct vo *vo)
+{
+    struct gpu_priv *p = vo->priv;
+    struct ra_swapchain *sw = p->ctx->swapchain;
+    // Push the current output rectangles and framebuffer depth to the renderer.
+    MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight);
+
+    struct mp_rect src, dst;
+    struct mp_osd_res osd;
+    vo_get_src_dst_rects(vo, &src, &dst, &osd);
+
+    gl_video_resize(p->renderer, &src, &dst, &osd);
+    // color_depth is an optional swapchain hook; 0 is passed on when it is absent.
+    int fb_depth = sw->fns->color_depth ? sw->fns->color_depth(sw) : 0;
+    if (fb_depth)
+        MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth);
+    gl_video_set_fb_depth(p->renderer, fb_depth);
+
+    vo->want_redraw = true;
+}
+
+// Acquire a target FBO from the swapchain, render the frame into it and submit it.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct gpu_priv *p = vo->priv;
+    struct ra_swapchain *sw = p->ctx->swapchain;
+    struct ra_fbo target;
+
+    // Nothing to render into if the swapchain cannot start a frame.
+    if (!sw->fns->start_frame(sw, &target))
+        return;
+
+    gl_video_render_frame(p->renderer, frame, target, RENDER_FRAME_DEF);
+    if (!sw->fns->submit_frame(sw, frame))
+        MP_ERR(vo, "Failed presenting frame!\n");
+}
+
+// Present the submitted frame by swapping the swapchain's buffers.
+static void flip_page(struct vo *vo)
+{
+    struct gpu_priv *p = vo->priv;
+    p->ctx->swapchain->fns->swap_buffers(p->ctx->swapchain);
+}
+
+// Forward the vsync query to the swapchain, if it implements get_vsync.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    struct ra_swapchain *sw = ((struct gpu_priv *)vo->priv)->ctx->swapchain;
+    if (sw->fns->get_vsync)
+        sw->fns->get_vsync(sw, info);
+}
+
+// Return 1 if the renderer can handle the given image format, otherwise 0.
+static int query_format(struct vo *vo, int format)
+{
+    struct gpu_priv *p = vo->priv;
+    const bool supported = gl_video_check_format(p->renderer, format);
+    return supported ? 1 : 0;
+}
+
+// Reconfigure the context, then push the new size and image params to the renderer.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct gpu_priv *p = vo->priv;
+    const bool ctx_ok = p->ctx->fns->reconfig(p->ctx);
+    if (!ctx_ok)
+        return -1;
+
+    resize(vo);
+    gl_video_config(p->renderer, params);
+    return 0;
+}
+
+static void request_hwdec_api(struct vo *vo, void *data)
+{ // VOCTRL_LOAD_HWDEC_API handler; data is passed through to the renderer unchanged.
+    struct gpu_priv *p = vo->priv;
+    gl_video_load_hwdecs_for_img_fmt(p->renderer, vo->hwdec_devs, data);
+}
+
+static void call_request_hwdec_api(void *ctx,
+                                   struct hwdec_imgfmt_request *params)
+{
+    // Loader callback; ctx is the vo registered in preinit(). Going through
+    // vo_control() makes request_hwdec_api() run on the VO thread.
+    vo_control(ctx, VOCTRL_LOAD_HWDEC_API, params);
+}
+
+// If automatic ICC profiles are enabled, query the context and update the renderer.
+static void get_and_update_icc_profile(struct gpu_priv *p)
+{
+    if (!gl_video_icc_auto_enabled(p->renderer))
+        return;
+    MP_VERBOSE(p, "Querying ICC profile...\n");
+    bstr icc = bstr0(NULL);
+    int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_ICC_PROFILE, &icc);
+    if (r == VO_NOTAVAIL)
+        return;
+    // Failures are logged, but the profile is passed to the renderer regardless.
+    if (r == VO_FALSE) {
+        MP_WARN(p, "Could not retrieve an ICC profile.\n");
+    } else if (r == VO_NOTIMPL) {
+        MP_ERR(p, "icc-profile-auto not implemented on this platform.\n");
+    }
+    gl_video_set_icc_profile(p->renderer, icc);
+}
+
+// Feed the ambient lux reading to the renderer, or complain if it is unavailable.
+static void get_and_update_ambient_lighting(struct gpu_priv *p)
+{
+    int lux;
+    int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_AMBIENT_LUX, &lux);
+    if (r == VO_TRUE) {
+        gl_video_set_ambient_lux(p->renderer, lux);
+    } else if (gl_video_gamma_auto_enabled(p->renderer)) {
+        MP_ERR(p, "gamma_auto option provided, but querying for ambient"
+                  " lighting is not supported on this platform\n");
+    }
+}
+
+// Refresh the context options that can be toggled at runtime (only want_alpha).
+static void update_ra_ctx_options(struct vo *vo)
+{
+    struct gpu_priv *p = vo->priv;
+    struct gl_video_opts *gl_opts = mp_get_config_group(p->ctx, vo->global, &gl_video_conf);
+    p->ctx->opts.want_alpha = gl_opts->alpha_mode == 1;
+    talloc_free(gl_opts); // one-shot copy; don't let them pile up under p->ctx
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct gpu_priv *p = vo->priv;
+    // Requests handled by the VO itself; anything else is forwarded to the context below.
+    switch (request) {
+    case VOCTRL_SET_PANSCAN:
+        resize(vo);
+        return VO_TRUE;
+    case VOCTRL_SET_EQUALIZER:
+        vo->want_redraw = true;
+        return VO_TRUE;
+    case VOCTRL_SCREENSHOT: {
+        struct vo_frame *frame = vo_get_current_vo_frame(vo);
+        if (frame)
+            gl_video_screenshot(p->renderer, frame, data);
+        talloc_free(frame);
+        return true;
+    }
+    case VOCTRL_LOAD_HWDEC_API:
+        request_hwdec_api(vo, data);
+        return true;
+    case VOCTRL_UPDATE_RENDER_OPTS: {
+        update_ra_ctx_options(vo);
+        gl_video_configure_queue(p->renderer, vo);
+        get_and_update_icc_profile(p);
+        if (p->ctx->fns->update_render_opts) // optional context hook
+            p->ctx->fns->update_render_opts(p->ctx);
+        vo->want_redraw = true;
+        return true;
+    }
+    case VOCTRL_RESET:
+        gl_video_reset(p->renderer);
+        return true;
+    case VOCTRL_PAUSE:
+        if (gl_video_showing_interpolated_frame(p->renderer))
+            vo->want_redraw = true;
+        return true;
+    case VOCTRL_PERFORMANCE_DATA:
+        gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data);
+        return true;
+    case VOCTRL_EXTERNAL_RESIZE:
+        p->ctx->fns->reconfig(p->ctx); // return value is not checked here
+        resize(vo);
+        return true;
+    }
+    // Forward to the context, then react to the events it reported.
+    int events = 0;
+    int r = p->ctx->fns->control(p->ctx, &events, request, data);
+    if (events & VO_EVENT_ICC_PROFILE_CHANGED) {
+        get_and_update_icc_profile(p);
+        vo->want_redraw = true;
+    }
+    if (events & VO_EVENT_AMBIENT_LIGHTING_CHANGED) {
+        get_and_update_ambient_lighting(p);
+        vo->want_redraw = true;
+    }
+    events |= p->events; // merge events collected by the ICC/lux queries
+    p->events = 0;
+    if (events & VO_EVENT_RESIZE)
+        resize(vo);
+    if (events & VO_EVENT_EXPOSE)
+        vo->want_redraw = true;
+    vo_event(vo, events);
+
+    return r;
+}
+
+static void wakeup(struct vo *vo)
+{ // Forward the wakeup to the context when it exists and implements the hook.
+    struct gpu_priv *p = vo->priv;
+    if (p->ctx && p->ctx->fns->wakeup)
+        p->ctx->fns->wakeup(p->ctx);
+}
+
+// Wait via the context's wait_events hook, or fall back to vo_wait_default().
+static void wait_events(struct vo *vo, int64_t until_time_ns)
+{
+    struct ra_ctx *ctx = ((struct gpu_priv *)vo->priv)->ctx;
+    if (!ctx || !ctx->fns->wait_events)
+        vo_wait_default(vo, until_time_ns);
+    else
+        ctx->fns->wait_events(ctx, until_time_ns);
+}
+
+// Image allocation request from the VO core; delegated entirely to the renderer.
+static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h,
+                                  int stride_align, int flags)
+{
+    struct gpu_priv *p = vo->priv;
+    return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align, flags);
+}
+
+static void uninit(struct vo *vo)
+{
+    struct gpu_priv *p = vo->priv;
+    // Also used as preinit()'s error path, where p->ctx is still NULL.
+    gl_video_uninit(p->renderer);
+    if (vo->hwdec_devs) {
+        hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL);
+        hwdec_devices_destroy(vo->hwdec_devs);
+    }
+    ra_ctx_destroy(&p->ctx);
+}
+
+static int preinit(struct vo *vo)
+{
+    struct gpu_priv *p = vo->priv;
+    p->log = vo->log;
+    // Build the context options from temporary copies of the option groups.
+    struct ra_ctx_opts *ctx_opts = mp_get_config_group(vo, vo->global, &ra_ctx_conf);
+    struct gl_video_opts *gl_opts = mp_get_config_group(vo, vo->global, &gl_video_conf);
+    struct ra_ctx_opts opts = *ctx_opts;
+    opts.want_alpha = gl_opts->alpha_mode == 1;
+    p->ctx = ra_ctx_create(vo, opts);
+    talloc_free(ctx_opts);
+    talloc_free(gl_opts);
+    if (!p->ctx)
+        goto err_out;
+    assert(p->ctx->ra);
+    assert(p->ctx->swapchain);
+    // Create the renderer and hook it up to the OSD and the VO's frame queue settings.
+    p->renderer = gl_video_init(p->ctx->ra, vo->log, vo->global);
+    gl_video_set_osd_source(p->renderer, vo->osd);
+    gl_video_configure_queue(p->renderer, vo);
+
+    get_and_update_icc_profile(p);
+    // Register call_request_hwdec_api() as the loader for the hwdec device list.
+    vo->hwdec_devs = hwdec_devices_create();
+    hwdec_devices_set_loader(vo->hwdec_devs, call_request_hwdec_api, vo);
+
+    gl_video_init_hwdecs(p->renderer, p->ctx, vo->hwdec_devs, false);
+
+    return 0;
+
+err_out:
+    uninit(vo); // only reached when context creation failed
+    return -1;
+}
+
+const struct vo_driver video_out_gpu = { // VO driver table for the "gpu" output
+    .description = "Shader-based GPU Renderer",
+    .name = "gpu",
+    .caps = VO_CAP_ROTATE90,
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .get_image = get_image,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+    .wait_events = wait_events, // both wrappers below tolerate a missing context hook
+    .wakeup = wakeup,
+    .uninit = uninit,
+    .priv_size = sizeof(struct gpu_priv),
+};
diff --git a/video/out/vo_gpu_next.c b/video/out/vo_gpu_next.c
new file mode 100644
index 0000000..1dc1b18
--- /dev/null
+++ b/video/out/vo_gpu_next.c
@@ -0,0 +1,2104 @@
+/*
+ * Copyright (C) 2021 Niklas Haas
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <unistd.h>
+
+#include <libplacebo/colorspace.h>
+#include <libplacebo/options.h>
+#include <libplacebo/renderer.h>
+#include <libplacebo/shaders/lut.h>
+#include <libplacebo/shaders/icc.h>
+#include <libplacebo/utils/libav.h>
+#include <libplacebo/utils/frame_queue.h>
+
+#include "config.h"
+#include "common/common.h"
+#include "options/m_config.h"
+#include "options/path.h"
+#include "osdep/io.h"
+#include "osdep/threads.h"
+#include "stream/stream.h"
+#include "video/fmt-conversion.h"
+#include "video/mp_image.h"
+#include "video/out/placebo/ra_pl.h"
+#include "placebo/utils.h"
+#include "gpu/context.h"
+#include "gpu/hwdec.h"
+#include "gpu/video.h"
+#include "gpu/video_shaders.h"
+#include "sub/osd.h"
+#include "gpu_next/context.h"
+
+#if HAVE_GL && defined(PL_HAVE_OPENGL)
+#include <libplacebo/opengl.h>
+#include "video/out/opengl/ra_gl.h"
+#endif
+
+#if HAVE_D3D11 && defined(PL_HAVE_D3D11)
+#include <libplacebo/d3d11.h>
+#include "video/out/d3d11/ra_d3d11.h"
+#include "osdep/windows_utils.h"
+#endif
+
+
+struct osd_entry {
+    pl_tex tex;                     // texture the item's packed bitmap is uploaded to
+    struct pl_overlay_part *parts;  // part list, refilled on every update_overlays() call
+    int num_parts;
+};
+
+struct osd_state {
+    struct osd_entry entries[MAX_OSD_PARTS];    // indexed by the item's render_index
+    struct pl_overlay overlays[MAX_OSD_PARTS];  // filled densely; pl_frame.overlays points here
+};
+
+struct scaler_params {
+    struct pl_filter_config config;
+};
+
+struct user_hook {
+    char *path;
+    const struct pl_hook *hook;
+};
+
+struct user_lut {
+    char *opt;
+    char *path;
+    int type;
+    struct pl_custom_lut *lut;
+};
+
+struct frame_info {
+    int count;
+    struct pl_dispatch_info info[VO_PASS_PERF_MAX];
+};
+
+struct cache {
+    char *path;
+    pl_cache cache;
+    uint64_t sig;
+};
+
+struct priv {
+    struct mp_log *log;
+    struct mpv_global *global;
+    struct ra_ctx *ra_ctx;
+    struct gpu_ctx *context;
+    struct ra_hwdec_ctx hwdec_ctx;
+    struct ra_hwdec_mapper *hwdec_mapper; // (re)created by hwdec_reconfig()
+
+    // Allocated DR buffers
+    mp_mutex dr_lock; // held while dr_buffers/num_dr_buffers are read or modified
+    pl_buf *dr_buffers;
+    int num_dr_buffers;
+
+    pl_log pllog;
+    pl_gpu gpu;
+    pl_renderer rr;
+    pl_queue queue;
+    pl_swapchain sw;
+    pl_fmt osd_fmt[SUBBITMAP_COUNT]; // OSD texture format, indexed by sub-bitmap format
+    pl_tex *sub_tex;                 // spare textures that update_overlays() takes from
+    int num_sub_tex;
+
+    struct mp_rect src, dst;
+    struct mp_osd_res osd_res;
+    struct osd_state osd_state;
+
+    uint64_t last_id;
+    uint64_t osd_sync;
+    double last_pts;
+    bool is_interpolated;
+    bool want_reset;
+    bool frame_pending;
+    bool redraw;
+
+    pl_options pars;
+    struct m_config_cache *opts_cache;
+    struct cache shader_cache, icc_cache;
+    struct mp_csp_equalizer_state *video_eq;
+    struct scaler_params scalers[SCALER_COUNT];
+    const struct pl_hook **hooks; // storage for `params.hooks`
+    enum mp_csp_levels output_levels;
+    char **raw_opts;
+
+    struct pl_icc_params icc_params;
+    char *icc_path;
+    pl_icc_object icc_profile;
+
+    struct user_lut image_lut;
+    struct user_lut target_lut;
+    struct user_lut lut;
+
+    // Cached shaders, preserved across options updates
+    struct user_hook *user_hooks;
+    int num_user_hooks;
+
+    // Performance data of last frame
+    struct frame_info perf_fresh;
+    struct frame_info perf_redraw;
+
+    bool delayed_peak;
+    bool inter_preserve;
+    bool target_hint;
+
+    float corner_rounding;
+};
+
+static void update_render_options(struct vo *vo);
+static void update_lut(struct priv *p, struct user_lut *lut);
+
+// Return the DR buffer whose mapped range contains ptr, or NULL if none does.
+static pl_buf get_dr_buf(struct priv *p, const uint8_t *ptr)
+{
+    pl_buf found = NULL;
+
+    mp_mutex_lock(&p->dr_lock);
+    for (int i = 0; i < p->num_dr_buffers && !found; i++) {
+        pl_buf cand = p->dr_buffers[i];
+        if (ptr >= cand->data && ptr < cand->data + cand->params.size)
+            found = cand;
+    }
+    mp_mutex_unlock(&p->dr_lock);
+
+    return found;
+}
+
+static void free_dr_buf(void *opaque, uint8_t *data)
+{
+    struct priv *p = opaque;
+    mp_mutex_lock(&p->dr_lock);
+    // Destroy and unregister the DR buffer whose mapping starts exactly at data.
+    for (int i = 0; i < p->num_dr_buffers; i++) {
+        if (p->dr_buffers[i]->data == data) {
+            pl_buf_destroy(p->gpu, &p->dr_buffers[i]);
+            MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, i);
+            mp_mutex_unlock(&p->dr_lock);
+            return;
+        }
+    }
+    // Reaching this point means data did not belong to any registered buffer.
+    MP_ASSERT_UNREACHABLE();
+}
+
+static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h,
+                                  int stride_align, int flags)
+{ // Allocate an mp_image backed by a host-mapped GPU buffer; NULL if not possible.
+    struct priv *p = vo->priv;
+    pl_gpu gpu = p->gpu;
+    if (!gpu->limits.thread_safe || !gpu->limits.max_mapped_size)
+        return NULL;
+
+    if ((flags & VO_DR_FLAG_HOST_CACHED) && !gpu->limits.host_cached)
+        return NULL;
+    // The stride alignment must also satisfy the GPU's transfer pitch/offset alignment.
+    stride_align = mp_lcm(stride_align, gpu->limits.align_tex_xfer_pitch);
+    stride_align = mp_lcm(stride_align, gpu->limits.align_tex_xfer_offset);
+    int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align);
+    if (size < 0)
+        return NULL;
+    // Host-mapped buffer, over-allocated by stride_align bytes.
+    pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) {
+        .memory_type = PL_BUF_MEM_HOST,
+        .host_mapped = true,
+        .size = size + stride_align,
+    });
+
+    if (!buf)
+        return NULL;
+    // Wrap the mapping in an mp_image; free_dr_buf() is passed as its free callback.
+    struct mp_image *mpi = mp_image_from_buffer(imgfmt, w, h, stride_align,
+                                                buf->data, buf->params.size,
+                                                p, free_dr_buf);
+    if (!mpi) {
+        pl_buf_destroy(gpu, &buf);
+        return NULL;
+    }
+    // Register the buffer so get_dr_buf()/free_dr_buf() can find it later.
+    mp_mutex_lock(&p->dr_lock);
+    MP_TARRAY_APPEND(p, p->dr_buffers, p->num_dr_buffers, buf);
+    mp_mutex_unlock(&p->dr_lock);
+
+    return mpi;
+}
+
+static struct pl_color_space get_mpi_csp(struct vo *vo, struct mp_image *mpi);
+
+static void update_overlays(struct vo *vo, struct mp_osd_res res,
+                            int flags, enum pl_overlay_coords coords,
+                            struct osd_state *state, struct pl_frame *frame,
+                            struct mp_image *src)
+{
+    struct priv *p = vo->priv;
+    static const bool subfmt_all[SUBBITMAP_COUNT] = {
+        [SUBBITMAP_LIBASS] = true,
+        [SUBBITMAP_BGRA]   = true,
+    };
+    // Render the OSD for src's pts (0 without src), accepting both bitmap formats.
+    double pts = src ? src->pts : 0;
+    struct sub_bitmap_list *subs = osd_render(vo->osd, res, pts, flags, subfmt_all);
+    // frame->overlays points into state; only successfully uploaded items are counted.
+    frame->overlays = state->overlays;
+    frame->num_overlays = 0;
+
+    for (int n = 0; n < subs->num_items; n++) {
+        const struct sub_bitmaps *item = subs->items[n];
+        if (!item->num_parts || !item->packed)
+            continue;
+        struct osd_entry *entry = &state->entries[item->render_index];
+        pl_fmt tex_fmt = p->osd_fmt[item->format];
+        if (!entry->tex) // take a spare texture from p->sub_tex, if there is one
+            MP_TARRAY_POP(p->sub_tex, p->num_sub_tex, &entry->tex);
+        bool ok = pl_tex_recreate(p->gpu, &entry->tex, &(struct pl_tex_params) {
+            .format = tex_fmt,
+            .w = MPMAX(item->packed_w, entry->tex ? entry->tex->params.w : 0), // never shrink
+            .h = MPMAX(item->packed_h, entry->tex ? entry->tex->params.h : 0),
+            .host_writable = true,
+            .sampleable = true,
+        });
+        if (!ok) {
+            MP_ERR(vo, "Failed recreating OSD texture!\n");
+            break;
+        }
+        ok = pl_tex_upload(p->gpu, &(struct pl_tex_transfer_params) {
+            .tex        = entry->tex,
+            .rc         = { .x1 = item->packed_w, .y1 = item->packed_h, },
+            .row_pitch  = item->packed->stride[0],
+            .ptr        = item->packed->planes[0],
+        });
+        if (!ok) {
+            MP_ERR(vo, "Failed uploading OSD texture!\n");
+            break;
+        }
+        // Rebuild the part list: source rect in the packed bitmap, target rect, color.
+        entry->num_parts = 0;
+        for (int i = 0; i < item->num_parts; i++) {
+            const struct sub_bitmap *b = &item->parts[i];
+            uint32_t c = b->libass.color;
+            struct pl_overlay_part part = {
+                .src = { b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h },
+                .dst = { b->x, b->y, b->x + b->dw, b->y + b->dh },
+                .color = {
+                    (c >> 24) / 255.0,
+                    ((c >> 16) & 0xFF) / 255.0,
+                    ((c >> 8) & 0xFF) / 255.0,
+                    1.0 - (c & 0xFF) / 255.0,
+                }
+            };
+            MP_TARRAY_APPEND(p, entry->parts, entry->num_parts, part);
+        }
+        // Defaults to BT.709/sRGB; BGRA items may replace the color space below.
+        struct pl_overlay *ol = &state->overlays[frame->num_overlays++];
+        *ol = (struct pl_overlay) {
+            .tex = entry->tex,
+            .parts = entry->parts,
+            .num_parts = entry->num_parts,
+            .color = {
+                .primaries = PL_COLOR_PRIM_BT_709,
+                .transfer = PL_COLOR_TRC_SRGB,
+            },
+            .coords = coords,
+        };
+
+        switch (item->format) {
+        case SUBBITMAP_BGRA:
+            ol->mode = PL_OVERLAY_NORMAL;
+            ol->repr.alpha = PL_ALPHA_PREMULTIPLIED;
+            // Infer bitmap colorspace from source
+            if (src) {
+                ol->color = get_mpi_csp(vo, src);
+                // Seems like HDR subtitles are targeting SDR white
+                if (pl_color_transfer_is_hdr(ol->color.transfer)) {
+                    ol->color.hdr = (struct pl_hdr_metadata) {
+                        .max_luma = PL_COLOR_SDR_WHITE,
+                    };
+                }
+            }
+            break;
+        case SUBBITMAP_LIBASS:
+            ol->mode = PL_OVERLAY_MONOCHROME;
+            ol->repr.alpha = PL_ALPHA_INDEPENDENT;
+            break;
+        }
+    }
+
+    talloc_free(subs);
+}
+
+struct frame_priv {
+    struct vo *vo;
+    struct osd_state subs;
+    uint64_t osd_sync;
+    struct ra_hwdec *hwdec;
+};
+
+static int plane_data_from_imgfmt(struct pl_plane_data out_data[4],
+                                  struct pl_bit_encoding *out_bits,
+                                  enum mp_imgfmt imgfmt)
+{ // Describe imgfmt as pl_plane_data; returns the plane count, or 0 if unsupported.
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt);
+    if (!desc.num_planes || !(desc.flags & MP_IMGFLAG_HAS_COMPS))
+        return 0;
+
+    if (desc.flags & MP_IMGFLAG_HWACCEL)
+        return 0; // HW-accelerated frames need to be mapped differently
+
+    if (!(desc.flags & MP_IMGFLAG_NE))
+        return 0; // GPU endianness follows the host's
+
+    if (desc.flags & MP_IMGFLAG_PAL)
+        return 0; // Palette formats (currently) not supported in libplacebo
+
+    if ((desc.flags & MP_IMGFLAG_TYPE_FLOAT) && (desc.flags & MP_IMGFLAG_YUV))
+        return 0; // Floating-point YUV (currently) unsupported
+
+    bool has_bits = false;
+    bool any_padded = false;
+    // Per plane: collect its components in offset order, then fill in sizes/padding.
+    for (int p = 0; p < desc.num_planes; p++) {
+        struct pl_plane_data *data = &out_data[p];
+        struct mp_imgfmt_comp_desc sorted[MP_NUM_COMPONENTS];
+        int num_comps = 0;
+        if (desc.bpp[p] % 8)
+            return 0; // Pixel size is not byte-aligned
+
+        for (int c = 0; c < mp_imgfmt_desc_get_num_comps(&desc); c++) {
+            if (desc.comps[c].plane != p)
+                continue;
+
+            data->component_map[num_comps] = c;
+            sorted[num_comps] = desc.comps[c];
+            num_comps++;
+
+            // Sort components by offset order, while keeping track of the
+            // semantic mapping in `data->component_map`
+            for (int i = num_comps - 1; i > 0; i--) {
+                if (sorted[i].offset >= sorted[i - 1].offset)
+                    break;
+                MPSWAP(struct mp_imgfmt_comp_desc, sorted[i], sorted[i - 1]);
+                MPSWAP(int, data->component_map[i], data->component_map[i - 1]);
+            }
+        }
+
+        uint64_t total_bits = 0; // bits covered by the components processed so far
+
+        // Fill in the pl_plane_data fields for each component
+        memset(data->component_size, 0, sizeof(data->component_size));
+        for (int c = 0; c < num_comps; c++) {
+            data->component_size[c] = sorted[c].size;
+            data->component_pad[c] = sorted[c].offset - total_bits; // gap before this component
+            total_bits += data->component_pad[c] + data->component_size[c];
+            any_padded |= sorted[c].pad;
+
+            // Ignore bit encoding of alpha channel
+            if (!out_bits || data->component_map[c] == PL_CHANNEL_A)
+                continue;
+
+            struct pl_bit_encoding bits = {
+                .sample_depth = data->component_size[c],
+                .color_depth = sorted[c].size - abs(sorted[c].pad),
+                .bit_shift = MPMAX(sorted[c].pad, 0),
+            };
+
+            if (!has_bits) {
+                *out_bits = bits;
+                has_bits = true;
+            } else {
+                if (!pl_bit_encoding_equal(out_bits, &bits)) {
+                    // Bit encoding differs between components/planes,
+                    // cannot handle this
+                    *out_bits = (struct pl_bit_encoding) {0};
+                    out_bits = NULL; // no bit encoding is reported from here on
+                }
+            }
+        }
+
+        data->pixel_stride = desc.bpp[p] / 8;
+        data->type = (desc.flags & MP_IMGFLAG_TYPE_FLOAT)
+                            ? PL_FMT_FLOAT
+                            : PL_FMT_UNORM;
+    }
+
+    if (any_padded && !out_bits)
+        return 0; // can't handle padded components without `pl_bit_encoding`
+
+    return desc.num_planes;
+}
+
+// Build a pl_color_space from the image's primaries, transfer and HDR metadata.
+static struct pl_color_space get_mpi_csp(struct vo *vo, struct mp_image *mpi)
+{
+    return (struct pl_color_space) {
+        .primaries = mp_prim_to_pl(mpi->params.color.primaries),
+        .transfer = mp_trc_to_pl(mpi->params.color.gamma),
+        .hdr = mpi->params.color.hdr,
+    };
+}
+
+// Make sure `p->hwdec_mapper` exists and matches the image parameters `par`.
+// An existing mapper is kept if its source parameters equal `par`; otherwise
+// it is freed and a new one is created for `hwdec`.
+//
+// Returns true if a mapper is available afterwards, or false (after logging
+// an error) if creating the mapper failed.
+static bool hwdec_reconfig(struct priv *p, struct ra_hwdec *hwdec,
+                           const struct mp_image_params *par)
+{
+    if (p->hwdec_mapper) {
+        // Existing mapper is still valid for these parameters, reuse it
+        if (mp_image_params_equal(par, &p->hwdec_mapper->src_params))
+            return true;
+
+        ra_hwdec_mapper_free(&p->hwdec_mapper);
+    }
+
+    p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, par);
+    if (!p->hwdec_mapper) {
+        MP_ERR(p, "Initializing texture for hardware decoding failed.\n");
+        return false;
+    }
+
+    return true;
+}
+
+// Return the pl_tex to use for plane `n` of the current hwdec mapper, or
+// NULL on failure (an error is logged if no wrapping path applies).
+//
+// If the mapper's RA is based on ra_pl, the texture's `priv` pointer is
+// returned directly. For RAs not based on ra_pl, this creates a new pl_tex
+// wrapper around the raw OpenGL or D3D11 texture; hwdec_release() destroys
+// the plane textures again in that case.
+static pl_tex hwdec_get_tex(struct priv *p, int n)
+{
+    struct ra_tex *ratex = p->hwdec_mapper->tex[n];
+    struct ra *ra = p->hwdec_mapper->ra;
+    if (ra_pl_get(ra))
+        return (pl_tex) ratex->priv;
+
+#if HAVE_GL && defined(PL_HAVE_OPENGL)
+    if (ra_is_gl(ra) && pl_opengl_get(p->gpu)) {
+        struct pl_opengl_wrap_params par = {
+            .width = ratex->params.w,
+            .height = ratex->params.h,
+        };
+
+        // Only the internal format is needed; the other two outputs are
+        // written to throwaway temporaries.
+        ra_gl_get_format(ratex->params.format, &par.iformat,
+                         &(GLenum){0}, &(GLenum){0});
+        ra_gl_get_raw_tex(ra, ratex, &par.texture, &par.target);
+        return pl_opengl_wrap(p->gpu, &par);
+    }
+#endif
+
+#if HAVE_D3D11 && defined(PL_HAVE_D3D11)
+    if (ra_is_d3d11(ra)) {
+        int array_slice = 0;
+        ID3D11Resource *res = ra_d3d11_get_raw_tex(ra, ratex, &array_slice);
+        pl_tex tex = pl_d3d11_wrap(p->gpu, pl_d3d11_wrap_params(
+            .tex = res,
+            .array_slice = array_slice,
+            .fmt = ra_d3d11_get_format(ratex->params.format),
+            .w = ratex->params.w,
+            .h = ratex->params.h,
+        ));
+        SAFE_RELEASE(res);
+        return tex;
+    }
+#endif
+
+    MP_ERR(p, "Failed mapping hwdec frame? Open a bug!\n");
+    return NULL;
+}
+
+// pl_frame acquire callback for hwdec frames (installed by map_frame):
+// (re)configures the mapper for this image, maps the hardware surface and
+// assigns one texture per plane. Returns false as soon as any step fails.
+// `gpu` is not used by this function.
+static bool hwdec_acquire(pl_gpu gpu, struct pl_frame *frame)
+{
+    struct mp_image *mpi = frame->user_data;
+    struct frame_priv *fp = mpi->priv;
+    struct priv *p = fp->vo->priv;
+
+    bool configured = hwdec_reconfig(p, fp->hwdec, &mpi->params);
+    if (!configured)
+        return false;
+
+    if (ra_hwdec_mapper_map(p->hwdec_mapper, mpi) < 0) {
+        MP_ERR(p, "Mapping hardware decoded surface failed.\n");
+        return false;
+    }
+
+    for (int n = 0; n < frame->num_planes; n++) {
+        pl_tex plane_tex = hwdec_get_tex(p, n);
+        frame->planes[n].texture = plane_tex;
+        if (!plane_tex)
+            return false;
+    }
+
+    return true;
+}
+
+// pl_frame release callback for hwdec frames (installed by map_frame):
+// destroys the per-plane textures when the mapper's RA is not based on ra_pl
+// (hwdec_get_tex() creates wrapper textures in that case), then unmaps the
+// hardware surface. `gpu` is not used; `p->gpu` is used instead.
+static void hwdec_release(pl_gpu gpu, struct pl_frame *frame)
+{
+    struct mp_image *mpi = frame->user_data;
+    struct frame_priv *fp = mpi->priv;
+    struct priv *p = fp->vo->priv;
+
+    bool wrapped = !ra_pl_get(p->hwdec_mapper->ra);
+    for (int n = 0; wrapped && n < frame->num_planes; n++)
+        pl_tex_destroy(p->gpu, &frame->planes[n].texture);
+
+    ra_hwdec_mapper_unmap(p->hwdec_mapper);
+}
+
+// Map callback for frames pushed into the pl_queue (see `.map` in
+// draw_frame). Fills in `*frame` for the mp_image in `src->frame_data`:
+//  - hwdec formats: only the plane/component layout and the acquire/release
+//    callbacks are set here; no texture is assigned in this function
+//  - other formats: every plane is uploaded into `tex[n]`
+// Returns true on success. On the failure paths below, the mp_image is freed
+// here before returning false.
+static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src,
+                      struct pl_frame *frame)
+{
+    struct mp_image *mpi = src->frame_data;
+    const struct mp_image_params *par = &mpi->params;
+    struct frame_priv *fp = mpi->priv;
+    struct vo *vo = fp->vo;
+    struct priv *p = vo->priv;
+
+    fp->hwdec = ra_hwdec_get(&p->hwdec_ctx, mpi->imgfmt);
+    if (fp->hwdec) {
+        // Note: We don't actually need the mapper to map the frame yet, we
+        // only reconfig the mapper here (potentially creating it) to access
+        // `dst_params`. In practice, though, this should not matter unless the
+        // image format changes mid-stream.
+        if (!hwdec_reconfig(p, fp->hwdec, &mpi->params)) {
+            talloc_free(mpi);
+            return false;
+        }
+
+        par = &p->hwdec_mapper->dst_params;
+    }
+
+    // From here on, `par` is the mapper's `dst_params` for hwdec frames and
+    // the image's own params otherwise. The color space and ICC profile are
+    // always taken from the image itself.
+    *frame = (struct pl_frame) {
+        .color = get_mpi_csp(vo, mpi),
+        .repr = {
+            .sys = mp_csp_to_pl(par->color.space),
+            .levels = mp_levels_to_pl(par->color.levels),
+            .alpha = mp_alpha_to_pl(par->alpha),
+        },
+        .profile = {
+            .data = mpi->icc_profile ? mpi->icc_profile->data : NULL,
+            .len = mpi->icc_profile ? mpi->icc_profile->size : 0,
+        },
+        .rotation = par->rotate / 90,
+        .user_data = mpi,
+    };
+
+    // mp_image, like AVFrame, likes communicating RGB/XYZ/YCbCr status
+    // implicitly via the image format, rather than the actual tagging.
+    switch (mp_imgfmt_get_forced_csp(par->imgfmt)) {
+    case MP_CSP_RGB:
+        frame->repr.sys = PL_COLOR_SYSTEM_RGB;
+        frame->repr.levels = PL_COLOR_LEVELS_FULL;
+        break;
+    case MP_CSP_XYZ:
+        frame->repr.sys = PL_COLOR_SYSTEM_XYZ;
+        break;
+    case MP_CSP_AUTO:
+        // No forced system: only guess one if the tagging left it unset
+        if (!frame->repr.sys)
+            frame->repr.sys = pl_color_system_guess_ycbcr(par->w, par->h);
+        break;
+    default: break;
+    }
+
+    if (fp->hwdec) {
+
+        // Textures are provided later by hwdec_acquire(); only describe the
+        // plane layout here.
+        struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(par->imgfmt);
+        frame->acquire = hwdec_acquire;
+        frame->release = hwdec_release;
+        frame->num_planes = desc.num_planes;
+        for (int n = 0; n < frame->num_planes; n++) {
+            struct pl_plane *plane = &frame->planes[n];
+            int *map = plane->component_mapping;
+            // Collect the components stored in plane n, inserting each one
+            // so that `map` stays ordered by ascending component offset.
+            for (int c = 0; c < mp_imgfmt_desc_get_num_comps(&desc); c++) {
+                if (desc.comps[c].plane != n)
+                    continue;
+
+                // Sort by component offset
+                uint8_t offset = desc.comps[c].offset;
+                int index = plane->components++;
+                while (index > 0 && desc.comps[map[index - 1]].offset > offset) {
+                    map[index] = map[index - 1];
+                    index--;
+                }
+                map[index] = c;
+            }
+        }
+
+    } else { // swdec
+
+        struct pl_plane_data data[4] = {0};
+        frame->num_planes = plane_data_from_imgfmt(data, &frame->repr.bits, mpi->imgfmt);
+        for (int n = 0; n < frame->num_planes; n++) {
+            struct pl_plane *plane = &frame->planes[n];
+            data[n].width = mp_image_plane_w(mpi, n);
+            data[n].height = mp_image_plane_h(mpi, n);
+            if (mpi->stride[n] < 0) {
+                // Negative stride: start from the row with the lowest
+                // address, upload with a positive stride and mark the plane
+                // as flipped instead.
+                data[n].pixels = mpi->planes[n] + (data[n].height - 1) * mpi->stride[n];
+                data[n].row_stride = -mpi->stride[n];
+                plane->flipped = true;
+            } else {
+                data[n].pixels = mpi->planes[n];
+                data[n].row_stride = mpi->stride[n];
+            }
+
+            // If get_dr_buf() returns a pl_buf for this pointer, upload from
+            // that buffer (at the matching offset) rather than from the host
+            // pointer. Otherwise, if the GPU reports callback support, attach
+            // a new image reference that is released through talloc_free.
+            // NOTE(review): presumably this keeps the pixel data alive until
+            // the upload has completed; confirm against pl_upload_plane docs.
+            pl_buf buf = get_dr_buf(p, data[n].pixels);
+            if (buf) {
+                data[n].buf = buf;
+                data[n].buf_offset = (uint8_t *) data[n].pixels - buf->data;
+                data[n].pixels = NULL;
+            } else if (gpu->limits.callbacks) {
+                data[n].callback = talloc_free;
+                data[n].priv = mp_image_new_ref(mpi);
+            }
+
+            if (!pl_upload_plane(gpu, plane, &tex[n], &data[n])) {
+                MP_ERR(vo, "Failed uploading frame!\n");
+                talloc_free(data[n].priv);
+                talloc_free(mpi);
+                return false;
+            }
+        }
+
+    }
+
+    // Update chroma location, must be done after initializing planes
+    pl_frame_set_chroma_location(frame, mp_chroma_to_pl(par->chroma_location));
+
+    // Set the frame DOVI metadata
+    mp_map_dovi_metadata_to_pl(mpi, frame);
+
+    if (mpi->film_grain)
+        pl_film_grain_from_av(&frame->film_grain, (AVFilmGrainParams *) mpi->film_grain->data);
+
+    // Compute a unique signature for any attached ICC profile. Wasteful in
+    // theory if the ICC profile is the same for multiple frames, but in
+    // practice ICC profiles are overwhelmingly going to be attached to
+    // still images so it shouldn't matter.
+    pl_icc_profile_compute_signature(&frame->profile);
+
+    // Update LUT attached to this frame
+    update_lut(p, &p->image_lut);
+    frame->lut = p->image_lut.lut;
+    frame->lut_type = p->image_lut.type;
+    return true;
+}
+
+// Unmap callback for frames in the pl_queue (see `.unmap` in draw_frame).
+// Every non-NULL texture in the frame's subtitle overlay entries is appended
+// to `p->sub_tex`, then the mp_image is freed. `gpu` and `frame` are unused.
+static void unmap_frame(pl_gpu gpu, struct pl_frame *frame,
+                        const struct pl_source_frame *src)
+{
+    struct mp_image *mpi = src->frame_data;
+    struct frame_priv *fp = mpi->priv;
+    struct priv *p = fp->vo->priv;
+
+    for (int i = 0; i < MP_ARRAY_SIZE(fp->subs.entries); i++) {
+        pl_tex sub_tex = fp->subs.entries[i].tex;
+        if (!sub_tex)
+            continue;
+        MP_TARRAY_APPEND(p, p->sub_tex, p->num_sub_tex, sub_tex);
+    }
+
+    talloc_free(mpi);
+}
+
+// Discard callback for frames in the pl_queue (see `.discard` in
+// draw_frame): frees the mp_image attached to the source frame.
+static void discard_frame(const struct pl_source_frame *src)
+{
+    talloc_free(src->frame_data);
+}
+
+// Render info callback (`priv` is the `struct vo`): records the dispatch
+// info of one render pass into the perf data of the matching stage. Passes
+// with an index of VO_PASS_PERF_MAX or above are dropped; any stage other
+// than FRAME or BLEND aborts.
+static void info_callback(void *priv, const struct pl_render_info *info)
+{
+    struct vo *vo = priv;
+    struct priv *p = vo->priv;
+
+    // silently ignore clipped passes, whatever
+    if (info->index >= VO_PASS_PERF_MAX)
+        return;
+
+    struct frame_info *perf;
+    if (info->stage == PL_RENDER_STAGE_FRAME) {
+        perf = &p->perf_fresh;
+    } else if (info->stage == PL_RENDER_STAGE_BLEND) {
+        perf = &p->perf_redraw;
+    } else {
+        abort();
+    }
+
+    perf->count = info->index + 1;
+    pl_dispatch_info_move(&perf->info[info->index], info->pass);
+}
+
+// Refresh the render state that derives from user options: reload the
+// render options if the option cache reports a change, update the LUT in
+// `p->lut`, copy the equalizer values into the color adjustment and
+// `p->output_levels`, and finally apply the raw key/value options.
+static void update_options(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    pl_options pars = p->pars;
+
+    if (m_config_cache_update(p->opts_cache))
+        update_render_options(vo);
+
+    update_lut(p, &p->lut);
+    pars->params.lut_type = p->lut.type;
+    pars->params.lut = p->lut.lut;
+
+    // Update equalizer state
+    struct mp_csp_params eq = MP_CSP_PARAMS_DEFAULTS;
+    mp_csp_equalizer_state_get(p->video_eq, &eq);
+    p->output_levels = eq.levels_out;
+    pars->color_adjustment.gamma = eq.gamma;
+    pars->color_adjustment.saturation = eq.saturation;
+    pars->color_adjustment.hue = eq.hue;
+    pars->color_adjustment.contrast = eq.contrast;
+    pars->color_adjustment.brightness = eq.brightness;
+
+    // `raw_opts` is walked as consecutive key/value pairs until a NULL key
+    char **kv = p->raw_opts;
+    while (kv && kv[0]) {
+        pl_options_set_str(pars, kv[0], kv[1]);
+        kv += 2;
+    }
+}
+
+// Derive `color->hdr.min_luma` from the `target_contrast` option:
+//   0  -> auto mode, `color` is left untouched
+//   -1 -> infinite contrast, min_luma is set to 1e-7
+//   otherwise min_luma = max_luma / target_contrast, where max_luma is first
+//   filled in via pl_color_space_nominal_luma_ex()
+static void apply_target_contrast(struct priv *p, struct pl_color_space *color)
+{
+    const struct gl_video_opts *opts = p->opts_cache->opts;
+
+    // Auto mode, leave as is
+    if (opts->target_contrast == 0)
+        return;
+
+    if (opts->target_contrast != -1) {
+        // Infer max_luma for current pl_color_space
+        pl_color_space_nominal_luma_ex(pl_nominal_luma_params(
+            .color = color,
+            // with HDR10 meta to respect value if already set
+            .metadata = PL_HDR_METADATA_HDR10,
+            .scaling = PL_HDR_NITS,
+            .out_max = &color->hdr.max_luma
+        ));
+
+        color->hdr.min_luma = color->hdr.max_luma / opts->target_contrast;
+    } else {
+        // Infinite contrast
+        color->hdr.min_luma = 1e-7;
+    }
+}
+
+// Apply user overrides and derived settings to the render target `target`:
+// the target LUT, output levels, primaries/transfer/peak/contrast/gamut
+// overrides, the dither depth, and finally the ICC profile (after updating
+// `p->icc_params.max_luma`).
+static void apply_target_options(struct priv *p, struct pl_frame *target)
+{
+    update_lut(p, &p->target_lut);
+    target->lut_type = p->target_lut.type;
+    target->lut = p->target_lut.lut;
+
+    // Colorspace overrides
+    const struct gl_video_opts *opts = p->opts_cache->opts;
+    struct pl_color_space *csp = &target->color;
+    if (p->output_levels)
+        target->repr.levels = mp_levels_to_pl(p->output_levels);
+    if (opts->target_prim)
+        csp->primaries = mp_prim_to_pl(opts->target_prim);
+    if (opts->target_trc)
+        csp->transfer = mp_trc_to_pl(opts->target_trc);
+
+    // Only fill in the peak when the target does not already carry one
+    // (e.g. a value returned by the swapchain); the override itself is
+    // applied through the colorspace hint.
+    if (!csp->hdr.max_luma && opts->target_peak)
+        csp->hdr.max_luma = opts->target_peak;
+    if (!csp->hdr.min_luma)
+        apply_target_contrast(p, csp);
+
+    if (opts->target_gamut) {
+        // Ensure resulting gamut still fits inside container
+        const struct pl_raw_primaries *gamut =
+            pl_raw_primaries_get(mp_prim_to_pl(opts->target_gamut));
+        const struct pl_raw_primaries *container =
+            pl_raw_primaries_get(csp->primaries);
+        csp->hdr.prim = pl_primaries_clip(gamut, container);
+    }
+
+    if (opts->dither_depth > 0) {
+        // Shift color_depth by the same amount sample_depth changes
+        struct pl_bit_encoding *enc = &target->repr.bits;
+        enc->color_depth += opts->dither_depth - enc->sample_depth;
+        enc->sample_depth = opts->dither_depth;
+    }
+
+    if (!opts->icc_opts->icc_use_luma) {
+        pl_color_space_nominal_luma_ex(pl_nominal_luma_params(
+            .color    = csp,
+            .metadata = PL_HDR_METADATA_HDR10, // use only static HDR nits
+            .scaling  = PL_HDR_NITS,
+            .out_max  = &p->icc_params.max_luma,
+        ));
+    } else {
+        p->icc_params.max_luma = 0.0f;
+    }
+
+    pl_icc_update(p->pllog, &p->icc_profile, NULL, &p->icc_params);
+    target->icc = p->icc_profile;
+}
+
+// Set `frame->crop` from mpv's rectangle `crop`. mpv gives us
+// rotated/flipped rects while libplacebo expects unrotated ones, so the rect
+// is rotated back by the frame's rotation, and any axis whose end coordinate
+// ends up below its start coordinate is mirrored within `width` / `height`.
+static void apply_crop(struct pl_frame *frame, struct mp_rect crop,
+                       int width, int height)
+{
+    struct pl_rect2df *rc = &frame->crop;
+    *rc = (struct pl_rect2df) {
+        .x0 = crop.x0,
+        .y0 = crop.y0,
+        .x1 = crop.x1,
+        .y1 = crop.y1,
+    };
+
+    pl_rect2df_rotate(rc, -frame->rotation);
+
+    if (rc->x0 > rc->x1) {
+        rc->x1 = width - rc->x1;
+        rc->x0 = width - rc->x0;
+    }
+
+    if (rc->y0 > rc->y1) {
+        rc->y1 = height - rc->y1;
+        rc->y0 = height - rc->y0;
+    }
+}
+
+// Configure the tone-mapping LUT visualization in `params`, if it is
+// enabled (`visualize_lut`): sets the visualization rectangle relative to
+// the target crop and the visualized hue. Does nothing otherwise.
+static void update_tm_viz(struct pl_color_map_params *params,
+                          const struct pl_frame *target)
+{
+    if (!params->visualize_lut)
+        return;
+
+    // Use right half of screen for TM visualization, constrain to 1:1 AR
+    const float crop_w = fabsf(pl_rect_w(target->crop));
+    const float crop_h = fabsf(pl_rect_h(target->crop));
+    const float side = MPMIN(crop_w / 2.0f, crop_h);
+    params->visualize_rect = (pl_rect2df) {
+        .x0 = 1.0f - side / crop_w,
+        .y0 = 0.0f,
+        .x1 = 1.0f,
+        .y1 = side / crop_h,
+    };
+
+    // Visualize red-blue plane
+    params->visualize_hue = M_PI / 4.0;
+}
+
+// Render the vo_frame `frame` into the next swapchain image.
+//
+// Rough order of operations:
+//  1. refresh options and derive the per-call render parameters
+//  2. request a queue reset if the requested PTS lies before the queue's
+//     first frame
+//  3. push all not-yet-seen source frames into the pl_queue
+//  4. send (or clear) the colorspace hint on the swapchain
+//  5. start the swapchain frame; if that is not possible, only advance the
+//     queue and return
+//  6. set up the target, update the queue, refresh crop and subtitle
+//     overlays on all mixed frames, and render
+// Once a swapchain frame was started, the GPU is flushed and `frame_pending`
+// is set even if rendering failed (the target is cleared to purple then), so
+// that flip_page() submits the frame.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+    pl_options pars = p->pars;
+    pl_gpu gpu = p->gpu;
+    update_options(vo);
+
+    // Work on a local copy of the render params so the per-call tweaks below
+    // do not modify `pars->params`
+    struct pl_render_params params = pars->params;
+    const struct gl_video_opts *opts = p->opts_cache->opts;
+    bool will_redraw = frame->display_synced && frame->num_vsyncs > 1;
+    bool cache_frame = will_redraw || frame->still;
+    bool can_interpolate = opts->interpolation && frame->display_synced &&
+                           !frame->still && frame->num_frames > 1;
+    double pts_offset = can_interpolate ? frame->ideal_frame_vsync : 0;
+    params.info_callback = info_callback;
+    params.info_priv = vo;
+    params.skip_caching_single_frame = !cache_frame;
+    params.preserve_mixing_cache = p->inter_preserve && !frame->still;
+    // Still frames are rendered without a frame mixer
+    if (frame->still)
+        params.frame_mixer = NULL;
+
+    // pl_queue advances its internal virtual PTS and culls available frames
+    // based on this value and the VPS/FPS ratio. Requesting a non-monotonic PTS
+    // is an invalid use of pl_queue. Reset it if this happens in an attempt to
+    // recover as much as possible. Ideally, this should never occur, and if it
+    // does, it should be corrected. The ideal_frame_vsync may be negative if
+    // the last draw did not align perfectly with the vsync. In this case, we
+    // should have the previous frame available in pl_queue, or a reset is
+    // already requested. Clamp the check to 0, as we don't have the previous
+    // frame in vo_frame anyway.
+    struct pl_source_frame vpts;
+    if (frame->current && !p->want_reset) {
+        if (pl_queue_peek(p->queue, 0, &vpts) &&
+            frame->current->pts + MPMAX(0, pts_offset) < vpts.pts)
+        {
+            MP_VERBOSE(vo, "Forcing queue refill, PTS(%f + %f | %f) < VPTS(%f)\n",
+                       frame->current->pts, pts_offset,
+                       frame->ideal_frame_vsync_duration, vpts.pts);
+            p->want_reset = true;
+        }
+    }
+
+    // Push all incoming frames into the frame queue
+    for (int n = 0; n < frame->num_frames; n++) {
+        int id = frame->frame_id + n;
+
+        // A pending reset is only carried out once there is a frame to push;
+        // it also clears `last_id`, so frames seen before are pushed again.
+        if (p->want_reset) {
+            pl_renderer_flush_cache(p->rr);
+            pl_queue_reset(p->queue);
+            p->last_pts = 0.0;
+            p->last_id = 0;
+            p->want_reset = false;
+        }
+
+        if (id <= p->last_id)
+            continue; // ignore already seen frames
+
+        // The queue gets its own image reference; `frame_priv` is allocated
+        // as a talloc child of that reference. The reference is freed by
+        // map_frame (on failure), unmap_frame or discard_frame.
+        struct mp_image *mpi = mp_image_new_ref(frame->frames[n]);
+        struct frame_priv *fp = talloc_zero(mpi, struct frame_priv);
+        mpi->priv = fp;
+        fp->vo = vo;
+
+        pl_queue_push(p->queue, &(struct pl_source_frame) {
+            .pts = mpi->pts,
+            .duration = can_interpolate ? frame->approx_duration : 0,
+            .frame_data = mpi,
+            .map = map_frame,
+            .unmap = unmap_frame,
+            .discard = discard_frame,
+        });
+
+        p->last_id = id;
+    }
+
+    // With target hinting enabled, hint the current image's color space with
+    // the user overrides applied on top; with hinting disabled, pass NULL.
+    // Nothing is sent when hinting is enabled but there is no current frame.
+    if (p->target_hint && frame->current) {
+        struct pl_color_space hint = get_mpi_csp(vo, frame->current);
+        if (opts->target_prim)
+            hint.primaries = mp_prim_to_pl(opts->target_prim);
+        if (opts->target_trc)
+            hint.transfer = mp_trc_to_pl(opts->target_trc);
+        if (opts->target_peak)
+            hint.hdr.max_luma = opts->target_peak;
+        apply_target_contrast(p, &hint);
+        pl_swapchain_colorspace_hint(p->sw, &hint);
+    } else if (!p->target_hint) {
+        pl_swapchain_colorspace_hint(p->sw, NULL);
+    }
+
+    struct pl_swapchain_frame swframe;
+    struct ra_swapchain *sw = p->ra_ctx->swapchain;
+    bool should_draw = sw->fns->start_frame(sw, NULL); // for wayland logic
+    if (!should_draw || !pl_swapchain_start_frame(p->sw, &swframe)) {
+        if (frame->current) {
+            // Advance the queue state to the current PTS to discard unused frames
+            pl_queue_update(p->queue, NULL, pl_queue_params(
+                .pts = frame->current->pts + pts_offset,
+                .radius = pl_frame_mix_radius(&params),
+                .vsync_duration = can_interpolate ? frame->ideal_frame_vsync_duration : 0,
+#if PL_API_VER >= 340
+                .drift_compensation = 0,
+#endif
+            ));
+        }
+        return;
+    }
+
+    // `valid` stays false until rendering succeeded; see the `done` label
+    bool valid = false;
+    p->is_interpolated = false;
+
+    // Calculate target
+    struct pl_frame target;
+    pl_frame_from_swapchain(&target, &swframe);
+    apply_target_options(p, &target);
+    // With blend_subs and a current frame, only the OSD is drawn onto the
+    // target here; subtitles are attached to the individual images below.
+    update_overlays(vo, p->osd_res,
+                    (frame->current && opts->blend_subs) ? OSD_DRAW_OSD_ONLY : 0,
+                    PL_OVERLAY_COORDS_DST_FRAME, &p->osd_state, &target, frame->current);
+    apply_crop(&target, p->dst, swframe.fbo->params.w, swframe.fbo->params.h);
+    update_tm_viz(&pars->color_map_params, &target);
+
+    // Without a current frame the mix stays empty and is rendered as such
+    struct pl_frame_mix mix = {0};
+    if (frame->current) {
+        // Update queue state
+        struct pl_queue_params qparams = *pl_queue_params(
+            .pts = frame->current->pts + pts_offset,
+            .radius = pl_frame_mix_radius(&params),
+            .vsync_duration = can_interpolate ? frame->ideal_frame_vsync_duration : 0,
+            .interpolation_threshold = opts->interpolation_threshold,
+#if PL_API_VER >= 340
+            .drift_compensation = 0,
+#endif
+        );
+
+        // Depending on the vsync ratio, we may be up to half of the vsync
+        // duration before the current frame time. This works fine because
+        // pl_queue will have this frame, unless it's after a reset event. In
+        // this case, start from the first available frame.
+        struct pl_source_frame first;
+        if (pl_queue_peek(p->queue, 0, &first) && qparams.pts < first.pts) {
+            if (first.pts != frame->current->pts)
+                MP_VERBOSE(vo, "Current PTS(%f) != VPTS(%f)\n", frame->current->pts, first.pts);
+            MP_VERBOSE(vo, "Clamping first frame PTS from %f to %f\n", qparams.pts, first.pts);
+            qparams.pts = first.pts;
+        }
+        // Remembered so video_screenshot() can request the same PTS
+        p->last_pts = qparams.pts;
+
+        switch (pl_queue_update(p->queue, &mix, &qparams)) {
+        case PL_QUEUE_ERR:
+            MP_ERR(vo, "Failed updating frames!\n");
+            goto done;
+        case PL_QUEUE_EOF:
+            abort(); // we never signal EOF
+        case PL_QUEUE_MORE:
+            // This is expected to happen semi-frequently near the start and
+            // end of a file, so only log it at high verbosity and move on.
+            MP_DBG(vo, "Render queue underrun.\n");
+            break;
+        case PL_QUEUE_OK:
+            break;
+        }
+
+        // Update source crop and overlays on all existing frames. We
+        // technically own the `pl_frame` struct so this is kosher. This could
+        // be partially avoided by instead flushing the queue on resizes, but
+        // doing it this way avoids unnecessarily re-uploading frames.
+        for (int i = 0; i < mix.num_frames; i++) {
+            struct pl_frame *image = (struct pl_frame *) mix.frames[i];
+            struct mp_image *mpi = image->user_data;
+            struct frame_priv *fp = mpi->priv;
+            apply_crop(image, p->src, vo->params->w, vo->params->h);
+            if (opts->blend_subs) {
+                // Re-render subtitles on redraw or when this frame's overlays
+                // are older than the current OSD state
+                if (frame->redraw || fp->osd_sync < p->osd_sync) {
+                    // Scale factors from source crop to destination rect;
+                    // the margins are derived from the crop using them
+                    float rx = pl_rect_w(p->dst) / pl_rect_w(image->crop);
+                    float ry = pl_rect_h(p->dst) / pl_rect_h(image->crop);
+                    struct mp_osd_res res = {
+                        .w = pl_rect_w(p->dst),
+                        .h = pl_rect_h(p->dst),
+                        .ml = -image->crop.x0 * rx,
+                        .mr = (image->crop.x1 - vo->params->w) * rx,
+                        .mt = -image->crop.y0 * ry,
+                        .mb = (image->crop.y1 - vo->params->h) * ry,
+                        .display_par = 1.0,
+                    };
+                    // TODO: fix this doing pointless updates
+                    if (frame->redraw)
+                        p->osd_sync++;
+                    update_overlays(vo, res, OSD_DRAW_SUB_ONLY,
+                                    PL_OVERLAY_COORDS_DST_CROP,
+                                    &fp->subs, image, mpi);
+                    fp->osd_sync = p->osd_sync;
+                }
+            } else {
+                // Disable overlays when blend_subs is disabled
+                image->num_overlays = 0;
+                fp->osd_sync = 0;
+            }
+
+            // Update the frame signature to include the current OSD sync
+            // value, in order to disambiguate between identical frames with
+            // modified OSD. Shift the OSD sync value by a lot to avoid
+            // collisions with low signature values.
+            //
+            // This is safe to do because `pl_frame_mix.signature` lives in
+            // temporary memory that is only valid for this `pl_queue_update`.
+            ((uint64_t *) mix.signatures)[i] ^= fp->osd_sync << 48;
+        }
+    }
+
+    // Render frame
+    if (!pl_render_image_mix(p->rr, &mix, &target, &params)) {
+        MP_ERR(vo, "Failed rendering frame!\n");
+        goto done;
+    }
+
+    // Write the HDR metadata of the nearest frame in the mix back into the
+    // VO params
+    const struct pl_frame *cur_frame = pl_frame_mix_nearest(&mix);
+    if (cur_frame && vo->params) {
+        vo->params->color.hdr = cur_frame->color.hdr;
+        // Augment metadata with peak detection max_pq_y / avg_pq_y
+        pl_renderer_get_hdr_metadata(p->rr, &vo->params->color.hdr);
+    }
+
+    p->is_interpolated = pts_offset != 0 && mix.num_frames > 1;
+    valid = true;
+    // fall through
+
+done:
+    if (!valid) // clear with purple to indicate error
+        pl_tex_clear(gpu, swframe.fbo, (float[4]){ 0.5, 0.0, 1.0, 1.0 });
+
+    pl_gpu_flush(gpu);
+    p->frame_pending = true;
+}
+
+// Present the frame rendered by draw_frame(): submits the pending swapchain
+// frame (if any, logging an error on failure) and then asks the RA swapchain
+// to swap buffers, which happens regardless of whether a frame was pending.
+static void flip_page(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    struct ra_swapchain *swapchain = p->ra_ctx->swapchain;
+
+    if (p->frame_pending) {
+        bool submitted = pl_swapchain_submit_frame(p->sw);
+        if (!submitted)
+            MP_ERR(vo, "Failed presenting frame!\n");
+        p->frame_pending = false;
+    }
+
+    swapchain->fns->swap_buffers(swapchain);
+}
+
+// Forward the vsync query to the RA swapchain. `info` is left untouched if
+// the swapchain does not provide a `get_vsync` function.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    struct priv *p = vo->priv;
+    struct ra_swapchain *swapchain = p->ra_ctx->swapchain;
+
+    if (!swapchain->fns->get_vsync)
+        return;
+
+    swapchain->fns->get_vsync(swapchain, info);
+}
+
+// Report whether the image format `format` can be displayed: either a hwdec
+// is available for it, or plane_data_from_imgfmt() yields a plane layout and
+// a GPU format is found for every plane. Returns non-zero if supported.
+static int query_format(struct vo *vo, int format)
+{
+    struct priv *p = vo->priv;
+    if (ra_hwdec_get(&p->hwdec_ctx, format))
+        return true;
+
+    struct pl_plane_data data[4] = {0};
+    struct pl_bit_encoding bits;
+    const int num_planes = plane_data_from_imgfmt(data, &bits, format);
+
+    // A plane count of zero means the layout cannot be represented
+    bool supported = num_planes != 0;
+    for (int i = 0; supported && i < num_planes; i++)
+        supported = pl_plane_find_fmt(p->gpu, NULL, &data[i]);
+
+    return supported;
+}
+
+// React to a window size change: queries the current source/destination
+// rectangles and OSD resolution, resizes the GPU context (and requests a
+// redraw) if the window has a non-zero size, and stores the new rectangles.
+// `osd_sync` is bumped whenever any of them changed; draw_frame() compares
+// it against each frame's `osd_sync` to decide whether to re-render subs.
+static void resize(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    struct mp_rect src, dst;
+    struct mp_osd_res osd;
+    vo_get_src_dst_rects(vo, &src, &dst, &osd);
+
+    bool has_size = vo->dwidth && vo->dheight;
+    if (has_size) {
+        gpu_ctx_resize(p->context, vo->dwidth, vo->dheight);
+        vo->want_redraw = true;
+    }
+
+    bool unchanged = mp_rect_equals(&p->src, &src) &&
+                     mp_rect_equals(&p->dst, &dst) &&
+                     osd_res_equals(p->osd_res, osd);
+    if (unchanged)
+        return;
+
+    p->src = src;
+    p->dst = dst;
+    p->osd_res = osd;
+    p->osd_sync++;
+}
+
+// Reconfigure the VO: asks the RA context to reconfigure and, on success,
+// recomputes the rectangles via resize(). Returns 0 on success and -1 if the
+// RA context reconfiguration fails. `params` is not used by this function.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *p = vo->priv;
+
+    if (p->ra_ctx->fns->reconfig(p->ra_ctx)) {
+        resize(vo);
+        return 0;
+    }
+
+    return -1;
+}
+
+// Load the ICC profile contained in `icc` into `p->icc_profile`.
+// Takes over ownership of `icc`: its data is freed before returning, whether
+// or not the update succeeded. Can be used to unload the profile
+// (icc.len == 0). Returns the result of pl_icc_update().
+static bool update_icc(struct priv *p, struct bstr icc)
+{
+    struct pl_icc_profile profile = {0};
+    profile.data = icc.start;
+    profile.len = icc.len;
+    pl_icc_profile_compute_signature(&profile);
+
+    bool updated = pl_icc_update(p->pllog, &p->icc_profile, &profile,
+                                 &p->icc_params);
+    talloc_free(icc.start);
+    return updated;
+}
+
+// Query the ICC profile from the RA context and load it, if automatic
+// profile detection is enabled and no explicit `icc_path` is set.
+// Returns whether the ICC profile was updated (even on failure); a result of
+// VO_NOTAVAIL from the query leaves the current profile alone.
+static bool update_auto_profile(struct priv *p, int *events)
+{
+    const struct gl_video_opts *opts = p->opts_cache->opts;
+    bool enabled = opts->icc_opts && opts->icc_opts->profile_auto &&
+                   !p->icc_path;
+    if (!enabled)
+        return false;
+
+    MP_VERBOSE(p, "Querying ICC profile...\n");
+    bstr icc = {0};
+    int r = p->ra_ctx->fns->control(p->ra_ctx, events, VOCTRL_GET_ICC_PROFILE, &icc);
+    if (r == VO_NOTAVAIL)
+        return false;
+
+    if (r == VO_FALSE) {
+        MP_WARN(p, "Could not retrieve an ICC profile.\n");
+    } else if (r == VO_NOTIMPL) {
+        MP_ERR(p, "icc-profile-auto not implemented on this platform.\n");
+    }
+
+    // `icc` is handed over as-is here, also after the two log cases above
+    update_icc(p, icc);
+    return true;
+}
+
+// Take a screenshot: renders the frame at the last rendered PTS
+// (`p->last_pts`) into an offscreen FBO and downloads it into a newly
+// allocated mp_image returned in `args->res`.
+//
+// `args->res` is NULL if any step up to and including the allocation fails,
+// and is released again via TA_FREEP if the download fails.
+//
+// With `args->scaled`, the current window rectangles, OSD resolution and
+// target options are used. Otherwise the frame is rendered at the size
+// reported by mp_image_params_get_dsize(), with the image's own color space
+// (`args->native_csp`) or sRGB as the target color space.
+static void video_screenshot(struct vo *vo, struct voctrl_screenshot *args)
+{
+    struct priv *p = vo->priv;
+    pl_options pars = p->pars;
+    pl_gpu gpu = p->gpu;
+    pl_tex fbo = NULL;
+    args->res = NULL;
+
+    // Render with a local copy of the params: no perf info, no caching and
+    // no frame mixing for this one-off render
+    update_options(vo);
+    struct pl_render_params params = pars->params;
+    params.info_callback = NULL;
+    params.skip_caching_single_frame = true;
+    params.preserve_mixing_cache = false;
+    params.frame_mixer = NULL;
+
+    // If peak detection is configured, use a local copy of its params with
+    // `allow_delayed` turned off
+    struct pl_peak_detect_params peak_params;
+    if (params.peak_detect_params) {
+        peak_params = *params.peak_detect_params;
+        params.peak_detect_params = &peak_params;
+        peak_params.allow_delayed = false;
+    }
+
+    // Retrieve the current frame from the frame queue
+    struct pl_frame_mix mix;
+    enum pl_queue_status status;
+    status = pl_queue_update(p->queue, &mix, pl_queue_params(
+        .pts = p->last_pts,
+#if PL_API_VER >= 340
+        .drift_compensation = 0,
+#endif
+    ));
+    assert(status != PL_QUEUE_EOF);
+    if (status == PL_QUEUE_ERR) {
+        MP_ERR(vo, "Unknown error occurred while trying to take screenshot!\n");
+        return;
+    }
+    if (!mix.num_frames) {
+        MP_ERR(vo, "No frames available to take screenshot of, is a file loaded?\n");
+        return;
+    }
+
+    // Passing an interpolation radius of 0 guarantees that the first frame in
+    // the resulting mix is the correct frame for this PTS
+    //
+    // `image` is a by-value copy, so the crop/overlay changes made below do
+    // not modify the pl_frame held by the queue.
+    struct pl_frame image = *(struct pl_frame *) mix.frames[0];
+    struct mp_image *mpi = image.user_data;
+    struct mp_rect src = p->src, dst = p->dst;
+    struct mp_osd_res osd = p->osd_res;
+    if (!args->scaled) {
+        // Unscaled: replace the window rects by rects derived from the image
+        // parameters (its crop if valid, else the full image)
+        int w, h;
+        mp_image_params_get_dsize(&mpi->params, &w, &h);
+        if (w < 1 || h < 1)
+            return;
+
+        int src_w = mpi->params.w;
+        int src_h = mpi->params.h;
+        src = (struct mp_rect) {0, 0, src_w, src_h};
+        dst = (struct mp_rect) {0, 0, w, h};
+
+        if (mp_image_crop_valid(&mpi->params))
+            src = mpi->params.crop;
+
+        // For 90/270 degree rotation, width and height swap roles
+        if (mpi->params.rotate % 180 == 90) {
+            MPSWAP(int, w, h);
+            MPSWAP(int, src_w, src_h);
+        }
+        mp_rect_rotate(&src, src_w, src_h, mpi->params.rotate);
+        mp_rect_rotate(&dst, w, h, mpi->params.rotate);
+
+        osd = (struct mp_osd_res) {
+            .display_par = 1.0,
+            .w = mp_rect_w(dst),
+            .h = mp_rect_h(dst),
+        };
+    }
+
+    // Create target FBO, try high bit depth first
+    // (the loop starts at 16 bits only with `high_bit_depth`, and falls back
+    // to 8 bits if no format or texture can be created)
+    int mpfmt;
+    for (int depth = args->high_bit_depth ? 16 : 8; depth; depth -= 8) {
+        if (depth == 16) {
+            mpfmt = IMGFMT_RGBA64;
+        } else {
+            mpfmt = p->ra_ctx->opts.want_alpha ? IMGFMT_RGBA : IMGFMT_RGB0;
+        }
+        pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 4, depth, depth,
+                                 PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE);
+        if (!fmt)
+            continue;
+
+        fbo = pl_tex_create(gpu, pl_tex_params(
+            .w = osd.w,
+            .h = osd.h,
+            .format = fmt,
+            .blit_dst = true,
+            .renderable = true,
+            .host_readable = true,
+            .storable = fmt->caps & PL_FMT_CAP_STORABLE,
+        ));
+        if (fbo)
+            break;
+    }
+
+    if (!fbo) {
+        MP_ERR(vo, "Failed creating target FBO for screenshot!\n");
+        return;
+    }
+
+    // Single-plane RGB target backed by the FBO
+    struct pl_frame target = {
+        .repr = pl_color_repr_rgb,
+        .num_planes = 1,
+        .planes[0] = {
+            .texture = fbo,
+            .components = 4,
+            .component_mapping = {0, 1, 2, 3},
+        },
+    };
+
+    if (args->scaled) {
+        // Apply target LUT, ICC profile and CSP override only in window mode
+        apply_target_options(p, &target);
+    } else if (args->native_csp) {
+        target.color = image.color;
+    } else {
+        target.color = pl_color_space_srgb;
+    }
+
+    apply_crop(&image, src, mpi->params.w, mpi->params.h);
+    apply_crop(&target, dst, fbo->params.w, fbo->params.h);
+    update_tm_viz(&pars->color_map_params, &target);
+
+    // Excluding subtitles means drawing OSD only, and vice versa
+    int osd_flags = 0;
+    if (!args->subs)
+        osd_flags |= OSD_DRAW_OSD_ONLY;
+    if (!args->osd)
+        osd_flags |= OSD_DRAW_SUB_ONLY;
+
+    const struct gl_video_opts *opts = p->opts_cache->opts;
+    struct frame_priv *fp = mpi->priv;
+    if (opts->blend_subs) {
+        // With blend_subs, overlays are attached to the image (in crop
+        // coordinates) using the same margin computation as draw_frame()
+        float rx = pl_rect_w(dst) / pl_rect_w(image.crop);
+        float ry = pl_rect_h(dst) / pl_rect_h(image.crop);
+        struct mp_osd_res res = {
+            .w = pl_rect_w(dst),
+            .h = pl_rect_h(dst),
+            .ml = -image.crop.x0 * rx,
+            .mr = (image.crop.x1 - vo->params->w) * rx,
+            .mt = -image.crop.y0 * ry,
+            .mb = (image.crop.y1 - vo->params->h) * ry,
+            .display_par = 1.0,
+        };
+        update_overlays(vo, res, osd_flags,
+                        PL_OVERLAY_COORDS_DST_CROP,
+                        &fp->subs, &image, mpi);
+    } else {
+        // Without blend_subs, overlays are attached to the target frame
+        // instead, and the image itself carries none
+        update_overlays(vo, osd, osd_flags, PL_OVERLAY_COORDS_DST_FRAME,
+                        &p->osd_state, &target, mpi);
+        image.num_overlays = 0;
+    }
+
+    if (!pl_render_image(p->rr, &image, &target, &params)) {
+        MP_ERR(vo, "Failed rendering frame!\n");
+        goto done;
+    }
+
+    args->res = mp_image_alloc(mpfmt, fbo->params.w, fbo->params.h);
+    if (!args->res)
+        goto done;
+
+    // Tag the result with the color parameters of the render target
+    args->res->params.color.primaries = mp_prim_from_pl(target.color.primaries);
+    args->res->params.color.gamma = mp_trc_from_pl(target.color.transfer);
+    args->res->params.color.levels = mp_levels_from_pl(target.repr.levels);
+    args->res->params.color.hdr = target.color.hdr;
+    if (args->scaled)
+        args->res->params.p_w = args->res->params.p_h = 1;
+
+    bool ok = pl_tex_download(gpu, pl_tex_transfer_params(
+        .tex = fbo,
+        .ptr = args->res->planes[0],
+        .row_pitch = args->res->stride[0],
+    ));
+
+    if (!ok)
+        TA_FREEP(&args->res);
+
+    // fall through
+done:
+    pl_tex_destroy(gpu, &fbo);
+}
+
+static inline void copy_frame_info_to_mp(struct frame_info *pl,
+                                         struct mp_frame_perf *mp) { // copies per-pass perf data from 'pl' into 'mp'
+    static_assert(MP_ARRAY_SIZE(pl->info) == MP_ARRAY_SIZE(mp->perf), "");
+    assert(pl->count <= VO_PASS_PERF_MAX);
+    mp->count = MPMIN(pl->count, VO_PASS_PERF_MAX); // still clamped if asserts are compiled out
+
+    for (int i = 0; i < mp->count; ++i) {
+        const struct pl_dispatch_info *pass = &pl->info[i];
+
+        static_assert(VO_PERF_SAMPLE_COUNT >= MP_ARRAY_SIZE(pass->samples), "");
+        assert(pass->num_samples <= MP_ARRAY_SIZE(pass->samples));
+
+        struct mp_pass_perf *perf = &mp->perf[i];
+        perf->count = MPMIN(pass->num_samples, VO_PERF_SAMPLE_COUNT);
+        memcpy(perf->samples, pass->samples, perf->count * sizeof(pass->samples[0]));
+        perf->last = pass->last;
+        perf->peak = pass->peak;
+        perf->avg = pass->average;
+
+        strncpy(mp->desc[i], pass->shader->description, sizeof(mp->desc[i]) - 1);
+        mp->desc[i][sizeof(mp->desc[i]) - 1] = '\0'; // strncpy() does not terminate on truncation
+    }
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct priv *p = vo->priv;
+    // Requests handled by this VO itself return directly from the switch
+    switch (request) {
+    case VOCTRL_SET_PANSCAN:
+        resize(vo);
+        return VO_TRUE;
+    case VOCTRL_SET_EQUALIZER:
+    case VOCTRL_PAUSE:
+        if (p->is_interpolated)
+            vo->want_redraw = true;
+        return VO_TRUE;
+
+    case VOCTRL_UPDATE_RENDER_OPTS: {
+        m_config_cache_update(p->opts_cache);
+        const struct gl_video_opts *opts = p->opts_cache->opts;
+        p->ra_ctx->opts.want_alpha = opts->alpha_mode == ALPHA_YES;
+        if (p->ra_ctx->fns->update_render_opts) // optional context hook
+            p->ra_ctx->fns->update_render_opts(p->ra_ctx);
+        update_render_options(vo);
+        vo->want_redraw = true;
+
+        // Also re-query the auto profile, in case `update_render_options`
+        // unloaded a manually specified icc profile in favor of
+        // icc-profile-auto
+        int events = 0;
+        update_auto_profile(p, &events);
+        vo_event(vo, events);
+        return VO_TRUE;
+    }
+
+    case VOCTRL_RESET:
+        // Defer until the first new frame (unique ID) actually arrives
+        p->want_reset = true;
+        return VO_TRUE;
+
+    case VOCTRL_PERFORMANCE_DATA: {
+        struct voctrl_performance_data *perf = data;
+        copy_frame_info_to_mp(&p->perf_fresh, &perf->fresh);
+        copy_frame_info_to_mp(&p->perf_redraw, &perf->redraw);
+        return true;
+    }
+
+    case VOCTRL_SCREENSHOT:
+        video_screenshot(vo, data);
+        return true;
+
+    case VOCTRL_EXTERNAL_RESIZE:
+        reconfig(vo, NULL);
+        return true;
+
+    case VOCTRL_LOAD_HWDEC_API:
+        ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, vo->hwdec_devs, data);
+        return true;
+    }
+    // Any other request is passed on to the context's control() hook
+    int events = 0;
+    int r = p->ra_ctx->fns->control(p->ra_ctx, &events, request, data);
+    if (events & VO_EVENT_ICC_PROFILE_CHANGED) {
+        if (update_auto_profile(p, &events))
+            vo->want_redraw = true;
+    }
+    if (events & VO_EVENT_RESIZE)
+        resize(vo);
+    if (events & VO_EVENT_EXPOSE)
+        vo->want_redraw = true;
+    vo_event(vo, events); // report the collected event flags via vo_event()
+
+    return r;
+}
+
+static void wakeup(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    if (p->ra_ctx && p->ra_ctx->fns->wakeup) // ra_ctx may be unset; the hook is optional
+        p->ra_ctx->fns->wakeup(p->ra_ctx);
+}
+
+static void wait_events(struct vo *vo, int64_t until_time_ns)
+{
+    // Use the context's own wait hook when it has one, else the default wait
+    struct priv *priv = vo->priv;
+    if (!priv->ra_ctx || !priv->ra_ctx->fns->wait_events)
+        vo_wait_default(vo, until_time_ns);
+    else
+        priv->ra_ctx->fns->wait_events(priv->ra_ctx, until_time_ns);
+}
+
+#if PL_API_VER < 342 // local pl_cache_signature() is only defined for these API versions
+static inline void xor_hash(void *hash, pl_cache_obj obj)
+{
+    *((uint64_t *) hash) ^= obj.key; // XOR makes the result independent of iteration order
+}
+
+static inline uint64_t pl_cache_signature(pl_cache cache)
+{
+    uint64_t hash = 0; // an empty cache therefore yields 0
+    pl_cache_iterate(cache, xor_hash, &hash);
+    return hash;
+}
+#endif
+
+static void cache_init(struct vo *vo, struct cache *cache, size_t max_size,
+                       const char *dir_opt)
+{
+    struct priv *p = vo->priv;
+    const char *name = cache == &p->shader_cache ? "shader.cache" : "icc.cache";
+    // Use the directory given by the option, else the user "cache" location
+    char *dir;
+    if (dir_opt && dir_opt[0]) {
+        dir = mp_get_user_path(NULL, p->global, dir_opt);
+    } else {
+        dir = mp_find_user_file(NULL, p->global, "cache", "");
+    }
+    if (!dir || !dir[0])
+        goto done; // no usable directory: cache->cache is left unset
+
+    mp_mkdirp(dir);
+    cache->path = mp_path_join(vo, dir, name);
+    cache->cache = pl_cache_create(pl_cache_params(
+        .log = p->pllog,
+        .max_total_size = max_size,
+    ));
+
+    FILE *file = fopen(cache->path, "rb"); // a missing file is not treated as an error
+    if (file) {
+        int ret = pl_cache_load_file(cache->cache, file);
+        fclose(file);
+        if (ret < 0)
+            MP_WARN(p, "Failed loading cache from %s\n", cache->path);
+    }
+
+    cache->sig = pl_cache_signature(cache->cache); // cache_uninit() compares against this
+done:
+    talloc_free(dir);
+}
+
+static void cache_uninit(struct priv *p, struct cache *cache)
+{
+    // Save the cache to cache->path if it changed since load, then destroy it
+    if (!cache->cache)
+        goto done;
+    if (pl_cache_signature(cache->cache) == cache->sig)
+        goto done; // skip re-saving identical cache
+
+    assert(cache->path);
+    char *tmp = talloc_asprintf(cache->path, "%sXXXXXX", cache->path);
+    int fd = mkstemp(tmp);
+    if (fd < 0)
+        goto done;
+    FILE *file = fdopen(fd, "wb");
+    if (!file) {
+        close(fd);
+        unlink(tmp);
+        goto done;
+    }
+    int ret = pl_cache_save_file(cache->cache, file);
+    if (fclose(file) != 0)
+        ret = -1; // flush failed: never install a possibly truncated file
+    if (ret >= 0)
+        ret = rename(tmp, cache->path); // replace the old cache with the temp file
+    if (ret < 0) {
+        MP_WARN(p, "Failed saving cache to %s\n", cache->path);
+        unlink(tmp);
+    }
+done:
+    pl_cache_destroy(&cache->cache);
+}
+
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    pl_queue_destroy(&p->queue); // destroy this first
+    for (int i = 0; i < MP_ARRAY_SIZE(p->osd_state.entries); i++)
+        pl_tex_destroy(p->gpu, &p->osd_state.entries[i].tex);
+    for (int i = 0; i < p->num_sub_tex; i++)
+        pl_tex_destroy(p->gpu, &p->sub_tex[i]);
+    for (int i = 0; i < p->num_user_hooks; i++)
+        pl_mpv_user_shader_destroy(&p->user_hooks[i].hook);
+
+    if (vo->hwdec_devs) { // only set once preinit() got past context creation
+        ra_hwdec_mapper_free(&p->hwdec_mapper);
+        ra_hwdec_ctx_uninit(&p->hwdec_ctx);
+        hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL);
+        hwdec_devices_destroy(vo->hwdec_devs);
+    }
+
+    assert(p->num_dr_buffers == 0);
+    mp_mutex_destroy(&p->dr_lock);
+
+    cache_uninit(p, &p->shader_cache); // writes the cache back to disk if it changed
+    cache_uninit(p, &p->icc_cache);
+
+    pl_icc_close(&p->icc_profile);
+    pl_renderer_destroy(&p->rr);
+
+    for (int i = 0; i < VO_PASS_PERF_MAX; ++i) {
+        pl_shader_info_deref(&p->perf_fresh.info[i].shader);
+        pl_shader_info_deref(&p->perf_redraw.info[i].shader);
+    }
+
+    pl_options_free(&p->pars);
+
+    p->ra_ctx = NULL; // these four were copied from p->context in preinit()
+    p->pllog = NULL;
+    p->gpu = NULL;
+    p->sw = NULL;
+    gpu_ctx_destroy(&p->context);
+}
+
+static void load_hwdec_api(void *ctx, struct hwdec_imgfmt_request *params)
+{
+    vo_control(ctx, VOCTRL_LOAD_HWDEC_API, params); // ctx is the vo registered in preinit()
+}
+
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    p->opts_cache = m_config_cache_alloc(p, vo->global, &gl_video_conf);
+    p->video_eq = mp_csp_equalizer_create(p, vo->global);
+    p->global = vo->global;
+    p->log = vo->log;
+    mp_mutex_init(&p->dr_lock); // uninit() destroys it, even on early failure
+
+    struct gl_video_opts *gl_opts = p->opts_cache->opts;
+    p->context = gpu_ctx_create(vo, gl_opts);
+    if (!p->context)
+        goto err_out;
+    // For the time being, mirror the context's members in priv
+    p->ra_ctx = p->context->ra_ctx;
+    p->pllog = p->context->pllog;
+    p->gpu = p->context->gpu;
+    p->sw = p->context->swapchain;
+    p->hwdec_ctx = (struct ra_hwdec_ctx) {
+        .log = p->log,
+        .global = p->global,
+        .ra_ctx = p->ra_ctx,
+    };
+
+    vo->hwdec_devs = hwdec_devices_create();
+    hwdec_devices_set_loader(vo->hwdec_devs, load_hwdec_api, vo);
+    ra_hwdec_ctx_init(&p->hwdec_ctx, vo->hwdec_devs, gl_opts->hwdec_interop, false);
+
+    // Optional on-disk caches, with size limits of 10 MiB and 20 MiB
+    if (gl_opts->shader_cache)
+        cache_init(vo, &p->shader_cache, 10 << 20, gl_opts->shader_cache_dir);
+    if (gl_opts->icc_opts->cache)
+        cache_init(vo, &p->icc_cache, 20 << 20, gl_opts->icc_opts->cache_dir);
+
+    pl_gpu_set_cache(p->gpu, p->shader_cache.cache);
+    p->rr = pl_renderer_create(p->pllog, p->gpu);
+    p->queue = pl_queue_create(p->gpu);
+    p->osd_fmt[SUBBITMAP_LIBASS] = pl_find_named_fmt(p->gpu, "r8");
+    p->osd_fmt[SUBBITMAP_BGRA] = pl_find_named_fmt(p->gpu, "bgra8");
+    p->osd_sync = 1;
+
+    p->pars = pl_options_alloc(p->pllog);
+    update_render_options(vo);
+    return 0;
+
+err_out:
+    uninit(vo); // tears down whatever was set up before the failure
+    return -1;
+}
+
+static const struct pl_filter_config *map_scaler(struct priv *p,
+                                                 enum scaler_unit unit)
+{
+    const struct pl_filter_preset fixed_scalers[] = { // names returned as-is, without tuning
+        { "bilinear",       &pl_filter_bilinear },
+        { "bicubic_fast",   &pl_filter_bicubic },
+        { "nearest",        &pl_filter_nearest },
+        { "oversample",     &pl_filter_oversample },
+        {0},
+    };
+
+    const struct pl_filter_preset fixed_frame_mixers[] = { // same, but used for SCALER_TSCALE
+        { "linear",         &pl_filter_bilinear },
+        { "oversample",     &pl_filter_oversample },
+        {0},
+    };
+
+    const struct pl_filter_preset *fixed_presets =
+        unit == SCALER_TSCALE ? fixed_frame_mixers : fixed_scalers;
+
+    const struct gl_video_opts *opts = p->opts_cache->opts;
+    const struct scaler_config *cfg = &opts->scaler[unit];
+    if (unit == SCALER_DSCALE && (!cfg->kernel.name || !cfg->kernel.name[0]))
+        cfg = &opts->scaler[SCALER_SCALE]; // unnamed dscale inherits the scale config
+    if (unit == SCALER_CSCALE && (!cfg->kernel.name || !cfg->kernel.name[0]))
+        cfg = &opts->scaler[SCALER_SCALE]; // likewise for cscale
+
+    for (int i = 0; fixed_presets[i].name; i++) {
+        if (strcmp(cfg->kernel.name, fixed_presets[i].name) == 0)
+            return fixed_presets[i].filter;
+    }
+
+    // Attempt loading filter preset first, fall back to raw filter function
+    struct scaler_params *par = &p->scalers[unit];
+    const struct pl_filter_preset *preset;
+    const struct pl_filter_function_preset *fpreset;
+    if ((preset = pl_find_filter_preset(cfg->kernel.name))) {
+        par->config = *preset->filter;
+    } else if ((fpreset = pl_find_filter_function_preset(cfg->kernel.name))) {
+        par->config = (struct pl_filter_config) {
+            .kernel = fpreset->function,
+            .params[0] = fpreset->function->params[0],
+            .params[1] = fpreset->function->params[1],
+        };
+    } else {
+        MP_ERR(p, "Failed mapping filter function '%s', no libplacebo analog?\n",
+               cfg->kernel.name);
+        return &pl_filter_bilinear;
+    }
+
+    const struct pl_filter_function_preset *wpreset;
+    if ((wpreset = pl_find_filter_function_preset(cfg->window.name))) {
+        par->config.window = wpreset->function;
+        par->config.wparams[0] = wpreset->function->params[0];
+        par->config.wparams[1] = wpreset->function->params[1];
+    }
+    // NaN parameters are skipped, which keeps the values chosen above
+    for (int i = 0; i < 2; i++) {
+        if (!isnan(cfg->kernel.params[i]))
+            par->config.params[i] = cfg->kernel.params[i];
+        if (!isnan(cfg->window.params[i]))
+            par->config.wparams[i] = cfg->window.params[i];
+    }
+
+    par->config.clamp = cfg->clamp;
+    if (cfg->kernel.blur > 0.0)
+        par->config.blur = cfg->kernel.blur;
+    if (cfg->kernel.taper > 0.0)
+        par->config.taper = cfg->kernel.taper;
+    if (cfg->radius > 0.0) {
+        if (par->config.kernel->resizable) {
+            par->config.radius = cfg->radius;
+        } else {
+            MP_WARN(p, "Filter radius specified but filter '%s' is not "
+                    "resizable, ignoring\n", cfg->kernel.name);
+        }
+    }
+
+    return &par->config; // lives in p->scalers[unit], so it outlives this call
+}
+
+static const struct pl_hook *load_hook(struct priv *p, const char *path)
+{
+    if (!path || !path[0])
+        return NULL;
+    // Reuse the entry recorded for this path, if any (its hook may be NULL)
+    for (int i = 0; i < p->num_user_hooks; i++) {
+        if (strcmp(p->user_hooks[i].path, path) == 0)
+            return p->user_hooks[i].hook;
+    }
+
+    char *fname = mp_get_user_path(NULL, p->global, path);
+    bstr shader = stream_read_file(fname, p, p->global, 1000000000); // 1GB
+    talloc_free(fname);
+
+    const struct pl_hook *hook = NULL;
+    if (shader.len)
+        hook = pl_mpv_user_shader_parse(p->gpu, shader.start, shader.len);
+    // Failures are recorded too (hook == NULL), so the file is not read again
+    MP_TARRAY_APPEND(p, p->user_hooks, p->num_user_hooks, (struct user_hook) {
+        .path = talloc_strdup(p, path),
+        .hook = hook,
+    });
+
+    return hook;
+}
+
+static void update_icc_opts(struct priv *p, const struct mp_icc_opts *opts)
+{
+    if (!opts)
+        return;
+
+    if (!opts->profile_auto && !p->icc_path) {
+        // Un-set any auto-loaded profiles if icc-profile-auto was disabled
+        update_icc(p, (bstr) {0});
+    }
+    // Rebuild the ICC parameters from the defaults plus the current options
+    int s_r = 0, s_g = 0, s_b = 0;
+    gl_parse_3dlut_size(opts->size_str, &s_r, &s_g, &s_b);
+    p->icc_params = pl_icc_default_params;
+    p->icc_params.intent = opts->intent;
+    p->icc_params.size_r = s_r;
+    p->icc_params.size_g = s_g;
+    p->icc_params.size_b = s_b;
+    p->icc_params.cache = p->icc_cache.cache;
+
+    if (!opts->profile || !opts->profile[0]) {
+        // No profile enabled, un-load any existing profiles
+        update_icc(p, (bstr) {0});
+        TA_FREEP(&p->icc_path);
+        return;
+    }
+
+    if (p->icc_path && strcmp(opts->profile, p->icc_path) == 0)
+        return; // ICC profile hasn't changed
+
+    char *fname = mp_get_user_path(NULL, p->global, opts->profile);
+    MP_VERBOSE(p, "Opening ICC profile '%s'\n", fname);
+    struct bstr icc = stream_read_file(fname, p, p->global, 100000000); // 100 MB
+    talloc_free(fname);
+    update_icc(p, icc); // NOTE(review): presumably takes over the 'icc' buffer -- confirm
+
+    // Update cached path
+    talloc_free(p->icc_path);
+    p->icc_path = talloc_strdup(p, opts->profile);
+}
+
+static void update_lut(struct priv *p, struct user_lut *lut)
+{
+    // Reload lut->lut from the file named by lut->opt when that path changed
+    if (!lut->opt) {
+        pl_lut_free(&lut->lut);
+        TA_FREEP(&lut->path);
+        return;
+    }
+    if (lut->path && strcmp(lut->path, lut->opt) == 0)
+        return; // no change
+    // Update cached path
+    pl_lut_free(&lut->lut);
+    talloc_free(lut->path);
+    lut->path = talloc_strdup(p, lut->opt);
+
+    // Load LUT file
+    char *fname = mp_get_user_path(NULL, p->global, lut->path);
+    MP_VERBOSE(p, "Loading custom LUT '%s'\n", fname);
+    struct bstr lutdata = stream_read_file(fname, p, p->global, 100000000); // 100 MB
+    talloc_free(fname); // allocated with a NULL parent, so it must be freed here
+    lut->lut = pl_lut_parse_cube(p->pllog, lutdata.start, lutdata.len);
+    talloc_free(lutdata.start);
+}
+
+static void update_hook_opts(struct priv *p, char **opts, const char *shaderpath,
+                             const struct pl_hook *hook)
+{
+    if (!opts)
+        return;
+    // Apply the flat key/value list 'opts' to the tunable parameters of 'hook'
+    const char *basename = mp_basename(shaderpath);
+    struct bstr shadername;
+    if (!mp_splitext(basename, &shadername))
+        shadername = bstr0(basename);
+
+    for (int n = 0; opts[n * 2]; n++) {
+        struct bstr k = bstr0(opts[n * 2 + 0]);
+        struct bstr v = bstr0(opts[n * 2 + 1]);
+        int pos;
+        if ((pos = bstrchr(k, '/')) >= 0) { // "shader/param": only for that shader
+            if (!bstr_equals(bstr_splice(k, 0, pos), shadername))
+                continue;
+            k = bstr_cut(k, pos + 1);
+        }
+
+        for (int i = 0; i < hook->num_parameters; i++) {
+            const struct pl_hook_par *hp = &hook->parameters[i];
+            if (!bstr_equals0(k, hp->name))
+                continue;
+
+            m_option_t opt = {
+                .name = hp->name,
+            };
+
+            if (hp->names) { // accept a symbolic name in place of the integer value
+                for (int j = hp->minimum.i; j <= hp->maximum.i; j++) {
+                    if (bstr_equals0(v, hp->names[j])) {
+                        hp->data->i = j;
+                        goto next_hook;
+                    }
+                }
+            }
+
+            switch (hp->type) {
+            case PL_VAR_FLOAT:
+                opt.type = &m_option_type_float;
+                opt.min = hp->minimum.f;
+                opt.max = hp->maximum.f;
+                break;
+            case PL_VAR_SINT:
+                opt.type = &m_option_type_int;
+                opt.min = hp->minimum.i;
+                opt.max = hp->maximum.i;
+                break;
+            case PL_VAR_UINT:
+                opt.type = &m_option_type_int;
+                opt.min = MPMIN(hp->minimum.u, INT_MAX);
+                opt.max = MPMIN(hp->maximum.u, INT_MAX);
+                break;
+            }
+
+            if (!opt.type) // parameter type without a matching option parser
+                goto next_hook;
+
+            opt.type->parse(p->log, &opt, k, v, hp->data);
+            goto next_hook;
+        }
+
+    next_hook:;
+    }
+}
+
+static void update_render_options(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    pl_options pars = p->pars;
+    const struct gl_video_opts *opts = p->opts_cache->opts;
+    pars->params.antiringing_strength = opts->scaler[0].antiring;
+    pars->params.background_color[0] = opts->background.r / 255.0;
+    pars->params.background_color[1] = opts->background.g / 255.0;
+    pars->params.background_color[2] = opts->background.b / 255.0;
+    pars->params.background_transparency = 1.0 - opts->background.a / 255.0;
+    pars->params.skip_anti_aliasing = !opts->correct_downscaling;
+    pars->params.disable_linear_scaling = !opts->linear_downscaling && !opts->linear_upscaling;
+    pars->params.disable_fbos = opts->dumb_mode == 1;
+    pars->params.blend_against_tiles = opts->alpha_mode == ALPHA_BLEND_TILES;
+    pars->params.corner_rounding = p->corner_rounding;
+    pars->params.correct_subpixel_offsets = !opts->scaler_resizes_only;
+
+    // Map scaler options as best we can
+    pars->params.upscaler = map_scaler(p, SCALER_SCALE);
+    pars->params.downscaler = map_scaler(p, SCALER_DSCALE);
+    pars->params.plane_upscaler = map_scaler(p, SCALER_CSCALE);
+    pars->params.frame_mixer = opts->interpolation ? map_scaler(p, SCALER_TSCALE) : NULL;
+
+    // Request as many frames as required from the decoder. Depending on the
+    // VPS/FPS ratio (speed), libplacebo may need more frames. Request frames
+    // up to a ratio of 1/2, but only if anti-aliasing is enabled.
+    int req_frames = 2;
+    if (pars->params.frame_mixer) {
+        req_frames += ceilf(pars->params.frame_mixer->kernel->radius) *
+                      (pars->params.skip_anti_aliasing ? 1 : 2);
+    }
+    vo_set_queue_params(vo, 0, MPMIN(VO_MAX_REQ_FRAMES, req_frames));
+
+    pars->params.deband_params = opts->deband ? &pars->deband_params : NULL;
+    pars->deband_params.iterations = opts->deband_opts->iterations;
+    pars->deband_params.radius = opts->deband_opts->range;
+    pars->deband_params.threshold = opts->deband_opts->threshold / 16.384;
+    pars->deband_params.grain = opts->deband_opts->grain / 8.192;
+
+    pars->params.sigmoid_params = opts->sigmoid_upscaling ? &pars->sigmoid_params : NULL;
+    pars->sigmoid_params.center = opts->sigmoid_center;
+    pars->sigmoid_params.slope = opts->sigmoid_slope;
+
+    pars->params.peak_detect_params = opts->tone_map.compute_peak >= 0 ? &pars->peak_detect_params : NULL;
+    pars->peak_detect_params.smoothing_period = opts->tone_map.decay_rate;
+    pars->peak_detect_params.scene_threshold_low = opts->tone_map.scene_threshold_low;
+    pars->peak_detect_params.scene_threshold_high = opts->tone_map.scene_threshold_high;
+    pars->peak_detect_params.percentile = opts->tone_map.peak_percentile;
+    pars->peak_detect_params.allow_delayed = p->delayed_peak;
+
+    const struct pl_tone_map_function * const tone_map_funs[] = { // indexed by opts->tone_map.curve
+        [TONE_MAPPING_AUTO]     = &pl_tone_map_auto,
+        [TONE_MAPPING_CLIP]     = &pl_tone_map_clip,
+        [TONE_MAPPING_MOBIUS]   = &pl_tone_map_mobius,
+        [TONE_MAPPING_REINHARD] = &pl_tone_map_reinhard,
+        [TONE_MAPPING_HABLE]    = &pl_tone_map_hable,
+        [TONE_MAPPING_GAMMA]    = &pl_tone_map_gamma,
+        [TONE_MAPPING_LINEAR]   = &pl_tone_map_linear,
+        [TONE_MAPPING_SPLINE]   = &pl_tone_map_spline,
+        [TONE_MAPPING_BT_2390]  = &pl_tone_map_bt2390,
+        [TONE_MAPPING_BT_2446A] = &pl_tone_map_bt2446a,
+        [TONE_MAPPING_ST2094_40] = &pl_tone_map_st2094_40,
+        [TONE_MAPPING_ST2094_10] = &pl_tone_map_st2094_10,
+    };
+
+    const struct pl_gamut_map_function * const gamut_modes[] = { // indexed by opts->tone_map.gamut_mode
+        [GAMUT_AUTO]            = pl_color_map_default_params.gamut_mapping,
+        [GAMUT_CLIP]            = &pl_gamut_map_clip,
+        [GAMUT_PERCEPTUAL]      = &pl_gamut_map_perceptual,
+        [GAMUT_RELATIVE]        = &pl_gamut_map_relative,
+        [GAMUT_SATURATION]      = &pl_gamut_map_saturation,
+        [GAMUT_ABSOLUTE]        = &pl_gamut_map_absolute,
+        [GAMUT_DESATURATE]      = &pl_gamut_map_desaturate,
+        [GAMUT_DARKEN]          = &pl_gamut_map_darken,
+        [GAMUT_WARN]            = &pl_gamut_map_highlight,
+        [GAMUT_LINEAR]          = &pl_gamut_map_linear,
+    };
+
+    pars->color_map_params.tone_mapping_function = tone_map_funs[opts->tone_map.curve];
+    pars->color_map_params.tone_mapping_param = opts->tone_map.curve_param;
+    if (isnan(pars->color_map_params.tone_mapping_param)) // vo_gpu compatibility
+        pars->color_map_params.tone_mapping_param = 0.0;
+    pars->color_map_params.inverse_tone_mapping = opts->tone_map.inverse;
+    pars->color_map_params.contrast_recovery = opts->tone_map.contrast_recovery;
+    pars->color_map_params.visualize_lut = opts->tone_map.visualize;
+    pars->color_map_params.contrast_smoothness = opts->tone_map.contrast_smoothness;
+    pars->color_map_params.gamut_mapping = gamut_modes[opts->tone_map.gamut_mode];
+
+    switch (opts->dither_algo) {
+    case DITHER_NONE:
+        pars->params.dither_params = NULL;
+        break;
+    case DITHER_ERROR_DIFFUSION:
+        pars->params.error_diffusion = pl_find_error_diffusion_kernel(opts->error_diffusion);
+        if (!pars->params.error_diffusion) {
+            MP_WARN(p, "Could not find error diffusion kernel '%s', falling "
+                    "back to fruit.\n", opts->error_diffusion);
+        }
+        MP_FALLTHROUGH;
+    case DITHER_ORDERED:
+    case DITHER_FRUIT:
+        pars->params.dither_params = &pars->dither_params;
+        pars->dither_params.method = opts->dither_algo == DITHER_ORDERED
+                                ? PL_DITHER_ORDERED_FIXED
+                                : PL_DITHER_BLUE_NOISE;
+        pars->dither_params.lut_size = opts->dither_size;
+        pars->dither_params.temporal = opts->temporal_dither;
+        break;
+    }
+
+    if (opts->dither_depth < 0) // a negative depth disables dithering for any algorithm
+        pars->params.dither_params = NULL;
+
+    update_icc_opts(p, opts->icc_opts);
+
+    pars->params.num_hooks = 0; // the hook list is rebuilt from scratch below
+    const struct pl_hook *hook;
+    for (int i = 0; opts->user_shaders && opts->user_shaders[i]; i++) {
+        if ((hook = load_hook(p, opts->user_shaders[i]))) {
+            MP_TARRAY_APPEND(p, p->hooks, pars->params.num_hooks, hook);
+            update_hook_opts(p, opts->user_shader_opts, opts->user_shaders[i], hook);
+        }
+    }
+
+    pars->params.hooks = p->hooks;
+}
+
+#define OPT_BASE_STRUCT struct priv
+
+const struct m_opt_choice_alternatives lut_types[] = { // choices for "lut-type" and "image-lut-type"
+    {"auto",        PL_LUT_UNKNOWN},
+    {"native",      PL_LUT_NATIVE},
+    {"normalized",  PL_LUT_NORMALIZED},
+    {"conversion",  PL_LUT_CONVERSION},
+    {0}
+};
+
+const struct vo_driver video_out_gpu_next = {
+    .description = "Video output based on libplacebo",
+    .name = "gpu-next",
+    .caps = VO_CAP_ROTATE90 |
+            VO_CAP_FILM_GRAIN |
+            0x0,
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .get_image_ts = get_image,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+    .wait_events = wait_events,
+    .wakeup = wakeup,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+    .priv_defaults = &(const struct priv) {
+        .inter_preserve = true,
+    },
+    // Driver options; the named members belong to struct priv (OPT_BASE_STRUCT)
+    .options = (const struct m_option[]) {
+        {"allow-delayed-peak-detect", OPT_BOOL(delayed_peak)},
+        {"corner-rounding", OPT_FLOAT(corner_rounding), M_RANGE(0, 1)},
+        {"interpolation-preserve", OPT_BOOL(inter_preserve)},
+        {"lut", OPT_STRING(lut.opt), .flags = M_OPT_FILE},
+        {"lut-type", OPT_CHOICE_C(lut.type, lut_types)},
+        {"image-lut", OPT_STRING(image_lut.opt), .flags = M_OPT_FILE},
+        {"image-lut-type", OPT_CHOICE_C(image_lut.type, lut_types)},
+        {"target-lut", OPT_STRING(target_lut.opt), .flags = M_OPT_FILE},
+        {"target-colorspace-hint", OPT_BOOL(target_hint)},
+        // No `target-lut-type` because we don't support non-RGB targets
+        {"libplacebo-opts", OPT_KEYVALUELIST(raw_opts)},
+        {0}
+    },
+};
diff --git a/video/out/vo_image.c b/video/out/vo_image.c
new file mode 100644
index 0000000..cc48ab3
--- /dev/null
+++ b/video/out/vo_image.c
@@ -0,0 +1,165 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdbool.h>
+#include <sys/stat.h>
+
+#include <libswscale/swscale.h>
+
+#include "misc/bstr.h"
+#include "osdep/io.h"
+#include "options/m_config.h"
+#include "options/path.h"
+#include "mpv_talloc.h"
+#include "common/common.h"
+#include "common/msg.h"
+#include "video/out/vo.h"
+#include "video/csputils.h"
+#include "video/mp_image.h"
+#include "video/fmt-conversion.h"
+#include "video/image_writer.h"
+#include "video/sws_utils.h"
+#include "sub/osd.h"
+#include "options/m_option.h"
+
+static const struct m_sub_options image_writer_conf = { // option group backing struct image_writer_opts
+    .opts = image_writer_opts,
+    .size = sizeof(struct image_writer_opts),
+    .defaults = &image_writer_opts_defaults,
+};
+
+struct vo_image_opts {
+    struct image_writer_opts *opts; // "vo-image" sub-options
+    char *outdir;                   // "vo-image-outdir"; may be NULL or empty
+};
+
+#define OPT_BASE_STRUCT struct vo_image_opts
+
+static const struct m_sub_options vo_image_conf = { // options stored in struct vo_image_opts
+    .opts = (const struct m_option[]) {
+        {"vo-image", OPT_SUBSTRUCT(opts, image_writer_conf)},
+        {"vo-image-outdir", OPT_STRING(outdir), .flags = M_OPT_FILE},
+        {0},
+    },
+    .size = sizeof(struct vo_image_opts),
+};
+
+struct priv {
+    struct vo_image_opts *opts; // fetched in preinit()
+
+    struct mp_image *current; // frame last passed to draw_frame(); written by flip_page()
+    int frame;                // incremented before each write; forms the file name
+};
+
+static bool checked_mkdir(struct vo *vo, const char *buf)
+{
+    MP_INFO(vo, "Creating output directory '%s'...\n", buf);
+    if (mkdir(buf, 0755) < 0) {
+        int err = errno; // save at once; later calls may overwrite errno
+        if (err == EEXIST) { // fine as long as the existing entry is a directory
+            struct stat stat_p;
+            if (stat(buf, &stat_p) == 0 && S_ISDIR(stat_p.st_mode))
+                return true;
+        }
+        MP_ERR(vo, "Error creating output directory: %s\n", mp_strerror(err));
+        return false;
+    }
+    return true;
+}
+
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    return 0; // nothing is set up per video format here; always returns 0
+}
+
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+    if (!frame->current)
+        return;
+
+    p->current = frame->current; // remembered so flip_page() can write it out
+    // NOTE(review): OSD_DRAW_SUB_ONLY presumably draws subtitles but no OSD -- confirm
+    struct mp_osd_res dim = osd_res_from_image_params(vo->params);
+    osd_draw_on_image(vo->osd, dim, frame->current->pts, OSD_DRAW_SUB_ONLY, p->current);
+}
+
+static void flip_page(struct vo *vo)
+{
+    // Writes the frame stored by draw_frame() to a sequentially numbered file
+    struct priv *priv = vo->priv;
+    if (!priv->current)
+        return;
+
+    priv->frame += 1;
+    char *outdir = priv->opts->outdir;
+    void *tmp = talloc_new(NULL); // parent of every string allocated below
+    const char *ext = image_writer_file_ext(priv->opts->opts);
+    char *path = talloc_asprintf(tmp, "%08d.%s", priv->frame, ext);
+    if (outdir && outdir[0] != '\0')
+        path = mp_path_join(tmp, outdir, path);
+
+    MP_INFO(vo, "Saving %s\n", path);
+    write_image(priv->current, priv->opts->opts, path, vo->global, vo->log);
+
+    talloc_free(tmp);
+}
+
+static int query_format(struct vo *vo, int fmt)
+{
+    // Report 1 for exactly the formats mp_sws_supported_format() accepts,
+    // and 0 for everything else.
+    return mp_sws_supported_format(fmt) ? 1 : 0;
+}
+
+static void uninit(struct vo *vo)
+{ // no teardown is performed by this driver
+}
+
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    p->opts = mp_get_config_group(vo, vo->global, &vo_image_conf);
+    // An empty outdir is treated like an unset one, matching flip_page()
+    char *dir = p->opts->outdir;
+    return (dir && dir[0] && !checked_mkdir(vo, dir)) ? -1 : 0;
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    return VO_NOTIMPL; // every request gets the same VO_NOTIMPL answer
+}
+
+const struct vo_driver video_out_image = // driver table for the "image" video output
+{
+    .description = "Write video frames to image files",
+    .name = "image",
+    .untimed = true,
+    .priv_size = sizeof(struct priv),
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .uninit = uninit,
+    .global_opts = &vo_image_conf, // registers "vo-image" and "vo-image-outdir"
+};
diff --git a/video/out/vo_kitty.c b/video/out/vo_kitty.c
new file mode 100644
index 0000000..7d548c7
--- /dev/null
+++ b/video/out/vo_kitty.c
@@ -0,0 +1,433 @@
+/*
+ * Video output device using the kitty terminal graphics protocol
+ * See https://sw.kovidgoyal.net/kitty/graphics-protocol/
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <signal.h>
+
+#include "config.h"
+
+#if HAVE_POSIX
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#endif
+
+#include <libswscale/swscale.h>
+#include <libavutil/base64.h>
+
+#include "options/m_config.h"
+#include "osdep/terminal.h"
+#include "sub/osd.h"
+#include "vo.h"
+#include "video/sws_utils.h"
+#include "video/mp_image.h"
+
+// Pixel format the VO accepts and scales to, and its size in bytes per pixel.
+#define IMGFMT IMGFMT_RGB24
+#define BYTES_PER_PX 3
+// Fallback window size in pixels, used by get_win_size() before the terminal
+// is queried.
+#define DEFAULT_WIDTH_PX  320
+#define DEFAULT_HEIGHT_PX 240
+// Fallback terminal size in character cells (columns x rows).
+#define DEFAULT_WIDTH 80
+#define DEFAULT_HEIGHT 25
+
+// Write a NUL-terminated string to stdout. Output is abandoned silently if
+// write() reports an error.
+static inline void write_str(const char *s)
+{
+    // On POSIX platforms, write() is the fastest method. It is also the only
+    // one that allows atomic writes, so mpv's output will not be interrupted
+    // by other processes or threads that write to stdout, which would cause
+    // screen corruption. POSIX does not guarantee atomicity for writes
+    // exceeding PIPE_BUF, but at least Linux does seem to implement it that
+    // way.
+#if HAVE_POSIX
+    // Loop because write() may accept only part of the data.
+    for (int left = strlen(s); left > 0; ) {
+        ssize_t n = write(STDOUT_FILENO, s, left);
+        if (n < 0)
+            return;
+        s += n;
+        left -= n;
+    }
+#else
+    printf("%s", s);
+    fflush(stdout);
+#endif
+}
+
+// Escape sequences of the kitty graphics protocol as used by flip_page().
+// KITTY_ESC_IMG opens an image transfer (the two %d are width and height in
+// pixels); base64 payload follows and each chunk is closed by KITTY_ESC_END.
+// KITTY_ESC_IMG_SHM is the self-contained variant that passes the base64
+// encoded shared memory object name instead of pixel data.
+// KITTY_ESC_CONTINUE opens a follow-up chunk; its %d is the "more data" flag.
+// KITTY_ESC_DELETE_ALL is sent on reconfig() before new images are drawn.
+#define KITTY_ESC_IMG        "\033_Ga=T,f=24,s=%d,v=%d,C=1,q=2,m=1;"
+#define KITTY_ESC_IMG_SHM    "\033_Ga=T,t=s,f=24,s=%d,v=%d,C=1,q=2,m=1;%s\033\\"
+#define KITTY_ESC_CONTINUE   "\033_Gm=%d;"
+#define KITTY_ESC_END        "\033\\"
+#define KITTY_ESC_DELETE_ALL "\033_Ga=d;\033\\"
+
+// User options (exposed with the "vo-kitty" prefix).
+struct vo_kitty_opts {
+    // width/height/rows/cols override the detected terminal size when > 0;
+    // top/left override the computed image position when > 0.
+    int width, height, top, left, rows, cols;
+    // config_clear: clear the screen on reconfig();
+    // alt_screen: switch to the alternate screen in preinit().
+    bool config_clear, alt_screen;
+    // Transfer frames through POSIX shared memory instead of inline base64.
+    bool use_shm;
+};
+
+// Per-VO state.
+struct priv {
+    struct vo_kitty_opts opts;
+
+    uint8_t *buffer;                  // packed RGB24 pixels (talloc or mmap'ed shm)
+    char    *output;                  // base64 of buffer (non-shm mode only)
+    char    *shm_path, *shm_path_b64; // shm object name, and its base64 form
+    int     buffer_size, output_size; // sizes in bytes of buffer / output
+    int     shm_fd;                   // -1 while no shm object is open
+
+    // Image position (terminal cells) and size (pixels); terminal size in cells.
+    int left, top, width, height, cols, rows;
+
+    struct mp_rect src;               // source crop, from vo_get_src_dst_rects()
+    struct mp_rect dst;               // destination rectangle
+    struct mp_osd_res osd;
+    struct mp_image *frame;           // scaled frame in IMGFMT
+    struct mp_sws_context *sws;       // scaler used to produce frame
+};
+
+#if HAVE_POSIX
+// SIGWINCH disposition that was active before preinit() installed ours;
+// restored in uninit().
+static struct sigaction saved_sigaction = {0};
+// Set by handle_winch() and consumed by draw_frame(). It is written from a
+// signal handler, so it has to be a volatile sig_atomic_t object; a plain
+// bool gives no guarantee that the store is seen (or is even well-defined).
+static volatile sig_atomic_t resized;
+#endif
+
+// Unmap the shared memory buffer and close its file descriptor, if any.
+// Safe to call repeatedly; afterwards p->buffer is NULL and p->shm_fd is -1.
+static void close_shm(struct priv *p)
+{
+#if HAVE_POSIX_SHM
+    if (p->buffer != NULL) {
+        // A failed mmap() stores MAP_FAILED here; that must not be unmapped.
+        if (p->buffer != MAP_FAILED)
+            munmap(p->buffer, p->buffer_size);
+        p->buffer = NULL;
+    }
+    if (p->shm_fd != -1) {
+        close(p->shm_fd);
+        p->shm_fd = -1;
+    }
+#endif
+}
+
+// Release the scaled frame, the base64 output and the pixel buffer.
+// All pointers are reset to NULL so the function is idempotent and callers
+// (reconfig() failure paths, draw_frame(), flip_page(), uninit()) never see
+// dangling pointers.
+static void free_bufs(struct vo* vo)
+{
+    struct priv* p = vo->priv;
+
+    talloc_free(p->frame);
+    p->frame = NULL;
+    talloc_free(p->output);
+    p->output = NULL;
+
+    if (p->opts.use_shm) {
+        // The buffer is a shm mapping in this mode; close_shm() resets it.
+        close_shm(p);
+    } else {
+        talloc_free(p->buffer);
+        p->buffer = NULL;
+    }
+}
+
+// Determine the terminal size in cells (rows/cols) and pixels (width/height).
+// Starts from the compile-time defaults, lets terminal_get_size2() update
+// them, and finally applies any user option that is > 0.
+static void get_win_size(struct vo *vo, int *out_rows, int *out_cols,
+                         int *out_width, int *out_height)
+{
+    struct priv *p = vo->priv;
+    const struct vo_kitty_opts *o = &p->opts;
+
+    *out_rows = DEFAULT_HEIGHT;
+    *out_cols = DEFAULT_WIDTH;
+    *out_width = DEFAULT_WIDTH_PX;
+    *out_height = DEFAULT_HEIGHT_PX;
+
+    terminal_get_size2(out_rows, out_cols, out_width, out_height);
+
+    if (o->rows > 0)
+        *out_rows = o->rows;
+    if (o->cols > 0)
+        *out_cols = o->cols;
+    if (o->width > 0)
+        *out_width = o->width;
+    if (o->height > 0)
+        *out_height = o->height;
+}
+
+// Derive the output geometry and buffer sizes from the current window size.
+// Expects p->rows/p->cols and vo->dwidth/vo->dheight to be up to date.
+static void set_out_params(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd);
+
+    const struct mp_rect *dst = &p->dst;
+    p->width  = dst->x1 - dst->x0;
+    p->height = dst->y1 - dst->y0;
+
+    // Position in terminal cells: user override, or the destination rect's
+    // pixel offset scaled to the cell grid.
+    if (p->opts.top > 0) {
+        p->top = p->opts.top;
+    } else {
+        p->top = p->rows * dst->y0 / vo->dheight;
+    }
+    if (p->opts.left > 0) {
+        p->left = p->opts.left;
+    } else {
+        p->left = p->cols * dst->x0 / vo->dwidth;
+    }
+
+    // 3 bytes per RGB24 pixel; the output holds the base64 form plus NUL.
+    p->buffer_size = 3 * p->width * p->height;
+    p->output_size = AV_BASE64_SIZE(p->buffer_size);
+}
+
+// (Re)configure the VO for new video parameters or a new terminal size:
+// clears old images, recomputes the geometry, and reallocates the scaler
+// target and transfer buffers. Returns 0 on success, -1 on failure.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *p = vo->priv;
+
+    vo->want_redraw = true;
+    write_str(KITTY_ESC_DELETE_ALL);
+    if (p->opts.config_clear)
+        write_str(TERM_ESC_CLEAR_SCREEN);
+
+    get_win_size(vo, &p->rows, &p->cols, &vo->dwidth, &vo->dheight);
+    set_out_params(vo);
+    free_bufs(vo);
+    // Everything was just released. Clear the pointers so that the error
+    // returns below do not leave dangling pointers behind, which would be
+    // freed again in uninit() or dereferenced by draw_frame()/flip_page().
+    p->frame = NULL;
+    p->output = NULL;
+    p->buffer = NULL;
+
+    p->sws->src = *params;
+    p->sws->src.w = mp_rect_w(p->src);
+    p->sws->src.h = mp_rect_h(p->src);
+    p->sws->dst = (struct mp_image_params) {
+        .imgfmt = IMGFMT,
+        .w = p->width,
+        .h = p->height,
+        .p_w = 1,
+        .p_h = 1,
+    };
+
+    p->frame = mp_image_alloc(IMGFMT, p->width, p->height);
+    if (!p->frame)
+        return -1;
+
+    if (mp_sws_reinit(p->sws) < 0)
+        return -1;
+
+    // In shm mode the buffer is mapped per frame by create_shm() instead.
+    if (!p->opts.use_shm) {
+        p->buffer = talloc_array(NULL, uint8_t, p->buffer_size);
+        p->output = talloc_array(NULL, char, p->output_size);
+    }
+
+    return 0;
+}
+
+// Create and map the shared memory object that carries one frame.
+// Returns 1 on success (p->shm_fd and p->buffer are valid), 0 on failure.
+// On failure the object is unlinked, the descriptor is closed and reset to
+// -1, and p->buffer is left untouched, so close_shm() stays safe to call.
+static int create_shm(struct vo *vo)
+{
+#if HAVE_POSIX_SHM
+    struct priv *p = vo->priv;
+    p->shm_fd = shm_open(p->shm_path, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR);
+    if (p->shm_fd == -1) {
+        MP_ERR(vo, "Failed to create shared memory object\n");
+        return 0;
+    }
+
+    if (ftruncate(p->shm_fd, p->buffer_size) == -1) {
+        MP_ERR(vo, "Failed to truncate shared memory object\n");
+        goto fail;
+    }
+
+    // Map into a local first so MAP_FAILED never ends up in p->buffer.
+    void *map = mmap(NULL, p->buffer_size,
+                     PROT_READ | PROT_WRITE, MAP_SHARED, p->shm_fd, 0);
+    if (map == MAP_FAILED) {
+        MP_ERR(vo, "Failed to mmap shared memory object\n");
+        goto fail;
+    }
+
+    p->buffer = map;
+    return 1;
+
+fail:
+    shm_unlink(p->shm_path);
+    close(p->shm_fd);
+    // Prevent close_shm() from closing the descriptor a second time.
+    p->shm_fd = -1;
+    return 0;
+#else
+    return 0;
+#endif
+}
+
+// Scale the current video frame (or clear the target if there is none),
+// draw the OSD onto it, and prepare the data flip_page() will send: the
+// packed pixel buffer, plus its base64 form when not using shared memory.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+    mp_image_t *mpi = NULL;
+
+#if !HAVE_POSIX
+    // Without SIGWINCH, detect resizes by polling the terminal size.
+    int prev_height = vo->dheight;
+    int prev_width = vo->dwidth;
+    get_win_size(vo, &p->rows, &p->cols, &vo->dwidth, &vo->dheight);
+    bool resized = (prev_width != vo->dwidth || prev_height != vo->dheight);
+#endif
+
+    if (resized) {
+        // Clear the flag before reconfiguring, so a resize signalled while
+        // reconfig() runs is picked up on the next frame instead of lost.
+        resized = false;
+        if (reconfig(vo, vo->params) < 0)
+            return;
+    }
+
+    // A failed reconfig() leaves no usable target/buffers; skip drawing.
+    if (!p->frame)
+        return;
+    if (!p->opts.use_shm && (!p->buffer || !p->output))
+        return;
+
+    if (frame->current) {
+        mpi = mp_image_new_ref(frame->current);
+        struct mp_rect src_rc = p->src;
+        src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, mpi->fmt.align_x);
+        src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, mpi->fmt.align_y);
+        mp_image_crop_rc(mpi, src_rc);
+
+        mp_sws_scale(p->sws, p->frame, mpi);
+    } else {
+        mp_image_clear(p->frame, 0, 0, p->width, p->height);
+    }
+
+    struct mp_osd_res res = { .w = p->width, .h = p->height };
+    osd_draw_on_image(vo->osd, res, mpi ? mpi->pts : 0, 0, p->frame);
+
+    // The frame reference is no longer needed; release it before any of the
+    // exits below so it cannot leak.
+    talloc_free(mpi);
+
+    if (p->opts.use_shm && !create_shm(vo))
+        return;
+
+    // Copy row by row to drop the stride padding of the scaled frame.
+    memcpy_pic(p->buffer, p->frame->planes[0], p->width * BYTES_PER_PX,
+               p->height, p->width * BYTES_PER_PX, p->frame->stride[0]);
+
+    if (!p->opts.use_shm)
+        av_base64_encode(p->output, p->output_size, p->buffer, p->buffer_size);
+}
+
+// Send the frame prepared by draw_frame() to the terminal: move the cursor
+// to the image position, then emit either the shared-memory command or the
+// base64 payload split into chunks of at most 4096 bytes.
+static void flip_page(struct vo *vo)
+{
+    struct priv* p = vo->priv;
+    const bool shm = p->opts.use_shm;
+
+    if (p->buffer == NULL)
+        return;
+    // Inline transfer needs the encoded output.
+    if (!shm && p->output == NULL)
+        return;
+
+    char *cmd = talloc_asprintf(NULL, TERM_ESC_GOTO_YX, p->top, p->left);
+
+    if (shm) {
+        cmd = talloc_asprintf_append(cmd, KITTY_ESC_IMG_SHM, p->width, p->height, p->shm_path_b64);
+    } else {
+        cmd = talloc_asprintf_append(cmd, KITTY_ESC_IMG, p->width, p->height);
+
+        int offset = 0;
+        while (1) {
+            // Every chunk after the first gets its own header; the flag
+            // says whether more data follows.
+            if (offset)
+                cmd = talloc_asprintf_append(cmd, KITTY_ESC_CONTINUE, offset < p->output_size);
+
+            int chunk = MPMIN(4096, p->output_size - offset);
+            cmd = talloc_strndup_append(cmd, p->output + offset, chunk);
+            cmd = talloc_strdup_append(cmd, KITTY_ESC_END);
+
+            if (offset >= p->output_size)
+                break;
+            offset += chunk;
+        }
+    }
+
+    write_str(cmd);
+    talloc_free(cmd);
+
+#if HAVE_POSIX
+    if (shm)
+        close_shm(p);
+#endif
+}
+
+#if HAVE_POSIX
+// SIGWINCH handler: flag the resize for draw_frame() and chain to the
+// handler that was installed before ours, if there was a real one.
+static void handle_winch(int sig) {
+    resized = true;
+
+    // SIG_DFL and SIG_IGN are sentinel values, not callable functions, and a
+    // SA_SIGINFO handler takes three arguments we cannot provide here.
+    if (saved_sigaction.sa_flags & SA_SIGINFO)
+        return;
+    void (*prev)(int) = saved_sigaction.sa_handler;
+    if (prev != SIG_DFL && prev != SIG_IGN)
+        prev(sig);
+}
+#endif
+
+// Set up the scaler, the SIGWINCH handler and (optionally) the shared
+// memory object name, then prepare the terminal. Returns 0 on success, -1
+// if shared memory was requested but is unavailable on this platform.
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    p->sws = mp_sws_alloc(vo);
+    p->sws->log = vo->log;
+    mp_sws_enable_cmdline_opts(p->sws, vo->global);
+
+#if HAVE_POSIX
+    // Fully initialize the struct: sa_mask and sa_flags must not be left
+    // indeterminate. SA_RESTART keeps a resize from making the write() in
+    // write_str() fail halfway through an escape sequence.
+    struct sigaction sa = {0};
+    sa.sa_handler = handle_winch;
+    sigemptyset(&sa.sa_mask);
+    sa.sa_flags = SA_RESTART;
+    sigaction(SIGWINCH, &sa, &saved_sigaction);
+#endif
+
+#if HAVE_POSIX_SHM
+    if (p->opts.use_shm) {
+        p->shm_path = talloc_asprintf(vo, "/mpv-kitty-%p", vo);
+        // The name is sent base64-encoded without its leading '/'.
+        int p_size = strlen(p->shm_path) - 1;
+        int b64_size = AV_BASE64_SIZE(p_size);
+        p->shm_path_b64 = talloc_array(vo, char, b64_size);
+        av_base64_encode(p->shm_path_b64, b64_size, p->shm_path + 1, p_size);
+    }
+#else
+    if (p->opts.use_shm) {
+        MP_ERR(vo, "Shared memory support is not available on this platform.\n");
+        return -1;
+    }
+#endif
+
+    write_str(TERM_ESC_HIDE_CURSOR);
+    if (p->opts.alt_screen)
+        write_str(TERM_ESC_ALT_SCREEN);
+
+    return 0;
+}
+
+// Only the single format IMGFMT is accepted. Returns 1 for it, 0 otherwise.
+static int query_format(struct vo *vo, int format)
+{
+    if (format == IMGFMT)
+        return 1;
+    return 0;
+}
+
+// Handle VO control requests. Only VOCTRL_SET_PANSCAN is implemented; it
+// re-runs reconfig() with the current parameters once the VO is configured.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    if (request != VOCTRL_SET_PANSCAN)
+        return VO_NOTIMPL;
+
+    if (!vo->config_ok)
+        return VO_FALSE;
+
+    // reconfig() returning 0 is the success case.
+    return reconfig(vo, vo->params) ? VO_FALSE : VO_TRUE;
+}
+
+// Restore the terminal (signal handler, cursor, screen) and free buffers.
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+#if HAVE_POSIX
+    sigaction(SIGWINCH, &saved_sigaction, NULL);
+#endif
+
+    write_str(TERM_ESC_RESTORE_CURSOR);
+
+    if (p->opts.alt_screen) {
+        write_str(TERM_ESC_NORMAL_SCREEN);
+    } else {
+        // TERM_ESC_GOTO_YX takes (row, column), as in flip_page(); move to
+        // the last row, so the row argument is the row count, not the
+        // column count.
+        char *cmd = talloc_asprintf(vo, TERM_ESC_GOTO_YX, p->rows, 0);
+        write_str(cmd);
+    }
+
+    free_bufs(vo);
+}
+
+// The option table below addresses fields relative to struct priv.
+#define OPT_BASE_STRUCT struct priv
+
+// Driver table for the "kitty" VO. Options are exposed with the "vo-kitty"
+// prefix and stored in priv.opts.
+const struct vo_driver video_out_kitty = {
+    .name = "kitty",
+    .description = "Kitty terminal graphics protocol",
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+    .priv_defaults = &(const struct priv) {
+        // -1 marks "no shm object open"; see close_shm().
+        .shm_fd = -1,
+        .opts.config_clear = true,
+        .opts.alt_screen = true,
+    },
+    .options = (const m_option_t[]) {
+        {"width", OPT_INT(opts.width)},
+        {"height", OPT_INT(opts.height)},
+        {"top", OPT_INT(opts.top)},
+        {"left", OPT_INT(opts.left)},
+        {"rows", OPT_INT(opts.rows)},
+        {"cols", OPT_INT(opts.cols)},
+        {"config-clear", OPT_BOOL(opts.config_clear), },
+        {"alt-screen", OPT_BOOL(opts.alt_screen), },
+        {"use-shm", OPT_BOOL(opts.use_shm), },
+        {0}
+    },
+    .options_prefix = "vo-kitty",
+};
diff --git a/video/out/vo_lavc.c b/video/out/vo_lavc.c
new file mode 100644
index 0000000..7170c1d
--- /dev/null
+++ b/video/out/vo_lavc.c
@@ -0,0 +1,262 @@
+/*
+ * video encoding using libavformat
+ *
+ * Copyright (C) 2010 Nicolas George <george@nsup.org>
+ * Copyright (C) 2011-2012 Rudolf Polzer <divVerent@xonotic.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include "common/common.h"
+#include "options/options.h"
+#include "video/fmt-conversion.h"
+#include "video/mp_image.h"
+#include "mpv_talloc.h"
+#include "vo.h"
+
+#include "common/encode_lavc.h"
+
+#include "sub/osd.h"
+
+// Per-VO state of the encoding VO.
+struct priv {
+    struct encoder_context *enc;    // allocated in preinit(), owned by priv
+
+    // Set once reconfig2() has failed; further frames are then dropped and
+    // the encoder is not flushed in uninit().
+    bool shutdown;
+};
+
+// Allocate the video encoder context and make priv its talloc parent.
+// Returns 0 on success, -1 if no encoder context could be allocated.
+static int preinit(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    struct encoder_context *enc =
+        encoder_context_alloc(vo->encode_lavc_ctx, STREAM_VIDEO, vo->log);
+
+    vc->enc = enc;
+    if (!enc)
+        return -1;
+
+    talloc_steal(vc, enc);
+    return 0;
+}
+
+static void uninit(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    struct encoder_context *enc = vc->enc;
+
+    if (!vc->shutdown)
+        encoder_encode(enc, NULL); // finish encoding
+}
+
+// Callback handed to encoder_init_codec_and_muxer(); ptr is the VO. Raises
+// VO_EVENT_INITIAL_UNBLOCK on it.
+static void on_ready(void *ptr)
+{
+    vo_event((struct vo *)ptr, VO_EVENT_INITIAL_UNBLOCK);
+}
+
+// Configure the encoder from the parameters of the first image and start
+// the codec/muxer. Later calls are either ignored (same size and pixel
+// format) or treated as fatal. Returns 0 on success, -1 on failure; any
+// failure sets vc->shutdown so that all further work is skipped.
+static int reconfig2(struct vo *vo, struct mp_image *img)
+{
+    struct priv *vc = vo->priv;
+    AVCodecContext *encoder = vc->enc->encoder;
+
+    struct mp_image_params *params = &img->params;
+    enum AVPixelFormat pix_fmt = imgfmt2pixfmt(params->imgfmt);
+    AVRational aspect = {params->p_w, params->p_h};
+    int width = params->w;
+    int height = params->h;
+
+    if (vc->shutdown)
+        return -1;
+
+    if (avcodec_is_open(encoder)) {
+        if (width == encoder->width && height == encoder->height &&
+            pix_fmt == encoder->pix_fmt)
+        {
+            // consider these changes not critical
+            MP_ERR(vo, "Ignoring mid-stream parameter changes!\n");
+            return 0;
+        }
+
+        /* FIXME Is it possible with raw video? */
+        MP_ERR(vo, "resolution changes not supported.\n");
+        goto error;
+    }
+
+    // When we get here, this must be the first call to reconfig2(). Thus, we
+    // can rely on no existing data in vc having been allocated yet.
+    // Reason:
+    // - Second calls after reconfig2() already failed once fail (due to the
+    //   vc->shutdown check above).
+    // - Second calls after reconfig2() already succeeded once return early
+    //   (due to the avcodec_is_open() check above).
+
+    if (pix_fmt == AV_PIX_FMT_NONE) {
+        MP_FATAL(vo, "Format %s not supported by lavc.\n",
+                 mp_imgfmt_to_name(params->imgfmt));
+        goto error;
+    }
+
+    encoder->sample_aspect_ratio = aspect;
+    encoder->width = width;
+    encoder->height = height;
+    encoder->pix_fmt = pix_fmt;
+    encoder->colorspace = mp_csp_to_avcol_spc(params->color.space);
+    encoder->color_range = mp_csp_levels_to_avcol_range(params->color.levels);
+
+    // Note: tb holds a rate (ticks per second); it is inverted below to
+    // become the encoder time base.
+    AVRational tb;
+
+    // we want to handle:
+    //      1/25
+    //   1001/24000
+    //   1001/30000
+    // for this we would need 120000fps...
+    // however, mpeg-4 only allows 16bit values
+    // so let's take 1001/30000 out
+    tb.num = 24000;
+    tb.den = 1;
+
+    // If the codec restricts frame rates, pick the supported one nearest to
+    // the desired rate.
+    const AVRational *rates = encoder->codec->supported_framerates;
+    if (rates && rates[0].den)
+        tb = rates[av_find_nearest_q_idx(tb, rates)];
+
+    encoder->time_base = av_inv_q(tb);
+
+    // Used for rate control, level selection, etc.
+    // Usually it's not too catastrophic if this isn't exactly correct,
+    // as long as it's not off by orders of magnitude.
+    // If we don't set anything, encoders will use the time base,
+    // and 24000 is so high that the output can end up extremely screwy (see #11215),
+    // so we default to 240 if we don't have a real value.
+    if (img->nominal_fps > 0)
+        encoder->framerate = av_d2q(img->nominal_fps, img->nominal_fps * 1001 + 2); // Hopefully give exact results for NTSC rates
+    else
+        encoder->framerate = (AVRational){ 240, 1 };
+
+    if (!encoder_init_codec_and_muxer(vc->enc, on_ready, vo))
+        goto error;
+
+    return 0;
+
+error:
+    vc->shutdown = true;
+    return -1;
+}
+
+// Report whether the encoder accepts the given image format. A codec with
+// no pixel format list accepts everything. Returns 1 if accepted, else 0.
+static int query_format(struct vo *vo, int format)
+{
+    struct priv *vc = vo->priv;
+
+    const enum AVPixelFormat *list = vc->enc->encoder->codec->pix_fmts;
+    if (!list)
+        return 1;
+
+    // The list is terminated by AV_PIX_FMT_NONE.
+    enum AVPixelFormat wanted = imgfmt2pixfmt(format);
+    for (; *list != AV_PIX_FMT_NONE; list++) {
+        if (*list == wanted)
+            return 1;
+    }
+
+    return 0;
+}
+
+// Draw the OSD (OSD_DRAW_SUB_ONLY) onto the first frame of voframe, compute
+// its output timestamp and hand it to the encoder. Redraws, repeats and
+// empty vo_frames are ignored.
+static void draw_frame(struct vo *vo, struct vo_frame *voframe)
+{
+    struct priv *vc = vo->priv;
+    struct encoder_context *enc = vc->enc;
+    struct encode_lavc_context *ectx = enc->encode_lavc_ctx;
+    AVCodecContext *avc = enc->encoder;
+
+    if (voframe->redraw || voframe->repeat || voframe->num_frames < 1)
+        return;
+
+    struct mp_image *mpi = voframe->frames[0];
+
+    struct mp_osd_res dim = osd_res_from_image_params(vo->params);
+    osd_draw_on_image(vo->osd, dim, mpi->pts, OSD_DRAW_SUB_ONLY, mpi);
+
+    // After a failed reconfig2() nothing is encoded anymore.
+    if (vc->shutdown)
+        return;
+
+    // Lock for shared timestamp fields.
+    mp_mutex_lock(&ectx->lock);
+
+    double pts = mpi->pts;
+    double outpts = pts;
+    if (!enc->options->rawts) {
+        // fix the discontinuity pts offset
+        if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) {
+            ectx->discontinuity_pts_offset = ectx->next_in_pts - pts;
+        } else if (fabs(pts + ectx->discontinuity_pts_offset -
+                        ectx->next_in_pts) > 30)
+        {
+            // A jump of more than 30 seconds re-anchors the offset.
+            MP_WARN(vo, "detected an unexpected discontinuity (pts jumped by "
+                    "%f seconds)\n",
+                    pts + ectx->discontinuity_pts_offset - ectx->next_in_pts);
+            ectx->discontinuity_pts_offset = ectx->next_in_pts - pts;
+        }
+
+        outpts = pts + ectx->discontinuity_pts_offset;
+    }
+
+    if (!enc->options->rawts) {
+        // calculate expected pts of next video frame
+        double timeunit = av_q2d(avc->time_base);
+        double expected_next_pts = pts + timeunit;
+        // set next allowed output pts value
+        double nextpts = expected_next_pts + ectx->discontinuity_pts_offset;
+        if (nextpts > ectx->next_in_pts)
+            ectx->next_in_pts = nextpts;
+    }
+
+    mp_mutex_unlock(&ectx->lock);
+
+    AVFrame *frame = mp_image_to_av_frame(mpi);
+    MP_HANDLE_OOM(frame);
+
+    // Convert seconds to encoder time base ticks.
+    frame->pts = rint(outpts * av_q2d(av_inv_q(avc->time_base)));
+    frame->pict_type = 0; // keep this at unknown/undefined
+    frame->quality = avc->global_quality;
+    encoder_encode(enc, frame);
+    av_frame_free(&frame);
+}
+
+// Intentionally empty: frames are already passed to the encoder in
+// draw_frame().
+static void flip_page(struct vo *vo)
+{
+}
+
+// No VO control requests are handled; every request yields VO_NOTIMPL.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    return VO_NOTIMPL;
+}
+
+// Driver table for the "lavc" encoding VO. It starts out blocked
+// (.initially_blocked); on_ready() later raises VO_EVENT_INITIAL_UNBLOCK.
+const struct vo_driver video_out_lavc = {
+    .encode = true,
+    .description = "video encoding using libavcodec",
+    .name = "lavc",
+    .initially_blocked = true,
+    .untimed = true,
+    .priv_size = sizeof(struct priv),
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig2 = reconfig2,
+    .control = control,
+    .uninit = uninit,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+};
+
+// vim: sw=4 ts=4 et tw=80
diff --git a/video/out/vo_libmpv.c b/video/out/vo_libmpv.c
new file mode 100644
index 0000000..972588e
--- /dev/null
+++ b/video/out/vo_libmpv.c
@@ -0,0 +1,748 @@
+#include <assert.h>
+#include <limits.h>
+#include <math.h>
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "mpv_talloc.h"
+#include "common/common.h"
+#include "misc/bstr.h"
+#include "misc/dispatch.h"
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "options/options.h"
+#include "aspect.h"
+#include "dr_helper.h"
+#include "vo.h"
+#include "video/mp_image.h"
+#include "sub/osd.h"
+#include "osdep/threads.h"
+#include "osdep/timer.h"
+
+#include "common/global.h"
+#include "player/client.h"
+
+#include "libmpv.h"
+
+/*
+ * mpv_render_context is managed by the host application - the host application
+ * can access it any time, even if the VO is destroyed (or not created yet).
+ *
+ * - the libmpv user can mix render API and normal API; thus render API
+ *   functions can wait on the core, but not the reverse
+ * - the core does blocking calls into the VO thread, thus the VO functions
+ *   can't wait on the user calling the API functions
+ * - to make video timing work like it should, the VO thread waits on the
+ *   render API user anyway, and the (unlikely) deadlock is avoided with
+ *   a timeout
+ *
+ *  Locking:  mpv core > VO > mpv_render_context.lock > mp_client_api.lock
+ *              > mpv_render_context.update_lock
+ *  And: render thread > VO (wait for present)
+ *       VO > render thread (wait for present done, via timeout)
+ *
+ *  Locking gets more complex with advanced_control enabled. Use
+ *  mpv_render_context.dispatch with care; synchronous calls can add lock
+ *  dependencies.
+ */
+
+// Private data of the VO itself; the actual state lives in the render
+// context it points to.
+struct vo_priv {
+    struct mpv_render_context *ctx; // immutable after init
+};
+
+// State shared between the render API user and the VO. The section
+// comments below name the lock (if any) protecting each group of fields.
+struct mpv_render_context {
+    struct mp_log *log;
+    struct mpv_global *global;
+    struct mp_client_api *client_api;
+
+    // Set via compare-and-swap in mp_render_context_acquire(); while true,
+    // mpv_render_context_free() waits before destroying the context.
+    atomic_bool in_use;
+
+    // --- Immutable after init
+    struct mp_dispatch_queue *dispatch;
+    bool advanced_control;
+    struct dr_helper *dr;           // NULL if advanced_control disabled
+
+    mp_mutex control_lock;
+    // --- Protected by control_lock
+    mp_render_cb_control_fn control_cb;
+    void *control_cb_ctx;
+
+    mp_mutex update_lock;
+    mp_cond update_cond;     // paired with update_lock
+
+    // --- Protected by update_lock
+    mpv_render_update_fn update_cb;
+    void *update_cb_ctx;
+
+    mp_mutex lock;
+    mp_cond video_wait;      // paired with lock
+
+    // --- Protected by lock
+    struct vo_frame *next_frame;    // next frame to draw
+    int64_t present_count;          // incremented when next frame can be shown
+    int64_t expected_flip_count;    // next vsync event for next_frame
+    bool redrawing;                 // next_frame was a redraw request
+    int64_t flip_count;
+    struct vo_frame *cur_frame;
+    struct mp_image_params img_params;
+    int vp_w, vp_h;
+    bool flip;
+    // Indexed by (imgfmt - IMGFMT_START); filled in at context creation.
+    bool imgfmt_supported[IMGFMT_END - IMGFMT_START];
+    // Pending work flags, consumed by mpv_render_context_render().
+    bool need_reconfig;
+    bool need_resize;
+    bool need_reset;
+    bool need_update_external;
+    struct vo *vo;
+
+    // --- Mostly immutable after init.
+    struct mp_hwdec_devices *hwdec_devs;
+
+    // --- All of these can only be accessed from mpv_render_*() API, for
+    //     which the user makes sure they're called synchronized.
+    struct render_backend *renderer;
+    struct m_config_cache *vo_opts_cache;
+    struct mp_vo_opts *vo_opts;
+};
+
+// NULL-terminated list of available backends, tried in this order by
+// mpv_render_context_create().
+const struct render_backend_fns *render_backends[] = {
+    &render_backend_gpu,
+    &render_backend_sw,
+    NULL
+};
+
+// Notify the API user (through the update callback, if one is set) and wake
+// everything waiting on update_cond. The callback runs with update_lock held.
+static void update(struct mpv_render_context *ctx)
+{
+    mp_mutex_lock(&ctx->update_lock);
+
+    mpv_render_update_fn cb = ctx->update_cb;
+    if (cb)
+        cb(ctx->update_cb_ctx);
+
+    mp_cond_broadcast(&ctx->update_cond);
+    mp_mutex_unlock(&ctx->update_lock);
+}
+
+// Look up the first entry of the given type in a parameter list that is
+// terminated by an entry with type 0. Returns that entry's data pointer, or
+// def if params is NULL or holds no such entry.
+void *get_mpv_render_param(mpv_render_param *params, mpv_render_param_type type,
+                           void *def)
+{
+    if (!params)
+        return def;
+
+    for (mpv_render_param *cur = params; cur->type; cur++) {
+        if (cur->type == type)
+            return cur->data;
+    }
+
+    return def;
+}
+
+// Wake up all waiters on video_wait and, if all is set, also drop the
+// currently displayed frame.
+static void forget_frames(struct mpv_render_context *ctx, bool all)
+{
+    mp_cond_broadcast(&ctx->video_wait);
+
+    if (!all)
+        return;
+
+    talloc_free(ctx->cur_frame);
+    ctx->cur_frame = NULL;
+}
+
+// Wakeup function of the dispatch queue; ptr is the render context. It
+// forwards to update().
+static void dispatch_wakeup(void *ptr)
+{
+    update((struct mpv_render_context *)ptr);
+}
+
+// Image allocation callback handed to dr_helper_create(); ptr is the render
+// context. Delegates to the backend's get_image function.
+static struct mp_image *render_get_image(void *ptr, int imgfmt, int w, int h,
+                                         int stride_align, int flags)
+{
+    struct mpv_render_context *ctx = ptr;
+    struct render_backend *backend = ctx->renderer;
+
+    return backend->fns->get_image(backend, imgfmt, w, h, stride_align, flags);
+}
+
+// Create a render context for the given mpv handle and register it as the
+// main render context. Backends from render_backends[] are tried in order.
+// Returns 0 and stores the context in *res on success; on failure returns a
+// negative error code, frees the partially built context and leaves *res
+// untouched.
+int mpv_render_context_create(mpv_render_context **res, mpv_handle *mpv,
+                              mpv_render_param *params)
+{
+    mpv_render_context *ctx = talloc_zero(NULL, mpv_render_context);
+    mp_mutex_init(&ctx->control_lock);
+    mp_mutex_init(&ctx->lock);
+    mp_mutex_init(&ctx->update_lock);
+    mp_cond_init(&ctx->update_cond);
+    mp_cond_init(&ctx->video_wait);
+
+    ctx->global = mp_client_get_global(mpv);
+    ctx->client_api = ctx->global->client_api;
+    ctx->log = mp_log_new(ctx, ctx->global->log, "libmpv_render");
+
+    ctx->vo_opts_cache = m_config_cache_alloc(ctx, ctx->global, &vo_sub_opts);
+    ctx->vo_opts = ctx->vo_opts_cache->opts;
+
+    ctx->dispatch = mp_dispatch_create(ctx);
+    mp_dispatch_set_wakeup_fn(ctx->dispatch, dispatch_wakeup, ctx);
+
+    if (GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_ADVANCED_CONTROL, int, 0))
+        ctx->advanced_control = true;
+
+    // Try each backend until one initializes. Only MPV_ERROR_NOT_IMPLEMENTED
+    // moves on to the next backend; any other error aborts the search.
+    int err = MPV_ERROR_NOT_IMPLEMENTED;
+    for (int n = 0; render_backends[n]; n++) {
+        ctx->renderer = talloc_zero(NULL, struct render_backend);
+        *ctx->renderer = (struct render_backend){
+            .global = ctx->global,
+            .log = ctx->log,
+            .fns = render_backends[n],
+        };
+        err = ctx->renderer->fns->init(ctx->renderer, params);
+        if (err >= 0)
+            break;
+        ctx->renderer->fns->destroy(ctx->renderer);
+        talloc_free(ctx->renderer->priv);
+        TA_FREEP(&ctx->renderer);
+        if (err != MPV_ERROR_NOT_IMPLEMENTED)
+            break;
+    }
+
+    if (err < 0) {
+        // ctx->renderer is NULL here, which mpv_render_context_free() handles.
+        mpv_render_context_free(ctx);
+        return err;
+    }
+
+    ctx->hwdec_devs = ctx->renderer->hwdec_devs;
+
+    // Cache which image formats the chosen backend supports.
+    for (int n = IMGFMT_START; n < IMGFMT_END; n++) {
+        ctx->imgfmt_supported[n - IMGFMT_START] =
+            ctx->renderer->fns->check_format(ctx->renderer, n);
+    }
+
+    if (ctx->renderer->fns->get_image && ctx->advanced_control)
+        ctx->dr = dr_helper_create(ctx->dispatch, render_get_image, ctx);
+
+    if (!mp_set_main_render_context(ctx->client_api, ctx, true)) {
+        MP_ERR(ctx, "There is already a mpv_render_context set.\n");
+        mpv_render_context_free(ctx);
+        return MPV_ERROR_GENERIC;
+    }
+
+    *res = ctx;
+    return 0;
+}
+
+// Install (or clear, with a NULL callback) the update callback. A newly set
+// callback is invoked once immediately, while update_lock is still held.
+void mpv_render_context_set_update_callback(mpv_render_context *ctx,
+                                            mpv_render_update_fn callback,
+                                            void *callback_ctx)
+{
+    mp_mutex_lock(&ctx->update_lock);
+
+    ctx->update_cb = callback;
+    ctx->update_cb_ctx = callback_ctx;
+
+    if (callback)
+        callback(callback_ctx);
+
+    mp_mutex_unlock(&ctx->update_lock);
+}
+
+// Install (or clear, with a NULL callback) the control callback and its
+// context. Both fields are written under control_lock.
+void mp_render_context_set_control_callback(mpv_render_context *ctx,
+                                            mp_render_cb_control_fn callback,
+                                            void *callback_ctx)
+{
+    mp_mutex_lock(&ctx->control_lock);
+    ctx->control_cb = callback;
+    ctx->control_cb_ctx = callback_ctx;
+    mp_mutex_unlock(&ctx->control_lock);
+}
+
+// Destroy a render context. Accepts NULL. If a VO is still using the
+// context, video is shut down asynchronously and this function keeps
+// serving the dispatch queue until the VO has released the context. Also
+// used to clean up a partially constructed context (ctx->renderer and
+// ctx->dr may be NULL).
+void mpv_render_context_free(mpv_render_context *ctx)
+{
+    if (!ctx)
+        return;
+
+    // From here on, ctx becomes invisible and cannot be newly acquired. Only
+    // a VO could still hold a reference.
+    mp_set_main_render_context(ctx->client_api, ctx, false);
+
+    if (atomic_load(&ctx->in_use)) {
+        // Start destroying the VO, and also bring down the decoder etc., which
+        // still might be using the hwdec context or use DR images. The above
+        // mp_set_main_render_context() call guarantees it can't come back (so
+        // ctx->vo can't change to non-NULL).
+        // In theory, this races with vo_libmpv exiting and another VO being
+        // used, which is a harmless grotesque corner case.
+        kill_video_async(ctx->client_api);
+
+        while (atomic_load(&ctx->in_use)) {
+            // As a nasty detail, we need to wait until the VO is released, but
+            // also need to react to update() calls during it (the update calls
+            // are supposed to trigger processing ctx->dispatch). We solve this
+            // by making the VO uninit function call mp_dispatch_interrupt().
+            //
+            // Other than that, processing ctx->dispatch is needed to serve the
+            // video decoder, which might still not be fully destroyed, and e.g.
+            // performs calls to release DR images (or, as a grotesque corner
+            // case may even try to allocate new ones).
+            //
+            // Once the VO is released, ctx->dispatch becomes truly inactive.
+            // (The libmpv API user could call mpv_render_context_update() while
+            // mpv_render_context_free() is being called, but of course this is
+            // invalid.)
+            mp_dispatch_queue_process(ctx->dispatch, INFINITY);
+        }
+    }
+
+    mp_mutex_lock(&ctx->lock);
+    // Barrier - guarantee uninit() has left the lock region. It will access ctx
+    // until the lock has been released, so we must not proceed with destruction
+    // before we can acquire the lock. (The opposite, uninit() acquiring the
+    // lock, can not happen anymore at this point - we've waited for VO uninit,
+    // and prevented that new VOs can be created.)
+    mp_mutex_unlock(&ctx->lock);
+
+    assert(!atomic_load(&ctx->in_use));
+    assert(!ctx->vo);
+
+    // With the dispatch queue not being served anymore, allow frame free
+    // requests from this thread to be served directly.
+    if (ctx->dr)
+        dr_helper_acquire_thread(ctx->dr);
+
+    // Possibly remaining outstanding work.
+    mp_dispatch_queue_process(ctx->dispatch, 0);
+
+    forget_frames(ctx, true);
+
+    // The renderer is NULL if no backend could be initialized.
+    if (ctx->renderer) {
+        ctx->renderer->fns->destroy(ctx->renderer);
+        talloc_free(ctx->renderer->priv);
+        talloc_free(ctx->renderer);
+    }
+    talloc_free(ctx->dr);
+    talloc_free(ctx->dispatch);
+
+    mp_cond_destroy(&ctx->update_cond);
+    mp_cond_destroy(&ctx->video_wait);
+    mp_mutex_destroy(&ctx->update_lock);
+    mp_mutex_destroy(&ctx->lock);
+    mp_mutex_destroy(&ctx->control_lock);
+
+    talloc_free(ctx);
+}
+
+// Attempt to claim exclusive use of the context (e.g. for a VO). Returns
+// true if the claim succeeded, false if the context was already in use.
+// Note: this must stay lock-free, because it is called while an external
+// leaf lock is held.
+bool mp_render_context_acquire(mpv_render_context *ctx)
+{
+    // Flip in_use from false to true atomically; fails if it was true.
+    bool expected = false;
+    return atomic_compare_exchange_strong(&ctx->in_use, &expected, true);
+}
+
+int mpv_render_context_render(mpv_render_context *ctx, mpv_render_param *params)
+{
+    mp_mutex_lock(&ctx->lock);
+    // Until the unlock further down, all ctx state is accessed under ctx->lock.
+    int do_render =
+        !GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_SKIP_RENDERING, int, 0);
+
+    if (do_render) {
+        int vp_w, vp_h;
+        int err = ctx->renderer->fns->get_target_size(ctx->renderer, params,
+                                                    &vp_w, &vp_h);
+        if (err < 0) {
+            mp_mutex_unlock(&ctx->lock);
+            return err; // pass the backend's error code through unchanged
+        }
+        // VO attached: redo layout on size change or pending resize request.
+        if (ctx->vo && (ctx->vp_w != vp_w || ctx->vp_h != vp_h ||
+                        ctx->need_resize))
+        {
+            ctx->vp_w = vp_w;
+            ctx->vp_h = vp_h;
+
+            m_config_cache_update(ctx->vo_opts_cache);
+
+            struct mp_rect src, dst;
+            struct mp_osd_res osd;
+            mp_get_src_dst_rects(ctx->log, ctx->vo_opts, ctx->vo->driver->caps,
+                                &ctx->img_params, vp_w, abs(vp_h),
+                                1.0, &src, &dst, &osd);
+
+            ctx->renderer->fns->resize(ctx->renderer, &src, &dst, &osd);
+        }
+        ctx->need_resize = false;
+    }
+    // Apply requests flagged by the VO callbacks (reconfig/control/uninit).
+    if (ctx->need_reconfig)
+        ctx->renderer->fns->reconfig(ctx->renderer, &ctx->img_params);
+    ctx->need_reconfig = false;
+
+    if (ctx->need_update_external)
+        ctx->renderer->fns->update_external(ctx->renderer, ctx->vo);
+    ctx->need_update_external = false;
+
+    if (ctx->need_reset) {
+        ctx->renderer->fns->reset(ctx->renderer);
+        if (ctx->cur_frame)
+            ctx->cur_frame->still = true;
+    }
+    ctx->need_reset = false;
+    // Take the queued frame if there is one, else re-reference the current.
+    struct vo_frame *frame = ctx->next_frame;
+    int64_t wait_present_count = ctx->present_count;
+    if (frame) {
+        ctx->next_frame = NULL;
+        if (!(frame->redraw || !frame->current))
+            wait_present_count += 1; // real new frame: wait for flip_page()
+        mp_cond_broadcast(&ctx->video_wait); // flip_page() waits on next_frame
+        talloc_free(ctx->cur_frame);
+        ctx->cur_frame = vo_frame_ref(frame);
+    } else {
+        frame = vo_frame_ref(ctx->cur_frame);
+        if (frame)
+            frame->redraw = true;
+        MP_STATS(ctx, "glcb-noframe");
+    }
+    struct vo_frame dummy = {0}; // stand-in when there is no frame at all
+    if (!frame)
+        frame = &dummy;
+
+    mp_mutex_unlock(&ctx->lock);
+    // The backend's render call below runs without ctx->lock held.
+    MP_STATS(ctx, "glcb-render");
+
+    int err = 0;
+
+    if (do_render)
+        err = ctx->renderer->fns->render(ctx->renderer, params, frame);
+
+    if (frame != &dummy)
+        talloc_free(frame);
+    // If requested, block until present_count has reached wait_present_count.
+    if (GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_BLOCK_FOR_TARGET_TIME,
+                             int, 1))
+    {
+        mp_mutex_lock(&ctx->lock);
+        while (wait_present_count > ctx->present_count)
+            mp_cond_wait(&ctx->video_wait, &ctx->lock);
+        mp_mutex_unlock(&ctx->lock);
+    }
+
+    return err;
+}
+
+void mpv_render_context_report_swap(mpv_render_context *ctx)
+{
+    MP_STATS(ctx, "glcb-reportflip");
+    // Record the swap and wake waiters on video_wait (e.g. flip_page()).
+    mp_mutex_lock(&ctx->lock);
+    ctx->flip_count++;
+    mp_cond_broadcast(&ctx->video_wait);
+    mp_mutex_unlock(&ctx->lock);
+}
+
+uint64_t mpv_render_context_update(mpv_render_context *ctx)
+{
+    // Serve queued dispatch work first (done without holding ctx->lock).
+    mp_dispatch_queue_process(ctx->dispatch, 0);
+
+    // Report whether a frame is queued and waiting to be rendered.
+    mp_mutex_lock(&ctx->lock);
+    bool frame_pending = ctx->next_frame != NULL;
+    mp_mutex_unlock(&ctx->lock);
+
+    return frame_pending ? MPV_RENDER_UPDATE_FRAME : 0;
+}
+
+int mpv_render_context_set_parameter(mpv_render_context *ctx,
+                                     mpv_render_param param)
+{   // Forwarded as-is to the render backend; its return value is passed on.
+    return ctx->renderer->fns->set_parameter(ctx->renderer, param);
+}
+
+int mpv_render_context_get_info(mpv_render_context *ctx,
+                                mpv_render_param param)
+{
+    // Only MPV_RENDER_PARAM_NEXT_FRAME_INFO is handled; every other type
+    // yields MPV_ERROR_NOT_IMPLEMENTED.
+    if (param.type != MPV_RENDER_PARAM_NEXT_FRAME_INFO)
+        return MPV_ERROR_NOT_IMPLEMENTED;
+
+    // param.data is where the result gets written, so it must not be NULL.
+    mpv_render_frame_info *info = param.data;
+    if (!info)
+        return MPV_ERROR_INVALID_PARAMETER;
+    *info = (mpv_render_frame_info){0};
+
+    // next_frame is written by draw_frame()/flip_page() under ctx->lock.
+    mp_mutex_lock(&ctx->lock);
+    struct vo_frame *frame = ctx->next_frame;
+    if (frame) {
+        info->flags =
+            MPV_RENDER_FRAME_INFO_PRESENT |
+            (frame->redraw ? MPV_RENDER_FRAME_INFO_REDRAW : 0) |
+            (frame->repeat ? MPV_RENDER_FRAME_INFO_REPEAT : 0) |
+            (frame->display_synced && !frame->redraw ?
+                MPV_RENDER_FRAME_INFO_BLOCK_VSYNC : 0);
+        info->target_time = frame->pts;
+    }
+    mp_mutex_unlock(&ctx->lock);
+    return 0;
+}
+
+// VO callback: queue a frame for the next mpv_render_context_render() call.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct vo_priv *priv = vo->priv;
+    struct mpv_render_context *ctx = priv->ctx;
+    mp_mutex_lock(&ctx->lock);
+    assert(!ctx->next_frame); // the previously queued frame must be gone
+    ctx->next_frame = vo_frame_ref(frame);
+    ctx->redrawing = frame->redraw || !frame->current;
+    ctx->expected_flip_count = ctx->flip_count + 1;
+    mp_mutex_unlock(&ctx->lock);
+
+    update(ctx); // defined outside this view; called without ctx->lock held
+}
+
+static void flip_page(struct vo *vo)
+{   // VO callback: wait for the queued frame to be rendered and presented.
+    struct vo_priv *p = vo->priv;
+    struct mpv_render_context *ctx = p->ctx;
+    int64_t until = mp_time_ns() + MP_TIME_MS_TO_NS(200);
+    // 'until' is one absolute deadline (now + 200 ms) shared by both waits.
+    mp_mutex_lock(&ctx->lock);
+
+    // Wait until frame was rendered
+    while (ctx->next_frame) {
+        if (mp_cond_timedwait_until(&ctx->video_wait, &ctx->lock, until)) {
+            if (ctx->next_frame) { // still queued after the failed wait
+                MP_VERBOSE(vo, "mpv_render_context_render() not being called "
+                           "or stuck.\n");
+                goto done;
+            }
+        }
+    }
+
+    // Unblock mpv_render_context_render().
+    ctx->present_count += 1;
+    mp_cond_broadcast(&ctx->video_wait);
+
+    if (ctx->redrawing)
+        goto done; // do not block for redrawing
+
+    // Wait until frame was presented
+    while (ctx->expected_flip_count > ctx->flip_count) {
+        // mpv_render_report_swap() is declared as optional API.
+        // Assume the user calls it consistently _if_ it's called at all.
+        if (!ctx->flip_count)
+            break;
+        if (mp_cond_timedwait_until(&ctx->video_wait, &ctx->lock, until)) {
+            MP_VERBOSE(vo, "mpv_render_report_swap() not being called.\n");
+            goto done;
+        }
+    }
+
+done:
+
+    // Cleanup after the API user is not reacting, or is being unusually slow.
+    if (ctx->next_frame) {
+        talloc_free(ctx->cur_frame);
+        ctx->cur_frame = ctx->next_frame; // keep it as the redraw source
+        ctx->next_frame = NULL;
+        ctx->present_count += 2; // NOTE(review): why 2 rather than 1? confirm
+        mp_cond_signal(&ctx->video_wait);
+        vo_increment_drop_count(vo, 1); // the queued frame was never consumed
+    }
+
+    mp_mutex_unlock(&ctx->lock);
+}
+
+// VO callback: look the format up in the context's imgfmt_supported table.
+// Formats outside [IMGFMT_START, IMGFMT_END) are never supported.
+static int query_format(struct vo *vo, int format)
+{
+    struct vo_priv *priv = vo->priv;
+    struct mpv_render_context *ctx = priv->ctx;
+    mp_mutex_lock(&ctx->lock);
+    bool in_range = format >= IMGFMT_START && format < IMGFMT_END;
+    bool supported = in_range && ctx->imgfmt_supported[format - IMGFMT_START];
+    mp_mutex_unlock(&ctx->lock);
+    return supported;
+}
+
+// Dispatch-queue callback used by control(). p points to a void*[4] holding
+// {ctx, request (as intptr_t), data, pointer to the int result}.
+static void run_control_on_render_thread(void *p)
+{
+    void **args = p;
+    struct mpv_render_context *ctx = args[0];
+    int request = (intptr_t)args[1];
+    void *data = args[2];
+    int ret = VO_NOTIMPL;
+
+    if (request == VOCTRL_SCREENSHOT) {
+        // Hold our own reference so the frame can be used without the lock.
+        mp_mutex_lock(&ctx->lock);
+        struct vo_frame *shot = vo_frame_ref(ctx->cur_frame);
+        mp_mutex_unlock(&ctx->lock);
+        // Any output goes through data; ret is left at VO_NOTIMPL here.
+        if (shot && ctx->renderer->fns->screenshot)
+            ctx->renderer->fns->screenshot(ctx->renderer, shot, data);
+        talloc_free(shot);
+    } else if (request == VOCTRL_PERFORMANCE_DATA) {
+        if (ctx->renderer->fns->perfdata) {
+            ctx->renderer->fns->perfdata(ctx->renderer, data);
+            ret = VO_TRUE;
+        }
+    }
+
+    // Hand the result back through the caller-provided int.
+    *(int *)args[3] = ret;
+}
+
+// VO callback. Requests are handled in up to three stages: state flags that
+// the next mpv_render_context_render() call consumes, requests executed via
+// the dispatch queue, and finally the optional control callback.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct vo_priv *priv = vo->priv;
+    struct mpv_render_context *ctx = priv->ctx;
+
+    switch (request) {
+    case VOCTRL_RESET:
+        mp_mutex_lock(&ctx->lock);
+        forget_frames(ctx, false);
+        ctx->need_reset = true;
+        mp_mutex_unlock(&ctx->lock);
+        vo->want_redraw = true;
+        return VO_TRUE;
+    case VOCTRL_PAUSE:
+    case VOCTRL_SET_EQUALIZER:
+        // No context flag is set for these two; only a redraw is requested.
+        vo->want_redraw = true;
+        return VO_TRUE;
+    case VOCTRL_SET_PANSCAN:
+        mp_mutex_lock(&ctx->lock);
+        ctx->need_resize = true;
+        mp_mutex_unlock(&ctx->lock);
+        vo->want_redraw = true;
+        return VO_TRUE;
+    case VOCTRL_UPDATE_RENDER_OPTS:
+        mp_mutex_lock(&ctx->lock);
+        ctx->need_update_external = true;
+        mp_mutex_unlock(&ctx->lock);
+        vo->want_redraw = true;
+        return VO_TRUE;
+    }
+
+    // VOCTRLs to be run on the renderer thread (if possible at all).
+    bool via_dispatch = request == VOCTRL_SCREENSHOT ||
+                        request == VOCTRL_PERFORMANCE_DATA;
+    if (ctx->advanced_control && via_dispatch) {
+        int ret;
+        void *args[] = {ctx, (void *)(intptr_t)request, data, &ret};
+        mp_dispatch_run(ctx->dispatch, run_control_on_render_thread, args);
+        return ret;
+    }
+
+    // Everything else goes to the control callback, if one is set.
+    int result = VO_NOTIMPL;
+    mp_mutex_lock(&ctx->control_lock);
+    if (ctx->control_cb) {
+        int events = 0;
+        result = ctx->control_cb(vo, ctx->control_cb_ctx, &events, request,
+                                 data);
+        vo_event(vo, events);
+    }
+    mp_mutex_unlock(&ctx->control_lock);
+
+    return result;
+}
+
+// VO callback: forward the image request to dr_helper_get_image(), or return
+// NULL when the context has no dr helper.
+static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h,
+                                  int stride_align, int flags)
+{
+    struct vo_priv *priv = vo->priv;
+    struct mpv_render_context *ctx = priv->ctx;
+    if (!ctx->dr)
+        return NULL;
+    return dr_helper_get_image(ctx->dr, imgfmt, w, h, stride_align, flags);
+}
+
+// VO callback: store the new image parameters and flag the context so the
+// next mpv_render_context_render() call reconfigures and resizes.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct vo_priv *priv = vo->priv;
+    struct mpv_render_context *ctx = priv->ctx;
+
+    mp_mutex_lock(&ctx->lock);
+    forget_frames(ctx, true);
+    ctx->need_resize = ctx->need_reconfig = true;
+    ctx->img_params = *params;
+    mp_mutex_unlock(&ctx->lock);
+
+    control(vo, VOCTRL_RECONFIG, NULL); // result is deliberately not checked
+    return 0;
+}
+
+// VO callback: detach this VO from the render context and release it.
+static void uninit(struct vo *vo)
+{
+    struct vo_priv *priv = vo->priv;
+    struct mpv_render_context *ctx = priv->ctx;
+
+    control(vo, VOCTRL_UNINIT, NULL);
+
+    mp_mutex_lock(&ctx->lock);
+
+    // Drop all frames and flag every kind of pending renderer update.
+    forget_frames(ctx, true);
+    ctx->img_params = (struct mp_image_params){0};
+    ctx->need_reconfig = ctx->need_resize = true;
+    ctx->need_update_external = ctx->need_reset = true;
+    ctx->vo = NULL;
+
+    // Neither of the following normally requires ctx->lock, but ctx itself
+    // may become invalid as soon as ctx->lock is released.
+    bool was_in_use = atomic_exchange(&ctx->in_use, false);
+    assert(was_in_use); // obviously must have been set
+    mp_dispatch_interrupt(ctx->dispatch);
+
+    mp_mutex_unlock(&ctx->lock);
+}
+
+// VO callback: claim the client API's render context for this VO. Fails
+// with -1 when no render context can be acquired.
+static int preinit(struct vo *vo)
+{
+    struct vo_priv *priv = vo->priv;
+
+    priv->ctx = mp_client_api_acquire_render_context(vo->global->client_api);
+    struct mpv_render_context *ctx = priv->ctx;
+    if (!ctx) {
+        if (!vo->probing)
+            MP_FATAL(vo, "No render context set.\n");
+        return -1;
+    }
+
+    mp_mutex_lock(&ctx->lock);
+    ctx->vo = vo;
+    ctx->need_resize = true;
+    ctx->need_update_external = true;
+    mp_mutex_unlock(&ctx->lock);
+
+    vo->hwdec_devs = ctx->hwdec_devs;
+    control(vo, VOCTRL_PREINIT, NULL);
+
+    return 0;
+}
+
+const struct vo_driver video_out_libmpv = { // VO driver entry "libmpv"
+    .name = "libmpv",
+    .description = "render API for libmpv",
+    .caps = VO_CAP_ROTATE90,
+    .priv_size = sizeof(struct vo_priv),
+    .preinit = preinit,
+    .uninit = uninit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .get_image_ts = get_image,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+};
diff --git a/video/out/vo_mediacodec_embed.c b/video/out/vo_mediacodec_embed.c
new file mode 100644
index 0000000..08d3866
--- /dev/null
+++ b/video/out/vo_mediacodec_embed.c
@@ -0,0 +1,127 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <libavcodec/mediacodec.h>
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_mediacodec.h>
+
+#include "common/common.h"
+#include "vo.h"
+#include "video/mp_image.h"
+#include "video/hwdec.h"
+
+struct priv {
+    struct mp_image *next_image; // set by draw_frame(), released by flip_page()
+    struct mp_hwdec_ctx hwctx;   // added to vo->hwdec_devs in preinit()
+};
+
+// Create a MediaCodec hwdevice using vo->opts->WinID as surface, or NULL.
+static AVBufferRef *create_mediacodec_device_ref(struct vo *vo)
+{
+    AVBufferRef *ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_MEDIACODEC);
+    if (!ref)
+        return NULL;
+
+    assert(vo->opts->WinID != 0 && vo->opts->WinID != -1);
+    AVHWDeviceContext *dev = (void *)ref->data;
+    AVMediaCodecDeviceContext *mc = dev->hwctx;
+    mc->surface = (void *)(intptr_t)(vo->opts->WinID);
+
+    if (av_hwdevice_ctx_init(ref) < 0)
+        av_buffer_unref(&ref); // also resets ref to NULL
+    return ref;
+}
+
+// VO callback: set up vo->hwdec_devs with a single MediaCodec device.
+static int preinit(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+
+    vo->hwdec_devs = hwdec_devices_create();
+    priv->hwctx = (struct mp_hwdec_ctx){
+        .av_device_ref = create_mediacodec_device_ref(vo),
+        .driver_name = "mediacodec_embed",
+        .hw_imgfmt = IMGFMT_MEDIACODEC,
+    };
+    if (!priv->hwctx.av_device_ref) {
+        MP_VERBOSE(vo, "Failed to create hwdevice_ctx\n");
+        return -1;
+    }
+    hwdec_devices_add(vo->hwdec_devs, &priv->hwctx);
+    return 0;
+}
+
+// VO callback: release the queued MediaCodec buffer (render=1), then unref.
+static void flip_page(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    struct mp_image *img = priv->next_image;
+    if (img) {
+        av_mediacodec_release_buffer((AVMediaCodecBuffer *)img->planes[3], 1);
+        mp_image_unrefp(&priv->next_image);
+    }
+}
+
+// VO callback: queue a new reference to the frame's image for flip_page().
+// Redraws and repeats queue nothing (next_image becomes NULL).
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *priv = vo->priv;
+    bool fresh = !(frame->redraw || frame->repeat);
+    struct mp_image *queued = fresh ? mp_image_new_ref(frame->current) : NULL;
+
+    talloc_free(priv->next_image);
+    priv->next_image = queued;
+}
+
+static int query_format(struct vo *vo, int format)
+{   // VO callback: IMGFMT_MEDIACODEC is the only format reported as supported.
+    return format == IMGFMT_MEDIACODEC;
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{   // VO callback: no request is handled; always VO_NOTIMPL.
+    return VO_NOTIMPL;
+}
+
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{   // VO callback: params are ignored; always returns 0.
+    return 0;
+}
+
+// VO callback: drop the queued image, then unregister and unref the device.
+static void uninit(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    mp_image_unrefp(&priv->next_image);
+    hwdec_devices_remove(vo->hwdec_devs, &priv->hwctx);
+    av_buffer_unref(&priv->hwctx.av_device_ref);
+}
+
+const struct vo_driver video_out_mediacodec_embed = { // "mediacodec_embed" VO
+    .name = "mediacodec_embed",
+    .description = "Android (Embedded MediaCodec Surface)",
+    .caps = VO_CAP_NORETAIN,
+    .priv_size = sizeof(struct priv),
+    .preinit = preinit,
+    .uninit = uninit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+};
diff --git a/video/out/vo_null.c b/video/out/vo_null.c
new file mode 100644
index 0000000..0c49062
--- /dev/null
+++ b/video/out/vo_null.c
@@ -0,0 +1,104 @@
+/*
+ * based on video_out_null.c from mpeg2dec
+ *
+ * Copyright (C) Aaron Holtzman - June 2000
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdlib.h>
+#include "common/msg.h"
+#include "vo.h"
+#include "video/mp_image.h"
+#include "osdep/timer.h"
+#include "options/m_option.h"
+
+struct priv {
+    int64_t last_vsync; // not read or written by any function in this file
+    // Value of the "fps" option below; 0 turns flip_page()'s pacing off.
+    double cfg_fps;
+};
+
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{   // VO callback: intentionally empty, the frame is not used in any way.
+}
+
+// VO callback: when an fps is configured, sleep until the next multiple of
+// the frame duration on the mp_time_ns() clock. Without fps, return at once.
+static void flip_page(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    if (!priv->cfg_fps)
+        return;
+
+    int64_t frame_ns = 1e9 / priv->cfg_fps;
+    int64_t deadline = (mp_time_ns() / frame_ns + 1) * frame_ns;
+    // Keep sleeping in case mp_sleep_ns() returns before the deadline.
+    int64_t now;
+    while ((now = mp_time_ns()) < deadline)
+        mp_sleep_ns(deadline - now);
+}
+
+static int query_format(struct vo *vo, int format)
+{   // VO callback: every format is reported as supported.
+    return 1;
+}
+
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{   // VO callback: params are ignored; always returns 0.
+    return 0;
+}
+
+static void uninit(struct vo *vo)
+{   // VO callback: nothing to release, preinit() acquires nothing.
+}
+
+static int preinit(struct vo *vo)
+{   // VO callback: no setup is performed; always returns 0.
+    return 0;
+}
+
+// VO callback: only VOCTRL_GET_DISPLAY_FPS is answered, and only when an fps
+// was configured; everything else is VO_NOTIMPL.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct priv *priv = vo->priv;
+    if (request == VOCTRL_GET_DISPLAY_FPS && priv->cfg_fps) {
+        double *fps_out = data;
+        *fps_out = priv->cfg_fps;
+        return VO_TRUE;
+    }
+    return VO_NOTIMPL;
+}
+
+#define OPT_BASE_STRUCT struct priv
+const struct vo_driver video_out_null = { // VO driver entry "null"
+    .name = "null",
+    .description = "Null video output",
+    .priv_size = sizeof(struct priv),
+    .options_prefix = "vo-null",
+    .options = (const struct m_option[]) {
+        {"fps", OPT_DOUBLE(cfg_fps), M_RANGE(0, 10000)},
+        {0},
+    },
+    .preinit = preinit,
+    .uninit = uninit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+};
diff --git a/video/out/vo_rpi.c b/video/out/vo_rpi.c
new file mode 100644
index 0000000..55f1a68
--- /dev/null
+++ b/video/out/vo_rpi.c
@@ -0,0 +1,938 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <math.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include <bcm_host.h>
+#include <interface/mmal/mmal.h>
+#include <interface/mmal/util/mmal_util.h>
+#include <interface/mmal/util/mmal_default_components.h>
+#include <interface/mmal/vc/mmal_vc_api.h>
+
+#include <EGL/egl.h>
+#include <EGL/eglext.h>
+
+#include <libavutil/rational.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "opengl/common.h"
+#include "options/m_config.h"
+#include "osdep/timer.h"
+#include "vo.h"
+#include "win_state.h"
+#include "video/mp_image.h"
+#include "sub/osd.h"
+
+#include "opengl/ra_gl.h"
+#include "gpu/video.h"
+
+struct mp_egl_rpi {
+    struct mp_log *log;         // used by the MP_FATAL() calls and GL loading
+    struct GL *gl;              // allocated in mp_egl_rpi_init(), freed in destroy
+    struct ra *ra;              // created by ra_create_gl() in mp_egl_rpi_init()
+    EGLDisplay egl_display;
+    EGLConfig egl_config;       // not assigned by any code visible in this chunk
+    EGLContext egl_context;
+    EGLSurface egl_surface;
+    // yep, the API keeps a pointer to it
+    EGL_DISPMANX_WINDOW_T egl_window;
+};
+
+struct priv {
+    DISPMANX_DISPLAY_HANDLE_T display;      // create_overlays() fails if unset
+    DISPMANX_ELEMENT_HANDLE_T window;       // background element, 0 = none
+    DISPMANX_ELEMENT_HANDLE_T osd_overlay;  // OSD element, 0 = none
+    DISPMANX_UPDATE_HANDLE_T update;        // passed to element add/remove
+    uint32_t w, h;                          // set by update_display_size()
+    uint32_t x, y;                          // window origin, see set_geometry()
+    double display_fps;                     // set in create_overlays()
+
+    double osd_pts;                         // pts used for OSD rendering
+    struct mp_osd_res osd_res;              // filled in by resize()
+    struct m_config_cache *opts_cache;
+
+    struct mp_egl_rpi egl;                  // EGL/GLES state of the OSD renderer
+    struct gl_video *gl_video;              // OSD renderer, NULL without OSD
+    struct mpgl_osd *osd;
+
+    MMAL_COMPONENT_T *renderer;             // frames go to its input[0] port
+    bool renderer_enabled;                  // draw_frame/flip_page bail if false
+
+    bool display_synced, skip_osd;          // skip_osd: no OSD swap on next flip
+    struct mp_image *next_image;            // taken over by flip_page()
+
+    // for RAM input
+    MMAL_POOL_T *swpool;
+
+    mp_mutex display_mutex;                 // held while waiting in wait_next_vsync()
+    mp_cond display_cond;                   // waited on in wait_next_vsync()
+    int64_t vsync_counter;                  // a change ends wait_next_vsync()
+    bool reload_display;                    // when set, ends wait_next_vsync()
+
+    int background_layer;                   // layer of the background element
+    int video_layer;                        // layer given to the MMAL renderer
+    int osd_layer;                          // layer of the OSD element
+
+    int display_nr;                         // MMAL display_num, see resize()
+    int layer;                              // not used in the visible chunk
+    bool background;                        // background element if fullscreen
+    bool enable_osd;                        // gates all OSD setup and rendering
+};
+
+// Magic alignments (in pixels) expected by the MMAL internals.
+#define ALIGN_W 32
+#define ALIGN_H 16
+
+static void recreate_renderer(struct vo *vo);
+
+static void *get_proc_address(const GLubyte *name)
+{
+    void *fn = eglGetProcAddress(name);
+    if (fn)
+        return fn;
+    // EGL 1.4 (supported by the RPI firmware) does not necessarily return
+    // function pointers for core functions, so look in the GLES library too.
+    void *lib = dlopen("/opt/vc/lib/libbrcmGLESv2.so", RTLD_LAZY);
+    if (!lib)
+        return NULL;
+    fn = dlsym(lib, name);
+    dlclose(lib);
+    return fn;
+}
+
+// Pick an EGL window config with 8-bit RGB, no depth buffer and GLES2 support.
+// Returns NULL (after logging a fatal error) if the query fails or is empty.
+static EGLConfig select_fb_config_egl(struct mp_egl_rpi *p)
+{
+    EGLint attributes[] = {
+        EGL_SURFACE_TYPE, EGL_WINDOW_BIT,
+        EGL_RED_SIZE, 8,
+        EGL_GREEN_SIZE, 8,
+        EGL_BLUE_SIZE, 8,
+        EGL_DEPTH_SIZE, 0,
+        EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT,
+        EGL_NONE
+    };
+    // Pre-zeroed: eglChooseConfig() does not touch its outputs on failure.
+    EGLint config_count = 0;
+    EGLConfig config = NULL;
+    if (!eglChooseConfig(p->egl_display, attributes, &config, 1,
+                         &config_count) || !config_count)
+    {
+        MP_FATAL(p, "Could not find EGL configuration!\n");
+        return NULL;
+    }
+    return config;
+}
+
+static void mp_egl_rpi_destroy(struct mp_egl_rpi *p)
+{   // Tear down surface, context and GL table; all handles are reset after.
+    if (p->egl_display)
+        eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE,
+                       EGL_NO_CONTEXT);
+    if (p->egl_surface)
+        eglDestroySurface(p->egl_display, p->egl_surface);
+    p->egl_surface = EGL_NO_SURFACE; // a later call must not destroy it again
+    if (p->egl_context)
+        eglDestroyContext(p->egl_display, p->egl_context);
+    p->egl_context = EGL_NO_CONTEXT;
+    eglReleaseThread();
+    p->egl_display = EGL_NO_DISPLAY;
+    talloc_free(p->gl);
+    p->gl = NULL;
+}
+
+static int mp_egl_rpi_init(struct mp_egl_rpi *p, DISPMANX_ELEMENT_HANDLE_T window,
+                    int w, int h)
+{   // Create a GLES2 EGL context on the dispmanx element. 0 = ok, -1 = fail.
+    p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY);
+    if (!eglInitialize(p->egl_display, NULL, NULL)) {
+        MP_FATAL(p, "EGL failed to initialize.\n");
+        goto fail;
+    }
+    // Select OpenGL ES as the API for the context created below.
+    eglBindAPI(EGL_OPENGL_ES_API);
+
+    EGLConfig config = select_fb_config_egl(p);
+    if (!config)
+        goto fail;
+    // The window struct lives in *p because EGL keeps a pointer to it.
+    p->egl_window = (EGL_DISPMANX_WINDOW_T){
+        .element = window,
+        .width = w,
+        .height = h,
+    };
+    p->egl_surface = eglCreateWindowSurface(p->egl_display, config,
+                                            &p->egl_window, NULL);
+
+    if (p->egl_surface == EGL_NO_SURFACE) {
+        MP_FATAL(p, "Could not create EGL surface!\n");
+        goto fail;
+    }
+    // Request a context with client version 2 (OpenGL ES 2.x).
+    EGLint context_attributes[] = {
+        EGL_CONTEXT_CLIENT_VERSION, 2,
+        EGL_NONE
+    };
+    p->egl_context = eglCreateContext(p->egl_display, config,
+                                      EGL_NO_CONTEXT, context_attributes);
+
+    if (p->egl_context == EGL_NO_CONTEXT) {
+        MP_FATAL(p, "Could not create EGL context!\n");
+        goto fail;
+    }
+    // Make the context current on this thread before loading GL functions.
+    eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface,
+                   p->egl_context);
+
+    p->gl = talloc_zero(NULL, struct GL);
+
+    const char *exts = eglQueryString(p->egl_display, EGL_EXTENSIONS);
+    mpgl_load_functions(p->gl, get_proc_address, exts, p->log);
+    // Both version fields still zero after loading is treated as failure.
+    if (!p->gl->version && !p->gl->es)
+        goto fail;
+
+    p->ra = ra_create_gl(p->gl, p->log);
+    if (!p->ra)
+        goto fail;
+
+    return 0;
+    // Common error exit: release whatever has been set up so far.
+fail:
+    mp_egl_rpi_destroy(p);
+    return -1;
+}
+
+// Lay out mpi's three planes back to back (MMAL_ENCODING_I420) inside buffer.
+// buffer may be NULL, in which case all plane pointers are set to NULL.
+// Returns the buffer space the layout requires.
+static size_t layout_buffer(struct mp_image *mpi, MMAL_BUFFER_HEADER_T *buffer,
+                            struct mp_image_params *params)
+{
+    assert(params->imgfmt == IMGFMT_420P);
+    mp_image_set_params(mpi, params);
+    int aligned_w = MP_ALIGN_UP(params->w, ALIGN_W);
+    int aligned_h = MP_ALIGN_UP(params->h, ALIGN_H);
+    uint8_t *pos = buffer ? buffer->data : NULL;
+    size_t total = 0;
+    for (int plane = 0; plane < 3; plane++) {
+        int div = plane == 0 ? 1 : 2; // planes 1 and 2 are half size each way
+        mpi->planes[plane] = pos;
+        mpi->stride[plane] = aligned_w / div;
+        size_t plane_size = aligned_h / div * mpi->stride[plane];
+        total += plane_size;
+        if (pos)
+            pos += plane_size;
+    }
+    return total;
+}
+
+// Render the OSD into GL framebuffer 0 if gl_video reports an OSD change;
+// otherwise just set skip_osd. Does nothing when the OSD is disabled.
+static void update_osd(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    if (!p->enable_osd)
+        return;
+    if (!gl_video_check_osd_change(p->gl_video, &p->osd_res, p->osd_pts)) {
+        p->skip_osd = true;
+        return;
+    }
+
+    MP_STATS(vo, "start rpi_osd");
+    struct ra_fbo target = {
+        .tex = ra_create_wrapped_fb(p->egl.ra, 0, p->osd_res.w, p->osd_res.h),
+        .flip = true,
+    };
+    struct vo_frame empty = {0}; // zeroed frame: no image is attached to it
+    gl_video_set_osd_pts(p->gl_video, p->osd_pts);
+    gl_video_render_frame(p->gl_video, &empty, target, RENDER_FRAME_DEF);
+    ra_tex_free(p->egl.ra, &target.tex);
+
+    MP_STATS(vo, "stop rpi_osd");
+}
+
+// Push the current rectangles, layer, display number, pixel ratio, rotation
+// and fullscreen state to the MMAL renderer; also resize the OSD renderer.
+static void resize(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    MMAL_PORT_T *input = p->renderer->input[0];
+
+    struct mp_rect src, dst;
+    vo_get_src_dst_rects(vo, &src, &dst, &p->osd_res);
+
+    int src_w = src.x1 - src.x0;
+    int src_h = src.y1 - src.y0;
+    int dst_w = dst.x1 - dst.x0;
+    int dst_h = dst.y1 - dst.y0;
+    int par_x, par_y; // reduced ratio passed as dr.pixel_x / dr.pixel_y
+    av_reduce(&par_x, &par_y, dst_w * src_h, src_w * dst_h, 16000);
+
+    int transforms[] = {MMAL_DISPLAY_ROT0, MMAL_DISPLAY_ROT90,
+                        MMAL_DISPLAY_ROT180, MMAL_DISPLAY_ROT270};
+    int quarter_turns = vo->params ? vo->params->rotate / 90 : 0;
+
+    MMAL_DISPLAYREGION_T dr = {
+        .hdr = { .id = MMAL_PARAMETER_DISPLAYREGION,
+                 .size = sizeof(MMAL_DISPLAYREGION_T), },
+        .src_rect = { .x = src.x0, .y = src.y0, .width = src_w, .height = src_h },
+        .dest_rect = { .x = dst.x0 + p->x, .y = dst.y0 + p->y,
+                       .width = dst_w, .height = dst_h },
+        .layer = p->video_layer,
+        .display_num = p->display_nr,
+        .pixel_x = par_x,
+        .pixel_y = par_y,
+        .transform = transforms[quarter_turns],
+        .fullscreen = vo->opts->fullscreen,
+        .set = MMAL_DISPLAY_SET_SRC_RECT | MMAL_DISPLAY_SET_DEST_RECT |
+               MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_NUM |
+               MMAL_DISPLAY_SET_PIXEL | MMAL_DISPLAY_SET_TRANSFORM |
+               MMAL_DISPLAY_SET_FULLSCREEN,
+    };
+    // For 90/270 degree rotation the source rectangle's axes are exchanged.
+    if (vo->params && (vo->params->rotate % 180) == 90) {
+        MPSWAP(int, dr.src_rect.x, dr.src_rect.y);
+        MPSWAP(int, dr.src_rect.width, dr.src_rect.height);
+    }
+    if (mmal_port_parameter_set(input, &dr.hdr))
+        MP_WARN(vo, "could not set video rectangle\n");
+    if (p->gl_video)
+        gl_video_resize(p->gl_video, &src, &dst, &p->osd_res);
+}
+
+// Remove both dispmanx elements and tear down the OSD's GL/EGL state.
+static void destroy_overlays(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    if (p->window) {
+        vc_dispmanx_element_remove(p->update, p->window);
+        p->window = 0;
+    }
+    gl_video_uninit(p->gl_video);
+    p->gl_video = NULL;
+    ra_free(&p->egl.ra);
+    mp_egl_rpi_destroy(&p->egl);
+    if (p->osd_overlay) {
+        vc_dispmanx_element_remove(p->update, p->osd_overlay);
+        p->osd_overlay = 0;
+    }
+}
+
+// Query the size of display 0 (p->display_nr is not consulted here) and
+// cache it in p->w / p->h. Returns 0 on success, -1 if the query fails.
+static int update_display_size(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    uint32_t new_w = 0, new_h = 0;
+
+    if (graphics_get_display_size(0, &new_w, &new_h) < 0) {
+        MP_FATAL(vo, "Could not get display size.\n");
+        return -1;
+    }
+
+    // Only store and log when something actually changed.
+    if (p->w != new_w || p->h != new_h) {
+        p->w = new_w;
+        p->h = new_h;
+        MP_VERBOSE(vo, "Display size: %dx%d\n", p->w, p->h);
+    }
+    return 0;
+}
+
+static int create_overlays(struct vo *vo)
+{   // (Re)build the background and OSD elements. Returns 0, or -1 on failure.
+    struct priv *p = vo->priv;
+    destroy_overlays(vo);
+    // Everything was just torn down; without a display nothing is rebuilt.
+    if (!p->display)
+        return -1;
+    // Background element: only in fullscreen and if p->background is set.
+    if (vo->opts->fullscreen && p->background) {
+        // Use the whole screen.
+        VC_RECT_T dst = {.width = p->w, .height = p->h};
+        VC_RECT_T src = {.width = 1 << 16, .height = 1 << 16};
+        VC_DISPMANX_ALPHA_T alpha = {
+            .flags = DISPMANX_FLAGS_ALPHA_FIXED_ALL_PIXELS,
+            .opacity = 0xFF,
+        };
+
+        p->window = vc_dispmanx_element_add(p->update, p->display,
+                                            p->background_layer,
+                                            &dst, 0, &src,
+                                            DISPMANX_PROTECTION_NONE,
+                                            &alpha, 0, 0);
+        if (!p->window) {
+            MP_FATAL(vo, "Could not add DISPMANX element.\n");
+            return -1;
+        }
+    }
+    // OSD element (alpha taken from the source) plus its EGL/GLES renderer.
+    if (p->enable_osd) {
+        VC_RECT_T dst = {.x = p->x, .y = p->y,
+                         .width = p->osd_res.w, .height = p->osd_res.h};
+        VC_RECT_T src = {.width = p->osd_res.w << 16, .height = p->osd_res.h << 16};
+        VC_DISPMANX_ALPHA_T alpha = {
+            .flags = DISPMANX_FLAGS_ALPHA_FROM_SOURCE,
+            .opacity = 0xFF,
+        };
+        p->osd_overlay = vc_dispmanx_element_add(p->update, p->display,
+                                                 p->osd_layer,
+                                                 &dst, 0, &src,
+                                                 DISPMANX_PROTECTION_NONE,
+                                                 &alpha, 0, 0);
+        if (!p->osd_overlay) {
+            MP_FATAL(vo, "Could not add DISPMANX element.\n");
+            return -1;
+        }
+
+        if (mp_egl_rpi_init(&p->egl, p->osd_overlay,
+                            p->osd_res.w, p->osd_res.h) < 0)
+        {
+            MP_FATAL(vo, "EGL/GLES initialization for OSD renderer failed.\n");
+            return -1;
+        }
+        p->gl_video = gl_video_init(p->egl.ra, vo->log, vo->global);
+        gl_video_set_clear_color(p->gl_video, (struct m_color){.a = 0});
+        gl_video_set_osd_source(p->gl_video, vo->osd);
+    }
+    // Determine the display refresh rate; it stays 0 if the queries fail.
+    p->display_fps = 0;
+    TV_GET_STATE_RESP_T tvstate;
+    TV_DISPLAY_STATE_T tvstate_disp;
+    if (!vc_tv_get_state(&tvstate) && !vc_tv_get_display_state(&tvstate_disp)) {
+        if (tvstate_disp.state & (VC_HDMI_HDMI | VC_HDMI_DVI)) {
+            p->display_fps = tvstate_disp.display.hdmi.frame_rate;
+
+            HDMI_PROPERTY_PARAM_T param = {
+                .property = HDMI_PROPERTY_PIXEL_CLOCK_TYPE,
+            };
+            if (!vc_tv_hdmi_get_property(&param) &&
+                param.param1 == HDMI_PIXEL_CLOCK_TYPE_NTSC)
+                p->display_fps = p->display_fps / 1.001; // e.g. 60 -> 59.94
+        } else {
+            p->display_fps = tvstate_disp.display.sdtv.frame_rate;
+        }
+    }
+
+    resize(vo);
+
+    vo_event(vo, VO_EVENT_WIN_STATE);
+    // Submit the pending dispmanx update, then open a new one for later use.
+    vc_dispmanx_update_submit_sync(p->update);
+    p->update = vc_dispmanx_update_start(10);
+
+    return 0;
+}
+
+// Decide where the video window goes (the whole display when fullscreen,
+// otherwise the computed window geometry), then apply it and rebuild the
+// overlays. Returns 0 on success, -1 if the overlays could not be created.
+static int set_geometry(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    if (!vo->opts->fullscreen) {
+        struct mp_rect screenrc = {0, 0, p->w, p->h};
+        struct vo_win_geometry geo;
+
+        vo_calc_window_geometry(vo, &screenrc, &geo);
+        vo_apply_window_geometry(vo, &geo);
+
+        p->x = geo.win.x0;
+        p->y = geo.win.y0;
+    } else {
+        p->x = p->y = 0;
+        vo->dwidth = p->w;
+        vo->dheight = p->h;
+    }
+
+    resize(vo);
+
+    return create_overlays(vo) < 0 ? -1 : 0;
+}
+
+// Block until the vsync counter advances, a display reload is flagged, or the
+// timed wait on the condition variable returns non-zero. The deadline is
+// 50 ms after entry.
+static void wait_next_vsync(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    mp_mutex_lock(&p->display_mutex);
+    int64_t deadline = mp_time_ns() + MP_TIME_MS_TO_NS(50);
+    int64_t seen = p->vsync_counter;
+    for (;;) {
+        if (p->vsync_counter != seen || p->reload_display)
+            break;
+        // A non-zero result ends the wait even though nothing changed.
+        if (mp_cond_timedwait_until(&p->display_cond, &p->display_mutex,
+                                    deadline) != 0)
+            break;
+    }
+    mp_mutex_unlock(&p->display_mutex);
+}
+
+// Present what draw_frame() prepared: swap the EGL surface for the OSD (unless
+// skipped), hand the pending image to the renderer's input port, and wait for
+// the next vsync when the frame is display-synced.
+static void flip_page(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    if (!p->renderer_enabled)
+        return;
+
+    // Take over the queued image; the slot is empty from here on.
+    struct mp_image *pending = p->next_image;
+    p->next_image = NULL;
+
+    // For OSD
+    bool swap_osd = p->egl.gl && !p->skip_osd;
+    if (swap_osd)
+        eglSwapBuffers(p->egl.egl_display, p->egl.egl_surface);
+    p->skip_osd = false;
+
+    if (pending) {
+        MMAL_BUFFER_HEADER_T *hdr = (void *)pending->planes[3];
+
+        // Assume this field is free for use by us; input_port_cb() frees the
+        // image through it.
+        hdr->user_data = pending;
+
+        if (mmal_port_send_buffer(p->renderer->input[0], hdr)) {
+            MP_ERR(vo, "could not queue picture!\n");
+            talloc_free(pending);
+        }
+    }
+
+    if (p->display_synced)
+        wait_next_vsync(vo);
+}
+
+// Free callback passed to mp_image_new_custom_ref() in draw_frame(): releases
+// the MMAL buffer header received as the opaque argument.
+static void free_mmal_buffer(void *arg)
+{
+    mmal_buffer_header_release((MMAL_BUFFER_HEADER_T *)arg);
+}
+
+// Prepare the next frame for flip_page(): refresh the OSD if needed and store
+// the image to show in p->next_image. Images that are not already
+// IMGFMT_MMAL are copied into a buffer taken from the software pool and
+// wrapped in a new IMGFMT_MMAL image. Does nothing while the renderer is
+// disabled.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+
+    if (!p->renderer_enabled)
+        return;
+
+    // Only a frame that is neither a redraw nor a repeat gets a new
+    // reference; otherwise mpi stays NULL and no image is queued.
+    mp_image_t *mpi = NULL;
+    if (!frame->redraw && !frame->repeat)
+        mpi = mp_image_new_ref(frame->current);
+
+    // Drop whatever was prepared earlier but not picked up by flip_page().
+    talloc_free(p->next_image);
+    p->next_image = NULL;
+
+    if (mpi)
+        p->osd_pts = mpi->pts;
+
+    // Redraw only if the OSD has meaningfully changed, which we assume it
+    // hasn't when a frame is merely repeated for display sync.
+    p->skip_osd = !frame->redraw && frame->repeat;
+
+    if (!p->skip_osd && p->egl.gl)
+        update_osd(vo);
+
+    p->display_synced = frame->display_synced;
+
+    // Software path: the image has to be copied into an MMAL buffer.
+    if (mpi && mpi->imgfmt != IMGFMT_MMAL) {
+        MMAL_BUFFER_HEADER_T *buffer = mmal_queue_wait(p->swpool->queue);
+        if (!buffer) {
+            talloc_free(mpi);
+            MP_ERR(vo, "Can't allocate buffer.\n");
+            return;
+        }
+        mmal_buffer_header_reset(buffer);
+
+        // Wrapper image; free_mmal_buffer() releases the buffer header when
+        // the wrapper is freed.
+        struct mp_image *new_ref = mp_image_new_custom_ref(NULL, buffer,
+                                                           free_mmal_buffer);
+        if (!new_ref) {
+            mmal_buffer_header_release(buffer);
+            talloc_free(mpi);
+            MP_ERR(vo, "Out of memory.\n");
+            return;
+        }
+
+        // flip_page() reads the buffer header back from planes[3].
+        mp_image_setfmt(new_ref, IMGFMT_MMAL);
+        new_ref->planes[3] = (void *)buffer;
+
+        // dmpi is a temporary view onto the buffer memory, set up by
+        // layout_buffer(), used only as the copy destination.
+        struct mp_image dmpi = {0};
+        buffer->length = layout_buffer(&dmpi, buffer, vo->params);
+        mp_image_copy(&dmpi, mpi);
+
+        // The source reference is no longer needed after the copy.
+        talloc_free(mpi);
+        mpi = new_ref;
+    }
+
+    p->next_image = mpi;
+}
+
+// Report format support: only IMGFMT_MMAL and IMGFMT_420P are accepted.
+// Returns 1 for a supported format, 0 otherwise.
+static int query_format(struct vo *vo, int format)
+{
+    switch (format) {
+    case IMGFMT_MMAL:
+    case IMGFMT_420P:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
+// Translate an mpv colorspace into the MMAL color space constant. Anything
+// other than BT.601, BT.709 and SMPTE 240M maps to MMAL_COLOR_SPACE_UNKNOWN.
+static MMAL_FOURCC_T map_csp(enum mp_csp csp)
+{
+    if (csp == MP_CSP_BT_601)
+        return MMAL_COLOR_SPACE_ITUR_BT601;
+    if (csp == MP_CSP_BT_709)
+        return MMAL_COLOR_SPACE_ITUR_BT709;
+    if (csp == MP_CSP_SMPTE_240M)
+        return MMAL_COLOR_SPACE_SMPTE240M;
+    return MMAL_COLOR_SPACE_UNKNOWN;
+}
+
+// Callback installed on the renderer's control port in reconfig(): the buffer
+// is released without looking at its contents.
+static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    (void)port;
+    mmal_buffer_header_release(buffer);
+}
+
+// Callback installed on the renderer's input port in reconfig(): frees the
+// image that flip_page() stored in the buffer's user_data field.
+static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer)
+{
+    (void)port;
+    talloc_free(buffer->user_data);
+}
+
+// Stop the MMAL renderer if it is running (disable and flush the control and
+// input ports, then disable the component), destroy the software buffer pool
+// and mark the renderer as disabled.
+static void disable_renderer(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    if (p->renderer_enabled) {
+        // Only touch the component while it is known to be enabled.
+        MMAL_PORT_T *ctrl = p->renderer->control;
+        MMAL_PORT_T *in = p->renderer->input[0];
+
+        mmal_port_disable(ctrl);
+        mmal_port_disable(in);
+
+        mmal_port_flush(ctrl);
+        mmal_port_flush(in);
+
+        mmal_component_disable(p->renderer);
+    }
+
+    mmal_pool_destroy(p->swpool);
+    p->swpool = NULL;
+
+    p->renderer_enabled = false;
+}
+
+// Configure the MMAL renderer for new video parameters.
+//
+// Disables the current renderer, commits the new input port format, sizes the
+// port buffers (creating a software buffer pool for non-opaque input), sets up
+// geometry and overlays, and finally enables the ports and the component.
+// Returns 0 on success and -1 on any failure, including a missing display.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *p = vo->priv;
+    MMAL_PORT_T *input = p->renderer->input[0];
+    // IMGFMT_MMAL frames are passed through as opaque buffers; everything
+    // else is fed as I420.
+    bool opaque = params->imgfmt == IMGFMT_MMAL;
+
+    if (!p->display)
+        return -1;
+
+    disable_renderer(vo);
+
+    // Width/height are aligned up; the crop rectangle carries the real size.
+    input->format->encoding = opaque ? MMAL_ENCODING_OPAQUE : MMAL_ENCODING_I420;
+    input->format->es->video.width = MP_ALIGN_UP(params->w, ALIGN_W);
+    input->format->es->video.height = MP_ALIGN_UP(params->h, ALIGN_H);
+    input->format->es->video.crop = (MMAL_RECT_T){0, 0, params->w, params->h};
+    input->format->es->video.par = (MMAL_RATIONAL_T){params->p_w, params->p_h};
+    input->format->es->video.color_space = map_csp(params->color.space);
+
+    if (mmal_port_format_commit(input))
+        return -1;
+
+    // Use the larger of minimum and recommended, plus 3 extra buffers.
+    input->buffer_num = MPMAX(input->buffer_num_min,
+                              input->buffer_num_recommended) + 3;
+    input->buffer_size = MPMAX(input->buffer_size_min,
+                               input->buffer_size_recommended);
+
+    if (!opaque) {
+        // draw_frame() fills pool buffers using layout_buffer(), so the size
+        // computed here must agree with what the port asks for.
+        size_t size = layout_buffer(&(struct mp_image){0}, NULL, params);
+        if (input->buffer_size != size) {
+            MP_FATAL(vo, "We disagree with MMAL about buffer sizes.\n");
+            return -1;
+        }
+
+        p->swpool = mmal_pool_create(input->buffer_num, input->buffer_size);
+        if (!p->swpool) {
+            MP_FATAL(vo, "Could not allocate buffer pool.\n");
+            return -1;
+        }
+    }
+
+    if (set_geometry(vo) < 0)
+        return -1;
+
+    // NOTE(review): the flag is set before the ports and the component are
+    // enabled, so it stays true if one of the calls below fails.
+    p->renderer_enabled = true;
+
+    if (mmal_port_enable(p->renderer->control, control_port_cb))
+        return -1;
+
+    if (mmal_port_enable(input, input_port_cb))
+        return -1;
+
+    if (mmal_component_enable(p->renderer)) {
+        MP_FATAL(vo, "Failed to enable video renderer.\n");
+        return -1;
+    }
+
+    resize(vo);
+
+    return 0;
+}
+
+// Capture the current display contents into a newly allocated IMGFMT_BGR0
+// image of size p->w x p->h.
+//
+// Returns the image, or NULL if there is no display, allocation fails, or any
+// of the DISPMANX calls fails.
+static struct mp_image *take_screenshot(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    if (!p->display)
+        return NULL;
+
+    struct mp_image *img = mp_image_alloc(IMGFMT_BGR0, p->w, p->h);
+    if (!img)
+        return NULL;
+
+    // Width goes in the low 16 bits; width * 4 (presumably the byte pitch of
+    // an ARGB8888 row) goes in the high 16 bits.
+    DISPMANX_RESOURCE_HANDLE_T resource =
+        vc_dispmanx_resource_create(VC_IMAGE_ARGB8888,
+                                    img->w | ((img->w * 4) << 16), img->h,
+                                    &(int32_t){0});
+    // No resource was created, so there is nothing to delete: skip straight
+    // to releasing the image instead of deleting a zero handle.
+    if (!resource)
+        goto fail_image;
+
+    if (vc_dispmanx_snapshot(p->display, resource, 0))
+        goto fail_resource;
+
+    VC_RECT_T rc = {.width = img->w, .height = img->h};
+    if (vc_dispmanx_resource_read_data(resource, &rc, img->planes[0], img->stride[0]))
+        goto fail_resource;
+
+    vc_dispmanx_resource_delete(resource);
+    return img;
+
+fail_resource:
+    vc_dispmanx_resource_delete(resource);
+fail_image:
+    talloc_free(img);
+    return NULL;
+}
+
+// React to a change of the fullscreen option: recompute the geometry if the
+// renderer is running, and always request a redraw.
+static void set_fullscreen(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    if (p->renderer_enabled)
+        set_geometry(vo);
+
+    vo->want_redraw = true;
+}
+
+// VO control entry point. Handles the requests listed in the switch and
+// returns VO_TRUE for them; every other request yields VO_NOTIMPL.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct priv *p = vo->priv;
+
+    switch (request) {
+    case VOCTRL_VO_OPTS_CHANGED: {
+        // Walk all changed options; only the fullscreen option is acted on.
+        void *opt;
+        while (m_config_cache_get_next_changed(p->opts_cache, &opt)) {
+            struct mp_vo_opts *opts = p->opts_cache->opts;
+            if (&opts->fullscreen == opt)
+                set_fullscreen(vo);
+        }
+        return VO_TRUE;
+    }
+    case VOCTRL_SET_PANSCAN:
+        if (p->renderer_enabled)
+            resize(vo);
+        vo->want_redraw = true;
+        return VO_TRUE;
+    case VOCTRL_REDRAW_FRAME:
+        // NOTE(review): called without the p->egl.gl check that draw_frame()
+        // applies; presumably update_osd() guards itself - confirm.
+        update_osd(vo);
+        return VO_TRUE;
+    case VOCTRL_SCREENSHOT_WIN:
+        // data points to a struct mp_image pointer; it is set to NULL when
+        // the screenshot fails, and VO_TRUE is returned either way.
+        *(struct mp_image **)data = take_screenshot(vo);
+        return VO_TRUE;
+    case VOCTRL_CHECK_EVENTS: {
+        // Fetch and clear the reload flag (set by tv_callback()) under the
+        // mutex, then do the actual recreation outside of it.
+        mp_mutex_lock(&p->display_mutex);
+        bool reload_required = p->reload_display;
+        p->reload_display = false;
+        mp_mutex_unlock(&p->display_mutex);
+        if (reload_required)
+            recreate_renderer(vo);
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_DISPLAY_FPS:
+        *(double *)data = p->display_fps;
+        return VO_TRUE;
+    case VOCTRL_GET_DISPLAY_RES:
+        // data is treated as an int[2]: width, then height.
+        ((int *)data)[0] = p->w;
+        ((int *)data)[1] = p->h;
+        return VO_TRUE;
+    }
+
+    return VO_NOTIMPL;
+}
+
+// TV service callback (registered in preinit()). Whatever the reason, it
+// flags the display for reload, wakes anyone waiting on the display condition
+// and wakes up the VO; the reload itself happens in control() on
+// VOCTRL_CHECK_EVENTS.
+static void tv_callback(void *callback_data, uint32_t reason, uint32_t param1,
+                        uint32_t param2)
+{
+    struct vo *vo = callback_data;
+    struct priv *priv = vo->priv;
+
+    (void)reason;
+    (void)param1;
+    (void)param2;
+
+    mp_mutex_lock(&priv->display_mutex);
+    priv->reload_display = true;
+    mp_cond_signal(&priv->display_cond);
+    mp_mutex_unlock(&priv->display_mutex);
+
+    vo_wakeup(vo);
+}
+
+// DISPMANX vsync callback (installed in recreate_dispmanx()): bumps the vsync
+// counter under the display mutex and signals the condition variable that
+// wait_next_vsync() waits on.
+static void vsync_callback(DISPMANX_UPDATE_HANDLE_T u, void *arg)
+{
+    struct vo *vo = arg;
+    struct priv *priv = vo->priv;
+
+    (void)u;
+
+    mp_mutex_lock(&priv->display_mutex);
+    priv->vsync_counter++;
+    mp_cond_signal(&priv->display_cond);
+    mp_mutex_unlock(&priv->display_mutex);
+}
+
+// Tear down everything tied to the DISPMANX display: the renderer, the
+// overlays, the pending update (submitted synchronously) and the display
+// handle together with its vsync callback. Both handles end up as 0.
+static void destroy_dispmanx(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    disable_renderer(vo);
+    destroy_overlays(vo);
+
+    if (p->update) {
+        vc_dispmanx_update_submit_sync(p->update);
+        p->update = 0;
+    }
+
+    if (p->display) {
+        // Unhook the vsync callback before closing the display.
+        vc_dispmanx_vsync_callback(p->display, NULL, NULL);
+        vc_dispmanx_display_close(p->display);
+        p->display = 0;
+    }
+}
+
+// Open the configured display and start a DISPMANX update. On success the
+// display size is refreshed and the vsync callback installed (returns 0). If
+// either handle could not be obtained, both are reset to 0 and -1 is returned.
+static int recreate_dispmanx(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    p->display = vc_dispmanx_display_open(p->display_nr);
+    p->update = vc_dispmanx_update_start(0);
+
+    if (p->display && p->update) {
+        update_display_size(vo);
+        vc_dispmanx_vsync_callback(p->display, vsync_callback, vo);
+        return 0;
+    }
+
+    MP_FATAL(vo, "Could not get DISPMANX objects.\n");
+    if (p->display)
+        vc_dispmanx_display_close(p->display);
+    p->display = 0;
+    p->update = 0;
+    return -1;
+}
+
+// Rebuild the DISPMANX state after a display change and, if the VO already
+// has video parameters, reconfigure with them. The result of
+// recreate_dispmanx() is not checked here; reconfig() fails when no display
+// is available.
+static void recreate_renderer(struct vo *vo)
+{
+    MP_WARN(vo, "Recreating renderer after display change.\n");
+
+    destroy_dispmanx(vo);
+    recreate_dispmanx(vo);
+
+    if (vo->params && reconfig(vo, vo->params) < 0)
+        MP_FATAL(vo, "Recreation failed.\n");
+}
+
+// Release everything set up by preinit() and later calls. Also used as the
+// failure path of preinit(), so it has to cope with partially initialized
+// state (e.g. p->renderer may still be NULL).
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    // Stop display change notifications first.
+    vc_tv_unregister_callback_full(tv_callback, vo);
+
+    // Image prepared by draw_frame() but not yet flipped, if any.
+    talloc_free(p->next_image);
+
+    destroy_dispmanx(vo);
+
+    if (p->renderer)
+        mmal_component_release(p->renderer);
+
+    mmal_vc_deinit();
+
+    mp_cond_destroy(&p->display_cond);
+    mp_mutex_destroy(&p->display_mutex);
+}
+
+// One-time initialization of the VO: derives the layer numbers, initializes
+// the Broadcom host library and MMAL, sets up synchronization objects and the
+// option cache, opens the display, creates the MMAL video renderer component
+// and registers for TV service callbacks.
+// Returns 0 on success, -1 on failure.
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    // Three consecutive layers starting at the configured one:
+    // background, video, OSD.
+    p->background_layer = p->layer;
+    p->video_layer = p->layer + 1;
+    p->osd_layer = p->layer + 2;
+
+    p->egl.log = vo->log;
+
+    bcm_host_init();
+
+    // Fails before the mutex/cond exist, hence a plain return rather than
+    // going through uninit().
+    if (mmal_vc_init()) {
+        MP_FATAL(vo, "Could not initialize MMAL.\n");
+        return -1;
+    }
+
+    mp_mutex_init(&p->display_mutex);
+    mp_cond_init(&p->display_cond);
+
+    p->opts_cache = m_config_cache_alloc(p, vo->global, &vo_sub_opts);
+
+    if (recreate_dispmanx(vo) < 0)
+        goto fail;
+
+    // recreate_dispmanx() already calls this but ignores the result; here a
+    // failure aborts initialization.
+    if (update_display_size(vo) < 0)
+        goto fail;
+
+    if (mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &p->renderer))
+    {
+        MP_FATAL(vo, "Could not create MMAL renderer.\n");
+        goto fail;
+    }
+
+    vc_tv_register_callback(tv_callback, vo);
+
+    return 0;
+
+fail:
+    uninit(vo);
+    return -1;
+}
+
+// Driver sub-options; each entry maps onto a field of struct priv. With the
+// "rpi" options_prefix set in the driver struct these presumably surface as
+// --rpi-display, --rpi-layer, etc. - confirm against the option code.
+#define OPT_BASE_STRUCT struct priv
+static const struct m_option options[] = {
+    // Display number passed to vc_dispmanx_display_open().
+    {"display", OPT_INT(display_nr)},
+    // Base layer; preinit() derives the background/video/OSD layers from it.
+    {"layer", OPT_INT(layer), OPTDEF_INT(-10)},
+    {"background", OPT_BOOL(background)},
+    // OSD rendering switch, default 1.
+    {"osd", OPT_BOOL(enable_osd), OPTDEF_INT(1)},
+    {0},
+};
+
+// Driver descriptor for the "rpi" video output: wires the functions defined
+// in this file into the generic VO interface.
+const struct vo_driver video_out_rpi = {
+    .description = "Raspberry Pi (MMAL)",
+    .name = "rpi",
+    .caps = VO_CAP_ROTATE90,
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+    .options = options,
+    .options_prefix = "rpi",
+};
diff --git a/video/out/vo_sdl.c b/video/out/vo_sdl.c
new file mode 100644
index 0000000..5f4c027
--- /dev/null
+++ b/video/out/vo_sdl.c
@@ -0,0 +1,992 @@
+/*
+ * video output driver for SDL 2.0+
+ *
+ * Copyright (C) 2012 Rudolf Polzer <divVerent@xonotic.org>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <sys/stat.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <errno.h>
+#include <assert.h>
+
+#include <SDL.h>
+
+#include "input/input.h"
+#include "input/keycodes.h"
+#include "input/input.h"
+#include "common/msg.h"
+#include "options/m_config.h"
+#include "options/options.h"
+
+#include "osdep/timer.h"
+
+#include "sub/osd.h"
+
+#include "video/mp_image.h"
+
+#include "win_state.h"
+#include "vo.h"
+
+// One row of the SDL <-> mpv pixel format mapping.
+struct formatmap_entry {
+    Uint32 sdl;         // SDL_PIXELFORMAT_* value
+    unsigned int mpv;   // matching IMGFMT_* value
+    int is_rgba;        // non-zero for formats with an alpha channel; a
+                        // renderer must offer one of these to be usable
+                        // (see is_good_renderer())
+};
+// Mapping table. SDL's packed 32 bit format names describe the layout within
+// a native-endian word while the mpv names used here are listed per byte
+// order, hence the two endian-dependent variants of the 32 bit section.
+const struct formatmap_entry formats[] = {
+    {SDL_PIXELFORMAT_YV12, IMGFMT_420P, 0},
+    {SDL_PIXELFORMAT_IYUV, IMGFMT_420P, 0},
+    {SDL_PIXELFORMAT_UYVY, IMGFMT_UYVY, 0},
+    //{SDL_PIXELFORMAT_YVYU, IMGFMT_YVYU, 0},
+#if BYTE_ORDER == BIG_ENDIAN
+    {SDL_PIXELFORMAT_RGB888, IMGFMT_0RGB, 0}, // RGB888 means XRGB8888
+    {SDL_PIXELFORMAT_RGBX8888, IMGFMT_RGB0, 0}, // has no alpha -> bad for OSD
+    {SDL_PIXELFORMAT_BGR888, IMGFMT_0BGR, 0}, // BGR888 means XBGR8888
+    {SDL_PIXELFORMAT_BGRX8888, IMGFMT_BGR0, 0}, // has no alpha -> bad for OSD
+    {SDL_PIXELFORMAT_ARGB8888, IMGFMT_ARGB, 1}, // matches SUBBITMAP_BGRA
+    {SDL_PIXELFORMAT_RGBA8888, IMGFMT_RGBA, 1},
+    {SDL_PIXELFORMAT_ABGR8888, IMGFMT_ABGR, 1},
+    {SDL_PIXELFORMAT_BGRA8888, IMGFMT_BGRA, 1},
+#else
+    {SDL_PIXELFORMAT_RGB888, IMGFMT_BGR0, 0}, // RGB888 means XRGB8888
+    {SDL_PIXELFORMAT_RGBX8888, IMGFMT_0BGR, 0}, // has no alpha -> bad for OSD
+    {SDL_PIXELFORMAT_BGR888, IMGFMT_RGB0, 0}, // BGR888 means XBGR8888
+    {SDL_PIXELFORMAT_BGRX8888, IMGFMT_0RGB, 0}, // has no alpha -> bad for OSD
+    {SDL_PIXELFORMAT_ARGB8888, IMGFMT_BGRA, 1}, // matches SUBBITMAP_BGRA
+    {SDL_PIXELFORMAT_RGBA8888, IMGFMT_ABGR, 1},
+    {SDL_PIXELFORMAT_ABGR8888, IMGFMT_RGBA, 1},
+    {SDL_PIXELFORMAT_BGRA8888, IMGFMT_ARGB, 1},
+#endif
+    {SDL_PIXELFORMAT_RGB24, IMGFMT_RGB24, 0},
+    {SDL_PIXELFORMAT_BGR24, IMGFMT_BGR24, 0},
+    {SDL_PIXELFORMAT_RGB565, IMGFMT_RGB565, 0},
+};
+
+// One row of the SDL keycode -> mpv key code mapping.
+struct keymap_entry {
+    SDL_Keycode sdl;    // SDLK_* value
+    int mpv;            // MP_KEY_* value
+};
+// Keys translated on SDL_KEYDOWN. Only non-printable keys are listed here;
+// printable input arrives through SDL_TEXTINPUT (see wait_events()).
+const struct keymap_entry keys[] = {
+    {SDLK_RETURN, MP_KEY_ENTER},
+    {SDLK_ESCAPE, MP_KEY_ESC},
+    {SDLK_BACKSPACE, MP_KEY_BACKSPACE},
+    {SDLK_TAB, MP_KEY_TAB},
+    {SDLK_PRINTSCREEN, MP_KEY_PRINT},
+    {SDLK_PAUSE, MP_KEY_PAUSE},
+    {SDLK_INSERT, MP_KEY_INSERT},
+    {SDLK_HOME, MP_KEY_HOME},
+    {SDLK_PAGEUP, MP_KEY_PAGE_UP},
+    {SDLK_DELETE, MP_KEY_DELETE},
+    {SDLK_END, MP_KEY_END},
+    {SDLK_PAGEDOWN, MP_KEY_PAGE_DOWN},
+    {SDLK_RIGHT, MP_KEY_RIGHT},
+    {SDLK_LEFT, MP_KEY_LEFT},
+    {SDLK_DOWN, MP_KEY_DOWN},
+    {SDLK_UP, MP_KEY_UP},
+    // keypad
+    {SDLK_KP_ENTER, MP_KEY_KPENTER},
+    {SDLK_KP_1, MP_KEY_KP1},
+    {SDLK_KP_2, MP_KEY_KP2},
+    {SDLK_KP_3, MP_KEY_KP3},
+    {SDLK_KP_4, MP_KEY_KP4},
+    {SDLK_KP_5, MP_KEY_KP5},
+    {SDLK_KP_6, MP_KEY_KP6},
+    {SDLK_KP_7, MP_KEY_KP7},
+    {SDLK_KP_8, MP_KEY_KP8},
+    {SDLK_KP_9, MP_KEY_KP9},
+    {SDLK_KP_0, MP_KEY_KP0},
+    {SDLK_KP_PERIOD, MP_KEY_KPDEC},
+    // system and media keys
+    {SDLK_POWER, MP_KEY_POWER},
+    {SDLK_MENU, MP_KEY_MENU},
+    {SDLK_STOP, MP_KEY_STOP},
+    {SDLK_MUTE, MP_KEY_MUTE},
+    {SDLK_VOLUMEUP, MP_KEY_VOLUME_UP},
+    {SDLK_VOLUMEDOWN, MP_KEY_VOLUME_DOWN},
+    // keypad comma shares the mpv code of the keypad period
+    {SDLK_KP_COMMA, MP_KEY_KPDEC},
+    {SDLK_AUDIONEXT, MP_KEY_NEXT},
+    {SDLK_AUDIOPREV, MP_KEY_PREV},
+    {SDLK_AUDIOSTOP, MP_KEY_STOP},
+    {SDLK_AUDIOPLAY, MP_KEY_PLAY},
+    {SDLK_AUDIOMUTE, MP_KEY_MUTE},
+    // function keys
+    {SDLK_F1, MP_KEY_F + 1},
+    {SDLK_F2, MP_KEY_F + 2},
+    {SDLK_F3, MP_KEY_F + 3},
+    {SDLK_F4, MP_KEY_F + 4},
+    {SDLK_F5, MP_KEY_F + 5},
+    {SDLK_F6, MP_KEY_F + 6},
+    {SDLK_F7, MP_KEY_F + 7},
+    {SDLK_F8, MP_KEY_F + 8},
+    {SDLK_F9, MP_KEY_F + 9},
+    {SDLK_F10, MP_KEY_F + 10},
+    {SDLK_F11, MP_KEY_F + 11},
+    {SDLK_F12, MP_KEY_F + 12},
+    {SDLK_F13, MP_KEY_F + 13},
+    {SDLK_F14, MP_KEY_F + 14},
+    {SDLK_F15, MP_KEY_F + 15},
+    {SDLK_F16, MP_KEY_F + 16},
+    {SDLK_F17, MP_KEY_F + 17},
+    {SDLK_F18, MP_KEY_F + 18},
+    {SDLK_F19, MP_KEY_F + 19},
+    {SDLK_F20, MP_KEY_F + 20},
+    {SDLK_F21, MP_KEY_F + 21},
+    {SDLK_F22, MP_KEY_F + 22},
+    {SDLK_F23, MP_KEY_F + 23},
+    {SDLK_F24, MP_KEY_F + 24}
+};
+
+// One row of the SDL mouse button -> mpv mouse button mapping.
+struct mousemap_entry {
+    Uint8 sdl;  // SDL_BUTTON_* value
+    int mpv;    // MP_MBTN_* value
+};
+// Buttons translated on SDL_MOUSEBUTTONDOWN/UP in wait_events(); buttons not
+// listed here are ignored.
+const struct mousemap_entry mousebtns[] = {
+    {SDL_BUTTON_LEFT, MP_MBTN_LEFT},
+    {SDL_BUTTON_MIDDLE, MP_MBTN_MID},
+    {SDL_BUTTON_RIGHT, MP_MBTN_RIGHT},
+    {SDL_BUTTON_X1, MP_MBTN_BACK},
+    {SDL_BUTTON_X2, MP_MBTN_FORWARD},
+};
+
+// Per-instance state of the SDL video output.
+struct priv {
+    SDL_Window *window;
+    SDL_Renderer *renderer;
+    // Index of the render driver last selected by try_create_renderer();
+    // init_renderer() tries it first when it is >= 0.
+    int renderer_index;
+    SDL_RendererInfo renderer_info;
+    // Streaming texture holding the video frame.
+    SDL_Texture *tex;
+    // Set when the texture is SDL_PIXELFORMAT_YV12; lock_texture() then
+    // exchanges planes 1 and 2.
+    int tex_swapped;
+    struct mp_image_params params;
+    struct mp_rect src_rect;
+    struct mp_rect dst_rect;
+    struct mp_osd_res osd_res;
+    // RGBA-capable format found by is_good_renderer().
+    struct formatmap_entry osd_format;
+    struct osd_bitmap_surface {
+        int change_id;
+        struct osd_target {
+            SDL_Rect source;
+            SDL_Rect dest;
+            SDL_Texture *tex;
+            SDL_Texture *tex2;
+        } *targets;
+        int num_targets;
+        // Number of entries in targets; destroy_renderer() walks all of them.
+        int targets_size;
+    } osd_surfaces[MAX_OSD_PARTS];
+    double osd_pts;
+    // SDL event type pushed by wakeup().
+    Uint32 wakeup_event;
+    // Screensaver state applied in reconfig().
+    bool screensaver_enabled;
+    struct m_config_cache *opts_cache;
+
+    // options
+    bool allow_sw;      // accept renderers without SDL_RENDERER_ACCELERATED
+    bool switch_mode;   // fullscreen via SDL_WINDOW_FULLSCREEN instead of
+                        // SDL_WINDOW_FULLSCREEN_DESKTOP
+    bool vsync;
+};
+
+// Lock the video texture and describe its memory in *texmpi, using size and
+// format from the current params. Only 1- and 3-plane formats are accepted.
+// For 3 planes, the two remaining planes are placed directly after the first
+// one, each with half the pitch; vc->tex_swapped selects which of them comes
+// first.
+//
+// Returns false (after logging) on an unsupported plane count or if
+// SDL_LockTexture() fails. On success the texture is left locked.
+static bool lock_texture(struct vo *vo, struct mp_image *texmpi)
+{
+    struct priv *vc = vo->priv;
+
+    *texmpi = (struct mp_image){0};
+    mp_image_set_size(texmpi, vc->params.w, vc->params.h);
+    mp_image_setfmt(texmpi, vc->params.imgfmt);
+
+    int nplanes = texmpi->num_planes;
+    if (nplanes != 1 && nplanes != 3) {
+        MP_ERR(vo, "Invalid plane count\n");
+        return false;
+    }
+
+    void *mem;
+    int pitch;
+    if (SDL_LockTexture(vc->tex, NULL, &mem, &pitch) != 0) {
+        MP_ERR(vo, "SDL_LockTexture failed\n");
+        return false;
+    }
+
+    texmpi->planes[0] = mem;
+    texmpi->stride[0] = pitch;
+    if (nplanes != 3)
+        return true;
+
+    // Plane index that follows plane 0 in memory, and the one after that.
+    int first = vc->tex_swapped ? 2 : 1;
+    int second = vc->tex_swapped ? 1 : 2;
+
+    texmpi->planes[first] = (Uint8 *) texmpi->planes[0] + texmpi->h * pitch;
+    texmpi->stride[first] = pitch / 2;
+    texmpi->planes[second] =
+        (Uint8 *) texmpi->planes[first] + (texmpi->h * pitch) / 4;
+    texmpi->stride[second] = pitch / 2;
+
+    return true;
+}
+
+// Check whether a render driver is acceptable.
+//
+// ri:                 driver information to inspect
+// driver_name_wanted: if non-NULL and non-empty, the driver name must match
+// allow_sw:           whether non-accelerated renderers are acceptable
+// osd_format:         if non-NULL, receives the first table entry with alpha
+//                     that the renderer supports
+//
+// Returns true only if the renderer supports at least one format from the
+// formats table that has is_rgba set.
+static bool is_good_renderer(SDL_RendererInfo *ri,
+                             const char *driver_name_wanted, bool allow_sw,
+                             struct formatmap_entry *osd_format)
+{
+    bool name_given = driver_name_wanted && driver_name_wanted[0];
+    if (name_given && strcmp(driver_name_wanted, ri->name) != 0)
+        return false;
+
+    bool accelerated = ri->flags & SDL_RENDERER_ACCELERATED;
+    if (!accelerated && !allow_sw)
+        return false;
+
+    for (int i = 0; i < ri->num_texture_formats; ++i) {
+        for (int j = 0; j < sizeof(formats) / sizeof(formats[0]); ++j) {
+            const struct formatmap_entry *fmt = &formats[j];
+            if (!fmt->is_rgba || ri->texture_formats[i] != fmt->sdl)
+                continue;
+            if (osd_format)
+                *osd_format = *fmt;
+            return true;
+        }
+    }
+
+    return false;
+}
+
+// Destroy the video texture, every OSD texture and finally the renderer
+// itself. All destroyed handles are reset to NULL, so calling this again is
+// harmless.
+static void destroy_renderer(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+
+    // free ALL the textures
+    if (vc->tex) {
+        SDL_DestroyTexture(vc->tex);
+        vc->tex = NULL;
+    }
+
+    for (int part = 0; part < MAX_OSD_PARTS; ++part) {
+        struct osd_bitmap_surface *sfc = &vc->osd_surfaces[part];
+        for (int n = 0; n < sfc->targets_size; ++n) {
+            struct osd_target *target = &sfc->targets[n];
+            if (target->tex) {
+                SDL_DestroyTexture(target->tex);
+                target->tex = NULL;
+            }
+            if (target->tex2) {
+                SDL_DestroyTexture(target->tex2);
+                target->tex2 = NULL;
+            }
+        }
+    }
+
+    if (vc->renderer) {
+        SDL_DestroyRenderer(vc->renderer);
+        vc->renderer = NULL;
+    }
+}
+
+// Try to create a renderer from render driver number i.
+//
+// The driver is first probed through its static info (optionally requiring
+// the given driver name); only then is the renderer created and checked again
+// using the info of the actual renderer, which also selects vc->osd_format.
+// Returns true on success; on failure no renderer is left behind.
+static bool try_create_renderer(struct vo *vo, int i, const char *driver)
+{
+    struct priv *vc = vo->priv;
+
+    // first probe
+    SDL_RendererInfo probe;
+    if (SDL_GetRenderDriverInfo(i, &probe) != 0)
+        return false;
+    if (!is_good_renderer(&probe, driver, vc->allow_sw, NULL))
+        return false;
+
+    vc->renderer = SDL_CreateRenderer(vc->window, i, 0);
+    if (vc->renderer == NULL) {
+        MP_ERR(vo, "SDL_CreateRenderer failed\n");
+        return false;
+    }
+
+    if (SDL_GetRendererInfo(vc->renderer, &vc->renderer_info) != 0) {
+        MP_ERR(vo, "SDL_GetRendererInfo failed\n");
+        destroy_renderer(vo);
+        return false;
+    }
+
+    // second check, this time against the renderer that was really created
+    bool usable = is_good_renderer(&vc->renderer_info, NULL, vc->allow_sw,
+                                   &vc->osd_format);
+    if (!usable) {
+        MP_ERR(vo, "Renderer '%s' does not fulfill "
+                                  "requirements on this system\n",
+                                  vc->renderer_info.name);
+        destroy_renderer(vo);
+        return false;
+    }
+
+    // Announce the renderer only when the selection changes.
+    if (i != vc->renderer_index) {
+        MP_INFO(vo, "Using %s\n", vc->renderer_info.name);
+        vc->renderer_index = i;
+    }
+
+    return true;
+}
+
+// Pick and create a renderer. Tried in this order: the previously used driver
+// index (if any), every driver matching the SDL_HINT_RENDER_DRIVER hint, then
+// every driver regardless of name. Returns 0 on success, -1 if none works.
+static int init_renderer(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    const int count = SDL_GetNumRenderDrivers();
+
+    if (vc->renderer_index >= 0 &&
+        try_create_renderer(vo, vc->renderer_index, NULL))
+        return 0;
+
+    for (int idx = 0; idx < count; ++idx) {
+        if (try_create_renderer(vo, idx, SDL_GetHint(SDL_HINT_RENDER_DRIVER)))
+            return 0;
+    }
+
+    for (int idx = 0; idx < count; ++idx) {
+        if (try_create_renderer(vo, idx, NULL))
+            return 0;
+    }
+
+    MP_ERR(vo, "No supported renderer\n");
+    return -1;
+}
+
+// Adopt a new window size: store it in the VO, recompute the source and
+// destination rectangles and the OSD resolution, set the renderer's logical
+// size to match, and request a redraw.
+static void resize(struct vo *vo, int w, int h)
+{
+    struct priv *priv = vo->priv;
+
+    vo->dwidth = w;
+    vo->dheight = h;
+    vo_get_src_dst_rects(vo, &priv->src_rect, &priv->dst_rect, &priv->osd_res);
+
+    SDL_RenderSetLogicalSize(priv->renderer, w, h);
+
+    vo->want_redraw = true;
+    vo_wakeup(vo);
+}
+
+// Apply the current SDL window size unconditionally.
+static void force_resize(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    int width, height;
+
+    SDL_GetWindowSize(priv->window, &width, &height);
+    resize(vo, width, height);
+}
+
+// Apply the current SDL window size, but only if it differs from the size the
+// VO currently has.
+static void check_resize(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    int width, height;
+
+    SDL_GetWindowSize(priv->window, &width, &height);
+    if (width == vo->dwidth && height == vo->dheight)
+        return;
+
+    resize(vo, width, height);
+}
+
+// Bring SDL's screensaver setting in line with the requested state; nothing
+// is called if it already matches.
+static inline void set_screensaver(bool enabled)
+{
+    bool current = !!SDL_IsScreenSaverEnabled();
+    if (current == enabled)
+        return;
+
+    if (!enabled) {
+        SDL_DisableScreenSaver();
+        return;
+    }
+    SDL_EnableScreenSaver();
+}
+
+// Make the window's fullscreen state follow the fullscreen option. The
+// switch_mode option selects SDL_WINDOW_FULLSCREEN over
+// SDL_WINDOW_FULLSCREEN_DESKTOP. Nothing happens if the window is already in
+// the wanted state; after a successful change the screensaver state is
+// restored and the window size re-read.
+static void set_fullscreen(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    struct mp_vo_opts *opts = vc->opts_cache->opts;
+    int want_fs = opts->fullscreen;
+    SDL_bool saver_before = SDL_IsScreenSaverEnabled();
+
+    Uint32 fs_flag = vc->switch_mode ? SDL_WINDOW_FULLSCREEN
+                                     : SDL_WINDOW_FULLSCREEN_DESKTOP;
+
+    int is_fs = (SDL_GetWindowFlags(vc->window) & fs_flag) != 0;
+    if (want_fs == is_fs)
+        return;
+
+    if (SDL_SetWindowFullscreen(vc->window, want_fs ? fs_flag : 0) != 0) {
+        MP_ERR(vo, "SDL_SetWindowFullscreen failed\n");
+        return;
+    }
+
+    // toggling fullscreen might recreate the window, so better guard for this
+    set_screensaver(saver_before);
+
+    force_resize(vo);
+}
+
+// Store the size of the display the window is on in *screenrc, as a rectangle
+// at 0/0. If SDL cannot report the display mode, an error is logged and
+// *screenrc is left untouched.
+static void update_screeninfo(struct vo *vo, struct mp_rect *screenrc)
+{
+    struct priv *vc = vo->priv;
+    int display = SDL_GetWindowDisplayIndex(vc->window);
+    SDL_DisplayMode mode;
+
+    if (SDL_GetCurrentDisplayMode(display, &mode) != 0) {
+        MP_ERR(vo, "SDL_GetCurrentDisplayMode failed\n");
+        return;
+    }
+
+    *screenrc = (struct mp_rect){0, 0, mode.w, mode.h};
+}
+
+// (Re)configure the output for new video parameters: size and optionally
+// position the window, create a streaming texture whose format matches
+// params->imgfmt, clear it, apply screensaver/fullscreen state and show the
+// window.
+//
+// Returns 0 on success, -1 if the renderer offers no matching texture format
+// or the texture cannot be created or locked.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *vc = vo->priv;
+
+    struct vo_win_geometry geo;
+    // update_screeninfo() leaves its output untouched when SDL cannot report
+    // the display mode, so start from a defined rectangle (the video size)
+    // instead of passing indeterminate values on.
+    struct mp_rect screenrc = {0, 0, params->w, params->h};
+
+    update_screeninfo(vo, &screenrc);
+    vo_calc_window_geometry(vo, &screenrc, &geo);
+    vo_apply_window_geometry(vo, &geo);
+
+    int win_w = vo->dwidth;
+    int win_h = vo->dheight;
+
+    SDL_SetWindowSize(vc->window, win_w, win_h);
+    if (geo.flags & VO_WIN_FORCE_POS)
+        SDL_SetWindowPosition(vc->window, geo.win.x0, geo.win.y0);
+
+    if (vc->tex) {
+        SDL_DestroyTexture(vc->tex);
+        // Clear the pointer right away: the early return below must not leave
+        // a dangling texture for destroy_renderer() to destroy a second time.
+        vc->tex = NULL;
+    }
+
+    // Find an SDL texture format the renderer supports and that maps to the
+    // requested mpv format; the last match in renderer order wins.
+    Uint32 texfmt = SDL_PIXELFORMAT_UNKNOWN;
+    int i, j;
+    for (i = 0; i < vc->renderer_info.num_texture_formats; ++i)
+        for (j = 0; j < sizeof(formats) / sizeof(formats[0]); ++j)
+            if (vc->renderer_info.texture_formats[i] == formats[j].sdl)
+                if (params->imgfmt == formats[j].mpv)
+                    texfmt = formats[j].sdl;
+    if (texfmt == SDL_PIXELFORMAT_UNKNOWN) {
+        MP_ERR(vo, "Invalid pixel format\n");
+        return -1;
+    }
+
+    // lock_texture() exchanges planes 1 and 2 for YV12 textures.
+    vc->tex_swapped = texfmt == SDL_PIXELFORMAT_YV12;
+    vc->tex = SDL_CreateTexture(vc->renderer, texfmt,
+                                SDL_TEXTUREACCESS_STREAMING,
+                                params->w, params->h);
+    if (!vc->tex) {
+        MP_ERR(vo, "Could not create a texture\n");
+        return -1;
+    }
+
+    // lock_texture() reads size and format from vc->params.
+    vc->params = *params;
+
+    // Clear the new texture so no stale content is shown.
+    struct mp_image tmp;
+    if (!lock_texture(vo, &tmp)) {
+        SDL_DestroyTexture(vc->tex);
+        vc->tex = NULL;
+        return -1;
+    }
+    mp_image_clear(&tmp, 0, 0, tmp.w, tmp.h);
+    SDL_UnlockTexture(vc->tex);
+
+    resize(vo, win_w, win_h);
+
+    set_screensaver(vc->screensaver_enabled);
+    set_fullscreen(vo);
+
+    SDL_ShowWindow(vc->window);
+
+    // Pick up whatever size the window actually ended up with.
+    check_resize(vo);
+
+    return 0;
+}
+
+// Present the renderer's current contents.
+static void flip_page(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+
+    SDL_RenderPresent(priv->renderer);
+}
+
+// Interrupt a pending wait_events() by pushing an event of the private wakeup
+// type onto the SDL event queue.
+static void wakeup(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    SDL_Event wake = {.type = priv->wakeup_event};
+
+    // Note that there is no context - SDL is a singleton.
+    SDL_PushEvent(&wake);
+}
+
+// Wait for SDL events until until_time_ns at the latest (capped at 10 s),
+// then process every event that is queued and translate it into mpv input or
+// VO events. Event types not listed in the switch are dropped.
+static void wait_events(struct vo *vo, int64_t until_time_ns)
+{
+    int64_t wait_ns = until_time_ns - mp_time_ns();
+    // Round-up to 1ms for short timeouts (100us, 1000us]
+    if (wait_ns > MP_TIME_US_TO_NS(100))
+        wait_ns = MPMAX(wait_ns, MP_TIME_MS_TO_NS(1));
+    int timeout_ms = MPCLAMP(wait_ns / MP_TIME_MS_TO_NS(1), 0, 10000);
+    SDL_Event ev;
+
+    while (SDL_WaitEventTimeout(&ev, timeout_ms)) {
+        // Only the first wait uses the timeout; further iterations just
+        // drain what is already queued.
+        timeout_ms = 0;
+        switch (ev.type) {
+        case SDL_WINDOWEVENT:
+            switch (ev.window.event) {
+            case SDL_WINDOWEVENT_EXPOSED:
+                vo->want_redraw = true;
+                break;
+            case SDL_WINDOWEVENT_SIZE_CHANGED:
+                check_resize(vo);
+                vo_event(vo, VO_EVENT_RESIZE);
+                break;
+            case SDL_WINDOWEVENT_ENTER:
+                mp_input_put_key(vo->input_ctx, MP_KEY_MOUSE_ENTER);
+                break;
+            case SDL_WINDOWEVENT_LEAVE:
+                mp_input_put_key(vo->input_ctx, MP_KEY_MOUSE_LEAVE);
+                break;
+            }
+            break;
+        case SDL_QUIT:
+            mp_input_put_key(vo->input_ctx, MP_KEY_CLOSE_WIN);
+            break;
+        case SDL_TEXTINPUT: {
+            // Printable input: the text comes from SDL, the modifiers from
+            // the current keyboard state.
+            int sdl_mod = SDL_GetModState();
+            int mpv_mod = 0;
+            // we ignore KMOD_LSHIFT, KMOD_RSHIFT and KMOD_RALT (if
+            // mp_input_use_alt_gr() is true) because these are already
+            // factored into ev.text.text
+            if (sdl_mod & (KMOD_LCTRL | KMOD_RCTRL))
+                mpv_mod |= MP_KEY_MODIFIER_CTRL;
+            if ((sdl_mod & KMOD_LALT) ||
+                ((sdl_mod & KMOD_RALT) && !mp_input_use_alt_gr(vo->input_ctx)))
+                mpv_mod |= MP_KEY_MODIFIER_ALT;
+            if (sdl_mod & (KMOD_LGUI | KMOD_RGUI))
+                mpv_mod |= MP_KEY_MODIFIER_META;
+            struct bstr t = {
+                ev.text.text, strlen(ev.text.text)
+            };
+            mp_input_put_key_utf8(vo->input_ctx, mpv_mod, t);
+            break;
+        }
+        case SDL_KEYDOWN: {
+            // Issue: we don't know in advance whether this keydown event
+            // will ALSO cause a SDL_TEXTINPUT event
+            // So we're conservative, and only map non printable keycodes
+            // (e.g. function keys, arrow keys, etc.)
+            // However, this does lose some keypresses at least on X11
+            // (e.g. Ctrl-A generates SDL_KEYDOWN only, but the key is
+            // 'a'... and 'a' is normally also handled by SDL_TEXTINPUT).
+            // The default config does not use Ctrl, so this is fine...
+            int keycode = 0;
+            int i;
+            // Look the key up in the keys table; unknown keys stay 0 and are
+            // ignored.
+            for (i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i)
+                if (keys[i].sdl == ev.key.keysym.sym) {
+                    keycode = keys[i].mpv;
+                    break;
+                }
+            if (keycode) {
+                if (ev.key.keysym.mod & (KMOD_LSHIFT | KMOD_RSHIFT))
+                    keycode |= MP_KEY_MODIFIER_SHIFT;
+                if (ev.key.keysym.mod & (KMOD_LCTRL | KMOD_RCTRL))
+                    keycode |= MP_KEY_MODIFIER_CTRL;
+                if (ev.key.keysym.mod & (KMOD_LALT | KMOD_RALT))
+                    keycode |= MP_KEY_MODIFIER_ALT;
+                if (ev.key.keysym.mod & (KMOD_LGUI | KMOD_RGUI))
+                    keycode |= MP_KEY_MODIFIER_META;
+                mp_input_put_key(vo->input_ctx, keycode);
+            }
+            break;
+        }
+        case SDL_MOUSEMOTION:
+            mp_input_set_mouse_pos(vo->input_ctx, ev.motion.x, ev.motion.y);
+            break;
+        case SDL_MOUSEBUTTONDOWN: {
+            // Buttons missing from the mousebtns table are ignored.
+            int i;
+            for (i = 0; i < sizeof(mousebtns) / sizeof(mousebtns[0]); ++i)
+                if (mousebtns[i].sdl == ev.button.button) {
+                    mp_input_put_key(vo->input_ctx, mousebtns[i].mpv | MP_KEY_STATE_DOWN);
+                    break;
+                }
+            break;
+        }
+        case SDL_MOUSEBUTTONUP: {
+            int i;
+            for (i = 0; i < sizeof(mousebtns) / sizeof(mousebtns[0]); ++i)
+                if (mousebtns[i].sdl == ev.button.button) {
+                    mp_input_put_key(vo->input_ctx, mousebtns[i].mpv | MP_KEY_STATE_UP);
+                    break;
+                }
+            break;
+        }
+        case SDL_MOUSEWHEEL: {
+            // NOTE(review): with a flipped wheel the value passed on becomes
+            // negative while the direction code is still taken from the sign
+            // of the raw delta - confirm mp_input_put_wheel() expects that.
+            // Both axes are always reported, including a delta of 0.
+#if SDL_VERSION_ATLEAST(2, 0, 4)
+            double multiplier = ev.wheel.direction == SDL_MOUSEWHEEL_FLIPPED ? -1 : 1;
+#else
+            double multiplier = 1;
+#endif
+            int y_code = ev.wheel.y > 0 ? MP_WHEEL_UP : MP_WHEEL_DOWN;
+            mp_input_put_wheel(vo->input_ctx, y_code, abs(ev.wheel.y) * multiplier);
+            int x_code = ev.wheel.x > 0 ? MP_WHEEL_RIGHT : MP_WHEEL_LEFT;
+            mp_input_put_wheel(vo->input_ctx, x_code, abs(ev.wheel.x) * multiplier);
+            break;
+        }
+        }
+    }
+}
+
+static void uninit(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    destroy_renderer(vo);  // the renderer is torn down before its window
+    SDL_DestroyWindow(vc->window);
+    vc->window = NULL;
+    SDL_QuitSubSystem(SDL_INIT_VIDEO);  // undoes SDL_InitSubSystem() in preinit()
+    talloc_free(vc);  // vc is vo->priv
+}
+
+// Copy a w x h ARGB8888 bitmap into tex, converting to the OSD texture
+// format when that is not ARGB8888. Failures are logged, not returned.
+static inline void upload_to_texture(struct vo *vo, SDL_Texture *tex,
+                                     int w, int h, void *bitmap, int stride)
+{
+    struct priv *vc = vo->priv;
+    if (vc->osd_format.sdl == SDL_PIXELFORMAT_ARGB8888) {
+        // NOTE: this optimization is questionable, because SDL docs say
+        // that this way is slow. It did measure up faster, though...
+        if (SDL_UpdateTexture(tex, NULL, bitmap, stride))
+            MP_ERR(vo, "Could not update texture\n");
+        return;
+    }
+
+    void *pixels;
+    int pitch;
+    if (SDL_LockTexture(tex, NULL, &pixels, &pitch)) {
+        MP_ERR(vo, "Could not lock texture\n");
+        return;
+    }
+    if (SDL_ConvertPixels(w, h, SDL_PIXELFORMAT_ARGB8888, bitmap, stride,
+                          vc->osd_format.sdl, pixels, pitch))
+        MP_ERR(vo, "Could not convert pixels\n");
+    SDL_UnlockTexture(tex);
+}
+
+// Upload bmp into tex. With a non-zero ormask every 32-bit pixel is OR-ed
+// with the mask in a temporary, tightly packed copy before the upload.
+static inline void subbitmap_to_texture(struct vo *vo, SDL_Texture *tex,
+                                        struct sub_bitmap *bmp,
+                                        uint32_t ormask)
+{
+    if (!ormask) {
+        // Nothing to patch: hand the source bitmap over as it is.
+        upload_to_texture(vo, tex, bmp->w, bmp->h, bmp->bitmap, bmp->stride);
+        return;
+    }
+
+    uint32_t *masked = talloc_array(vo, uint32_t, bmp->w * bmp->h);
+    const char *row_base = bmp->bitmap;
+    for (int row = 0; row < bmp->h; row++) {
+        // Source rows are bmp->stride bytes apart, output rows bmp->w pixels.
+        const uint32_t *in = (const uint32_t *) (row_base + row * bmp->stride);
+        uint32_t *out = masked + row * bmp->w;
+        for (int col = 0; col < bmp->w; col++)
+            out[col] = in[col] | ormask;
+    }
+
+    upload_to_texture(vo, tex, bmp->w, bmp->h,
+                      masked, sizeof(uint32_t) * bmp->w);
+    talloc_free(masked);
+}
+
+// Rebuild the textures of one OSD surface from imgs, unless imgs is empty
+// or the surface already holds this change_id. Every part gets two
+// textures: tex (alpha blended, colour modulated to black) and tex2 (added).
+static void generate_osd_part(struct vo *vo, struct sub_bitmaps *imgs)
+{
+    struct priv *vc = vo->priv;
+    struct osd_bitmap_surface *sfc = &vc->osd_surfaces[imgs->render_index];
+
+    // Nothing to upload, or the cached textures are still current.
+    if (imgs->format == SUBBITMAP_EMPTY || imgs->num_parts == 0 ||
+        imgs->change_id == sfc->change_id)
+        return;
+
+    // Grow the target array on demand. The new tail is zeroed so that its
+    // tex/tex2 pointers read as "no texture yet" in the loop below.
+    if (imgs->num_parts > sfc->targets_size) {
+        sfc->targets = talloc_realloc(vc, sfc->targets, struct osd_target,
+                                      imgs->num_parts);
+        memset(sfc->targets + sfc->targets_size, 0,
+               (imgs->num_parts - sfc->targets_size) *
+               sizeof(struct osd_target));
+        sfc->targets_size = imgs->num_parts;
+    }
+    sfc->num_targets = imgs->num_parts;
+
+    for (int n = 0; n < imgs->num_parts; n++) {
+        struct sub_bitmap *part = &imgs->parts[n];
+        struct osd_target *slot = &sfc->targets[n];
+
+        // The whole bitmap is the source; the destination rectangle uses
+        // the part's position and display size (dw x dh).
+        slot->source = (SDL_Rect){
+            .x = 0, .y = 0, .w = part->w, .h = part->h
+        };
+        slot->dest = (SDL_Rect){
+            .x = part->x, .y = part->y, .w = part->dw, .h = part->dh
+        };
+
+        // tex: alpha blended texture. Any previous one is dropped and a
+        // fresh texture of the part's size takes its place.
+        if (slot->tex)
+            SDL_DestroyTexture(slot->tex);
+        slot->tex = SDL_CreateTexture(vc->renderer, vc->osd_format.sdl,
+                                      SDL_TEXTUREACCESS_STREAMING,
+                                      part->w, part->h);
+        if (slot->tex) {
+            SDL_SetTextureBlendMode(slot->tex, SDL_BLENDMODE_BLEND);
+            SDL_SetTextureColorMod(slot->tex, 0, 0, 0);
+            // RGBA -> 000A
+            subbitmap_to_texture(vo, slot->tex, part, 0);
+        } else {
+            MP_ERR(vo, "Could not create texture\n");
+        }
+
+        // tex2: added texture, recreated the same way.
+        if (slot->tex2)
+            SDL_DestroyTexture(slot->tex2);
+        slot->tex2 = SDL_CreateTexture(vc->renderer, vc->osd_format.sdl,
+                                       SDL_TEXTUREACCESS_STREAMING,
+                                       part->w, part->h);
+        if (slot->tex2) {
+            SDL_SetTextureBlendMode(slot->tex2, SDL_BLENDMODE_ADD);
+            // RGBA -> RGB1
+            subbitmap_to_texture(vo, slot->tex2, part, 0xFF000000);
+        } else {
+            MP_ERR(vo, "Could not create texture\n");
+        }
+    }
+
+    // Remember the revision so an unchanged OSD skips all of the above.
+    sfc->change_id = imgs->change_id;
+}
+
+// Blit every target of OSD surface `index`: first the alpha-blended
+// texture, then the additive one, both with the same source/dest rects.
+static void draw_osd_part(struct vo *vo, int index)
+{
+    struct priv *vc = vo->priv;
+    struct osd_bitmap_surface *sfc = &vc->osd_surfaces[index];
+
+    for (int n = 0; n < sfc->num_targets; n++) {
+        struct osd_target *t = &sfc->targets[n];
+        SDL_Texture *layers[] = { t->tex, t->tex2 };
+        for (int k = 0; k < 2; k++) {
+            if (layers[k])
+                SDL_RenderCopy(vc->renderer, layers[k], &t->source, &t->dest);
+        }
+    }
+}
+
+static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs)
+{
+    struct vo *vo = ctx;  // draw_osd() passes the vo as callback context
+    generate_osd_part(vo, imgs);            // refresh textures if imgs changed
+    draw_osd_part(vo, imgs->render_index);  // then blit that surface
+}
+
+static void draw_osd(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    // Only SUBBITMAP_BGRA is flagged as an accepted OSD bitmap format.
+    static const bool osdformats[SUBBITMAP_COUNT] = {
+        [SUBBITMAP_BGRA] = true,
+    };
+    // osd_draw() gets draw_osd_cb as its callback and vo as callback context.
+    osd_draw(vo->osd, vc->osd_res, vc->osd_pts, 0, osdformats, draw_osd_cb, vo);
+}
+
+// Set up SDL video, a hidden window and the renderer. Returns 0 or -1.
+static int preinit(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    if (SDL_WasInit(SDL_INIT_EVENTS)) {
+        MP_ERR(vo, "Another component is using SDL already.\n");
+        return -1;
+    }
+
+    vc->opts_cache = m_config_cache_alloc(vc, vo->global, &vo_sub_opts);
+    // predefine SDL defaults (SDL env vars shall override)
+    SDL_SetHintWithPriority(SDL_HINT_RENDER_SCALE_QUALITY, "1",
+                            SDL_HINT_DEFAULT);
+    SDL_SetHintWithPriority(SDL_HINT_VIDEO_MINIMIZE_ON_FOCUS_LOSS, "0",
+                            SDL_HINT_DEFAULT);
+
+    // predefine MPV options (SDL env vars shall be overridden)
+    SDL_SetHintWithPriority(SDL_HINT_RENDER_VSYNC, vc->vsync ? "1" : "0",
+                            SDL_HINT_OVERRIDE);
+
+    if (SDL_InitSubSystem(SDL_INIT_VIDEO)) {
+        MP_ERR(vo, "SDL_Init failed\n");
+        return -1;
+    }
+
+    // then actually try
+    vc->window = SDL_CreateWindow("MPV", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED,
+                                  640, 480, SDL_WINDOW_RESIZABLE | SDL_WINDOW_HIDDEN);
+    if (!vc->window) {
+        MP_ERR(vo, "SDL_CreateWindow failed\n");
+        goto fail_quit;
+    }
+
+    // try creating a renderer (this also fills renderer_info for query_format)
+    if (init_renderer(vo) != 0)
+        goto fail_window;
+
+    vc->wakeup_event = SDL_RegisterEvents(1);
+    if (vc->wakeup_event == (Uint32)-1)
+        MP_ERR(vo, "SDL_RegisterEvents() failed.\n");
+    MP_WARN(vo, "Warning: this legacy VO has bad performance. Consider fixing "
+                "your graphics drivers, or not forcing the sdl VO.\n");
+    return 0;
+fail_window:
+    SDL_DestroyWindow(vc->window);
+    vc->window = NULL;
+fail_quit:
+    // Undo SDL_InitSubSystem(), else SDL_WasInit() above blocks a retry.
+    SDL_QuitSubSystem(SDL_INIT_VIDEO);
+    return -1;
+}
+
+// Return 1 if `format` maps to a texture format listed in renderer_info.
+static int query_format(struct vo *vo, int format)
+{
+    struct priv *vc = vo->priv;
+    for (int f = 0; f < sizeof(formats) / sizeof(formats[0]); ++f)
+        for (int t = 0; t < vc->renderer_info.num_texture_formats; ++t)
+            if (formats[f].mpv == format &&
+                vc->renderer_info.texture_formats[t] == formats[f].sdl)
+                return 1;
+    return 0;
+}
+
+// Clear the target, upload frame->current (if any) into vc->tex, copy
+// the source rectangle to the destination rectangle and draw the OSD.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *vc = vo->priv;
+
+    // typically this runs in parallel with the following mp_image_copy call
+    SDL_SetRenderDrawColor(vc->renderer, 0, 0, 0, 255);
+    SDL_RenderClear(vc->renderer);
+    SDL_SetTextureBlendMode(vc->tex, SDL_BLENDMODE_NONE);
+
+    if (frame->current) {
+        mp_image_t mapped;
+        vc->osd_pts = frame->current->pts;
+        if (!lock_texture(vo, &mapped))
+            return;
+        mp_image_copy(&mapped, frame->current);
+        SDL_UnlockTexture(vc->tex);
+    }
+
+    SDL_Rect src = {
+        .x = vc->src_rect.x0,
+        .y = vc->src_rect.y0,
+        .w = vc->src_rect.x1 - vc->src_rect.x0,
+        .h = vc->src_rect.y1 - vc->src_rect.y0,
+    };
+    SDL_Rect dst = {
+        .x = vc->dst_rect.x0,
+        .y = vc->dst_rect.y0,
+        .w = vc->dst_rect.x1 - vc->dst_rect.x0,
+        .h = vc->dst_rect.y1 - vc->dst_rect.y0,
+    };
+
+    SDL_RenderCopy(vc->renderer, vc->tex, &src, &dst);
+    draw_osd(vo);
+}
+
+// Read the renderer output into a new vo->dwidth x vo->dheight image, or NULL.
+static struct mp_image *get_window_screenshot(struct vo *vo)
+{
+    struct priv *vc = vo->priv;
+    struct mp_image *shot =
+        mp_image_alloc(vc->osd_format.mpv, vo->dwidth, vo->dheight);
+    if (!shot)
+        return NULL;
+    if (SDL_RenderReadPixels(vc->renderer, NULL, vc->osd_format.sdl,
+                             shot->planes[0], shot->stride[0]) == 0)
+        return shot;
+    MP_ERR(vo, "SDL_RenderReadPixels failed\n");
+    talloc_free(shot);
+    return NULL;
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct priv *vc = vo->priv;
+    // Dispatch one VOCTRL request; anything not listed yields VO_NOTIMPL.
+    switch (request) {
+    case VOCTRL_VO_OPTS_CHANGED: {
+        void *opt;
+        // Drain all pending option changes; only fullscreen is acted upon.
+        while (m_config_cache_get_next_changed(vc->opts_cache, &opt)) {
+            struct mp_vo_opts *opts = vc->opts_cache->opts;
+            if (&opts->fullscreen == opt)
+                set_fullscreen(vo);
+        }
+        return 1;  // NOTE(review): 1 / true / VO_TRUE are mixed below - presumably equal, confirm
+    }
+    case VOCTRL_SET_PANSCAN:
+        force_resize(vo);
+        return VO_TRUE;
+    case VOCTRL_SCREENSHOT_WIN:
+        *(struct mp_image **)data = get_window_screenshot(vo);  // may store NULL
+        return true;
+    case VOCTRL_SET_CURSOR_VISIBILITY:
+        SDL_ShowCursor(*(bool *)data);  // data points to the requested visibility
+        return true;
+    case VOCTRL_KILL_SCREENSAVER:
+        vc->screensaver_enabled = false;
+        set_screensaver(vc->screensaver_enabled);
+        return VO_TRUE;
+    case VOCTRL_RESTORE_SCREENSAVER:
+        vc->screensaver_enabled = true;
+        set_screensaver(vc->screensaver_enabled);
+        return VO_TRUE;
+    case VOCTRL_UPDATE_WINDOW_TITLE:
+        SDL_SetWindowTitle(vc->window, (char *)data);  // data is the title string
+        return true;
+    }
+    return VO_NOTIMPL;
+}
+
+#define OPT_BASE_STRUCT struct priv
+
+const struct vo_driver video_out_sdl = {
+    .description = "SDL 2.0 Renderer",
+    .name = "sdl",
+    .priv_size = sizeof(struct priv),
+    .priv_defaults = &(const struct priv) {
+        .renderer_index = -1,  // NOTE(review): presumably "no renderer chosen yet" - confirm
+        .vsync = true,         // turned into SDL_HINT_RENDER_VSYNC by preinit()
+    },
+    .options = (const struct m_option []){
+        {"sw", OPT_BOOL(allow_sw)},
+        {"switch-mode", OPT_BOOL(switch_mode)},
+        {"vsync", OPT_BOOL(vsync)},
+        {NULL}  // list terminator
+    },
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .uninit = uninit,
+    .flip_page = flip_page,
+    .wait_events = wait_events,
+    .wakeup = wakeup,
+    .options_prefix = "sdl",
+};
diff --git a/video/out/vo_sixel.c b/video/out/vo_sixel.c
new file mode 100644
index 0000000..e05c455
--- /dev/null
+++ b/video/out/vo_sixel.c
@@ -0,0 +1,627 @@
+/*
+ * Sixel mpv output device implementation based on ffmpeg libavdevice implementation
+ * by Hayaki Saito
+ * https://github.com/saitoha/FFmpeg-SIXEL/blob/sixel/libavdevice/sixel.c
+ *
+ * Copyright (c) 2014 Hayaki Saito
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+
+#include <libswscale/swscale.h>
+#include <sixel.h>
+
+#include "config.h"
+#include "options/m_config.h"
+#include "osdep/terminal.h"
+#include "sub/osd.h"
+#include "vo.h"
+#include "video/sws_utils.h"
+#include "video/mp_image.h"
+
+#if HAVE_POSIX
+#include <unistd.h>
+#endif
+
+#define IMGFMT IMGFMT_RGB24
+
+#define TERM_ESC_USE_GLOBAL_COLOR_REG   "\033[?1070l"
+
+#define TERMINAL_FALLBACK_COLS      80
+#define TERMINAL_FALLBACK_ROWS      25
+#define TERMINAL_FALLBACK_PX_WIDTH  320
+#define TERMINAL_FALLBACK_PX_HEIGHT 240
+
+struct vo_sixel_opts {
+    int diffuse;     // DIFFUSE_* value given to sixel_dither_set_diffusion_type()
+    int reqcolors;   // colour count requested from sixel_dither_new()
+    bool fixedpal;   // true: built-in xterm256 palette, false: palette from frames
+    int threshold;   // scene-change threshold in percent; negative = every frame
+    int width, height, top, left;  // > 0: override canvas px size / origin cell
+    int pad_y, pad_x;  // px padding per side; out of range = round to cell grid
+    int rows, cols;    // > 0: override the detected terminal size in cells
+    bool config_clear, alt_screen;  // clear on reconfig / use alternate screen
+    bool buffered;     // collect a frame's output in memory, write it in one call
+};
+
+struct priv {
+    // User specified options
+    struct vo_sixel_opts opts;
+
+    // Internal data
+    sixel_output_t *output;            // libsixel output object from preinit()
+    sixel_dither_t *dither;            // palette flip_page() encodes with
+    sixel_dither_t *testdither;        // histogram scratch dither (dynamic palette)
+    uint8_t        *buffer;            // packed RGB copy of frame for libsixel
+    char           *sixel_output_buf;  // per-frame output text, see flip_page()
+    bool            skip_frame_draw;   // set by draw_frame(), read by flip_page()
+
+    int left, top;  // image origin cell (1 based)
+    int width, height;  // actual image px size - always reflects dst_rect.
+    int num_cols, num_rows;  // terminal size in cells
+    int canvas_ok;  // whether canvas vo->dwidth and vo->dheight are positive
+
+    int previous_histogram_colors;  // reference count for detect_scene_change()
+
+    struct mp_rect src_rect;
+    struct mp_rect dst_rect;
+    struct mp_osd_res osd;
+    struct mp_image *frame;     // scaler output at width x height, OSD drawn on it
+    struct mp_sws_context *sws; // scaler allocated in preinit()
+};
+
+static const unsigned int depth = 3;  // bytes per pixel of priv->buffer
+
+// Decide whether the palette has to be rebuilt for the current frame.
+// Returns 1 if there is no dither yet, if the threshold is negative, or if
+// testdither's histogram colour count differs from the stored count by more
+// than threshold percent of it (the stored count is then updated); else 0.
+static int detect_scene_change(struct vo* vo)
+{
+    struct priv* priv = vo->priv;
+
+    // If threshold is set negative, then every frame must be a scene change
+    if (!priv->dither || priv->opts.threshold < 0)
+        return 1;
+
+    // Compare the colour count of the fresh histogram with the count stored
+    // at the last detected scene change.
+    int before = priv->previous_histogram_colors;
+    int now = sixel_dither_get_num_of_histogram_colors(priv->testdither);
+    int delta = (before > now) ? before - now : now - before;
+
+    // Within the threshold: keep the stored count untouched.
+    if (100 * delta <= priv->opts.threshold * before)
+        return 0;
+
+    // Scene change: update history
+    priv->previous_histogram_colors = now;
+    return 1;
+}
+
+static void dealloc_dithers_and_buffers(struct vo* vo)
+{
+    struct priv* priv = vo->priv;
+    // Releases buffer, frame and both dithers; each pointer is reset to NULL.
+    if (priv->buffer) {
+        talloc_free(priv->buffer);
+        priv->buffer = NULL;
+    }
+
+    if (priv->frame) {
+        talloc_free(priv->frame);
+        priv->frame = NULL;
+    }
+
+    if (priv->dither) {
+        sixel_dither_unref(priv->dither);  // libsixel objects are unref'd, not freed
+        priv->dither = NULL;
+    }
+
+    if (priv->testdither) {
+        sixel_dither_unref(priv->testdither);
+        priv->testdither = NULL;
+    }
+}
+
+// Make sure priv->dither is the built-in xterm256 dither, fetching it on
+// first use. Returns SIXEL_FALSE if sixel_dither_get() yields none.
+static SIXELSTATUS prepare_static_palette(struct vo* vo)
+{
+    struct priv* priv = vo->priv;
+    if (priv->dither == NULL) {
+        sixel_dither_t *builtin = sixel_dither_get(BUILTIN_XTERM256);
+        if (!builtin)
+            return SIXEL_FALSE;
+        sixel_dither_set_diffusion_type(builtin, priv->opts.diffuse);
+        priv->dither = builtin;
+    }
+    sixel_dither_set_body_only(priv->dither, 0);
+    return SIXEL_OK;
+}
+
+// Build the palette from priv->buffer. The histogram is always computed on
+// testdither; only on a scene change does it become the active dither.
+static SIXELSTATUS prepare_dynamic_palette(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    SIXELSTATUS status = SIXEL_FALSE;
+    // An earlier failure may have left us without testdither: recreate it.
+    if (!priv->testdither) {
+        status = sixel_dither_new(&priv->testdither, priv->opts.reqcolors, NULL);
+        if (SIXEL_FAILED(status))
+            return status;
+    }
+    /* create histogram and construct color palette with median cut. */
+    status = sixel_dither_initialize(priv->testdither, priv->buffer,
+                                     priv->width, priv->height,
+                                     SIXEL_PIXELFORMAT_RGB888, LARGE_NORM,
+                                     REP_CENTER_BOX, QUALITY_LOW);
+    if (SIXEL_FAILED(status))
+        return status;
+
+    if (detect_scene_change(vo)) {
+        if (priv->dither)
+            sixel_dither_unref(priv->dither);
+        // Move testdither over and clear the field before asking for a new
+        // one, so a failed sixel_dither_new() cannot leave both aliased.
+        priv->dither = priv->testdither;
+        priv->testdither = NULL;
+        sixel_dither_set_diffusion_type(priv->dither, priv->opts.diffuse);
+        sixel_dither_set_body_only(priv->dither, 0);
+        return sixel_dither_new(&priv->testdither, priv->opts.reqcolors, NULL);
+    }
+    if (priv->dither == NULL)
+        return SIXEL_FALSE;
+    sixel_dither_set_body_only(priv->dither, 0);
+    return status;
+}
+
+static void update_canvas_dimensions(struct vo *vo)
+{
+    // Sets the vo canvas size in pixels (vo->dwidth, vo->dheight), the number
+    // of rows and columns in priv->num_rows/num_cols, and priv->canvas_ok.
+    struct priv *priv   = vo->priv;
+    int num_rows        = TERMINAL_FALLBACK_ROWS;
+    int num_cols        = TERMINAL_FALLBACK_COLS;
+    int total_px_width  = 0;
+    int total_px_height = 0;
+    // assumes terminal_get_size2() leaves unknown values untouched - TODO confirm
+    terminal_get_size2(&num_rows, &num_cols, &total_px_width, &total_px_height);
+
+    // If the user has specified rows/cols use them for further calculations
+    num_rows = (priv->opts.rows > 0) ? priv->opts.rows : num_rows;
+    num_cols = (priv->opts.cols > 0) ? priv->opts.cols : num_cols;
+
+    // If the pad value is set in between 0 and width/2 - 1, then we
+    // subtract from the detected width. Otherwise, we assume that the width
+    // output must be an integer multiple of num_cols and accordingly set
+    // total_width to be an integer multiple of num_cols. So in case the padding
+    // added by terminal is less than the number of cells in that axis, then rounding
+    // down will take care of correcting the detected width and remove padding.
+    if (priv->opts.width > 0) {
+        // option - set by the user, hard truth
+        total_px_width = priv->opts.width;
+    } else {
+        if (total_px_width <= 0) {
+                // no pixel width was detected for the terminal
+                total_px_width = TERMINAL_FALLBACK_PX_WIDTH;
+        } else {
+            if (priv->opts.pad_x >= 0 && priv->opts.pad_x < total_px_width / 2) {
+                // explicit padding set by the user
+                total_px_width -= (2 * priv->opts.pad_x);
+            } else {
+                // rounded "auto padding"
+                total_px_width = total_px_width / num_cols * num_cols;
+            }
+        }
+    }
+    // The height follows exactly the same scheme as the width above.
+    if (priv->opts.height > 0) {
+        total_px_height = priv->opts.height;
+    } else {
+        if (total_px_height <= 0) {
+            total_px_height = TERMINAL_FALLBACK_PX_HEIGHT;
+        } else {
+            if (priv->opts.pad_y >= 0 && priv->opts.pad_y < total_px_height / 2) {
+                total_px_height -= (2 * priv->opts.pad_y);
+            } else {
+                total_px_height = total_px_height / num_rows * num_rows;
+            }
+        }
+    }
+
+    // use n-1 rows for height
+    // The last row can't be used for encoding image, because after sixel encode
+    // the terminal moves the cursor to next line below the image, causing the
+    // last line to be empty instead of displaying image data.
+    // TODO: Confirm if the output height must be a multiple of 6, if not, remove
+    // the / 6 * 6 part which is setting the height to be a multiple of 6.
+    vo->dheight = total_px_height * (num_rows - 1) / num_rows / 6 * 6;
+    vo->dwidth  = total_px_width;
+
+    priv->num_rows = num_rows;
+    priv->num_cols = num_cols;
+    // Callers skip all rendering while this is false.
+    priv->canvas_ok = vo->dwidth > 0 && vo->dheight > 0;
+}
+
+static void set_sixel_output_parameters(struct vo *vo)
+{
+    // This function sets output scaled size in priv->width, priv->height
+    // and the scaling rectangles in pixels priv->src_rect, priv->dst_rect
+    // as well as image positioning in cells priv->top, priv->left.
+    struct priv *priv = vo->priv;
+
+    vo_get_src_dst_rects(vo, &priv->src_rect, &priv->dst_rect, &priv->osd);
+
+    // priv->width and priv->height are the width and height of dst_rect
+    // and they are not changed anywhere else outside this function.
+    // It is the sixel image output dimension which is output by libsixel.
+    priv->width  = priv->dst_rect.x1 - priv->dst_rect.x0;
+    priv->height = priv->dst_rect.y1 - priv->dst_rect.y0;
+    // Dividing by vo->dheight/dwidth below: both callers check canvas_ok first.
+    // top/left are 1-based cells. A user value of at least 1 is used as-is
+    // (no centering); otherwise the cell holding dst_rect's origin is used.
+    priv->top  = (priv->opts.top  > 0) ?  priv->opts.top :
+                  priv->num_rows * priv->dst_rect.y0 / vo->dheight + 1;
+    priv->left = (priv->opts.left > 0) ?  priv->opts.left :
+                  priv->num_cols * priv->dst_rect.x0 / vo->dwidth  + 1;
+}
+
+// Configure the scaler for src_rect -> width x height output in IMGFMT and
+// reallocate the frame, the test dither (dynamic palette only) and the
+// RGB buffer to match. Returns 0 on success, -1 on failure.
+static int update_sixel_swscaler(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *priv = vo->priv;
+    struct mp_sws_context *sws = priv->sws;
+
+    sws->src = *params;
+    sws->src.w = mp_rect_w(priv->src_rect);
+    sws->src.h = mp_rect_h(priv->src_rect);
+    sws->dst = (struct mp_image_params) {
+        .imgfmt = IMGFMT,
+        .w = priv->width,
+        .h = priv->height,
+        .p_w = 1,
+        .p_h = 1,
+    };
+
+    // Everything sized for the previous geometry goes away first.
+    dealloc_dithers_and_buffers(vo);
+
+    priv->frame = mp_image_alloc(IMGFMT, priv->width, priv->height);
+    if (!priv->frame || mp_sws_reinit(sws) < 0)
+        return -1;
+
+    // create testdither only if dynamic palette mode is set
+    if (!priv->opts.fixedpal) {
+        SIXELSTATUS status =
+            sixel_dither_new(&priv->testdither, priv->opts.reqcolors, NULL);
+        if (SIXEL_FAILED(status)) {
+            MP_ERR(vo, "update_sixel_swscaler: Failed to create new dither: %s\n",
+                   sixel_helper_format_error(status));
+            return -1;
+        }
+    }
+    priv->buffer =
+        talloc_array(NULL, uint8_t, depth * priv->width * priv->height);
+    return 0;
+}
+
+static inline int sixel_buffer(char *data, int size, void *priv) {
+    char **out = (char **)priv;  // address of the caller's buffer pointer
+    *out = talloc_strndup_append_buffer(*out, data, size);  // append this chunk
+    return size;  // the whole chunk is always reported as consumed
+}
+
+static inline int sixel_write(char *data, int size, void *priv)
+{
+    FILE *p = (FILE *)priv;
+    // On POSIX platforms, write() is the fastest method. It also is the only
+    // one that allows atomic writes so mpv's output will not be interrupted
+    // by other processes or threads that write to stdout, which would cause
+    // screen corruption. POSIX does not guarantee atomicity for writes
+    // exceeding PIPE_BUF, but at least Linux does seem to implement it that
+    // way.
+#if HAVE_POSIX
+    int remain = size;
+    // Keep calling write() until every byte is out; it may take only a part.
+    while (remain > 0) {
+        ssize_t written = write(fileno(p), data, remain);
+        if (written < 0)
+            return written;  // any error, whatever errno says, ends the write
+        remain -= written;
+        data += written;
+    }
+
+    return size;
+#else
+    int ret = fwrite(data, 1, size, p);
+    fflush(p);
+    return ret;  // number of bytes fwrite() accepted
+#endif
+}
+
+static inline void sixel_strwrite(char *s)
+{
+    sixel_write(s, strlen(s), stdout);  // NUL-terminated string to stdout; result ignored
+}
+
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *priv = vo->priv;
+    int ret = 0;  // stays 0 when the canvas is unusable and setup is skipped
+    update_canvas_dimensions(vo);
+    if (priv->canvas_ok) {  // if too small - succeed but skip the rendering
+        set_sixel_output_parameters(vo);
+        ret = update_sixel_swscaler(vo, params);
+    }
+    // The screen clear and the redraw request happen even if setup failed.
+    if (priv->opts.config_clear)
+        sixel_strwrite(TERM_ESC_CLEAR_SCREEN);
+    vo->want_redraw = true;
+
+    return ret;
+}
+
+// Render one frame plus OSD into priv->buffer and prepare the palette for
+// flip_page(); sets priv->skip_frame_draw when there is nothing to encode.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *priv = vo->priv;
+    SIXELSTATUS status;
+    struct mp_image *mpi = NULL;
+    int  prev_rows   = priv->num_rows;
+    int  prev_cols   = priv->num_cols;
+    int  prev_height = vo->dheight;
+    int  prev_width  = vo->dwidth;
+    bool resized     = false;
+    update_canvas_dimensions(vo);
+    if (!priv->canvas_ok)
+        return;
+
+    if (prev_rows != priv->num_rows || prev_cols != priv->num_cols ||
+        prev_width != vo->dwidth || prev_height != vo->dheight)
+    {
+        set_sixel_output_parameters(vo);
+        // Not checking for vo->config_ok because draw_frame is never called
+        // with a failed reconfig.
+        if (update_sixel_swscaler(vo, vo->params) < 0)
+            MP_WARN(vo, "draw_frame: failed to adapt to the new size\n");
+        if (priv->opts.config_clear)
+            sixel_strwrite(TERM_ESC_CLEAR_SCREEN);
+        resized = true;
+    }
+
+    // A failed (re)allocation leaves frame and/or buffer NULL. Skip the
+    // frame instead of scaling and copying into missing memory.
+    if (!priv->frame || !priv->buffer) {
+        priv->skip_frame_draw = true;
+        return;
+    }
+
+    if (frame->repeat && !frame->redraw && !resized) {
+        // Frame is repeated, and no need to update OSD either
+        priv->skip_frame_draw = true;
+        return;
+    }
+    // Either frame is new, or OSD has to be redrawn
+    priv->skip_frame_draw = false;
+
+    // Normal case where we have to draw the frame and the image is not NULL
+    if (frame->current) {
+        mpi = mp_image_new_ref(frame->current);
+        if (!mpi) {
+            priv->skip_frame_draw = true;
+            return;
+        }
+        struct mp_rect src_rc = priv->src_rect;
+        src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, mpi->fmt.align_x);
+        src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, mpi->fmt.align_y);
+        mp_image_crop_rc(mpi, src_rc);
+        // scale/pan to our dest rect
+        mp_sws_scale(priv->sws, priv->frame, mpi);
+    } else {
+        // Image is NULL, so need to clear image and draw OSD
+        mp_image_clear(priv->frame, 0, 0, priv->width, priv->height);
+    }
+
+    struct mp_osd_res dim = { .w = priv->width, .h = priv->height };
+    osd_draw_on_image(vo->osd, dim, mpi ? mpi->pts : 0, 0, priv->frame);
+
+    // Copy from mpv to RGB format as required by libsixel
+    memcpy_pic(priv->buffer, priv->frame->planes[0], priv->width * depth,
+               priv->height, priv->width * depth, priv->frame->stride[0]);
+
+    // A palette failure is only reported: flip_page() checks that dither is
+    // not NULL before drawing, and the next frame tries again.
+    status = priv->opts.fixedpal ? prepare_static_palette(vo)
+                                 : prepare_dynamic_palette(vo);
+    if (SIXEL_FAILED(status))
+        MP_WARN(vo, "draw_frame: prepare_palette returned error: %s\n",
+                sixel_helper_format_error(status));
+
+    if (mpi)
+        talloc_free(mpi);
+}
+
+// Write the frame prepared by draw_frame(): cursor position, then sixels.
+static void flip_page(struct vo *vo)
+{
+    struct priv* priv = vo->priv;
+    // No canvas, or frame is repeated and needs no update: skip encoding
+    if (!priv->canvas_ok || priv->skip_frame_draw)
+        return;
+
+    // Make sure that image and dither are valid before drawing
+    if (priv->buffer == NULL || priv->dither == NULL)
+        return;
+
+    // Go to the offset row and column, then display the image
+    priv->sixel_output_buf = talloc_asprintf(NULL, TERM_ESC_GOTO_YX,
+                                             priv->top, priv->left);
+    if (!priv->opts.buffered)
+        sixel_strwrite(priv->sixel_output_buf);
+    SIXELSTATUS status = sixel_encode(priv->buffer, priv->width, priv->height,
+                                      depth, priv->dither, priv->output);
+    if (SIXEL_FAILED(status))
+        MP_WARN(vo, "flip_page: sixel_encode failed: %s\n",
+                sixel_helper_format_error(status));
+    if (priv->opts.buffered)
+        sixel_write(priv->sixel_output_buf,
+                    ta_get_size(priv->sixel_output_buf), stdout);
+    // preinit() gave libsixel this field's address; don't leave it dangling.
+    talloc_free(priv->sixel_output_buf);
+    priv->sixel_output_buf = NULL;
+}
+
+// Allocate the scaler, the libsixel output and (dynamic palette only) the
+// test dither, then set up the terminal. Returns 0 on success, -1 on failure.
+static int preinit(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    SIXELSTATUS status = SIXEL_FALSE;
+    // Parse opts set by CLI or conf
+    priv->sws = mp_sws_alloc(vo);
+    priv->sws->log = vo->log;
+    mp_sws_enable_cmdline_opts(priv->sws, vo->global);
+
+    if (priv->opts.buffered)
+        status = sixel_output_new(&priv->output, sixel_buffer,
+                                  &priv->sixel_output_buf, NULL);
+    else
+        status = sixel_output_new(&priv->output, sixel_write, stdout, NULL);
+    if (SIXEL_FAILED(status)) {
+        MP_ERR(vo, "preinit: Failed to create output file: %s\n",
+               sixel_helper_format_error(status));
+        return -1;
+    }
+
+    sixel_output_set_encode_policy(priv->output, SIXEL_ENCODEPOLICY_FAST);
+    priv->dither = NULL;
+    priv->previous_histogram_colors = 0;
+
+    // create testdither only if dynamic palette mode is set. This happens
+    // before any escape sequence is written, so a failure leaves the terminal alone.
+    if (!priv->opts.fixedpal) {
+        status = sixel_dither_new(&priv->testdither, priv->opts.reqcolors, NULL);
+        if (SIXEL_FAILED(status)) {
+            MP_ERR(vo, "preinit: Failed to create new dither: %s\n",
+                   sixel_helper_format_error(status));
+            sixel_output_unref(priv->output);
+            priv->output = NULL;
+            return -1;
+        }
+    }
+
+    if (priv->opts.alt_screen)
+        sixel_strwrite(TERM_ESC_ALT_SCREEN);
+    sixel_strwrite(TERM_ESC_HIDE_CURSOR);
+    /* don't use private color registers for each frame. */
+    sixel_strwrite(TERM_ESC_USE_GLOBAL_COLOR_REG);
+
+    return 0;
+}
+
+static int query_format(struct vo *vo, int format)
+{
+    return format == IMGFMT;  // only the single format this file defines as IMGFMT
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    if (request == VOCTRL_SET_PANSCAN)  // handled by re-running reconfig()
+        return (vo->config_ok && !reconfig(vo, vo->params)) ? VO_TRUE : VO_FALSE;
+    return VO_NOTIMPL;  // every other request is unsupported
+}
+
+
+static void uninit(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+    // Put the terminal back first, then release the libsixel objects.
+    sixel_strwrite(TERM_ESC_RESTORE_CURSOR);
+
+    if (priv->opts.alt_screen)
+        sixel_strwrite(TERM_ESC_NORMAL_SCREEN);  // leaves what preinit() entered
+    fflush(stdout);
+
+    if (priv->output) {
+        sixel_output_unref(priv->output);
+        priv->output = NULL;
+    }
+
+    dealloc_dithers_and_buffers(vo);  // buffer, frame, dither, testdither
+}
+
+#define OPT_BASE_STRUCT struct priv
+
+const struct vo_driver video_out_sixel = {
+    .name = "sixel",
+    .description = "terminal graphics using sixels",
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+    .priv_defaults = &(const struct priv) {
+        .opts.diffuse = DIFFUSE_AUTO,
+        .opts.reqcolors = 256,
+        .opts.threshold = -1,       // negative: every frame counts as a scene change
+        .opts.fixedpal = true,      // static xterm256 palette by default
+        .opts.pad_y = -1,           // negative: round the canvas to the cell grid
+        .opts.pad_x = -1,
+        .opts.config_clear = true,
+        .opts.alt_screen = true,
+    },
+    .options = (const m_option_t[]) {
+        {"dither", OPT_CHOICE(opts.diffuse,
+            {"auto", DIFFUSE_AUTO},
+            {"none", DIFFUSE_NONE},
+            {"atkinson", DIFFUSE_ATKINSON},
+            {"fs", DIFFUSE_FS},
+            {"jajuni", DIFFUSE_JAJUNI},
+            {"stucki", DIFFUSE_STUCKI},
+            {"burkes", DIFFUSE_BURKES},
+            {"arithmetic", DIFFUSE_A_DITHER},
+            {"xor", DIFFUSE_X_DITHER})},
+        {"width", OPT_INT(opts.width)},
+        {"height", OPT_INT(opts.height)},
+        {"reqcolors", OPT_INT(opts.reqcolors)},
+        {"fixedpalette", OPT_BOOL(opts.fixedpal)},
+        {"threshold", OPT_INT(opts.threshold)},
+        {"top", OPT_INT(opts.top)},
+        {"left", OPT_INT(opts.left)},
+        {"pad-y", OPT_INT(opts.pad_y)},
+        {"pad-x", OPT_INT(opts.pad_x)},
+        {"rows", OPT_INT(opts.rows)},
+        {"cols", OPT_INT(opts.cols)},
+        {"config-clear", OPT_BOOL(opts.config_clear), },
+        {"alt-screen", OPT_BOOL(opts.alt_screen), },
+        {"buffered", OPT_BOOL(opts.buffered), },
+        {"exit-clear", OPT_REPLACED("vo-sixel-alt-screen")},  // old name, points to alt-screen
+        {0}
+    },
+    .options_prefix = "vo-sixel",
+};
diff --git a/video/out/vo_tct.c b/video/out/vo_tct.c
new file mode 100644
index 0000000..8859095
--- /dev/null
+++ b/video/out/vo_tct.c
@@ -0,0 +1,347 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <config.h>
+
+#if HAVE_POSIX
+#include <sys/ioctl.h>
+#endif
+
+#include <libswscale/swscale.h>
+
+#include "options/m_config.h"
+#include "config.h"
+#include "osdep/terminal.h"
+#include "osdep/io.h"
+#include "vo.h"
+#include "sub/osd.h"
+#include "video/sws_utils.h"
+#include "video/mp_image.h"
+
+// Pixel format requested from the scaler and the only one accepted by
+// query_format(). The writers below read each pixel as three bytes in
+// B, G, R order.
+#define IMGFMT IMGFMT_BGR24
+
+// Values of the "algo" option: one terminal cell per pixel (plain), or two
+// vertically stacked pixels per cell drawn with the lower half block glyph.
+#define ALGO_PLAIN 1
+#define ALGO_HALF_BLOCKS 2
+
+// Escape sequence pieces. The COLOR* prefixes are deliberately incomplete:
+// print_seq1()/print_seq3() append the ";<value>" parts and the closing 'm'.
+#define TERM_ESC_CLEAR_COLORS           "\033[0m"
+#define TERM_ESC_COLOR256_BG            "\033[48;5"
+#define TERM_ESC_COLOR256_FG            "\033[38;5"
+#define TERM_ESC_COLOR24BIT_BG          "\033[48;2"
+#define TERM_ESC_COLOR24BIT_FG          "\033[38;2"
+
+// Size assumed by get_win_size() before terminal_get_size() and the
+// width/height options get a chance to override it.
+#define DEFAULT_WIDTH 80
+#define DEFAULT_HEIGHT 25
+
+// User options of the tct output (see the option table in video_out_tct).
+struct vo_tct_opts {
+    int algo;    // ALGO_PLAIN or ALGO_HALF_BLOCKS
+    int width;   // values <= 0 -> keep the detected/default width
+    int height;  // values <= 0 -> keep the detected/default height
+    bool term256;  // true -> xterm 256-color palette, false -> 24-bit color
+};
+
+// Pre-formatted ";<n>" text for one byte value n (filled in by preinit()).
+struct lut_item {
+    char str[4];  // up to 4 characters; not NUL-terminated when all 4 are used
+    int width;    // number of valid characters in str
+};
+
+// Per-instance state of the tct output.
+struct priv {
+    struct vo_tct_opts opts;
+    size_t buffer_size;         // not referenced anywhere else in this file
+    int swidth;                 // width of the rendered picture (dst rect)
+    int sheight;                // height of the rendered picture (dst rect)
+    struct mp_image *frame;     // scaler output, (re)allocated in reconfig()
+    struct mp_rect src;         // filled by vo_get_src_dst_rects()
+    struct mp_rect dst;         // filled by vo_get_src_dst_rects()
+    struct mp_sws_context *sws; // scaler context created in preinit()
+    struct lut_item lut[256];   // ";<n>" strings indexed by byte value
+};
+
+// Convert RGB24 to xterm-256 8-bit value.
+// For simplicity, assume RGB space is perceptually uniform.
+//
+// Two candidates are computed: the nearest entry of the 6x6x6 color cube
+// (palette indices 16..231) and the nearest entry of the 24-step gray ramp
+// (indices 232..255). The one with the smaller squared RGB distance to the
+// input is returned.
+//
+// Tie-breaking when the input is exactly in the middle of two outputs:
+// - per channel and for the gray value, the higher output is chosen;
+// - if the cube color and the gray are equally far away, the cube color wins.
+static int rgb_to_x256(uint8_t r, uint8_t g, uint8_t b)
+{
+    // Channel values represented by the six cube steps.
+    static const int cube_levels[6] = {0, 0x5f, 0x87, 0xaf, 0xd7, 0xff};
+    const int in[3] = {r, g, b};
+
+    // Nearest cube entry: base-6 index built from the r, g, b steps (0..215),
+    // plus the squared distance of that entry from the input.
+    int cube_index = 0;
+    int cube_err = 0;
+    for (int i = 0; i < 3; i++) {
+        const int v = in[i];
+        const int step = v < 48 ? 0 : v < 115 ? 1 : (v - 35) / 40;  // 0..5
+        const int d = cube_levels[step] - v;
+        cube_index = cube_index * 6 + step;
+        cube_err += d * d;
+    }
+
+    // Nearest gray ramp entry (0..23) and the level it represents.
+    // For averages below 3 the division truncates toward zero, giving 0.
+    const int average = (r + g + b) / 3;
+    const int gray_index = average > 238 ? 23 : (average - 3) / 10;
+    const int gray_level = 8 + 10 * gray_index;
+    int gray_err = 0;
+    for (int i = 0; i < 3; i++) {
+        const int d = gray_level - in[i];
+        gray_err += d * d;
+    }
+
+    return cube_err <= gray_err ? 16 + cube_index : 232 + gray_index;
+}
+
+// Emit "<prefix>;<r>;<g>;<b>m" on stdout.
+// On non-Windows builds the ";<n>" parts come from the lookup table and are
+// written with fwrite, which is about 25% faster than the printf code (even
+// with *.s and the lut values). On Windows printf is required so that escape
+// sequences and UTF8 output get translated for the console.
+static void print_seq3(struct lut_item *lut, const char* prefix,
+                       uint8_t r, uint8_t g, uint8_t b)
+{
+#ifdef _WIN32
+    printf("%s;%d;%d;%dm", prefix, (int)r, (int)g, (int)b);
+#else
+    const uint8_t channels[3] = {r, g, b};
+
+    fputs(prefix, stdout);
+    for (int i = 0; i < 3; i++) {
+        const struct lut_item *item = &lut[channels[i]];
+        fwrite(item->str, item->width, 1, stdout);
+    }
+    fputc('m', stdout);
+#endif
+}
+
+// Emit "<prefix>;<c>m" on stdout; single-value counterpart of print_seq3().
+static void print_seq1(struct lut_item *lut, const char* prefix, uint8_t c)
+{
+#ifdef _WIN32
+    printf("%s;%dm", prefix, (int)c);
+#else
+    const struct lut_item *item = &lut[c];
+
+    fputs(prefix, stdout);
+    fwrite(item->str, item->width, 1, stdout);
+    fputc('m', stdout);
+#endif
+}
+
+
+// Draw the picture with one terminal cell per pixel: every cell is a space
+// whose background color is the pixel color.
+//   dwidth/dheight: terminal size, used to center the picture
+//   swidth/sheight: picture size in pixels (= cells)
+//   source/source_stride: packed B,G,R bytes and the byte distance of rows
+//   term256: use the 256-color palette instead of 24-bit color
+//   lut: ";<n>" strings used by print_seq1()/print_seq3()
+static void write_plain(
+    const int dwidth, const int dheight,
+    const int swidth, const int sheight,
+    const unsigned char *source, const int source_stride,
+    bool term256, struct lut_item *lut)
+{
+    assert(source);
+    const int left = (dwidth - swidth) / 2;
+    const int top = (dheight - sheight) / 2;
+
+    for (int line = 0; line < sheight; line++) {
+        const unsigned char *px = source + line * source_stride;
+        printf(TERM_ESC_GOTO_YX, top + line, left);
+        for (int col = 0; col < swidth; col++, px += 3) {
+            const unsigned char b = px[0];
+            const unsigned char g = px[1];
+            const unsigned char r = px[2];
+            if (!term256) {
+                print_seq3(lut, TERM_ESC_COLOR24BIT_BG, r, g, b);
+            } else {
+                print_seq1(lut, TERM_ESC_COLOR256_BG, rgb_to_x256(r, g, b));
+            }
+            printf(" ");
+        }
+        printf(TERM_ESC_CLEAR_COLORS);
+    }
+    printf("\n");
+}
+
+// Draw the picture with two pixel rows per terminal row: each cell shows the
+// lower half block glyph, with the upper pixel as background color and the
+// lower pixel as foreground color. The source therefore has to provide
+// 2 * sheight rows. Parameters are otherwise the same as for write_plain().
+static void write_half_blocks(
+    const int dwidth, const int dheight,
+    const int swidth, const int sheight,
+    unsigned char *source, int source_stride,
+    bool term256, struct lut_item *lut)
+{
+    assert(source);
+    const int left = (dwidth - swidth) / 2;
+    const int top = (dheight - sheight) / 2;
+
+    for (int line = 0; line < sheight; line++) {
+        const unsigned char *upper = source + (2 * line) * source_stride;
+        const unsigned char *lower = source + (2 * line + 1) * source_stride;
+        printf(TERM_ESC_GOTO_YX, top + line, left);
+        for (int col = 0; col < swidth; col++, upper += 3, lower += 3) {
+            const unsigned char b_up = upper[0];
+            const unsigned char g_up = upper[1];
+            const unsigned char r_up = upper[2];
+            const unsigned char b_down = lower[0];
+            const unsigned char g_down = lower[1];
+            const unsigned char r_down = lower[2];
+            if (!term256) {
+                print_seq3(lut, TERM_ESC_COLOR24BIT_BG, r_up, g_up, b_up);
+                print_seq3(lut, TERM_ESC_COLOR24BIT_FG, r_down, g_down, b_down);
+            } else {
+                print_seq1(lut, TERM_ESC_COLOR256_BG, rgb_to_x256(r_up, g_up, b_up));
+                print_seq1(lut, TERM_ESC_COLOR256_FG, rgb_to_x256(r_down, g_down, b_down));
+            }
+            printf("\xe2\x96\x84");  // UTF8 bytes of U+2584 (lower half block)
+        }
+        printf(TERM_ESC_CLEAR_COLORS);
+    }
+    printf("\n");
+}
+
+// Determine the output size in terminal cells. Starts from the built-in
+// defaults, lets terminal_get_size() update them, and finally applies the
+// width/height options when they are positive.
+static void get_win_size(struct vo *vo, int *out_width, int *out_height) {
+    const struct priv *priv = vo->priv;
+    int cols = DEFAULT_WIDTH;
+    int rows = DEFAULT_HEIGHT;
+
+    terminal_get_size(&cols, &rows);
+
+    *out_width = priv->opts.width > 0 ? priv->opts.width : cols;
+    *out_height = priv->opts.height > 0 ? priv->opts.height : rows;
+}
+
+// (Re)compute the output geometry for the current terminal size, set up the
+// scaler for the given video parameters, and replace the frame buffer.
+// Returns 0 on success, -1 if the frame buffer cannot be allocated or the
+// scaler cannot be initialized. On success the screen is cleared and a
+// redraw is requested.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *priv = vo->priv;
+    struct mp_osd_res osd_res;
+
+    get_win_size(vo, &vo->dwidth, &vo->dheight);
+    vo_get_src_dst_rects(vo, &priv->src, &priv->dst, &osd_res);
+
+    // Size of the rendered picture as given by the destination rectangle.
+    const int cols = priv->dst.x1 - priv->dst.x0;
+    const int rows = priv->dst.y1 - priv->dst.y0;
+    priv->swidth = cols;
+    priv->sheight = rows;
+
+    priv->sws->src = *params;
+    priv->sws->dst = (struct mp_image_params) {
+        .imgfmt = IMGFMT,
+        .w = cols,
+        .h = rows,
+        .p_w = 1,
+        .p_h = 1,
+    };
+
+    // Half-block mode consumes two pixel rows per terminal row.
+    const int rows_per_cell = priv->opts.algo == ALGO_PLAIN ? 1 : 2;
+    if (priv->frame)
+        talloc_free(priv->frame);
+    priv->frame = mp_image_alloc(IMGFMT, cols, rows * rows_per_cell);
+
+    if (!priv->frame || mp_sws_reinit(priv->sws) < 0)
+        return -1;
+
+    printf(TERM_ESC_CLEAR_SCREEN);
+    vo->want_redraw = true;
+    return 0;
+}
+
+// Scale the current video frame into the private frame buffer; the result is
+// written to the terminal by flip_page().
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+    struct mp_image *src = frame->current;
+    // p->frame is NULL if the last reconfig() could not allocate it (it frees
+    // the old buffer first), so there is nothing to scale into in that case.
+    if (!src || !p->frame)
+        return;
+    // XXX: pan, crop etc.
+    mp_sws_scale(p->sws, p->frame, src);
+}
+
+// Write the scaled frame to the terminal. If the terminal size changed since
+// the last reconfig(), the output is reconfigured first.
+static void flip_page(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    int width, height;
+    get_win_size(vo, &width, &height);
+
+    // reconfig() frees the old frame before allocating the new one, so after
+    // a failure there may be no frame at all (or one that was never scaled
+    // into). Skip this flip instead of dereferencing a NULL frame.
+    if ((vo->dwidth != width || vo->dheight != height) &&
+        reconfig(vo, vo->params) < 0)
+        return;
+    if (!p->frame)
+        return;
+
+    if (p->opts.algo == ALGO_PLAIN) {
+        write_plain(
+            vo->dwidth, vo->dheight, p->swidth, p->sheight,
+            p->frame->planes[0], p->frame->stride[0],
+            p->opts.term256, p->lut);
+    } else {
+        write_half_blocks(
+            vo->dwidth, vo->dheight, p->swidth, p->sheight,
+            p->frame->planes[0], p->frame->stride[0],
+            p->opts.term256, p->lut);
+    }
+    fflush(stdout);
+}
+
+// Undo the terminal changes made in preinit() (cursor, alternate screen) and
+// release the frame buffer.
+static void uninit(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+
+    printf(TERM_ESC_RESTORE_CURSOR);
+    printf(TERM_ESC_NORMAL_SCREEN);
+
+    if (priv->frame)
+        talloc_free(priv->frame);
+}
+
+// One-time setup: pixel aspect, scaler context, the ";<n>" lookup table, and
+// the terminal state (hidden cursor, alternate screen). Always returns 0.
+static int preinit(struct vo *vo)
+{
+    // most terminal characters aren't 1:1, so we default to 2:1.
+    // if user passes their own value of choice, it'll be scaled accordingly.
+    vo->monitor_par = vo->opts->monitor_pixel_aspect * 2;
+
+    struct priv *p = vo->priv;
+    p->sws = mp_sws_alloc(vo);
+    p->sws->log = vo->log;
+    mp_sws_enable_cmdline_opts(p->sws, vo->global);
+
+    for (int i = 0; i < 256; ++i) {
+        // Zero-initialized so that the fixed-size copy below never reads
+        // indeterminate bytes when the formatted text is shorter than 4 chars.
+        char buff[8] = {0};
+        p->lut[i].width = snprintf(buff, sizeof(buff), ";%d", i);
+        // some strings may not end on a null byte, but that's ok: only
+        // "width" bytes are ever written out.
+        memcpy(p->lut[i].str, buff, sizeof(p->lut[i].str));
+    }
+
+    printf(TERM_ESC_HIDE_CURSOR);
+    printf(TERM_ESC_ALT_SCREEN);
+
+    return 0;
+}
+
+// Only the packed format the terminal writers read (IMGFMT) is accepted.
+// Returns 1 for that format and 0 for everything else.
+static int query_format(struct vo *vo, int format)
+{
+    (void)vo;
+    if (format != IMGFMT)
+        return 0;
+    return 1;
+}
+
+// No control requests are handled by this output.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    (void)vo;
+    (void)request;
+    (void)data;
+    return VO_NOTIMPL;
+}
+
+#define OPT_BASE_STRUCT struct priv
+
+// Driver entry for the "tct" video output: callbacks, private state size and
+// defaults, and the user options (addressed relative to OPT_BASE_STRUCT,
+// i.e. struct priv).
+const struct vo_driver video_out_tct = {
+    .name = "tct",
+    .description = "true-color terminals",
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+    // Half-block rendering is the default; all other members start as zero,
+    // which means detected terminal size and 24-bit color output.
+    .priv_defaults = &(const struct priv) {
+        .opts.algo = ALGO_HALF_BLOCKS,
+    },
+    .options = (const m_option_t[]) {
+        {"algo", OPT_CHOICE(opts.algo,
+            {"plain", ALGO_PLAIN},
+            {"half-blocks", ALGO_HALF_BLOCKS})},
+        {"width", OPT_INT(opts.width)},
+        {"height", OPT_INT(opts.height)},
+        {"256", OPT_BOOL(opts.term256)},
+        {0}
+    },
+    .options_prefix = "vo-tct",
+};
diff --git a/video/out/vo_vaapi.c b/video/out/vo_vaapi.c
new file mode 100644
index 0000000..12888fe
--- /dev/null
+++ b/video/out/vo_vaapi.c
@@ -0,0 +1,877 @@
+/*
+ * VA API output module
+ *
+ * Copyright (C) 2008-2009 Splitted-Desktop Systems
+ * Gwenole Beauchesne <gbeauchesne@splitted-desktop.com>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <stdarg.h>
+#include <limits.h>
+
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include <va/va_x11.h>
+
+#include "common/msg.h"
+#include "video/out/vo.h"
+#include "video/mp_image_pool.h"
+#include "video/sws_utils.h"
+#include "sub/draw_bmp.h"
+#include "sub/img_convert.h"
+#include "sub/osd.h"
+#include "present_sync.h"
+#include "x11_common.h"
+
+#include "video/mp_image.h"
+#include "video/vaapi.h"
+#include "video/hwdec.h"
+
+// VA image plus the subpicture created from it, used to hold OSD pixels.
+struct vaapi_osd_image {
+    int            w, h;      // size the image was created with
+    VAImage        image;     // image_id is VA_INVALID_ID when not allocated
+    VASubpictureID subpic_id; // VA_INVALID_ID when not allocated
+    bool           is_used;   // not referenced in the code reviewed here
+};
+
+// Arguments for vaAssociateSubpicture(): the subpicture and its source and
+// destination rectangles.
+struct vaapi_subpic {
+    VASubpictureID id;
+    int src_x, src_y, src_w, src_h;
+    int dst_x, dst_y, dst_w, dst_h;
+};
+
+// State of the OSD overlay.
+struct vaapi_osd_part {
+    bool active;        // set by draw_osd() when there is something to show
+    int change_id;      // not referenced in the code reviewed here
+    struct vaapi_osd_image image;
+    struct vaapi_subpic subpic;
+};
+
+// Number of slots in the output/upload surface rings below.
+#define MAX_OUTPUT_SURFACES 2
+
+// Per-instance state of the VA API output.
+struct priv {
+    struct mp_log           *log;
+    struct vo               *vo;
+    VADisplay                display;
+    struct mp_vaapi_ctx     *mpvaapi;
+
+    struct mp_image_params   image_params;   // parameters of the last reconfig()
+    struct mp_rect           src_rect;       // updated by resize()
+    struct mp_rect           dst_rect;       // updated by resize()
+    struct mp_osd_res        screen_osd_res; // updated by resize()
+
+    // Frames queued for display, indexed by output_surface.
+    struct mp_image         *output_surfaces[MAX_OUTPUT_SURFACES];
+    // Upload targets used when the input is not already a VA surface.
+    struct mp_image         *swdec_surfaces[MAX_OUTPUT_SURFACES];
+
+    int                      output_surface;  // slot filled by draw_frame()
+    int                      visible_surface; // slot last shown by flip_page()
+    int                      scaling;         // OR-ed into the vaPutSurface() flags
+    bool                     force_scaled_osd;
+
+    VAImageFormat            osd_format; // corresponds to OSD_VA_FORMAT
+    struct vaapi_osd_part    osd_part;
+    bool                     osd_screen; // OSD uses screen instead of video coordinates
+    struct mp_draw_sub_cache *osd_cache; // created lazily in draw_osd()
+
+    struct mp_image_pool    *pool;       // source of IMGFMT_VAAPI surfaces
+
+    struct mp_image         *black_surface; // shown when there is no frame
+
+    VAImageFormat           *va_subpic_formats;
+    unsigned int            *va_subpic_flags;
+    int                      va_num_subpic_formats;
+    VADisplayAttribute      *va_display_attrs;
+    int                     *mp_display_attr;
+    int                      va_num_display_attrs;
+
+    struct va_image_formats *image_formats; // set by va_get_formats() on success
+};
+
+// FourCC of the OSD image format (see priv.osd_format). draw_osd() copies
+// 4 bytes per OSD pixel.
+#define OSD_VA_FORMAT VA_FOURCC_BGRA
+
+// Defined further down; needed by draw_frame().
+static void draw_osd(struct vo *vo);
+
+
+// One row of the VA FourCC <-> mpv image format mapping table.
+struct fmtentry {
+    uint32_t va;        // VA_FOURCC_* value; 0 marks the end of the table
+    enum mp_imgfmt mp;  // corresponding IMGFMT_* value
+};
+
+// Mapping table used in both directions. Several FourCCs map to the same
+// IMGFMT; va_fourcc_from_imgfmt() returns the first matching row, so the
+// order of rows matters for the reverse lookup.
+static const struct fmtentry va_to_imgfmt[] = {
+    {VA_FOURCC_NV12, IMGFMT_NV12},
+    {VA_FOURCC_YV12, IMGFMT_420P},
+    {VA_FOURCC_IYUV, IMGFMT_420P},
+    {VA_FOURCC_UYVY, IMGFMT_UYVY},
+    // Note: not sure about endian issues (the mp formats are byte-addressed)
+    {VA_FOURCC_RGBA, IMGFMT_RGBA},
+    {VA_FOURCC_RGBX, IMGFMT_RGBA},
+    {VA_FOURCC_BGRA, IMGFMT_BGRA},
+    {VA_FOURCC_BGRX, IMGFMT_BGRA},
+    {0             , IMGFMT_NONE}
+};
+
+// Translate a VA FourCC into the matching IMGFMT, or IMGFMT_NONE if the
+// table has no row for it.
+static enum mp_imgfmt va_fourcc_to_imgfmt(uint32_t fourcc)
+{
+    enum mp_imgfmt found = IMGFMT_NONE;
+    for (int i = 0; va_to_imgfmt[i].va; i++) {
+        if (va_to_imgfmt[i].va == fourcc) {
+            found = va_to_imgfmt[i].mp;
+            break;
+        }
+    }
+    return found;
+}
+
+// Translate an IMGFMT into a VA FourCC. Returns the FourCC of the first table
+// row with that format, or 0 if there is none.
+static uint32_t va_fourcc_from_imgfmt(int imgfmt)
+{
+    uint32_t found = 0;
+    for (int i = 0; va_to_imgfmt[i].va; i++) {
+        if (va_to_imgfmt[i].mp == imgfmt) {
+            found = va_to_imgfmt[i].va;
+            break;
+        }
+    }
+    return found;
+}
+
+// Image formats reported by vaQueryImageFormats() (see va_get_formats()).
+struct va_image_formats {
+    VAImageFormat *entries;  // array allocated as a child of this struct
+    int num;                 // number of valid entries
+};
+
+// Query the image formats supported by the display and store the list in
+// ctx->image_formats. On query failure ctx->image_formats is left untouched.
+static void va_get_formats(struct priv *ctx)
+{
+    struct va_image_formats *list = talloc_ptrtype(ctx, list);
+    list->num = vaMaxNumImageFormats(ctx->display);
+    list->entries = talloc_array(list, VAImageFormat, list->num);
+
+    // NOTE(review): "status" looks unused but appears to be read by the
+    // CHECK_VA_STATUS macro; keep the name.
+    VAStatus status = vaQueryImageFormats(ctx->display, list->entries,
+                                          &list->num);
+    if (CHECK_VA_STATUS(ctx, "vaQueryImageFormats()")) {
+        MP_VERBOSE(ctx, "%d image formats available:\n", list->num);
+        for (int n = 0; n < list->num; n++) {
+            const VAImageFormat *entry = &list->entries[n];
+            MP_VERBOSE(ctx, "  %s\n", mp_tag_str(entry->fourcc));
+        }
+        ctx->image_formats = list;
+    }
+}
+
+// Find the VAImageFormat supported by the display that corresponds to the
+// given IMGFMT. Returns NULL if the format has no FourCC mapping, if no
+// format list was obtained, or if the display does not list that FourCC.
+static VAImageFormat *va_image_format_from_imgfmt(struct priv *ctx,
+                                                  int imgfmt)
+{
+    struct va_image_formats *formats = ctx->image_formats;
+    // Keep the FourCC unsigned: it is produced as uint32_t and compared with
+    // the fourcc member of VAImageFormat below.
+    const uint32_t fourcc = va_fourcc_from_imgfmt(imgfmt);
+    if (!formats || !formats->num || !fourcc)
+        return NULL;
+    for (int i = 0; i < formats->num; i++) {
+        if (formats->entries[i].fourcc == fourcc)
+            return &formats->entries[i];
+    }
+    return NULL;
+}
+
+// Bookkeeping attached to an IMGFMT_VAAPI mp_image (stored in planes[0], see
+// alloc_surface() and va_surface_in_mp_image()).
+struct va_surface {
+    struct mp_vaapi_ctx *ctx;
+    VADisplay display;
+
+    VASurfaceID id;
+    int rt_format;
+
+    // The actually allocated surface size (needed for cropping).
+    // mp_images can have a smaller size than this, which means they are
+    // cropped down to a smaller size by removing right/bottom pixels.
+    int w, h;
+
+    // image.image_id is VA_INVALID_ID while no image is allocated.
+    VAImage image;       // used for software decoding case
+    bool is_derived;     // is image derived by vaDeriveImage()?
+};
+
+// Return the va_surface stored in an IMGFMT_VAAPI image, or NULL if mpi is
+// NULL or has a different format.
+static struct va_surface *va_surface_in_mp_image(struct mp_image *mpi)
+{
+    if (!mpi)
+        return NULL;
+    if (mpi->imgfmt != IMGFMT_VAAPI)
+        return NULL;
+    return (struct va_surface*)mpi->planes[0];
+}
+
+// Free callback registered by alloc_surface(): destroys the VA image (if
+// any) and the VA surface, then frees the bookkeeping struct itself.
+static void release_va_surface(void *arg)
+{
+    struct va_surface *surf = arg;
+    const bool has_surface = surf->id != VA_INVALID_ID;
+
+    if (has_surface && surf->image.image_id != VA_INVALID_ID)
+        vaDestroyImage(surf->display, surf->image.image_id);
+    if (has_surface)
+        vaDestroySurfaces(surf->display, &surf->id, 1);
+
+    talloc_free(surf);
+}
+
+// Create a VA surface of the given render target format and size and wrap it
+// in an IMGFMT_VAAPI mp_image. The image's planes[0] points to the
+// va_surface bookkeeping struct and planes[3] carries the surface ID;
+// release_va_surface() is registered as the free callback.
+// Returns NULL on failure.
+static struct mp_image *alloc_surface(struct mp_vaapi_ctx *ctx, int rt_format,
+                                      int w, int h)
+{
+    VASurfaceID id = VA_INVALID_ID;
+    VAStatus status;
+    status = vaCreateSurfaces(ctx->display, rt_format, w, h, &id, 1, NULL, 0);
+    if (!CHECK_VA_STATUS(ctx, "vaCreateSurfaces()"))
+        return NULL;
+
+    struct va_surface *surface = talloc_ptrtype(NULL, surface);
+    if (!surface) {
+        // Nothing owns the freshly created VA surface yet; destroy it here
+        // instead of leaking it.
+        vaDestroySurfaces(ctx->display, &id, 1);
+        return NULL;
+    }
+
+    *surface = (struct va_surface){
+        .ctx = ctx,
+        .id = id,
+        .rt_format = rt_format,
+        .w = w,
+        .h = h,
+        .display = ctx->display,
+        .image = { .image_id = VA_INVALID_ID, .buf = VA_INVALID_ID },
+    };
+
+    struct mp_image img = {0};
+    mp_image_setfmt(&img, IMGFMT_VAAPI);
+    mp_image_set_size(&img, w, h);
+    img.planes[0] = (uint8_t*)surface;
+    img.planes[3] = (uint8_t*)(uintptr_t)surface->id;
+    return mp_image_new_custom_ref(&img, surface, release_va_surface);
+}
+
+// Destroy the VAImage attached to the surface, if there is one, and mark the
+// surface as having no (derived) image. Accepts NULL.
+static void va_surface_image_destroy(struct va_surface *surface)
+{
+    if (surface && surface->image.image_id != VA_INVALID_ID) {
+        vaDestroyImage(surface->display, surface->image.image_id);
+        surface->image.image_id = VA_INVALID_ID;
+        surface->is_derived = false;
+    }
+}
+
+// Make sure the surface has a VAImage of the given format attached.
+// An existing image with the same FourCC is kept. Otherwise the old image is
+// destroyed and vaDeriveImage() is tried first; if deriving fails or yields
+// a different format or size, a separate image is created with
+// vaCreateImage(). Returns 0 on success, -1 if no image could be created.
+static int va_surface_image_alloc(struct va_surface *p, VAImageFormat *format)
+{
+    if (p->image.image_id != VA_INVALID_ID &&
+        p->image.format.fourcc == format->fourcc)
+        return 0;
+
+    int r = 0;
+
+    va_surface_image_destroy(p);
+
+    // p->display is already a VADisplay handle; pass it on as such rather
+    // than through a pointer-to-handle typed local.
+    VAStatus status = vaDeriveImage(p->display, p->id, &p->image);
+    if (status == VA_STATUS_SUCCESS) {
+        /* vaDeriveImage() is supported, check format */
+        if (p->image.format.fourcc == format->fourcc &&
+            p->image.width == p->w && p->image.height == p->h)
+        {
+            p->is_derived = true;
+            MP_TRACE(p->ctx, "Using vaDeriveImage()\n");
+        } else {
+            // Derived image is unusable; drop it and fall back below.
+            vaDestroyImage(p->display, p->image.image_id);
+            status = VA_STATUS_ERROR_OPERATION_FAILED;
+        }
+    }
+    if (status != VA_STATUS_SUCCESS) {
+        p->image.image_id = VA_INVALID_ID;
+        status = vaCreateImage(p->display, format, p->w, p->h, &p->image);
+        if (!CHECK_VA_STATUS(p->ctx, "vaCreateImage()")) {
+            p->image.image_id = VA_INVALID_ID;
+            r = -1;
+        }
+    }
+
+    return r;
+}
+
+// img must be an IMGFMT_VAAPI image. Ensures that the VAImage attached to its
+// surface has a format that maps to imgfmt. Returns 0 on success and -1 if
+// img is not a VAAPI surface, the format is unsupported, or allocation fails.
+static int va_surface_alloc_imgfmt(struct priv *priv, struct mp_image *img,
+                                   int imgfmt)
+{
+    struct va_surface *surf = va_surface_in_mp_image(img);
+    if (!surf)
+        return -1;
+
+    // Several FourCCs can map to one imgfmt, so compare after converting the
+    // current image's FourCC to an imgfmt.
+    const bool has_image = surf->image.image_id != VA_INVALID_ID;
+    if (has_image && va_fourcc_to_imgfmt(surf->image.format.fourcc) == imgfmt)
+        return 0;
+
+    VAImageFormat *format = va_image_format_from_imgfmt(priv, imgfmt);
+    if (!format || va_surface_image_alloc(surf, format) < 0)
+        return -1;
+    return 0;
+}
+
+// Map the buffer of a VAImage and describe it as an mp_image in *mpi (format,
+// size, plane pointers and strides). For YV12 planes 1 and 2 are swapped.
+// Returns false if the FourCC has no IMGFMT mapping or mapping fails; the
+// caller unmaps with va_image_unmap().
+static bool va_image_map(struct mp_vaapi_ctx *ctx, VAImage *image,
+                         struct mp_image *mpi)
+{
+    const int imgfmt = va_fourcc_to_imgfmt(image->format.fourcc);
+    if (imgfmt == IMGFMT_NONE)
+        return false;
+
+    void *base = NULL;
+    // NOTE(review): "status" appears to be read by CHECK_VA_STATUS; keep the name.
+    const VAStatus status = vaMapBuffer(ctx->display, image->buf, &base);
+    if (!CHECK_VA_STATUS(ctx, "vaMapBuffer()"))
+        return false;
+
+    *mpi = (struct mp_image) {0};
+    mp_image_setfmt(mpi, imgfmt);
+    mp_image_set_size(mpi, image->width, image->height);
+
+    uint8_t *bytes = base;
+    for (int plane = 0; plane < image->num_planes; plane++) {
+        mpi->planes[plane] = bytes + image->offsets[plane];
+        mpi->stride[plane] = image->pitches[plane];
+    }
+
+    if (image->format.fourcc == VA_FOURCC_YV12) {
+        MPSWAP(int, mpi->stride[1], mpi->stride[2]);
+        MPSWAP(uint8_t *, mpi->planes[1], mpi->planes[2]);
+    }
+
+    return true;
+}
+
+// Unmap a buffer mapped with va_image_map(). Returns false on failure.
+static bool va_image_unmap(struct mp_vaapi_ctx *ctx, VAImage *image)
+{
+    // NOTE(review): "status" appears to be read by CHECK_VA_STATUS; keep the name.
+    const VAStatus status = vaUnmapBuffer(ctx->display, image->buf);
+    const bool unmapped = CHECK_VA_STATUS(ctx, "vaUnmapBuffer()");
+    return unmapped;
+}
+
+// Copy a software image into a VA surface.
+//   va_dst: copy destination, must be IMGFMT_VAAPI
+//   sw_src: copy source, must be a software pixel format
+// The pixels are written through the surface's VAImage. A derived image is
+// destroyed after the copy; a separately created image is transferred with
+// vaPutImage(). Returns 0 on success, -1 on failure.
+static int va_surface_upload(struct priv *priv, struct mp_image *va_dst,
+                             struct mp_image *sw_src)
+{
+    struct va_surface *surf = va_surface_in_mp_image(va_dst);
+    if (!surf || va_surface_alloc_imgfmt(priv, va_dst, sw_src->imgfmt) < 0)
+        return -1;
+
+    struct mp_image mapped;
+    if (!va_image_map(surf->ctx, &surf->image, &mapped))
+        return -1;
+    assert(sw_src->w <= mapped.w && sw_src->h <= mapped.h);
+    mp_image_set_size(&mapped, sw_src->w, sw_src->h); // copy only visible part
+    mp_image_copy(&mapped, sw_src);
+    va_image_unmap(surf->ctx, &surf->image);
+
+    if (surf->is_derived) {
+        va_surface_image_destroy(surf);
+    } else {
+        VAStatus status = vaPutImage(surf->display, surf->id,
+                                     surf->image.image_id,
+                                     0, 0, sw_src->w, sw_src->h,
+                                     0, 0, sw_src->w, sw_src->h);
+        if (!CHECK_VA_STATUS(surf->ctx, "vaPutImage()"))
+            return -1;
+    }
+    return 0;
+}
+
+// Context handed to alloc_pool() by the image pool.
+struct pool_alloc_ctx {
+    struct mp_vaapi_ctx *vaapi;  // context used to create surfaces
+    int rt_format;               // render target format passed to alloc_surface()
+};
+
+// Image pool allocator callback: creates a VA surface for IMGFMT_VAAPI
+// requests and refuses every other format.
+static struct mp_image *alloc_pool(void *pctx, int fmt, int w, int h)
+{
+    if (fmt == IMGFMT_VAAPI) {
+        struct pool_alloc_ctx *alloc_ctx = pctx;
+        return alloc_surface(alloc_ctx->vaapi, alloc_ctx->rt_format, w, h);
+    }
+    return NULL;
+}
+
+// Configure the given image pool to allocate VAAPI surfaces with the given
+// rt_format, and switch the pool to LRU mode. The allocator context is
+// allocated as a child of the pool.
+static void va_pool_set_allocator(struct mp_image_pool *pool,
+                                  struct mp_vaapi_ctx *ctx, int rt_format)
+{
+    struct pool_alloc_ctx *alloc_ctx = talloc_ptrtype(pool, alloc_ctx);
+    alloc_ctx->vaapi = ctx;
+    alloc_ctx->rt_format = rt_format;
+
+    mp_image_pool_set_allocator(pool, alloc_pool, alloc_ctx);
+    mp_image_pool_set_lru(pool);
+}
+
+// Drop the references to all queued output frames and reset the ring
+// positions to the first slot.
+static void flush_output_surfaces(struct priv *p)
+{
+    int slot = 0;
+    while (slot < MAX_OUTPUT_SURFACES) {
+        mp_image_unrefp(&p->output_surfaces[slot]);
+        slot++;
+    }
+    p->visible_surface = 0;
+    p->output_surface = 0;
+}
+
+// Release everything that depends on the current video parameters: queued
+// output frames, the black fallback surface, the upload surfaces, and the
+// surfaces cached by the pool (if a pool exists).
+static void free_video_specific(struct priv *p)
+{
+    flush_output_surfaces(p);
+    mp_image_unrefp(&p->black_surface);
+
+    int slot = 0;
+    while (slot < MAX_OUTPUT_SURFACES) {
+        mp_image_unrefp(&p->swdec_surfaces[slot]);
+        slot++;
+    }
+
+    if (!p->pool)
+        return;
+    mp_image_pool_clear(p->pool);
+}
+
+// Allocate the upload surfaces used for software-decoded input of the given
+// size and format. Existing video-specific state is released first. Returns
+// false as soon as one surface cannot be set up (slots filled so far are
+// kept until the next free_video_specific()).
+static bool alloc_swdec_surfaces(struct priv *p, int w, int h, int imgfmt)
+{
+    free_video_specific(p);
+
+    for (int slot = 0; slot < MAX_OUTPUT_SURFACES; slot++) {
+        struct mp_image *surf = mp_image_pool_get(p->pool, IMGFMT_VAAPI, w, h);
+        p->swdec_surfaces[slot] = surf;
+        if (va_surface_alloc_imgfmt(p, surf, imgfmt) < 0)
+            return false;
+    }
+    return true;
+}
+
+// Recompute the source/destination rectangles and the screen OSD resolution
+// for the current window, then request a redraw.
+static void resize(struct priv *p)
+{
+    struct vo *vo = p->vo;
+
+    vo_get_src_dst_rects(vo, &p->src_rect, &p->dst_rect, &p->screen_osd_res);
+
+    // It's not clear whether this is needed; maybe not.
+    //vo_x11_clearwindow(p->vo, p->vo->x11->window);
+
+    vo->want_redraw = true;
+}
+
+// Switch to new video parameters: release the old video-specific state,
+// configure the X11 window, allocate upload surfaces when the input is not
+// already IMGFMT_VAAPI, and recompute the geometry.
+// Returns 0 on success, -1 if the upload surfaces cannot be allocated.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct priv *p = vo->priv;
+
+    free_video_specific(p);
+    vo_x11_config_vo_window(vo);
+
+    const bool needs_upload = params->imgfmt != IMGFMT_VAAPI;
+    if (needs_upload &&
+        !alloc_swdec_surfaces(p, params->w, params->h, params->imgfmt))
+        return -1;
+
+    p->image_params = *params;
+    resize(p);
+    return 0;
+}
+
+// Accepts IMGFMT_VAAPI and every software format for which the display
+// offers a matching VA image format. Returns 1 if accepted, 0 otherwise.
+static int query_format(struct vo *vo, int imgfmt)
+{
+    struct priv *p = vo->priv;
+
+    if (imgfmt == IMGFMT_VAAPI)
+        return 1;
+    return va_image_format_from_imgfmt(p, imgfmt) ? 1 : 0;
+}
+
+// Present the given image in the X11 window with vaPutSurface(), using the
+// current source/destination rectangles. If mpi provides no VA surface, a
+// black surface of the video size is created on first use and shown instead.
+// An active OSD subpicture is associated with the surface only for the
+// duration of the vaPutSurface() call.
+// Returns false only if no surface (not even the black one) is available;
+// VA errors from the calls below are checked via CHECK_VA_STATUS but do not
+// change the return value.
+static bool render_to_screen(struct priv *p, struct mp_image *mpi)
+{
+    VAStatus status;
+
+    VASurfaceID surface = va_surface_id(mpi);
+    if (surface == VA_INVALID_ID) {
+        // Lazily create the black fallback surface.
+        if (!p->black_surface) {
+            int w = p->image_params.w, h = p->image_params.h;
+            // 4:2:0 should work everywhere
+            int fmt = IMGFMT_420P;
+            p->black_surface = mp_image_pool_get(p->pool, IMGFMT_VAAPI, w, h);
+            if (p->black_surface) {
+                struct mp_image *img = mp_image_alloc(fmt, w, h);
+                if (img) {
+                    mp_image_clear(img, 0, 0, w, h);
+                    // On upload failure drop the surface again, so that the
+                    // lookup below yields VA_INVALID_ID.
+                    if (va_surface_upload(p, p->black_surface, img) < 0)
+                        mp_image_unrefp(&p->black_surface);
+                    talloc_free(img);
+                }
+            }
+        }
+        surface = va_surface_id(p->black_surface);
+    }
+
+    if (surface == VA_INVALID_ID)
+        return false;
+
+    struct vaapi_osd_part *part = &p->osd_part;
+    if (part->active) {
+        struct vaapi_subpic *sp = &part->subpic;
+        int flags = 0;
+        if (p->osd_screen)
+            flags |= VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD;
+        status = vaAssociateSubpicture(p->display,
+                                       sp->id, &surface, 1,
+                                       sp->src_x, sp->src_y,
+                                       sp->src_w, sp->src_h,
+                                       sp->dst_x, sp->dst_y,
+                                       sp->dst_w, sp->dst_h,
+                                       flags);
+        CHECK_VA_STATUS(p, "vaAssociateSubpicture()");
+    }
+
+    int flags = va_get_colorspace_flag(p->image_params.color.space) |
+                p->scaling | VA_FRAME_PICTURE;
+    status = vaPutSurface(p->display,
+                          surface,
+                          p->vo->x11->window,
+                          p->src_rect.x0,
+                          p->src_rect.y0,
+                          p->src_rect.x1 - p->src_rect.x0,
+                          p->src_rect.y1 - p->src_rect.y0,
+                          p->dst_rect.x0,
+                          p->dst_rect.y0,
+                          p->dst_rect.x1 - p->dst_rect.x0,
+                          p->dst_rect.y1 - p->dst_rect.y0,
+                          NULL, 0,
+                          flags);
+    CHECK_VA_STATUS(p, "vaPutSurface()");
+
+    // Undo the association made above.
+    if (part->active) {
+        struct vaapi_subpic *sp = &part->subpic;
+        status = vaDeassociateSubpicture(p->display, sp->id,
+                                         &surface, 1);
+        CHECK_VA_STATUS(p, "vaDeassociateSubpicture()");
+    }
+
+    return true;
+}
+
+// Show the frame in the current output slot, advance to the next slot of the
+// ring, and notify the X11 presentation helpers.
+static void flip_page(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    const int shown = p->output_surface;
+
+    p->visible_surface = shown;
+    render_to_screen(p, p->output_surfaces[shown]);
+    p->output_surface = (shown + 1) % MAX_OUTPUT_SURFACES;
+
+    vo_x11_present(vo);
+    present_sync_swap(vo->x11->present);
+}
+
+// Fill *info from the X11 presentation state.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    present_sync_get_info(vo->x11->present, info);
+}
+
+// Queue the current frame in the active output slot and render the OSD.
+// Software frames are first uploaded into the slot's upload surface and a
+// new reference to that surface is queued instead; on upload failure a
+// warning is logged and nothing is queued.
+// NOTE(review): frame->current is freed on upload failure and, for
+// IMGFMT_VAAPI input, stored without taking a new reference. Whether this
+// function owns that image cannot be told from here; confirm against the
+// caller.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+    const int slot = p->output_surface;
+    struct mp_image *img = frame->current;
+    const bool needs_upload = img && img->imgfmt != IMGFMT_VAAPI;
+
+    if (needs_upload) {
+        struct mp_image *target = p->swdec_surfaces[slot];
+        if (!target || va_surface_upload(p, target, img) < 0) {
+            MP_WARN(vo, "Could not upload surface.\n");
+            talloc_free(img);
+            return;
+        }
+        mp_image_copy_attributes(target, img);
+        img = mp_image_new_ref(target);
+    }
+
+    talloc_free(p->output_surfaces[slot]);
+    p->output_surfaces[slot] = img;
+
+    draw_osd(vo);
+}
+
+// Destroy the VA image and subpicture of an OSD image (whichever exist) and
+// mark both IDs as invalid. The stored w/h are left unchanged.
+static void free_subpicture(struct priv *p, struct vaapi_osd_image *img)
+{
+    const bool has_image = img->image.image_id != VA_INVALID_ID;
+    const bool has_subpic = img->subpic_id != VA_INVALID_ID;
+
+    if (has_image)
+        vaDestroyImage(p->display, img->image.image_id);
+    if (has_subpic)
+        vaDestroySubpicture(p->display, img->subpic_id);
+
+    img->subpic_id = VA_INVALID_ID;
+    img->image.image_id = VA_INVALID_ID;
+}
+
+// Replace *out with a freshly created OSD image and subpicture of size w x h
+// in the OSD format. The previous contents of *out are destroyed first.
+// Returns 0 on success; on failure logs an error, cleans up whatever was
+// created, leaves *out with invalid IDs, and returns -1.
+static int new_subpicture(struct priv *p, int w, int h,
+                          struct vaapi_osd_image *out)
+{
+    // NOTE(review): "status" appears to be read by CHECK_VA_STATUS; keep the name.
+    VAStatus status;
+
+    free_subpicture(p, out);
+
+    struct vaapi_osd_image fresh = {
+        .image = {.image_id = VA_INVALID_ID, .buf = VA_INVALID_ID},
+        .subpic_id = VA_INVALID_ID,
+        .w = w,
+        .h = h,
+    };
+
+    status = vaCreateImage(p->display, &p->osd_format, w, h, &fresh.image);
+    bool ok = CHECK_VA_STATUS(p, "vaCreateImage()");
+    if (ok) {
+        status = vaCreateSubpicture(p->display, fresh.image.image_id,
+                                    &fresh.subpic_id);
+        ok = CHECK_VA_STATUS(p, "vaCreateSubpicture()");
+    }
+
+    if (!ok) {
+        free_subpicture(p, &fresh);
+        MP_ERR(p, "failed to allocate OSD sub-picture of size %dx%d.\n", w, h);
+        return -1;
+    }
+
+    *out = fresh;
+    return 0;
+}
+
+// Render the OSD for the frame in the current output slot into the OSD
+// subpicture image and record in p->osd_part how render_to_screen() should
+// overlay it. Does nothing if no OSD format is set. On any failure the OSD
+// is simply left inactive for this frame.
+static void draw_osd(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    struct mp_image *cur = p->output_surfaces[p->output_surface];
+    double pts = cur ? cur->pts : 0;
+
+    if (!p->osd_format.fourcc)
+        return;
+
+    struct mp_osd_res vid_res = osd_res_from_image_params(vo->params);
+
+    // Render at screen resolution or at video resolution, depending on
+    // osd_screen.
+    struct mp_osd_res *res;
+    if (p->osd_screen) {
+        res = &p->screen_osd_res;
+    } else {
+        res = &vid_res;
+    }
+
+    p->osd_part.active = false;
+
+    if (!p->osd_cache)
+        p->osd_cache = mp_draw_sub_alloc(p, vo->global);
+
+    struct sub_bitmap_list *sbs = osd_render(vo->osd, *res, pts, 0,
+                                             mp_draw_sub_formats);
+
+    // act_rc receives at most one rectangle, mod_rc up to 64; the mod_rc
+    // rectangles are the regions copied into the VA image below.
+    struct mp_rect act_rc[1], mod_rc[64];
+    int num_act_rc = 0, num_mod_rc = 0;
+
+    struct mp_image *osd = mp_draw_sub_overlay(p->osd_cache, sbs,
+                    act_rc, MP_ARRAY_SIZE(act_rc), &num_act_rc,
+                    mod_rc, MP_ARRAY_SIZE(mod_rc), &num_mod_rc);
+
+    if (!osd)
+        goto error;
+
+    struct vaapi_osd_part *part = &p->osd_part;
+
+    part->active = false;
+
+    // (Re)create the subpicture whenever the OSD resolution changed.
+    int w = res->w;
+    int h = res->h;
+    if (part->image.w != w || part->image.h != h) {
+        if (new_subpicture(p, w, h, &part->image) < 0)
+            goto error;
+    }
+
+    struct vaapi_osd_image *img = &part->image;
+    struct mp_image vaimg;
+    if (!va_image_map(p->mpvaapi, &img->image, &vaimg))
+        goto error;
+
+    for (int n = 0; n < num_mod_rc; n++) {
+        struct mp_rect *rc = &mod_rc[n];
+
+        int rw = mp_rect_w(*rc);
+        int rh = mp_rect_h(*rc);
+
+        // Both images are addressed with 4 bytes per pixel.
+        void *src = mp_image_pixel_ptr(osd, 0, rc->x0, rc->y0);
+        void *dst = vaimg.planes[0] + rc->y0 * vaimg.stride[0] + rc->x0 * 4;
+
+        memcpy_pic(dst, src, rw * 4, rh, vaimg.stride[0], osd->stride[0]);
+    }
+
+    if (!va_image_unmap(p->mpvaapi, &img->image))
+        goto error;
+
+    if (num_act_rc) {
+        struct mp_rect rc = act_rc[0];
+        // The rectangle is widened to start at the origin, so source and
+        // destination both cover (0,0)..(x1,y1).
+        rc.x0 = rc.y0 = 0; // must be a Mesa bug
+        part->subpic = (struct vaapi_subpic) {
+            .id = img->subpic_id,
+            .src_x = rc.x0,         .src_y = rc.y0,
+            .src_w = mp_rect_w(rc), .src_h = mp_rect_h(rc),
+            .dst_x = rc.x0,         .dst_y = rc.y0,
+            .dst_w = mp_rect_w(rc), .dst_h = mp_rect_h(rc),
+        };
+        part->active = true;
+    }
+
+// Reached on success as well: the rendered bitmap list is always freed.
+error:
+    talloc_free(sbs);
+}
+
+// VO control entry point. Panscan changes are handled here by recomputing the
+// layout; every other request is forwarded to the X11 layer, and the events
+// it reports (resize, expose) are acted upon before being passed on.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    struct priv *priv = vo->priv;
+
+    if (request == VOCTRL_SET_PANSCAN) {
+        resize(priv);
+        return VO_TRUE;
+    }
+
+    int x11_events = 0;
+    int result = vo_x11_control(vo, &x11_events, request, data);
+
+    if ((x11_events & VO_EVENT_RESIZE) != 0)
+        resize(priv);
+    if ((x11_events & VO_EVENT_EXPOSE) != 0)
+        vo->want_redraw = true;
+
+    vo_event(vo, x11_events);
+    return result;
+}
+
+// Tears the VO down in order: video-specific state, the surface pool, the OSD
+// subpicture, the hwdec device registration (if any), the VA context, and
+// finally the X11 state. Also used by preinit() to clean up after a failure.
+static void uninit(struct vo *vo)
+{
+    struct priv *priv = vo->priv;
+
+    free_video_specific(priv);
+    talloc_free(priv->pool);
+    free_subpicture(priv, &priv->osd_part.image);
+
+    if (vo->hwdec_devs) {
+        hwdec_devices_remove(vo->hwdec_devs, &priv->mpvaapi->hwctx);
+        hwdec_devices_destroy(vo->hwdec_devs);
+    }
+
+    va_destroy(priv->mpvaapi);
+    vo_x11_uninit(vo);
+}
+// Sets up X11, the VA display and context, and the OSD/display-attribute tables; returns 0 or -1.
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    p->vo = vo;
+    p->log = vo->log;
+
+    VAStatus status;
+
+    if (!vo_x11_init(vo))
+        goto fail;
+
+    if (!vo_x11_create_vo_window(vo, NULL, "vaapi"))
+        goto fail;
+
+    p->display = vaGetDisplay(vo->x11->display);
+    if (!p->display)
+        goto fail;
+
+    p->mpvaapi = va_initialize(p->display, p->log, false);
+    if (!p->mpvaapi) {
+        vaTerminate(p->display); // no context was created, so the display is terminated here
+        p->display = NULL;
+        goto fail;
+    }
+
+    if (va_guess_if_emulated(p->mpvaapi)) {
+        MP_WARN(vo, "VA-API is most likely emulated via VDPAU.\n"
+                    "It's better to use VDPAU directly with: --vo=vdpau\n");
+    }
+    // Query the image formats; give up if none were stored in p->image_formats.
+    va_get_formats(p);
+    if (!p->image_formats)
+        goto fail;
+
+    p->mpvaapi->hwctx.hw_imgfmt = IMGFMT_VAAPI;
+    p->pool = mp_image_pool_new(p);
+    va_pool_set_allocator(p->pool, p->mpvaapi, VA_RT_FORMAT_YUV420);
+    // Enumerate the subpicture formats and pick the one matching OSD_VA_FORMAT for the OSD.
+    int max_subpic_formats = vaMaxNumSubpictureFormats(p->display);
+    p->va_subpic_formats = talloc_array(vo, VAImageFormat, max_subpic_formats);
+    p->va_subpic_flags = talloc_array(vo, unsigned int, max_subpic_formats);
+    status = vaQuerySubpictureFormats(p->display,
+                                      p->va_subpic_formats,
+                                      p->va_subpic_flags,
+                                      &p->va_num_subpic_formats);
+    if (!CHECK_VA_STATUS(p, "vaQuerySubpictureFormats()"))
+        p->va_num_subpic_formats = 0;
+    MP_VERBOSE(vo, "%d subpicture formats available:\n",
+               p->va_num_subpic_formats);
+
+    for (int i = 0; i < p->va_num_subpic_formats; i++) {
+        MP_VERBOSE(vo, "  %s, flags 0x%x\n",
+                   mp_tag_str(p->va_subpic_formats[i].fourcc),
+                   p->va_subpic_flags[i]);
+        if (p->va_subpic_formats[i].fourcc == OSD_VA_FORMAT) {
+            p->osd_format = p->va_subpic_formats[i];
+            if (!p->force_scaled_osd) { // skipped when the "scaled-osd" option is set
+                p->osd_screen =
+                    p->va_subpic_flags[i] & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD;
+            }
+        }
+    }
+
+    if (!p->osd_format.fourcc)
+        MP_ERR(vo, "OSD format not supported. Disabling OSD.\n");
+    // Mark the OSD image and subpicture as not allocated yet.
+    struct vaapi_osd_part *part = &p->osd_part;
+    part->image.image.image_id = VA_INVALID_ID;
+    part->image.subpic_id = VA_INVALID_ID;
+    // Fetch the display attribute list; the count is reset to 0 if the query fails.
+    int max_display_attrs = vaMaxNumDisplayAttributes(p->display);
+    p->va_display_attrs = talloc_array(vo, VADisplayAttribute, max_display_attrs);
+    if (p->va_display_attrs) {
+        status = vaQueryDisplayAttributes(p->display, p->va_display_attrs,
+                                          &p->va_num_display_attrs);
+        if (!CHECK_VA_STATUS(p, "vaQueryDisplayAttributes()"))
+            p->va_num_display_attrs = 0;
+        p->mp_display_attr = talloc_zero_array(vo, int, p->va_num_display_attrs);
+    }
+    // Register the VA context as a hwdec device on this VO.
+    vo->hwdec_devs = hwdec_devices_create();
+    hwdec_devices_add(vo->hwdec_devs, &p->mpvaapi->hwctx);
+
+    MP_WARN(vo, "Warning: this compatibility VO is low quality and may "
+                "have issues with OSD, scaling, screenshots and more.\n"
+                "vo=gpu is the preferred choice in any case and "
+                "includes VA-API support via hwdec=vaapi or vaapi-copy.\n");
+
+    return 0;
+    // uninit() doubles as the cleanup routine for a partially initialized VO.
+fail:
+    uninit(vo);
+    return -1;
+}
+
+#define OPT_BASE_STRUCT struct priv
+// Driver table for the "vaapi" VO: callbacks, private-data layout and sub-options.
+const struct vo_driver video_out_vaapi = {
+    .description = "VA API with X11",
+    .name = "vaapi",
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+    .wakeup = vo_x11_wakeup,
+    .wait_events = vo_x11_wait_events,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+    .priv_defaults = &(const struct priv) {
+        .scaling = VA_FILTER_SCALING_DEFAULT,
+    },
+    .options = (const struct m_option[]) {
+        {"scaling", OPT_CHOICE(scaling,
+            {"default", VA_FILTER_SCALING_DEFAULT},
+            {"fast", VA_FILTER_SCALING_FAST},
+            {"hq", VA_FILTER_SCALING_HQ},
+            {"nla", VA_FILTER_SCALING_NL_ANAMORPHIC})},
+        {"scaled-osd", OPT_BOOL(force_scaled_osd)}, // when set, preinit() leaves osd_screen untouched
+        {0}
+    },
+    .options_prefix = "vo-vaapi",
+};
diff --git a/video/out/vo_vdpau.c b/video/out/vo_vdpau.c
new file mode 100644
index 0000000..d6b261f
--- /dev/null
+++ b/video/out/vo_vdpau.c
@@ -0,0 +1,1139 @@
+/*
+ * VDPAU video output driver
+ *
+ * Copyright (C) 2008 NVIDIA (Rajib Mahapatra <rmahapatra@nvidia.com>)
+ * Copyright (C) 2009 Uoti Urpala
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+/*
+ * Actual decoding is done in video/decode/vdpau.c
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <limits.h>
+#include <assert.h>
+
+#include "video/vdpau.h"
+#include "video/vdpau_mixer.h"
+#include "video/hwdec.h"
+#include "common/msg.h"
+#include "options/options.h"
+#include "mpv_talloc.h"
+#include "vo.h"
+#include "x11_common.h"
+#include "video/csputils.h"
+#include "sub/osd.h"
+#include "options/m_option.h"
+#include "video/mp_image.h"
+#include "osdep/timer.h"
+
+// Evaluates to x + a wrapped into the range [0, m). Preconditions: a must be
+// within [-m, m] and x within [0, m). NOTE: each argument is evaluated several times.
+#define WRAP_ADD(x, a, m) ((a) < 0 \
+                           ? ((x)+(a)+(m) < (m) ? (x)+(a)+(m) : (x)+(a)) \
+                           : ((x)+(a) < (m) ? (x)+(a) : (x)+(a)-(m)))
+
+
+/* maximum number of output surfaces (size of the output_surfaces array) */
+#define MAX_OUTPUT_SURFACES                15
+
+/* Pixelformat used for output surfaces */
+#define OUTPUT_RGBA_FORMAT VDP_RGBA_FORMAT_B8G8R8A8
+
+/*
+ * Per-VO VDPAU state, stored in vo->priv
+ */
+
+struct vdpctx {
+    struct mp_vdpau_ctx               *mpvdp; // supplies vdp_device again after preemption
+    struct vdp_functions              *vdp;
+    VdpDevice                          vdp_device;
+    uint64_t                           preemption_counter; // passed to mp_vdpau_handle_preemption()
+
+    struct m_color                     colorkey; // used as queue background when its alpha is > 0
+
+    VdpPresentationQueueTarget         flip_target;
+    VdpPresentationQueue               flip_queue;
+
+    VdpOutputSurface                   output_surfaces[MAX_OUTPUT_SURFACES];
+    int                                num_output_surfaces; // entries (re)created by resize()
+    VdpOutputSurface                   black_pixel; // 1x1 surface used to clear the output
+    VdpOutputSurface                   rotation_surface; // intermediate target for rotated video
+
+    struct mp_image                   *current_image;
+    int64_t                            current_pts;
+    int                                current_duration;
+
+    int                                output_surface_w, output_surface_h; // -1 while uninitialized
+    int                                rotation; // rotation seen by the last resize()
+
+    bool                               force_yuv;
+    struct mp_vdpau_mixer             *video_mixer;
+    bool                               pullup;
+    float                              denoise;
+    float                              sharpen;
+    int                                hqscaling;
+    bool                               chroma_deint;
+    int                                flip_offset_window; // multiplied by 1000 into flip_offset_us
+    int                                flip_offset_fs; // same, used while fullscreen
+    int64_t                            flip_offset_us;
+
+    VdpRect                            src_rect_vid;
+    VdpRect                            out_rect_vid;
+    struct mp_osd_res                  osd_rect;
+    VdpBool                            supports_a8; // enables SUBBITMAP_LIBASS for OSD rendering
+
+    int                                surface_num; // indexes output_surfaces
+    int                                query_surface_num; // next surface whose queue status is polled
+    VdpTime                            recent_vsync_time;
+    float                              user_fps;
+    bool                               composite_detect;
+    int                                vsync_interval;
+    uint64_t                           last_queue_time;
+    uint64_t                           queue_time[MAX_OUTPUT_SURFACES];
+    uint64_t                           last_ideal_time;
+    bool                               dropped_frame;
+    uint64_t                           dropped_time;
+    uint32_t                           vid_width, vid_height;
+    uint32_t                           image_format;
+    VdpYCbCrFormat                     vdp_pixel_format;
+    bool                               rgb_mode; // result of mp_vdpau_get_rgb_format() in reconfig()
+
+    // OSD
+    struct osd_bitmap_surface {
+        VdpRGBAFormat format;
+        VdpBitmapSurface surface;
+        uint32_t surface_w, surface_h;
+        // List of surfaces to be rendered
+        struct osd_target {
+            VdpRect source;
+            VdpRect dest;
+            VdpColor color;
+        } *targets;
+        int targets_size;
+        int render_count; // number of targets drawn by draw_osd_part()
+        int change_id; // change_id of the sub_bitmaps last processed
+    } osd_surfaces[MAX_OSD_PARTS];
+};
+
+static bool status_ok(struct vo *vo);
+
+// Renders mpi into the current output surface (vc->surface_num), going through
+// the rotation surface when the image is rotated. Returns 0 on success and -1
+// when there is no image or the rotation angle is not 90, 180 or 270.
+static int video_to_output_surface(struct vo *vo, struct mp_image *mpi)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+    VdpTime idle_time;
+
+    VdpOutputSurface target = vc->output_surfaces[vc->surface_num];
+    VdpRect *dst = &vc->out_rect_vid;
+    VdpRect *src = &vc->src_rect_vid;
+
+    vdp_st = vdp->presentation_queue_block_until_surface_idle(vc->flip_queue,
+                                                              target,
+                                                              &idle_time);
+    CHECK_VDP_WARNING(vo, "Error when calling "
+                      "vdp_presentation_queue_block_until_surface_idle");
+
+    // Clear the borders between video and window (if there are any).
+    // For some reason, video_mixer_render doesn't need it for YUV.
+    // Also, if there is nothing to render, at least clear the screen.
+    bool need_clear = vc->rgb_mode || !mpi || mpi->params.rotate != 0;
+    if (need_clear) {
+        vdp_st = vdp->output_surface_render_output_surface(
+            target, NULL, vc->black_pixel, NULL, NULL, NULL,
+            VDP_OUTPUT_SURFACE_RENDER_ROTATE_0);
+        CHECK_VDP_WARNING(vo, "Error clearing screen");
+    }
+
+    if (!mpi)
+        return -1;
+
+    // Start from the per-frame mixer options, if the frame carries any.
+    struct mp_vdpau_mixer_opts opts = {0};
+    struct mp_vdpau_mixer_frame *frame = mp_vdpau_mixed_frame_get(mpi);
+    if (frame)
+        opts = frame->opts;
+
+    // Apply custom vo_vdpau suboptions.
+    opts.chroma_deint |= vc->chroma_deint;
+    opts.pullup |= vc->pullup;
+    opts.denoise = MPCLAMP(opts.denoise + vc->denoise, 0, 1);
+    opts.sharpen = MPCLAMP(opts.sharpen + vc->sharpen, -1, 1);
+    if (vc->hqscaling)
+        opts.hqscaling = vc->hqscaling;
+
+    // Unrotated video is mixed straight into the output surface.
+    if (mpi->params.rotate == 0) {
+        mp_vdpau_mixer_render(vc->video_mixer, &opts, target, dst, mpi, src);
+        return 0;
+    }
+
+    int flags;
+    VdpRect r_rect;
+    switch (mpi->params.rotate) {
+    case 90:
+    case 270:
+        // Quarter turns: the intermediate rectangle has x and y exchanged.
+        r_rect.x0 = dst->y0;
+        r_rect.x1 = dst->y1;
+        r_rect.y0 = dst->x0;
+        r_rect.y1 = dst->x1;
+        flags = mpi->params.rotate == 90 ? VDP_OUTPUT_SURFACE_RENDER_ROTATE_90
+                                         : VDP_OUTPUT_SURFACE_RENDER_ROTATE_270;
+        break;
+    case 180:
+        r_rect = *dst;
+        flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_180;
+        break;
+    default:
+        MP_ERR(vo, "Unsupported rotation angle: %u\n", mpi->params.rotate);
+        return -1;
+    }
+
+    // Mix into the rotation surface first, then copy it rotated to the output.
+    mp_vdpau_mixer_render(vc->video_mixer, &opts, vc->rotation_surface,
+                          &r_rect, mpi, src);
+    vdp_st = vdp->output_surface_render_output_surface(target, dst,
+                                                       vc->rotation_surface,
+                                                       &r_rect, NULL, NULL,
+                                                       flags);
+    CHECK_VDP_WARNING(vo, "Error rendering rotated frame");
+    return 0;
+}
+
+// Resets per-frame state. The dropped-frame flag is always cleared; the
+// reference to the currently held image is released unless seek_reset is set.
+static void forget_frames(struct vo *vo, bool seek_reset)
+{
+    struct vdpctx *ctx = vo->priv;
+
+    ctx->dropped_frame = false;
+    if (seek_reset)
+        return;
+
+    mp_image_unrefp(&ctx->current_image);
+}
+
+// Picks a surface dimension: the larger of the current size s and the display
+// size disp (which counts as at least 1), limited to max.
+static int s_size(int max, int s, int disp)
+{
+    int wanted = MPMAX(disp, 1);
+    if (s > wanted)
+        wanted = s;
+    return MPMIN(max, wanted);
+}
+
+// Recomputes the video source/destination rectangles for the current window
+// and updates the queue timing parameters. When the window has outgrown the
+// output surfaces, or the video rotation changed, all output surfaces and the
+// rotation surface are destroyed and recreated. Requests a redraw at the end.
+// Returns early, without touching the surfaces, if the output surface
+// capability query fails or reports the format as unsupported.
+static void resize(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+    struct mp_rect src_rect;
+    struct mp_rect dst_rect;
+    vo_get_src_dst_rects(vo, &src_rect, &dst_rect, &vc->osd_rect);
+    vc->out_rect_vid.x0 = dst_rect.x0;
+    vc->out_rect_vid.x1 = dst_rect.x1;
+    vc->out_rect_vid.y0 = dst_rect.y0;
+    vc->out_rect_vid.y1 = dst_rect.y1;
+    // For quarter turns the source rectangle is stored with x and y exchanged.
+    if (vo->params->rotate == 90 || vo->params->rotate == 270) {
+        vc->src_rect_vid.y0 = src_rect.x0;
+        vc->src_rect_vid.y1 = src_rect.x1;
+        vc->src_rect_vid.x0 = src_rect.y0;
+        vc->src_rect_vid.x1 = src_rect.y1;
+    } else {
+        vc->src_rect_vid.x0 = src_rect.x0;
+        vc->src_rect_vid.x1 = src_rect.x1;
+        vc->src_rect_vid.y0 = src_rect.y0;
+        vc->src_rect_vid.y1 = src_rect.y1;
+    }
+
+    VdpBool ok;
+    uint32_t max_w, max_h;
+    vdp_st = vdp->output_surface_query_capabilities(vc->vdp_device,
+                                                    OUTPUT_RGBA_FORMAT,
+                                                    &ok, &max_w, &max_h);
+    if (vdp_st != VDP_STATUS_OK || !ok)
+        return;
+
+    vc->flip_offset_us = vo->opts->fullscreen ?
+                         1000LL * vc->flip_offset_fs :
+                         1000LL * vc->flip_offset_window;
+    vo_set_queue_params(vo, vc->flip_offset_us * 1000, 1);
+
+    if (vc->output_surface_w < vo->dwidth || vc->output_surface_h < vo->dheight ||
+        vc->rotation != vo->params->rotate)
+    {
+        vc->output_surface_w = s_size(max_w, vc->output_surface_w, vo->dwidth);
+        vc->output_surface_h = s_size(max_h, vc->output_surface_h, vo->dheight);
+        // Destroy the existing output surfaces, then create them at the new size.
+        for (int i = 0; i < vc->num_output_surfaces; i++)
+            if (vc->output_surfaces[i] != VDP_INVALID_HANDLE) {
+                vdp_st = vdp->output_surface_destroy(vc->output_surfaces[i]);
+                CHECK_VDP_WARNING(vo, "Error when calling "
+                                  "vdp_output_surface_destroy");
+            }
+        for (int i = 0; i < vc->num_output_surfaces; i++) {
+            vdp_st = vdp->output_surface_create(vc->vdp_device,
+                                                OUTPUT_RGBA_FORMAT,
+                                                vc->output_surface_w,
+                                                vc->output_surface_h,
+                                                &vc->output_surfaces[i]);
+            CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_create");
+            MP_DBG(vo, "vdpau out create: %u\n",
+                   vc->output_surfaces[i]);
+        }
+        if (vc->rotation_surface != VDP_INVALID_HANDLE) {
+            vdp_st = vdp->output_surface_destroy(vc->rotation_surface);
+            CHECK_VDP_WARNING(vo, "Error when calling "
+                              "vdp_output_surface_destroy");
+            vc->rotation_surface = VDP_INVALID_HANDLE;
+        }
+        // A rotation surface is only needed for 90/180/270 degree rotation;
+        // quarter turns use swapped dimensions.
+        bool rotation_surface_requested = false;
+        if (vo->params->rotate == 90 || vo->params->rotate == 270) {
+            vdp_st = vdp->output_surface_create(vc->vdp_device,
+                                                OUTPUT_RGBA_FORMAT,
+                                                vc->output_surface_h,
+                                                vc->output_surface_w,
+                                                &vc->rotation_surface);
+            rotation_surface_requested = true;
+        } else if (vo->params->rotate == 180) {
+            vdp_st = vdp->output_surface_create(vc->vdp_device,
+                                                OUTPUT_RGBA_FORMAT,
+                                                vc->output_surface_w,
+                                                vc->output_surface_h,
+                                                &vc->rotation_surface);
+            rotation_surface_requested = true;
+        }
+        // Only check the status when a create call was actually made;
+        // otherwise vdp_st still holds the result of an earlier call and the
+        // same failure would be reported a second time.
+        if (rotation_surface_requested) {
+            CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_create");
+            MP_DBG(vo, "vdpau rotation surface create: %u\n",
+                   vc->rotation_surface);
+        }
+    }
+    vc->rotation = vo->params->rotate;
+    vo->want_redraw = true;
+}
+// Creates the presentation queue target and queue for the X11 window when missing; returns 0.
+static int win_x11_init_vdpau_flip_queue(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    struct vo_x11_state *x11 = vo->x11;
+    VdpStatus vdp_st;
+    // Existing handles are kept; only objects still at VDP_INVALID_HANDLE are created.
+    if (vc->flip_target == VDP_INVALID_HANDLE) {
+        vdp_st = vdp->presentation_queue_target_create_x11(vc->vdp_device,
+                                                           x11->window,
+                                                           &vc->flip_target);
+        CHECK_VDP_ERROR(vo, "Error when calling " // NOTE(review): presumably returns an error on failure - confirm in macro
+                        "vdp_presentation_queue_target_create_x11");
+    }
+
+    /* Empirically this seems to be the first call which fails when we
+     * try to reinit after preemption while the user is still switched
+     * from X to a virtual terminal (creating the vdp_device initially
+     * succeeds, as does creating the flip_target above). This is
+     * probably not guaranteed behavior.
+     */
+    if (vc->flip_queue == VDP_INVALID_HANDLE) {
+        vdp_st = vdp->presentation_queue_create(vc->vdp_device, vc->flip_target,
+                                                &vc->flip_queue);
+        CHECK_VDP_ERROR(vo, "Error when calling vdp_presentation_queue_create");
+    }
+    // A colorkey with non-zero alpha becomes the queue background colour (passed with alpha 0).
+    if (vc->colorkey.a > 0) {
+        VdpColor color = {
+            .red = vc->colorkey.r / 255.0,
+            .green = vc->colorkey.g / 255.0,
+            .blue = vc->colorkey.b / 255.0,
+            .alpha = 0,
+        };
+        vdp_st = vdp->presentation_queue_set_background_color(vc->flip_queue,
+                                                              &color);
+        CHECK_VDP_WARNING(vo, "Error setting colorkey");
+    }
+
+    if (vc->composite_detect && vo_x11_screen_is_composited(vo)) {
+        MP_INFO(vo, "Compositing window manager detected. Assuming timing info "
+                "is inaccurate.\n");
+        vc->user_fps = -1; // NOTE(review): -1 presumably disables fps-based timing - confirm
+    }
+
+    return 0;
+}
+
+// Releases everything tied to the currently configured video: the held frame
+// (via forget_frames) and the 1x1 clearing surface, if one exists.
+static void free_video_specific(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+
+    forget_frames(vo, false);
+
+    if (vc->black_pixel == VDP_INVALID_HANDLE)
+        return;
+
+    vdp_st = vdp->output_surface_destroy(vc->black_pixel);
+    CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_destroy");
+    vc->black_pixel = VDP_INVALID_HANDLE;
+}
+// (Re)creates the VDPAU objects this VO needs on the current device; returns 0, or -1 on failure.
+static int initialize_vdpau_objects(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+
+    mp_vdpau_get_format(vc->image_format, NULL, &vc->vdp_pixel_format);
+
+    vc->video_mixer->initialized = false;
+
+    if (win_x11_init_vdpau_flip_queue(vo) < 0)
+        return -1;
+    // Create the 1x1 clearing surface on demand and upload a single zeroed 4-byte pixel.
+    if (vc->black_pixel == VDP_INVALID_HANDLE) {
+        vdp_st = vdp->output_surface_create(vc->vdp_device, OUTPUT_RGBA_FORMAT,
+                                            1, 1, &vc->black_pixel);
+        CHECK_VDP_ERROR(vo, "Allocating clearing surface"); // NOTE(review): presumably returns on failure - confirm in macro
+        const char data[4] = {0};
+        vdp_st = vdp->output_surface_put_bits_native(vc->black_pixel,
+                                                     (const void*[]){data},
+                                                     (uint32_t[]){4}, NULL);
+        CHECK_VDP_ERROR(vo, "Initializing clearing surface");
+    }
+    // Drop the held frame, then size the output surfaces for the current window.
+    forget_frames(vo, false);
+    resize(vo);
+    return 0;
+}
+
+// Forgets every VDPAU handle held by this VO without destroying it: all
+// handles are set to VDP_INVALID_HANDLE, the OSD surface records are reset and
+// the output surface size is marked unknown (-1). Called from
+// check_preemption() before the objects are recreated.
+static void mark_vdpau_objects_uninitialized(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+
+    forget_frames(vo, false);
+    vc->black_pixel = VDP_INVALID_HANDLE;
+    vc->flip_queue = VDP_INVALID_HANDLE;
+    vc->flip_target = VDP_INVALID_HANDLE;
+    for (int i = 0; i < MAX_OUTPUT_SURFACES; i++)
+        vc->output_surfaces[i] = VDP_INVALID_HANDLE;
+    vc->rotation_surface = VDP_INVALID_HANDLE;
+    vc->vdp_device = VDP_INVALID_HANDLE;
+    for (int i = 0; i < MAX_OSD_PARTS; i++) {
+        struct osd_bitmap_surface *sfc = &vc->osd_surfaces[i];
+        // The reset below clears the targets pointer, so release the array
+        // first; otherwise it stays allocated (unreachable) until vc itself
+        // is freed. talloc_free(NULL) is a no-op.
+        talloc_free(sfc->targets);
+        // Resets every field (including change_id) to zero.
+        *sfc = (struct osd_bitmap_surface){
+            .surface = VDP_INVALID_HANDLE,
+        };
+    }
+    vc->output_surface_w = vc->output_surface_h = -1;
+}
+
+// Polls the shared VDPAU context for preemption. A result of 1 or more from
+// mp_vdpau_handle_preemption() means nothing has to be done. Otherwise all
+// handles are marked invalid; a negative result then fails, while 0 picks up
+// the context's device and recreates the VDPAU objects.
+// Returns true if the VO is usable afterwards.
+static bool check_preemption(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+
+    int state = mp_vdpau_handle_preemption(vc->mpvdp, &vc->preemption_counter);
+    if (state >= 1)
+        return true;
+
+    mark_vdpau_objects_uninitialized(vo);
+    if (state < 0)
+        return false;
+
+    vc->vdp_device = vc->mpvdp->vdp_device;
+    return initialize_vdpau_objects(vo) >= 0;
+}
+
+// True when the VO is configured and the preemption check passes. The
+// preemption check is skipped entirely while the VO is not configured.
+static bool status_ok(struct vo *vo)
+{
+    if (!vo->config_ok)
+        return false;
+    return check_preemption(vo);
+}
+
+/*
+ * Reconfigures the VO for new video parameters: checks that the device
+ * supports video surfaces of the required chroma type and size, stores the
+ * new format, configures the X11 window and (re)initializes the VDPAU
+ * objects. Returns 0 on success or while preempted, -1 on failure.
+ */
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+
+    if (!check_preemption(vo))
+    {
+        /*
+         * When preempted, leave the reconfig() immediately
+         * without reconfiguring the vo_window and without
+         * initializing the vdpau objects. When recovered
+         * from preemption, if there is a difference between
+         * the VD thread parameters and the VO thread parameters
+         * the reconfig() is triggered again.
+         */
+        return 0;
+    }
+
+    // 4:2:0 is the fallback if the format lookup does not set a chroma type.
+    VdpChromaType chroma_type = VDP_CHROMA_TYPE_420;
+    mp_vdpau_get_format(params->imgfmt, &chroma_type, NULL);
+
+    VdpBool ok;
+    uint32_t max_w, max_h;
+    vdp_st = vdp->video_surface_query_capabilities(vc->vdp_device, chroma_type,
+                                                   &ok, &max_w, &max_h);
+    CHECK_VDP_ERROR(vo, "Error when calling vdp_video_surface_query_capabilities");
+
+    if (!ok)
+        return -1;
+    // The chroma type is supported at this point, so only the size can be
+    // the problem.
+    if (params->w > max_w || params->h > max_h) {
+        MP_ERR(vo, "Video too large for vdpau.\n");
+        return -1;
+    }
+
+    vc->image_format = params->imgfmt;
+    vc->vid_width    = params->w;
+    vc->vid_height   = params->h;
+
+    vc->rgb_mode = mp_vdpau_get_rgb_format(params->imgfmt, NULL);
+
+    free_video_specific(vo);
+
+    vo_x11_config_vo_window(vo);
+
+    if (initialize_vdpau_objects(vo) < 0)
+        return -1;
+
+    return 0;
+}
+
+// Blends all prepared targets of OSD part `index` onto the current output
+// surface. Colour is blended with source alpha, except for B8G8R8A8 bitmaps,
+// whose colour source factor is ONE (the original code names this state
+// "premultiplied").
+static void draw_osd_part(struct vo *vo, int index)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+    struct osd_bitmap_surface *sfc = &vc->osd_surfaces[index];
+    VdpOutputSurface dst_surface = vc->output_surfaces[vc->surface_num];
+
+    VdpOutputSurfaceRenderBlendState blend = {
+        .struct_version = VDP_OUTPUT_SURFACE_RENDER_BLEND_STATE_VERSION,
+        .blend_factor_source_color =
+            VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_SRC_ALPHA,
+        .blend_factor_source_alpha =
+            VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ZERO,
+        .blend_factor_destination_color =
+            VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA,
+        .blend_factor_destination_alpha =
+            VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ZERO,
+        .blend_equation_color = VDP_OUTPUT_SURFACE_RENDER_BLEND_EQUATION_ADD,
+        .blend_equation_alpha = VDP_OUTPUT_SURFACE_RENDER_BLEND_EQUATION_ADD,
+    };
+
+    // The surface format is the same for every target, so choose the colour
+    // source factor once up front.
+    if (sfc->format == VDP_RGBA_FORMAT_B8G8R8A8) {
+        blend.blend_factor_source_color =
+            VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE;
+    }
+
+    for (int n = 0; n < sfc->render_count; n++) {
+        struct osd_target *target = &sfc->targets[n];
+        vdp_st = vdp->output_surface_render_bitmap_surface(
+            dst_surface, &target->dest, sfc->surface, &target->source,
+            &target->color, &blend, VDP_OUTPUT_SURFACE_RENDER_ROTATE_0);
+        CHECK_VDP_WARNING(vo, "OSD: Error when rendering");
+    }
+}
+
+// Returns the smallest power of two that is >= v, considering 2^0 .. 2^29.
+// Values of v that are zero or negative yield 1; values above 2^29 yield
+// INT_MAX.
+static int next_pow2(int v)
+{
+    int shift = 0;
+    while (shift < 30) {
+        int candidate = 1 << shift;
+        if (candidate >= v)
+            return candidate;
+        shift++;
+    }
+    return INT_MAX;
+}
+
+// Uploads the packed bitmaps of one OSD part to its VDPAU bitmap surface and
+// fills sfc->targets with one source/dest/colour entry per part. Does nothing
+// if imgs->change_id matches the last processed one. The bitmap surface is
+// (re)allocated with power-of-two dimensions when the format changed, no
+// surface exists, or the existing one is too small. On any failure
+// render_count stays 0, so draw_osd_part() draws nothing for this part.
+static void generate_osd_part(struct vo *vo, struct sub_bitmaps *imgs)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+    struct osd_bitmap_surface *sfc = &vc->osd_surfaces[imgs->render_index];
+
+    if (imgs->change_id == sfc->change_id)
+        return; // Nothing changed and we still have the old data
+
+    sfc->change_id = imgs->change_id;
+    sfc->render_count = 0;
+
+    if (imgs->format == SUBBITMAP_EMPTY || imgs->num_parts == 0)
+        return;
+
+    VdpRGBAFormat format;
+    switch (imgs->format) {
+    case SUBBITMAP_LIBASS:
+        format = VDP_RGBA_FORMAT_A8;
+        break;
+    case SUBBITMAP_BGRA:
+        format = VDP_RGBA_FORMAT_B8G8R8A8;
+        break;
+    default:
+        MP_ASSERT_UNREACHABLE();
+    }
+
+    assert(imgs->packed);
+
+    int r_w = next_pow2(imgs->packed_w);
+    int r_h = next_pow2(imgs->packed_h);
+
+    if (sfc->format != format || sfc->surface == VDP_INVALID_HANDLE ||
+        sfc->surface_w < r_w || sfc->surface_h < r_h)
+    {
+        MP_VERBOSE(vo, "Allocating a %dx%d surface for OSD bitmaps.\n", r_w, r_h);
+
+        // The limits stay 0 if the query fails, which rejects any size below.
+        uint32_t m_w = 0, m_h = 0;
+        vdp_st = vdp->bitmap_surface_query_capabilities(vc->vdp_device, format,
+                                                        &(VdpBool){0}, &m_w, &m_h);
+        CHECK_VDP_WARNING(vo, "Query to get max OSD surface size failed");
+
+        if (r_w > m_w || r_h > m_h) {
+            MP_ERR(vo, "OSD bitmaps do not fit on a surface with the maximum "
+                   "supported size\n");
+            return;
+        }
+
+        if (sfc->surface != VDP_INVALID_HANDLE) {
+            vdp_st = vdp->bitmap_surface_destroy(sfc->surface);
+            CHECK_VDP_WARNING(vo, "Error when calling vdp_bitmap_surface_destroy");
+            // Forget the destroyed handle right away: if the create call
+            // below fails, a later call must not treat it as a live surface.
+            sfc->surface = VDP_INVALID_HANDLE;
+        }
+
+        VdpBitmapSurface surface;
+        vdp_st = vdp->bitmap_surface_create(vc->vdp_device, format,
+                                            r_w, r_h, true, &surface);
+        CHECK_VDP_WARNING(vo, "OSD: error when creating surface");
+        if (vdp_st != VDP_STATUS_OK)
+            return;
+
+        sfc->surface = surface;
+        sfc->surface_w = r_w;
+        sfc->surface_h = r_h;
+        sfc->format = format;
+    }
+
+    // Upload the packed bitmap (plane 0) into the top-left corner of the surface.
+    void *data = imgs->packed->planes[0];
+    int stride = imgs->packed->stride[0];
+    VdpRect rc = {0, 0, imgs->packed_w, imgs->packed_h};
+    vdp_st = vdp->bitmap_surface_put_bits_native(sfc->surface,
+                                                 &(const void *){data},
+                                                 &(uint32_t){stride},
+                                                 &rc);
+    CHECK_VDP_WARNING(vo, "OSD: putbits failed");
+
+    MP_TARRAY_GROW(vc, sfc->targets, imgs->num_parts);
+    sfc->render_count = imgs->num_parts;
+
+    for (int i = 0; i < imgs->num_parts; i++) {
+        struct sub_bitmap *b = &imgs->parts[i];
+        struct osd_target *target = &sfc->targets[i];
+        target->source = (VdpRect){b->src_x, b->src_y,
+                                   b->src_x + b->w, b->src_y + b->h};
+        target->dest = (VdpRect){b->x, b->y, b->x + b->dw, b->y + b->dh};
+        target->color = (VdpColor){1, 1, 1, 1};
+        if (imgs->format == SUBBITMAP_LIBASS) {
+            // The libass colour is unpacked as red, green, blue from the top
+            // three bytes; the low byte is inverted to give alpha.
+            uint32_t color = b->libass.color;
+            target->color.alpha = 1.0 - ((color >> 0) & 0xff) / 255.0;
+            target->color.blue  = ((color >>  8) & 0xff) / 255.0;
+            target->color.green = ((color >> 16) & 0xff) / 255.0;
+            target->color.red   = ((color >> 24) & 0xff) / 255.0;
+        }
+    }
+}
+
+// osd_draw() callback: ctx is the struct vo. Uploads the bitmaps of one OSD
+// part, then renders that part onto the current output surface.
+static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs)
+{
+    struct vo *owner = ctx;
+
+    generate_osd_part(owner, imgs);
+
+    int part_index = imgs->render_index;
+    draw_osd_part(owner, part_index);
+}
+
+// Draw the OSD via osd_draw(), which invokes draw_osd_cb() with this vo as
+// context. Does nothing when status_ok() fails.
+static void draw_osd(struct vo *vo)
+{
+    if (!status_ok(vo))
+        return;
+
+    struct vdpctx *vc = vo->priv;
+
+    // BGRA is always requested; LIBASS only if vc->supports_a8 is set.
+    bool formats[SUBBITMAP_COUNT] = {0};
+    formats[SUBBITMAP_LIBASS] = vc->supports_a8;
+    formats[SUBBITMAP_BGRA] = true;
+
+    // Without a current image there is no timestamp to use, so pass 0.
+    double osd_pts = 0;
+    if (vc->current_image)
+        osd_pts = vc->current_image->pts;
+
+    osd_draw(vo->osd, vc->osd_rect, osd_pts, 0, formats, draw_osd_cb, vo);
+}
+
+// Walk the output surfaces from vc->query_surface_num up to (but excluding)
+// vc->surface_num and query their presentation queue status. Every surface
+// that is not reported as QUEUED advances vc->query_surface_num and stores
+// its reported time in vc->recent_vsync_time. The walk stops at the first
+// surface that is still QUEUED.
+// Returns the (wrapped) distance between vc->query_surface_num and
+// vc->surface_num, i.e. the number of surfaces still considered queued.
+static int update_presentation_queue_status(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+
+    while (vc->query_surface_num != vc->surface_num) {
+        // NOTE(review): vtime and status have no initializer. If the query
+        // below fails they are read further down without having been written
+        // by this function - confirm that this cannot happen in practice.
+        VdpTime vtime;
+        VdpPresentationQueueStatus status;
+        VdpOutputSurface surface = vc->output_surfaces[vc->query_surface_num];
+        vdp_st = vdp->presentation_queue_query_surface_status(vc->flip_queue,
+                                                              surface,
+                                                              &status, &vtime);
+        CHECK_VDP_WARNING(vo, "Error calling "
+                         "presentation_queue_query_surface_status");
+        // The extra get_time call is only made when trace logging is enabled.
+        if (mp_msg_test(vo->log, MSGL_TRACE)) {
+            VdpTime current;
+            vdp_st = vdp->presentation_queue_get_time(vc->flip_queue, &current);
+            CHECK_VDP_WARNING(vo, "Error when calling "
+                              "vdp_presentation_queue_get_time");
+            MP_TRACE(vo, "Vdpau time: %"PRIu64"\n", (uint64_t)current);
+            MP_TRACE(vo, "Surface %d status: %d time: %"PRIu64"\n",
+                     (int)surface, (int)status, (uint64_t)vtime);
+        }
+        // Surfaces are checked in queue order, so stop at the first one that
+        // is still waiting.
+        if (status == VDP_PRESENTATION_QUEUE_STATUS_QUEUED)
+            break;
+        // Purely diagnostic: compare the reported time against the time the
+        // surface was queued for (stored by flip_page() in vc->queue_time).
+        // Skipped when the interval is 1, the value flip_page() uses when no
+        // real vsync interval is known.
+        if (vc->vsync_interval > 1) {
+            uint64_t qtime = vc->queue_time[vc->query_surface_num];
+            int diff = ((int64_t)vtime - (int64_t)qtime) / 1e6;
+            MP_TRACE(vo, "Queue time difference: %d ms\n", diff);
+            if (vtime < qtime + vc->vsync_interval / 2)
+                MP_VERBOSE(vo, "Frame shown too early (%d ms)\n", diff);
+            if (vtime > qtime + vc->vsync_interval)
+                MP_VERBOSE(vo, "Frame shown late (%d ms)\n", diff);
+        }
+        vc->query_surface_num = WRAP_ADD(vc->query_surface_num, 1,
+                                         vc->num_output_surfaces);
+        vc->recent_vsync_time = vtime;
+    }
+    int num_queued = WRAP_ADD(vc->surface_num, -vc->query_surface_num,
+                              vc->num_output_surfaces);
+    MP_DBG(vo, "Queued surface count (before add): %d\n", num_queued);
+    return num_queued;
+}
+
+// Return the timestamp of the latest vsync at or before ts, extrapolated from
+// vc->recent_vsync_time in steps of vc->vsync_interval.
+static inline uint64_t prev_vsync(struct vdpctx *vc, uint64_t ts)
+{
+    int64_t since_vsync = (int64_t)(ts - vc->recent_vsync_time);
+    int64_t phase = since_vsync % vc->vsync_interval;
+    // C's % keeps the sign of the dividend; fold negative remainders back
+    // into the range [0, vsync_interval).
+    int64_t into_interval = phase < 0 ? phase + vc->vsync_interval : phase;
+    return ts - into_interval;
+}
+
+// Queue vc->output_surfaces[vc->surface_num] on the presentation queue at a
+// time derived from vc->current_pts, or drop the frame.
+// vc->dropped_frame is set on entry and only cleared once a surface has
+// actually been queued; every path that reaches the drop label increments
+// the VO drop counter instead.
+static void flip_page(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+
+    int64_t pts_us = vc->current_pts;
+    int duration = vc->current_duration;
+
+    vc->dropped_frame = true; // changed at end if false
+
+    if (!check_preemption(vo))
+        goto drop;
+
+    // Pick the vsync interval: computed from user_fps if it is positive,
+    // taken from vo_get_vsync_interval() if it is 0. For a negative user_fps
+    // the interval stays at 1, which disables the drop logic (see below).
+    vc->vsync_interval = 1;
+    if (vc->user_fps > 0) {
+        vc->vsync_interval = 1e9 / vc->user_fps;
+    } else if (vc->user_fps == 0) {
+        vc->vsync_interval = vo_get_vsync_interval(vo);
+    }
+    vc->vsync_interval = MPMAX(vc->vsync_interval, 1);
+
+    // Scale the duration by 1000 (the same factor applied to pts_us below);
+    // values that would overflow int are treated as "unknown" (-1).
+    if (duration > INT_MAX / 1000)
+        duration = -1;
+    else
+        duration *= 1000;
+
+    if (vc->vsync_interval == 1)
+        duration = -1;  // Make sure drop logic is disabled
+
+    VdpTime vdp_time = 0;
+    vdp_st = vdp->presentation_queue_get_time(vc->flip_queue, &vdp_time);
+    CHECK_VDP_WARNING(vo, "Error when calling vdp_presentation_queue_get_time");
+
+    // Target time relative to mp_time_ns(); a zero pts or a target in the
+    // past means "as soon as possible".
+    int64_t rel_pts_ns = (pts_us * 1000) - mp_time_ns();
+    if (!pts_us || rel_pts_ns < 0)
+        rel_pts_ns = 0;
+
+    // From here on all timestamps are on the presentation queue clock.
+    // npts is the computed end time of this frame (pts + duration), or
+    // UINT64_MAX if the duration is unknown, which makes the drop checks
+    // below never trigger.
+    uint64_t now = vdp_time;
+    uint64_t pts = now + rel_pts_ns;
+    uint64_t ideal_pts = pts;
+    uint64_t npts = duration >= 0 ? pts + duration : UINT64_MAX;
+
+    /* This should normally never happen.
+     * - The last queued frame can't have a PTS that goes more than 50ms in the
+     *   future. This is guaranteed by vo.c, which currently actually queues
+     *   ahead by roughly the flip queue offset. Just to be sure
+     *   give some additional room by doubling the time.
+     * - The last vsync can never be in the future.
+     */
+    int64_t max_pts_ahead = vc->flip_offset_us * 1000 * 2;
+    if (vc->last_queue_time > now + max_pts_ahead ||
+        vc->recent_vsync_time > now)
+    {
+        vc->last_queue_time = 0;
+        vc->recent_vsync_time = 0;
+        MP_WARN(vo, "Inconsistent timing detected.\n");
+    }
+
+#define PREV_VSYNC(ts) prev_vsync(vc, ts)
+
+    /* We hope to be here at least one vsync before the frame should be shown.
+     * If we are running late then don't drop the frame unless there is
+     * already one queued for the next vsync; even if we _hope_ to show the
+     * next frame soon enough to mean this one should be dropped we might
+     * not make the target time in reality. Without this check we could drop
+     * every frame, freezing the display completely if video lags behind.
+     */
+    if (now > PREV_VSYNC(MPMAX(pts, vc->last_queue_time + vc->vsync_interval)))
+        npts = UINT64_MAX;
+
+    /* Allow flipping a frame at a vsync if its presentation time is a
+     * bit after that vsync and the change makes the flip time delta
+     * from previous frame better match the target timestamp delta.
+     * This avoids instability with frame timestamps falling near vsyncs.
+     * For example if the frame timestamps were (with vsyncs at
+     * integer values) 0.01, 1.99, 4.01, 5.99, 8.01, ... then
+     * straightforward timing at next vsync would flip the frames at
+     * 1, 2, 5, 6, 9; this changes it to 1, 2, 4, 6, 8 and so on with
+     * regular 2-vsync intervals.
+     *
+     * Also allow moving the frame forward if it looks like we dropped
+     * the previous frame incorrectly (now that we know better after
+     * having final exact timestamp information for this frame) and
+     * there would unnecessarily be a vsync without a frame change.
+     */
+    // NOTE(review): vc->dropped_frame was unconditionally set to true at the
+    // top of this function, so the last term below effectively reduces to
+    // "vsync > vc->dropped_time" - confirm that this is intended.
+    uint64_t vsync = PREV_VSYNC(pts);
+    if (pts < vsync + vc->vsync_interval / 4
+        && (vsync - PREV_VSYNC(vc->last_queue_time)
+            > pts - vc->last_ideal_time + vc->vsync_interval / 2
+            || (vc->dropped_frame && vsync > vc->dropped_time)))
+        pts -= vc->vsync_interval / 2;
+
+    vc->dropped_time = ideal_pts;
+
+    // Never queue earlier than one interval after the previously queued
+    // frame, nor in the past. Drop if this frame's end time (npts) falls
+    // before the vsync following the one it would be shown at.
+    pts = MPMAX(pts, vc->last_queue_time + vc->vsync_interval);
+    pts = MPMAX(pts, now);
+    if (npts < PREV_VSYNC(pts) + vc->vsync_interval)
+        goto drop;
+
+    // Repeat the check with the refreshed queue status: assume each surface
+    // that is still queued occupies one vsync after the last known one.
+    int num_flips = update_presentation_queue_status(vo);
+    vsync = vc->recent_vsync_time + num_flips * vc->vsync_interval;
+    pts = MPMAX(pts, now);
+    pts = MPMAX(pts, vsync + (vc->vsync_interval >> 2));
+    vsync = PREV_VSYNC(pts);
+    if (npts < vsync + vc->vsync_interval)
+        goto drop;
+    // Request display a quarter interval after the chosen vsync.
+    pts = vsync + (vc->vsync_interval >> 2);
+    VdpOutputSurface frame = vc->output_surfaces[vc->surface_num];
+    vdp_st = vdp->presentation_queue_display(vc->flip_queue, frame,
+                                             vo->dwidth, vo->dheight, pts);
+    CHECK_VDP_WARNING(vo, "Error when calling vdp_presentation_queue_display");
+
+    MP_TRACE(vo, "Queue new surface %d: Vdpau time: %"PRIu64" "
+             "pts: %"PRIu64"\n", (int)frame, now, pts);
+
+    // Record the timing of this flip for the next call and move on to the
+    // next output surface.
+    vc->last_queue_time = pts;
+    vc->queue_time[vc->surface_num] = pts;
+    vc->last_ideal_time = ideal_pts;
+    vc->dropped_frame = false;
+    vc->surface_num = WRAP_ADD(vc->surface_num, 1, vc->num_output_surfaces);
+    return;
+
+drop:
+    vo_increment_drop_count(vo, 1);
+}
+
+// Take over the frame from the VO core: upload a new image if there is one,
+// remember pts/duration for flip_page(), then (if the VDPAU state is usable)
+// render the video and the OSD.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct vdpctx *vc = vo->priv;
+
+    check_preemption(vo);
+
+    // A redraw request keeps using the image that is already stored.
+    bool have_new_image = frame->current && !frame->redraw;
+    if (have_new_image) {
+        struct mp_image *uploaded =
+            mp_vdpau_upload_video_surface(vc->mpvdp, frame->current);
+        if (!uploaded)
+            MP_ERR(vo, "Could not upload image.\n");
+
+        // The old image is released even if the upload failed, in which case
+        // current_image becomes NULL.
+        talloc_free(vc->current_image);
+        vc->current_image = uploaded;
+    }
+
+    vc->current_pts = frame->pts;
+    vc->current_duration = frame->duration;
+
+    if (!status_ok(vo))
+        return;
+
+    video_to_output_surface(vo, vc->current_image);
+    draw_osd(vo);
+}
+
+// Read back an output surface into a newly allocated IMGFMT_BGR0 image.
+// Returns NULL if the VO has no params, the surface parameters cannot be
+// queried, or the image cannot be allocated. A failing readback only
+// produces a warning; the image is returned regardless.
+// warning: the size and pixel format of surface must match that of the
+//          surfaces in vc->output_surfaces
+static struct mp_image *read_output_surface(struct vo *vo,
+                                            VdpOutputSurface surface)
+{
+    if (!vo->params)
+        return NULL;
+
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+
+    VdpRGBAFormat fmt;
+    uint32_t w, h;
+    vdp_st = vdp->output_surface_get_parameters(surface, &fmt, &w, &h);
+    if (vdp_st != VDP_STATUS_OK)
+        return NULL;
+
+    assert(fmt == OUTPUT_RGBA_FORMAT);
+
+    struct mp_image *out = mp_image_alloc(IMGFMT_BGR0, w, h);
+    if (out) {
+        // Single packed plane: one destination pointer and one pitch.
+        void *planes[] = { out->planes[0] };
+        uint32_t pitches[] = { out->stride[0] };
+        vdp_st = vdp->output_surface_get_bits_native(surface, NULL, planes,
+                                                     pitches);
+        CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_get_bits_native");
+    }
+
+    return out;
+}
+
+// Read back the output surface in the slot just before vc->surface_num and
+// trim it to the window size. Returns NULL if the readback failed.
+static struct mp_image *get_window_screenshot(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+
+    int prev = WRAP_ADD(vc->surface_num, -1, vc->num_output_surfaces);
+    struct mp_image *shot = read_output_surface(vo, vc->output_surfaces[prev]);
+    if (!shot)
+        return NULL;
+
+    // Only shrink; a surface smaller than the window is returned as it is.
+    if (shot->w >= vo->dwidth && shot->h >= vo->dheight)
+        mp_image_set_size(shot, vo->dwidth, vo->dheight);
+    return shot;
+}
+
+// Return 1 if the image format is accepted by this VO, 0 otherwise. Formats
+// known to mp_vdpau_get_format() are always accepted; formats known to
+// mp_vdpau_get_rgb_format() only while force_yuv is off.
+static int query_format(struct vo *vo, int format)
+{
+    struct vdpctx *vc = vo->priv;
+
+    bool accepted =
+        mp_vdpau_get_format(format, NULL, NULL) ||
+        (!vc->force_yuv && mp_vdpau_get_rgb_format(format, NULL));
+
+    return accepted ? 1 : 0;
+}
+
+// Destroy every VDPAU object owned by this VO whose handle is not
+// VDP_INVALID_HANDLE (presentation queue and target, output surfaces,
+// rotation surface, OSD bitmap surfaces) and finally the device context.
+// Failures are only reported as warnings.
+static void destroy_vdpau_objects(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+    struct vdp_functions *vdp = vc->vdp;
+    VdpStatus vdp_st;
+
+    free_video_specific(vo);
+
+    // The queue is destroyed before the target it was created on.
+    if (vc->flip_queue != VDP_INVALID_HANDLE) {
+        vdp_st = vdp->presentation_queue_destroy(vc->flip_queue);
+        CHECK_VDP_WARNING(vo, "Error when calling vdp_presentation_queue_destroy");
+    }
+    if (vc->flip_target != VDP_INVALID_HANDLE) {
+        vdp_st = vdp->presentation_queue_target_destroy(vc->flip_target);
+        CHECK_VDP_WARNING(vo, "Error when calling "
+                         "vdp_presentation_queue_target_destroy");
+    }
+
+    for (int n = 0; n < vc->num_output_surfaces; n++) {
+        VdpOutputSurface out = vc->output_surfaces[n];
+        if (out != VDP_INVALID_HANDLE) {
+            vdp_st = vdp->output_surface_destroy(out);
+            CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_destroy");
+        }
+    }
+    if (vc->rotation_surface != VDP_INVALID_HANDLE) {
+        vdp_st = vdp->output_surface_destroy(vc->rotation_surface);
+        CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_destroy");
+    }
+
+    for (int n = 0; n < MAX_OSD_PARTS; n++) {
+        VdpBitmapSurface osd = vc->osd_surfaces[n].surface;
+        if (osd == VDP_INVALID_HANDLE)
+            continue;
+        vdp_st = vdp->bitmap_surface_destroy(osd);
+        CHECK_VDP_WARNING(vo, "Error when calling vdp_bitmap_surface_destroy");
+    }
+
+    mp_vdpau_destroy(vc->mpvdp);
+    vc->mpvdp = NULL;
+}
+
+// Tear down the VO: remove and destroy the hwdec device list, destroy the
+// mixer and all VDPAU objects, then shut down the X11 state.
+static void uninit(struct vo *vo)
+{
+    struct vdpctx *ctx = vo->priv;
+
+    // The hwdec entry points into ctx->mpvdp, which destroy_vdpau_objects()
+    // frees further down, so it is removed first.
+    hwdec_devices_remove(vo->hwdec_devs, &ctx->mpvdp->hwctx);
+    hwdec_devices_destroy(vo->hwdec_devs);
+
+    mp_vdpau_mixer_destroy(ctx->video_mixer);
+    destroy_vdpau_objects(vo);
+
+    vo_x11_uninit(vo);
+}
+
+// Set up X11, the VO window and the VDPAU device, register the device for
+// hardware decoding and create the video mixer.
+// Returns 0 on success and -1 if X11, the window or the device failed.
+static int preinit(struct vo *vo)
+{
+    struct vdpctx *vc = vo->priv;
+
+    if (!vo_x11_init(vo))
+        return -1;
+
+    if (!vo_x11_create_vo_window(vo, NULL, "vdpau"))
+        goto fail_x11;
+
+    vc->mpvdp = mp_vdpau_create_device_x11(vo->log, vo->x11->display, false);
+    if (!vc->mpvdp)
+        goto fail_x11;
+    vc->mpvdp->hwctx.hw_imgfmt = IMGFMT_VDPAU;
+
+    vo->hwdec_devs = hwdec_devices_create();
+    hwdec_devices_add(vo->hwdec_devs, &vc->mpvdp->hwctx);
+
+    vc->video_mixer = mp_vdpau_mixer_create(vc->mpvdp, vo->log);
+    vc->video_mixer->video_eq = mp_csp_equalizer_create(vo, vo->global);
+
+    if (mp_vdpau_guess_if_emulated(vc->mpvdp)) {
+        MP_WARN(vo, "VDPAU is most likely emulated via VA-API.\n"
+                    "This is inefficient. Use --vo=gpu instead.\n");
+    }
+
+    // All handles start out invalid so that uninit() can tell what has
+    // actually been allocated.
+    mark_vdpau_objects_uninitialized(vo);
+
+    mp_vdpau_handle_preemption(vc->mpvdp, &vc->preemption_counter);
+
+    vc->vdp_device = vc->mpvdp->vdp_device;
+    vc->vdp = &vc->mpvdp->vdp;
+
+    // Only the "supported" flag is kept; the two size outputs are discarded.
+    vc->vdp->bitmap_surface_query_capabilities(vc->vdp_device, VDP_RGBA_FORMAT_A8,
+                            &vc->supports_a8, &(uint32_t){0}, &(uint32_t){0});
+
+    MP_WARN(vo, "Warning: this compatibility VO is low quality and may "
+                "have issues with OSD, scaling, screenshots and more.\n"
+                "vo=gpu is the preferred choice in any case and "
+                "includes VDPAU support via hwdec=vdpau or vdpau-copy.\n");
+
+    return 0;
+
+fail_x11:
+    vo_x11_uninit(vo);
+    return -1;
+}
+
+// Call resize(), but only while status_ok() reports a usable state.
+static void checked_resize(struct vo *vo)
+{
+    if (status_ok(vo))
+        resize(vo);
+}
+
+// VO control entry point. A few requests are handled here; everything else
+// is forwarded to vo_x11_control(), whose resulting events are then acted
+// upon (resize or redraw) and passed on through vo_event().
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    check_preemption(vo);
+
+    if (request == VOCTRL_SET_PANSCAN) {
+        checked_resize(vo);
+        return VO_TRUE;
+    }
+    if (request == VOCTRL_SET_EQUALIZER) {
+        vo->want_redraw = true;
+        return true;
+    }
+    if (request == VOCTRL_RESET) {
+        forget_frames(vo, true);
+        return true;
+    }
+    if (request == VOCTRL_SCREENSHOT_WIN) {
+        if (!status_ok(vo))
+            return false;
+        // data points at the caller's struct mp_image * to be filled in.
+        struct mp_image **result = data;
+        *result = get_window_screenshot(vo);
+        return true;
+    }
+
+    int x11_events = 0;
+    int ret = vo_x11_control(vo, &x11_events, request, data);
+
+    // A resize takes precedence over an expose event.
+    if (x11_events & VO_EVENT_RESIZE)
+        checked_resize(vo);
+    else if (x11_events & VO_EVENT_EXPOSE)
+        vo->want_redraw = true;
+    vo_event(vo, x11_events);
+
+    return ret;
+}
+
+// The OPT_* entries below refer to fields of struct vdpctx.
+#define OPT_BASE_STRUCT struct vdpctx
+
+// Driver description of the "vdpau" VO: its callbacks, the size of the
+// private context (struct vdpctx) and the options it provides under the
+// "vo-vdpau" prefix.
+const struct vo_driver video_out_vdpau = {
+    .description = "VDPAU with X11",
+    .name = "vdpau",
+    .caps = VO_CAP_FRAMEDROP | VO_CAP_ROTATE90,
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .wakeup = vo_x11_wakeup,
+    .wait_events = vo_x11_wait_events,
+    .uninit = uninit,
+    .priv_size = sizeof(struct vdpctx),
+    .options = (const struct m_option []){
+        {"chroma-deint", OPT_BOOL(chroma_deint), OPTDEF_INT(1)},
+        {"pullup", OPT_BOOL(pullup)},
+        {"denoise", OPT_FLOAT(denoise), M_RANGE(0, 1)},
+        {"sharpen", OPT_FLOAT(sharpen), M_RANGE(-1, 1)},
+        {"hqscaling", OPT_INT(hqscaling), M_RANGE(0, 9)},
+        // > 0: flip_page() uses 1e9 / fps as vsync interval; 0: the interval
+        // comes from vo_get_vsync_interval(); < 0: the interval is fixed at 1.
+        {"fps", OPT_FLOAT(user_fps)},
+        {"composite-detect", OPT_BOOL(composite_detect), OPTDEF_INT(1)},
+        {"queuetime-windowed", OPT_INT(flip_offset_window), OPTDEF_INT(50)},
+        {"queuetime-fs", OPT_INT(flip_offset_fs), OPTDEF_INT(50)},
+        {"output-surfaces", OPT_INT(num_output_surfaces),
+            M_RANGE(2, MAX_OUTPUT_SURFACES), OPTDEF_INT(3)},
+        {"colorkey", OPT_COLOR(colorkey),
+            .defval = &(const struct m_color){.r = 2, .g = 5, .b = 7, .a = 255}},
+        // When set, query_format() no longer accepts RGB formats.
+        {"force-yuv", OPT_BOOL(force_yuv)},
+        {NULL},
+    },
+    .options_prefix = "vo-vdpau",
+};
diff --git a/video/out/vo_wlshm.c b/video/out/vo_wlshm.c
new file mode 100644
index 0000000..1e5e009
--- /dev/null
+++ b/video/out/vo_wlshm.c
@@ -0,0 +1,324 @@
+/*
+ * This file is part of mpv video player.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <fcntl.h>
+#include <sys/mman.h>
+#include <time.h>
+#include <unistd.h>
+
+#include <libswscale/swscale.h>
+
+#include "osdep/endian.h"
+#include "present_sync.h"
+#include "sub/osd.h"
+#include "video/fmt-conversion.h"
+#include "video/mp_image.h"
+#include "video/sws_utils.h"
+#include "vo.h"
+#include "wayland_common.h"
+
+// One shared-memory frame buffer. Allocated by buffer_create(); freeing it
+// with talloc_free() runs buffer_destroy(), which releases the Wayland
+// objects and the memory mapping.
+struct buffer {
+    struct vo *vo;                // VO this buffer belongs to
+    size_t size;                  // length in bytes of the mmap()ed region
+    struct wl_shm_pool *pool;     // pool created over the shared memory fd
+    struct wl_buffer *buffer;     // buffer created at offset 0 of the pool
+    struct mp_image mpi;          // image whose plane 0 is the mapped memory
+    struct buffer *next;          // next entry in priv.free_buffers
+};
+
+// Per-VO state of the wlshm output.
+struct priv {
+    struct mp_sws_context *sws;   // scaler; src is set in reconfig(), dst in resize()
+    struct buffer *free_buffers;  // singly linked list of reusable buffers
+    // The following three are filled by vo_get_src_dst_rects() in resize().
+    struct mp_rect src;
+    struct mp_rect dst;
+    struct mp_osd_res osd;
+};
+
+// wl_buffer release handler. A released buffer that still matches the
+// current window size is pushed onto the free list for reuse; any other
+// buffer is freed.
+static void buffer_handle_release(void *data, struct wl_buffer *wl_buffer)
+{
+    struct buffer *released = data;
+    struct vo *vo = released->vo;
+    struct priv *p = vo->priv;
+
+    bool size_matches = released->mpi.w == vo->dwidth &&
+                        released->mpi.h == vo->dheight;
+    if (!size_matches) {
+        talloc_free(released);
+        return;
+    }
+
+    released->next = p->free_buffers;
+    p->free_buffers = released;
+}
+
+// Listener attached to every wl_buffer made by buffer_create().
+static const struct wl_buffer_listener buffer_listener = {
+    .release = buffer_handle_release,
+};
+
+// talloc destructor for struct buffer (installed by buffer_create()):
+// destroys the wl_buffer and its pool, then unmaps the pixel memory.
+static void buffer_destroy(void *p)
+{
+    struct buffer *victim = p;
+
+    wl_buffer_destroy(victim->buffer);
+    wl_shm_pool_destroy(victim->pool);
+    munmap(victim->mpi.planes[0], victim->size);
+}
+
+// Allocate a shared-memory backed buffer of width x height pixels with
+// 4 bytes per pixel and a stride aligned to 16 bytes, and wrap it in a
+// wl_buffer (WL_SHM_FORMAT_XRGB8888) plus an mp_image using the scaler's
+// destination parameters.
+// Returns NULL on failure. On success the buffer has buffer_destroy() set as
+// talloc destructor, so the caller releases it with talloc_free().
+static struct buffer *buffer_create(struct vo *vo, int width, int height)
+{
+    struct priv *p = vo->priv;
+    struct vo_wayland_state *wl = vo->wl;
+    int fd;
+    int stride;
+    size_t size;
+    uint8_t *data;
+    struct buffer *buf;
+
+    // Reject dimensions for which the stride or the total size cannot be
+    // represented: both the stride and the pool size are passed to wl_shm as
+    // int32_t, and multiplying in int would overflow before that.
+    if (width <= 0 || height <= 0 || width > (INT32_MAX - 15) / 4)
+        goto error0;
+    stride = MP_ALIGN_UP(width * 4, 16);
+    if ((size_t)height > (size_t)INT32_MAX / (size_t)stride)
+        goto error0;
+    size = (size_t)height * (size_t)stride;
+
+    fd = vo_wayland_allocate_memfd(vo, size);
+    if (fd < 0)
+        goto error0;
+    data = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
+    if (data == MAP_FAILED)
+        goto error1;
+    buf = talloc_zero(NULL, struct buffer);
+    if (!buf)
+        goto error2;
+    buf->vo = vo;
+    buf->size = size;
+    mp_image_set_params(&buf->mpi, &p->sws->dst);
+    mp_image_set_size(&buf->mpi, width, height);
+    buf->mpi.planes[0] = data;
+    buf->mpi.stride[0] = stride;
+    buf->pool = wl_shm_create_pool(wl->shm, fd, size);
+    if (!buf->pool)
+        goto error3;
+    buf->buffer = wl_shm_pool_create_buffer(buf->pool, 0, width, height,
+                                            stride, WL_SHM_FORMAT_XRGB8888);
+    if (!buf->buffer)
+        goto error4;
+    wl_buffer_add_listener(buf->buffer, &buffer_listener, buf);
+
+    // The fd is closed right after pool creation; the mapping made above is
+    // what gets written to from here on.
+    close(fd);
+    // Only install the destructor once everything exists: the error paths
+    // below release the pieces by hand.
+    talloc_set_destructor(buf, buffer_destroy);
+
+    return buf;
+
+error4:
+    wl_shm_pool_destroy(buf->pool);
+error3:
+    talloc_free(buf);
+error2:
+    munmap(data, size);
+error1:
+    close(fd);
+error0:
+    return NULL;
+}
+
+// Free every buffer on the free list, then shut down the Wayland state.
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    for (struct buffer *cur = p->free_buffers; cur; cur = p->free_buffers) {
+        // Unlink before freeing so the list head never dangles.
+        p->free_buffers = cur->next;
+        talloc_free(cur);
+    }
+    vo_wayland_uninit(vo);
+}
+
+// Initialize the Wayland state and the software scaler.
+// Returns 0 on success; on failure uninit() is run and -1 is returned.
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+
+    if (vo_wayland_init(vo)) {
+        if (vo->wl->shm) {
+            p->sws = mp_sws_alloc(vo);
+            p->sws->log = vo->log;
+            mp_sws_enable_cmdline_opts(p->sws, vo->global);
+            return 0;
+        }
+        // This VO cannot work without the wl_shm global.
+        MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n",
+                 wl_shm_interface.name);
+    }
+
+    uninit(vo);
+    return -1;
+}
+
+// A format is accepted if libswscale can take it as conversion input.
+static int query_format(struct vo *vo, int format)
+{
+    int supported = sws_isSupportedInput(imgfmt2pixfmt(format));
+    return supported;
+}
+
+// Reconfigure for new video parameters: let the Wayland code reconfigure the
+// window and store params as the scaler's source format.
+// Returns 0 on success, -1 if vo_wayland_reconfig() fails.
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    if (!vo_wayland_reconfig(vo))
+        return -1;
+
+    struct priv *p = vo->priv;
+    p->sws->src = *params;
+    return 0;
+}
+
+// Adapt to the current window geometry: update the VO size and the
+// source/destination/OSD rectangles, set the scaler's destination format,
+// drop all cached buffers and reinitialize the scaler.
+// Returns 1 without doing anything if either dimension is 0, otherwise the
+// result of mp_sws_reinit().
+static int resize(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    struct vo_wayland_state *wl = vo->wl;
+    const int32_t new_w = mp_rect_w(wl->geometry);
+    const int32_t new_h = mp_rect_h(wl->geometry);
+
+    if (!new_w || !new_h)
+        return 1;
+
+    vo_wayland_set_opaque_region(wl, false);
+    vo->want_redraw = true;
+    vo->dwidth = new_w;
+    vo->dheight = new_h;
+    vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd);
+
+    // The scaler always renders a full-window image with square pixels.
+    p->sws->dst = (struct mp_image_params) {
+        .imgfmt = MP_SELECT_LE_BE(IMGFMT_BGR0, IMGFMT_0RGB),
+        .w = new_w,
+        .h = new_h,
+        .p_w = 1,
+        .p_h = 1,
+    };
+    mp_image_params_guess_csp(&p->sws->dst);
+
+    // Drop every cached buffer so that new ones are created on demand.
+    for (struct buffer *old = p->free_buffers; old; old = p->free_buffers) {
+        p->free_buffers = old->next;
+        talloc_free(old);
+    }
+
+    vo_wayland_handle_fractional_scale(wl);
+
+    return mp_sws_reinit(p->sws);
+}
+
+// VO control entry point. Panscan changes trigger a resize right away; all
+// other requests go to vo_wayland_control(), whose events are acted upon
+// and then passed on through vo_event().
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    if (request == VOCTRL_SET_PANSCAN) {
+        resize(vo);
+        return VO_TRUE;
+    }
+
+    int wl_events = 0;
+    int result = vo_wayland_control(vo, &wl_events, request, data);
+
+    // On a resize event the return value of resize() replaces the result of
+    // vo_wayland_control().
+    if (wl_events & VO_EVENT_RESIZE)
+        result = resize(vo);
+    if (wl_events & VO_EVENT_EXPOSE)
+        vo->want_redraw = true;
+    vo_event(vo, wl_events);
+
+    return result;
+}
+
+// Render one frame into a shared-memory buffer and attach it to the surface.
+// Nothing is drawn while the surface is not visible. With a video image the
+// cropped source is scaled into the destination rectangle, the borders
+// around it are cleared and the OSD is drawn on top; without one only a
+// cleared image with OSD is produced.
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+    struct vo_wayland_state *wl = vo->wl;
+    struct mp_image *src = frame->current;
+
+    if (!vo_wayland_check_visible(vo))
+        return;
+
+    // Reuse a released buffer if there is one, otherwise allocate a new one.
+    struct buffer *buf = p->free_buffers;
+    if (buf) {
+        p->free_buffers = buf->next;
+    } else {
+        buf = buffer_create(vo, vo->dwidth, vo->dheight);
+        if (!buf) {
+            wl_surface_attach(wl->surface, NULL, 0, 0);
+            return;
+        }
+    }
+
+    struct mp_image *target = &buf->mpi;
+
+    if (!src) {
+        mp_image_clear(target, 0, 0, target->w, target->h);
+        osd_draw_on_image(vo->osd, p->osd, 0, 0, target);
+    } else {
+        // Work on a copy of the image header so that cropping does not
+        // change the buffer's own image.
+        struct mp_image scaled = *target;
+        struct mp_rect crop_src;
+        struct mp_rect crop_dst;
+
+        // Align the top-left corners down (to at least 4 pixels) and move
+        // the bottom-right corners by the same amount.
+        crop_src.x0 = MP_ALIGN_DOWN(p->src.x0, MPMAX(src->fmt.align_x, 4));
+        crop_src.x1 = p->src.x1 - (p->src.x0 - crop_src.x0);
+        crop_src.y0 = MP_ALIGN_DOWN(p->src.y0, MPMAX(src->fmt.align_y, 4));
+        crop_src.y1 = p->src.y1 - (p->src.y0 - crop_src.y0);
+
+        crop_dst.x0 = MP_ALIGN_DOWN(p->dst.x0, MPMAX(scaled.fmt.align_x, 4));
+        crop_dst.x1 = p->dst.x1 - (p->dst.x0 - crop_dst.x0);
+        crop_dst.y0 = MP_ALIGN_DOWN(p->dst.y0, MPMAX(scaled.fmt.align_y, 4));
+        crop_dst.y1 = p->dst.y1 - (p->dst.y0 - crop_dst.y0);
+
+        mp_image_crop_rc(src, crop_src);
+        mp_image_crop_rc(&scaled, crop_dst);
+        mp_sws_scale(p->sws, &scaled, src);
+
+        // Clear whatever the video does not cover: top, bottom, left, right.
+        if (crop_dst.y0 > 0)
+            mp_image_clear(target, 0, 0, target->w, crop_dst.y0);
+        if (target->h > crop_dst.y1)
+            mp_image_clear(target, 0, crop_dst.y1, target->w, target->h);
+        if (crop_dst.x0 > 0)
+            mp_image_clear(target, 0, crop_dst.y0, crop_dst.x0, crop_dst.y1);
+        if (target->w > crop_dst.x1)
+            mp_image_clear(target, crop_dst.x1, crop_dst.y0, target->w, crop_dst.y1);
+
+        osd_draw_on_image(vo->osd, p->osd, src->pts, 0, target);
+    }
+
+    wl_surface_attach(wl->surface, buf->buffer, 0, 0);
+}
+
+// Present the attached buffer: damage the whole window area, commit the
+// surface, then wait for the frame callback (unless vsync is disabled) and
+// update the presentation statistics if they are in use.
+static void flip_page(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    const int w = vo->dwidth;
+    const int h = vo->dheight;
+
+    wl_surface_damage_buffer(wl->surface, 0, 0, w, h);
+    wl_surface_commit(wl->surface);
+
+    if (!wl->opts->disable_vsync)
+        vo_wayland_wait_frame(wl);
+
+    if (wl->use_present)
+        present_sync_swap(wl->present);
+}
+
+// Fill info from the presentation statistics; info is left untouched when
+// they are not in use.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    struct vo_wayland_state *wl = vo->wl;
+
+    if (!wl->use_present)
+        return;
+    present_sync_get_info(wl->present, info);
+}
+
+// Driver description of the "wlshm" VO: its callbacks and the size of the
+// private context (struct priv). No options are declared for it.
+const struct vo_driver video_out_wlshm = {
+    .description = "Wayland SHM video output (software scaling)",
+    .name = "wlshm",
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+    .wakeup = vo_wayland_wakeup,
+    .wait_events = vo_wayland_wait_events,
+    .uninit = uninit,
+    .priv_size = sizeof(struct priv),
+};
diff --git a/video/out/vo_x11.c b/video/out/vo_x11.c
new file mode 100644
index 0000000..fa93157
--- /dev/null
+++ b/video/out/vo_x11.c
@@ -0,0 +1,447 @@
+/*
+ * Original author: Aaron Holtzman <aholtzma@ess.engr.uvic.ca>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/types.h>
+
+#include <libswscale/swscale.h>
+
+#include "vo.h"
+#include "video/csputils.h"
+#include "video/mp_image.h"
+
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+
+#include <errno.h>
+
+#include "present_sync.h"
+#include "x11_common.h"
+
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <X11/extensions/XShm.h>
+
+#include "sub/osd.h"
+#include "sub/draw_bmp.h"
+
+#include "video/sws_utils.h"
+#include "video/fmt-conversion.h"
+
+#include "common/msg.h"
+#include "input/input.h"
+#include "options/options.h"
+#include "osdep/timer.h"
+
+// Per-VO state of the X11 output.
+struct priv {
+    struct vo *vo;
+
+    struct mp_image *original_image;
+
+    // Two image slots; each XImage is created by getMyXImage() and has an
+    // mp_image counterpart in mp_ximages[] that resize() sets up.
+    XImage *myximage[2];
+    struct mp_image mp_ximages[2];
+    int depth;                // depth passed to X(Shm)CreateImage
+    GC gc;
+
+    // Size the XImages were allocated with; resize() only reallocates when
+    // the window needs more than this.
+    uint32_t image_width;
+    uint32_t image_height;
+
+    struct mp_rect src;
+    struct mp_rect dst;
+    struct mp_osd_res osd;
+
+    struct mp_sws_context *sws;
+
+    XVisualInfo vinfo;        // its visual is used for creating the XImages
+
+    int current_buf;
+
+    int Shmem_Flag;           // non-zero while MIT-SHM images are in use
+    XShmSegmentInfo Shminfo[2];   // SHM segment info, one per image slot
+    int Shm_Warned_Slow;
+};
+
+static bool resize(struct vo *vo);
+
+// Allocate the XImage for slot `foo` at p->image_width x p->image_height.
+// MIT-SHM is used if the display is local and the extension is available;
+// otherwise, or when any step of the SHM setup fails, a plain XImage backed
+// by calloc()ed memory is created instead and p->Shmem_Flag is cleared.
+// Returns false if no image could be allocated at all.
+static bool getMyXImage(struct priv *p, int foo)
+{
+    struct vo *vo = p->vo;
+    if (vo->x11->display_is_local && XShmQueryExtension(vo->x11->display)) {
+        p->Shmem_Flag = 1;
+        vo->x11->ShmCompletionEvent = XShmGetEventBase(vo->x11->display)
+                                    + ShmCompletion;
+    } else {
+        p->Shmem_Flag = 0;
+        MP_WARN(vo, "Shared memory not supported\nReverting to normal Xlib\n");
+    }
+
+    if (p->Shmem_Flag) {
+        p->myximage[foo] =
+            XShmCreateImage(vo->x11->display, p->vinfo.visual, p->depth,
+                            ZPixmap, NULL, &p->Shminfo[foo], p->image_width,
+                            p->image_height);
+        if (p->myximage[foo] == NULL) {
+            MP_WARN(vo, "Shared memory error,disabling ( Ximage error )\n");
+            goto shmemerror;
+        }
+        // Compute the segment size in size_t so that large images do not
+        // overflow int arithmetic.
+        size_t shm_size = (size_t)p->myximage[foo]->bytes_per_line *
+                          (size_t)p->myximage[foo]->height;
+        p->Shminfo[foo].shmid = shmget(IPC_PRIVATE, shm_size,
+                                       IPC_CREAT | 0777);
+        if (p->Shminfo[foo].shmid < 0) {
+            XDestroyImage(p->myximage[foo]);
+            MP_WARN(vo, "Shared memory error,disabling ( seg id error )\n");
+            goto shmemerror;
+        }
+        p->Shminfo[foo].shmaddr = (char *) shmat(p->Shminfo[foo].shmid, 0, 0);
+
+        if (p->Shminfo[foo].shmaddr == ((char *) -1)) {
+            // The segment exists but could not be attached: mark it for
+            // removal, otherwise it would stay allocated system-wide.
+            shmctl(p->Shminfo[foo].shmid, IPC_RMID, 0);
+            XDestroyImage(p->myximage[foo]);
+            MP_WARN(vo, "Shared memory error,disabling ( address error )\n");
+            goto shmemerror;
+        }
+        p->myximage[foo]->data = p->Shminfo[foo].shmaddr;
+        p->Shminfo[foo].readOnly = False;
+        XShmAttach(vo->x11->display, &p->Shminfo[foo]);
+
+        XSync(vo->x11->display, False);
+
+        // Mark the segment for removal now; it goes away once the last
+        // attachment is detached.
+        shmctl(p->Shminfo[foo].shmid, IPC_RMID, 0);
+    } else {
+shmemerror:
+        // Fallback path, also entered by goto from the SHM branch above.
+        p->Shmem_Flag = 0;
+
+        MP_VERBOSE(vo, "Not using SHM.\n");
+        p->myximage[foo] =
+            XCreateImage(vo->x11->display, p->vinfo.visual, p->depth, ZPixmap,
+                         0, NULL, p->image_width, p->image_height, 8, 0);
+        if (p->myximage[foo]) {
+            p->myximage[foo]->data =
+                calloc(1, (size_t)p->myximage[foo]->bytes_per_line *
+                          (size_t)p->image_height + 32);
+        }
+        if (!p->myximage[foo] || !p->myximage[foo]->data) {
+            MP_WARN(vo, "could not allocate image\n");
+            return false;
+        }
+    }
+    return true;
+}
+
+// Release the XImage in slot `foo` and reset the slot to NULL. In SHM mode
+// the segment is detached from the X server and from this process; in
+// non-SHM mode an empty slot is tolerated.
+static void freeMyXImage(struct priv *p, int foo)
+{
+    struct vo *vo = p->vo;
+    XImage *img = p->myximage[foo];
+
+    if (p->Shmem_Flag) {
+        XShmDetach(vo->x11->display, &p->Shminfo[foo]);
+        XDestroyImage(img);
+        shmdt(p->Shminfo[foo].shmaddr);
+    } else if (img) {
+        // XDestroyImage() would free the data too since XFree() just calls
+        // free(), but do it ourselves for portability reasons
+        free(img->data);
+        img->data = NULL;
+        XDestroyImage(img);
+    }
+
+    p->myximage[foo] = NULL;
+}
+
+// Bit mask covering comp.size bits starting at bit comp.offset. The shift is
+// done on an unsigned long, so comp.size must be smaller than its bit width.
+#define MAKE_MASK(comp) (((1ul << (comp).size) - 1) << (comp).offset)
+
+// Configure the VO window and (re)allocate the images for its size.
+// fmt is not used here. Returns 0 on success, -1 if resize() fails.
+static int reconfig(struct vo *vo, struct mp_image_params *fmt)
+{
+    vo_x11_config_vo_window(vo);
+
+    return resize(vo) ? 0 : -1;
+}
+
+static bool resize(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    // Rebuild XImages, image format and scaler setup for the window size.
+    // Attempt to align. We don't know the size in bytes yet (????), so just
+    // assume worst case (1 byte per pixel).
+    int nw = MPMAX(1, MP_ALIGN_UP(vo->dwidth, MP_IMAGE_BYTE_ALIGN));
+    int nh = MPMAX(1, vo->dheight);
+
+    if (nw > p->image_width || nh > p->image_height) { // grow-only realloc
+        for (int i = 0; i < 2; i++)
+            freeMyXImage(p, i);
+
+        p->image_width = nw;
+        p->image_height = nh;
+
+        for (int i = 0; i < 2; i++) {
+            if (!getMyXImage(p, i)) {
+                p->image_width = 0; // forces a new allocation on the next call
+                p->image_height = 0;
+                return false;
+            }
+        }
+    }
+
+    int mpfmt = 0; // stays 0 if no mp format matches the XImage layout
+    for (int fmt = IMGFMT_START; fmt < IMGFMT_END; fmt++) {
+        struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(fmt);
+        if ((desc.flags & MP_IMGFLAG_HAS_COMPS) && desc.num_planes == 1 &&
+            (desc.flags & MP_IMGFLAG_COLOR_MASK) == MP_IMGFLAG_COLOR_RGB &&
+            (desc.flags & MP_IMGFLAG_TYPE_MASK) == MP_IMGFLAG_TYPE_UINT &&
+            (desc.flags & MP_IMGFLAG_NE) && !(desc.flags & MP_IMGFLAG_ALPHA) &&
+            desc.bpp[0] <= 8 * sizeof(unsigned long) &&
+            p->myximage[0]->bits_per_pixel == desc.bpp[0] &&
+            p->myximage[0]->byte_order == MP_SELECT_LE_BE(LSBFirst, MSBFirst))
+        {
+            // desc.comps[] uses little endian bit offsets, so "swap" the
+            // offsets here.
+            if (MP_SELECT_LE_BE(0, 1)) {
+                // Except for formats that use byte swapping; for these, the
+                // offsets are in native endian. There is no way to distinguish
+                // which one a given format is (could even be both), and using
+                // mp_find_other_endian() is just a guess.
+                if (!mp_find_other_endian(fmt)) {
+                    for (int c = 0; c < 3; c++) {
+                        desc.comps[c].offset =
+                            desc.bpp[0] - desc.comps[c].size -desc.comps[c].offset;
+                    }
+                }
+            }
+            if (p->myximage[0]->red_mask == MAKE_MASK(desc.comps[0]) &&
+                p->myximage[0]->green_mask == MAKE_MASK(desc.comps[1]) &&
+                p->myximage[0]->blue_mask == MAKE_MASK(desc.comps[2]))
+            {
+                mpfmt = fmt; // all three channel masks agree with the XImage
+                break;
+            }
+        }
+    }
+
+    if (!mpfmt) {
+        MP_ERR(vo, "X server image format not supported, use another VO.\n");
+        return false;
+    }
+    MP_VERBOSE(vo, "Using mp format: %s\n", mp_imgfmt_to_name(mpfmt));
+
+    for (int i = 0; i < 2; i++) { // wrap both XImages as mp_images
+        struct mp_image *img = &p->mp_ximages[i];
+        *img = (struct mp_image){0};
+        mp_image_setfmt(img, mpfmt);
+        mp_image_set_size(img, p->image_width, p->image_height);
+        img->planes[0] = p->myximage[i]->data; // points into the XImage data
+        img->stride[0] = p->myximage[i]->bytes_per_line;
+
+        mp_image_params_guess_csp(&img->params);
+    }
+
+    vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd);
+
+    if (vo->params) { // set up sws: p->src sized input -> p->dst sized output
+        p->sws->src = *vo->params;
+        p->sws->src.w = mp_rect_w(p->src);
+        p->sws->src.h = mp_rect_h(p->src);
+
+        p->sws->dst = p->mp_ximages[0].params;
+        p->sws->dst.w = mp_rect_w(p->dst);
+        p->sws->dst.h = mp_rect_h(p->dst);
+
+        if (mp_sws_reinit(p->sws) < 0)
+            return false;
+    }
+
+    vo->want_redraw = true;
+    return true;
+}
+
+static void Display_Image(struct priv *p, XImage *myximage)
+{
+    struct vo *vo = p->vo;
+    struct vo_x11_state *x11 = vo->x11;
+    // The image is taken from current_buf; the myximage argument is unused.
+    XImage *cur = p->myximage[p->current_buf];
+    if (!p->Shmem_Flag) {
+        XPutImage(x11->display, x11->window, p->gc, cur,
+                  0, 0, 0, 0, vo->dwidth, vo->dheight);
+        return;
+    }
+    XShmPutImage(x11->display, x11->window, p->gc, cur,
+                 0, 0, 0, 0, vo->dwidth, vo->dheight, True);
+    x11->ShmCompletionWaitCount++; // checked by wait_for_completion()
+}
+
+// Sleep in 1 ms steps until at most max_outstanding XShm puts are pending.
+static void wait_for_completion(struct vo *vo, int max_outstanding)
+{
+    struct priv *p = vo->priv;
+    if (!p->Shmem_Flag)
+        return; // plain XPutImage path: nothing is counted
+    while (vo->x11->ShmCompletionWaitCount > max_outstanding) {
+        if (!p->Shm_Warned_Slow) {
+            p->Shm_Warned_Slow = 1; // warn only once
+            MP_WARN(vo, "can't keep up! Waiting"
+                        " for XShm completion events...\n");
+        }
+        mp_sleep_ns(MP_TIME_MS_TO_NS(1));
+        vo_x11_check_events(vo);
+    }
+}
+
+static void flip_page(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    Display_Image(p, p->myximage[p->current_buf]); // put the drawn buffer
+    p->current_buf = (p->current_buf + 1) % 2; // draw into the other one next
+    if (vo->x11->use_present) {
+        vo_x11_present(vo);
+        present_sync_swap(vo->x11->present);
+    }
+}
+
+// Report vsync timing; *info is only touched when Present is in use.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    if (vo->x11->use_present)
+        present_sync_get_info(vo->x11->present, info);
+}
+
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct priv *p = vo->priv;
+
+    wait_for_completion(vo, 1); // allow at most one XShm put in flight
+    bool render = vo_x11_check_visible(vo);
+    if (!render)
+        return;
+
+    struct mp_image *img = &p->mp_ximages[p->current_buf];
+
+    if (frame->current) {
+        mp_image_clear_rc_inv(img, p->dst); // presumably clears outside p->dst
+
+        struct mp_image *src = frame->current;
+        struct mp_rect src_rc = p->src;
+        src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, src->fmt.align_x);
+        src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, src->fmt.align_y);
+        mp_image_crop_rc(src, src_rc); // NOTE: crops frame->current in place
+
+        struct mp_image dst = *img; // copy of the header; same plane pointers
+        mp_image_crop_rc(&dst, p->dst);
+
+        mp_sws_scale(p->sws, &dst, src);
+    } else {
+        mp_image_clear(img, 0, 0, img->w, img->h); // no frame: clear it all
+    }
+
+    osd_draw_on_image(vo->osd, p->osd, frame->current ? frame->current->pts : 0, 0, img);
+
+    if (frame->current != p->original_image)
+        p->original_image = frame->current; // plain pointer store
+}
+
+static int query_format(struct vo *vo, int format)
+{
+    struct priv *p = vo->priv;
+    // Accept whatever mp_sws_supports_formats() reports for the pair
+    // (IMGFMT_RGB0, format); the real XImage format is chosen in resize().
+    return mp_sws_supports_formats(p->sws, IMGFMT_RGB0, format) ? 1 : 0;
+}
+
+static void uninit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    // Also runs on preinit() failure, so every resource may still be unset.
+    for (int i = 0; i < 2; i++) {
+        if (p->myximage[i])
+            freeMyXImage(p, i);
+    }
+    if (p->gc)
+        XFreeGC(vo->x11->display, p->gc);
+    vo_x11_uninit(vo);
+}
+
+static int preinit(struct vo *vo)
+{
+    struct priv *p = vo->priv;
+    p->vo = vo;
+    p->sws = mp_sws_alloc(vo);
+    p->sws->log = vo->log;
+    mp_sws_enable_cmdline_opts(p->sws, vo->global);
+
+    if (!vo_x11_init(vo))
+        goto error;
+    struct vo_x11_state *x11 = vo->x11;
+
+    XWindowAttributes attribs;
+    XGetWindowAttributes(x11->display, x11->rootwin, &attribs);
+    p->depth = attribs.depth; // use the root window's depth
+
+    if (!XMatchVisualInfo(x11->display, x11->screen, p->depth,
+                          TrueColor, &p->vinfo)) // TrueColor at that depth
+        goto error;
+
+    MP_VERBOSE(vo, "selected visual: %d\n", (int)p->vinfo.visualid);
+
+    if (!vo_x11_create_vo_window(vo, &p->vinfo, "x11"))
+        goto error;
+
+    p->gc = XCreateGC(x11->display, x11->window, 0, NULL);
+    MP_WARN(vo, "Warning: this legacy VO has bad performance. Consider fixing "
+                "your graphics drivers, or not forcing the x11 VO.\n");
+    return 0; // success
+
+error:
+    uninit(vo); // release whatever was set up before the failure
+    return -1;
+}
+
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    // Panscan changes are handled here and never reach vo_x11_control().
+    if (request == VOCTRL_SET_PANSCAN) {
+        if (vo->config_ok)
+            resize(vo);
+        return VO_TRUE;
+    }
+
+    int events = 0;
+    int r = vo_x11_control(vo, &events, request, data);
+    if (vo->config_ok && (events & (VO_EVENT_EXPOSE | VO_EVENT_RESIZE)))
+        resize(vo);
+    vo_event(vo, events);
+    return r;
+}
+
+const struct vo_driver video_out_x11 = { // driver entry points of this VO
+    .description = "X11 (software scaling)",
+    .name = "x11",
+    .priv_size = sizeof(struct priv), // presumably the size of vo->priv
+    .preinit = preinit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+    .wakeup = vo_x11_wakeup,
+    .wait_events = vo_x11_wait_events,
+    .uninit = uninit,
+};
diff --git a/video/out/vo_xv.c b/video/out/vo_xv.c
new file mode 100644
index 0000000..6c776c5
--- /dev/null
+++ b/video/out/vo_xv.c
@@ -0,0 +1,921 @@
+/*
+ * X11 Xv interface
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <float.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <errno.h>
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+
+#include <libavutil/common.h>
+
+#include <sys/types.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <X11/extensions/XShm.h>
+
+// Note: depends on the inclusion of X11/extensions/XShm.h
+#include <X11/extensions/Xv.h>
+#include <X11/extensions/Xvlib.h>
+
+#include "options/options.h"
+#include "mpv_talloc.h"
+#include "common/msg.h"
+#include "vo.h"
+#include "video/mp_image.h"
+#include "present_sync.h"
+#include "x11_common.h"
+#include "sub/osd.h"
+#include "sub/draw_bmp.h"
+#include "video/csputils.h"
+#include "options/m_option.h"
+#include "input/input.h"
+#include "osdep/timer.h"
+
+#define CK_METHOD_NONE       0 // no colorkey drawing
+#define CK_METHOD_BACKGROUND 1 // set colorkey as window background
+#define CK_METHOD_AUTOPAINT  2 // let xv draw the colorkey
+#define CK_METHOD_MANUALFILL 3 // manually draw the colorkey
+#define CK_SRC_USE           0 // use specified / default colorkey
+#define CK_SRC_SET           1 // use and set specified / default colorkey
+#define CK_SRC_CUR           2 // use current colorkey (get it from xv)
+
+#define MAX_BUFFERS 10
+
+struct xvctx {
+    struct xv_ck_info_s {
+        int method; // CK_METHOD_* constants
+        int source; // CK_SRC_* constants
+    } xv_ck_info;
+    int colorkey; // configured colorkey, see xv_init_colorkey()
+    unsigned long xv_colorkey; // colorkey that is actually drawn
+    int xv_port;
+    int cfg_xv_adaptor;
+    int cfg_buffers; // requested buffer count, copied to num_buffers
+    XvAdaptorInfo *ai;
+    XvImageFormatValues *fo; // Xv image format list, `formats` entries
+    unsigned int formats, adaptors, xv_format; // xv_format: chosen format id
+    int current_buf; // xvimage[] index that is drawn into and put next
+    int current_ip_buf;
+    int num_buffers; // number of xvimage[] slots in use
+    XvImage *xvimage[MAX_BUFFERS];
+    struct mp_image *original_image; // last frame passed to draw_frame()
+    uint32_t image_width;
+    uint32_t image_height;
+    uint32_t image_format;
+    int cached_csp; // MP_CSP_* read back from the port, 0 if unknown
+    struct mp_rect src_rect;
+    struct mp_rect dst_rect;
+    uint32_t max_width, max_height; // zero means: not set
+    GC f_gc;    // used to paint background
+    GC vo_gc;   // used to paint video
+    int Shmem_Flag; // nonzero: buffers are XShm segments
+    XShmSegmentInfo Shminfo[MAX_BUFFERS];
+    int Shm_Warned_Slow; // set once the "can't keep up" warning was printed
+};
+
+#define MP_FOURCC(a,b,c,d) ((a) | ((b)<<8) | ((c)<<16) | ((unsigned)(d)<<24))
+
+#define MP_FOURCC_YV12  MP_FOURCC('Y', 'V', '1', '2')
+#define MP_FOURCC_I420  MP_FOURCC('I', '4', '2', '0')
+#define MP_FOURCC_IYUV  MP_FOURCC('I', 'Y', 'U', 'V')
+#define MP_FOURCC_UYVY  MP_FOURCC('U', 'Y', 'V', 'Y')
+
+struct fmt_entry {
+    int imgfmt; // mpv IMGFMT_* value
+    int fourcc; // matching Xv image format id (MP_FOURCC_*)
+};
+static const struct fmt_entry fmt_table[] = { // terminated by a {0} entry
+    {IMGFMT_420P,       MP_FOURCC_YV12},
+    {IMGFMT_420P,       MP_FOURCC_I420}, // never returned by find_xv_format()
+    {IMGFMT_UYVY,       MP_FOURCC_UYVY},
+    {0}
+};
+
+static bool allocate_xvimage(struct vo *, int);
+static void deallocate_xvimage(struct vo *vo, int foo);
+static struct mp_image get_xv_buffer(struct vo *vo, int buf_index);
+
+// Map an mpv image format to an Xv FourCC; 0 if fmt_table has no entry.
+static int find_xv_format(int imgfmt)
+{
+    const struct fmt_entry *e = fmt_table;
+    while (e->imgfmt && e->imgfmt != imgfmt)
+        e++;
+    return e->fourcc; // first match wins; the {0} terminator yields 0
+}
+
+// Find the port attribute backing equalizer `name`; None if unsupported.
+static int xv_find_atom(struct vo *vo, uint32_t xv_port, const char *name,
+                        bool get, int *min, int *max)
+{
+    Atom atom = None;
+    int howmany = 0;
+    XvAttribute *attributes = XvQueryPortAttributes(vo->x11->display, xv_port,
+                                                    &howmany);
+    for (int i = 0; i < howmany && attributes; i++) {
+        if (attributes[i].flags & (get ? XvGettable : XvSettable)) {
+            atom = XInternAtom(vo->x11->display, attributes[i].name, True);
+            *min = attributes[i].min_value;
+            *max = attributes[i].max_value;
+/* *min / *max hold the range of the attribute examined last; they are only
+   meaningful to the caller when a matching atom (not None) is returned. */
+            if (atom != None) {
+                if (!strcmp(attributes[i].name, "XV_BRIGHTNESS") &&
+                    (!strcmp(name, "brightness")))
+                    break;
+                else if (!strcmp(attributes[i].name, "XV_CONTRAST") &&
+                         (!strcmp(name, "contrast")))
+                    break;
+                else if (!strcmp(attributes[i].name, "XV_SATURATION") &&
+                         (!strcmp(name, "saturation")))
+                    break;
+                else if (!strcmp(attributes[i].name, "XV_HUE") &&
+                         (!strcmp(name, "hue")))
+                    break;
+                else if (!strcmp(attributes[i].name, "XV_RED_INTENSITY") &&
+                    (!strcmp(name, "red_intensity")))
+                    break;
+                else if (!strcmp(attributes[i].name, "XV_GREEN_INTENSITY")
+                         && (!strcmp(name, "green_intensity")))
+                    break;
+                else if (!strcmp(attributes[i].name, "XV_BLUE_INTENSITY")
+                         && (!strcmp(name, "blue_intensity")))
+                    break;
+                else if ((!strcmp(attributes[i].name, "XV_ITURBT_709") //NVIDIA
+                          || !strcmp(attributes[i].name, "XV_COLORSPACE")) //ATI
+                         && (!strcmp(name, "bt_709")))
+                    break;
+                atom = None; // not the attribute we are looking for
+            }
+        }
+    }
+    if (attributes) // XFree() must not be passed NULL
+        XFree(attributes);
+    return atom;
+}
+
+static int xv_set_eq(struct vo *vo, uint32_t xv_port, const char *name,
+                     int value)
+{
+    MP_VERBOSE(vo, "xv_set_eq called! (%s, %d)\n", name, value);
+    int min, max;
+    int atom = xv_find_atom(vo, xv_port, name, false, &min, &max);
+    if (atom != None) {
+        // -100 -> min
+        //   0  -> (max+min)/2
+        // +100 -> max
+        // (64-bit intermediate: the driver-reported range may be large)
+        int port_value = (value + 100) * ((int64_t)max - min) / 200 + min;
+        XvSetPortAttribute(vo->x11->display, xv_port, atom, port_value);
+        return VO_TRUE;
+    }
+    return VO_FALSE; // no settable attribute for this name
+}
+
+static int xv_get_eq(struct vo *vo, uint32_t xv_port, const char *name,
+                     int *value)
+{
+    int min, max;
+    int atom = xv_find_atom(vo, xv_port, name, true, &min, &max);
+    // A degenerate range (max == min) would divide by zero below: give up.
+    if (atom == None || max == min)
+        return VO_FALSE;
+    int port_value = 0;
+    XvGetPortAttribute(vo->x11->display, xv_port, atom, &port_value);
+    // Map [min, max] linearly onto [-100, 100]; 64-bit avoids int overflow.
+    *value = ((int64_t)port_value - min) * 200 / ((int64_t)max - min) - 100;
+    MP_VERBOSE(vo, "xv_get_eq called! (%s, %d)\n", name, *value);
+    return VO_TRUE;
+}
+
+// Intern atom_name, but only if the current Xv port lists an attribute of
+// that name; otherwise (or if the attribute query fails) return None.
+static Atom xv_intern_atom_if_exists(struct vo *vo, char const *atom_name)
+{
+    struct xvctx *ctx = vo->priv;
+    Display *dpy = vo->x11->display;
+    int count = 0;
+    XvAttribute *attrs = XvQueryPortAttributes(dpy, ctx->xv_port, &count);
+    if (!attrs)
+        return None;
+
+    Atom found = None;
+    for (int n = 0; n < count; n++) {
+        if (strcmp(attrs[n].name, atom_name) != 0)
+            continue;
+        found = XInternAtom(dpy, atom_name, False);
+        break;
+    }
+    XFree(attrs);
+    return found;
+}
+
+// Ask the port to sync to vblank via its XV_SYNC_TO_VBLANK attribute.
+// Returns -1 if the attribute is missing, 0 on failure and 1 on success.
+static int xv_enable_vsync(struct vo *vo)
+{
+    struct xvctx *ctx = vo->priv;
+    Atom atom = xv_intern_atom_if_exists(vo, "XV_SYNC_TO_VBLANK");
+    if (atom == None)
+        return -1;
+    int rc = XvSetPortAttribute(vo->x11->display, ctx->xv_port, atom, 1);
+    return rc == Success;
+}
+
+// Get maximum supported source image dimensions.
+// If querying the dimensions fails, don't change *width and *height.
+static void xv_get_max_img_dim(struct vo *vo, uint32_t *width, uint32_t *height)
+{
+    struct xvctx *ctx = vo->priv;
+    // Pre-initialize the outputs: a failed query may leave them untouched.
+    XvEncodingInfo *encodings = NULL;
+    unsigned int num_encodings = 0;
+
+    XvQueryEncodings(vo->x11->display, ctx->xv_port, &num_encodings, &encodings);
+
+    for (unsigned int idx = 0; encodings && idx < num_encodings; ++idx) {
+        if (strcmp(encodings[idx].name, "XV_IMAGE") == 0) {
+            *width  = encodings[idx].width;
+            *height = encodings[idx].height;
+            break;
+        }
+    }
+
+    MP_VERBOSE(vo, "Maximum source image dimensions: %ux%u\n", *width, *height);
+
+    if (encodings)
+        XvFreeEncodingInfo(encodings);
+}
+
+static void xv_print_ck_info(struct vo *vo)
+{
+    struct xvctx *xv = vo->priv;
+    // Log (verbose level) the colorkey drawing method and colorkey source.
+    switch (xv->xv_ck_info.method) {
+    case CK_METHOD_NONE:
+        MP_VERBOSE(vo, "Drawing no colorkey.\n");
+        return; // no colorkey in use, so the source is not reported
+    case CK_METHOD_AUTOPAINT:
+        MP_VERBOSE(vo, "Colorkey is drawn by Xv.\n");
+        break;
+    case CK_METHOD_MANUALFILL:
+        MP_VERBOSE(vo, "Drawing colorkey manually.\n");
+        break;
+    case CK_METHOD_BACKGROUND:
+        MP_VERBOSE(vo, "Colorkey is drawn as window background.\n");
+        break;
+    }
+
+    switch (xv->xv_ck_info.source) {
+    case CK_SRC_CUR:
+        MP_VERBOSE(vo, "Using colorkey from Xv (0x%06lx).\n", xv->xv_colorkey);
+        break;
+    case CK_SRC_USE:
+        if (xv->xv_ck_info.method == CK_METHOD_AUTOPAINT) {
+            MP_VERBOSE(vo, "Ignoring colorkey from mpv (0x%06lx).\n",
+                       xv->xv_colorkey);
+        } else {
+            MP_VERBOSE(vo, "Using colorkey from mpv (0x%06lx). Use -colorkey to change.\n",
+                       xv->xv_colorkey);
+        }
+        break;
+    case CK_SRC_SET:
+        MP_VERBOSE(vo, "Setting and using colorkey from mpv (0x%06lx)."
+                   " Use -colorkey to change.\n", xv->xv_colorkey);
+        break;
+    }
+}
+
+/* NOTE: If vo.colorkey has bits set after the first 3 low order bytes
+ *       we don't draw anything as this means it was forced to off. */
+static int xv_init_colorkey(struct vo *vo)
+{
+    struct xvctx *ctx = vo->priv;
+    Display *display = vo->x11->display;
+    Atom xv_atom;
+    int rez;
+    // Returns 1 on success, 0 if the colorkey could not be read or set.
+    /* check if colorkeying is needed */
+    xv_atom = xv_intern_atom_if_exists(vo, "XV_COLORKEY");
+    if (xv_atom != None && ctx->xv_ck_info.method != CK_METHOD_NONE) {
+        if (ctx->xv_ck_info.source == CK_SRC_CUR) {
+            int colorkey_ret;
+
+            rez = XvGetPortAttribute(display, ctx->xv_port, xv_atom,
+                                     &colorkey_ret);
+            if (rez == Success)
+                ctx->xv_colorkey = colorkey_ret;
+            else {
+                MP_FATAL(vo, "Couldn't get colorkey! "
+                         "Maybe the selected Xv port has no overlay.\n");
+                return 0; // error getting colorkey
+            }
+        } else {
+            ctx->xv_colorkey = ctx->colorkey;
+
+            /* check if we have to set the colorkey too */
+            if (ctx->xv_ck_info.source == CK_SRC_SET) {
+                xv_atom = XInternAtom(display, "XV_COLORKEY", False);
+
+                rez = XvSetPortAttribute(display, ctx->xv_port, xv_atom,
+                                         ctx->colorkey);
+                if (rez != Success) {
+                    MP_FATAL(vo, "Couldn't set colorkey!\n");
+                    return 0; // error setting colorkey
+                }
+            }
+        }
+
+        xv_atom = xv_intern_atom_if_exists(vo, "XV_AUTOPAINT_COLORKEY");
+
+        /* should we draw the colorkey ourselves or activate autopainting? */
+        if (ctx->xv_ck_info.method == CK_METHOD_AUTOPAINT) {
+            rez = !Success; // counts as failed unless autopaint gets enabled
+
+            if (xv_atom != None) // autopaint is supported
+                rez = XvSetPortAttribute(display, ctx->xv_port, xv_atom, 1);
+
+            if (rez != Success) // fall back to filling the colorkey ourselves
+                ctx->xv_ck_info.method = CK_METHOD_MANUALFILL;
+        } else {
+            // disable colorkey autopainting if supported
+            if (xv_atom != None)
+                XvSetPortAttribute(display, ctx->xv_port, xv_atom, 0);
+        }
+    } else { // do no colorkey drawing at all
+        ctx->xv_ck_info.method = CK_METHOD_NONE;
+        ctx->colorkey = 0xFF000000;
+    }
+
+    xv_print_ck_info(vo);
+
+    return 1; // success
+}
+
+/* Draw the colorkey on the video window.
+ *
+ * Fills rc with xv_colorkey when the method is CK_METHOD_MANUALFILL or
+ * CK_METHOD_BACKGROUND; does nothing for the other methods, or while vo_gc
+ * is unset. The window area outside of the video is not handled here;
+ * resize() clears it separately via vo_x11_clear_background(). */
+static void xv_draw_colorkey(struct vo *vo, const struct mp_rect *rc)
+{
+    struct xvctx *ctx = vo->priv;
+    struct vo_x11_state *x11 = vo->x11;
+    if (ctx->xv_ck_info.method == CK_METHOD_MANUALFILL ||
+        ctx->xv_ck_info.method == CK_METHOD_BACKGROUND)
+    {
+        if (!ctx->vo_gc)
+            return;
+        //less tearing than XClearWindow()
+        XSetForeground(x11->display, ctx->vo_gc, ctx->xv_colorkey);
+        XFillRectangle(x11->display, x11->window, ctx->vo_gc, rc->x0, rc->y0,
+                       rc->x1 - rc->x0, rc->y1 - rc->y0);
+    }
+}
+
+static void read_xv_csp(struct vo *vo)
+{
+    struct xvctx *ctx = vo->priv;
+    int scaled = 0; // xv_get_eq() result on its -100..100 scale
+    bool known = xv_get_eq(vo, ctx->xv_port, "bt_709", &scaled);
+    int csp = scaled == 100 ? MP_CSP_BT_709 : MP_CSP_BT_601;
+    ctx->cached_csp = known ? csp : 0; // 0: attribute could not be read
+}
+
+
+static void fill_rect(struct vo *vo, GC gc, int x0, int y0, int x1, int y1)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    // Clip the rectangle to the window area before drawing.
+    int left = MPMAX(x0, 0), top = MPMAX(y0, 0);
+    int right = MPMIN(x1, vo->dwidth), bottom = MPMIN(y1, vo->dheight);
+    // Skip empty rectangles and a missing window or GC.
+    if (!x11->window || !gc || right <= left || bottom <= top)
+        return;
+    XFillRectangle(x11->display, x11->window, gc, left, top,
+                   right - left, bottom - top);
+}
+
+// Paint the four window borders around rc (top, bottom, left, right) with
+// f_gc, i.e. everything that rc does not cover.
+static void vo_x11_clear_background(struct vo *vo, const struct mp_rect *rc)
+{
+    struct xvctx *ctx = vo->priv;
+    const int win_w = vo->dwidth;
+    const int win_h = vo->dheight;
+
+    // Full-width bands above and below rc...
+    fill_rect(vo, ctx->f_gc, 0, 0,      win_w, rc->y0);
+    fill_rect(vo, ctx->f_gc, 0, rc->y1, win_w, win_h);
+    // ...and the strips left and right of it, limited to rc's rows.
+    fill_rect(vo, ctx->f_gc, 0,      rc->y0, rc->x0, rc->y1);
+    fill_rect(vo, ctx->f_gc, rc->x1, rc->y0, win_w,  rc->y1);
+
+    XFlush(vo->x11->display);
+}
+
+static void resize(struct vo *vo)
+{
+    struct xvctx *ctx = vo->priv;
+
+    // Can't be used, because the function calculates screen-space coordinates,
+    // while we need video-space.
+    struct mp_osd_res unused;
+
+    vo_get_src_dst_rects(vo, &ctx->src_rect, &ctx->dst_rect, &unused);
+
+    vo_x11_clear_background(vo, &ctx->dst_rect); // borders around the video
+    xv_draw_colorkey(vo, &ctx->dst_rect); // colorkey inside the video area
+    read_xv_csp(vo); // refresh cached_csp from the port
+
+    mp_input_set_mouse_transform(vo->input_ctx, &ctx->dst_rect, &ctx->src_rect);
+
+    vo->want_redraw = true;
+}
+
+/*
+ * Configure for new video parameters: pick the Xv format, set up the window
+ * and GCs, (re)allocate the image buffers. Returns 0 on success, -1 on error.
+ */
+static int reconfig(struct vo *vo, struct mp_image_params *params)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct xvctx *ctx = vo->priv;
+    int i;
+
+    ctx->image_height = params->h;
+    ctx->image_width  = params->w;
+    ctx->image_format = params->imgfmt;
+
+    if ((ctx->max_width != 0 && ctx->max_height != 0)
+        && (ctx->image_width > ctx->max_width
+            || ctx->image_height > ctx->max_height)) {
+        MP_ERR(vo, "Source image dimensions are too high: %ux%u (maximum is %ux%u)\n",
+               ctx->image_width, ctx->image_height, ctx->max_width,
+               ctx->max_height);
+        return -1;
+    }
+
+    /* check image formats */
+    ctx->xv_format = 0;
+    for (i = 0; i < ctx->formats; i++) { // also logs every listed format
+        MP_VERBOSE(vo, "Xvideo image format: 0x%x (%4.4s) %s\n",
+                   ctx->fo[i].id, (char *) &ctx->fo[i].id,
+                   (ctx->fo[i].format == XvPacked) ? "packed" : "planar");
+        if (ctx->fo[i].id == find_xv_format(ctx->image_format))
+            ctx->xv_format = ctx->fo[i].id;
+    }
+    if (!ctx->xv_format)
+        return -1; // none of the listed formats matches the video format
+
+    vo_x11_config_vo_window(vo);
+
+    if (!ctx->f_gc && !ctx->vo_gc) { // create both GCs on the first call only
+        ctx->f_gc = XCreateGC(x11->display, x11->window, 0, 0);
+        ctx->vo_gc = XCreateGC(x11->display, x11->window, 0, NULL);
+        XSetForeground(x11->display, ctx->f_gc, 0);
+    }
+
+    if (ctx->xv_ck_info.method == CK_METHOD_BACKGROUND)
+        XSetWindowBackground(x11->display, x11->window, ctx->xv_colorkey);
+
+    MP_VERBOSE(vo, "using Xvideo port %d for hw scaling\n", ctx->xv_port);
+
+    // In case config has been called before
+    for (i = 0; i < ctx->num_buffers; i++)
+        deallocate_xvimage(vo, i);
+
+    ctx->num_buffers = ctx->cfg_buffers;
+
+    for (i = 0; i < ctx->num_buffers; i++) {
+        if (!allocate_xvimage(vo, i)) {
+            MP_FATAL(vo, "could not allocate Xv image data\n");
+            return -1; // num_buffers still covers the unallocated slots
+        }
+    }
+
+    ctx->current_buf = 0;
+    ctx->current_ip_buf = 0;
+
+    int is_709 = params->color.space == MP_CSP_BT_709;
+    xv_set_eq(vo, ctx->xv_port, "bt_709", is_709 * 200 - 100); // -100 or +100
+    read_xv_csp(vo);
+
+    resize(vo);
+
+    return 0;
+}
+
+// Allocate Xv image buffer foo (XShm if available, else av_malloc-backed) and
+// clear it. On failure the partly set up slot is left to deallocate_xvimage().
+static bool allocate_xvimage(struct vo *vo, int foo)
+{
+    struct xvctx *ctx = vo->priv;
+    struct vo_x11_state *x11 = vo->x11;
+    // align it for faster OSD rendering (draw_bmp.c swscale usage)
+    int aligned_w = FFALIGN(ctx->image_width, 32);
+    // round up the height to next chroma boundary too
+    int aligned_h = FFALIGN(ctx->image_height, 2);
+    if (x11->display_is_local && XShmQueryExtension(x11->display)) {
+        ctx->Shmem_Flag = 1;
+        x11->ShmCompletionEvent = XShmGetEventBase(x11->display)
+                                + ShmCompletion;
+    } else {
+        ctx->Shmem_Flag = 0;
+        MP_INFO(vo, "Shared memory not supported\nReverting to normal Xv.\n");
+    }
+    if (ctx->Shmem_Flag) {
+        ctx->xvimage[foo] = XvShmCreateImage(x11->display, ctx->xv_port,
+                                             ctx->xv_format, NULL,
+                                             aligned_w, aligned_h,
+                                             &ctx->Shminfo[foo]);
+        if (!ctx->xvimage[foo])
+            return false;
+        ctx->Shminfo[foo].shmid = shmget(IPC_PRIVATE,
+                                         ctx->xvimage[foo]->data_size,
+                                         IPC_CREAT | 0777);
+        ctx->Shminfo[foo].shmaddr = shmat(ctx->Shminfo[foo].shmid, 0, 0);
+        if (ctx->Shminfo[foo].shmaddr == (void *)-1) {
+            shmctl(ctx->Shminfo[foo].shmid, IPC_RMID, 0); // don't leak it
+            return false;
+        }
+        ctx->Shminfo[foo].readOnly = False;
+
+        ctx->xvimage[foo]->data = ctx->Shminfo[foo].shmaddr;
+        XShmAttach(x11->display, &ctx->Shminfo[foo]);
+        XSync(x11->display, False);
+        shmctl(ctx->Shminfo[foo].shmid, IPC_RMID, 0);
+    } else {
+        ctx->xvimage[foo] = XvCreateImage(x11->display, ctx->xv_port,
+                                          ctx->xv_format, NULL,
+                                          aligned_w, aligned_h);
+        if (!ctx->xvimage[foo])
+            return false;
+        ctx->xvimage[foo]->data = av_malloc(ctx->xvimage[foo]->data_size);
+        if (!ctx->xvimage[foo]->data)
+            return false;
+        XSync(x11->display, False);
+    }
+    if ((ctx->xvimage[foo]->width < aligned_w) ||
+        (ctx->xvimage[foo]->height < aligned_h)) {
+        MP_ERR(vo, "Got XvImage with too small size: %dx%d (expected %dx%d)\n",
+               ctx->xvimage[foo]->width, ctx->xvimage[foo]->height,
+               aligned_w, aligned_h);
+        return false;
+    }
+
+    struct mp_image img = get_xv_buffer(vo, foo);
+    mp_image_set_size(&img, aligned_w, aligned_h);
+    mp_image_clear(&img, 0, 0, img.w, img.h);
+    return true;
+}
+
+static void deallocate_xvimage(struct vo *vo, int foo)
+{
+    struct xvctx *ctx = vo->priv;
+    // Slots after a failed allocate_xvimage() were never filled in.
+    if (!ctx->xvimage[foo])
+        return;
+    if (ctx->Shmem_Flag) {
+        XShmDetach(vo->x11->display, &ctx->Shminfo[foo]);
+        shmdt(ctx->Shminfo[foo].shmaddr);
+    } else {
+        av_free(ctx->xvimage[foo]->data);
+    }
+    XFree(ctx->xvimage[foo]);
+
+    ctx->xvimage[foo] = NULL;
+    ctx->Shminfo[foo] = (XShmSegmentInfo){0};
+    XSync(vo->x11->display, False);
+}
+
+static inline void put_xvimage(struct vo *vo, XvImage *xvi)
+{
+    struct xvctx *ctx = vo->priv;
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_rect *src = &ctx->src_rect;
+    struct mp_rect *dst = &ctx->dst_rect;
+    int dw = dst->x1 - dst->x0, dh = dst->y1 - dst->y0; // size in the window
+    int sw = src->x1 - src->x0, sh = src->y1 - src->y0; // size in the image
+
+    if (ctx->Shmem_Flag) {
+        XvShmPutImage(x11->display, ctx->xv_port, x11->window, ctx->vo_gc, xvi,
+                      src->x0, src->y0, sw, sh,
+                      dst->x0, dst->y0, dw, dh,
+                      True);
+        x11->ShmCompletionWaitCount++; // checked by wait_for_completion()
+    } else {
+        XvPutImage(x11->display, ctx->xv_port, x11->window, ctx->vo_gc, xvi,
+                   src->x0, src->y0, sw, sh,
+                   dst->x0, dst->y0, dw, dh);
+    }
+}
+
+static struct mp_image get_xv_buffer(struct vo *vo, int buf_index)
+{
+    struct xvctx *ctx = vo->priv;
+    XvImage *xv_image = ctx->xvimage[buf_index];
+    // Build an mp_image header whose planes point into the XvImage data.
+    struct mp_image img = {0};
+    mp_image_set_size(&img, ctx->image_width, ctx->image_height);
+    mp_image_setfmt(&img, ctx->image_format);
+
+    bool swapuv = ctx->xv_format == MP_FOURCC_YV12; // planes 1 and 2 swap
+    for (int n = 0; n < img.num_planes; n++) {
+        int sn = n > 0 &&  swapuv ? (n == 1 ? 2 : 1) : n; // XvImage plane index
+        img.planes[n] = xv_image->data + xv_image->offsets[sn];
+        img.stride[n] = xv_image->pitches[sn];
+    }
+
+    if (vo->params) {
+        struct mp_image_params params = *vo->params;
+        if (ctx->cached_csp) // value read back from the port takes priority
+            params.color.space = ctx->cached_csp;
+        mp_image_set_attributes(&img, &params);
+    }
+
+    return img;
+}
+
+// Sleep in 1 ms steps until at most max_outstanding XShm puts are pending.
+static void wait_for_completion(struct vo *vo, int max_outstanding)
+{
+    struct xvctx *xv = vo->priv;
+    if (!xv->Shmem_Flag)
+        return; // XvPutImage path: nothing is counted
+    while (vo->x11->ShmCompletionWaitCount > max_outstanding) {
+        if (!xv->Shm_Warned_Slow) {
+            xv->Shm_Warned_Slow = 1; // warn only once
+            MP_WARN(vo, "X11 can't keep up! Waiting"
+                    " for XShm completion events...\n");
+        }
+        mp_sleep_ns(MP_TIME_MS_TO_NS(1));
+        vo_x11_check_events(vo);
+    }
+}
+
+static void flip_page(struct vo *vo)
+{
+    struct xvctx *ctx = vo->priv;
+    put_xvimage(vo, ctx->xvimage[ctx->current_buf]);
+
+    /* advance to the buffer that is drawn into (and put) next */
+    ctx->current_buf = (ctx->current_buf + 1) % ctx->num_buffers;
+
+    if (!ctx->Shmem_Flag)
+        XSync(vo->x11->display, False);
+
+    if (vo->x11->use_present) {
+        vo_x11_present(vo);
+        present_sync_swap(vo->x11->present);
+    }
+}
+
+// Report vsync timing; *info is only touched when Present is in use.
+static void get_vsync(struct vo *vo, struct vo_vsync_info *info)
+{
+    if (vo->x11->use_present)
+        present_sync_get_info(vo->x11->present, info);
+}
+
+static void draw_frame(struct vo *vo, struct vo_frame *frame)
+{
+    struct xvctx *ctx = vo->priv;
+
+    wait_for_completion(vo, ctx->num_buffers - 1); // keep one buffer free
+    bool render = vo_x11_check_visible(vo);
+    if (!render)
+        return;
+
+    struct mp_image xv_buffer = get_xv_buffer(vo, ctx->current_buf);
+    if (frame->current) {
+        mp_image_copy(&xv_buffer, frame->current);
+    } else {
+        mp_image_clear(&xv_buffer, 0, 0, xv_buffer.w, xv_buffer.h); // no frame
+    }
+
+    struct mp_osd_res res = osd_res_from_image_params(vo->params);
+    osd_draw_on_image(vo->osd, res, frame->current ? frame->current->pts : 0, 0, &xv_buffer);
+
+    if (frame->current != ctx->original_image)
+        ctx->original_image = frame->current; // plain pointer store
+}
+
+// Return 1 if `format` maps to an Xv fourcc that appears in the image
+// format list stored in ctx->fo, 0 otherwise.
+static int query_format(struct vo *vo, int format)
+{
+    struct xvctx *xv = vo->priv;
+
+    int fourcc = find_xv_format(format);
+    if (!fourcc)
+        return 0;
+
+    uint32_t idx = 0;
+    while (idx < xv->formats) {
+        if (xv->fo[idx].id == fourcc)
+            return 1;
+        idx++;
+    }
+    return 0;
+}
+
+// Release everything held by the VO: adaptor info, image format list,
+// image buffers, graphics contexts, and finally the X11 state.
+static void uninit(struct vo *vo)
+{
+    struct xvctx *xv = vo->priv;
+
+    if (xv->ai)
+        XvFreeAdaptorInfo(xv->ai);
+    xv->ai = NULL;
+
+    if (xv->fo) {
+        XFree(xv->fo);
+        xv->fo = NULL;
+    }
+
+    int buf = 0;
+    while (buf < xv->num_buffers) {
+        deallocate_xvimage(vo, buf);
+        buf++;
+    }
+
+    if (xv->f_gc != None)
+        XFreeGC(vo->x11->display, xv->f_gc);
+    if (xv->vo_gc != None)
+        XFreeGC(vo->x11->display, xv->vo_gc);
+
+    // uninit() shouldn't get called unless initialization went past vo_init()
+    vo_x11_uninit(vo);
+}
+
+// One-time setup of the Xv VO: bring up X11 and the window, verify the
+// Xvideo extension, grab a port (the user-requested one if it is valid and
+// free, otherwise the first grabbable one), then query colorkey, vsync,
+// maximum image size and the supported image formats.
+// Returns 0 on success; on failure everything is torn down via uninit()
+// and -1 is returned.
+static int preinit(struct vo *vo)
+{
+    struct xvctx *ctx = vo->priv;
+    int xv_adaptor = ctx->cfg_xv_adaptor;
+    int busy_ports = 0;
+    XvPortID port;
+    unsigned int n;
+
+    if (!vo_x11_init(vo))
+        return -1;
+
+    if (!vo_x11_create_vo_window(vo, NULL, "xv"))
+        goto error;
+
+    struct vo_x11_state *x11 = vo->x11;
+
+    /* the Xvideo extension must be present */
+    unsigned int ver, rel, req, ev, err;
+    if (XvQueryExtension(x11->display, &ver, &rel, &req, &ev, &err) != Success) {
+        MP_ERR(vo, "Xv not supported by this X11 version/driver\n");
+        goto error;
+    }
+
+    /* fetch the list of adaptors */
+    if (XvQueryAdaptors(x11->display, DefaultRootWindow(x11->display),
+                        &ctx->adaptors, &ctx->ai) != Success) {
+        MP_ERR(vo, "XvQueryAdaptors failed.\n");
+        goto error;
+    }
+
+    /* a port was requested explicitly: make sure it exists, then grab it */
+    if (ctx->xv_port) {
+        int port_found = 0;
+
+        for (n = 0; n < ctx->adaptors && !port_found; n++) {
+            if (!(ctx->ai[n].type & XvInputMask))
+                continue;
+            if (!(ctx->ai[n].type & XvImageMask))
+                continue;
+
+            for (port = ctx->ai[n].base_id;
+                 port < ctx->ai[n].base_id + ctx->ai[n].num_ports;
+                 ++port) {
+                if (port == ctx->xv_port) {
+                    port_found = 1;
+                    break;
+                }
+            }
+        }
+
+        if (!port_found) {
+            MP_WARN(vo, "Invalid port parameter, overriding with port 0.\n");
+            ctx->xv_port = 0;
+        } else if (XvGrabPort(x11->display, ctx->xv_port, CurrentTime)) {
+            // Grab failed; fall back to automatic port selection below.
+            ctx->xv_port = 0;
+        }
+    }
+
+    /* no port yet: take the first one that can be grabbed */
+    for (n = 0; n < ctx->adaptors && ctx->xv_port == 0; n++) {
+        /* honor an explicitly selected adaptor number */
+        if (xv_adaptor != -1 && xv_adaptor != n)
+            continue;
+
+        if (!(ctx->ai[n].type & XvInputMask) || !(ctx->ai[n].type & XvImageMask))
+            continue;
+
+        for (port = ctx->ai[n].base_id;
+             port < ctx->ai[n].base_id + ctx->ai[n].num_ports; ++port) {
+            if (XvGrabPort(x11->display, port, CurrentTime)) {
+                MP_WARN(vo, "Could not grab port %i.\n", (int) port);
+                ++busy_ports;
+                continue;
+            }
+            ctx->xv_port = port;
+            MP_VERBOSE(vo, "Using Xv Adapter #%d (%s)\n",
+                       n, ctx->ai[n].name);
+            break;
+        }
+    }
+
+    if (!ctx->xv_port) {
+        if (busy_ports) {
+            MP_ERR(vo, "Xvideo ports busy.\n");
+        } else {
+            MP_ERR(vo, "No Xvideo support found.\n");
+        }
+        goto error;
+    }
+
+    if (!xv_init_colorkey(vo))
+        goto error;             // bail out, colorkey setup failed
+
+    xv_enable_vsync(vo);
+    xv_get_max_img_dim(vo, &ctx->max_width, &ctx->max_height);
+
+    ctx->fo = XvListImageFormats(x11->display, ctx->xv_port,
+                                 (int *) &ctx->formats);
+
+    MP_WARN(vo, "Warning: this legacy VO has bad quality and performance, "
+                "and will in particular result in blurry OSD and subtitles. "
+                "You should fix your graphics drivers, or not force the xv VO.\n");
+    return 0;
+
+  error:
+    uninit(vo);                 // free resources
+    return -1;
+}
+
+// VO control entry point. Panscan changes are handled here by re-running
+// resize(); everything else is forwarded to the common X11 handler, with a
+// resize() when it reports an expose or resize event.
+static int control(struct vo *vo, uint32_t request, void *data)
+{
+    if (request == VOCTRL_SET_PANSCAN) {
+        resize(vo);
+        return VO_TRUE;
+    }
+
+    int events = 0;
+    int ret = vo_x11_control(vo, &events, request, data);
+
+    const int relayout_mask = VO_EVENT_EXPOSE | VO_EVENT_RESIZE;
+    if (events & relayout_mask)
+        resize(vo);
+
+    vo_event(vo, events);
+    return ret;
+}
+
+#define OPT_BASE_STRUCT struct xvctx
+
+// Driver table for the legacy X11/Xv video output ("xv").
+const struct vo_driver video_out_xv = {
+    .name = "xv",
+    .description = "X11/Xv",
+    .options_prefix = "xv",
+
+    // Lifecycle
+    .preinit = preinit,
+    .uninit = uninit,
+    .query_format = query_format,
+    .reconfig = reconfig,
+    .control = control,
+
+    // Rendering and presentation
+    .draw_frame = draw_frame,
+    .flip_page = flip_page,
+    .get_vsync = get_vsync,
+
+    // Event handling is shared with the other X11 VOs
+    .wakeup = vo_x11_wakeup,
+    .wait_events = vo_x11_wait_events,
+
+    .priv_size = sizeof(struct xvctx),
+    .priv_defaults = &(const struct xvctx) {
+        .cfg_buffers = 2,
+        .cfg_xv_adaptor = -1,
+        .xv_ck_info = {CK_METHOD_MANUALFILL, CK_SRC_CUR},
+        // default colorkey is green
+        // (0xff000000 means that colorkey has been disabled)
+        .colorkey = 0x0000ff00,
+    },
+    .options = (const struct m_option[]) {
+        {"port", OPT_INT(xv_port), M_RANGE(0, DBL_MAX)},
+        {"adaptor", OPT_INT(cfg_xv_adaptor), M_RANGE(-1, DBL_MAX)},
+        {"ck", OPT_CHOICE(xv_ck_info.source,
+            {"use", CK_SRC_USE},
+            {"set", CK_SRC_SET},
+            {"cur", CK_SRC_CUR})},
+        {"ck-method", OPT_CHOICE(xv_ck_info.method,
+            {"none", CK_METHOD_NONE},
+            {"bg", CK_METHOD_BACKGROUND},
+            {"man", CK_METHOD_MANUALFILL},
+            {"auto", CK_METHOD_AUTOPAINT})},
+        {"colorkey", OPT_INT(colorkey)},
+        {"buffers", OPT_INT(cfg_buffers), M_RANGE(1, MAX_BUFFERS)},
+        {0}
+    },
+};
diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h
new file mode 100644
index 0000000..d006942
--- /dev/null
+++ b/video/out/vulkan/common.h
@@ -0,0 +1,40 @@
+#pragma once
+
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdbool.h>
+#include <assert.h>
+
+#include "config.h"
+
+#include "common/common.h"
+#include "common/msg.h"
+
+// We need to define all platforms we want to support. Since we have
+// our own mechanism for checking this, we re-define the right symbols
+#if HAVE_WAYLAND
+#define VK_USE_PLATFORM_WAYLAND_KHR
+#endif
+#if HAVE_X11
+#define VK_USE_PLATFORM_XLIB_KHR
+#endif
+#if HAVE_WIN32_DESKTOP
+#define VK_USE_PLATFORM_WIN32_KHR
+#endif
+#if HAVE_COCOA
+#define VK_USE_PLATFORM_MACOS_MVK
+#define VK_USE_PLATFORM_METAL_EXT
+#endif
+
+#include <libplacebo/vulkan.h>
+
+// Shared struct used to hold vulkan context information
+struct mpvk_ctx {
+    pl_log pllog;           // must be set before ra_vk_ctx_init() (asserted there)
+    pl_vk_inst vkinst;      // must be set before ra_vk_ctx_init() (asserted there)
+    pl_vulkan vulkan;       // created by ra_vk_ctx_init(), destroyed by ra_vk_ctx_uninit()
+    pl_gpu gpu; // points to vulkan->gpu for convenience
+    pl_swapchain swapchain; // created by ra_vk_ctx_init() for `surface`
+    VkSurfaceKHR surface;   // filled in by the platform context before ra_vk_ctx_init()
+};
diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c
new file mode 100644
index 0000000..5087403
--- /dev/null
+++ b/video/out/vulkan/context.c
@@ -0,0 +1,372 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "config.h"
+
+#if HAVE_LAVU_UUID
+#include <libavutil/uuid.h>
+#else
+#include "misc/uuid.h"
+#endif
+
+#include "options/m_config.h"
+#include "video/out/placebo/ra_pl.h"
+
+#include "context.h"
+#include "utils.h"
+
+struct vulkan_opts {
+    char *device; // force a specific GPU
+    int swap_mode;       // -1 = auto, otherwise a VkPresentModeKHR value
+    int queue_count;     // option range is 1..8
+    bool async_transfer; // forwarded to pl_vulkan_params.async_transfer
+    bool async_compute;  // forwarded to pl_vulkan_params.async_compute
+};
+
+/*
+ * Option validator for the "vulkan-device" option.
+ *
+ * Creates a temporary Vulkan instance, enumerates the physical devices and
+ * checks whether *value matches either a device name or a device UUID.
+ * If the value is "help", the available devices are listed instead.
+ *
+ * Returns 0 if a matching device exists, M_OPT_EXIT after printing the
+ * device list, and M_OPT_INVALID otherwise (including when the instance
+ * or the device enumeration could not be set up).
+ */
+static int vk_validate_dev(struct mp_log *log, const struct m_option *opt,
+                           struct bstr name, const char **value)
+{
+    struct bstr param = bstr0(*value);
+    int ret = M_OPT_INVALID;
+    VkResult res;
+
+    // Create a dummy instance to validate/list the devices
+    VkInstanceCreateInfo info = {
+        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+        .pApplicationInfo = &(VkApplicationInfo) {
+            .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO,
+            .apiVersion = VK_API_VERSION_1_1,
+        }
+    };
+
+    // Starts out as a null handle so the cleanup path below can destroy it
+    // unconditionally; vkDestroyInstance() accepts a null instance.
+    VkInstance inst = NULL;
+    VkPhysicalDevice *devices = NULL;
+    uint32_t num = 0;
+
+    res = vkCreateInstance(&info, NULL, &inst);
+    if (res != VK_SUCCESS) {
+        // Don't rely on what a failed call left in the output handle.
+        inst = NULL;
+        goto done;
+    }
+
+    res = vkEnumeratePhysicalDevices(inst, &num, NULL);
+    if (res != VK_SUCCESS)
+        goto done;
+
+    devices = talloc_array(NULL, VkPhysicalDevice, num);
+    res = vkEnumeratePhysicalDevices(inst, &num, devices);
+    if (res != VK_SUCCESS)
+        goto done;
+
+    bool help = bstr_equals0(param, "help");
+    if (help) {
+        mp_info(log, "Available vulkan devices:\n");
+        ret = M_OPT_EXIT;
+    }
+
+    // The value may be given either as a device name or as a UUID string.
+    AVUUID param_uuid;
+    bool is_uuid = av_uuid_parse(*value, param_uuid) == 0;
+
+    for (int i = 0; i < num; i++) {
+        // Chain the ID properties so the device UUID is returned as well.
+        VkPhysicalDeviceIDPropertiesKHR id_prop = { 0 };
+        id_prop.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR;
+
+        VkPhysicalDeviceProperties2KHR prop2 = { 0 };
+        prop2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR;
+        prop2.pNext = &id_prop;
+
+        vkGetPhysicalDeviceProperties2(devices[i], &prop2);
+
+        const VkPhysicalDeviceProperties *prop = &prop2.properties;
+
+        if (help) {
+            char device_uuid[37];
+            av_uuid_unparse(id_prop.deviceUUID, device_uuid);
+            mp_info(log, "  '%s' (GPU %d, PCI ID %x:%x, UUID %s)\n",
+                    prop->deviceName, i, (unsigned)prop->vendorID,
+                    (unsigned)prop->deviceID, device_uuid);
+        } else if (bstr_equals0(param, prop->deviceName)) {
+            ret = 0;
+            goto done;
+        } else if (is_uuid && av_uuid_equal(param_uuid, id_prop.deviceUUID)) {
+            ret = 0;
+            goto done;
+        }
+    }
+
+    if (!help)
+        mp_err(log, "No device with %s '%.*s'!\n", is_uuid ? "UUID" : "name",
+               BSTR_P(param));
+
+done:
+    talloc_free(devices);
+    // The dummy instance was previously leaked on every call.
+    vkDestroyInstance(inst, NULL);
+    return ret;
+}
+
+#define OPT_BASE_STRUCT struct vulkan_opts
+// Option group with the user-visible "vulkan-*" settings and their defaults.
+const struct m_sub_options vulkan_conf = {
+    .size = sizeof(struct vulkan_opts),
+    .defaults = &(struct vulkan_opts) {
+        .async_compute = true,
+        .async_transfer = true,
+        .queue_count = 1,
+        .swap_mode = -1, // "auto"
+    },
+    .opts = (const struct m_option[]) {
+        {"vulkan-device", OPT_STRING_VALIDATE(device, vk_validate_dev)},
+        {"vulkan-swap-mode",
+            OPT_CHOICE(swap_mode,
+                       {"auto",        -1},
+                       {"fifo",         VK_PRESENT_MODE_FIFO_KHR},
+                       {"fifo-relaxed", VK_PRESENT_MODE_FIFO_RELAXED_KHR},
+                       {"mailbox",      VK_PRESENT_MODE_MAILBOX_KHR},
+                       {"immediate",    VK_PRESENT_MODE_IMMEDIATE_KHR})},
+        {"vulkan-queue-count", OPT_INT(queue_count), M_RANGE(1, 8)},
+        {"vulkan-async-transfer", OPT_BOOL(async_transfer)},
+        {"vulkan-async-compute", OPT_BOOL(async_compute)},
+        {"vulkan-disable-events", OPT_REMOVED("Unused")},
+        {0}
+    },
+};
+
+struct priv {
+    struct mpvk_ctx *vk;            // context passed to ra_vk_ctx_init(); not allocated here
+    struct vulkan_opts *opts;       // config group, allocated with this struct as talloc parent
+    struct ra_vk_ctx_params params; // copy of the callbacks passed to ra_vk_ctx_init()
+    struct ra_tex proxy_tex;        // filled by start_frame() to wrap the swapchain frame's FBO
+};
+
+static const struct ra_swapchain_fns vulkan_swapchain;
+
+// Return the mpvk_ctx behind `ctx`, or NULL if `ctx` has no swapchain or
+// its swapchain is not the Vulkan one defined in this file.
+struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx)
+{
+    struct ra_swapchain *sw = ctx->swapchain;
+
+    bool is_vulkan = sw && sw->fns == &vulkan_swapchain;
+    if (!is_vulkan)
+        return NULL;
+
+    struct priv *p = sw->priv;
+    return p->vk;
+}
+
+// Tear down what ra_vk_ctx_init() created. Safe to call on a partially
+// initialized context; does nothing if no swapchain was allocated.
+void ra_vk_ctx_uninit(struct ra_ctx *ctx)
+{
+    struct ra_swapchain *sw = ctx->swapchain;
+    if (!sw)
+        return;
+
+    struct priv *p = sw->priv;
+    struct mpvk_ctx *vk = p->vk;
+
+    // The GPU is only touched if the ra wrapper was actually created.
+    if (ctx->ra) {
+        pl_gpu_finish(vk->gpu);
+        pl_swapchain_destroy(&vk->swapchain);
+        ctx->ra->fns->destroy(ctx->ra);
+        ctx->ra = NULL;
+    }
+
+    // vk->gpu only aliases vulkan->gpu, so it is cleared, not destroyed.
+    vk->gpu = NULL;
+    pl_vulkan_destroy(&vk->vulkan);
+
+    TA_FREEP(&ctx->swapchain);
+}
+
+// Set up ctx->swapchain and ctx->ra on top of an mpvk_ctx whose pllog,
+// vkinst and surface have already been filled in by the caller. Creates the
+// libplacebo Vulkan device, the ra wrapper and the swapchain, in that order.
+// `preferred_mode` is used unless the user forced a swap mode.
+// Returns false (after cleaning up via ra_vk_ctx_uninit) on any failure.
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+                    struct ra_vk_ctx_params params,
+                    VkPresentModeKHR preferred_mode)
+{
+    struct ra_swapchain *sw = talloc_zero(NULL, struct ra_swapchain);
+    ctx->swapchain = sw;
+    sw->fns = &vulkan_swapchain;
+    sw->ctx = ctx;
+
+    struct priv *p = talloc_zero(sw, struct priv);
+    sw->priv = p;
+    p->params = params;
+    p->vk = vk;
+    p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf);
+
+    VkPhysicalDeviceFeatures2 features = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2,
+    };
+
+#if HAVE_VULKAN_INTEROP
+    /*
+     * Request the additional extensions and features required to make full use
+     * of the ffmpeg Vulkan hwcontext and video decoding capability.
+     */
+    const char *opt_extensions[] = {
+        VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME,
+        VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME,
+        VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME,
+        VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME,
+        VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME,
+        VK_KHR_VIDEO_QUEUE_EXTENSION_NAME,
+        // This is a literal string as it's not in the official headers yet.
+        "VK_MESA_video_decode_av1",
+    };
+
+    // Feature chain: features -> atomic float -> descriptor buffer
+    VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer_feature = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT,
+        .pNext = NULL,
+        .descriptorBuffer = true,
+        .descriptorBufferPushDescriptors = true,
+    };
+
+    VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feature = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT,
+        .pNext = &descriptor_buffer_feature,
+        .shaderBufferFloat32Atomics = true,
+        .shaderBufferFloat32AtomicAdd = true,
+    };
+
+    features.pNext = &atomic_float_feature;
+#endif
+
+    // A device option that parses as a UUID selects the device by UUID;
+    // anything else is passed on as a device name.
+    AVUUID param_uuid = { 0 };
+    const char *device_opt = p->opts->device;
+    bool is_uuid = device_opt && av_uuid_parse(device_opt, param_uuid) == 0;
+
+    assert(vk->pllog);
+    assert(vk->vkinst);
+    struct pl_vulkan_params device_params = {
+        .instance = vk->vkinst->instance,
+        .get_proc_addr = vk->vkinst->get_proc_addr,
+        .surface = vk->surface,
+        .async_transfer = p->opts->async_transfer,
+        .async_compute = p->opts->async_compute,
+        .queue_count = p->opts->queue_count,
+#if HAVE_VULKAN_INTEROP
+        .extra_queues = VK_QUEUE_VIDEO_DECODE_BIT_KHR,
+        .opt_extensions = opt_extensions,
+        .num_opt_extensions = MP_ARRAY_SIZE(opt_extensions),
+#endif
+        .features = &features,
+    };
+    if (is_uuid) {
+        av_uuid_copy(device_params.device_uuid, param_uuid);
+    } else {
+        device_params.device_name = device_opt;
+    }
+
+    vk->vulkan = pl_vulkan_create(vk->pllog, &device_params);
+    if (!vk->vulkan)
+        goto error;
+
+    vk->gpu = vk->vulkan->gpu;
+    ctx->ra = ra_create_pl(vk->gpu, ctx->log);
+    if (!ctx->ra)
+        goto error;
+
+    // Create the swapchain
+    struct pl_vulkan_swapchain_params pl_params = {
+        .surface = vk->surface,
+        .present_mode = preferred_mode,
+        .swapchain_depth = ctx->vo->opts->swapchain_depth,
+        // mpv already handles resize events, so gracefully allow suboptimal
+        // swapchains to exist in order to make resizing even smoother
+        .allow_suboptimal = true,
+    };
+
+    // A non-negative swap mode option overrides the context's preference.
+    int user_mode = p->opts->swap_mode;
+    if (user_mode >= 0)
+        pl_params.present_mode = user_mode;
+
+    vk->swapchain = pl_vulkan_create_swapchain(vk->vulkan, &pl_params);
+    if (!vk->swapchain)
+        goto error;
+
+    return true;
+
+error:
+    ra_vk_ctx_uninit(ctx);
+    return false;
+}
+
+// Resize the swapchain and store the resulting size in ctx->vo->dwidth and
+// dheight. The size is written back even if the resize reports failure.
+bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height)
+{
+    struct priv *p = ctx->swapchain->priv;
+    struct mpvk_ctx *vk = p->vk;
+
+    // pl_swapchain_resize() takes the size by pointer and may adjust it.
+    int w = width;
+    int h = height;
+    bool resized = pl_swapchain_resize(vk->swapchain, &w, &h);
+
+    ctx->vo->dwidth = w;
+    ctx->vo->dheight = h;
+    return resized;
+}
+
+// Return a talloc-allocated copy (no parent) of the user-requested device
+// option, read straight from the option group.
+char *ra_vk_ctx_get_device_name(struct ra_ctx *ctx)
+{
+    /*
+     * The option group is fetched directly rather than through the swapchain
+     * private data, because this has to work even before the ctx has been
+     * initialised: a context implementation may need the device name before
+     * it can fully initialise the ctx.
+     */
+    struct vulkan_opts *tmp_opts =
+        mp_get_config_group(NULL, ctx->global, &vulkan_conf);
+
+    char *name_copy = talloc_strdup(NULL, tmp_opts->device);
+
+    talloc_free(tmp_opts);
+    return name_copy;
+}
+
+// Swapchain color depth query. No query is implemented for Vulkan, so this
+// always returns 0.
+static int color_depth(struct ra_swapchain *sw)
+{
+    // TODO: implement this somehow?
+    const int depth = 0;
+    return depth;
+}
+
+// Begin a new frame. With a non-NULL out_fbo, the next swapchain image is
+// acquired and wrapped as out_fbo->tex. Returns false if the surface is not
+// visible or acquiring/wrapping the image failed.
+static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo)
+{
+    struct priv *p = sw->priv;
+
+    // Contexts without a visibility callback are always treated as visible.
+    bool visible = p->params.check_visible
+                       ? p->params.check_visible(sw->ctx)
+                       : true;
+
+    // If out_fbo is NULL, this was called from vo_gpu_next. Bail out.
+    if (!out_fbo || !visible)
+        return visible;
+
+    struct pl_swapchain_frame sw_frame;
+    if (!pl_swapchain_start_frame(p->vk->swapchain, &sw_frame))
+        return false;
+
+    if (!mppl_wrap_tex(sw->ctx->ra, sw_frame.fbo, &p->proxy_tex))
+        return false;
+
+    *out_fbo = (struct ra_fbo) {
+        .flip = sw_frame.flipped,
+        .tex = &p->proxy_tex,
+    };
+    return true;
+}
+
+// Submit the current swapchain frame. `frame` is not used here.
+static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame)
+{
+    struct priv *p = sw->priv;
+    struct mpvk_ctx *vk = p->vk;
+
+    return pl_swapchain_submit_frame(vk->swapchain);
+}
+
+// Swap the swapchain buffers, then run the context's optional swap hook.
+static void swap_buffers(struct ra_swapchain *sw)
+{
+    struct priv *p = sw->priv;
+
+    pl_swapchain_swap_buffers(p->vk->swapchain);
+
+    if (!p->params.swap_buffers)
+        return;
+    p->params.swap_buffers(sw->ctx);
+}
+
+// Forward the vsync query to the context's callback, if one was provided;
+// otherwise `info` is left untouched.
+static void get_vsync(struct ra_swapchain *sw,
+                      struct vo_vsync_info *info)
+{
+    struct priv *p = sw->priv;
+
+    if (!p->params.get_vsync)
+        return;
+    p->params.get_vsync(sw->ctx, info);
+}
+
+// Swapchain vtable shared by all Vulkan contexts. Its address also serves
+// as the type tag checked in ra_vk_ctx_get().
+static const struct ra_swapchain_fns vulkan_swapchain = {
+    .start_frame = start_frame,
+    .submit_frame = submit_frame,
+    .swap_buffers = swap_buffers,
+    .get_vsync = get_vsync,
+    .color_depth = color_depth,
+};
diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h
new file mode 100644
index 0000000..c846942
--- /dev/null
+++ b/video/out/vulkan/context.h
@@ -0,0 +1,31 @@
+#pragma once
+
+#include "video/out/gpu/context.h"
+#include "common.h"
+
+struct ra_vk_ctx_params {
+    // Optional (may be NULL). See ra_swapchain_fns.get_vsync.
+    void (*get_vsync)(struct ra_ctx *ctx, struct vo_vsync_info *info);
+
+    // Optional. For special contexts (e.g. wayland) that want to check
+    // visibility before drawing a frame; false makes start_frame bail out.
+    bool (*check_visible)(struct ra_ctx *ctx);
+
+    // Optional. Called after the swapchain swap, for any extra per-swap work.
+    void (*swap_buffers)(struct ra_ctx *ctx);
+};
+
+// Helpers for ra_ctx based on ra_vk. These initialize ctx->ra and ctx->swapchain.
+void ra_vk_ctx_uninit(struct ra_ctx *ctx);
+bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk,
+                    struct ra_vk_ctx_params params,
+                    VkPresentModeKHR preferred_mode);
+
+// Handles a resize request, and updates ctx->vo->dwidth/dheight
+bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height);
+
+// May be called on a ra_ctx of any type.
+struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx);
+
+// Get the user requested Vulkan device name.
+char *ra_vk_ctx_get_device_name(struct ra_ctx *ctx);
diff --git a/video/out/vulkan/context_android.c b/video/out/vulkan/context_android.c
new file mode 100644
index 0000000..ddab391
--- /dev/null
+++ b/video/out/vulkan/context_android.c
@@ -0,0 +1,96 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <vulkan/vulkan.h>
+#include <vulkan/vulkan_android.h>
+
+#include "video/out/android_common.h"
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+struct priv {
+    struct mpvk_ctx vk; // embedded Vulkan context handed to mpvk_init()/ra_vk_ctx_init()
+};
+
+// Tear down in reverse order of android_init(): the ra/swapchain layer
+// first, then the Vulkan context, then the Android VO state.
+static void android_uninit(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv;
+    struct mpvk_ctx *vk = &p->vk;
+
+    ra_vk_ctx_uninit(ctx);
+    mpvk_uninit(vk);
+    vo_android_uninit(ctx->vo);
+}
+
+// Bring up the Android Vulkan context: Android VO state, Vulkan instance
+// with the Android surface extension, a surface for the native window, and
+// finally the shared ra/swapchain layer using FIFO presentation.
+// On any failure everything is undone via android_uninit().
+static bool android_init(struct ra_ctx *ctx)
+{
+    struct priv *p = talloc_zero(ctx, struct priv);
+    ctx->priv = p;
+    struct mpvk_ctx *vk = &p->vk;
+
+    // While probing, failures are only reported at verbose level.
+    int msgl = MSGL_ERR;
+    if (ctx->opts.probing)
+        msgl = MSGL_V;
+
+    if (!vo_android_init(ctx->vo))
+        goto fail;
+
+    if (!mpvk_init(vk, ctx, VK_KHR_ANDROID_SURFACE_EXTENSION_NAME))
+        goto fail;
+
+    VkAndroidSurfaceCreateInfoKHR surface_info = {
+         .sType = VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR,
+         .window = vo_android_native_window(ctx->vo)
+    };
+
+    // No platform-specific callbacks are needed on Android.
+    struct ra_vk_ctx_params params = {0};
+
+    VkResult res = vkCreateAndroidSurfaceKHR(vk->vkinst->instance,
+                                             &surface_info, NULL,
+                                             &vk->surface);
+    if (res != VK_SUCCESS) {
+        MP_MSG(ctx, msgl, "Failed creating Android surface\n");
+        goto fail;
+    }
+
+    if (ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR))
+        return true;
+
+fail:
+    android_uninit(ctx);
+    return false;
+}
+
+// Resize the swapchain to the current Android surface size. Fails only if
+// the surface size cannot be queried; the resize result itself is ignored.
+static bool android_reconfig(struct ra_ctx *ctx)
+{
+    int surf_w, surf_h;
+
+    bool have_size = vo_android_surface_size(ctx->vo, &surf_w, &surf_h);
+    if (!have_size)
+        return false;
+
+    ra_vk_ctx_resize(ctx, surf_w, surf_h);
+    return true;
+}
+
+// No VO controls are implemented by this context; every request is
+// answered with VO_NOTIMPL.
+static int android_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    const int result = VO_NOTIMPL;
+    return result;
+}
+
+// Context table for Vulkan on Android ("androidvk").
+const struct ra_ctx_fns ra_ctx_vulkan_android = {
+    .name     = "androidvk",
+    .type     = "vulkan",
+    .init     = android_init,
+    .uninit   = android_uninit,
+    .reconfig = android_reconfig,
+    .control  = android_control,
+};
diff --git a/video/out/vulkan/context_display.c b/video/out/vulkan/context_display.c
new file mode 100644
index 0000000..84cef1e
--- /dev/null
+++ b/video/out/vulkan/context_display.c
@@ -0,0 +1,491 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "context.h"
+#include "options/m_config.h"
+#include "utils.h"
+
+#if HAVE_DRM
+#include <errno.h>
+#include <fcntl.h>
+#include <unistd.h>
+
+#include "libmpv/render_gl.h"
+#include "video/out/drm_common.h"
+#endif
+
+struct vulkan_display_opts {
+    int display; // value of the "vulkan-display-display" option (default 0)
+    int mode;    // value of the "vulkan-display-mode" option (default 0)
+    int plane;   // value of the "vulkan-display-plane" option (default 0)
+};
+
+struct mode_selector {
+    // Indexes of selected display/mode/plane.
+    int display_idx; // index into the device's display list
+    int mode_idx;    // index into the selected display's mode list
+    int plane_idx;   // index into the device's plane list
+
+    // Must be freed with talloc_free
+    VkDisplayModePropertiesKHR *out_mode_props; // set by walk_display_properties() when the mode is found
+};
+
+/**
+ * If a selector is passed, verify that it is valid and return the matching
+ * mode properties. If null is passed, walk all modes and print them out.
+ *
+ * With a selector, informational output is logged at trace level and
+ * selector->out_mode_props receives a talloc copy (no parent) of the
+ * selected mode's properties; the caller owns it and should free it even
+ * when false is returned, since the plane check runs after the mode was
+ * copied. Negative selector indexes are rejected like out-of-range ones.
+ *
+ * Selection problems and Vulkan enumeration failures are logged at
+ * msgl_err. Returns true if the walk completed and, with a selector, the
+ * selected display/mode/plane combination is valid.
+ */
+static bool walk_display_properties(struct mp_log *log,
+                                    int msgl_err,
+                                    VkPhysicalDevice device,
+                                    struct mode_selector *selector) {
+    bool ret = false;
+    VkResult res;
+
+    int msgl_info = selector ? MSGL_TRACE : MSGL_INFO;
+
+    // Use a dummy as parent for all other allocations.
+    void *tmp = talloc_new(NULL);
+
+    VkPhysicalDeviceProperties prop;
+    vkGetPhysicalDeviceProperties(device, &prop);
+    mp_msg(log, msgl_info, "  '%s' (GPU ID %x:%x)\n", prop.deviceName,
+           (unsigned)prop.vendorID, (unsigned)prop.deviceID);
+
+    // Count displays. This must be done before enumerating planes with the
+    // Intel driver, or it will not enumerate any planes. WTF.
+    // All Vulkan count parameters are uint32_t; passing the address of an
+    // int is an incompatible pointer type.
+    uint32_t num_displays = 0;
+    vkGetPhysicalDeviceDisplayPropertiesKHR(device, &num_displays, NULL);
+    if (!num_displays) {
+        mp_msg(log, msgl_info, "    No available displays for device.\n");
+        goto done;
+    }
+    if (selector && (selector->display_idx < 0 ||
+                     (uint32_t)selector->display_idx >= num_displays)) {
+        mp_msg(log, msgl_err, "Selected display (%d) not present.\n",
+               selector->display_idx);
+        goto done;
+    }
+
+    // Enumerate Planes
+    uint32_t num_planes = 0;
+    vkGetPhysicalDeviceDisplayPlanePropertiesKHR(device, &num_planes, NULL);
+    if (!num_planes) {
+        mp_msg(log, msgl_info, "    No available planes for device.\n");
+        goto done;
+    }
+    if (selector && (selector->plane_idx < 0 ||
+                     (uint32_t)selector->plane_idx >= num_planes)) {
+        mp_msg(log, msgl_err, "Selected plane (%d) not present.\n",
+               selector->plane_idx);
+        goto done;
+    }
+
+    VkDisplayPlanePropertiesKHR *planes =
+        talloc_array(tmp, VkDisplayPlanePropertiesKHR, num_planes);
+    res = vkGetPhysicalDeviceDisplayPlanePropertiesKHR(device, &num_planes,
+                                                       planes);
+    if (res != VK_SUCCESS) {
+        mp_msg(log, msgl_err, "    Failed enumerating planes\n");
+        goto done;
+    }
+
+    // Allocate zeroed arrays so that planes with no displays have a null entry.
+    VkDisplayKHR **planes_to_displays =
+        talloc_zero_array(tmp, VkDisplayKHR *, num_planes);
+    for (uint32_t j = 0; j < num_planes; j++) {
+        uint32_t num_displays_for_plane = 0;
+        vkGetDisplayPlaneSupportedDisplaysKHR(device, j,
+                                              &num_displays_for_plane, NULL);
+        if (!num_displays_for_plane)
+            continue;
+
+        // Null terminated array
+        VkDisplayKHR *displays =
+            talloc_zero_array(planes_to_displays, VkDisplayKHR,
+                              num_displays_for_plane + 1);
+        res = vkGetDisplayPlaneSupportedDisplaysKHR(device, j,
+                                                    &num_displays_for_plane,
+                                                    displays);
+        if (res != VK_SUCCESS) {
+            mp_msg(log, msgl_err, "      Failed enumerating plane displays\n");
+            continue;
+        }
+        planes_to_displays[j] = displays;
+    }
+
+    // Enumerate Displays and Modes
+    VkDisplayPropertiesKHR *props =
+        talloc_array(tmp, VkDisplayPropertiesKHR, num_displays);
+    res = vkGetPhysicalDeviceDisplayPropertiesKHR(device, &num_displays, props);
+    if (res != VK_SUCCESS) {
+        mp_msg(log, msgl_err, "    Failed enumerating display properties\n");
+        goto done;
+    }
+
+    for (uint32_t j = 0; j < num_displays; j++) {
+        // Selector indexes were validated as non-negative above, so the
+        // casts in the comparisons below are value-preserving.
+        if (selector && (uint32_t)selector->display_idx != j)
+            continue;
+
+        mp_msg(log, msgl_info, "    Display %d: '%s' (%dx%d)\n",
+               (int)j,
+               props[j].displayName,
+               props[j].physicalResolution.width,
+               props[j].physicalResolution.height);
+
+        VkDisplayKHR display = props[j].display;
+
+        mp_msg(log, msgl_info, "    Modes:\n");
+
+        uint32_t num_modes = 0;
+        vkGetDisplayModePropertiesKHR(device, display, &num_modes, NULL);
+        if (!num_modes) {
+            mp_msg(log, msgl_info, "      No available modes for display.\n");
+            continue;
+        }
+        if (selector && (selector->mode_idx < 0 ||
+                         (uint32_t)selector->mode_idx >= num_modes)) {
+            mp_msg(log, msgl_err, "Selected mode (%d) not present.\n",
+                   selector->mode_idx);
+            goto done;
+        }
+
+        VkDisplayModePropertiesKHR *modes =
+            talloc_array(tmp, VkDisplayModePropertiesKHR, num_modes);
+        res = vkGetDisplayModePropertiesKHR(device, display, &num_modes, modes);
+        if (res != VK_SUCCESS) {
+            mp_msg(log, msgl_err, "      Failed enumerating display modes\n");
+            continue;
+        }
+
+        for (uint32_t k = 0; k < num_modes; k++) {
+            if (selector && (uint32_t)selector->mode_idx != k)
+                continue;
+
+            // The refresh rate is split into integer and fractional parts
+            // by dividing by 1000.
+            mp_msg(log, msgl_info, "      Mode %02d: %dx%d (%02d.%03d Hz)\n",
+                   (int)k,
+                   modes[k].parameters.visibleRegion.width,
+                   modes[k].parameters.visibleRegion.height,
+                   modes[k].parameters.refreshRate / 1000,
+                   modes[k].parameters.refreshRate % 1000);
+
+            // Copied with no talloc parent so it outlives `tmp`.
+            if (selector)
+                selector->out_mode_props = talloc_dup(NULL, &modes[k]);
+        }
+
+        int found_plane = -1;
+        mp_msg(log, msgl_info, "    Planes:\n");
+        for (uint32_t k = 0; k < num_planes; k++) {
+            VkDisplayKHR *displays = planes_to_displays[k];
+            if (!displays) {
+                // This plane is not connected to any displays.
+                continue;
+            }
+            for (int d = 0; displays[d]; d++) {
+                if (displays[d] == display) {
+                    if (selector && (uint32_t)selector->plane_idx != k)
+                        continue;
+
+                    mp_msg(log, msgl_info, "      Plane: %d\n", (int)k);
+                    found_plane = (int)k;
+                }
+            }
+        }
+        if (selector && selector->plane_idx != found_plane) {
+            mp_msg(log, msgl_err,
+                   "Selected plane (%d) not available on selected display.\n",
+                   selector->plane_idx);
+            goto done;
+        }
+    }
+    ret = true;
+done:
+    talloc_free(tmp);
+    return ret;
+}
+
+// Help callback for the vulkan-display-* options: creates a temporary
+// Vulkan instance with the display extension enabled and prints the
+// displays, modes and planes of every physical device.
+// Always returns M_OPT_EXIT, including when nothing could be listed.
+static int print_display_info(struct mp_log *log, const struct m_option *opt,
+                              struct bstr name) {
+    VkResult res;
+    VkPhysicalDevice *devices = NULL;
+
+    // Create a dummy instance to list the resources
+    VkInstanceCreateInfo info = {
+        .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO,
+        .enabledExtensionCount = 1,
+        .ppEnabledExtensionNames = (const char*[]) {
+            VK_KHR_DISPLAY_EXTENSION_NAME
+        },
+    };
+
+    VkInstance inst = NULL;
+    res = vkCreateInstance(&info, NULL, &inst);
+    if (res != VK_SUCCESS) {
+        mp_warn(log, "Unable to create Vulkan instance.\n");
+        goto done;
+    }
+
+    uint32_t num_devices = 0;
+    vkEnumeratePhysicalDevices(inst, &num_devices, NULL);
+    if (!num_devices) {
+        mp_info(log, "No Vulkan devices detected.\n");
+        goto done;
+    }
+
+    devices = talloc_array(NULL, VkPhysicalDevice, num_devices);
+    // Store the result: the check below used to test the stale result of
+    // vkCreateInstance() and therefore could never trigger.
+    res = vkEnumeratePhysicalDevices(inst, &num_devices, devices);
+    if (res != VK_SUCCESS) {
+        mp_warn(log, "Failed enumerating physical devices.\n");
+        goto done;
+    }
+
+    mp_info(log, "Vulkan Devices:\n");
+    for (uint32_t i = 0; i < num_devices; i++) {
+        // No selector: just print everything for this device.
+        walk_display_properties(log, MSGL_WARN, devices[i], NULL);
+    }
+
+done:
+    talloc_free(devices);
+    vkDestroyInstance(inst, NULL);
+    return M_OPT_EXIT;
+}
+
+#define OPT_BASE_STRUCT struct vulkan_display_opts // presumably the struct OPT_INT(field) resolves against - confirm in m_option.h
+const struct m_sub_options vulkan_display_conf = { // integer options selecting display, mode and plane
+    .opts = (const struct m_option[]) {
+        {"vulkan-display-display", OPT_INT(display),
+            .help = print_display_info, // all three options share the same help callback
+        },
+        {"vulkan-display-mode", OPT_INT(mode),
+            .help = print_display_info,
+        },
+        {"vulkan-display-plane", OPT_INT(plane),
+            .help = print_display_info,
+        },
+        {0}
+    },
+    .size = sizeof(struct vulkan_display_opts),
+    .defaults = &(struct vulkan_display_opts) {0}, // every option defaults to 0
+};
+
+struct priv { // per-context state, zero-allocated in display_init()
+    struct mpvk_ctx vk;
+    struct vulkan_display_opts *opts; // obtained via mp_get_config_group() in display_init()
+    uint32_t width; // visibleRegion of the selected display mode
+    uint32_t height;
+
+#if HAVE_DRM
+    struct mpv_opengl_drm_params_v2 drm_params; // render_fd is filled in by open_render_fd()
+#endif
+};
+
+#if HAVE_DRM
+// Opens the DRM render node at `render_path` and stores the descriptor in the
+// context's drm_params.render_fd. drm_params.fd is always set to -1. When
+// open() fails, render_fd ends up as -1 and a warning is logged.
+static void open_render_fd(struct ra_ctx *ctx, const char *render_path)
+{
+    struct priv *priv = ctx->priv;
+
+    priv->drm_params.fd = -1;
+    priv->drm_params.render_fd = open(render_path, O_RDWR | O_CLOEXEC);
+    if (priv->drm_params.render_fd != -1)
+        return;
+
+    MP_WARN(ctx, "Failed to open render node: %s\n",
+            strerror(errno));
+}
+
+// Looks for the DRM device that sits at the same PCI address as the Vulkan
+// device described by `pci_props` and opens its render node.
+// `display_idx` is currently unused.
+// Returns true if a render node was opened (drm_params.render_fd is then
+// valid), false otherwise.
+static bool drm_setup(struct ra_ctx *ctx, int display_idx,
+                      VkPhysicalDevicePCIBusInfoPropertiesEXT *pci_props)
+{
+    struct priv *p = ctx->priv;
+
+    // priv is zero-allocated and 0 is a valid descriptor: mark both fds as
+    // unset so that "no matching device" is not mistaken for success below.
+    p->drm_params.fd = -1;
+    p->drm_params.render_fd = -1;
+
+    drmDevice *devs[32] = {0};
+    int count = drmGetDevices2(0, devs, MP_ARRAY_SIZE(devs));
+    if (count < 0)
+        MP_DBG(ctx, "Failed to enumerate DRM devices: %s\n", strerror(-count));
+
+    for (int i = 0; i < count; i++) {
+        drmDevice *dev = devs[i];
+
+        if (dev->bustype != DRM_BUS_PCI ||
+            dev->businfo.pci->domain != pci_props->pciDomain ||
+            dev->businfo.pci->bus != pci_props->pciBus ||
+            dev->businfo.pci->dev != pci_props->pciDevice ||
+            dev->businfo.pci->func != pci_props->pciFunction)
+        {
+            continue;
+        }
+
+        // Found our matching device.
+        MP_DBG(ctx, "DRM device found for Vulkan device at %04X:%02X:%02X:%02X\n",
+                pci_props->pciDomain, pci_props->pciBus,
+                pci_props->pciDevice, pci_props->pciFunction);
+
+        if (!(dev->available_nodes & 1 << DRM_NODE_RENDER)) {
+            MP_DBG(ctx, "Card does not have a render node.\n");
+            continue;
+        }
+
+        open_render_fd(ctx, dev->nodes[DRM_NODE_RENDER]);
+
+        break;
+    }
+    // The array starts out zeroed, so passing the full capacity is safe even
+    // when fewer (or no) entries were filled in.
+    drmFreeDevices(devs, MP_ARRAY_SIZE(devs));
+
+    if (p->drm_params.render_fd == -1) {
+        MP_WARN(ctx, "Couldn't open DRM render node for Vulkan device "
+                     "at: %04X:%02X:%02X:%02X\n",
+                     pci_props->pciDomain, pci_props->pciBus,
+                     pci_props->pciDevice, pci_props->pciFunction);
+        return false;
+    }
+
+    return true;
+}
+#endif
+
+// Tears the context down: ra_vk_ctx state first, then the mpvk context, and
+// (with HAVE_DRM) the render node descriptor if one is open.
+static void display_uninit(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+
+    ra_vk_ctx_uninit(ctx);
+    mpvk_uninit(&priv->vk);
+
+#if HAVE_DRM
+    int *render_fd = &priv->drm_params.render_fd;
+    if (*render_fd != -1) {
+        close(*render_fd);
+        // Reset so a repeated call cannot close the descriptor twice.
+        *render_fd = -1;
+    }
+#endif
+}
+
+// Initializes the VK_KHR_display context: creates the instance, chooses the
+// physical device, selects display/mode/plane according to the
+// vulkan-display-* options, creates the display plane surface and sets up the
+// ra_vk context. With HAVE_DRM, a render node is opened as well and exported
+// as the "drm_params_v2" native resource when available.
+// Returns true on success; on failure everything is torn down via
+// display_uninit() and false is returned.
+static bool display_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    struct mpvk_ctx *vk = &p->vk;
+    int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+    VkResult res;
+    bool ret = false;
+
+    VkDisplayModePropertiesKHR *mode = NULL;
+
+#if HAVE_DRM
+    // priv is zero-filled and 0 is a valid descriptor. Mark the DRM fds as
+    // unset right away, so display_uninit() on an early error does not close
+    // fd 0 and a failed DRM lookup is not mistaken for an open render node.
+    p->drm_params.fd = -1;
+    p->drm_params.render_fd = -1;
+#endif
+
+    p->opts = mp_get_config_group(p, ctx->global, &vulkan_display_conf);
+    int display_idx = p->opts->display;
+    int mode_idx = p->opts->mode;
+    int plane_idx = p->opts->plane;
+
+    if (!mpvk_init(vk, ctx, VK_KHR_DISPLAY_EXTENSION_NAME))
+        goto error;
+
+    char *device_name = ra_vk_ctx_get_device_name(ctx);
+    struct pl_vulkan_device_params vulkan_params = {
+        .instance = vk->vkinst->instance,
+        .device_name = device_name,
+    };
+    VkPhysicalDevice device = pl_vulkan_choose_device(vk->pllog, &vulkan_params);
+    talloc_free(device_name);
+    if (!device) {
+        MP_MSG(ctx, msgl, "Failed to open physical device.\n");
+        goto error;
+    }
+
+#if HAVE_DRM
+    // Query the PCI address of the Vulkan device to find the matching DRM node.
+    VkPhysicalDevicePCIBusInfoPropertiesEXT pci_props = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT,
+    };
+    VkPhysicalDeviceProperties2KHR props = {
+        .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR,
+        .pNext = &pci_props,
+    };
+    vkGetPhysicalDeviceProperties2(device, &props);
+
+    // DRM is optional: failure only costs the native resource added below.
+    if (!drm_setup(ctx, display_idx, &pci_props))
+        MP_WARN(ctx, "Failed to set up DRM.\n");
+#endif
+
+    struct mode_selector selector = {
+        .display_idx = display_idx,
+        .mode_idx = mode_idx,
+        .plane_idx = plane_idx,
+
+    };
+    if (!walk_display_properties(ctx->log, msgl, device, &selector))
+        goto error;
+    mode = selector.out_mode_props;
+    // The walk only fills out_mode_props when a mode index matched; guard
+    // against dereferencing NULL if none did (e.g. a negative index).
+    if (!mode) {
+        MP_MSG(ctx, msgl, "Selected mode (%d) not available on selected display.\n",
+               mode_idx);
+        goto error;
+    }
+
+    VkDisplaySurfaceCreateInfoKHR xinfo = {
+        .sType = VK_STRUCTURE_TYPE_DISPLAY_SURFACE_CREATE_INFO_KHR,
+        .displayMode = mode->displayMode,
+        .imageExtent = mode->parameters.visibleRegion,
+        .planeIndex = plane_idx,
+        .transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR,
+        .alphaMode = VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR,
+    };
+
+    res = vkCreateDisplayPlaneSurfaceKHR(vk->vkinst->instance, &xinfo, NULL,
+                                         &vk->surface);
+    if (res != VK_SUCCESS) {
+        MP_MSG(ctx, msgl, "Failed creating Display surface\n");
+        goto error;
+    }
+
+    // Remembered for display_reconfig().
+    p->width = mode->parameters.visibleRegion.width;
+    p->height = mode->parameters.visibleRegion.height;
+
+    struct ra_vk_ctx_params params = {0};
+    if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR))
+        goto error;
+
+#if HAVE_DRM
+    if (p->drm_params.render_fd > -1) {
+        ra_add_native_resource(ctx->ra, "drm_params_v2", &p->drm_params);
+    } else {
+        MP_WARN(ctx,
+               "No DRM render fd available. VAAPI hwaccel will not be usable.\n");
+    }
+#endif
+
+    ret = true;
+
+done:
+    // The mode properties were duplicated by the walk and are owned here.
+    talloc_free(mode);
+    return ret;
+
+error:
+    display_uninit(ctx);
+    goto done;
+}
+
+// Passes the mode dimensions stored by display_init() to ra_vk_ctx_resize()
+// and returns its result.
+static bool display_reconfig(struct ra_ctx *ctx)
+{
+    const struct priv *priv = ctx->priv;
+    const uint32_t w = priv->width;
+    const uint32_t h = priv->height;
+
+    return ra_vk_ctx_resize(ctx, w, h);
+}
+
+static int display_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    return VO_NOTIMPL; // no request is handled by this backend; all arguments are ignored
+}
+
+static void display_wakeup(struct ra_ctx *ctx)
+{
+    // TODO: not implemented, currently a no-op
+}
+
+static void display_wait_events(struct ra_ctx *ctx, int64_t until_time_ns)
+{
+    // TODO: not implemented, returns immediately without waiting
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_display = { // entry points of the "displayvk" Vulkan context
+    .type           = "vulkan",
+    .name           = "displayvk",
+    .reconfig       = display_reconfig,
+    .control        = display_control,
+    .wakeup         = display_wakeup,
+    .wait_events    = display_wait_events,
+    .init           = display_init,
+    .uninit         = display_uninit,
+};
diff --git a/video/out/vulkan/context_mac.m b/video/out/vulkan/context_mac.m
new file mode 100644
index 0000000..8ac6e16
--- /dev/null
+++ b/video/out/vulkan/context_mac.m
@@ -0,0 +1,119 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "osdep/macOS_swift.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+struct priv { // per-context state, zero-allocated in mac_vk_init()
+    struct mpvk_ctx vk;
+    MacCommon *vo_mac; // created in mac_vk_init(); provides the layer for the Metal surface
+};
+
+// Tears the context down: ra_vk_ctx state, then the mpvk context, and finally
+// the MacCommon backend.
+static void mac_vk_uninit(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct mpvk_ctx *vk = &priv->vk;
+
+    ra_vk_ctx_uninit(ctx);
+    mpvk_uninit(vk);
+    [priv->vo_mac uninit:ctx->vo];
+}
+
+// swap_buffers hook registered through ra_vk_ctx_params in mac_vk_init();
+// forwards to MacCommon's swapBuffer.
+static void mac_vk_swap_buffers(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+
+    [priv->vo_mac swapBuffer];
+}
+
+// Initializes the macOS Vulkan context: creates the instance with the Metal
+// surface extension, sets up the MacCommon backend, creates a Metal surface on
+// its layer and initializes the ra_vk context.
+// Returns true on success; on failure everything created so far is released
+// and false is returned.
+static bool mac_vk_init(struct ra_ctx *ctx)
+{
+    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
+    struct mpvk_ctx *vk = &p->vk;
+    int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+    if (!mpvk_init(vk, ctx, VK_EXT_METAL_SURFACE_EXTENSION_NAME))
+        goto error;
+
+    p->vo_mac = [[MacCommon alloc] init:ctx->vo];
+    if (!p->vo_mac)
+        goto error;
+
+    VkMetalSurfaceCreateInfoEXT mac_info = {
+        // Must be the sType that belongs to VkMetalSurfaceCreateInfoEXT, not
+        // the one of the older VkMacOSSurfaceCreateInfoMVK structure.
+        .sType = VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT,
+        .pNext = NULL,
+        .flags = 0,
+        .pLayer = p->vo_mac.layer,
+    };
+
+    struct ra_vk_ctx_params params = {
+        .swap_buffers = mac_vk_swap_buffers,
+    };
+
+    VkInstance inst = vk->vkinst->instance;
+    VkResult res = vkCreateMetalSurfaceEXT(inst, &mac_info, NULL, &vk->surface);
+    if (res != VK_SUCCESS) {
+        MP_MSG(ctx, msgl, "Failed creating metal surface\n");
+        goto error;
+    }
+
+    if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR))
+        goto error;
+
+    return true;
+error:
+    // Full teardown, as in the other Vulkan backends: this also destroys the
+    // surface and instance, which the vo_mac-only cleanup leaked. Messaging a
+    // nil vo_mac is a no-op, so no explicit check is needed.
+    mac_vk_uninit(ctx);
+    return false;
+}
+
+// Passes the VO's dwidth/dheight to ra_vk_ctx_resize() and returns its result.
+static bool resize(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    return ra_vk_ctx_resize(ctx, vo->dwidth, vo->dheight);
+}
+
+// Asks MacCommon to (re)configure for the VO; true if it reports success.
+static bool mac_vk_reconfig(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+
+    return [priv->vo_mac config:ctx->vo] ? true : false;
+}
+
+// Forwards the control request to MacCommon. If the resulting events contain
+// VO_EVENT_RESIZE, the swapchain is resized; a failed resize yields VO_ERROR,
+// otherwise MacCommon's result is returned.
+static int mac_vk_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    struct priv *priv = ctx->priv;
+    int result = [priv->vo_mac control:ctx->vo events:events request:request data:arg];
+
+    if ((*events & VO_EVENT_RESIZE) && !resize(ctx))
+        return VO_ERROR;
+
+    return result;
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_mac = { // entry points of the "macvk" Vulkan context
+    .type           = "vulkan",
+    .name           = "macvk",
+    .reconfig       = mac_vk_reconfig,
+    .control        = mac_vk_control,
+    .init           = mac_vk_init,
+    .uninit         = mac_vk_uninit,
+};
diff --git a/video/out/vulkan/context_wayland.c b/video/out/vulkan/context_wayland.c
new file mode 100644
index 0000000..761ff5b
--- /dev/null
+++ b/video/out/vulkan/context_wayland.c
@@ -0,0 +1,167 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "video/out/present_sync.h"
+#include "video/out/wayland_common.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+struct priv { // per-context state, zero-allocated in wayland_vk_init()
+    struct mpvk_ctx vk;
+};
+
+// check_visible hook: defers to the common Wayland code.
+static bool wayland_vk_check_visible(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    return vo_wayland_check_visible(vo);
+}
+
+// swap_buffers hook: waits for the frame callback unless vsync is disabled,
+// then updates presentation feedback state when it is in use.
+static void wayland_vk_swap_buffers(struct ra_ctx *ctx)
+{
+    struct vo_wayland_state *state = ctx->vo->wl;
+
+    if (!state->opts->disable_vsync)
+        vo_wayland_wait_frame(state);
+
+    if (!state->use_present)
+        return;
+
+    present_sync_swap(state->present);
+}
+
+// get_vsync hook: fills `info` from the presentation state when it is in use;
+// otherwise `info` is left untouched.
+static void wayland_vk_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info)
+{
+    struct vo_wayland_state *state = ctx->vo->wl;
+
+    if (!state->use_present)
+        return;
+
+    present_sync_get_info(state->present, info);
+}
+
+// Tears the context down: ra_vk_ctx state, then the mpvk context, and finally
+// the common Wayland state.
+static void wayland_vk_uninit(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct mpvk_ctx *vk = &priv->vk;
+
+    ra_vk_ctx_uninit(ctx);
+    mpvk_uninit(vk);
+    vo_wayland_uninit(ctx->vo);
+}
+
+// Initializes the Wayland Vulkan context: instance with the Wayland surface
+// extension, common Wayland state, the VkSurface for the Wayland surface and
+// the ra_vk context. Registers the wl_display as native resource "wl".
+// Returns true on success; on failure the context is torn down again and
+// false is returned.
+static bool wayland_vk_init(struct ra_ctx *ctx)
+{
+    struct priv *priv = talloc_zero(ctx, struct priv);
+    ctx->priv = priv;
+    struct mpvk_ctx *vk = &priv->vk;
+    const int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+    if (!mpvk_init(vk, ctx, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME) ||
+        !vo_wayland_init(ctx->vo))
+    {
+        goto error;
+    }
+
+    VkWaylandSurfaceCreateInfoKHR surface_info = {
+        .sType   = VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR,
+        .display = ctx->vo->wl->display,
+        .surface = ctx->vo->wl->surface,
+    };
+
+    VkResult res = vkCreateWaylandSurfaceKHR(vk->vkinst->instance,
+                                             &surface_info, NULL, &vk->surface);
+    if (res != VK_SUCCESS) {
+        MP_MSG(ctx, msgl, "Failed creating Wayland surface\n");
+        goto error;
+    }
+
+    struct ra_vk_ctx_params params = {
+        .check_visible = wayland_vk_check_visible,
+        .swap_buffers = wayland_vk_swap_buffers,
+        .get_vsync = wayland_vk_get_vsync,
+    };
+
+    /* Wayland clients render when the compositor sends a frame callback, and
+     * compositors usually stop sending callbacks for surfaces that are not
+     * visible. With FIFO the whole player would then block while acquiring a
+     * swapchain image. MAILBOX guarantees that an image is always available,
+     * so the player never blocks on that. */
+    if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_MAILBOX_KHR))
+        goto error;
+
+    ra_add_native_resource(ctx->ra, "wl", ctx->vo->wl->display);
+
+    return true;
+
+error:
+    wayland_vk_uninit(ctx);
+    return false;
+}
+
+// Handles a resize: takes the new size from the Wayland geometry, refreshes
+// the opaque region and fractional scale, then resizes the swapchain.
+static bool resize(struct ra_ctx *ctx)
+{
+    struct vo_wayland_state *state = ctx->vo->wl;
+
+    MP_VERBOSE(state, "Handling resize on the vk side\n");
+
+    // Sample the geometry before calling into the Wayland helpers below.
+    const int32_t new_w = mp_rect_w(state->geometry);
+    const int32_t new_h = mp_rect_h(state->geometry);
+
+    vo_wayland_set_opaque_region(state, ctx->opts.want_alpha);
+    vo_wayland_handle_fractional_scale(state);
+
+    return ra_vk_ctx_resize(ctx, new_w, new_h);
+}
+
+// reconfig hook: defers to the common Wayland code.
+static bool wayland_vk_reconfig(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    return vo_wayland_reconfig(vo);
+}
+
+// Forwards the control request to the common Wayland code. If the resulting
+// events contain VO_EVENT_RESIZE, the swapchain is resized; a failed resize
+// yields VO_ERROR, otherwise the Wayland result is returned.
+static int wayland_vk_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    const int result = vo_wayland_control(ctx->vo, events, request, arg);
+
+    if ((*events & VO_EVENT_RESIZE) && !resize(ctx))
+        return VO_ERROR;
+
+    return result;
+}
+
+// wakeup hook: defers to the common Wayland code.
+static void wayland_vk_wakeup(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_wayland_wakeup(vo);
+}
+
+// wait_events hook: defers to the common Wayland code with the same deadline.
+static void wayland_vk_wait_events(struct ra_ctx *ctx, int64_t until_time_ns)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_wayland_wait_events(vo, until_time_ns);
+}
+
+// update_render_opts hook: re-applies the opaque region according to the
+// current want_alpha option and commits the surface.
+static void wayland_vk_update_render_opts(struct ra_ctx *ctx)
+{
+    struct vo_wayland_state *state = ctx->vo->wl;
+
+    vo_wayland_set_opaque_region(state, ctx->opts.want_alpha);
+    wl_surface_commit(state->surface);
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_wayland = { // entry points of the "waylandvk" Vulkan context
+    .type               = "vulkan",
+    .name               = "waylandvk",
+    .reconfig           = wayland_vk_reconfig,
+    .control            = wayland_vk_control,
+    .wakeup             = wayland_vk_wakeup,
+    .wait_events        = wayland_vk_wait_events,
+    .update_render_opts = wayland_vk_update_render_opts,
+    .init               = wayland_vk_init,
+    .uninit             = wayland_vk_uninit,
+};
diff --git a/video/out/vulkan/context_win.c b/video/out/vulkan/context_win.c
new file mode 100644
index 0000000..a89c644
--- /dev/null
+++ b/video/out/vulkan/context_win.c
@@ -0,0 +1,106 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "video/out/w32_common.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+EXTERN_C IMAGE_DOS_HEADER __ImageBase; // only its address is used, see the macro below
+#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) // passed as .hinstance when creating the Win32 surface
+
+struct priv { // per-context state, zero-allocated in win_init()
+    struct mpvk_ctx vk;
+};
+
+// Tears the context down: ra_vk_ctx state, then the mpvk context, and finally
+// the common win32 window state.
+static void win_uninit(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct mpvk_ctx *vk = &priv->vk;
+
+    ra_vk_ctx_uninit(ctx);
+    mpvk_uninit(vk);
+    vo_w32_uninit(ctx->vo);
+}
+
+// Initializes the Windows Vulkan context: instance with the Win32 surface
+// extension, the common win32 window, a VkSurface for its HWND and the ra_vk
+// context (FIFO present mode).
+// Returns true on success; on failure the context is torn down again and
+// false is returned.
+static bool win_init(struct ra_ctx *ctx)
+{
+    struct priv *priv = talloc_zero(ctx, struct priv);
+    ctx->priv = priv;
+    struct mpvk_ctx *vk = &priv->vk;
+    const int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+    if (!mpvk_init(vk, ctx, VK_KHR_WIN32_SURFACE_EXTENSION_NAME) ||
+        !vo_w32_init(ctx->vo))
+    {
+        goto error;
+    }
+
+    VkWin32SurfaceCreateInfoKHR surface_info = {
+        .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR,
+        .hinstance = HINST_THISCOMPONENT,
+        .hwnd = vo_w32_hwnd(ctx->vo),
+    };
+
+    VkResult res = vkCreateWin32SurfaceKHR(vk->vkinst->instance, &surface_info,
+                                           NULL, &vk->surface);
+    if (res != VK_SUCCESS) {
+        MP_MSG(ctx, msgl, "Failed creating Windows surface\n");
+        goto error;
+    }
+
+    // No optional hooks are needed for this backend.
+    struct ra_vk_ctx_params params = {0};
+    if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR))
+        goto error;
+
+    return true;
+
+error:
+    win_uninit(ctx);
+    return false;
+}
+
+// Passes the VO's dwidth/dheight to ra_vk_ctx_resize() and returns its result.
+static bool resize(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    return ra_vk_ctx_resize(ctx, vo->dwidth, vo->dheight);
+}
+
+// reconfig hook: lets the win32 code configure the window, then resizes the
+// swapchain to the resulting VO size.
+static bool win_reconfig(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_w32_config(vo);
+    return resize(ctx);
+}
+
+// Forwards the control request to the win32 code. If the resulting events
+// contain VO_EVENT_RESIZE, the swapchain is resized; a failed resize yields
+// VO_ERROR, otherwise the win32 result is returned.
+static int win_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    const int result = vo_w32_control(ctx->vo, events, request, arg);
+
+    if ((*events & VO_EVENT_RESIZE) && !resize(ctx))
+        return VO_ERROR;
+
+    return result;
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_win = { // entry points of the "winvk" Vulkan context
+    .type           = "vulkan",
+    .name           = "winvk",
+    .reconfig       = win_reconfig,
+    .control        = win_control,
+    .init           = win_init,
+    .uninit         = win_uninit,
+};
diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c
new file mode 100644
index 0000000..673dc31
--- /dev/null
+++ b/video/out/vulkan/context_xlib.c
@@ -0,0 +1,143 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/gpu/context.h"
+#include "video/out/present_sync.h"
+#include "video/out/x11_common.h"
+
+#include "common.h"
+#include "context.h"
+#include "utils.h"
+
+struct priv { // per-context state, zero-allocated in xlib_init()
+    struct mpvk_ctx vk;
+};
+
+// check_visible hook: defers to the common X11 code.
+static bool xlib_check_visible(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    return vo_x11_check_visible(vo);
+}
+
+// swap_buffers hook: updates presentation feedback state when it is in use.
+static void xlib_vk_swap_buffers(struct ra_ctx *ctx)
+{
+    struct vo_x11_state *x11 = ctx->vo->x11;
+
+    if (!x11->use_present)
+        return;
+
+    present_sync_swap(x11->present);
+}
+
+// get_vsync hook: fills `info` from the presentation state when it is in use;
+// otherwise `info` is left untouched.
+static void xlib_vk_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info)
+{
+    struct vo_x11_state *state = ctx->vo->x11;
+
+    if (!state->use_present)
+        return;
+
+    present_sync_get_info(state->present, info);
+}
+
+// Tears the context down: ra_vk_ctx state, then the mpvk context, and finally
+// the common X11 state.
+static void xlib_uninit(struct ra_ctx *ctx)
+{
+    struct priv *priv = ctx->priv;
+    struct mpvk_ctx *vk = &priv->vk;
+
+    ra_vk_ctx_uninit(ctx);
+    mpvk_uninit(vk);
+    vo_x11_uninit(ctx->vo);
+}
+
+// Initializes the X11 Vulkan context: instance with the Xlib surface
+// extension, common X11 state and window, a VkSurface for that window and the
+// ra_vk context (FIFO present mode). Registers the X display as native
+// resource "x11".
+// Returns true on success; on failure the context is torn down again and
+// false is returned.
+static bool xlib_init(struct ra_ctx *ctx)
+{
+    struct priv *priv = talloc_zero(ctx, struct priv);
+    ctx->priv = priv;
+    struct mpvk_ctx *vk = &priv->vk;
+    const int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;
+
+    // Each step depends on the previous one; stop at the first failure.
+    if (!mpvk_init(vk, ctx, VK_KHR_XLIB_SURFACE_EXTENSION_NAME) ||
+        !vo_x11_init(ctx->vo) ||
+        !vo_x11_create_vo_window(ctx->vo, NULL, "mpvk"))
+    {
+        goto error;
+    }
+
+    VkXlibSurfaceCreateInfoKHR surface_info = {
+        .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR,
+        .dpy = ctx->vo->x11->display,
+        .window = ctx->vo->x11->window,
+    };
+
+    VkResult res = vkCreateXlibSurfaceKHR(vk->vkinst->instance, &surface_info,
+                                          NULL, &vk->surface);
+    if (res != VK_SUCCESS) {
+        MP_MSG(ctx, msgl, "Failed creating Xlib surface\n");
+        goto error;
+    }
+
+    struct ra_vk_ctx_params params = {
+        .check_visible = xlib_check_visible,
+        .swap_buffers = xlib_vk_swap_buffers,
+        .get_vsync = xlib_vk_get_vsync,
+    };
+    if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR))
+        goto error;
+
+    ra_add_native_resource(ctx->ra, "x11", ctx->vo->x11->display);
+
+    return true;
+
+error:
+    xlib_uninit(ctx);
+    return false;
+}
+
+// Passes the VO's dwidth/dheight to ra_vk_ctx_resize() and returns its result.
+static bool resize(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    return ra_vk_ctx_resize(ctx, vo->dwidth, vo->dheight);
+}
+
+// reconfig hook: lets the X11 code configure the window, then resizes the
+// swapchain to the resulting VO size.
+static bool xlib_reconfig(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_x11_config_vo_window(vo);
+    return resize(ctx);
+}
+
+// Forwards the control request to the X11 code. If the resulting events
+// contain VO_EVENT_RESIZE, the swapchain is resized; a failed resize yields
+// VO_ERROR, otherwise the X11 result is returned.
+static int xlib_control(struct ra_ctx *ctx, int *events, int request, void *arg)
+{
+    const int result = vo_x11_control(ctx->vo, events, request, arg);
+
+    if ((*events & VO_EVENT_RESIZE) && !resize(ctx))
+        return VO_ERROR;
+
+    return result;
+}
+
+// wakeup hook: defers to the common X11 code.
+static void xlib_wakeup(struct ra_ctx *ctx)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_x11_wakeup(vo);
+}
+
+// wait_events hook: defers to the common X11 code with the same deadline.
+static void xlib_wait_events(struct ra_ctx *ctx, int64_t until_time_ns)
+{
+    struct vo *vo = ctx->vo;
+
+    vo_x11_wait_events(vo, until_time_ns);
+}
+
+const struct ra_ctx_fns ra_ctx_vulkan_xlib = { // entry points of the "x11vk" Vulkan context
+    .type           = "vulkan",
+    .name           = "x11vk",
+    .reconfig       = xlib_reconfig,
+    .control        = xlib_control,
+    .wakeup         = xlib_wakeup,
+    .wait_events    = xlib_wait_events,
+    .init           = xlib_init,
+    .uninit         = xlib_uninit,
+};
diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c
new file mode 100644
index 0000000..57a3664
--- /dev/null
+++ b/video/out/vulkan/utils.c
@@ -0,0 +1,42 @@
+#include "video/out/placebo/utils.h"
+#include "utils.h"
+
+// Creates the libplacebo log and the Vulkan instance for `vk`, enabling
+// VK_KHR_surface plus the platform extension named by `surface_ext`.
+// Returns true on success. On failure `vk` is cleaned up with mpvk_uninit()
+// and false is returned.
+bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext)
+{
+    vk->pllog = mppl_log_create(ctx, ctx->vo->log);
+    if (vk->pllog) {
+        const char *exts[] = {
+            VK_KHR_SURFACE_EXTENSION_NAME,
+            surface_ext,
+        };
+        struct pl_vk_inst_params inst_params = {
+            .debug = ctx->opts.debug,
+            .extensions = exts,
+            .num_extensions = MP_ARRAY_SIZE(exts),
+        };
+
+        // The log is switched to probing mode only while the instance is
+        // being created.
+        mppl_log_set_probing(vk->pllog, true);
+        vk->vkinst = pl_vk_inst_create(vk->pllog, &inst_params);
+        mppl_log_set_probing(vk->pllog, false);
+
+        if (vk->vkinst)
+            return true;
+    }
+
+    mpvk_uninit(vk);
+    return false;
+}
+
+// Releases everything mpvk_init() and the backends stored in `vk`: the
+// surface (if one was created), the Vulkan instance and the libplacebo log.
+void mpvk_uninit(struct mpvk_ctx *vk)
+{
+    if (vk->surface != VK_NULL_HANDLE) {
+        // A surface can only exist if the instance it was created on does.
+        assert(vk->vkinst);
+        vkDestroySurfaceKHR(vk->vkinst->instance, vk->surface, NULL);
+        vk->surface = VK_NULL_HANDLE;
+    }
+
+    pl_vk_inst_destroy(&vk->vkinst);
+    pl_log_destroy(&vk->pllog);
+}
diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h
new file mode 100644
index 0000000..a98e147
--- /dev/null
+++ b/video/out/vulkan/utils.h
@@ -0,0 +1,6 @@
+#pragma once
+#include "common.h"
+#include "video/out/gpu/context.h"
+
+bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext); // creates vk->pllog and vk->vkinst; returns false (after cleanup) on failure
+void mpvk_uninit(struct mpvk_ctx *vk); // destroys the surface (if set), the instance and the log
diff --git a/video/out/w32_common.c b/video/out/w32_common.c
new file mode 100644
index 0000000..e6a4670
--- /dev/null
+++ b/video/out/w32_common.c
@@ -0,0 +1,2144 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+#include <limits.h>
+#include <stdatomic.h>
+#include <stdio.h>
+
+#include <windows.h>
+#include <windowsx.h>
+#include <dwmapi.h>
+#include <ole2.h>
+#include <shobjidl.h>
+#include <avrt.h>
+
+#include "options/m_config.h"
+#include "options/options.h"
+#include "input/keycodes.h"
+#include "input/input.h"
+#include "input/event.h"
+#include "stream/stream.h"
+#include "common/msg.h"
+#include "common/common.h"
+#include "vo.h"
+#include "win_state.h"
+#include "w32_common.h"
+#include "win32/displayconfig.h"
+#include "win32/droptarget.h"
+#include "osdep/io.h"
+#include "osdep/threads.h"
+#include "osdep/w32_keyboard.h"
+#include "misc/dispatch.h"
+#include "misc/rendezvous.h"
+#include "mpv_talloc.h"
+
+EXTERN_C IMAGE_DOS_HEADER __ImageBase; // only its address is used, see the macro below
+#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) // address of __ImageBase reinterpreted as an HINSTANCE
+
+#ifndef WM_DPICHANGED // fallback for headers that do not define it
+#define WM_DPICHANGED (0x02E0)
+#endif
+
+#ifndef DWMWA_USE_IMMERSIVE_DARK_MODE // fallback for headers that do not define it
+#define DWMWA_USE_IMMERSIVE_DARK_MODE 20
+#endif
+
+
+// Compatibility definitions for older MinGW headers (defined unconditionally)
+#define DWMWA_WINDOW_CORNER_PREFERENCE 33
+#define DWMWA_SYSTEMBACKDROP_TYPE 38
+
+#ifndef DPI_ENUMS_DECLARED // presumably set by system headers that already declare this enum - confirm
+typedef enum MONITOR_DPI_TYPE { // second argument of pGetDpiForMonitor in struct w32_api
+    MDT_EFFECTIVE_DPI = 0,
+    MDT_ANGULAR_DPI = 1,
+    MDT_RAW_DPI = 2,
+    MDT_DEFAULT = MDT_EFFECTIVE_DPI
+} MONITOR_DPI_TYPE;
+#endif
+
+#define rect_w(r) ((r).right - (r).left) // width of a RECT-like value; evaluates r twice
+#define rect_h(r) ((r).bottom - (r).top) // height of a RECT-like value; evaluates r twice
+
+struct w32_api { // function pointers to Windows APIs; adjust_window_rect() checks its entry for NULL before calling
+    HRESULT (WINAPI *pGetDpiForMonitor)(HMONITOR, MONITOR_DPI_TYPE, UINT*, UINT*);
+    BOOL (WINAPI *pImmDisableIME)(DWORD);
+    BOOL (WINAPI *pAdjustWindowRectExForDpi)(LPRECT lpRect, DWORD dwStyle, BOOL bMenu, DWORD dwExStyle, UINT dpi);
+    BOOLEAN (WINAPI *pShouldAppsUseDarkMode)(void);
+    DWORD (WINAPI *pSetPreferredAppMode)(DWORD mode);
+};
+
+struct vo_w32_state { // per-window state shared by the win32 helpers in this file
+    struct mp_log *log;
+    struct vo *vo;
+    struct mp_vo_opts *opts;
+    struct m_config_cache *opts_cache;
+    struct input_ctx *input_ctx;
+
+    mp_thread thread;
+    bool terminate;
+    struct mp_dispatch_queue *dispatch; // used to run stuff on the GUI thread
+    bool in_dispatch;
+
+    struct w32_api api; // stores functions from dynamically loaded DLLs
+
+    HWND window;
+    HWND parent; // 0 normally, set in embedding mode
+    HHOOK parent_win_hook;
+    HWINEVENTHOOK parent_evt_hook;
+
+    HMONITOR monitor; // Handle of the current screen
+    char *color_profile; // Path of the current screen's color profile
+
+    // Has the window seen a WM_DESTROY? If so, don't call DestroyWindow again.
+    bool destroyed;
+
+    bool focused;
+
+    // whether the window position and size were initialized
+    bool window_bounds_initialized;
+
+    bool current_fs;
+    bool toggle_fs; // whether the current fullscreen state needs to be switched
+
+    // Note: maximized state doesn't involve nor modify windowrc
+    RECT windowrc; // currently known normal/fullscreen window client rect
+    RECT prev_windowrc; // saved normal window client rect while in fullscreen
+
+    // video size
+    uint32_t o_dwidth;
+    uint32_t o_dheight;
+
+    int dpi; // passed to AdjustWindowRectExForDpi() in adjust_window_rect()
+    double dpi_scale;
+
+    bool disable_screensaver;
+    bool cursor_visible;
+    atomic_uint event_flags;
+
+    BOOL tracking;
+    TRACKMOUSEEVENT trackEvent;
+
+    int mouse_x;
+    int mouse_y;
+
+    // Should SetCursor be called when handling VOCTRL_SET_CURSOR_VISIBILITY?
+    bool can_set_cursor;
+
+    // UTF-16 decoding state for WM_CHAR and VK_PACKET
+    int high_surrogate;
+
+    // Fit the window to one monitor working area next time it's not fullscreen
+    // and not maximized. Used once after every new "untrusted" size comes from
+    // mpv, else we assume that the last known size is valid and don't fit.
+    // FIXME: on a multi-monitor setup one bit is not enough, because the first
+    // fit (autofit etc) should be to one monitor, but later size changes from
+    // mpv like window-scale (VOCTRL_SET_UNFS_WINDOW_SIZE) should allow the
+    // entire virtual desktop area - but we still limit to one monitor size.
+    bool fit_on_screen;
+
+    bool win_force_pos;
+
+    ITaskbarList2 *taskbar_list;
+    ITaskbarList3 *taskbar_list3;
+    UINT tbtnCreatedMsg;
+    bool tbtnCreated;
+
+    struct voctrl_playback_state current_pstate;
+
+    // updates on move/resize/displaychange
+    double display_fps;
+
+    bool moving;
+
+    union {
+        uint8_t snapped; // shares storage with the four per-edge bit-fields below
+        struct {
+            uint8_t snapped_left : 1;
+            uint8_t snapped_right : 1;
+            uint8_t snapped_top : 1;
+            uint8_t snapped_bottom : 1;
+        };
+    };
+    int snap_dx;
+    int snap_dy;
+
+    HANDLE avrt_handle;
+
+    bool cleared;
+};
+
+// Adjusts *rc for the current window style of `hwnd`, using the DPI-aware
+// AdjustWindowRectExForDpi() when that function is available and plain
+// AdjustWindowRect() otherwise. Does nothing if the border option is off.
+static void adjust_window_rect(struct vo_w32_state *w32, HWND hwnd, RECT *rc)
+{
+    if (w32->opts->border) {
+        if (w32->api.pAdjustWindowRectExForDpi) {
+            w32->api.pAdjustWindowRectExForDpi(rc,
+                                               GetWindowLongPtrW(hwnd, GWL_STYLE),
+                                               0,
+                                               GetWindowLongPtrW(hwnd, GWL_EXSTYLE),
+                                               w32->dpi);
+            return;
+        }
+
+        AdjustWindowRect(rc, GetWindowLongPtrW(hwnd, GWL_STYLE), 0);
+    }
+}
+
+// Grows *rc by the window borders via adjust_window_rect(). When the window
+// has a border but no title bar (and is not fullscreen), the top edge is put
+// back to its original value.
+static void add_window_borders(struct vo_w32_state *w32, HWND hwnd, RECT *rc)
+{
+    const LONG original_top = rc->top;
+
+    adjust_window_rect(w32, hwnd, rc);
+
+    // Adjust for title bar height that will be hidden in WM_NCCALCSIZE
+    if (!w32->opts->border || w32->opts->title_bar || w32->current_fs)
+        return;
+
+    rc->top = original_top;
+}
+
+// Inverse of add_window_borders() (win32 does not appear to offer a reverse
+// AdjustWindowRect): the border sizes are measured on an empty rectangle and
+// then removed from *rc.
+static void subtract_window_borders(struct vo_w32_state *w32, HWND hwnd, RECT *rc)
+{
+    RECT borders = { 0 };
+
+    add_window_borders(w32, hwnd, &borders);
+
+    rc->left   -= borders.left;
+    rc->top    -= borders.top;
+    rc->right  -= borders.right;
+    rc->bottom -= borders.bottom;
+}
+
+// Hit-test the point (x, y) against the window rectangle shrunk by the sizing
+// frame and return the matching HT* code. A maximized window is reported as
+// all client area; HTNOWHERE is returned if the window rect can't be queried.
+static LRESULT borderless_nchittest(struct vo_w32_state *w32, int x, int y)
+{
+    if (IsMaximized(w32->window))
+        return HTCLIENT;
+
+    RECT inner;
+    if (!GetWindowRect(w32->window, &inner))
+        return HTNOWHERE;
+
+    int frame_x = GetSystemMetrics(SM_CXSIZEFRAME);
+    int frame_y = GetSystemMetrics(SM_CYSIZEFRAME);
+    if (w32->opts->border) {
+        const int padding = GetSystemMetrics(SM_CXPADDEDBORDER);
+        frame_x += padding;
+        frame_y += padding;
+        if (!w32->opts->title_bar)
+            inner.top -= padding;
+    }
+    InflateRect(&inner, -frame_x, -frame_y);
+
+    // Rows: above / inside / below the inner rect. Columns: left / inside /
+    // right of it.
+    static const LRESULT hit_codes[3][3] = {
+        { HTTOPLEFT,    HTTOP,    HTTOPRIGHT    },
+        { HTLEFT,       HTCLIENT, HTRIGHT       },
+        { HTBOTTOMLEFT, HTBOTTOM, HTBOTTOMRIGHT },
+    };
+    const int col = x < inner.left ? 0 : (x > inner.right ? 2 : 1);
+    const int row = y < inner.top ? 0 : (y > inner.bottom ? 2 : 1);
+    return hit_codes[row][col];
+}
+
+// Map the WMSZ_* value in v to the border that should be resized, taking the
+// currently snapped borders into account so the window doesn't detach.
+// returns: 0=left, 1=top, 2=right, 3=bottom, -1=undefined
+static int get_resize_border(struct vo_w32_state *w32, int v)
+{
+    if (v == WMSZ_LEFT || v == WMSZ_RIGHT)
+        return w32->snapped_bottom ? 1 : 3;
+    if (v == WMSZ_TOP || v == WMSZ_BOTTOM)
+        return w32->snapped_right ? 0 : 2;
+    if (v == WMSZ_TOPLEFT || v == WMSZ_TOPRIGHT)
+        return 1;
+    if (v == WMSZ_BOTTOMLEFT || v == WMSZ_BOTTOMRIGHT)
+        return 3;
+    return -1;
+}
+
+// True if bit 0x8000 of GetKeyState's result is set for the virtual key vk.
+static bool key_state(int vk)
+{
+    return (GetKeyState(vk) & 0x8000) != 0;
+}
+
+// Build the MP_KEY_MODIFIER_* mask for the currently held Ctrl/Shift/Alt keys.
+// When AltGr handling is enabled, the LCONTROL+RMENU combination is not
+// reported as Ctrl or Alt.
+static int mod_state(struct vo_w32_state *w32)
+{
+    const bool lctrl = key_state(VK_LCONTROL);
+    const bool rmenu = key_state(VK_RMENU);
+
+    // AltGr is represented as LCONTROL+RMENU on Windows
+    const bool alt_gr = mp_input_use_alt_gr(w32->input_ctx) && rmenu && lctrl;
+
+    int mods = 0;
+    if (key_state(VK_RCONTROL) || (lctrl && !alt_gr))
+        mods |= MP_KEY_MODIFIER_CTRL;
+    if (key_state(VK_SHIFT))
+        mods |= MP_KEY_MODIFIER_SHIFT;
+    if (key_state(VK_LMENU) || (rmenu && !alt_gr))
+        mods |= MP_KEY_MODIFIER_ALT;
+    return mods;
+}
+
+// Combine a UTF-16 lead/trail surrogate pair into a code point: the low ten
+// bits of each unit are joined and offset by 0x10000.
+static int decode_surrogate_pair(wchar_t lead, wchar_t trail)
+{
+    const int high_bits = (lead & 0x3ff) << 10;
+    const int low_bits = trail & 0x3ff;
+    return 0x10000 + (high_bits | low_bits);
+}
+
+// Incremental UTF-16 decoder; the pending lead surrogate is kept in
+// w32->high_surrogate between calls. Returns the decoded code point, or 0 if
+// more input is needed or the sequence was invalid (an error is logged then).
+static int decode_utf16(struct vo_w32_state *w32, wchar_t c)
+{
+    if (IS_HIGH_SURROGATE(c)) {
+        // First half of a pair: remember it and wait for the trail unit
+        w32->high_surrogate = c;
+        return 0;
+    }
+
+    if (IS_LOW_SURROGATE(c)) {
+        if (w32->high_surrogate) {
+            const int codepoint = decode_surrogate_pair(w32->high_surrogate, c);
+            w32->high_surrogate = 0;
+            return codepoint;
+        }
+        // Trail unit without a preceding lead unit
+        MP_ERR(w32, "Invalid UTF-16 input\n");
+        return 0;
+    }
+
+    if (w32->high_surrogate != 0) {
+        // Lead unit followed by something that isn't a trail unit
+        MP_ERR(w32, "Invalid UTF-16 input\n");
+        w32->high_surrogate = 0;
+        return 0;
+    }
+
+    return c;
+}
+
+// Feed VK_DECIMAL with an all-zero key state to ToUnicode until it stops
+// returning a negative value.
+static void clear_keyboard_buffer(void)
+{
+    static const BYTE no_keys[256] = { 0 };
+    const UINT vk = VK_DECIMAL;
+    const UINT sc = MapVirtualKey(vk, MAPVK_VK_TO_VSC);
+    wchar_t scratch[10];
+
+    // Use the method suggested by Michael Kaplan to clear any pending dead
+    // keys from the current keyboard layout. See:
+    // https://web.archive.org/web/20101004154432/http://blogs.msdn.com/b/michkap/archive/2006/04/06/569632.aspx
+    // https://web.archive.org/web/20100820152419/http://blogs.msdn.com/b/michkap/archive/2007/10/27/5717859.aspx
+    while (ToUnicode(vk, sc, no_keys, scratch, MP_ARRAY_SIZE(scratch), 0) < 0)
+        ;
+}
+
+// Stateless wrapper around ToUnicode that returns a single code point (the
+// last complete one written to the buffer), or 0 if nothing was produced.
+static int to_unicode(UINT vkey, UINT scancode, const BYTE keys[256])
+{
+    // Make the buffer 10 code units long to be safe, same as here:
+    // https://web.archive.org/web/20101013215215/http://blogs.msdn.com/b/michkap/archive/2006/03/24/559169.aspx
+    wchar_t units[10] = { 0 };
+
+    // Dead keys aren't useful for key shortcuts, so clear the keyboard state
+    clear_keyboard_buffer();
+
+    const int ret = ToUnicode(vkey, scancode, keys, units, MP_ARRAY_SIZE(units), 0);
+
+    // A negative return value indicates a dead key, however there should
+    // still be a non-combining version of the key in the buffer.
+    const int count = ret < 0 ? -ret : ret;
+    if (count < 1)
+        return 0;
+
+    const wchar_t last = units[count - 1];
+    if (count >= 2 && IS_SURROGATE_PAIR(units[count - 2], last))
+        return decode_surrogate_pair(units[count - 2], last);
+    return last;
+}
+
+// Translate a virtual key / scancode into a Unicode code point using the
+// current keyboard state. Returns 0 if no printable character (>= 0x20) could
+// be produced, or if a UTF-16 surrogate is still pending in decode_utf16().
+static int decode_key(struct vo_w32_state *w32, UINT vkey, UINT scancode)
+{
+    // Zero-initialize so that a failing GetKeyboardState() leaves a defined
+    // "no keys pressed" state instead of indeterminate stack contents.
+    BYTE keys[256] = { 0 };
+    GetKeyboardState(keys);
+
+    // If mp_input_use_alt_gr is false, detect and remove AltGr so normal
+    // characters are generated. Note that AltGr is represented as
+    // LCONTROL+RMENU on Windows.
+    if ((keys[VK_RMENU] & 0x80) && (keys[VK_LCONTROL] & 0x80) &&
+        !mp_input_use_alt_gr(w32->input_ctx))
+    {
+        keys[VK_RMENU] = keys[VK_LCONTROL] = 0;
+        // The generic modifier entries now mirror the remaining side
+        keys[VK_MENU] = keys[VK_LMENU];
+        keys[VK_CONTROL] = keys[VK_RCONTROL];
+    }
+
+    int c = to_unicode(vkey, scancode, keys);
+
+    // Some shift states prevent ToUnicode from working or cause it to produce
+    // control characters. If this is detected, remove modifiers until it
+    // starts producing normal characters.
+    if (c < 0x20 && (keys[VK_MENU] & 0x80)) {
+        keys[VK_LMENU] = keys[VK_RMENU] = keys[VK_MENU] = 0;
+        c = to_unicode(vkey, scancode, keys);
+    }
+    if (c < 0x20 && (keys[VK_CONTROL] & 0x80)) {
+        keys[VK_LCONTROL] = keys[VK_RCONTROL] = keys[VK_CONTROL] = 0;
+        c = to_unicode(vkey, scancode, keys);
+    }
+    if (c < 0x20)
+        return 0;
+
+    // Decode lone UTF-16 surrogates (VK_PACKET can generate these)
+    if (c < 0x10000)
+        return decode_utf16(w32, c);
+    return c;
+}
+
+// Forward an app command to the input context as a key press. Returns false
+// if media keys are disabled or cmd has no mpv key mapping.
+static bool handle_appcommand(struct vo_w32_state *w32, UINT cmd)
+{
+    const int mpkey = mp_input_use_media_keys(w32->input_ctx)
+                    ? mp_w32_appcmd_to_mpkey(cmd) : 0;
+    if (!mpkey)
+        return false;
+
+    mp_input_put_key(w32->input_ctx, mpkey | mod_state(w32));
+    return true;
+}
+
+// Send a key-down event for vkey/scancode to the input context. The key is
+// looked up in the vkey table first and decoded as a character otherwise;
+// nothing is sent if neither produces a key.
+static void handle_key_down(struct vo_w32_state *w32, UINT vkey, UINT scancode)
+{
+    // Ignore key repeat
+    if (scancode & KF_REPEAT)
+        return;
+
+    int mpkey = mp_w32_vkey_to_mpkey(vkey, scancode & KF_EXTENDED);
+    if (!mpkey)
+        mpkey = decode_key(w32, vkey, scancode & (0xff | KF_EXTENDED));
+
+    if (mpkey) {
+        mp_input_put_key(w32->input_ctx,
+                         mpkey | mod_state(w32) | MP_KEY_STATE_DOWN);
+    }
+}
+
+// Handle a key release. Releases of Alt, Ctrl and Shift are ignored; any
+// other key releases everything.
+static void handle_key_up(struct vo_w32_state *w32, UINT vkey, UINT scancode)
+{
+    const bool is_modifier = vkey == VK_MENU || vkey == VK_CONTROL ||
+                             vkey == VK_SHIFT;
+    if (is_modifier)
+        return;
+
+    // Releasing all keys on key-up is simpler and ensures no keys can
+    // get "stuck." This matches the behaviour of other VOs.
+    mp_input_put_key(w32->input_ctx, MP_INPUT_RELEASE_ALL);
+}
+
+// Feed one UTF-16 code unit from a character message into the input context.
+// Returns true when the unit was consumed (a key was sent, or decode_utf16()
+// returned 0), false for control characters below 0x20.
+static bool handle_char(struct vo_w32_state *w32, wchar_t wc)
+{
+    const int c = decode_utf16(w32, wc);
+
+    if (c >= 0x20) {
+        mp_input_put_key(w32->input_ctx, c | mod_state(w32));
+        return true;
+    }
+
+    // 0 counts as handled; other values below 0x20 do not
+    return c == 0;
+}
+
+// Report a mouse button press. An unmodified left click outside fullscreen
+// that the input context does not claim via mp_input_test_dragging() starts a
+// window drag instead. Returns true if the message was fully handled here.
+static bool handle_mouse_down(struct vo_w32_state *w32, int btn, int x, int y)
+{
+    const int key = btn | mod_state(w32);
+    mp_input_put_key(w32->input_ctx, key | MP_KEY_STATE_DOWN);
+
+    const bool drag_window = key == MP_MBTN_LEFT && !w32->current_fs &&
+                             !mp_input_test_dragging(w32->input_ctx, x, y);
+    if (!drag_window) {
+        SetCapture(w32->window);
+        return false;
+    }
+
+    // Window dragging hack
+    ReleaseCapture();
+    SendMessage(w32->window, WM_NCLBUTTONDOWN, HTCAPTION, 0);
+    mp_input_put_key(w32->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_UP);
+
+    // Indicate the message was handled, so DefWindowProc won't be called
+    return true;
+}
+
+// Report a mouse button release (with current modifiers) and drop the mouse
+// capture.
+static void handle_mouse_up(struct vo_w32_state *w32, int btn)
+{
+    const int key = btn | mod_state(w32);
+    mp_input_put_key(w32->input_ctx, key | MP_KEY_STATE_UP);
+
+    ReleaseCapture();
+}
+
+// Report a wheel movement. The sign of val selects the direction and its
+// magnitude divided by 120 is passed on as the scroll amount.
+static void handle_mouse_wheel(struct vo_w32_state *w32, bool horiz, int val)
+{
+    const bool positive = val > 0;
+    int code;
+    if (horiz) {
+        code = positive ? MP_WHEEL_RIGHT : MP_WHEEL_LEFT;
+    } else {
+        code = positive ? MP_WHEEL_UP : MP_WHEEL_DOWN;
+    }
+
+    const double amount = abs(val) / 120.;
+    mp_input_put_wheel(w32->input_ctx, code | mod_state(w32), amount);
+}
+
+// OR the given event bits into w32->event_flags (atomically) and wake up
+// the VO via vo_wakeup().
+static void signal_events(struct vo_w32_state *w32, int events)
+{
+    atomic_fetch_or(&w32->event_flags, events);
+    vo_wakeup(w32->vo);
+}
+
+// Wake up the window procedure (which processes the dispatch queue) by
+// delivering a WM_NULL message. ctx is the struct vo_w32_state pointer.
+static void wakeup_gui_thread(void *ctx)
+{
+    struct vo_w32_state *w32 = ctx;
+
+    const bool on_window_thread =
+        GetWindowThreadProcessId(w32->window, NULL) == GetCurrentThreadId();
+    if (!on_window_thread) {
+        // Use a sent message when cross-thread, since the queue of sent
+        // messages is processed in some cases when posted messages are blocked
+        SendNotifyMessageW(w32->window, WM_NULL, 0, 0);
+        return;
+    }
+
+    PostMessageW(w32->window, WM_NULL, 0, 0);
+}
+
+// Query the current refresh rate of the given display device through
+// EnumDisplaySettingsW. Returns 0.0 if the query fails or the reported
+// frequency is exactly 1.
+static double get_refresh_rate_from_gdi(const wchar_t *device)
+{
+    // Integer rates assumed to be a rounded-down (N + 1) / 1.001
+    static const DWORD fractional_rates[] = {
+        23, 29, 47, 59, 71, 89, 95, 119, 143, 164, 239, 359, 479,
+    };
+
+    DEVMODEW dm = { .dmSize = sizeof dm };
+    if (!EnumDisplaySettingsW(device, ENUM_CURRENT_SETTINGS, &dm))
+        return 0.0;
+
+    // May return 0 or 1 which "represent the display hardware's default refresh rate"
+    // https://msdn.microsoft.com/en-us/library/windows/desktop/dd183565%28v=vs.85%29.aspx
+    // mpv validates this value with a threshold of 1, so don't return exactly 1
+    if (dm.dmDisplayFrequency == 1)
+        return 0.0;
+
+    // dm.dmDisplayFrequency is an integer which is rounded down, so it's
+    // highly likely that 23 represents 24/1.001, 59 represents 60/1.001, etc.
+    // A caller can always reproduce the original value by using floor.
+    double rate = dm.dmDisplayFrequency;
+    for (size_t i = 0; i < MP_ARRAY_SIZE(fractional_rates); i++) {
+        if (dm.dmDisplayFrequency == fractional_rates[i]) {
+            rate = (rate + 1) / 1.001;
+            break;
+        }
+    }
+
+    return rate;
+}
+
+// Return the ICM profile file name reported for the given display device,
+// converted with mp_to_utf8() using ctx, or NULL if the information context
+// can't be created or no profile is reported.
+static char *get_color_profile(void *ctx, const wchar_t *device)
+{
+    HDC ic = CreateICW(device, NULL, NULL, NULL);
+    if (!ic)
+        return NULL;
+
+    char *name = NULL;
+    wchar_t wname[MAX_PATH + 1];
+    DWORD wname_len = MAX_PATH;
+    if (GetICMProfileW(ic, &wname_len, wname))
+        name = mp_to_utf8(ctx, wname);
+
+    DeleteDC(ic);
+    return name;
+}
+
+// Refresh w32->dpi and w32->dpi_scale for the current monitor and signal
+// VO_EVENT_DPI. Falls back to 96 if neither API yields a positive value.
+static void update_dpi(struct vo_w32_state *w32)
+{
+    int dpi = 0;
+    UINT dpi_x, dpi_y;
+
+    if (w32->api.pGetDpiForMonitor &&
+        w32->api.pGetDpiForMonitor(w32->monitor, MDT_EFFECTIVE_DPI,
+                                   &dpi_x, &dpi_y) == S_OK)
+    {
+        dpi = (int)dpi_x;
+        MP_VERBOSE(w32, "DPI detected from the new API: %d\n", dpi);
+    } else {
+        // Query the screen DC instead
+        HDC hdc = GetDC(NULL);
+        if (hdc) {
+            dpi = GetDeviceCaps(hdc, LOGPIXELSX);
+            ReleaseDC(NULL, hdc);
+            MP_VERBOSE(w32, "DPI detected from the old API: %d\n", dpi);
+        }
+    }
+
+    if (dpi <= 0) {
+        dpi = 96;
+        MP_VERBOSE(w32, "Couldn't determine DPI, falling back to %d\n", dpi);
+    }
+
+    w32->dpi = dpi;
+    // Scale is only applied when the hidpi-window-scale option is on
+    if (w32->opts->hidpi_window_scale) {
+        w32->dpi_scale = w32->dpi / 96.0;
+    } else {
+        w32->dpi_scale = 1.0;
+    }
+    signal_events(w32, VO_EVENT_DPI);
+}
+
+// Re-query DPI, refresh rate and color profile if the window is now on a
+// different monitor than w32->monitor; does nothing otherwise. Signals
+// VO_EVENT_WIN_STATE / VO_EVENT_ICC_PROFILE_CHANGED when a value changed.
+static void update_display_info(struct vo_w32_state *w32)
+{
+    HMONITOR current = MonitorFromWindow(w32->window, MONITOR_DEFAULTTOPRIMARY);
+    if (current == w32->monitor)
+        return;
+    w32->monitor = current;
+
+    update_dpi(w32);
+
+    MONITORINFOEXW mi = { .cbSize = sizeof mi };
+    GetMonitorInfoW(current, (MONITORINFO*)&mi);
+
+    // Try to get the monitor refresh rate, preferring the displayconfig API
+    // and falling back to GDI.
+    double freq = mp_w32_displayconfig_get_refresh_rate(mi.szDevice);
+    if (freq == 0.0)
+        freq = get_refresh_rate_from_gdi(mi.szDevice);
+
+    if (freq != w32->display_fps) {
+        MP_VERBOSE(w32, "display-fps: %f\n", freq);
+        if (freq == 0.0)
+            MP_WARN(w32, "Couldn't determine monitor refresh rate\n");
+        w32->display_fps = freq;
+        signal_events(w32, VO_EVENT_WIN_STATE);
+    }
+
+    char *new_profile = get_color_profile(w32, mi.szDevice);
+    bool profile_changed;
+    if (!new_profile || !w32->color_profile) {
+        // At least one side is unset: changed unless both are
+        profile_changed = new_profile != w32->color_profile;
+    } else {
+        profile_changed = strcmp(new_profile, w32->color_profile) != 0;
+    }
+
+    if (profile_changed) {
+        if (new_profile)
+            MP_VERBOSE(w32, "color-profile: %s\n", new_profile);
+        // The state takes ownership of the new string
+        talloc_free(w32->color_profile);
+        w32->color_profile = new_profile;
+        signal_events(w32, VO_EVENT_ICC_PROFILE_CHANGED);
+    } else {
+        talloc_free(new_profile);
+    }
+}
+
+// Clear the cached monitor handle so that update_display_info() can't take
+// its "same monitor" early return, then run it.
+static void force_update_display_info(struct vo_w32_state *w32)
+{
+    w32->monitor = 0;
+    update_display_info(w32);
+}
+
+// Push w32->current_pstate to the taskbar button's progress indicator. Does
+// nothing until both the ITaskbarList3 interface and the button exist.
+static void update_playback_state(struct vo_w32_state *w32)
+{
+    if (!w32->taskbar_list3 || !w32->tbtnCreated)
+        return;
+
+    struct voctrl_playback_state *ps = &w32->current_pstate;
+    const bool show_progress = ps->playing && ps->taskbar_progress;
+
+    if (show_progress) {
+        ITaskbarList3_SetProgressValue(w32->taskbar_list3, w32->window,
+                                       ps->percent_pos, 100);
+        ITaskbarList3_SetProgressState(w32->taskbar_list3, w32->window,
+                                       ps->paused ? TBPF_PAUSED :
+                                                    TBPF_NORMAL);
+    } else {
+        ITaskbarList3_SetProgressState(w32->taskbar_list3, w32->window,
+                                       TBPF_NOPROGRESS);
+    }
+}
+
+// State shared between get_monitor() and its EnumDisplayMonitors callback.
+struct get_monitor_data {
+    int i;          // number of monitors the callback has skipped so far
+    int target;     // index the callback is looking for
+    HMONITOR mon;   // set by the callback once i == target
+};
+
+// EnumDisplayMonitors callback: counts monitors in data->i and stores the
+// handle of the one at index data->target. Returns FALSE once it is found,
+// TRUE otherwise.
+static BOOL CALLBACK get_monitor_proc(HMONITOR mon, HDC dc, LPRECT r, LPARAM p)
+{
+    struct get_monitor_data *data = (struct get_monitor_data*)p;
+
+    if (data->i != data->target) {
+        data->i++;
+        return TRUE;
+    }
+
+    data->mon = mon;
+    return FALSE;
+}
+
+// Return the monitor at enumeration index id. The result stays zero (NULL)
+// if the callback never reaches that index.
+static HMONITOR get_monitor(int id)
+{
+    struct get_monitor_data search = {0};
+    search.target = id;
+
+    EnumDisplayMonitors(NULL, NULL, get_monitor_proc, (LPARAM)&search);
+    return search.mon;
+}
+
+// Pick the monitor selected by the fs-screen option (when fullscreen) or the
+// screen option (otherwise). A negative id defers to the forced window
+// position or to the window's own monitor; an id that doesn't exist falls
+// back to the primary monitor.
+static HMONITOR get_default_monitor(struct vo_w32_state *w32)
+{
+    int id;
+    if (w32->current_fs) {
+        id = w32->opts->fsscreen_id;
+    } else {
+        id = w32->opts->screen_id;
+    }
+
+    if (id >= 0) {
+        HMONITOR mon = get_monitor(id);
+        if (!mon) {
+            MP_VERBOSE(w32, "Screen %d does not exist, falling back to primary\n", id);
+            mon = MonitorFromPoint((POINT){0, 0}, MONITOR_DEFAULTTOPRIMARY);
+        }
+        return mon;
+    }
+
+    // Handle --fs-screen=<all|default> and --screen=default
+    if (w32->win_force_pos && !w32->current_fs) {
+        // Get window from forced position
+        return MonitorFromRect(&w32->windowrc, MONITOR_DEFAULTTOPRIMARY);
+    }
+
+    // Let compositor decide
+    return MonitorFromWindow(w32->window, MONITOR_DEFAULTTOPRIMARY);
+}
+
+// Return the MONITORINFO of the monitor the window is on (when it is visible
+// and not fullscreen) or of the monitor chosen by get_default_monitor().
+static MONITORINFO get_monitor_info(struct vo_w32_state *w32)
+{
+    // The window is not visible during initialization, so in that case get
+    // the monitor by --screen or --fs-screen id, or fallback to primary.
+    const bool use_window = IsWindowVisible(w32->window) && !w32->current_fs;
+    HMONITOR mon = use_window
+        ? MonitorFromWindow(w32->window, MONITOR_DEFAULTTOPRIMARY)
+        : get_default_monitor(w32);
+
+    MONITORINFO info = { .cbSize = sizeof(info) };
+    GetMonitorInfoW(mon, &info);
+    return info;
+}
+
+// Return the full rectangle of the relevant monitor, or of the whole virtual
+// screen when fullscreen with --fs-screen=all (fsscreen_id == -2).
+static RECT get_screen_area(struct vo_w32_state *w32)
+{
+    const bool all_screens = w32->current_fs && w32->opts->fsscreen_id == -2;
+    if (!all_screens)
+        return get_monitor_info(w32).rcMonitor;
+
+    // Handle --fs-screen=all
+    const int left = GetSystemMetrics(SM_XVIRTUALSCREEN);
+    const int top = GetSystemMetrics(SM_YVIRTUALSCREEN);
+    return (RECT) { left, top, left + GetSystemMetrics(SM_CXVIRTUALSCREEN),
+                               top + GetSystemMetrics(SM_CYVIRTUALSCREEN) };
+}
+
+// Return the area available to the window: the whole screen area when
+// fullscreen, the monitor's work area (rcWork) otherwise.
+static RECT get_working_area(struct vo_w32_state *w32)
+{
+    if (w32->current_fs)
+        return get_screen_area(w32);
+    return get_monitor_info(w32).rcWork;
+}
+
+// Adjust working area boundaries to compensate for invisible borders: each
+// edge of wa_rect is moved outwards by the distance between wnd_rect and the
+// DWM extended frame bounds of wnd. wa_rect is unchanged if the query fails.
+static void adjust_working_area_for_extended_frame(RECT *wa_rect, RECT *wnd_rect, HWND wnd)
+{
+    RECT visible = {0};
+    if (DwmGetWindowAttribute(wnd, DWMWA_EXTENDED_FRAME_BOUNDS,
+                              &visible, sizeof(RECT)) != S_OK)
+        return;
+
+    wa_rect->left   += wnd_rect->left - visible.left;
+    wa_rect->top    += wnd_rect->top - visible.top;
+    wa_rect->right  += wnd_rect->right - visible.right;
+    wa_rect->bottom += wnd_rect->bottom - visible.bottom;
+}
+
+// Snap the proposed window rectangle *rc to the edges of the working area
+// while the window is being moved.
+// rc: in/out, the rectangle the window is about to be moved to.
+// Updates w32->snapped* and, when snapping starts, w32->snap_dx/snap_dy (the
+// cursor offset relative to rc's top-left corner).
+// Returns true if *rc was written (even if it ended up at the same position),
+// false if snapping does not apply or a required query failed.
+static bool snap_to_screen_edges(struct vo_w32_state *w32, RECT *rc)
+{
+    if (w32->parent || w32->current_fs || IsMaximized(w32->window))
+        return false;
+
+    if (!w32->opts->snap_window) {
+        w32->snapped = 0;
+        return false;
+    }
+
+    RECT rect;
+    POINT cursor;
+    if (!GetWindowRect(w32->window, &rect) || !GetCursorPos(&cursor))
+        return false;
+    // Check if window is going to be aero-snapped
+    if (rect_w(*rc) != rect_w(rect) || rect_h(*rc) != rect_h(rect))
+        return false;
+
+    // Check if window has already been aero-snapped
+    WINDOWPLACEMENT wp = {0};
+    wp.length = sizeof(wp);
+    if (!GetWindowPlacement(w32->window, &wp))
+        return false;
+    RECT wr = wp.rcNormalPosition;
+    if (rect_w(*rc) != rect_w(wr) || rect_h(*rc) != rect_h(wr))
+        return false;
+
+    // Get the work area to let the window snap to taskbar
+    // (wr is reused here; from now on it holds the working area)
+    wr = get_working_area(w32);
+
+    adjust_working_area_for_extended_frame(&wr, &rect, w32->window);
+
+    // Let the window to unsnap by changing its position,
+    // otherwise it will stick to the screen edges forever
+    rect = *rc;
+    if (w32->snapped) {
+        // Place the rect where the cursor would have dragged it, using the
+        // cursor offset recorded when snapping started
+        OffsetRect(&rect, cursor.x - rect.left - w32->snap_dx,
+                          cursor.y - rect.top - w32->snap_dy);
+    }
+
+    // Snap distance: 16 at a DPI of 96, scaled linearly with w32->dpi
+    int threshold = (w32->dpi * 16) / 96;
+    bool was_snapped = !!w32->snapped;
+    w32->snapped = 0;
+    // Adjust X position
+    // snapped_left & snapped_right are mutually exclusive
+    if (abs(rect.left - wr.left) < threshold) {
+        w32->snapped_left = 1;
+        OffsetRect(&rect, wr.left - rect.left, 0);
+    } else if (abs(rect.right - wr.right) < threshold) {
+        w32->snapped_right = 1;
+        OffsetRect(&rect, wr.right - rect.right, 0);
+    }
+    // Adjust Y position
+    // snapped_top & snapped_bottom are mutually exclusive
+    if (abs(rect.top - wr.top) < threshold) {
+        w32->snapped_top = 1;
+        OffsetRect(&rect, 0, wr.top - rect.top);
+    } else if (abs(rect.bottom - wr.bottom) < threshold) {
+        w32->snapped_bottom = 1;
+        OffsetRect(&rect, 0, wr.bottom - rect.bottom);
+    }
+
+    // Record the cursor offset only on the unsnapped -> snapped transition
+    if (!was_snapped && w32->snapped != 0) {
+        w32->snap_dx = cursor.x - rc->left;
+        w32->snap_dy = cursor.y - rc->top;
+    }
+
+    *rc = rect;
+    return true;
+}
+
+// Return style with its frame-related bits replaced according to the current
+// fullscreen state and the border option. WS_SYSMENU is always set.
+static DWORD update_style(struct vo_w32_state *w32, DWORD style)
+{
+    const DWORD NO_FRAME = WS_OVERLAPPED | WS_MINIMIZEBOX | WS_THICKFRAME;
+    const DWORD FRAME = WS_OVERLAPPEDWINDOW;
+    const DWORD FULLSCREEN = NO_FRAME & ~WS_THICKFRAME;
+
+    DWORD frame_bits;
+    if (w32->current_fs) {
+        frame_bits = FULLSCREEN;
+    } else if (w32->opts->border) {
+        frame_bits = FRAME;
+    } else {
+        frame_bits = NO_FRAME;
+    }
+
+    // Drop every bit any of the three variants could have set, then add back
+    // the ones wanted now
+    const DWORD kept = style & ~(NO_FRAME | FRAME | FULLSCREEN);
+    return kept | WS_SYSMENU | frame_bits;
+}
+
+// Return how far adjust_window_rect() moves the top edge of an empty rect
+// upwards for the current window.
+static LONG get_title_bar_height(struct vo_w32_state *w32)
+{
+    RECT probe = { 0, 0, 0, 0 };
+    adjust_window_rect(w32, w32->window, &probe);
+    return -probe.top;
+}
+
+// Apply the style computed by update_style() to the window while keeping
+// w32->windowrc intact. No-op for embedded windows.
+static void update_window_style(struct vo_w32_state *w32)
+{
+    if (!w32->parent) {
+        // SetWindowLongPtr can trigger a WM_SIZE event, so window rect
+        // has to be saved now and restored after setting the new style.
+        const RECT saved_rc = w32->windowrc;
+
+        const DWORD old_style = GetWindowLongPtrW(w32->window, GWL_STYLE);
+        const DWORD new_style = update_style(w32, old_style);
+        SetWindowLongPtrW(w32->window, GWL_STYLE, new_style);
+
+        w32->windowrc = saved_rc;
+    }
+}
+
+// Give rc the size w x h. A snapped edge stays where it is (left wins over
+// right, top over bottom); on an axis without a snapped edge the rect is
+// resized around its center.
+static void resize_and_move_rect(struct vo_w32_state *w32, RECT *rc, int w, int h)
+{
+    // Start from the centered position and override it for snapped edges
+    int x = rc->left + rect_w(*rc) / 2 - w / 2;
+    if (w32->snapped_left) {
+        x = rc->left;
+    } else if (w32->snapped_right) {
+        x = rc->right - w;
+    }
+
+    int y = rc->top + rect_h(*rc) / 2 - h / 2;
+    if (w32->snapped_top) {
+        y = rc->top;
+    } else if (w32->snapped_bottom) {
+        y = rc->bottom - h;
+    }
+
+    SetRect(rc, x, y, x + w, y + h);
+}
+
+// Shrink rc so it is no larger than n_w x n_h while keeping its aspect ratio.
+// Positioning is delegated to resize_and_move_rect().
+// returns true if the rectangle was modified.
+static bool fit_rect_size(struct vo_w32_state *w32, RECT *rc, long n_w, long n_h)
+{
+    const int cur_w = rect_w(*rc), cur_h = rect_h(*rc);
+
+    // nothing to do if we already fit.
+    const bool fits = cur_w <= n_w && cur_h <= n_h;
+    if (fits)
+        return false;
+
+    // Apply letterboxing: reduce whichever target dimension is too generous
+    // for the current aspect ratio
+    const float cur_aspect = cur_w / (float)MPMAX(cur_h, 1);
+    const float max_aspect = n_w / (float)MPMAX(n_h, 1);
+    if (cur_aspect > max_aspect) {
+        n_h = n_w / cur_aspect;
+    } else {
+        n_w = n_h * cur_aspect;
+    }
+
+    resize_and_move_rect(w32, rc, n_w, n_h);
+
+    return true;
+}
+
+// Constrain w32->windowrc to the working area: shrink it if it is bigger than
+// the area available to the client rect, and move it down if its top edge
+// lies above that area. Logs the resulting bounds when anything changed.
+static void fit_window_on_screen(struct vo_w32_state *w32)
+{
+    RECT target = get_working_area(w32);
+    if (w32->opts->border)
+        subtract_window_borders(w32, w32->window, &target);
+
+    RECT outer;
+    if (GetWindowRect(w32->window, &outer))
+        adjust_working_area_for_extended_frame(&target, &outer, w32->window);
+
+    bool changed = fit_rect_size(w32, &w32->windowrc,
+                                 rect_w(target), rect_h(target));
+
+    // if the top-edge of client area is above the target area (mainly
+    // because the client-area is centered but the title bar is taller
+    // than the bottom border), then move it down to align the edges.
+    // Windows itself applies the same constraint during manual move.
+    const LONG shift_down = target.top - w32->windowrc.top;
+    if (shift_down > 0) {
+        w32->windowrc.bottom += shift_down;
+        w32->windowrc.top = target.top;
+        changed = true;
+    }
+
+    if (!changed)
+        return;
+
+    MP_VERBOSE(w32, "adjusted window bounds: %d:%d:%d:%d\n",
+               (int)w32->windowrc.left, (int)w32->windowrc.top,
+               (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc));
+}
+
+// Calculate new fullscreen state and change window size and position.
+// The new state comes from the fullscreen option unless w32->toggle_fs is
+// set, in which case the current state is inverted. The result is stored in
+// both w32->current_fs and the option (which is written back to the option
+// cache). Only w32->windowrc is updated here. No-op for embedded windows.
+static void update_fullscreen_state(struct vo_w32_state *w32)
+{
+    if (w32->parent)
+        return;
+
+    bool new_fs = w32->opts->fullscreen;
+    if (w32->toggle_fs) {
+        new_fs = !w32->current_fs;
+        w32->toggle_fs = false;
+    }
+
+    // Local flag (distinct from w32->toggle_fs): did the state change?
+    bool toggle_fs = w32->current_fs != new_fs;
+    w32->opts->fullscreen = w32->current_fs = new_fs;
+    m_config_cache_write_opt(w32->opts_cache,
+                             &w32->opts->fullscreen);
+
+    if (toggle_fs) {
+        if (w32->current_fs) {
+            // Save window rect when switching to fullscreen.
+            w32->prev_windowrc = w32->windowrc;
+            MP_VERBOSE(w32, "save window bounds: %d:%d:%d:%d\n",
+                       (int)w32->windowrc.left, (int)w32->windowrc.top,
+                       (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc));
+        } else {
+            // Restore window rect when switching from fullscreen.
+            w32->windowrc = w32->prev_windowrc;
+        }
+    }
+
+    // Applied whenever fullscreen is active, not only on the transition
+    if (w32->current_fs)
+        w32->windowrc = get_screen_area(w32);
+
+    MP_VERBOSE(w32, "reset window bounds: %d:%d:%d:%d\n",
+               (int)w32->windowrc.left, (int)w32->windowrc.top,
+               (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc));
+}
+
+// Minimize or restore the window so it matches the window_minimized option.
+// No-op for embedded windows or when the state already matches.
+static void update_minimized_state(struct vo_w32_state *w32)
+{
+    if (w32->parent)
+        return;
+
+    const bool is_minimized = !!IsMinimized(w32->window);
+    if (is_minimized == w32->opts->window_minimized)
+        return;
+
+    ShowWindow(w32->window, w32->opts->window_minimized ? SW_SHOWMINNOACTIVE
+                                                        : SW_RESTORE);
+}
+
+// Bring the window's maximized state in line with the window_maximized
+// option. No-op for embedded windows and while fullscreen.
+static void update_maximized_state(struct vo_w32_state *w32)
+{
+    // Don't change the maximized state in fullscreen for now. In future, this
+    // should be made to apply the maximized state on leaving fullscreen.
+    if (w32->parent || w32->current_fs)
+        return;
+
+    WINDOWPLACEMENT wp = { .length = sizeof wp };
+    GetWindowPlacement(w32->window, &wp);
+
+    if (wp.showCmd == SW_SHOWMINIMIZED) {
+        // When the window is minimized, setting this property just changes
+        // whether it will be maximized when it's restored
+        if (w32->opts->window_maximized) {
+            wp.flags |= WPF_RESTORETOMAXIMIZED;
+        } else {
+            wp.flags &= ~WPF_RESTORETOMAXIMIZED;
+        }
+        SetWindowPlacement(w32->window, &wp);
+        return;
+    }
+
+    const bool is_maximized = wp.showCmd == SW_SHOWMAXIMIZED;
+    if (is_maximized != w32->opts->window_maximized) {
+        ShowWindow(w32->window, w32->opts->window_maximized ? SW_SHOWMAXIMIZED
+                                                            : SW_SHOWNOACTIVATE);
+    }
+}
+
+// True if the window's own WS_VISIBLE style bit is set.
+static bool is_visible(HWND window)
+{
+    // Unlike IsWindowVisible, this doesn't check the window's parents
+    const LONG_PTR style = GetWindowLongPtrW(window, GWL_STYLE);
+    return (style & WS_VISIBLE) != 0;
+}
+
+// Set the mpv window's display affinity from the window_affinity option.
+// This will affect how it's displayed on the desktop and in system-level
+// operations like taking screenshots. No-op without state or when embedded.
+static void update_affinity(struct vo_w32_state *w32)
+{
+    if (w32 && !w32->parent)
+        SetWindowDisplayAffinity(w32->window, w32->opts->window_affinity);
+}
+
+// Apply w32->windowrc (plus borders) and the ontop option to the real window,
+// show it if it isn't visible yet, tell the taskbar about the fullscreen
+// state, refresh the snapped_* flags and finally signal VO_EVENT_RESIZE.
+// No-op for embedded windows.
+static void update_window_state(struct vo_w32_state *w32)
+{
+    if (w32->parent)
+        return;
+
+    // wr is the outer window rect; windowrc itself is left as it is
+    RECT wr = w32->windowrc;
+    add_window_borders(w32, w32->window, &wr);
+
+    SetWindowPos(w32->window, w32->opts->ontop ? HWND_TOPMOST : HWND_NOTOPMOST,
+                 wr.left, wr.top, rect_w(wr), rect_h(wr),
+                 SWP_FRAMECHANGED | SWP_NOACTIVATE | SWP_NOOWNERZORDER);
+
+    // Show the window if it's not yet visible
+    if (!is_visible(w32->window)) {
+        if (w32->opts->window_minimized) {
+            ShowWindow(w32->window, SW_SHOWMINNOACTIVE);
+            update_maximized_state(w32); // Set the WPF_RESTORETOMAXIMIZED flag
+        } else if (w32->opts->window_maximized) {
+            ShowWindow(w32->window, SW_SHOWMAXIMIZED);
+        } else {
+            ShowWindow(w32->window, SW_SHOW);
+        }
+    }
+
+    // Notify the taskbar about the fullscreen state only after the window
+    // is visible, to make sure the taskbar item has already been created
+    if (w32->taskbar_list) {
+        ITaskbarList2_MarkFullscreenWindow(w32->taskbar_list,
+                                           w32->window, w32->current_fs);
+    }
+
+    // Update snapping status if needed
+    if (w32->opts->snap_window && !w32->parent &&
+        !w32->current_fs && !IsMaximized(w32->window)) {
+        RECT wa = get_working_area(w32);
+
+        adjust_working_area_for_extended_frame(&wa, &wr, w32->window);
+
+        // An edge counts as snapped only on exact coincidence with the
+        // (adjusted) working area edge.
+        // snapped_left & snapped_right are mutually exclusive
+        if (wa.left == wr.left && wa.right == wr.right) {
+            // Leave as is.
+        } else if (wa.left == wr.left) {
+            w32->snapped_left = 1;
+            w32->snapped_right = 0;
+        } else if (wa.right == wr.right) {
+            w32->snapped_right = 1;
+            w32->snapped_left = 0;
+        } else {
+            w32->snapped_left = w32->snapped_right = 0;
+        }
+
+        // snapped_top & snapped_bottom are mutually exclusive
+        if (wa.top == wr.top && wa.bottom == wr.bottom) {
+            // Leave as is.
+        } else if (wa.top == wr.top) {
+            w32->snapped_top = 1;
+            w32->snapped_bottom = 0;
+        } else if (wa.bottom == wr.bottom) {
+            w32->snapped_bottom = 1;
+            w32->snapped_top = 0;
+        } else {
+            w32->snapped_top = w32->snapped_bottom = 0;
+        }
+    }
+
+    signal_events(w32, VO_EVENT_RESIZE);
+}
+
+// Pass the window_corners option to DWM as the window corner preference;
+// 0 is passed instead while fullscreen. No-op for embedded windows.
+static void update_corners_pref(const struct vo_w32_state *w32)
+{
+    if (w32->parent)
+        return;
+
+    int corner_pref = 0;
+    if (!w32->current_fs)
+        corner_pref = w32->opts->window_corners;
+
+    DwmSetWindowAttribute(w32->window, DWMWA_WINDOW_CORNER_PREFERENCE,
+                          &corner_pref, sizeof(corner_pref));
+}
+
+// Recompute the fullscreen state, corner preference and window style, apply
+// the one-time fit-on-screen constraint if it is still pending, and then
+// push everything to the real window. No-op for embedded windows.
+static void reinit_window_state(struct vo_w32_state *w32)
+{
+    if (w32->parent)
+        return;
+
+    // The order matters: fs state should be updated prior to changing styles
+    update_fullscreen_state(w32);
+    update_corners_pref(w32);
+    update_window_style(w32);
+
+    // fit_on_screen is applied at most once when/if applicable (normal win).
+    // The flag stays set while fullscreen or maximized, so the fit is
+    // deferred until the window is in its normal state.
+    if (w32->fit_on_screen && !w32->current_fs && !IsMaximized(w32->window)) {
+        fit_window_on_screen(w32);
+        w32->fit_on_screen = false;
+    }
+
+    // Show and activate the window after all window state parameters were set
+    update_window_state(w32);
+}
+
+// Follow Windows settings and update dark mode state
+// Microsoft documented how to enable dark mode for title bar:
+// https://learn.microsoft.com/windows/apps/desktop/modernize/apply-windows-themes
+// https://learn.microsoft.com/windows/win32/api/dwmapi/ne-dwmapi-dwmwindowattribute
+// Documentation says to set the DWMWA_USE_IMMERSIVE_DARK_MODE attribute to
+// TRUE to honor dark mode for the window, FALSE to always use light mode. While
+// in fact setting it to TRUE causes dark mode to be always enabled, regardless
+// of the settings. Since it is quite unlikely that it will be fixed, just use
+// UxTheme API to check if dark mode should be applied and while at it enable it
+// fully. Ideally this function should only call the DwmSetWindowAttribute(),
+// but it just doesn't work as documented.
+static void update_dark_mode(const struct vo_w32_state *w32)
+{
+    if (w32->api.pSetPreferredAppMode)
+        w32->api.pSetPreferredAppMode(1); // allow dark mode
+
+    HIGHCONTRAST hc = {sizeof(hc)};
+    SystemParametersInfo(SPI_GETHIGHCONTRAST, sizeof(hc), &hc, 0);
+    const bool high_contrast = hc.dwFlags & HCF_HIGHCONTRASTON;
+
+    // Dark mode is never used together with high contrast. Otherwise ask
+    // pShouldAppsUseDarkMode; if it is not available, just assume true.
+    BOOL use_dark_mode = FALSE;
+    if (!high_contrast) {
+        use_dark_mode = !w32->api.pShouldAppsUseDarkMode ||
+                        w32->api.pShouldAppsUseDarkMode();
+    }
+
+    const wchar_t *theme = use_dark_mode ? L"DarkMode_Explorer" : L"";
+    SetWindowTheme(w32->window, theme, NULL);
+
+    DwmSetWindowAttribute(w32->window, DWMWA_USE_IMMERSIVE_DARK_MODE,
+                          &use_dark_mode, sizeof(use_dark_mode));
+}
+
+// Pass the backdrop_type option to DWM as the system backdrop type.
+// No-op for embedded windows.
+static void update_backdrop(const struct vo_w32_state *w32)
+{
+    if (!w32->parent) {
+        int backdrop = w32->opts->backdrop_type;
+        DwmSetWindowAttribute(w32->window, DWMWA_SYSTEMBACKDROP_TYPE,
+                              &backdrop, sizeof(backdrop));
+    }
+}
+
+static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam,
+                                LPARAM lParam) // window procedure of the "mpv" class
+{
+    struct vo_w32_state *w32 = (void*)GetWindowLongPtrW(hWnd, GWLP_USERDATA);
+    if (!w32) {
+        // WM_NCCREATE is supposed to be the first message that a window
+        // receives. It allows struct vo_w32_state to be passed from
+        // CreateWindow's lpParam to the window procedure. However, as a
+        // longstanding Windows bug, overlapped top-level windows will get a
+        // WM_GETMINMAXINFO before WM_NCCREATE. This can be ignored.
+        if (message != WM_NCCREATE)
+            return DefWindowProcW(hWnd, message, wParam, lParam);
+
+        CREATESTRUCTW *cs = (CREATESTRUCTW *)lParam;
+        w32 = cs->lpCreateParams;
+        w32->window = hWnd;
+        SetWindowLongPtrW(hWnd, GWLP_USERDATA, (LONG_PTR)w32); // read back on later messages
+    }
+
+    // The dispatch queue should be processed as soon as possible to prevent
+    // playback glitches, since it is likely blocking the VO thread
+    if (!w32->in_dispatch) { // don't process the queue again from a nested call
+        w32->in_dispatch = true;
+        mp_dispatch_queue_process(w32->dispatch, 0);
+        w32->in_dispatch = false;
+    }
+
+    switch (message) {
+    case WM_ERASEBKGND:
+        if (w32->cleared || !w32->opts->border || w32->current_fs)
+            return TRUE; // report the background as erased, skipping the default erase
+        break;
+    case WM_PAINT:
+        w32->cleared = true;
+        signal_events(w32, VO_EVENT_EXPOSE);
+        break;
+    case WM_MOVE: {
+        w32->moving = false;
+        const int x = GET_X_LPARAM(lParam), y = GET_Y_LPARAM(lParam);
+        OffsetRect(&w32->windowrc, x - w32->windowrc.left,
+                                   y - w32->windowrc.top);
+
+        // Window may intersect with new monitors (see VOCTRL_GET_DISPLAY_NAMES)
+        signal_events(w32, VO_EVENT_WIN_STATE);
+
+        update_display_info(w32);  // if we moved between monitors
+        break;
+    }
+    case WM_MOVING: {
+        w32->moving = true;
+        RECT *rc = (RECT*)lParam;
+        if (snap_to_screen_edges(w32, rc))
+            return TRUE;
+        break;
+    }
+    case WM_ENTERSIZEMOVE:
+        w32->moving = true;
+        if (w32->snapped != 0) {
+            // Save the cursor offset from the window borders,
+            // so the player window can be unsnapped later
+            RECT rc;
+            POINT cursor;
+            if (GetWindowRect(w32->window, &rc) && GetCursorPos(&cursor)) {
+                w32->snap_dx = cursor.x - rc.left;
+                w32->snap_dy = cursor.y - rc.top;
+            }
+        }
+        break;
+    case WM_EXITSIZEMOVE:
+        w32->moving = false;
+        break;
+    case WM_SIZE: {
+        const int w = LOWORD(lParam), h = HIWORD(lParam); // new client area size
+        if (w > 0 && h > 0) {
+            w32->windowrc.right = w32->windowrc.left + w;
+            w32->windowrc.bottom = w32->windowrc.top + h;
+            signal_events(w32, VO_EVENT_RESIZE);
+            MP_VERBOSE(w32, "resize window: %d:%d\n", w, h);
+        }
+
+        // Window may have been minimized, maximized or restored
+        if (is_visible(w32->window)) {
+            WINDOWPLACEMENT wp = { .length = sizeof wp };
+            GetWindowPlacement(w32->window, &wp);
+
+            bool is_minimized = wp.showCmd == SW_SHOWMINIMIZED;
+            if (w32->opts->window_minimized != is_minimized) {
+                w32->opts->window_minimized = is_minimized;
+                m_config_cache_write_opt(w32->opts_cache,
+                                         &w32->opts->window_minimized);
+            }
+
+            bool is_maximized = wp.showCmd == SW_SHOWMAXIMIZED ||
+                (wp.showCmd == SW_SHOWMINIMIZED &&
+                    (wp.flags & WPF_RESTORETOMAXIMIZED));
+            if (w32->opts->window_maximized != is_maximized) {
+                w32->opts->window_maximized = is_maximized;
+                m_config_cache_write_opt(w32->opts_cache,
+                                         &w32->opts->window_maximized);
+            }
+        }
+
+        signal_events(w32, VO_EVENT_WIN_STATE);
+
+        update_display_info(w32);
+        break;
+    }
+    case WM_SIZING:
+        if (w32->opts->keepaspect && w32->opts->keepaspect_window &&
+            !w32->current_fs && !w32->parent)
+        {
+            RECT *rc = (RECT*)lParam;
+            // get client area of the windows if it had the rect rc
+            // (subtracting the window borders)
+            RECT r = *rc;
+            subtract_window_borders(w32, w32->window, &r);
+            int c_w = rect_w(r), c_h = rect_h(r);
+            float aspect = w32->o_dwidth / (float) MPMAX(w32->o_dheight, 1);
+            int d_w = c_h * aspect - c_w;
+            int d_h = c_w / aspect - c_h;
+            int d_corners[4] = { d_w, d_h, -d_w, -d_h }; // same order as corners[]
+            int corners[4] = { rc->left, rc->top, rc->right, rc->bottom };
+            int corner = get_resize_border(w32, wParam);
+            if (corner >= 0)
+                corners[corner] -= d_corners[corner];
+            *rc = (RECT) { corners[0], corners[1], corners[2], corners[3] };
+            return TRUE;
+        }
+        break;
+    case WM_DPICHANGED:
+        update_display_info(w32);
+
+        RECT *rc = (RECT*)lParam; // window rect suggested for the new DPI
+        w32->windowrc = *rc;
+        subtract_window_borders(w32, w32->window, &w32->windowrc);
+        update_window_state(w32);
+        break;
+    case WM_CLOSE:
+        // Don't destroy the window yet to not lose wakeup events.
+        mp_input_put_key(w32->input_ctx, MP_KEY_CLOSE_WIN);
+        return 0;
+    case WM_NCDESTROY: // Sometimes only WM_NCDESTROY is received in --wid mode
+    case WM_DESTROY:
+        if (w32->destroyed)
+            break;
+        // If terminate is not set, something else destroyed the window. This
+        // can also happen in --wid mode when the parent window is destroyed.
+        if (!w32->terminate)
+            mp_input_put_key(w32->input_ctx, MP_KEY_CLOSE_WIN);
+        RevokeDragDrop(w32->window);
+        w32->destroyed = true;
+        w32->window = NULL;
+        PostQuitMessage(0);
+        break;
+    case WM_SYSCOMMAND:
+        switch (wParam & 0xFFF0) {
+        case SC_SCREENSAVE:
+        case SC_MONITORPOWER:
+            if (w32->disable_screensaver) {
+                MP_VERBOSE(w32, "killing screensaver\n");
+                return 0;
+            }
+            break;
+        case SC_RESTORE:
+            if (IsMaximized(w32->window) && w32->current_fs) {
+                w32->toggle_fs = true;
+                reinit_window_state(w32);
+
+                return 0;
+            }
+            break;
+        }
+        break;
+    case WM_NCACTIVATE:
+        // Cosmetic to remove blinking window border when initializing window
+        if (!w32->opts->border)
+            lParam = -1; // -1: DefWindowProc does not repaint the non-client area
+        break;
+    case WM_NCHITTEST:
+        // Provide sizing handles for borderless windows
+        if ((!w32->opts->border || !w32->opts->title_bar) && !w32->current_fs) {
+            return borderless_nchittest(w32, GET_X_LPARAM(lParam),
+                                        GET_Y_LPARAM(lParam));
+        }
+        break;
+    case WM_APPCOMMAND:
+        if (handle_appcommand(w32, GET_APPCOMMAND_LPARAM(lParam)))
+            return TRUE;
+        break;
+    case WM_SYSKEYDOWN:
+        // Open the window menu on Alt+Space. Normally DefWindowProc opens the
+        // window menu in response to WM_SYSCHAR, but since mpv translates its
+        // own keyboard input, WM_SYSCHAR isn't generated, so the window menu
+        // must be opened manually.
+        if (wParam == VK_SPACE) {
+            SendMessage(w32->window, WM_SYSCOMMAND, SC_KEYMENU, ' ');
+            return 0;
+        }
+
+        handle_key_down(w32, wParam, HIWORD(lParam));
+        if (wParam == VK_F10)
+            return 0;
+        break;
+    case WM_KEYDOWN:
+        handle_key_down(w32, wParam, HIWORD(lParam));
+        break;
+    case WM_SYSKEYUP:
+    case WM_KEYUP:
+        handle_key_up(w32, wParam, HIWORD(lParam));
+        if (wParam == VK_F10)
+            return 0;
+        break;
+    case WM_CHAR:
+    case WM_SYSCHAR:
+        if (handle_char(w32, wParam))
+            return 0;
+        break;
+    case WM_KILLFOCUS:
+        mp_input_put_key(w32->input_ctx, MP_INPUT_RELEASE_ALL);
+        w32->focused = false;
+        signal_events(w32, VO_EVENT_FOCUS);
+        return 0;
+    case WM_SETFOCUS:
+        w32->focused = true;
+        signal_events(w32, VO_EVENT_FOCUS);
+        return 0;
+    case WM_SETCURSOR:
+        // The cursor should only be hidden if the mouse is in the client area
+        // and if the window isn't in menu mode (HIWORD(lParam) is non-zero)
+        w32->can_set_cursor = LOWORD(lParam) == HTCLIENT && HIWORD(lParam);
+        if (w32->can_set_cursor && !w32->cursor_visible) {
+            SetCursor(NULL);
+            return TRUE;
+        }
+        break;
+    case WM_MOUSELEAVE:
+        w32->tracking = FALSE;
+        mp_input_put_key(w32->input_ctx, MP_KEY_MOUSE_LEAVE);
+        break;
+    case WM_MOUSEMOVE: {
+        if (!w32->tracking) {
+            w32->tracking = TrackMouseEvent(&w32->trackEvent);
+            mp_input_put_key(w32->input_ctx, MP_KEY_MOUSE_ENTER);
+        }
+        // Windows can send spurious mouse events, which would make the mpv
+        // core unhide the mouse cursor on completely unrelated events. See:
+        //  https://blogs.msdn.com/b/oldnewthing/archive/2003/10/01/55108.aspx
+        int x = GET_X_LPARAM(lParam);
+        int y = GET_Y_LPARAM(lParam);
+        if (x != w32->mouse_x || y != w32->mouse_y) {
+            w32->mouse_x = x;
+            w32->mouse_y = y;
+            mp_input_set_mouse_pos(w32->input_ctx, x, y);
+        }
+        break;
+    }
+    case WM_LBUTTONDOWN:
+        if (handle_mouse_down(w32, MP_MBTN_LEFT, GET_X_LPARAM(lParam),
+                                                 GET_Y_LPARAM(lParam)))
+            return 0;
+        break;
+    case WM_LBUTTONUP:
+        handle_mouse_up(w32, MP_MBTN_LEFT);
+        break;
+    case WM_MBUTTONDOWN:
+        handle_mouse_down(w32, MP_MBTN_MID, GET_X_LPARAM(lParam),
+                                            GET_Y_LPARAM(lParam));
+        break;
+    case WM_MBUTTONUP:
+        handle_mouse_up(w32, MP_MBTN_MID);
+        break;
+    case WM_RBUTTONDOWN:
+        handle_mouse_down(w32, MP_MBTN_RIGHT, GET_X_LPARAM(lParam),
+                                              GET_Y_LPARAM(lParam));
+        break;
+    case WM_RBUTTONUP:
+        handle_mouse_up(w32, MP_MBTN_RIGHT);
+        break;
+    case WM_MOUSEWHEEL:
+        handle_mouse_wheel(w32, false, GET_WHEEL_DELTA_WPARAM(wParam));
+        return 0;
+    case WM_MOUSEHWHEEL:
+        handle_mouse_wheel(w32, true, GET_WHEEL_DELTA_WPARAM(wParam));
+        // Some buggy mouse drivers (SetPoint) stop delivering WM_MOUSEHWHEEL
+        // events when the message loop doesn't return TRUE (even on Windows 7)
+        return TRUE;
+    case WM_XBUTTONDOWN:
+        handle_mouse_down(w32,
+            HIWORD(wParam) == 1 ? MP_MBTN_BACK : MP_MBTN_FORWARD, // 1 is XBUTTON1
+            GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam));
+        break;
+    case WM_XBUTTONUP:
+        handle_mouse_up(w32,
+            HIWORD(wParam) == 1 ? MP_MBTN_BACK : MP_MBTN_FORWARD);
+        break;
+    case WM_DISPLAYCHANGE:
+        force_update_display_info(w32);
+        break;
+    case WM_SETTINGCHANGE:
+        update_dark_mode(w32);
+        break;
+    case WM_NCCALCSIZE:
+        if (!w32->opts->border)
+            return 0; // borderless: leave the proposed rect as is (no non-client area)
+        // Apparently removing WS_CAPTION disables some window animation, instead
+        // just reduce non-client size to remove title bar.
+        if (wParam && lParam && w32->opts->border && !w32->opts->title_bar &&
+            !w32->current_fs && !w32->parent)
+        {
+            ((LPNCCALCSIZE_PARAMS) lParam)->rgrc[0].top -= get_title_bar_height(w32);
+        }
+        break;
+    }
+
+    if (message == w32->tbtnCreatedMsg) { // "TaskbarButtonCreated", see gui_thread()
+        w32->tbtnCreated = true;
+        update_playback_state(w32);
+        return 0;
+    }
+
+    return DefWindowProcW(hWnd, message, wParam, lParam); // everything not returned above
+}
+
+// The "mpv" window class; registered at most once, see get_window_class().
+static mp_once window_class_init_once = MP_STATIC_ONCE_INITIALIZER;
+static ATOM window_class;
+static void register_window_class(void)
+{
+    WNDCLASSEXW wc = { .cbSize = sizeof(wc) };
+    wc.style = CS_HREDRAW | CS_VREDRAW;
+    wc.lpfnWndProc = WndProc;
+    wc.hInstance = HINST_THISCOMPONENT;
+    wc.hIcon = LoadIconW(HINST_THISCOMPONENT, L"IDI_ICON1");
+    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
+    wc.hbrBackground = (HBRUSH) GetStockObject(BLACK_BRUSH);
+    wc.lpszClassName = L"mpv";
+    window_class = RegisterClassExW(&wc);
+}
+
+static ATOM get_window_class(void) // returns the class atom, registering on first use
+{
+    mp_exec_once(&window_class_init_once, register_window_class);
+    return window_class;
+}
+
+// Give an embedded mpv window the size of the client area of `parent`.
+static void resize_child_win(HWND parent)
+{
+    // The hook functions run for all windows on the parent window's thread,
+    // so `parent` does not necessarily have an mpv window as a child.
+    LPWSTR cls_name = (LPWSTR)MAKEINTATOM(get_window_class());
+    HWND mpv_win = FindWindowExW(parent, NULL, cls_name, NULL);
+    if (!mpv_win)
+        return;
+    // A matching class is not enough, it must come from this instance
+    LONG_PTR owner = GetWindowLongPtrW(mpv_win, GWLP_HINSTANCE);
+    if (owner != (LONG_PTR)HINST_THISCOMPONENT)
+        return;
+
+    // Nothing to do if a size is unavailable or both already match
+    RECT cur, want;
+    if (!GetClientRect(mpv_win, &cur) || !GetClientRect(parent, &want))
+        return;
+    if (EqualRect(&cur, &want))
+        return;
+    const UINT flags = SWP_ASYNCWINDOWPOS | SWP_NOACTIVATE | SWP_NOZORDER |
+                       SWP_NOOWNERZORDER | SWP_NOSENDCHANGING;
+    SetWindowPos(mpv_win, NULL, 0, 0, want.right, want.bottom, flags);
+}
+
+// Windows hook procedure: follow WM_WINDOWPOSCHANGED of the hooked thread
+static LRESULT CALLBACK parent_win_hook(int nCode, WPARAM wParam, LPARAM lParam)
+{
+    if (nCode == HC_ACTION) {
+        const CWPSTRUCT *msg = (const CWPSTRUCT*)lParam;
+        if (msg->message == WM_WINDOWPOSCHANGED)
+            resize_child_win(msg->hwnd);
+    }
+    // Whatever happened above, pass the call on to the next hook
+    return CallNextHookEx(NULL, nCode, wParam, lParam);
+}
+
+// WinEvent callback: follow location changes reported for a window itself
+static void CALLBACK parent_evt_hook(HWINEVENTHOOK hWinEventHook, DWORD event,
+    HWND hwnd, LONG idObject, LONG idChild, DWORD dwEventThread,
+    DWORD dwmsEventTime)
+{
+    bool is_window_itself = hwnd && idObject == OBJID_WINDOW &&
+                            idChild == CHILDID_SELF;
+    if (event == EVENT_OBJECT_LOCATIONCHANGE && is_window_itself)
+        resize_child_win(hwnd);
+}
+
+// Install the hook that reports size changes of the parent window.
+static void install_parent_hook(struct vo_w32_state *w32)
+{
+    DWORD pid = 0; // stays 0 if the lookup below fails
+    DWORD tid = GetWindowThreadProcessId(w32->parent, &pid);
+    if (!tid) // invalid parent window; thread id 0 would not mean its thread
+        return;
+    // If the parent lives inside the current process, install a Windows hook
+    if (pid == GetCurrentProcessId()) {
+        w32->parent_win_hook = SetWindowsHookExW(WH_CALLWNDPROC,
+            parent_win_hook, NULL, tid);
+    } else { // WinEvent hooks seem less smooth, but work across processes
+        w32->parent_evt_hook = SetWinEventHook(
+            EVENT_OBJECT_LOCATIONCHANGE, EVENT_OBJECT_LOCATIONCHANGE,
+            NULL, parent_evt_hook, pid, tid, WINEVENT_OUTOFCONTEXT);
+    }
+}
+
+static void remove_parent_hook(struct vo_w32_state *w32)
+{
+    if (w32->parent_evt_hook) // WinEvent hook (parent in another process)
+        UnhookWinEvent(w32->parent_evt_hook);
+    if (w32->parent_win_hook) // Windows hook (parent in this process)
+        UnhookWindowsHookEx(w32->parent_win_hook);
+}
+
+// Pump the message queue of this thread and dispatch what arrives.
+// Does not return before termination of the thread was requested.
+static void run_message_loop(struct vo_w32_state *w32)
+{
+    for (MSG msg; GetMessageW(&msg, NULL, 0, 0) > 0; )
+        DispatchMessageW(&msg);
+
+    // GetMessageW() returned 0 (WM_QUIT) or failed, so the loop above is
+    // over. External requests still have to be answered until termination
+    // is requested, so keep processing the dispatch queue.
+    while (!w32->terminate)
+        mp_dispatch_queue_process(w32->dispatch, 1000);
+}
+
+static void gui_thread_reconfig(void *ptr) // dispatch callback of vo_w32_config()
+{
+    struct vo_w32_state *w32 = ptr;
+    struct vo *vo = w32->vo;
+
+    RECT r = get_working_area(w32);
+    // for normal window which is auto-positioned (centered), center the window
+    // rather than the content (by subtracting the borders from the work area)
+    if (!w32->current_fs && !IsMaximized(w32->window) && w32->opts->border &&
+        !w32->opts->geometry.xy_valid /* specific position not requested */)
+    {
+        subtract_window_borders(w32, w32->window, &r);
+    }
+    struct mp_rect screen = { r.left, r.top, r.right, r.bottom };
+    struct vo_win_geometry geo;
+
+    RECT monrc = get_monitor_info(w32).rcMonitor;
+    struct mp_rect mon = { monrc.left, monrc.top, monrc.right, monrc.bottom };
+
+    if (w32->dpi_scale == 0) // no DPI scale known yet
+        force_update_display_info(w32);
+
+    vo_calc_window_geometry3(vo, &screen, &mon, w32->dpi_scale, &geo);
+    vo_apply_window_geometry(vo, &geo);
+
+    bool reset_size = (w32->o_dwidth != vo->dwidth ||
+                       w32->o_dheight != vo->dheight) &&
+                       w32->opts->auto_window_resize;
+
+    w32->o_dwidth = vo->dwidth; // remember the size computed for this reconfig
+    w32->o_dheight = vo->dheight;
+
+    if (!w32->parent && !w32->window_bounds_initialized) { // first time, no parent
+        SetRect(&w32->windowrc, geo.win.x0, geo.win.y0,
+                geo.win.x0 + vo->dwidth, geo.win.y0 + vo->dheight);
+        w32->prev_windowrc = w32->windowrc;
+        w32->window_bounds_initialized = true;
+        w32->win_force_pos = geo.flags & VO_WIN_FORCE_POS;
+        w32->fit_on_screen = !w32->win_force_pos;
+        goto finish;
+    }
+
+    // The rect which size is going to be modified.
+    RECT *rc = &w32->windowrc;
+
+    // The desired size always matches the window size in wid mode.
+    if (!reset_size || w32->parent) {
+        GetClientRect(w32->window, &r);
+        // Restore vo_dwidth and vo_dheight, which were reset in vo_config()
+        vo->dwidth = r.right;
+        vo->dheight = r.bottom;
+    } else {
+        if (w32->current_fs) // resize the saved non-fullscreen rect instead
+            rc = &w32->prev_windowrc;
+        w32->fit_on_screen = true;
+    }
+
+    resize_and_move_rect(w32, rc, vo->dwidth, vo->dheight);
+
+finish:
+    reinit_window_state(w32);
+}
+
+// Resize the window; the first call also makes it visible. The actual work
+// is done by gui_thread_reconfig(), which is run through the dispatch queue.
+void vo_w32_config(struct vo *vo)
+{
+    mp_dispatch_run(vo->w32->dispatch, gui_thread_reconfig, vo->w32);
+}
+
+// Resolve optional API entry points at runtime; unavailable ones are left NULL.
+static void w32_api_load(struct vo_w32_state *w32)
+{
+    HMODULE shcore_dll = LoadLibraryW(L"shcore.dll");
+    // Available since Win8.1
+    w32->api.pGetDpiForMonitor = !shcore_dll ? NULL :
+                (void *)GetProcAddress(shcore_dll, "GetDpiForMonitor");
+
+    HMODULE user32_dll = LoadLibraryW(L"user32.dll");
+    // Available since Win10
+    w32->api.pAdjustWindowRectExForDpi = !user32_dll ? NULL :
+                (void *)GetProcAddress(user32_dll, "AdjustWindowRectExForDpi");
+
+    // imm32.dll must be loaded dynamically
+    // to account for machines without East Asian language support
+    HMODULE imm32_dll = LoadLibraryW(L"imm32.dll");
+    w32->api.pImmDisableIME = !imm32_dll ? NULL :
+                (void *)GetProcAddress(imm32_dll, "ImmDisableIME");
+
+    // Dark mode related functions, available since the 1809 Windows 10 update.
+    // They are looked up by ordinal, and on older builds those ordinals may
+    // point to unexpected code/data, so check the Windows build number first.
+    // Checking the uxtheme.dll version would work too, with more boilerplate.
+    void (WINAPI *pRtlGetNtVersionNumbers)(LPDWORD, LPDWORD, LPDWORD) =
+        (void *)GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetNtVersionNumbers");
+    DWORD major = 0, build = 0; // i.e. no dark mode if the version is unknown
+    if (pRtlGetNtVersionNumbers)
+        pRtlGetNtVersionNumbers(&major, NULL, &build);
+    build &= ~0xF0000000; // only the low 28 bits are compared below
+
+    HMODULE uxtheme_dll = (major < 10 || build < 17763) ? NULL :
+                GetModuleHandleW(L"uxtheme.dll");
+    w32->api.pShouldAppsUseDarkMode = !uxtheme_dll ? NULL :
+                (void *)GetProcAddress(uxtheme_dll, MAKEINTRESOURCEA(132));
+    w32->api.pSetPreferredAppMode = !uxtheme_dll ? NULL :
+                (void *)GetProcAddress(uxtheme_dll, MAKEINTRESOURCEA(135));
+}
+
+static MP_THREAD_VOID gui_thread(void *ptr) // thread function started by vo_w32_init()
+{
+    struct vo_w32_state *w32 = ptr;
+    bool ole_ok = false;
+    int res = 0; // set to 1 once initialization succeeded
+
+    mp_thread_set_name("window");
+
+    w32_api_load(w32);
+
+    // Disables the IME for windows on this thread
+    if (w32->api.pImmDisableIME)
+        w32->api.pImmDisableIME(0);
+
+    if (w32->opts->WinID >= 0)
+        w32->parent = (HWND)(intptr_t)(w32->opts->WinID);
+
+    ATOM cls = get_window_class();
+    if (w32->parent) {
+        RECT r;
+        GetClientRect(w32->parent, &r);
+        CreateWindowExW(WS_EX_NOPARENTNOTIFY, (LPWSTR)MAKEINTATOM(cls), L"mpv",
+                        WS_CHILD | WS_VISIBLE, 0, 0, r.right, r.bottom,
+                        w32->parent, 0, HINST_THISCOMPONENT, w32);
+
+        // Install a hook to get notifications when the parent changes size
+        if (w32->window) // set by WndProc on WM_NCCREATE
+            install_parent_hook(w32);
+    } else {
+        CreateWindowExW(0, (LPWSTR)MAKEINTATOM(cls), L"mpv",
+                        update_style(w32, 0), CW_USEDEFAULT, SW_HIDE, 100, 100,
+                        0, 0, HINST_THISCOMPONENT, w32);
+    }
+
+    if (!w32->window) {
+        MP_ERR(w32, "unable to create window!\n");
+        goto done;
+    }
+
+    update_dark_mode(w32);
+    update_corners_pref(w32);
+    if (w32->opts->window_affinity)
+        update_affinity(w32);
+    if (w32->opts->backdrop_type)
+        update_backdrop(w32);
+
+    if (SUCCEEDED(OleInitialize(NULL))) {
+        ole_ok = true;
+
+        IDropTarget *dt = mp_w32_droptarget_create(w32->log, w32->opts, w32->input_ctx);
+        RegisterDragDrop(w32->window, dt);
+
+        // ITaskbarList2 has the MarkFullscreenWindow method, which is used to
+        // make sure the taskbar is hidden when mpv goes fullscreen
+        if (SUCCEEDED(CoCreateInstance(&CLSID_TaskbarList, NULL,
+                                       CLSCTX_INPROC_SERVER, &IID_ITaskbarList2,
+                                       (void**)&w32->taskbar_list)))
+        {
+            if (FAILED(ITaskbarList2_HrInit(w32->taskbar_list))) {
+                ITaskbarList2_Release(w32->taskbar_list);
+                w32->taskbar_list = NULL;
+            }
+        }
+
+        // ITaskbarList3 has methods for status indication on taskbar buttons,
+        // however that interface is only available on Win7/2008 R2 or newer
+        if (SUCCEEDED(CoCreateInstance(&CLSID_TaskbarList, NULL,
+                                       CLSCTX_INPROC_SERVER, &IID_ITaskbarList3,
+                                       (void**)&w32->taskbar_list3)))
+        {
+            if (FAILED(ITaskbarList3_HrInit(w32->taskbar_list3))) {
+                ITaskbarList3_Release(w32->taskbar_list3);
+                w32->taskbar_list3 = NULL;
+            } else {
+                w32->tbtnCreatedMsg = RegisterWindowMessage(L"TaskbarButtonCreated");
+            }
+        }
+    } else {
+        MP_ERR(w32, "Failed to initialize OLE/COM\n");
+    }
+
+    w32->tracking   = FALSE;
+    w32->trackEvent = (TRACKMOUSEEVENT){
+        .cbSize    = sizeof(TRACKMOUSEEVENT),
+        .dwFlags   = TME_LEAVE,
+        .hwndTrack = w32->window,
+    };
+
+    if (w32->parent)
+        EnableWindow(w32->window, 0); // disable the child window when a parent is set
+
+    w32->cursor_visible = true;
+    w32->moving = false;
+    w32->snapped = 0;
+    w32->snap_dx = w32->snap_dy = 0;
+
+    mp_dispatch_set_wakeup_fn(w32->dispatch, wakeup_gui_thread, w32);
+
+    res = 1;
+done:
+
+    mp_rendezvous(w32, res); // init barrier, hands res to vo_w32_init()
+
+    // This blocks until the GUI thread is to be exited.
+    if (res)
+        run_message_loop(w32);
+
+    MP_VERBOSE(w32, "uninit\n");
+
+    remove_parent_hook(w32);
+    if (w32->window && !w32->destroyed)
+        DestroyWindow(w32->window);
+    if (w32->taskbar_list)
+        ITaskbarList2_Release(w32->taskbar_list);
+    if (w32->taskbar_list3)
+        ITaskbarList3_Release(w32->taskbar_list3);
+    if (ole_ok)
+        OleUninitialize();
+    SetThreadExecutionState(ES_CONTINUOUS); // same call as VOCTRL_RESTORE_SCREENSAVER
+    MP_THREAD_RETURN();
+}
+
+// Allocate the window state and start the GUI thread, which creates the window.
+// Returns false, with vo->w32 reset to NULL, if the thread can't start or init.
+bool vo_w32_init(struct vo *vo)
+{
+    assert(!vo->w32);
+
+    struct vo_w32_state *w32 = talloc_ptrtype(vo, w32);
+    *w32 = (struct vo_w32_state){
+        .log = mp_log_new(w32, vo->log, "win32"),
+        .vo = vo,
+        .opts_cache = m_config_cache_alloc(w32, vo->global, &vo_sub_opts),
+        .input_ctx = vo->input_ctx,
+        .dispatch = mp_dispatch_create(w32),
+    };
+    w32->opts = w32->opts_cache->opts;
+    vo->w32 = w32;
+
+    // Start the thread, then meet it at the init barrier to learn the outcome
+    bool ok = mp_thread_create(&w32->thread, gui_thread, w32) == 0;
+    if (ok && !mp_rendezvous(w32, 0)) {
+        mp_thread_join(w32->thread);
+        ok = false;
+    }
+    if (!ok) {
+        talloc_free(w32);
+        vo->w32 = NULL;
+        return false;
+    }
+
+    // The UI has its own thread; this function runs on the renderer thread.
+    // Apply magic MMCSS cargo-cult, which might stop clock rate throttling.
+    if (vo->opts->mmcss_profile[0]) {
+        wchar_t *profile = mp_from_utf8(NULL, vo->opts->mmcss_profile);
+        w32->avrt_handle = AvSetMmThreadCharacteristicsW(profile, &(DWORD){0});
+        talloc_free(profile);
+    }
+    return true;
+}
+
+struct disp_names_data {
+    HMONITOR assoc; // monitor added by get_disp_names(), skipped by the callback
+    int count;      // number of entries in names
+    char **names;   // device names converted with mp_to_utf8()
+};
+
+// EnumDisplayMonitors() callback: append the device name of one monitor
+static BOOL CALLBACK disp_names_proc(HMONITOR mon, HDC dc, LPRECT r, LPARAM p)
+{
+    struct disp_names_data *out = (struct disp_names_data*)p;
+    MONITORINFOEXW info = { .cbSize = sizeof info };
+
+    // out->assoc was already put in the list by get_disp_names()
+    bool wanted = mon != out->assoc;
+    if (wanted && GetMonitorInfoW(mon, (MONITORINFO*)&info)) {
+        char *name = mp_to_utf8(NULL, info.szDevice);
+        MP_TARRAY_APPEND(NULL, out->names, out->count, name);
+    }
+
+    return TRUE;
+}
+
+// NULL-terminated list of the window's monitor names, w32->monitor first
+static char **get_disp_names(struct vo_w32_state *w32)
+{
+    struct disp_names_data list = { .assoc = w32->monitor };
+
+    // Start with the monitor Windows considers associated with the window
+    MONITORINFOEXW info = { .cbSize = sizeof info };
+    if (GetMonitorInfoW(list.assoc, (MONITORINFO*)&info)) {
+        char *name = mp_to_utf8(NULL, info.szDevice);
+        MP_TARRAY_APPEND(NULL, list.names, list.count, name);
+    }
+
+    // Client area of the window, converted to screen space
+    RECT client = { 0 };
+    GetClientRect(w32->window, &client);
+    MapWindowPoints(w32->window, NULL, (POINT*)&client, 2);
+
+    // Add the other monitors that intersect it, then terminate the list
+    EnumDisplayMonitors(NULL, &client, disp_names_proc, (LPARAM)&list);
+    MP_TARRAY_APPEND(NULL, list.names, list.count, NULL);
+    return list.names;
+}
+
+static int gui_thread_control(struct vo_w32_state *w32, int request, void *arg)
+{
+    switch (request) { // handles one VOCTRL_* request, called from do_control()
+    case VOCTRL_VO_OPTS_CHANGED: {
+        void *changed_option;
+
+        while (m_config_cache_get_next_changed(w32->opts_cache,
+                                               &changed_option))
+        {
+            struct mp_vo_opts *vo_opts = w32->opts_cache->opts;
+
+            if (changed_option == &vo_opts->fullscreen) {
+                reinit_window_state(w32);
+            } else if (changed_option == &vo_opts->window_affinity) {
+                update_affinity(w32);
+            } else if (changed_option == &vo_opts->ontop) {
+                update_window_state(w32);
+            } else if (changed_option == &vo_opts->backdrop_type) {
+                update_backdrop(w32);
+            } else if (changed_option == &vo_opts->border ||
+                       changed_option == &vo_opts->title_bar)
+            {
+                update_window_style(w32);
+                update_window_state(w32);
+            } else if (changed_option == &vo_opts->window_minimized) {
+                update_minimized_state(w32);
+            } else if (changed_option == &vo_opts->window_maximized) {
+                update_maximized_state(w32);
+            } else if (changed_option == &vo_opts->window_corners) {
+                update_corners_pref(w32);
+            }
+        }
+
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_WINDOW_ID: {
+        if (!w32->window)
+            return VO_NOTAVAIL;
+        *(int64_t *)arg = (intptr_t)w32->window;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_HIDPI_SCALE: {
+        *(double *)arg = w32->dpi_scale;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_UNFS_WINDOW_SIZE: {
+        int *s = arg;
+
+        if (!w32->window_bounds_initialized)
+            return VO_FALSE;
+
+        RECT *rc = w32->current_fs ? &w32->prev_windowrc : &w32->windowrc;
+        s[0] = rect_w(*rc) / w32->dpi_scale; // size with the DPI scale divided out
+        s[1] = rect_h(*rc) / w32->dpi_scale;
+        return VO_TRUE;
+    }
+    case VOCTRL_SET_UNFS_WINDOW_SIZE: {
+        int *s = arg;
+
+        if (!w32->window_bounds_initialized)
+            return VO_FALSE;
+
+        s[0] *= w32->dpi_scale; // note: scales the caller's array in place
+        s[1] *= w32->dpi_scale;
+
+        RECT *rc = w32->current_fs ? &w32->prev_windowrc : &w32->windowrc;
+        resize_and_move_rect(w32, rc, s[0], s[1]);
+
+        w32->fit_on_screen = true;
+        reinit_window_state(w32);
+        return VO_TRUE;
+    }
+    case VOCTRL_SET_CURSOR_VISIBILITY:
+        w32->cursor_visible = *(bool *)arg;
+
+        if (w32->can_set_cursor && w32->tracking) { // both maintained by WndProc
+            if (w32->cursor_visible)
+                SetCursor(LoadCursor(NULL, IDC_ARROW));
+            else
+                SetCursor(NULL);
+        }
+        return VO_TRUE;
+    case VOCTRL_KILL_SCREENSAVER:
+        w32->disable_screensaver = true; // also checked in WndProc (WM_SYSCOMMAND)
+        SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED |
+                                ES_DISPLAY_REQUIRED);
+        return VO_TRUE;
+    case VOCTRL_RESTORE_SCREENSAVER:
+        w32->disable_screensaver = false;
+        SetThreadExecutionState(ES_CONTINUOUS);
+        return VO_TRUE;
+    case VOCTRL_UPDATE_WINDOW_TITLE: {
+        wchar_t *title = mp_from_utf8(NULL, (char *)arg);
+        SetWindowTextW(w32->window, title);
+        talloc_free(title);
+        return VO_TRUE;
+    }
+    case VOCTRL_UPDATE_PLAYBACK_STATE: {
+        w32->current_pstate = *(struct voctrl_playback_state *)arg;
+
+        update_playback_state(w32);
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_DISPLAY_FPS:
+        update_display_info(w32);
+        *(double*) arg = w32->display_fps;
+        return VO_TRUE;
+    case VOCTRL_GET_DISPLAY_RES: ;
+        RECT monrc = get_monitor_info(w32).rcMonitor;
+        ((int *)arg)[0] = monrc.right - monrc.left;
+        ((int *)arg)[1] = monrc.bottom - monrc.top;
+        return VO_TRUE;
+    case VOCTRL_GET_DISPLAY_NAMES:
+        *(char ***)arg = get_disp_names(w32); // NULL-terminated array
+        return VO_TRUE;
+    case VOCTRL_GET_ICC_PROFILE:
+        update_display_info(w32);
+        if (w32->color_profile) {
+            bstr *p = arg;
+            *p = stream_read_file(w32->color_profile, NULL,
+                w32->vo->global, 100000000); // 100 MB
+            return p->len ? VO_TRUE : VO_FALSE;
+        }
+        return VO_FALSE;
+    case VOCTRL_GET_FOCUSED:
+        *(bool *)arg = w32->focused;
+        return VO_TRUE;
+    }
+    return VO_NOTIMPL; // request has no case above
+}
+
+// Dispatch callback of vo_w32_control(). ptr is the argument array built
+// there: { w32, events, &request, arg, &result }
+static void do_control(void *ptr)
+{
+    void **args = ptr;
+    struct vo_w32_state *w32 = args[0];
+    int *events = args[1];
+    int *result = args[4];
+    *result = gui_thread_control(w32, *(int *)args[2], args[3]);
+    *events |= atomic_fetch_and(&w32->event_flags, 0);
+    if (*events & VO_EVENT_RESIZE) {
+        // Writing to vo is safe, since the caller (owner of vo) is blocked
+        w32->vo->dwidth = rect_w(w32->windowrc);
+        w32->vo->dheight = rect_h(w32->windowrc);
+    }
+}
+
+// Only VOCTRL_CHECK_EVENTS is served directly; the rest goes to do_control().
+int vo_w32_control(struct vo *vo, int *events, int request, void *arg)
+{
+    struct vo_w32_state *w32 = vo->w32;
+    if (request != VOCTRL_CHECK_EVENTS) {
+        int result;
+        void *args[] = {w32, events, &request, arg, &result};
+        mp_dispatch_run(w32->dispatch, do_control, args);
+        return result;
+    }
+    *events |= atomic_fetch_and(&w32->event_flags, 0);
+    if (*events & VO_EVENT_RESIZE) {
+        mp_dispatch_lock(w32->dispatch); // windowrc is updated by WndProc
+        vo->dwidth = rect_w(w32->windowrc);
+        vo->dheight = rect_h(w32->windowrc);
+        mp_dispatch_unlock(w32->dispatch);
+    }
+    return VO_TRUE;
+}
+
+// Dispatch callback of vo_w32_uninit(): make the GUI thread shut down
+static void do_terminate(void *ptr)
+{
+    struct vo_w32_state *state = ptr;
+
+    state->terminate = true; // set before destroying: WM_DESTROY checks it
+    if (!state->destroyed)
+        DestroyWindow(state->window);
+    mp_dispatch_interrupt(state->dispatch);
+}
+
+// Stop the GUI thread and free the window state. No-op if vo->w32 is NULL.
+void vo_w32_uninit(struct vo *vo)
+{
+    struct vo_w32_state *w32 = vo->w32;
+    if (w32) {
+        // Have the GUI thread terminate itself, then wait until it is gone
+        mp_dispatch_run(w32->dispatch, do_terminate, w32);
+        mp_thread_join(w32->thread);
+
+        AvRevertMmThreadCharacteristics(w32->avrt_handle); // see vo_w32_init()
+        talloc_free(w32);
+        vo->w32 = NULL;
+    }
+}
+
+// Return the window handle. Considered immutable, so no synchronization needed.
+HWND vo_w32_hwnd(struct vo *vo)
+{
+    return vo->w32->window;
+}
+
+// Run cb(ctx) through the dispatch queue that the GUI thread processes.
+void vo_w32_run_on_thread(struct vo *vo, void (*cb)(void *ctx), void *ctx)
+{
+    mp_dispatch_run(vo->w32->dispatch, cb, ctx);
+}
diff --git a/video/out/w32_common.h b/video/out/w32_common.h
new file mode 100644
index 0000000..528b216
--- /dev/null
+++ b/video/out/w32_common.h
@@ -0,0 +1,36 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_W32_COMMON_H
+#define MPLAYER_W32_COMMON_H
+
+#include <stdint.h>
+#include <stdbool.h>
+#include <windows.h>
+
+#include "common/common.h"
+
+struct vo;
+
+// Public entry points of the win32 window backend (see w32_common.c).
+bool vo_w32_init(struct vo *vo);
+// Terminates the backend thread and frees vo->w32; no-op if vo->w32 is NULL.
+void vo_w32_uninit(struct vo *vo);
+// Handles a VOCTRL request; pending VO_EVENT_* flags are OR-ed into *events.
+int vo_w32_control(struct vo *vo, int *events, int request, void *arg);
+void vo_w32_config(struct vo *vo);
+// Returns the backend's window handle.
+HWND vo_w32_hwnd(struct vo *vo);
+// Runs cb(ctx) through the backend's dispatch queue.
+void vo_w32_run_on_thread(struct vo *vo, void (*cb)(void *ctx), void *ctx);
+
+#endif /* MPLAYER_W32_COMMON_H */
diff --git a/video/out/wayland_common.c b/video/out/wayland_common.c
new file mode 100644
index 0000000..589135f
--- /dev/null
+++ b/video/out/wayland_common.c
@@ -0,0 +1,2629 @@
+/*
+ * This file is part of mpv video player.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <errno.h>
+#include <limits.h>
+#include <linux/input-event-codes.h>
+#include <poll.h>
+#include <time.h>
+#include <unistd.h>
+#include <wayland-cursor.h>
+#include <xkbcommon/xkbcommon.h>
+
+#include "common/msg.h"
+#include "input/input.h"
+#include "input/keycodes.h"
+#include "options/m_config.h"
+#include "osdep/io.h"
+#include "osdep/poll_wrapper.h"
+#include "osdep/timer.h"
+#include "present_sync.h"
+#include "wayland_common.h"
+#include "win_state.h"
+
+// Generated from wayland-protocols
+#include "idle-inhibit-unstable-v1.h"
+#include "linux-dmabuf-unstable-v1.h"
+#include "presentation-time.h"
+#include "xdg-decoration-unstable-v1.h"
+#include "xdg-shell.h"
+#include "viewporter.h"
+
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+#include "content-type-v1.h"
+#include "single-pixel-buffer-v1.h"
+#endif
+
+#if HAVE_WAYLAND_PROTOCOLS_1_31
+#include "fractional-scale-v1.h"
+#endif
+
+#if HAVE_WAYLAND_PROTOCOLS_1_32
+#include "cursor-shape-v1.h"
+#endif
+
+#if WAYLAND_VERSION_MAJOR > 1 || WAYLAND_VERSION_MINOR >= 22
+#define HAVE_WAYLAND_1_22
+#endif
+
+#ifndef CLOCK_MONOTONIC_RAW
+#define CLOCK_MONOTONIC_RAW 4
+#endif
+
+#ifndef XDG_TOPLEVEL_STATE_SUSPENDED
+#define XDG_TOPLEVEL_STATE_SUSPENDED 9
+#endif
+
+
+/* Table pairing XKB keysyms with the corresponding mpv key codes. The list is
+ * terminated by a {0, 0} entry. Presumably consulted by lookupkey() -- confirm
+ * against its definition. */
+static const struct mp_keymap keymap[] = {
+    /* Special keys */
+    {XKB_KEY_Pause,     MP_KEY_PAUSE}, {XKB_KEY_Escape,       MP_KEY_ESC},
+    {XKB_KEY_BackSpace, MP_KEY_BS},    {XKB_KEY_Tab,          MP_KEY_TAB},
+    {XKB_KEY_Return,    MP_KEY_ENTER}, {XKB_KEY_Menu,         MP_KEY_MENU},
+    {XKB_KEY_Print,     MP_KEY_PRINT}, {XKB_KEY_ISO_Left_Tab, MP_KEY_TAB},
+
+    /* Cursor keys */
+    {XKB_KEY_Left, MP_KEY_LEFT}, {XKB_KEY_Right, MP_KEY_RIGHT},
+    {XKB_KEY_Up,   MP_KEY_UP},   {XKB_KEY_Down,  MP_KEY_DOWN},
+
+    /* Navigation keys */
+    {XKB_KEY_Insert,  MP_KEY_INSERT},  {XKB_KEY_Delete,    MP_KEY_DELETE},
+    {XKB_KEY_Home,    MP_KEY_HOME},    {XKB_KEY_End,       MP_KEY_END},
+    {XKB_KEY_Page_Up, MP_KEY_PAGE_UP}, {XKB_KEY_Page_Down, MP_KEY_PAGE_DOWN},
+
+    /* F-keys */
+    {XKB_KEY_F1,  MP_KEY_F + 1},  {XKB_KEY_F2,  MP_KEY_F + 2},
+    {XKB_KEY_F3,  MP_KEY_F + 3},  {XKB_KEY_F4,  MP_KEY_F + 4},
+    {XKB_KEY_F5,  MP_KEY_F + 5},  {XKB_KEY_F6,  MP_KEY_F + 6},
+    {XKB_KEY_F7,  MP_KEY_F + 7},  {XKB_KEY_F8,  MP_KEY_F + 8},
+    {XKB_KEY_F9,  MP_KEY_F + 9},  {XKB_KEY_F10, MP_KEY_F +10},
+    {XKB_KEY_F11, MP_KEY_F +11},  {XKB_KEY_F12, MP_KEY_F +12},
+    {XKB_KEY_F13, MP_KEY_F +13},  {XKB_KEY_F14, MP_KEY_F +14},
+    {XKB_KEY_F15, MP_KEY_F +15},  {XKB_KEY_F16, MP_KEY_F +16},
+    {XKB_KEY_F17, MP_KEY_F +17},  {XKB_KEY_F18, MP_KEY_F +18},
+    {XKB_KEY_F19, MP_KEY_F +19},  {XKB_KEY_F20, MP_KEY_F +20},
+    {XKB_KEY_F21, MP_KEY_F +21},  {XKB_KEY_F22, MP_KEY_F +22},
+    {XKB_KEY_F23, MP_KEY_F +23},  {XKB_KEY_F24, MP_KEY_F +24},
+
+    /* Numpad independent of numlock */
+    {XKB_KEY_KP_Subtract, '-'}, {XKB_KEY_KP_Add,    '+'},
+    {XKB_KEY_KP_Multiply, '*'}, {XKB_KEY_KP_Divide, '/'},
+    {XKB_KEY_KP_Enter, MP_KEY_KPENTER},
+
+    /* Numpad with numlock */
+    {XKB_KEY_KP_0, MP_KEY_KP0}, {XKB_KEY_KP_1, MP_KEY_KP1},
+    {XKB_KEY_KP_2, MP_KEY_KP2}, {XKB_KEY_KP_3, MP_KEY_KP3},
+    {XKB_KEY_KP_4, MP_KEY_KP4}, {XKB_KEY_KP_5, MP_KEY_KP5},
+    {XKB_KEY_KP_6, MP_KEY_KP6}, {XKB_KEY_KP_7, MP_KEY_KP7},
+    {XKB_KEY_KP_8, MP_KEY_KP8}, {XKB_KEY_KP_9, MP_KEY_KP9},
+    {XKB_KEY_KP_Decimal, MP_KEY_KPDEC}, {XKB_KEY_KP_Separator, MP_KEY_KPDEC},
+
+    /* Numpad without numlock */
+    {XKB_KEY_KP_Insert, MP_KEY_KPINS},   {XKB_KEY_KP_End,       MP_KEY_KPEND},
+    {XKB_KEY_KP_Down,   MP_KEY_KPDOWN},  {XKB_KEY_KP_Page_Down, MP_KEY_KPPGDOWN},
+    {XKB_KEY_KP_Left,   MP_KEY_KPLEFT},  {XKB_KEY_KP_Begin,     MP_KEY_KP5},
+    {XKB_KEY_KP_Right,  MP_KEY_KPRIGHT}, {XKB_KEY_KP_Home,      MP_KEY_KPHOME},
+    {XKB_KEY_KP_Up,     MP_KEY_KPUP},    {XKB_KEY_KP_Page_Up,   MP_KEY_KPPGUP},
+    {XKB_KEY_KP_Delete, MP_KEY_KPDEL},
+
+    /* Multimedia keys */
+    {XKB_KEY_XF86MenuKB, MP_KEY_MENU},
+    {XKB_KEY_XF86AudioPlay, MP_KEY_PLAY}, {XKB_KEY_XF86AudioPause, MP_KEY_PAUSE},
+    {XKB_KEY_XF86AudioStop, MP_KEY_STOP},
+    {XKB_KEY_XF86AudioPrev, MP_KEY_PREV}, {XKB_KEY_XF86AudioNext, MP_KEY_NEXT},
+    {XKB_KEY_XF86AudioRewind, MP_KEY_REWIND},
+    {XKB_KEY_XF86AudioForward, MP_KEY_FORWARD},
+    {XKB_KEY_XF86AudioMute, MP_KEY_MUTE},
+    {XKB_KEY_XF86AudioLowerVolume, MP_KEY_VOLUME_DOWN},
+    {XKB_KEY_XF86AudioRaiseVolume, MP_KEY_VOLUME_UP},
+    {XKB_KEY_XF86HomePage, MP_KEY_HOMEPAGE}, {XKB_KEY_XF86WWW, MP_KEY_WWW},
+    {XKB_KEY_XF86Mail, MP_KEY_MAIL}, {XKB_KEY_XF86Favorites, MP_KEY_FAVORITES},
+    {XKB_KEY_XF86Search, MP_KEY_SEARCH}, {XKB_KEY_XF86Sleep, MP_KEY_SLEEP},
+    {XKB_KEY_XF86Back, MP_KEY_BACK}, {XKB_KEY_XF86Tools, MP_KEY_TOOLS},
+    {XKB_KEY_XF86ZoomIn, MP_KEY_ZOOMIN}, {XKB_KEY_XF86ZoomOut, MP_KEY_ZOOMOUT},
+
+    {0, 0}
+};
+
+/* Wayland-specific user options, stored in struct wayland_opts.
+ *   wayland-configure-bounds:    choice auto (-1) / no (0) / yes (1); default auto
+ *   wayland-disable-vsync:       boolean
+ *   wayland-edge-pixels-pointer: integer >= 0; default 16
+ *   wayland-edge-pixels-touch:   integer >= 0; default 32
+ * The edge-pixel values are passed to check_for_resize() by the pointer and
+ * touch handlers in this file. */
+#define OPT_BASE_STRUCT struct wayland_opts
+const struct m_sub_options wayland_conf = {
+    .opts = (const struct m_option[]) {
+        {"wayland-configure-bounds", OPT_CHOICE(configure_bounds,
+            {"auto", -1}, {"no", 0}, {"yes", 1})},
+        {"wayland-disable-vsync", OPT_BOOL(disable_vsync)},
+        {"wayland-edge-pixels-pointer", OPT_INT(edge_pixels_pointer),
+            M_RANGE(0, INT_MAX)},
+        {"wayland-edge-pixels-touch", OPT_INT(edge_pixels_touch),
+            M_RANGE(0, INT_MAX)},
+        {0},
+    },
+    .size = sizeof(struct wayland_opts),
+    .defaults = &(struct wayland_opts) {
+        .configure_bounds = -1,
+        .edge_pixels_pointer = 16,
+        .edge_pixels_touch = 32,
+    },
+};
+
+/* Holds wp_presentation_feedback objects; entries are managed through
+ * add_feedback() and remove_feedback(), declared further down in this file. */
+struct vo_wayland_feedback_pool {
+    struct wp_presentation_feedback **fback; // array of feedback object pointers
+    struct vo_wayland_state *wl;             // owning wayland state
+    int len;                                 // presumably the number of slots in fback -- TODO confirm
+};
+
+/* Per-wl_output bookkeeping, filled in by the output_handle_*() listeners and
+ * linked into the state's output_list. */
+struct vo_wayland_output {
+    struct vo_wayland_state *wl;  // owning wayland state
+    struct wl_output *output;     // the protocol object this entry describes
+    struct mp_rect geometry;      // x0/y0 from the geometry event; x1/y1 from the
+                                  // current mode, offset by x0/y0 on the done event
+    bool has_surface;             // set on surface enter, cleared on surface leave
+    uint32_t id;
+    uint32_t flags;               // flags of the current mode
+    int phys_width;               // physical size reported by the geometry event
+    int phys_height;              // (logged as millimetres)
+    int scale;                    // integer scale factor from the scale event
+    double refresh_rate;          // current mode's refresh value * 0.001 (logged as Hz)
+    char *make;                   // copied from the geometry event
+    char *model;                  // copied from the geometry event
+    char *name;                   // copied from the name event
+    struct wl_list link;          // node in the output list
+};
+
+/* Forward declarations of helpers that are used by the listener callbacks
+ * below before their definitions appear. Helpers returning int come first,
+ * followed by the void helpers. */
+static int check_for_resize(struct vo_wayland_state *wl, int edge_pixels,
+                            enum xdg_toplevel_resize_edge *edge);
+static int get_mods(struct vo_wayland_state *wl);
+static int lookupkey(int key);
+static int set_cursor_visibility(struct vo_wayland_state *wl, bool on);
+static int spawn_cursor(struct vo_wayland_state *wl);
+
+static void add_feedback(struct vo_wayland_feedback_pool *fback_pool,
+                         struct wp_presentation_feedback *fback);
+static void get_shape_device(struct vo_wayland_state *wl);
+static int greatest_common_divisor(int a, int b);
+static void guess_focus(struct vo_wayland_state *wl);
+static void prepare_resize(struct vo_wayland_state *wl, int width, int height);
+static void remove_feedback(struct vo_wayland_feedback_pool *fback_pool,
+                            struct wp_presentation_feedback *fback);
+static void remove_output(struct vo_wayland_output *out);
+static void request_decoration_mode(struct vo_wayland_state *wl, uint32_t mode);
+static void rescale_geometry(struct vo_wayland_state *wl, double old_scale);
+static void set_geometry(struct vo_wayland_state *wl, bool resize);
+static void set_surface_scaling(struct vo_wayland_state *wl);
+static void window_move(struct vo_wayland_state *wl, uint32_t serial);
+
+/* Wayland listener boilerplate */
+/* wl_pointer.enter: record the pointer object and the enter serial, re-apply
+ * the stored cursor visibility and report MP_KEY_MOUSE_ENTER. */
+static void pointer_handle_enter(void *data, struct wl_pointer *pointer,
+                                 uint32_t serial, struct wl_surface *surface,
+                                 wl_fixed_t sx, wl_fixed_t sy)
+{
+    struct vo_wayland_state *state = data;
+
+    state->pointer_id = serial;
+    state->pointer = pointer;
+
+    set_cursor_visibility(state, state->cursor_visible);
+    mp_input_put_key(state->vo->input_ctx, MP_KEY_MOUSE_ENTER);
+}
+
+/* wl_pointer.leave: report MP_KEY_MOUSE_LEAVE to the input context. */
+static void pointer_handle_leave(void *data, struct wl_pointer *pointer,
+                                 uint32_t serial, struct wl_surface *surface)
+{
+    struct vo_wayland_state *wl = data;
+    mp_input_put_key(wl->vo->input_ctx, MP_KEY_MOUSE_LEAVE);
+}
+
+/* wl_pointer.motion: store the pointer position multiplied by the current
+ * scaling and forward it to the input context. The first motion event seen
+ * while toplevel_configured is set is not forwarded; it only clears the flag. */
+static void pointer_handle_motion(void *data, struct wl_pointer *pointer,
+                                  uint32_t time, wl_fixed_t sx, wl_fixed_t sy)
+{
+    struct vo_wayland_state *wl = data;
+
+    wl->mouse_x = wl_fixed_to_int(sx) * wl->scaling;
+    wl->mouse_y = wl_fixed_to_int(sy) * wl->scaling;
+
+    if (wl->toplevel_configured) {
+        wl->toplevel_configured = false;
+        return;
+    }
+
+    mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y);
+}
+
+/* wl_pointer.button: translate an evdev button code into an mpv mouse button
+ * and feed it to the input context together with the current modifiers.
+ *
+ * Codes in [BTN_MOUSE, BTN_JOYSTICK) are mapped; anything else is dropped.
+ * A left-button press that is not claimed by input dragging (and while the
+ * window size is not locked) starts an interactive resize when the window is
+ * borderless and the pointer is inside the edge zone, or an interactive move
+ * otherwise. */
+static void pointer_handle_button(void *data, struct wl_pointer *wl_pointer,
+                                  uint32_t serial, uint32_t time, uint32_t button,
+                                  uint32_t state)
+{
+    struct vo_wayland_state *wl = data;
+    state = state == WL_POINTER_BUTTON_STATE_PRESSED ? MP_KEY_STATE_DOWN
+                                                     : MP_KEY_STATE_UP;
+
+    if (button >= BTN_MOUSE && button < BTN_JOYSTICK) {
+        switch (button) {
+        case BTN_LEFT:
+            button = MP_MBTN_LEFT;
+            break;
+        case BTN_MIDDLE:
+            button = MP_MBTN_MID;
+            break;
+        case BTN_RIGHT:
+            button = MP_MBTN_RIGHT;
+            break;
+        case BTN_SIDE:
+            button = MP_MBTN_BACK;
+            break;
+        case BTN_EXTRA:
+            button = MP_MBTN_FORWARD;
+            break;
+        default:
+            button += MP_MBTN9 - BTN_FORWARD;
+            break;
+        }
+    } else {
+        button = 0;
+    }
+
+    if (button)
+        mp_input_put_key(wl->vo->input_ctx, button | state | wl->mpmod);
+
+    if (!mp_input_test_dragging(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y) &&
+        !wl->locked_size && (button == MP_MBTN_LEFT) && (state == MP_KEY_STATE_DOWN))
+    {
+        // Typed to match the out-parameter of check_for_resize().
+        enum xdg_toplevel_resize_edge edges;
+        // Implement an edge resize zone if there are no decorations
+        if (!wl->vo_opts->border && check_for_resize(wl, wl->opts->edge_pixels_pointer, &edges)) {
+            xdg_toplevel_resize(wl->xdg_toplevel, wl->seat, serial, edges);
+        } else {
+            window_move(wl, serial);
+        }
+        // Explicitly send an UP event after the client finishes a move/resize
+        mp_input_put_key(wl->vo->input_ctx, button | MP_KEY_STATE_UP);
+    }
+}
+
+/* wl_pointer.axis: convert a scroll event into one wheel key with a fixed
+ * magnitude of 1. Only the sign of the reported value is used; a value of
+ * zero and unknown axes produce no event. */
+static void pointer_handle_axis(void *data, struct wl_pointer *wl_pointer,
+                                uint32_t time, uint32_t axis, wl_fixed_t value)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (value == 0)
+        return;
+
+    const bool positive = value > 0;
+    int wheel;
+
+    switch (axis) {
+    case WL_POINTER_AXIS_VERTICAL_SCROLL:
+        wheel = positive ? MP_WHEEL_DOWN : MP_WHEEL_UP;
+        break;
+    case WL_POINTER_AXIS_HORIZONTAL_SCROLL:
+        wheel = positive ? MP_WHEEL_RIGHT : MP_WHEEL_LEFT;
+        break;
+    default:
+        return;
+    }
+
+    mp_input_put_wheel(wl->vo->input_ctx, wheel | wl->mpmod, 1.0);
+}
+
+/* Callback table registered on the wl_pointer in seat_handle_caps(). */
+static const struct wl_pointer_listener pointer_listener = {
+    .enter  = pointer_handle_enter,
+    .leave  = pointer_handle_leave,
+    .motion = pointer_handle_motion,
+    .button = pointer_handle_button,
+    .axis   = pointer_handle_axis,
+};
+
+/* wl_touch.down: store the scaled touch position, start an interactive resize
+ * (inside the touch edge zone) or move unless input dragging claims the
+ * position, then report the position and a left-button press. */
+static void touch_handle_down(void *data, struct wl_touch *wl_touch,
+                              uint32_t serial, uint32_t time, struct wl_surface *surface,
+                              int32_t id, wl_fixed_t x_w, wl_fixed_t y_w)
+{
+    struct vo_wayland_state *wl = data;
+
+    wl->mouse_x = wl_fixed_to_int(x_w) * wl->scaling;
+    wl->mouse_y = wl_fixed_to_int(y_w) * wl->scaling;
+
+    if (!mp_input_test_dragging(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y)) {
+        enum xdg_toplevel_resize_edge edge;
+        int on_edge = check_for_resize(wl, wl->opts->edge_pixels_touch, &edge);
+
+        if (on_edge)
+            xdg_toplevel_resize(wl->xdg_toplevel, wl->seat, serial, edge);
+        else
+            xdg_toplevel_move(wl->xdg_toplevel, wl->seat, serial);
+    }
+
+    mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y);
+    mp_input_put_key(wl->vo->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_DOWN);
+}
+
+/* wl_touch.up: report a left-button release to the input context. */
+static void touch_handle_up(void *data, struct wl_touch *wl_touch,
+                            uint32_t serial, uint32_t time, int32_t id)
+{
+    struct vo_wayland_state *wl = data;
+    mp_input_put_key(wl->vo->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_UP);
+}
+
+/* wl_touch.motion: store the touch position multiplied by the current scaling
+ * and forward it to the input context as the mouse position. */
+static void touch_handle_motion(void *data, struct wl_touch *wl_touch,
+                                uint32_t time, int32_t id, wl_fixed_t x_w, wl_fixed_t y_w)
+{
+    struct vo_wayland_state *state = data;
+
+    state->mouse_x = wl_fixed_to_int(x_w) * state->scaling;
+    state->mouse_y = wl_fixed_to_int(y_w) * state->scaling;
+    mp_input_set_mouse_pos(state->vo->input_ctx, state->mouse_x, state->mouse_y);
+}
+
+/* wl_touch.frame: intentionally empty; present only to fill the listener
+ * table. */
+static void touch_handle_frame(void *data, struct wl_touch *wl_touch)
+{
+}
+
+/* wl_touch.cancel: intentionally empty; present only to fill the listener
+ * table. */
+static void touch_handle_cancel(void *data, struct wl_touch *wl_touch)
+{
+}
+
+/* Callback table registered on the wl_touch in seat_handle_caps(). */
+static const struct wl_touch_listener touch_listener = {
+    .down   = touch_handle_down,
+    .up     = touch_handle_up,
+    .motion = touch_handle_motion,
+    .frame  = touch_handle_frame,
+    .cancel = touch_handle_cancel,
+};
+
+/* wl_keyboard.keymap: compile the keymap the compositor shares through fd and
+ * create a matching XKB state.
+ *
+ * Only WL_KEYBOARD_KEYMAP_FORMAT_XKB_V1 is accepted. The fd is always closed
+ * before returning. The new keymap and state are built into locals first and
+ * only installed once both exist; the previously installed pair is released
+ * at that point, so a repeated keymap event does not leak and a failed update
+ * leaves the previous keymap and state in place as a matching pair. */
+static void keyboard_handle_keymap(void *data, struct wl_keyboard *wl_keyboard,
+                                   uint32_t format, int32_t fd, uint32_t size)
+{
+    struct vo_wayland_state *wl = data;
+    char *map_str;
+
+    if (format != WL_KEYBOARD_KEYMAP_FORMAT_XKB_V1) {
+        close(fd);
+        return;
+    }
+
+    map_str = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    if (map_str == MAP_FAILED) {
+        close(fd);
+        return;
+    }
+
+    struct xkb_keymap *new_keymap =
+        xkb_keymap_new_from_buffer(wl->xkb_context, map_str,
+                                   strnlen(map_str, size),
+                                   XKB_KEYMAP_FORMAT_TEXT_V1, 0);
+
+    munmap(map_str, size);
+    close(fd);
+
+    if (!new_keymap) {
+        MP_ERR(wl, "failed to compile keymap\n");
+        return;
+    }
+
+    struct xkb_state *new_state = xkb_state_new(new_keymap);
+    if (!new_state) {
+        MP_ERR(wl, "failed to create XKB state\n");
+        xkb_keymap_unref(new_keymap);
+        return;
+    }
+
+    // Drop the references held on any previous keymap/state (both unref
+    // functions accept NULL) before installing the new pair.
+    xkb_state_unref(wl->xkb_state);
+    xkb_keymap_unref(wl->xkb_keymap);
+    wl->xkb_keymap = new_keymap;
+    wl->xkb_state = new_state;
+}
+
+/* wl_keyboard.enter: note that the surface has keyboard input and let
+ * guess_focus() re-evaluate the focus state. */
+static void keyboard_handle_enter(void *data, struct wl_keyboard *wl_keyboard,
+                                  uint32_t serial, struct wl_surface *surface,
+                                  struct wl_array *keys)
+{
+    struct vo_wayland_state *wl = data;
+    wl->has_keyboard_input = true;
+    guess_focus(wl);
+}
+
+/* wl_keyboard.leave: clear the keyboard input flag and the cached key,
+ * keycode and modifier state, release all keys in the input context and let
+ * guess_focus() re-evaluate the focus state. */
+static void keyboard_handle_leave(void *data, struct wl_keyboard *wl_keyboard,
+                                  uint32_t serial, struct wl_surface *surface)
+{
+    struct vo_wayland_state *state = data;
+
+    state->mpmod = 0;
+    state->mpkey = 0;
+    state->keyboard_code = 0;
+    state->has_keyboard_input = false;
+
+    mp_input_put_key(state->vo->input_ctx, MP_INPUT_RELEASE_ALL);
+    guess_focus(state);
+}
+
+/* wl_keyboard.key: translate a key event into an mpv key and feed it to the
+ * input context together with the current modifiers.
+ *
+ * The evdev key code is offset by 8 to obtain the XKB keycode. Keysyms found
+ * by lookupkey() are sent as mpv key codes; anything else is sent as UTF-8
+ * text if the keysym has a text representation. Keysyms with neither are
+ * assumed to be modifiers and are left to keyboard_handle_modifiers().
+ * The last pressed mapped key is remembered in wl->mpkey so that a modifier
+ * change can re-send it. */
+static void keyboard_handle_key(void *data, struct wl_keyboard *wl_keyboard,
+                                uint32_t serial, uint32_t time, uint32_t key,
+                                uint32_t state)
+{
+    struct vo_wayland_state *wl = data;
+
+    wl->keyboard_code = key + 8;
+
+    // No usable keymap (none received yet, or it failed to compile): the key
+    // cannot be translated. keyboard_handle_modifiers() applies the same check.
+    if (!wl->xkb_state)
+        return;
+
+    xkb_keysym_t sym = xkb_state_key_get_one_sym(wl->xkb_state, wl->keyboard_code);
+    int mpkey = lookupkey(sym);
+
+    state = state == WL_KEYBOARD_KEY_STATE_PRESSED ? MP_KEY_STATE_DOWN
+                                                   : MP_KEY_STATE_UP;
+
+    if (mpkey) {
+        mp_input_put_key(wl->vo->input_ctx, mpkey | state | wl->mpmod);
+    } else {
+        char s[128];
+        if (xkb_keysym_to_utf8(sym, s, sizeof(s)) > 0) {
+            mp_input_put_key_utf8(wl->vo->input_ctx, state | wl->mpmod, bstr0(s));
+        } else {
+            // Assume a modifier was pressed and handle it in the mod event instead.
+            return;
+        }
+    }
+    if (state == MP_KEY_STATE_DOWN)
+        wl->mpkey = mpkey;
+    if (mpkey && state == MP_KEY_STATE_UP)
+        wl->mpkey = 0;
+}
+
+/* wl_keyboard.modifiers: update the XKB state with the new modifier masks and
+ * layout group, refresh the cached mpv modifier bits and, if a mapped key is
+ * currently remembered as pressed, re-send it as a key-down with the new
+ * modifiers. Does nothing while no XKB state exists. */
+static void keyboard_handle_modifiers(void *data, struct wl_keyboard *wl_keyboard,
+                                      uint32_t serial, uint32_t mods_depressed,
+                                      uint32_t mods_latched, uint32_t mods_locked,
+                                      uint32_t group)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (!wl->xkb_state)
+        return;
+
+    xkb_state_update_mask(wl->xkb_state, mods_depressed, mods_latched,
+                          mods_locked, 0, 0, group);
+    wl->mpmod = get_mods(wl);
+
+    if (!wl->mpkey)
+        return;
+    mp_input_put_key(wl->vo->input_ctx, wl->mpkey | MP_KEY_STATE_DOWN | wl->mpmod);
+}
+
+/* wl_keyboard.repeat_info: forward the compositor's key repeat rate and delay
+ * to the input context, but only when the native_keyrepeat option is set. */
+static void keyboard_handle_repeat_info(void *data, struct wl_keyboard *wl_keyboard,
+                                        int32_t rate, int32_t delay)
+{
+    struct vo_wayland_state *wl = data;
+    if (wl->vo_opts->native_keyrepeat)
+        mp_input_set_repeat_info(wl->vo->input_ctx, rate, delay);
+}
+
+/* Callback table registered on the wl_keyboard in seat_handle_caps(). */
+static const struct wl_keyboard_listener keyboard_listener = {
+    .keymap      = keyboard_handle_keymap,
+    .enter       = keyboard_handle_enter,
+    .leave       = keyboard_handle_leave,
+    .key         = keyboard_handle_key,
+    .modifiers   = keyboard_handle_modifiers,
+    .repeat_info = keyboard_handle_repeat_info,
+};
+
+/* wl_seat.capabilities: bring the pointer, keyboard and touch objects in line
+ * with the advertised capabilities. A device object is created (and its
+ * listener attached) when the capability appears and none exists yet, and
+ * destroyed when the capability disappears. */
+static void seat_handle_caps(void *data, struct wl_seat *seat,
+                             enum wl_seat_capability caps)
+{
+    struct vo_wayland_state *wl = data;
+
+    const bool has_pointer  = caps & WL_SEAT_CAPABILITY_POINTER;
+    const bool has_keyboard = caps & WL_SEAT_CAPABILITY_KEYBOARD;
+    const bool has_touch    = caps & WL_SEAT_CAPABILITY_TOUCH;
+
+    if (has_pointer && !wl->pointer) {
+        wl->pointer = wl_seat_get_pointer(seat);
+        get_shape_device(wl);
+        wl_pointer_add_listener(wl->pointer, &pointer_listener, wl);
+    } else if (!has_pointer && wl->pointer) {
+        wl_pointer_destroy(wl->pointer);
+        wl->pointer = NULL;
+    }
+
+    if (has_keyboard && !wl->keyboard) {
+        wl->keyboard = wl_seat_get_keyboard(seat);
+        wl_keyboard_add_listener(wl->keyboard, &keyboard_listener, wl);
+    } else if (!has_keyboard && wl->keyboard) {
+        wl_keyboard_destroy(wl->keyboard);
+        wl->keyboard = NULL;
+    }
+
+    if (has_touch && !wl->touch) {
+        wl->touch = wl_seat_get_touch(seat);
+        wl_touch_set_user_data(wl->touch, wl);
+        wl_touch_add_listener(wl->touch, &touch_listener, wl);
+    } else if (!has_touch && wl->touch) {
+        wl_touch_destroy(wl->touch);
+        wl->touch = NULL;
+    }
+}
+
+/* Callback table for the wl_seat; only capability changes are handled. */
+static const struct wl_seat_listener seat_listener = {
+    .capabilities = seat_handle_caps,
+};
+
+/* wl_data_offer.offer: score the advertised mime type and remember it if it
+ * beats the best score seen so far. Nothing is stored when the drag_and_drop
+ * option is -2. */
+static void data_offer_handle_offer(void *data, struct wl_data_offer *offer,
+                                    const char *mime_type)
+{
+    struct vo_wayland_state *wl = data;
+    int score = mp_event_get_mime_type_score(wl->vo->input_ctx, mime_type);
+
+    if (score <= wl->dnd_mime_score || wl->vo_opts->drag_and_drop == -2)
+        return;
+
+    wl->dnd_mime_score = score;
+    if (wl->dnd_mime_type)
+        talloc_free(wl->dnd_mime_type);
+    wl->dnd_mime_type = talloc_strdup(wl, mime_type);
+    MP_VERBOSE(wl, "Given DND offer with mime type %s\n", wl->dnd_mime_type);
+}
+
+/* wl_data_offer.source_actions: intentionally empty; present only to fill the
+ * listener table. */
+static void data_offer_source_actions(void *data, struct wl_data_offer *offer, uint32_t source_actions)
+{
+}
+
+/* wl_data_offer.action: choose what a drop will do. A non-negative
+ * drag_and_drop option value is used directly as the action; otherwise a copy
+ * action maps to DND_REPLACE and anything else to DND_APPEND. Ignored when no
+ * action was selected or when the option is -2. */
+static void data_offer_action(void *data, struct wl_data_offer *wl_data_offer, uint32_t dnd_action)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (!dnd_action || wl->vo_opts->drag_and_drop == -2)
+        return;
+
+    if (wl->vo_opts->drag_and_drop >= 0) {
+        wl->dnd_action = wl->vo_opts->drag_and_drop;
+    } else if (dnd_action & WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY) {
+        wl->dnd_action = DND_REPLACE;
+    } else {
+        wl->dnd_action = DND_APPEND;
+    }
+
+    MP_VERBOSE(wl, "DND action is %s\n",
+               wl->dnd_action == DND_REPLACE ? "DND_REPLACE" : "DND_APPEND");
+}
+
+/* Callback table attached to each offer in data_device_handle_data_offer(). */
+static const struct wl_data_offer_listener data_offer_listener = {
+    .offer          = data_offer_handle_offer,
+    .source_actions = data_offer_source_actions,
+    .action         = data_offer_action,
+};
+
+/* wl_data_device.data_offer: a new offer object was introduced. Any offer
+ * still being held is destroyed, the new one is stored and the offer listener
+ * is attached to it. */
+static void data_device_handle_data_offer(void *data, struct wl_data_device *wl_ddev,
+                                          struct wl_data_offer *id)
+{
+    struct vo_wayland_state *state = data;
+
+    if (state->dnd_offer)
+        wl_data_offer_destroy(state->dnd_offer);
+    state->dnd_offer = id;
+
+    wl_data_offer_add_listener(id, &data_offer_listener, state);
+}
+
+/* wl_data_device.enter: a drag entered the surface. The offer must be the one
+ * previously stored; otherwise the event is rejected with a fatal log
+ * message. Unless the drag_and_drop option is -2, the copy/move actions are
+ * announced (copy preferred) and the offer is accepted with the remembered
+ * mime type. */
+static void data_device_handle_enter(void *data, struct wl_data_device *wl_ddev,
+                                     uint32_t serial, struct wl_surface *surface,
+                                     wl_fixed_t x, wl_fixed_t y,
+                                     struct wl_data_offer *id)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (wl->dnd_offer != id) {
+        MP_FATAL(wl, "DND offer ID mismatch!\n");
+        return;
+    }
+
+    if (wl->vo_opts->drag_and_drop == -2)
+        return;
+
+    wl_data_offer_set_actions(id, WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY |
+                                  WL_DATA_DEVICE_MANAGER_DND_ACTION_MOVE,
+                                  WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY);
+    wl_data_offer_accept(id, serial, wl->dnd_mime_type);
+    MP_VERBOSE(wl, "Accepting DND offer with mime type %s\n", wl->dnd_mime_type);
+}
+
+/* wl_data_device.leave: the drag left the surface. While a transfer fd is
+ * open (dnd_fd != -1) the held offer is kept and nothing is reset. Otherwise
+ * the offer is destroyed and, unless the drag_and_drop option is -2, the
+ * remembered mime type and its score are cleared. */
+static void data_device_handle_leave(void *data, struct wl_data_device *wl_ddev)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (wl->dnd_offer) {
+        if (wl->dnd_fd != -1)
+            return;
+        wl_data_offer_destroy(wl->dnd_offer);
+        wl->dnd_offer = NULL;
+    }
+
+    if (wl->vo_opts->drag_and_drop == -2)
+        return;
+
+    MP_VERBOSE(wl, "Releasing DND offer with mime type %s\n", wl->dnd_mime_type);
+    if (wl->dnd_mime_type)
+        TA_FREEP(&wl->dnd_mime_type);
+    wl->dnd_mime_score = 0;
+}
+
+/* wl_data_device.motion: re-accept the held offer with the remembered mime
+ * type on every drag motion event.
+ * NOTE(review): the event timestamp is passed where wl_data_offer_accept()
+ * takes a serial -- confirm this is intended. */
+static void data_device_handle_motion(void *data, struct wl_data_device *wl_ddev,
+                                      uint32_t time, wl_fixed_t x, wl_fixed_t y)
+{
+    struct vo_wayland_state *wl = data;
+    wl_data_offer_accept(wl->dnd_offer, time, wl->dnd_mime_type);
+}
+
+/* wl_data_device.drop: open a close-on-exec pipe, ask the offer to write the
+ * data for the remembered mime type into its write end (skipped when the
+ * drag_and_drop option is -2), close the local write end and keep the read
+ * end in wl->dnd_fd. */
+static void data_device_handle_drop(void *data, struct wl_data_device *wl_ddev)
+{
+    struct vo_wayland_state *wl = data;
+    int fds[2];
+
+    if (pipe2(fds, O_CLOEXEC) == -1) {
+        MP_ERR(wl, "Failed to create dnd pipe!\n");
+        return;
+    }
+
+    const bool dnd_enabled = wl->vo_opts->drag_and_drop != -2;
+    if (dnd_enabled) {
+        MP_VERBOSE(wl, "Receiving DND offer with mime %s\n", wl->dnd_mime_type);
+        wl_data_offer_receive(wl->dnd_offer, wl->dnd_mime_type, fds[1]);
+    }
+
+    close(fds[1]);
+    wl->dnd_fd = fds[0];
+}
+
+/* wl_data_device.selection: the selection changed. Any offer still being held
+ * is destroyed and forgotten; the new offer itself is not used. */
+static void data_device_handle_selection(void *data, struct wl_data_device *wl_ddev,
+                                         struct wl_data_offer *id)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (!wl->dnd_offer)
+        return;
+
+    wl_data_offer_destroy(wl->dnd_offer);
+    wl->dnd_offer = NULL;
+    MP_VERBOSE(wl, "Received a new DND offer. Releasing the previous offer.\n");
+}
+
+/* Callback table for the wl_data_device (drag-and-drop and selection). */
+static const struct wl_data_device_listener data_device_listener = {
+    .data_offer = data_device_handle_data_offer,
+    .enter      = data_device_handle_enter,
+    .leave      = data_device_handle_leave,
+    .motion     = data_device_handle_motion,
+    .drop       = data_device_handle_drop,
+    .selection  = data_device_handle_selection,
+};
+
+/* wl_output.geometry: record the output's position, physical size and the
+ * make/model strings.
+ *
+ * The event may be delivered again when the output changes, so the strings
+ * copied on a previous event are released before the new copies are stored.
+ * NOTE(review): this assumes the output struct is zero-initialized when it is
+ * allocated, so make/model are NULL on the first event -- confirm at the
+ * allocation site. */
+static void output_handle_geometry(void *data, struct wl_output *wl_output,
+                                   int32_t x, int32_t y, int32_t phys_width,
+                                   int32_t phys_height, int32_t subpixel,
+                                   const char *make, const char *model,
+                                   int32_t transform)
+{
+    struct vo_wayland_output *output = data;
+
+    talloc_free(output->make);
+    talloc_free(output->model);
+    output->make = talloc_strdup(output->wl, make);
+    output->model = talloc_strdup(output->wl, model);
+
+    output->geometry.x0 = x;
+    output->geometry.y0 = y;
+    output->phys_width = phys_width;
+    output->phys_height = phys_height;
+}
+
+/* wl_output.mode: remember the size, flags and refresh rate of the current
+ * mode; modes without WL_OUTPUT_MODE_CURRENT are ignored. The width and
+ * height are stored in geometry.x1/y1 and turned into absolute coordinates
+ * by output_handle_done(). */
+static void output_handle_mode(void *data, struct wl_output *wl_output,
+                               uint32_t flags, int32_t width,
+                               int32_t height, int32_t refresh)
+{
+    struct vo_wayland_output *out = data;
+
+    if (flags & WL_OUTPUT_MODE_CURRENT) {
+        out->geometry.x1 = width;
+        out->geometry.y1 = height;
+        out->flags = flags;
+        out->refresh_rate = (double)refresh * 0.001;
+    }
+}
+
+/* wl_output.done: all pending properties of the output have been delivered.
+ * The mode size stored in geometry.x1/y1 is offset by the output position,
+ * the result is logged, and if this output is the one the window is on, the
+ * surface scaling, cursor and window geometry are recomputed.
+ * NOTE(review): x1/y1 are offset on every done event; if a done event arrives
+ * without a preceding mode event the offset looks like it would be applied
+ * twice -- confirm against compositor behaviour. */
+static void output_handle_done(void *data, struct wl_output *wl_output)
+{
+    struct vo_wayland_output *o = data;
+    struct vo_wayland_state *wl = o->wl;
+
+    o->geometry.x1 += o->geometry.x0;
+    o->geometry.y1 += o->geometry.y0;
+
+    MP_VERBOSE(o->wl, "Registered output %s %s (0x%x):\n"
+               "\tx: %dpx, y: %dpx\n"
+               "\tw: %dpx (%dmm), h: %dpx (%dmm)\n"
+               "\tscale: %d\n"
+               "\tHz: %f\n", o->make, o->model, o->id, o->geometry.x0,
+               o->geometry.y0, mp_rect_w(o->geometry), o->phys_width,
+               mp_rect_h(o->geometry), o->phys_height, o->scale, o->refresh_rate);
+
+    /* Something about the output the window currently sits on has changed
+     * (resolution, scale, etc), so window geometry and scaling must be
+     * recalculated. */
+    bool is_current = wl->current_output && wl->current_output->output == wl_output;
+    if (is_current) {
+        set_surface_scaling(wl);
+        spawn_cursor(wl);
+        set_geometry(wl, false);
+        prepare_resize(wl, 0, 0);
+        wl->pending_vo_events |= VO_EVENT_DPI;
+    }
+
+    wl->pending_vo_events |= VO_EVENT_WIN_STATE;
+}
+
+/* wl_output.scale: store the integer scale factor of the output. A factor of
+ * zero is rejected with an error and the previous value is kept. */
+static void output_handle_scale(void *data, struct wl_output *wl_output,
+                                int32_t factor)
+{
+    struct vo_wayland_output *out = data;
+
+    if (factor) {
+        out->scale = factor;
+        return;
+    }
+
+    MP_ERR(out->wl, "Invalid output scale given by the compositor!\n");
+}
+
+/* wl_output.name: store a copy of the output's name. A copy stored by an
+ * earlier event is released first so repeated events do not accumulate
+ * allocations.
+ * NOTE(review): this assumes the output struct is zero-initialized when it is
+ * allocated, so name is NULL on the first event -- confirm at the allocation
+ * site. */
+static void output_handle_name(void *data, struct wl_output *wl_output,
+                               const char *name)
+{
+    struct vo_wayland_output *output = data;
+
+    talloc_free(output->name);
+    output->name = talloc_strdup(output->wl, name);
+}
+
+/* wl_output.description: intentionally empty; the description is not used. */
+static void output_handle_description(void *data, struct wl_output *wl_output,
+                                      const char *description)
+{
+}
+
+/* Callback table for wl_output objects. */
+static const struct wl_output_listener output_listener = {
+    .geometry    = output_handle_geometry,
+    .mode        = output_handle_mode,
+    .done        = output_handle_done,
+    .scale       = output_handle_scale,
+    .name        = output_handle_name,
+    .description = output_handle_description,
+};
+
+/* wl_surface.enter: the surface entered an output. Makes that output the
+ * current one and recomputes scaling, cursor and geometry where they differ
+ * from the previous output.
+ *
+ * Ignored while no current output has been chosen yet. If the entered
+ * wl_output is not present in the output list, the event is reported and
+ * dropped, and the previous current output is kept. */
+static void surface_handle_enter(void *data, struct wl_surface *wl_surface,
+                                 struct wl_output *output)
+{
+    struct vo_wayland_state *wl = data;
+    if (!wl->current_output)
+        return;
+
+    // Resolve the entered output before touching any state, so an unknown
+    // output cannot leave current_output NULL.
+    struct vo_wayland_output *entered = NULL;
+    struct vo_wayland_output *o;
+    wl_list_for_each(o, &wl->output_list, link) {
+        if (o->output == output) {
+            entered = o;
+            break;
+        }
+    }
+
+    if (!entered) {
+        MP_ERR(wl, "Surface entered an unknown output, ignoring.\n");
+        return;
+    }
+
+    struct mp_rect old_output_geometry = wl->current_output->geometry;
+    struct mp_rect old_geometry = wl->geometry;
+
+    wl->current_output = entered;
+    wl->current_output->has_surface = true;
+    bool force_resize = false;
+
+    if (!wl->fractional_scale_manager && wl_surface_get_version(wl_surface) < 6 &&
+        wl->scaling != wl->current_output->scale)
+    {
+        set_surface_scaling(wl);
+        spawn_cursor(wl);
+        force_resize = true;
+        wl->pending_vo_events |= VO_EVENT_DPI;
+    }
+
+    if (!mp_rect_equals(&old_output_geometry, &wl->current_output->geometry)) {
+        set_geometry(wl, false);
+        force_resize = true;
+    }
+
+    if (!mp_rect_equals(&old_geometry, &wl->geometry) || force_resize)
+        prepare_resize(wl, 0, 0);
+
+    MP_VERBOSE(wl, "Surface entered output %s %s (0x%x), scale = %f, refresh rate = %f Hz\n",
+               entered->make, entered->model, entered->id, wl->scaling,
+               entered->refresh_rate);
+
+    wl->pending_vo_events |= VO_EVENT_WIN_STATE;
+}
+
+/* wl_surface.leave: clear has_surface on the matching output entry and queue
+ * a window state event. Outputs not found in the list are ignored. */
+static void surface_handle_leave(void *data, struct wl_surface *wl_surface,
+                                 struct wl_output *output)
+{
+    struct vo_wayland_state *wl = data;
+    struct vo_wayland_output *entry;
+
+    wl_list_for_each(entry, &wl->output_list, link) {
+        if (entry->output != output)
+            continue;
+
+        entry->has_surface = false;
+        wl->pending_vo_events |= VO_EVENT_WIN_STATE;
+        return;
+    }
+}
+
+#ifdef HAVE_WAYLAND_1_22
+/* wl_surface.preferred_buffer_scale: adopt the integer scale suggested by the
+ * compositor. Ignored when a fractional scale manager is available. With
+ * dmabuf_wayland the scaling is pinned to 1. A DPI event is queued and, if a
+ * current output exists, the geometry is rescaled and a resize prepared. */
+static void surface_handle_preferred_buffer_scale(void *data,
+                                                  struct wl_surface *wl_surface,
+                                                  int32_t scale)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (wl->fractional_scale_manager)
+        return;
+
+    double previous_scale = wl->scaling;
+
+    // dmabuf_wayland is always wl->scaling = 1
+    wl->scaling = wl->using_dmabuf_wayland ? 1 : scale;
+    MP_VERBOSE(wl, "Obtained preferred scale, %f, from the compositor.\n",
+               wl->scaling);
+    wl->pending_vo_events |= VO_EVENT_DPI;
+
+    if (!wl->current_output)
+        return;
+
+    rescale_geometry(wl, previous_scale);
+    set_geometry(wl, false);
+    prepare_resize(wl, 0, 0);
+}
+
+/* wl_surface.preferred_buffer_transform: intentionally empty; present only to
+ * fill the listener table. */
+static void surface_handle_preferred_buffer_transform(void *data,
+                                                      struct wl_surface *wl_surface,
+                                                      uint32_t transform)
+{
+}
+#endif
+
+/* Callback table for the wl_surface. The preferred-buffer callbacks are only
+ * compiled in when HAVE_WAYLAND_1_22 is defined. */
+static const struct wl_surface_listener surface_listener = {
+    .enter = surface_handle_enter,
+    .leave = surface_handle_leave,
+#ifdef HAVE_WAYLAND_1_22
+    .preferred_buffer_scale     = surface_handle_preferred_buffer_scale,
+    .preferred_buffer_transform = surface_handle_preferred_buffer_transform,
+#endif
+};
+
+/* Answers a ping from the window manager with a pong for the same serial. */
+static void xdg_wm_base_ping(void *data, struct xdg_wm_base *wm_base, uint32_t serial)
+{
+    xdg_wm_base_pong(wm_base, serial);
+}
+
+/* Positional listener table for xdg_wm_base; its only entry is the ping handler. */
+static const struct xdg_wm_base_listener xdg_wm_base_listener = {
+    xdg_wm_base_ping,
+};
+
+/* Acknowledges an xdg_surface configure event using the serial it carried. */
+static void handle_surface_config(void *data, struct xdg_surface *surface,
+                                  uint32_t serial)
+{
+    xdg_surface_ack_configure(surface, serial);
+}
+
+/* Positional listener table for xdg_surface; its only entry is the configure handler. */
+static const struct xdg_surface_listener xdg_surface_listener = {
+    handle_surface_config,
+};
+
+/*
+ * Toplevel configure handler (first entry of xdg_toplevel_listener).
+ *
+ * Records the size suggested by the compositor, decodes the state array
+ * (fullscreen / maximized / activated / suspended / tiled), mirrors
+ * fullscreen and maximized into the VO options, and decides whether a
+ * resize has to be scheduled.
+ *
+ * width/height are multiplied by wl->scaling before being stored in
+ * wl->geometry / wl->window_size, but are passed unscaled to
+ * prepare_resize(). A value of 0 for either means "reuse the old size".
+ */
+static void handle_toplevel_config(void *data, struct xdg_toplevel *toplevel,
+                                   int32_t width, int32_t height, struct wl_array *states)
+{
+    struct vo_wayland_state *wl = data;
+    struct mp_vo_opts *vo_opts = wl->vo_opts;
+    struct mp_rect old_geometry = wl->geometry;
+
+    // Remember the previous suggestion so an unchanged size can be detected below.
+    int old_toplevel_width = wl->toplevel_width;
+    int old_toplevel_height = wl->toplevel_height;
+    wl->toplevel_width = width;
+    wl->toplevel_height = height;
+
+    if (!wl->configured) {
+        /* Save initial window size if the compositor gives us a hint here. */
+        bool autofit_or_geometry = vo_opts->geometry.wh_valid || vo_opts->autofit.wh_valid ||
+                                   vo_opts->autofit_larger.wh_valid || vo_opts->autofit_smaller.wh_valid;
+        // A user-supplied geometry/autofit option takes precedence over the hint.
+        if (width && height && !autofit_or_geometry) {
+            wl->initial_size_hint = true;
+            wl->window_size = (struct mp_rect){0, 0, width, height};
+            wl->geometry = wl->window_size;
+        }
+        return;
+    }
+
+    bool is_maximized = false;
+    bool is_fullscreen = false;
+    bool is_activated = false;
+    bool is_suspended = false;
+    bool is_tiled = false;
+    enum xdg_toplevel_state *state;
+    wl_array_for_each(state, states) {
+        switch (*state) {
+        case XDG_TOPLEVEL_STATE_FULLSCREEN:
+            is_fullscreen = true;
+            break;
+        case XDG_TOPLEVEL_STATE_RESIZING:
+            break;
+        case XDG_TOPLEVEL_STATE_ACTIVATED:
+            is_activated = true;
+            /*
+             * If we get an ACTIVATED state, we know it cannot be
+             * minimized, but it may not have been minimized
+             * previously, so we can't detect the exact state.
+             */
+            vo_opts->window_minimized = false;
+            m_config_cache_write_opt(wl->vo_opts_cache,
+                                     &vo_opts->window_minimized);
+            break;
+        case XDG_TOPLEVEL_STATE_TILED_TOP:
+        case XDG_TOPLEVEL_STATE_TILED_LEFT:
+        case XDG_TOPLEVEL_STATE_TILED_RIGHT:
+        case XDG_TOPLEVEL_STATE_TILED_BOTTOM:
+            is_tiled = true;
+            break;
+        case XDG_TOPLEVEL_STATE_MAXIMIZED:
+            is_maximized = true;
+            break;
+        case XDG_TOPLEVEL_STATE_SUSPENDED:
+            is_suspended = true;
+            break;
+        }
+    }
+
+    if (wl->hidden != is_suspended)
+        wl->hidden = is_suspended;
+
+    // Mirror compositor-side fullscreen/maximized changes into the options
+    // and note that a state transition happened.
+    if (vo_opts->fullscreen != is_fullscreen) {
+        wl->state_change = true;
+        vo_opts->fullscreen = is_fullscreen;
+        m_config_cache_write_opt(wl->vo_opts_cache, &vo_opts->fullscreen);
+    }
+
+    if (vo_opts->window_maximized != is_maximized) {
+        wl->state_change = true;
+        vo_opts->window_maximized = is_maximized;
+        m_config_cache_write_opt(wl->vo_opts_cache, &vo_opts->window_maximized);
+    }
+
+    wl->tiled = is_tiled;
+
+    // While any of these states is set, wl->window_size is left untouched.
+    wl->locked_size = is_fullscreen || is_maximized || is_tiled;
+
+    // Re-send a decoration request that has not been answered yet.
+    if (wl->requested_decoration)
+        request_decoration_mode(wl, wl->requested_decoration);
+
+    if (wl->activated != is_activated) {
+        wl->activated = is_activated;
+        guess_focus(wl);
+        /* Just force a redraw to be on the safe side. */
+        if (wl->activated) {
+            wl->hidden = false;
+            wl->pending_vo_events |= VO_EVENT_EXPOSE;
+        }
+    }
+
+    // Leaving a locked state: restore the remembered window size.
+    if (wl->state_change) {
+        if (!wl->locked_size) {
+            wl->geometry = wl->window_size;
+            wl->state_change = false;
+            goto resize;
+        }
+    }
+
+    /* Reuse old size if either of these are 0. */
+    if (width == 0 || height == 0) {
+        if (!wl->locked_size) {
+            wl->geometry = wl->window_size;
+        }
+        goto resize;
+    }
+
+    // Nothing to do if the compositor repeated the previous suggestion.
+    if (old_toplevel_width == wl->toplevel_width &&
+        old_toplevel_height == wl->toplevel_height)
+        return;
+
+    if (!wl->locked_size) {
+        if (vo_opts->keepaspect) {
+            // NOTE(review): wl->reduced_width is used as a divisor here;
+            // confirm it cannot be 0 at this point.
+            double scale_factor = (double)width / wl->reduced_width;
+            width = ceil(wl->reduced_width * scale_factor);
+            if (vo_opts->keepaspect_window)
+                height = ceil(wl->reduced_height * scale_factor);
+        }
+        wl->window_size.x0 = 0;
+        wl->window_size.y0 = 0;
+        wl->window_size.x1 = round(width * wl->scaling);
+        wl->window_size.y1 = round(height * wl->scaling);
+    }
+    wl->geometry.x0 = 0;
+    wl->geometry.y0 = 0;
+    wl->geometry.x1 = round(width * wl->scaling);
+    wl->geometry.y1 = round(height * wl->scaling);
+
+    if (mp_rect_equals(&old_geometry, &wl->geometry))
+        return;
+
+resize:
+    MP_VERBOSE(wl, "Resizing due to xdg from %ix%i to %ix%i\n",
+               mp_rect_w(old_geometry), mp_rect_h(old_geometry),
+               mp_rect_w(wl->geometry), mp_rect_h(wl->geometry));
+
+    prepare_resize(wl, width, height);
+    wl->toplevel_configured = true;
+}
+
+/* Toplevel close request: forwarded to the input layer as MP_KEY_CLOSE_WIN. */
+static void handle_toplevel_close(void *data, struct xdg_toplevel *xdg_toplevel)
+{
+    struct vo_wayland_state *wl = data;
+    mp_input_put_key(wl->vo->input_ctx, MP_KEY_CLOSE_WIN);
+}
+
+/*
+ * Stores the bounds suggested by the compositor, multiplied by the current
+ * wl->scaling, in wl->bounded_width / wl->bounded_height.
+ */
+static void handle_configure_bounds(void *data, struct xdg_toplevel *xdg_toplevel,
+                                    int32_t width, int32_t height)
+{
+    struct vo_wayland_state *wl = data;
+    wl->bounded_width = width * wl->scaling;
+    wl->bounded_height = height * wl->scaling;
+}
+
+#ifdef XDG_TOPLEVEL_WM_CAPABILITIES_SINCE_VERSION
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void handle_wm_capabilities(void *data, struct xdg_toplevel *xdg_toplevel,
+                                   struct wl_array *capabilities)
+{
+}
+#endif
+
+/*
+ * Positional listener table for the xdg_toplevel. The last entry is only
+ * compiled in when XDG_TOPLEVEL_WM_CAPABILITIES_SINCE_VERSION is defined.
+ */
+static const struct xdg_toplevel_listener xdg_toplevel_listener = {
+    handle_toplevel_config,
+    handle_toplevel_close,
+    handle_configure_bounds,
+#ifdef XDG_TOPLEVEL_WM_CAPABILITIES_SINCE_VERSION
+    handle_wm_capabilities,
+#endif
+};
+
+#if HAVE_WAYLAND_PROTOCOLS_1_31
+/*
+ * Fractional preferred-scale event. `scale` is divided by 120 to obtain the
+ * floating-point factor stored in wl->scaling (forced to 1 when
+ * dmabuf_wayland is in use). Flags a DPI event and, if an output is already
+ * known, rescales the stored geometry and schedules a resize.
+ */
+static void preferred_scale(void *data,
+                            struct wp_fractional_scale_v1 *fractional_scale,
+                            uint32_t scale)
+{
+    struct vo_wayland_state *wl = data;
+    double previous_scale = wl->scaling;
+
+    // dmabuf_wayland is always wl->scaling = 1
+    if (wl->using_dmabuf_wayland) {
+        wl->scaling = 1;
+    } else {
+        wl->scaling = (double)scale / 120;
+    }
+    MP_VERBOSE(wl, "Obtained preferred scale, %f, from the compositor.\n",
+               wl->scaling);
+    wl->pending_vo_events |= VO_EVENT_DPI;
+
+    if (!wl->current_output)
+        return;
+
+    rescale_geometry(wl, previous_scale);
+    set_geometry(wl, false);
+    prepare_resize(wl, 0, 0);
+}
+
+/* Positional listener table; its only entry is the preferred-scale handler. */
+static const struct wp_fractional_scale_v1_listener fractional_scale_listener = {
+    preferred_scale,
+};
+#endif
+
+/*
+ * Maps a decoration mode value to a human-readable string for logging.
+ * Returns "<unknown>" for any value other than the two known modes.
+ */
+static const char *zxdg_decoration_mode_to_str(const uint32_t mode)
+{
+    if (mode == ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE)
+        return "server-side";
+    if (mode == ZXDG_TOPLEVEL_DECORATION_V1_MODE_CLIENT_SIDE)
+        return "client-side";
+    return "<unknown>";
+}
+
+/*
+ * Decoration configure handler: the compositor reports the mode in effect.
+ *
+ * Logs a debug message if the mode differs from a pending request, clears
+ * the pending request, and writes the result into the `border` option
+ * (true exactly when the mode is server-side).
+ */
+static void configure_decorations(void *data,
+                                  struct zxdg_toplevel_decoration_v1 *xdg_toplevel_decoration,
+                                  uint32_t mode)
+{
+    struct vo_wayland_state *wl = data;
+    struct mp_vo_opts *opts = wl->vo_opts;
+    const bool server_side = mode == ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE;
+
+    if (wl->requested_decoration && mode != wl->requested_decoration) {
+        MP_DBG(wl,
+               "Requested %s decorations but compositor responded with %s. "
+               "It is likely that compositor wants us to stay in a given mode.\n",
+               zxdg_decoration_mode_to_str(wl->requested_decoration),
+               zxdg_decoration_mode_to_str(mode));
+    }
+
+    wl->requested_decoration = 0;
+
+    if (server_side) {
+        MP_VERBOSE(wl, "Enabling server decorations\n");
+    } else {
+        MP_VERBOSE(wl, "Disabling server decorations\n");
+    }
+
+    opts->border = server_side;
+    m_config_cache_write_opt(wl->vo_opts_cache, &opts->border);
+}
+
+/* Positional listener table; its only entry is the decoration configure handler. */
+static const struct zxdg_toplevel_decoration_v1_listener decoration_listener = {
+    configure_decorations,
+};
+
+/*
+ * Clock id announcement from the presentation interface. Presentation
+ * feedback is enabled (wl->use_present) only when the compositor's clock is
+ * CLOCK_MONOTONIC or CLOCK_MONOTONIC_RAW; any other clock leaves it untouched.
+ */
+static void pres_set_clockid(void *data, struct wp_presentation *pres,
+                             uint32_t clockid)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (clockid != CLOCK_MONOTONIC && clockid != CLOCK_MONOTONIC_RAW)
+        return;
+
+    wl->use_present = true;
+}
+
+/* Positional listener table; its only entry is the clock id handler. */
+static const struct wp_presentation_listener pres_listener = {
+    pres_set_clockid,
+};
+
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void feedback_sync_output(void *data, struct wp_presentation_feedback *fback,
+                               struct wl_output *output)
+{
+}
+
+/*
+ * Presentation feedback "presented" handler.
+ *
+ * Releases the feedback object from the pool, stores the reported refresh
+ * interval and forwards the presentation timestamp and sequence counter to
+ * present_sync_update_values().
+ */
+static void feedback_presented(void *data, struct wp_presentation_feedback *fback,
+                              uint32_t tv_sec_hi, uint32_t tv_sec_lo,
+                              uint32_t tv_nsec, uint32_t refresh_nsec,
+                              uint32_t seq_hi, uint32_t seq_lo,
+                              uint32_t flags)
+{
+    struct vo_wayland_feedback_pool *pool = data;
+    struct vo_wayland_state *wl = pool->wl;
+
+    if (fback)
+        remove_feedback(pool, fback);
+
+    wl->refresh_interval = (int64_t)refresh_nsec;
+
+    // Very similar to oml_sync_control: we assume that every time the
+    // compositor sends feedback, a buffer swap has already been performed.
+    //
+    // Notes:
+    //  - tv_sec_hi/tv_sec_lo (plus tv_nsec) are the equivalent of oml's ust
+    //  - seq_hi/seq_lo are the equivalent of oml's msc
+    //  - these values are updated every time feedback arrives.
+
+    // Both 64-bit quantities arrive split into two 32-bit halves.
+    int64_t sec = ((uint64_t) tv_sec_hi << 32) | (uint64_t) tv_sec_lo;
+    int64_t msc = ((uint64_t) seq_hi << 32) | (uint64_t) seq_lo;
+    int64_t ust = MP_TIME_S_TO_NS(sec) + (uint64_t) tv_nsec;
+
+    present_sync_update_values(wl->present, ust, msc);
+}
+
+/*
+ * Presentation feedback "discarded" handler: nothing to record, just
+ * release the feedback object from the pool.
+ */
+static void feedback_discarded(void *data, struct wp_presentation_feedback *fback)
+{
+    struct vo_wayland_feedback_pool *pool = data;
+
+    if (!fback)
+        return;
+
+    remove_feedback(pool, fback);
+}
+
+/* Positional listener table for presentation feedback objects. */
+static const struct wp_presentation_feedback_listener feedback_listener = {
+    feedback_sync_output,
+    feedback_presented,
+    feedback_discarded,
+};
+
+/* Forward declaration: frame_callback() re-registers itself through it. */
+static const struct wl_callback_listener frame_listener;
+
+/*
+ * Frame callback handler.
+ *
+ * Destroys the callback that just fired, requests the next frame callback
+ * on wl->callback_surface, and (when presentation feedback is enabled)
+ * requests a feedback object and stores it in the feedback pool. Finally
+ * clears the frame_wait and hidden flags.
+ */
+static void frame_callback(void *data, struct wl_callback *callback, uint32_t time)
+{
+    struct vo_wayland_state *wl = data;
+
+    if (callback)
+        wl_callback_destroy(callback);
+
+    // Re-arm for the next frame.
+    wl->frame_callback = wl_surface_frame(wl->callback_surface);
+    wl_callback_add_listener(wl->frame_callback, &frame_listener, wl);
+
+    if (wl->use_present) {
+        struct wp_presentation_feedback *feedback =
+            wp_presentation_feedback(wl->presentation, wl->callback_surface);
+        add_feedback(wl->fback_pool, feedback);
+        wp_presentation_feedback_add_listener(feedback, &feedback_listener,
+                                              wl->fback_pool);
+    }
+
+    wl->frame_wait = false;
+    wl->hidden = false;
+}
+
+/* Positional listener table; its only entry is the frame callback handler. */
+static const struct wl_callback_listener frame_listener = {
+    frame_callback,
+};
+
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void done(void *data,
+                 struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1)
+{
+}
+
+/*
+ * dmabuf feedback "format_table" handler.
+ *
+ * Maps the table read-only from `fd` and stores the mapping and its size in
+ * wl->format_map / wl->format_size. The fd is closed in every case. If the
+ * mapping fails, any previously stored table is left in place.
+ */
+static void format_table(void *data,
+                         struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1,
+                         int32_t fd,
+                         uint32_t size)
+{
+    struct vo_wayland_state *wl = data;
+
+    void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
+    // The mapping stays valid after the descriptor is closed.
+    close(fd);
+
+    if (map == MAP_FAILED)
+        return;
+
+    // This event can arrive more than once; release the previous mapping
+    // instead of leaking it when it gets replaced.
+    // NOTE(review): assumes wl->format_map is NULL until the first table
+    // has been stored - confirm against the state initialization.
+    if (wl->format_map)
+        munmap(wl->format_map, wl->format_size);
+
+    wl->format_map = map;
+    wl->format_size = size;
+}
+
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void main_device(void *data,
+                        struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1,
+                        struct wl_array *device)
+{
+}
+
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void tranche_done(void *data,
+                         struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1)
+{
+}
+
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void tranche_target_device(void *data,
+                                  struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1,
+                                  struct wl_array *device)
+{
+}
+
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void tranche_formats(void *data,
+                            struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1,
+                            struct wl_array *indices)
+{
+}
+
+/* Intentionally empty; present so the matching listener entry has a handler. */
+static void tranche_flags(void *data,
+                          struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1,
+                          uint32_t flags)
+{
+}
+
+/*
+ * Positional listener table for dmabuf feedback. Only format_table does
+ * any work; the other handlers are empty.
+ */
+static const struct zwp_linux_dmabuf_feedback_v1_listener dmabuf_feedback_listener = {
+    done,
+    format_table,
+    main_device,
+    tranche_done,
+    tranche_target_device,
+    tranche_formats,
+    tranche_flags,
+};
+
+/*
+ * Registry "global added" handler: binds every advertised interface this
+ * backend knows about and stores the resulting proxy in `wl`.
+ *
+ * `interface` is the advertised interface name, `id` its registry name and
+ * `ver` the version offered by the compositor. Some interfaces are only
+ * bound when `ver` reaches a minimum; others are bound at a fixed version
+ * without checking `ver`.
+ *
+ * `found` starts at 1, so the trailing `&& found++` in each condition always
+ * evaluates as true and only runs (incrementing the counter) when the
+ * preceding checks matched. `found > 1` at the end therefore means that
+ * something was bound.
+ */
+static void registry_handle_add(void *data, struct wl_registry *reg, uint32_t id,
+                                const char *interface, uint32_t ver)
+{
+    int found = 1;
+    struct vo_wayland_state *wl = data;
+
+    if (!strcmp(interface, wl_compositor_interface.name) && (ver >= 4) && found++) {
+#ifdef HAVE_WAYLAND_1_22
+        ver = MPMIN(ver, 6); /* Cap at 6 in case new events are added later. */
+#else
+        ver = 4;
+#endif
+        wl->compositor = wl_registry_bind(reg, id, &wl_compositor_interface, ver);
+        // All surfaces used by the backend are created as soon as the
+        // compositor is bound.
+        wl->surface = wl_compositor_create_surface(wl->compositor);
+        wl->video_surface = wl_compositor_create_surface(wl->compositor);
+        wl->osd_surface = wl_compositor_create_surface(wl->compositor);
+
+        /* never accept input events on anything besides the main surface */
+        struct wl_region *region = wl_compositor_create_region(wl->compositor);
+        wl_surface_set_input_region(wl->osd_surface, region);
+        wl_surface_set_input_region(wl->video_surface, region);
+        wl_region_destroy(region);
+
+        wl->cursor_surface = wl_compositor_create_surface(wl->compositor);
+        wl_surface_add_listener(wl->surface, &surface_listener, wl);
+    }
+
+    if (!strcmp(interface, wl_subcompositor_interface.name) && (ver >= 1) && found++) {
+        wl->subcompositor = wl_registry_bind(reg, id, &wl_subcompositor_interface, 1);
+    }
+
+    if (!strcmp (interface, zwp_linux_dmabuf_v1_interface.name) && (ver >= 4) && found++) {
+        wl->dmabuf = wl_registry_bind(reg, id, &zwp_linux_dmabuf_v1_interface, 4);
+        wl->dmabuf_feedback = zwp_linux_dmabuf_v1_get_default_feedback(wl->dmabuf);
+        zwp_linux_dmabuf_feedback_v1_add_listener(wl->dmabuf_feedback, &dmabuf_feedback_listener, wl);
+    }
+
+    if (!strcmp (interface, wp_viewporter_interface.name) && (ver >= 1) && found++) {
+       wl->viewporter = wl_registry_bind (reg, id, &wp_viewporter_interface, 1);
+    }
+
+    if (!strcmp(interface, wl_data_device_manager_interface.name) && (ver >= 3) && found++) {
+        wl->dnd_devman = wl_registry_bind(reg, id, &wl_data_device_manager_interface, 3);
+    }
+
+    if (!strcmp(interface, wl_output_interface.name) && (ver >= 2) && found++) {
+        // Each output gets its own tracking struct, owned by `wl`.
+        struct vo_wayland_output *output = talloc_zero(wl, struct vo_wayland_output);
+
+        output->wl     = wl;
+        output->id     = id;
+        output->scale  = 1;
+        output->name   = "";
+
+        ver = MPMIN(ver, 4); /* Cap at 4 in case new events are added later. */
+        output->output = wl_registry_bind(reg, id, &wl_output_interface, ver);
+        wl_output_add_listener(output->output, &output_listener, output);
+        wl_list_insert(&wl->output_list, &output->link);
+    }
+
+    if (!strcmp(interface, wl_seat_interface.name) && found++) {
+        wl->seat = wl_registry_bind(reg, id, &wl_seat_interface, 1);
+        wl_seat_add_listener(wl->seat, &seat_listener, wl);
+    }
+
+    if (!strcmp(interface, wl_shm_interface.name) && found++) {
+        wl->shm = wl_registry_bind(reg, id, &wl_shm_interface, 1);
+    }
+
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+    if (!strcmp(interface, wp_content_type_manager_v1_interface.name) && found++) {
+        wl->content_type_manager = wl_registry_bind(reg, id, &wp_content_type_manager_v1_interface, 1);
+    }
+
+    if (!strcmp(interface, wp_single_pixel_buffer_manager_v1_interface.name) && found++) {
+        wl->single_pixel_manager = wl_registry_bind(reg, id, &wp_single_pixel_buffer_manager_v1_interface, 1);
+    }
+#endif
+
+#if HAVE_WAYLAND_PROTOCOLS_1_31
+    if (!strcmp(interface, wp_fractional_scale_manager_v1_interface.name) && found++) {
+        wl->fractional_scale_manager = wl_registry_bind(reg, id, &wp_fractional_scale_manager_v1_interface, 1);
+    }
+#endif
+
+#if HAVE_WAYLAND_PROTOCOLS_1_32
+    if (!strcmp(interface, wp_cursor_shape_manager_v1_interface.name) && found++) {
+        wl->cursor_shape_manager = wl_registry_bind(reg, id, &wp_cursor_shape_manager_v1_interface, 1);
+    }
+#endif
+
+    if (!strcmp(interface, wp_presentation_interface.name) && found++) {
+        wl->presentation = wl_registry_bind(reg, id, &wp_presentation_interface, 1);
+        wp_presentation_add_listener(wl->presentation, &pres_listener, wl);
+    }
+
+    if (!strcmp(interface, xdg_wm_base_interface.name) && found++) {
+        ver = MPMIN(ver, 6); /* Cap at 6 in case new events are added later. */
+        wl->wm_base = wl_registry_bind(reg, id, &xdg_wm_base_interface, ver);
+        xdg_wm_base_add_listener(wl->wm_base, &xdg_wm_base_listener, wl);
+    }
+
+    if (!strcmp(interface, zxdg_decoration_manager_v1_interface.name) && found++) {
+        wl->xdg_decoration_manager = wl_registry_bind(reg, id, &zxdg_decoration_manager_v1_interface, 1);
+    }
+
+    if (!strcmp(interface, zwp_idle_inhibit_manager_v1_interface.name) && found++) {
+        wl->idle_inhibit_manager = wl_registry_bind(reg, id, &zwp_idle_inhibit_manager_v1_interface, 1);
+    }
+
+    // found was incremented by whichever branch matched.
+    if (found > 1)
+        MP_VERBOSE(wl, "Registered for protocol %s\n", interface);
+}
+
+/*
+ * Registry "global removed" handler. Only outputs are tracked: the output
+ * whose registry id equals `id` is removed and freed; other ids are ignored.
+ */
+static void registry_handle_remove(void *data, struct wl_registry *reg, uint32_t id)
+{
+    struct vo_wayland_state *wl = data;
+    struct vo_wayland_output *entry, *next;
+
+    // The _safe variant is needed because the matching entry is unlinked.
+    wl_list_for_each_safe(entry, next, &wl->output_list, link) {
+        if (entry->id != id)
+            continue;
+
+        remove_output(entry);
+        return;
+    }
+}
+
+/* Positional listener table for the registry: global added, global removed. */
+static const struct wl_registry_listener registry_listener = {
+    registry_handle_add,
+    registry_handle_remove,
+};
+
+/* Static functions */
+/*
+ * Polls the drag-and-drop descriptor (wl->dnd_fd) without blocking.
+ *
+ * When data is readable, everything is read into a NUL-terminated buffer
+ * and handed to mp_event_drop_mime_data() together with the stored MIME
+ * type and action; afterwards the per-drop state (MIME type, action, score)
+ * is reset and the offer is finished if an action was set. The descriptor
+ * is closed and set to -1 on POLLIN, POLLERR or POLLHUP.
+ *
+ * Does nothing if no descriptor is open or poll() reports no events.
+ */
+static void check_dnd_fd(struct vo_wayland_state *wl)
+{
+    if (wl->dnd_fd == -1)
+        return;
+
+    struct pollfd fdp = { wl->dnd_fd, POLLIN | POLLHUP, 0 };
+    if (poll(&fdp, 1, 0) <= 0)
+        return;
+
+    if (fdp.revents & POLLIN) {
+        ptrdiff_t offset = 0;
+        // read() returns ssize_t. It must be kept in a signed type so that
+        // the -1 error return ends the loop instead of wrapping around to a
+        // huge positive count.
+        ssize_t data_read = 0;
+        const size_t chunk_size = 1;
+        uint8_t *buffer = ta_zalloc_size(wl, chunk_size);
+        if (!buffer)
+            goto end;
+
+        while ((data_read = read(wl->dnd_fd, buffer + offset, chunk_size)) > 0) {
+            offset += data_read;
+            // Grow through a temporary so the old buffer is not lost, and
+            // check the result before touching the new memory.
+            uint8_t *grown = ta_realloc_size(wl, buffer, offset + chunk_size);
+            if (!grown) {
+                talloc_free(buffer);
+                goto end;
+            }
+            buffer = grown;
+            // Keep the tail zeroed so the data stays NUL-terminated.
+            memset(buffer + offset, 0, chunk_size);
+        }
+
+        MP_VERBOSE(wl, "Read %td bytes from the DND fd\n", offset);
+
+        struct bstr file_list = bstr0(buffer);
+        mp_event_drop_mime_data(wl->vo->input_ctx, wl->dnd_mime_type,
+                                file_list, wl->dnd_action);
+        talloc_free(buffer);
+end:
+        if (wl->dnd_mime_type)
+            talloc_free(wl->dnd_mime_type);
+
+        if (wl->dnd_action >= 0 && wl->dnd_offer)
+            wl_data_offer_finish(wl->dnd_offer);
+
+        wl->dnd_action = -1;
+        wl->dnd_mime_type = NULL;
+        wl->dnd_mime_score = 0;
+    }
+
+    if (fdp.revents & (POLLIN | POLLERR | POLLHUP)) {
+        close(wl->dnd_fd);
+        wl->dnd_fd = -1;
+    }
+}
+
+/*
+ * Determines whether the last known mouse position lies within
+ * `edge_pixels` of a window border and, if so, which resize edge applies.
+ *
+ * Left/right take precedence and are combined with top/bottom to form
+ * corners. Returns 1 and stores the edge in *edge on a hit. Returns 0 when
+ * fullscreen or maximized (leaving *edge untouched) or when no border is
+ * hit (setting *edge to 0).
+ */
+static int check_for_resize(struct vo_wayland_state *wl, int edge_pixels,
+                            enum xdg_toplevel_resize_edge *edge)
+{
+    if (wl->vo_opts->fullscreen || wl->vo_opts->window_maximized)
+        return 0;
+
+    const int x = wl->mouse_x;
+    const int y = wl->mouse_y;
+
+    const bool at_left   = x < edge_pixels;
+    const bool at_top    = y < edge_pixels;
+    const bool at_right  = x > (mp_rect_w(wl->geometry) - edge_pixels);
+    const bool at_bottom = y > (mp_rect_h(wl->geometry) - edge_pixels);
+
+    if (at_left) {
+        if (at_top) {
+            *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP_LEFT;
+        } else if (at_bottom) {
+            *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM_LEFT;
+        } else {
+            *edge = XDG_TOPLEVEL_RESIZE_EDGE_LEFT;
+        }
+        return 1;
+    }
+
+    if (at_right) {
+        if (at_top) {
+            *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP_RIGHT;
+        } else if (at_bottom) {
+            *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM_RIGHT;
+        } else {
+            *edge = XDG_TOPLEVEL_RESIZE_EDGE_RIGHT;
+        }
+        return 1;
+    }
+
+    if (at_top) {
+        *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP;
+        return 1;
+    }
+
+    if (at_bottom) {
+        *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM;
+        return 1;
+    }
+
+    *edge = 0;
+    return 0;
+}
+
+/*
+ * Creates the xkbcommon context used for keyboard handling.
+ *
+ * Note the inverted convention: returns true (1) on FAILURE and false (0)
+ * on success, matching the int-returning create_* helpers in this file.
+ */
+static bool create_input(struct vo_wayland_state *wl)
+{
+    wl->xkb_context = xkb_context_new(XKB_CONTEXT_NO_FLAGS);
+    if (wl->xkb_context)
+        return 0;
+
+    MP_ERR(wl, "failed to initialize input: check xkbcommon\n");
+    return 1;
+}
+
+/*
+ * Creates one viewport each for the main, OSD and video surfaces.
+ *
+ * Returns 0 on success, and also when no viewporter is bound (in which
+ * case nothing is created). Returns 1 if any of the three viewports could
+ * not be created.
+ */
+static int create_viewports(struct vo_wayland_state *wl)
+{
+    if (!wl->viewporter)
+        return 0;
+
+    wl->viewport = wp_viewporter_get_viewport(wl->viewporter, wl->surface);
+    wl->osd_viewport = wp_viewporter_get_viewport(wl->viewporter, wl->osd_surface);
+    wl->video_viewport = wp_viewporter_get_viewport(wl->viewporter, wl->video_surface);
+
+    if (wl->viewport && wl->osd_viewport && wl->video_viewport)
+        return 0;
+
+    MP_ERR(wl, "failed to create viewport interfaces!\n");
+    return 1;
+}
+
+/*
+ * Creates the xdg_surface for the main surface and its xdg_toplevel, and
+ * attaches the respective listeners.
+ *
+ * Each object is checked right after creation, so a failed creation is
+ * reported as an error instead of being passed on to the next protocol
+ * call. Returns 0 on success, 1 on failure.
+ */
+static int create_xdg_surface(struct vo_wayland_state *wl)
+{
+    wl->xdg_surface = xdg_wm_base_get_xdg_surface(wl->wm_base, wl->surface);
+    if (!wl->xdg_surface)
+        goto err;
+    xdg_surface_add_listener(wl->xdg_surface, &xdg_surface_listener, wl);
+
+    wl->xdg_toplevel = xdg_surface_get_toplevel(wl->xdg_surface);
+    if (!wl->xdg_toplevel)
+        goto err;
+    xdg_toplevel_add_listener(wl->xdg_toplevel, &xdg_toplevel_listener, wl);
+
+    return 0;
+
+err:
+    MP_ERR(wl, "failed to create xdg_surface and xdg_toplevel!\n");
+    return 1;
+}
+
+/*
+ * Stores `fback` in the first free slot of the pool. If every slot is
+ * occupied, the feedback object in the last slot is destroyed and replaced.
+ * A pool with len <= 0 stores nothing.
+ */
+static void add_feedback(struct vo_wayland_feedback_pool *fback_pool,
+                         struct wp_presentation_feedback *fback)
+{
+    for (int slot = 0; slot < fback_pool->len; ++slot) {
+        if (!fback_pool->fback[slot]) {
+            fback_pool->fback[slot] = fback;
+            return;
+        }
+
+        if (slot == fback_pool->len - 1) {
+            // Shouldn't happen in practice.
+            wp_presentation_feedback_destroy(fback_pool->fback[slot]);
+            fback_pool->fback[slot] = fback;
+        }
+    }
+}
+
+/*
+ * Asks the compositor to minimize the window if the window_minimized
+ * option is set and a toplevel exists. There is no "unminimize" request
+ * here; a cleared option results in no action.
+ */
+static void do_minimize(struct vo_wayland_state *wl)
+{
+    if (wl->xdg_toplevel && wl->vo_opts->window_minimized)
+        xdg_toplevel_set_minimized(wl->xdg_toplevel);
+}
+
+/*
+ * Builds a NULL-terminated array with a copy of the name of every output
+ * the surface is currently on (has_surface set). The output's `name` is
+ * used when non-NULL, otherwise its `model`.
+ *
+ * The array and the strings are allocated with a NULL talloc parent.
+ */
+static char **get_displays_spanned(struct vo_wayland_state *wl)
+{
+    char **names = NULL;
+    int count = 0;
+    struct vo_wayland_output *entry;
+
+    wl_list_for_each(entry, &wl->output_list, link) {
+        if (!entry->has_surface)
+            continue;
+
+        char *label = entry->name ? entry->name : entry->model;
+        MP_TARRAY_APPEND(NULL, names, count, talloc_strdup(NULL, label));
+    }
+
+    // Terminating entry.
+    MP_TARRAY_APPEND(NULL, names, count, NULL);
+    return names;
+}
+
+/*
+ * Returns the MP_KEY_MODIFIER_* mask for the modifiers (Shift, Ctrl, Alt,
+ * Logo/Meta) that are currently depressed and were not consumed when
+ * translating the key stored in wl->keyboard_code.
+ */
+static int get_mods(struct vo_wayland_state *wl)
+{
+    static const struct {
+        const char *xkb_name;
+        int mp_flag;
+    } table[] = {
+        { XKB_MOD_NAME_SHIFT, MP_KEY_MODIFIER_SHIFT },
+        { XKB_MOD_NAME_CTRL,  MP_KEY_MODIFIER_CTRL },
+        { XKB_MOD_NAME_ALT,   MP_KEY_MODIFIER_ALT },
+        { XKB_MOD_NAME_LOGO,  MP_KEY_MODIFIER_META },
+    };
+
+    int mask = 0;
+
+    for (int i = 0; i < MP_ARRAY_SIZE(table); i++) {
+        xkb_mod_index_t index = xkb_keymap_mod_get_index(wl->xkb_keymap,
+                                                         table[i].xkb_name);
+        if (xkb_state_mod_index_is_consumed(wl->xkb_state, wl->keyboard_code, index))
+            continue;
+        if (xkb_state_mod_index_is_active(wl->xkb_state, index,
+                                          XKB_STATE_MODS_DEPRESSED))
+            mask |= table[i].mp_flag;
+    }
+
+    return mask;
+}
+
+/*
+ * Lazily obtains the cursor shape device for wl->pointer. Does nothing if
+ * the device already exists, if no cursor shape manager is bound, or if the
+ * protocol support is compiled out.
+ */
+static void get_shape_device(struct vo_wayland_state *wl)
+{
+#if HAVE_WAYLAND_PROTOCOLS_1_32
+    if (wl->cursor_shape_device || !wl->cursor_shape_manager)
+        return;
+
+    wl->cursor_shape_device =
+        wp_cursor_shape_manager_v1_get_pointer(wl->cursor_shape_manager,
+                                               wl->pointer);
+#endif
+}
+
+/*
+ * Greatest common divisor of a and b (Euclidean algorithm, iterative).
+ *
+ * Returns a when b is 0 instead of dividing by zero; consequently the
+ * result is 0 only when both arguments are 0. For b != 0 the result is the
+ * same as the previous recursive implementation.
+ */
+static int greatest_common_divisor(int a, int b)
+{
+    while (b != 0) {
+        int rem = a % b;
+        a = b;
+        b = rem;
+    }
+    return a;
+}
+
+/*
+ * Wayland does not tell us directly whether the window is focused, so the
+ * state is guessed: focus is gained when the window is activated and has
+ * keyboard input, and lost when it is no longer activated. (This obviously
+ * cannot work for users without a keyboard.) A focus event is flagged
+ * whenever the guess flips.
+ */
+static void guess_focus(struct vo_wayland_state *wl)
+{
+    const bool gained = !wl->focused && wl->activated && wl->has_keyboard_input;
+    const bool lost = wl->focused && !wl->activated;
+
+    if (!gained && !lost)
+        return;
+
+    wl->focused = !wl->focused;
+    wl->pending_vo_events |= VO_EVENT_FOCUS;
+}
+
+/*
+ * Picks the output selected by the screen options (the fs-screen variants
+ * when fullscreen is set).
+ *
+ * Selection order while walking wl->output_list:
+ *  - no id and no name requested: the first output in the list;
+ *  - no id but a name requested: the first output whose name or model
+ *    equals the requested name;
+ *  - otherwise: the output at position screen_id in the list.
+ * If nothing matches, a warning is logged and the first output in the list
+ * is returned. Returns NULL only when the list is empty.
+ */
+static struct vo_wayland_output *find_output(struct vo_wayland_state *wl)
+{
+    int index = 0;
+    struct mp_vo_opts *opts = wl->vo_opts;
+    int screen_id = opts->fullscreen ? opts->fsscreen_id : opts->screen_id;
+    char *screen_name = opts->fullscreen ? opts->fsscreen_name : opts->screen_name;
+    struct vo_wayland_output *output = NULL;
+    struct vo_wayland_output *fallback_output = NULL;
+    wl_list_for_each(output, &wl->output_list, link) {
+        if (index == 0)
+            fallback_output = output;
+        if (screen_id == -1) {
+            if (!screen_name)
+                return output;
+            // Outputs are zero-allocated, so name/model may still be NULL
+            // if they have not been filled in yet; never hand that to
+            // strcmp().
+            if (output->name && !strcmp(screen_name, output->name))
+                return output;
+            if (output->model && !strcmp(screen_name, output->model))
+                return output;
+        }
+        if (screen_id == index++)
+            return output;
+    }
+    if (!fallback_output) {
+        MP_ERR(wl, "No screens could be found!\n");
+        return NULL;
+    } else if (screen_id >= 0) {
+        MP_WARN(wl, "Screen index %i not found/unavailable! Falling back to screen 0!\n", screen_id);
+    } else if (screen_name && screen_name[0]) {
+        MP_WARN(wl, "Screen name %s not found/unavailable! Falling back to screen 0!\n", screen_name);
+    }
+    return fallback_output;
+}
+
+/*
+ * Translates a key value into an mpv key code. ASCII letters, digits and
+ * the listed punctuation characters are passed through unchanged;
+ * everything else is looked up in the keymap table.
+ */
+static int lookupkey(int key)
+{
+    const char *passthrough_keys = " -+*/<>`~!@#$%^&()_{}:;\"\',.?\\|=[]";
+
+    const bool is_letter = (key >= 'a' && key <= 'z') || (key >= 'A' && key <= 'Z');
+    const bool is_digit = key >= '0' && key <= '9';
+    // The range check keeps strchr() from matching the terminating NUL and
+    // from being handed values outside a single byte.
+    const bool is_punct = key > 0 && key < 256 && strchr(passthrough_keys, key);
+
+    if (is_letter || is_digit || is_punct)
+        return key;
+
+    return lookup_keymap_table(keymap, key);
+}
+
+/*
+ * Sets the xdg window geometry to width x height and flags a resize event.
+ * A width or height of 0 is replaced by the corresponding dimension of
+ * wl->geometry divided by wl->scaling.
+ */
+static void prepare_resize(struct vo_wayland_state *wl, int width, int height)
+{
+    if (width == 0)
+        width = mp_rect_w(wl->geometry) / wl->scaling;
+    if (height == 0)
+        height = mp_rect_h(wl->geometry) / wl->scaling;
+
+    xdg_surface_set_window_geometry(wl->xdg_surface, 0, 0, width, height);
+    wl->pending_vo_events |= VO_EVENT_RESIZE;
+}
+
+/*
+ * Remembers `mode` as the pending decoration request and sends it to the
+ * compositor. The pending value is cleared again in configure_decorations().
+ */
+static void request_decoration_mode(struct vo_wayland_state *wl, uint32_t mode)
+{
+    wl->requested_decoration = mode;
+    zxdg_toplevel_decoration_v1_set_mode(wl->xdg_toplevel_decoration, mode);
+}
+
+/*
+ * Rescales the stored window size and geometry after wl->scaling changed.
+ * The x1/y1 members of both rectangles are divided by
+ * old_scale / wl->scaling; x0/y0 are not touched.
+ */
+static void rescale_geometry(struct vo_wayland_state *wl, double old_scale)
+{
+    const double factor = old_scale / wl->scaling;
+
+    wl->window_size.x1 = wl->window_size.x1 / factor;
+    wl->window_size.y1 = wl->window_size.y1 / factor;
+
+    wl->geometry.x1 = wl->geometry.x1 / factor;
+    wl->geometry.y1 = wl->geometry.y1 / factor;
+}
+
+/* Destroys every feedback object still held by the pool and empties its slots. */
+static void clean_feedback_pool(struct vo_wayland_feedback_pool *fback_pool)
+{
+    for (int slot = 0; slot < fback_pool->len; ++slot) {
+        if (!fback_pool->fback[slot])
+            continue;
+
+        wp_presentation_feedback_destroy(fback_pool->fback[slot]);
+        fback_pool->fback[slot] = NULL;
+    }
+}
+
+/*
+ * Destroys `fback` and clears its slot, but only if it is actually stored
+ * in the pool. Objects not found in the pool are left alone.
+ */
+static void remove_feedback(struct vo_wayland_feedback_pool *fback_pool,
+                            struct wp_presentation_feedback *fback)
+{
+    for (int slot = 0; slot < fback_pool->len; ++slot) {
+        if (fback_pool->fback[slot] != fback)
+            continue;
+
+        wp_presentation_feedback_destroy(fback);
+        fback_pool->fback[slot] = NULL;
+        return;
+    }
+}
+
+/*
+ * Unlinks an output from its list, destroys the wl_output proxy and frees
+ * the tracking struct together with its make/model strings.
+ * Passing NULL is a no-op.
+ */
+static void remove_output(struct vo_wayland_output *out)
+{
+    if (out == NULL)
+        return;
+
+    MP_VERBOSE(out->wl, "Deregistering output %s %s (0x%x)\n", out->make,
+               out->model, out->id);
+
+    wl_list_remove(&out->link);
+    wl_output_destroy(out->output);
+
+    talloc_free(out->make);
+    talloc_free(out->model);
+    talloc_free(out);
+}
+
+/*
+ * Sends the content type hint to the compositor. With the option set to -1
+ * ("auto") the value in wl->current_content_type is used, otherwise the
+ * option value itself. Does nothing without a content type manager or when
+ * the protocol support is compiled out.
+ */
+static void set_content_type(struct vo_wayland_state *wl)
+{
+    if (!wl->content_type_manager)
+        return;
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+    const bool is_auto = wl->vo_opts->content_type == -1;
+
+    if (is_auto) {
+        wp_content_type_v1_set_content_type(wl->content_type, wl->current_content_type);
+        return;
+    }
+
+    wp_content_type_v1_set_content_type(wl->content_type, wl->vo_opts->content_type);
+#endif
+}
+
+/*
+ * Sets the default cursor shape on the cursor shape device, using the
+ * serial stored in wl->pointer_id. Compiles to an empty function when the
+ * protocol support is not available.
+ */
+static void set_cursor_shape(struct vo_wayland_state *wl)
+{
+#if HAVE_WAYLAND_PROTOCOLS_1_32
+    wp_cursor_shape_device_v1_set_shape(wl->cursor_shape_device, wl->pointer_id,
+                                        WP_CURSOR_SHAPE_DEVICE_V1_SHAPE_DEFAULT);
+#endif
+}
+
+/*
+ * Shows or hides the mouse cursor and records the state in
+ * wl->cursor_visible.
+ *
+ * Hiding sets a NULL cursor surface on the pointer. Showing uses the cursor
+ * shape device when available; otherwise the first image of the default
+ * cursor theme is attached to wl->cursor_surface, with the hotspot divided
+ * by the integer buffer scale.
+ *
+ * Returns VO_TRUE on success, VO_FALSE if the cursor could not be spawned
+ * or has no buffer.
+ */
+static int set_cursor_visibility(struct vo_wayland_state *wl, bool on)
+{
+    wl->cursor_visible = on;
+
+    if (!on) {
+        wl_pointer_set_cursor(wl->pointer, wl->pointer_id, NULL, 0, 0);
+        return VO_TRUE;
+    }
+
+    if (wl->cursor_shape_device) {
+        set_cursor_shape(wl);
+    } else {
+        if (spawn_cursor(wl))
+            return VO_FALSE;
+
+        struct wl_cursor_image *image = wl->default_cursor->images[0];
+        struct wl_buffer *cursor_buffer = wl_cursor_image_get_buffer(image);
+        if (!cursor_buffer)
+            return VO_FALSE;
+
+        // Buffer scale is an integer of at least 1.
+        int buffer_scale = MPMAX(wl->scaling, 1);
+        wl_pointer_set_cursor(wl->pointer, wl->pointer_id, wl->cursor_surface,
+                              image->hotspot_x / buffer_scale,
+                              image->hotspot_y / buffer_scale);
+        wl_surface_set_buffer_scale(wl->cursor_surface, buffer_scale);
+        wl_surface_attach(wl->cursor_surface, cursor_buffer, 0, 0);
+        wl_surface_damage_buffer(wl->cursor_surface, 0, 0,
+                                 image->width, image->height);
+    }
+
+    wl_surface_commit(wl->cursor_surface);
+    return VO_TRUE;
+}
+
+/*
+ * Recomputes the window geometry for the current output and applies it to
+ * the VO. Does nothing when no output is known yet.
+ *
+ * Also refreshes the reduced aspect ratio (dwidth:dheight divided by their
+ * GCD) and, unless an initial size hint from the compositor is pending,
+ * updates wl->window_size. The hint is consumed by this call. With
+ * `resize` set, the geometry is updated (unless the size is locked) and a
+ * resize is scheduled.
+ */
+static void set_geometry(struct vo_wayland_state *wl, bool resize)
+{
+    if (!wl->current_output)
+        return;
+
+    struct vo *vo = wl->vo;
+    struct mp_rect screenrc = wl->current_output->geometry;
+    struct vo_win_geometry geo;
+
+    vo_calc_window_geometry2(vo, &screenrc, wl->scaling, &geo);
+    vo_apply_window_geometry(vo, &geo);
+
+    // NOTE(review): the GCD is used as a divisor; confirm dwidth/dheight
+    // cannot both be 0 here.
+    int divisor = greatest_common_divisor(vo->dwidth, vo->dheight);
+    wl->reduced_width = vo->dwidth / divisor;
+    wl->reduced_height = vo->dheight / divisor;
+
+    if (wl->initial_size_hint) {
+        wl->initial_size_hint = false;
+    } else {
+        wl->window_size = (struct mp_rect){0, 0, vo->dwidth, vo->dheight};
+    }
+
+    if (!resize)
+        return;
+
+    if (!wl->locked_size)
+        wl->geometry = wl->window_size;
+    prepare_resize(wl, 0, 0);
+}
+
+/* Enable or disable cursor passthrough on the main surface.
+ * With passthrough an empty input region is installed; otherwise the input
+ * region is reset by passing NULL. */
+static void set_input_region(struct vo_wayland_state *wl, bool passthrough)
+{
+    if (!passthrough) {
+        wl_surface_set_input_region(wl->surface, NULL);
+        return;
+    }
+
+    // A freshly created region has no rectangles added to it.
+    struct wl_region *empty = wl_compositor_create_region(wl->compositor);
+    wl_surface_set_input_region(wl->surface, empty);
+    wl_region_destroy(empty);
+}
+
+/* Create (state != 0) or destroy (state == 0) the idle inhibitor for the
+ * main surface.
+ * Returns VO_NOTIMPL without an idle-inhibit manager, VO_TRUE otherwise
+ * (including when the inhibitor is already in the requested state). */
+static int set_screensaver_inhibitor(struct vo_wayland_state *wl, int state)
+{
+    if (!wl->idle_inhibit_manager)
+        return VO_NOTIMPL;
+
+    bool active = wl->idle_inhibitor != NULL;
+    if (state == active)
+        return VO_TRUE;
+
+    if (!state) {
+        MP_VERBOSE(wl, "Disabling the idle inhibitor\n");
+        zwp_idle_inhibitor_v1_destroy(wl->idle_inhibitor);
+        wl->idle_inhibitor = NULL;
+        return VO_TRUE;
+    }
+
+    MP_VERBOSE(wl, "Enabling idle inhibitor\n");
+    wl->idle_inhibitor =
+        zwp_idle_inhibit_manager_v1_create_inhibitor(wl->idle_inhibit_manager,
+                                                     wl->surface);
+    return VO_TRUE;
+}
+
+/* Adopt the integer scale of the current output as the surface buffer scale
+ * and rescale the stored geometry accordingly. Skipped entirely when the
+ * fractional-scale manager is available. */
+static void set_surface_scaling(struct vo_wayland_state *wl)
+{
+    if (wl->fractional_scale_manager)
+        return;
+
+    double previous_scale = wl->scaling;
+
+    // dmabuf_wayland is always wl->scaling = 1
+    if (wl->using_dmabuf_wayland) {
+        wl->scaling = 1;
+    } else {
+        wl->scaling = wl->current_output->scale;
+    }
+
+    rescale_geometry(wl, previous_scale);
+    wl_surface_set_buffer_scale(wl->surface, wl->scaling);
+}
+
+/* Clamp window_size to the bounded width/height stored in the state.
+ * A bound of 0 means "no bound" for that dimension. */
+static void set_window_bounds(struct vo_wayland_state *wl)
+{
+    const struct mp_vo_opts *vo_opts = wl->vo_opts;
+
+    // If the user has set geometry/autofit and the option is auto,
+    // don't use these.
+    bool user_sized = vo_opts->geometry.wh_valid ||
+                      vo_opts->autofit.wh_valid ||
+                      vo_opts->autofit_larger.wh_valid ||
+                      vo_opts->autofit_smaller.wh_valid;
+    if (wl->opts->configure_bounds == -1 && user_sized)
+        return;
+
+    if (wl->bounded_width && wl->window_size.x1 > wl->bounded_width)
+        wl->window_size.x1 = wl->bounded_width;
+    if (wl->bounded_height && wl->window_size.y1 > wl->bounded_height)
+        wl->window_size.y1 = wl->bounded_height;
+}
+
+/* (Re)load the xcursor theme and the default "left_ptr" cursor for the
+ * current scaling.
+ *
+ * The theme name and base size come from the XCURSOR_THEME and XCURSOR_SIZE
+ * environment variables (size defaults to 24 if unset or invalid).
+ *
+ * Returns 0 when nothing needs to be done or the cursor was loaded, and 1
+ * when loading failed. After a failure, cursor_theme/default_cursor never
+ * point at freed memory and allocated_cursor_scale is 0, so the next call
+ * retries instead of reusing stale state. */
+static int spawn_cursor(struct vo_wayland_state *wl)
+{
+    /* Don't use this if we have cursor-shape. */
+    if (wl->cursor_shape_device)
+        return 0;
+    /* Reuse if size is identical */
+    // NOTE(review): allocated_cursor_scale is an int while scaling is a
+    // double, so a fractional scaling value never compares equal here.
+    if (!wl->pointer || wl->allocated_cursor_scale == wl->scaling)
+        return 0;
+
+    // Destroying the theme also frees the cursors obtained from it, so
+    // default_cursor must not survive it. Resetting the recorded scale keeps
+    // a later call from taking the "reuse" path above with no valid cursor
+    // if reloading fails below.
+    if (wl->cursor_theme)
+        wl_cursor_theme_destroy(wl->cursor_theme);
+    wl->cursor_theme = NULL;
+    wl->default_cursor = NULL;
+    wl->allocated_cursor_scale = 0;
+
+    const char *xcursor_theme = getenv("XCURSOR_THEME");
+    const char *size_str = getenv("XCURSOR_SIZE");
+    int size = 24;
+    if (size_str != NULL) {
+        errno = 0;
+        char *end;
+        long size_long = strtol(size_str, &end, 10);
+        // Only accept a fully parsed, positive value that fits into an int.
+        if (!*end && !errno && size_long > 0 && size_long <= INT_MAX)
+            size = (int)size_long;
+    }
+
+    wl->cursor_theme = wl_cursor_theme_load(xcursor_theme, size*wl->scaling, wl->shm);
+    if (!wl->cursor_theme) {
+        MP_ERR(wl, "Unable to load cursor theme!\n");
+        return 1;
+    }
+
+    wl->default_cursor = wl_cursor_theme_get_cursor(wl->cursor_theme, "left_ptr");
+    if (!wl->default_cursor) {
+        MP_ERR(wl, "Unable to load cursor theme!\n");
+        return 1;
+    }
+
+    wl->allocated_cursor_scale = wl->scaling;
+
+    return 0;
+}
+
+/* Apply the current value of the fullscreen option to the toplevel.
+ * When a specific fullscreen screen (by id or name) is configured, that
+ * output is requested; otherwise the compositor picks the output.
+ * No-op without an xdg_toplevel. */
+static void toggle_fullscreen(struct vo_wayland_state *wl)
+{
+    if (!wl->xdg_toplevel)
+        return;
+    wl->state_change = true;
+
+    if (!wl->vo_opts->fullscreen) {
+        xdg_toplevel_unset_fullscreen(wl->xdg_toplevel);
+        return;
+    }
+
+    bool specific_screen = wl->vo_opts->fsscreen_id >= 0 || wl->vo_opts->fsscreen_name;
+    struct wl_output *target = NULL;
+    if (specific_screen) {
+        // find_output() can come back empty (the other callers in this file
+        // check for that); fall back to letting the compositor choose
+        // instead of dereferencing NULL.
+        struct vo_wayland_output *output = find_output(wl);
+        if (output)
+            target = output->output;
+    }
+    xdg_toplevel_set_fullscreen(wl->xdg_toplevel, target);
+}
+
+/* Apply the current value of the window-maximized option to the toplevel.
+ * No-op without an xdg_toplevel. */
+static void toggle_maximized(struct vo_wayland_state *wl)
+{
+    struct xdg_toplevel *toplevel = wl->xdg_toplevel;
+    if (!toplevel)
+        return;
+
+    wl->state_change = true;
+
+    if (!wl->vo_opts->window_maximized) {
+        xdg_toplevel_unset_maximized(toplevel);
+        return;
+    }
+    xdg_toplevel_set_maximized(toplevel);
+}
+
+/* Set the toplevel's app id from the appid option, if a toplevel exists. */
+static void update_app_id(struct vo_wayland_state *wl)
+{
+    struct xdg_toplevel *toplevel = wl->xdg_toplevel;
+    if (toplevel)
+        xdg_toplevel_set_app_id(toplevel, wl->vo_opts->appid);
+}
+
+/* Set the window title on the toplevel.
+ * Returns VO_NOTAVAIL without an xdg_toplevel, VO_TRUE otherwise. */
+static int update_window_title(struct vo_wayland_state *wl, const char *title)
+{
+    if (!wl->xdg_toplevel)
+        return VO_NOTAVAIL;
+
+    // All temporaries hang off one talloc context that is freed below.
+    void *ctx = talloc_new(NULL);
+    /* The xdg-shell protocol requires that the title is UTF-8. */
+    struct bstr sanitized = bstr_sanitize_utf8_latin1(ctx, bstr0(title));
+    char *title_utf8 = bstrto0(ctx, sanitized);
+    xdg_toplevel_set_title(wl->xdg_toplevel, title_utf8);
+    talloc_free(ctx);
+
+    return VO_TRUE;
+}
+
+/* Start an interactive move of the toplevel for the given input serial.
+ * No-op without an xdg_toplevel. */
+static void window_move(struct vo_wayland_state *wl, uint32_t serial)
+{
+    if (!wl->xdg_toplevel)
+        return;
+    xdg_toplevel_move(wl->xdg_toplevel, wl->seat, serial);
+}
+
+/* Wait up to timeout_ns for Wayland events and dispatch them.
+ *
+ * nfds is the number of entries of the local fds[] array handed to
+ * mp_poll(): callers in this file pass 1 to poll only the display fd, or 2
+ * to also poll the read end of the wakeup pipe.
+ *
+ * Returns immediately when display_fd is -1 (which is also the value this
+ * function stores after an error on the display fd). */
+static void wayland_dispatch_events(struct vo_wayland_state *wl, int nfds, int64_t timeout_ns)
+{
+    if (wl->display_fd == -1)
+        return;
+
+    struct pollfd fds[2] = {
+        {.fd = wl->display_fd,     .events = POLLIN },
+        {.fd = wl->wakeup_pipe[0], .events = POLLIN },
+    };
+
+    // Drain already queued events until the read can be prepared, then
+    // flush outgoing requests before blocking in poll.
+    while (wl_display_prepare_read(wl->display) != 0)
+        wl_display_dispatch_pending(wl->display);
+    wl_display_flush(wl->display);
+
+    mp_poll(fds, nfds, timeout_ns);
+
+    // A prepared read must be either completed or cancelled.
+    if (fds[0].revents & POLLIN) {
+        wl_display_read_events(wl->display);
+    } else {
+        wl_display_cancel_read(wl->display);
+    }
+
+    // On a broken display fd, stop polling it from now on and ask the
+    // player to close the window.
+    if (fds[0].revents & (POLLERR | POLLHUP | POLLNVAL)) {
+        MP_FATAL(wl, "Error occurred on the display fd\n");
+        wl->display_fd = -1;
+        mp_input_put_key(wl->vo->input_ctx, MP_KEY_CLOSE_WIN);
+    }
+
+    // fds[1].revents stays 0 when nfds is 1, since only the initializer
+    // above ever writes it in that case.
+    if (fds[1].revents & POLLIN)
+        mp_flush_wakeup_pipe(wl->wakeup_pipe[0]);
+
+    wl_display_dispatch_pending(wl->display);
+}
+
+/* Non-static */
+/* Create a sealable memfd named "mpv" and reserve `size` bytes for it.
+ *
+ * Returns the file descriptor on success (ownership passes to the caller),
+ * or VO_ERROR if memfd_create is unavailable at build time, creation fails,
+ * or the space cannot be allocated. Failures are logged on `vo`. */
+int vo_wayland_allocate_memfd(struct vo *vo, size_t size)
+{
+#if !HAVE_MEMFD_CREATE
+    return VO_ERROR;
+#else
+    int fd = memfd_create("mpv", MFD_CLOEXEC | MFD_ALLOW_SEALING);
+    if (fd < 0) {
+        MP_ERR(vo, "Failed to allocate memfd: %s\n", mp_strerror(errno));
+        return VO_ERROR;
+    }
+
+    // Best effort: the result of adding the seals is intentionally ignored.
+    fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_SEAL);
+
+    // posix_fallocate() reports failure through its return value (an error
+    // number) and does not set errno, so the returned code is what must be
+    // logged.
+    int err = posix_fallocate(fd, 0, size);
+    if (err == 0)
+        return fd;
+
+    close(fd);
+    MP_ERR(vo, "Failed to allocate memfd: %s\n", mp_strerror(err));
+
+    return VO_ERROR;
+#endif
+}
+
+/* Report whether a frame should be rendered: true unless the window is
+ * considered hidden and force_render is off. Always arms frame_wait as a
+ * side effect. */
+bool vo_wayland_check_visible(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+
+    wl->frame_wait = true;
+    if (wl->vo_opts->force_render)
+        return true;
+    return !wl->hidden;
+}
+
+/* Handle a VOCTRL_* request for the Wayland backend.
+ *
+ * events:  in/out bitmask; VOCTRL_CHECK_EVENTS ORs the pending VO_EVENT_*
+ *          flags into it.
+ * request: the VOCTRL_* code.
+ * arg:     request-specific pointer; each case below casts it to the type
+ *          it expects.
+ *
+ * Returns VO_TRUE when handled, VO_NOTAVAIL when the needed information or
+ * object is missing, and VO_NOTIMPL for requests not handled here (or when
+ * a helper reports that). Already queued Wayland events are dispatched
+ * before the request is looked at. */
+int vo_wayland_control(struct vo *vo, int *events, int request, void *arg)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    struct mp_vo_opts *opts = wl->vo_opts;
+    wl_display_dispatch_pending(wl->display);
+
+    switch (request) {
+    case VOCTRL_CHECK_EVENTS: {
+        check_dnd_fd(wl);
+        *events |= wl->pending_vo_events;
+        // A resize implies an expose, and resets the hidden-window
+        // heuristic maintained by vo_wayland_wait_frame().
+        if (*events & VO_EVENT_RESIZE) {
+            *events |= VO_EVENT_EXPOSE;
+            wl->frame_wait = false;
+            wl->timeout_count = 0;
+            wl->hidden = false;
+        }
+        wl->pending_vo_events = 0;
+        return VO_TRUE;
+    }
+    case VOCTRL_VO_OPTS_CHANGED: {
+        // Walk every option reported as changed by the cache and apply it.
+        void *opt;
+        while (m_config_cache_get_next_changed(wl->vo_opts_cache, &opt)) {
+            if (opt == &opts->appid)
+                update_app_id(wl);
+            if (opt == &opts->border)
+            {
+                // This is stupid but the value of border shouldn't be written
+                // unless we get a configure event. Change it back to its old
+                // value and let configure_decorations handle it after the request.
+                if (wl->xdg_toplevel_decoration) {
+                    int requested_border_mode = opts->border;
+                    opts->border = !opts->border;
+                    m_config_cache_write_opt(wl->vo_opts_cache,
+                                             &opts->border);
+                    request_decoration_mode(
+                        wl, requested_border_mode ?
+                            ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE :
+                            ZXDG_TOPLEVEL_DECORATION_V1_MODE_CLIENT_SIDE);
+                } else {
+                    // Without a decoration object the option is forced off.
+                    opts->border = false;
+                    m_config_cache_write_opt(wl->vo_opts_cache,
+                                             &wl->vo_opts->border);
+                }
+            }
+            if (opt == &opts->content_type)
+                set_content_type(wl);
+            if (opt == &opts->cursor_passthrough)
+                set_input_region(wl, opts->cursor_passthrough);
+            if (opt == &opts->fullscreen)
+                toggle_fullscreen(wl);
+            if (opt == &opts->hidpi_window_scale)
+                set_geometry(wl, true);
+            if (opt == &opts->window_maximized)
+                toggle_maximized(wl);
+            if (opt == &opts->window_minimized)
+                do_minimize(wl);
+            if (opt == &opts->geometry || opt == &opts->autofit ||
+                opt == &opts->autofit_smaller || opt == &opts->autofit_larger)
+            {
+                set_geometry(wl, true);
+            }
+        }
+        return VO_TRUE;
+    }
+    case VOCTRL_CONTENT_TYPE: {
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+        // Remember the type so that the "auto" option value can use it.
+        wl->current_content_type = *(enum mp_content_type *)arg;
+        set_content_type(wl);
+#endif
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_FOCUSED: {
+        *(bool *)arg = wl->focused;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_DISPLAY_NAMES: {
+        *(char ***)arg = get_displays_spanned(wl);
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_UNFS_WINDOW_SIZE: {
+        // arg is an int[2]: width, height.
+        int *s = arg;
+        if (wl->vo_opts->window_maximized || wl->tiled) {
+            s[0] = mp_rect_w(wl->geometry);
+            s[1] = mp_rect_h(wl->geometry);
+        } else {
+            s[0] = mp_rect_w(wl->window_size);
+            s[1] = mp_rect_h(wl->window_size);
+        }
+        return VO_TRUE;
+    }
+    case VOCTRL_SET_UNFS_WINDOW_SIZE: {
+        // arg is an int[2]: width, height. The size is always stored, but
+        // only applied right away when not fullscreen and not tiled.
+        int *s = arg;
+        wl->window_size.x0 = 0;
+        wl->window_size.y0 = 0;
+        wl->window_size.x1 = s[0];
+        wl->window_size.y1 = s[1];
+        if (!wl->vo_opts->fullscreen && !wl->tiled) {
+            if (wl->vo_opts->window_maximized) {
+                xdg_toplevel_unset_maximized(wl->xdg_toplevel);
+                wl_display_dispatch_pending(wl->display);
+                /* Make sure the compositor let us unmaximize */
+                if (wl->vo_opts->window_maximized)
+                    return VO_TRUE;
+            }
+            wl->geometry = wl->window_size;
+            prepare_resize(wl, 0, 0);
+        }
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_DISPLAY_FPS: {
+        // Prefer the output the window is on; otherwise ask find_output().
+        struct vo_wayland_output *out;
+        if (wl->current_output) {
+            out = wl->current_output;
+        } else {
+            out = find_output(wl);
+        }
+        if (!out)
+            return VO_NOTAVAIL;
+        *(double *)arg = out->refresh_rate;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_DISPLAY_RES: {
+        // arg is an int[2] that receives the output geometry's x1 and y1.
+        struct vo_wayland_output *out;
+        if (wl->current_output) {
+            out = wl->current_output;
+        } else {
+            out = find_output(wl);
+        }
+        if (!out)
+            return VO_NOTAVAIL;
+        ((int *)arg)[0] = out->geometry.x1;
+        ((int *)arg)[1] = out->geometry.y1;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_HIDPI_SCALE: {
+        if (!wl->scaling)
+            return VO_NOTAVAIL;
+        *(double *)arg = wl->scaling;
+        return VO_TRUE;
+    }
+    case VOCTRL_UPDATE_WINDOW_TITLE:
+        return update_window_title(wl, (const char *)arg);
+    case VOCTRL_SET_CURSOR_VISIBILITY:
+        if (!wl->pointer)
+            return VO_NOTAVAIL;
+        return set_cursor_visibility(wl, *(bool *)arg);
+    case VOCTRL_KILL_SCREENSAVER:
+        return set_screensaver_inhibitor(wl, true);
+    case VOCTRL_RESTORE_SCREENSAVER:
+        return set_screensaver_inhibitor(wl, false);
+    }
+
+    return VO_NOTIMPL;
+}
+
+/* Set the main viewport's destination size to the window geometry divided
+ * by the current scaling (rounded). Only acts when both the
+ * fractional-scale manager and the viewport exist. */
+void vo_wayland_handle_fractional_scale(struct vo_wayland_state *wl)
+{
+    if (!wl->fractional_scale_manager || !wl->viewport)
+        return;
+
+    int32_t dst_width = round(mp_rect_w(wl->geometry) / wl->scaling);
+    int32_t dst_height = round(mp_rect_h(wl->geometry) / wl->scaling);
+    wp_viewport_set_destination(wl->viewport, dst_width, dst_height);
+}
+
+/* Allocate vo->wl, connect to the Wayland display and set up all protocol
+ * objects used by the backend.
+ *
+ * Returns true on success. On failure everything created so far is torn
+ * down through vo_wayland_uninit() and false is returned. Optional
+ * protocols that the compositor lacks are only reported at verbose level;
+ * a missing surface, xdg_wm_base or output list is fatal. */
+bool vo_wayland_init(struct vo *vo)
+{
+    vo->wl = talloc_zero(NULL, struct vo_wayland_state);
+    struct vo_wayland_state *wl = vo->wl;
+
+    // Fields not listed here are zero. The fds start at -1 so that they are
+    // recognizable as "not opened".
+    *wl = (struct vo_wayland_state) {
+        .display = wl_display_connect(NULL),
+        .vo = vo,
+        .log = mp_log_new(wl, vo->log, "wayland"),
+        .bounded_width = 0,
+        .bounded_height = 0,
+        .refresh_interval = 0,
+        .scaling = 1,
+        .wakeup_pipe = {-1, -1},
+        .display_fd = -1,
+        .dnd_fd = -1,
+        .cursor_visible = true,
+        .vo_opts_cache = m_config_cache_alloc(wl, vo->global, &vo_sub_opts),
+    };
+    wl->vo_opts = wl->vo_opts_cache->opts;
+    wl->using_dmabuf_wayland = !strcmp(wl->vo->driver->name, "dmabuf-wayland");
+
+    // Must be initialized before any error path: vo_wayland_uninit() walks
+    // this list.
+    wl_list_init(&wl->output_list);
+
+    if (!wl->display)
+        goto err;
+
+    if (create_input(wl))
+        goto err;
+
+    wl->registry = wl_display_get_registry(wl->display);
+    wl_registry_add_listener(wl->registry, &registry_listener, wl);
+
+    /* Do a roundtrip to run the registry */
+    wl_display_roundtrip(wl->display);
+
+    if (!wl->surface) {
+        MP_FATAL(wl, "Compositor doesn't support %s (ver. 4)\n",
+                 wl_compositor_interface.name);
+        goto err;
+    }
+
+    if (!wl->wm_base) {
+        MP_FATAL(wl, "Compositor doesn't support the required %s protocol!\n",
+                 xdg_wm_base_interface.name);
+        goto err;
+    }
+
+    if (!wl_list_length(&wl->output_list)) {
+        MP_FATAL(wl, "No outputs found or compositor doesn't support %s (ver. 2)\n",
+                 wl_output_interface.name);
+        goto err;
+    }
+
+    /* Can't be initialized during registry due to multi-protocol dependence */
+    if (create_viewports(wl))
+        goto err;
+
+    if (create_xdg_surface(wl))
+        goto err;
+
+    // Surface stack: the OSD surface is a child of the video surface, which
+    // in turn is a child of the main surface.
+    if (wl->subcompositor) {
+        wl->osd_subsurface = wl_subcompositor_get_subsurface(wl->subcompositor, wl->osd_surface, wl->video_surface);
+        wl->video_subsurface = wl_subcompositor_get_subsurface(wl->subcompositor, wl->video_surface, wl->surface);
+    }
+
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+    if (wl->content_type_manager) {
+        wl->content_type = wp_content_type_manager_v1_get_surface_content_type(wl->content_type_manager, wl->surface);
+    } else {
+        MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n",
+                   wp_content_type_manager_v1_interface.name);
+    }
+
+    if (!wl->single_pixel_manager) {
+        MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n",
+                   wp_single_pixel_buffer_manager_v1_interface.name);
+    }
+#endif
+
+#if HAVE_WAYLAND_PROTOCOLS_1_31
+    if (wl->fractional_scale_manager) {
+        wl->fractional_scale = wp_fractional_scale_manager_v1_get_fractional_scale(wl->fractional_scale_manager, wl->surface);
+        wp_fractional_scale_v1_add_listener(wl->fractional_scale, &fractional_scale_listener, wl);
+    } else {
+        MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n",
+                   wp_fractional_scale_manager_v1_interface.name);
+    }
+#endif
+
+    // The data device needs both the manager and a seat; only a missing
+    // manager is reported.
+    if (wl->dnd_devman && wl->seat) {
+        wl->dnd_ddev = wl_data_device_manager_get_data_device(wl->dnd_devman, wl->seat);
+        wl_data_device_add_listener(wl->dnd_ddev, &data_device_listener, wl);
+    } else if (!wl->dnd_devman) {
+        MP_VERBOSE(wl, "Compositor doesn't support the %s (ver. 3) protocol!\n",
+                   wl_data_device_manager_interface.name);
+    }
+
+    // Presentation feedback bookkeeping, sized by VO_MAX_SWAPCHAIN_DEPTH.
+    if (wl->presentation) {
+        wl->fback_pool = talloc_zero(wl, struct vo_wayland_feedback_pool);
+        wl->fback_pool->wl = wl;
+        wl->fback_pool->len = VO_MAX_SWAPCHAIN_DEPTH;
+        wl->fback_pool->fback = talloc_zero_array(wl->fback_pool, struct wp_presentation_feedback *,
+                                                  wl->fback_pool->len);
+        wl->present = mp_present_initialize(wl, wl->vo_opts, VO_MAX_SWAPCHAIN_DEPTH);
+    } else {
+        MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n",
+                   wp_presentation_interface.name);
+    }
+
+    // With xdg-decoration, request the mode matching the border option;
+    // without it, the border option is forced off and written back.
+    if (wl->xdg_decoration_manager) {
+        wl->xdg_toplevel_decoration = zxdg_decoration_manager_v1_get_toplevel_decoration(wl->xdg_decoration_manager, wl->xdg_toplevel);
+        zxdg_toplevel_decoration_v1_add_listener(wl->xdg_toplevel_decoration, &decoration_listener, wl);
+        request_decoration_mode(
+            wl, wl->vo_opts->border ?
+                ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE :
+                ZXDG_TOPLEVEL_DECORATION_V1_MODE_CLIENT_SIDE);
+    } else {
+        wl->vo_opts->border = false;
+        m_config_cache_write_opt(wl->vo_opts_cache,
+                                 &wl->vo_opts->border);
+        MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n",
+                   zxdg_decoration_manager_v1_interface.name);
+    }
+
+    if (!wl->idle_inhibit_manager) {
+        MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n",
+                   zwp_idle_inhibit_manager_v1_interface.name);
+    }
+
+    wl->opts = mp_get_config_group(wl, wl->vo->global, &wayland_conf);
+    wl->display_fd = wl_display_get_fd(wl->display);
+
+    update_app_id(wl);
+    mp_make_wakeup_pipe(wl->wakeup_pipe);
+
+    // Frame callbacks are requested on the video surface for dmabuf-wayland
+    // and on the main surface otherwise.
+    wl->callback_surface = wl->using_dmabuf_wayland ? wl->video_surface : wl->surface;
+    wl->frame_callback = wl_surface_frame(wl->callback_surface);
+    wl_callback_add_listener(wl->frame_callback, &frame_listener, wl);
+    wl_surface_commit(wl->surface);
+
+    /* Do another roundtrip to ensure all of the above is initialized
+     * before mpv does anything else. */
+    wl_display_roundtrip(wl->display);
+
+    return true;
+
+err:
+    vo_wayland_uninit(vo);
+    return false;
+}
+
+/* Reconfigure the window for the current options.
+ * Picks an output if none is current yet, recomputes geometry and bounds,
+ * re-applies the passthrough/fullscreen/maximized/minimized states and
+ * finally triggers a resize.
+ * Returns false only when no output can be found. */
+bool vo_wayland_reconfig(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    struct mp_vo_opts *vo_opts = wl->vo_opts;
+
+    MP_VERBOSE(wl, "Reconfiguring!\n");
+
+    if (!wl->current_output) {
+        struct vo_wayland_output *found = find_output(wl);
+        wl->current_output = found;
+        if (!found)
+            return false;
+        set_surface_scaling(wl);
+        wl->pending_vo_events |= VO_EVENT_DPI;
+    }
+
+    // The geometry is always computed on the very first configuration.
+    if (!wl->configured || vo_opts->auto_window_resize)
+        set_geometry(wl, false);
+
+    if (wl->opts->configure_bounds)
+        set_window_bounds(wl);
+
+    if (!(wl->configured && wl->locked_size)) {
+        wl->geometry = wl->window_size;
+        wl->configured = true;
+    }
+
+    if (vo_opts->cursor_passthrough)
+        set_input_region(wl, true);
+    if (vo_opts->fullscreen)
+        toggle_fullscreen(wl);
+    if (vo_opts->window_maximized)
+        toggle_maximized(wl);
+    if (vo_opts->window_minimized)
+        do_minimize(wl);
+
+    prepare_resize(wl, 0, 0);
+
+    return true;
+}
+
+/* Update the opaque region of the main surface.
+ * With alpha the opaque region is cleared (NULL); without alpha the whole
+ * window geometry is marked opaque. */
+void vo_wayland_set_opaque_region(struct vo_wayland_state *wl, bool alpha)
+{
+    const int32_t w = mp_rect_w(wl->geometry);
+    const int32_t h = mp_rect_h(wl->geometry);
+
+    if (alpha) {
+        wl_surface_set_opaque_region(wl->surface, NULL);
+        return;
+    }
+
+    struct wl_region *opaque = wl_compositor_create_region(wl->compositor);
+    wl_region_add(opaque, 0, 0, w, h);
+    wl_surface_set_opaque_region(wl->surface, opaque);
+    wl_region_destroy(opaque);
+}
+
+/* Tear down everything created by vo_wayland_init() and free vo->wl.
+ *
+ * Safe to call on a partially initialized state (every object is checked
+ * before it is destroyed) and when vo->wl is already NULL. All pressed keys
+ * are released first. vo->wl is NULL afterwards.
+ *
+ * The xdg objects are destroyed in the order the protocols require: the
+ * toplevel decoration before its xdg_toplevel, the xdg_toplevel before its
+ * xdg_surface, and xdg_wm_base only after the surfaces created from it. */
+void vo_wayland_uninit(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    if (!wl)
+        return;
+
+    mp_input_put_key(wl->vo->input_ctx, MP_INPUT_RELEASE_ALL);
+
+    if (wl->compositor)
+        wl_compositor_destroy(wl->compositor);
+
+    if (wl->subcompositor)
+        wl_subcompositor_destroy(wl->subcompositor);
+
+#if HAVE_WAYLAND_PROTOCOLS_1_32
+    if (wl->cursor_shape_device)
+        wp_cursor_shape_device_v1_destroy(wl->cursor_shape_device);
+
+    if (wl->cursor_shape_manager)
+        wp_cursor_shape_manager_v1_destroy(wl->cursor_shape_manager);
+#endif
+
+    if (wl->cursor_surface)
+        wl_surface_destroy(wl->cursor_surface);
+
+    if (wl->cursor_theme)
+        wl_cursor_theme_destroy(wl->cursor_theme);
+
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+    if (wl->content_type)
+        wp_content_type_v1_destroy(wl->content_type);
+
+    if (wl->content_type_manager)
+        wp_content_type_manager_v1_destroy(wl->content_type_manager);
+#endif
+
+    if (wl->dnd_ddev)
+        wl_data_device_destroy(wl->dnd_ddev);
+
+    if (wl->dnd_devman)
+        wl_data_device_manager_destroy(wl->dnd_devman);
+
+    if (wl->dnd_offer)
+        wl_data_offer_destroy(wl->dnd_offer);
+
+    if (wl->fback_pool)
+        clean_feedback_pool(wl->fback_pool);
+
+#if HAVE_WAYLAND_PROTOCOLS_1_31
+    if (wl->fractional_scale)
+        wp_fractional_scale_v1_destroy(wl->fractional_scale);
+
+    if (wl->fractional_scale_manager)
+        wp_fractional_scale_manager_v1_destroy(wl->fractional_scale_manager);
+#endif
+
+    if (wl->frame_callback)
+        wl_callback_destroy(wl->frame_callback);
+
+    if (wl->idle_inhibitor)
+        zwp_idle_inhibitor_v1_destroy(wl->idle_inhibitor);
+
+    if (wl->idle_inhibit_manager)
+        zwp_idle_inhibit_manager_v1_destroy(wl->idle_inhibit_manager);
+
+    if (wl->keyboard)
+        wl_keyboard_destroy(wl->keyboard);
+
+    if (wl->pointer)
+        wl_pointer_destroy(wl->pointer);
+
+    if (wl->presentation)
+        wp_presentation_destroy(wl->presentation);
+
+    if (wl->registry)
+        wl_registry_destroy(wl->registry);
+
+    if (wl->viewporter)
+        wp_viewporter_destroy(wl->viewporter);
+
+    if (wl->viewport)
+        wp_viewport_destroy(wl->viewport);
+
+    if (wl->osd_viewport)
+        wp_viewport_destroy(wl->osd_viewport);
+
+    if (wl->video_viewport)
+        wp_viewport_destroy(wl->video_viewport);
+
+    if (wl->dmabuf)
+        zwp_linux_dmabuf_v1_destroy(wl->dmabuf);
+
+    if (wl->dmabuf_feedback)
+        zwp_linux_dmabuf_feedback_v1_destroy(wl->dmabuf_feedback);
+
+    if (wl->seat)
+        wl_seat_destroy(wl->seat);
+
+    if (wl->shm)
+        wl_shm_destroy(wl->shm);
+
+#if HAVE_WAYLAND_PROTOCOLS_1_27
+    if (wl->single_pixel_manager)
+        wp_single_pixel_buffer_manager_v1_destroy(wl->single_pixel_manager);
+#endif
+
+    // xdg-decoration: the decoration object has to go before the
+    // xdg_toplevel it decorates ("orphaned" protocol error otherwise).
+    if (wl->xdg_toplevel_decoration)
+        zxdg_toplevel_decoration_v1_destroy(wl->xdg_toplevel_decoration);
+
+    // xdg-shell: the role object (toplevel) goes before its xdg_surface.
+    if (wl->xdg_toplevel)
+        xdg_toplevel_destroy(wl->xdg_toplevel);
+
+    // NOTE(review): presumably the xdg_surface was created for wl->surface
+    // (create_xdg_surface is not part of this block), hence it is destroyed
+    // before that wl_surface below.
+    if (wl->xdg_surface)
+        xdg_surface_destroy(wl->xdg_surface);
+
+    if (wl->surface)
+        wl_surface_destroy(wl->surface);
+
+    if (wl->osd_surface)
+        wl_surface_destroy(wl->osd_surface);
+
+    if (wl->osd_subsurface)
+        wl_subsurface_destroy(wl->osd_subsurface);
+
+    if (wl->video_surface)
+        wl_surface_destroy(wl->video_surface);
+
+    if (wl->video_subsurface)
+        wl_subsurface_destroy(wl->video_subsurface);
+
+    // xdg_wm_base must outlive the xdg surfaces created from it
+    // ("defunct_surfaces" protocol error otherwise).
+    if (wl->wm_base)
+        xdg_wm_base_destroy(wl->wm_base);
+
+    if (wl->xdg_decoration_manager)
+        zxdg_decoration_manager_v1_destroy(wl->xdg_decoration_manager);
+
+    if (wl->xkb_context)
+        xkb_context_unref(wl->xkb_context);
+
+    if (wl->xkb_keymap)
+        xkb_keymap_unref(wl->xkb_keymap);
+
+    if (wl->xkb_state)
+        xkb_state_unref(wl->xkb_state);
+
+    struct vo_wayland_output *output, *tmp;
+    wl_list_for_each_safe(output, tmp, &wl->output_list, link)
+        remove_output(output);
+
+    if (wl->display)
+        wl_display_disconnect(wl->display);
+
+    // Only unmap/close what was actually set up; init failures reach this
+    // point with a NULL map and pipe fds still at -1.
+    if (wl->format_map)
+        munmap(wl->format_map, wl->format_size);
+
+    for (int n = 0; n < 2; n++) {
+        if (wl->wakeup_pipe[n] >= 0)
+            close(wl->wakeup_pipe[n]);
+    }
+    talloc_free(wl);
+    vo->wl = NULL;
+}
+
+/* Block until the pending frame callback arrives (frame_wait is cleared) or
+ * roughly one vblank interval has passed, and keep the hidden-window
+ * heuristic up to date. */
+void vo_wayland_wait_frame(struct vo_wayland_state *wl)
+{
+    /* Pick the vblank interval used as timeout. Sources, in order of
+     * preference:
+     * 1. vsync duration from presentation time
+     * 2. refresh interval reported by presentation time
+     * 3. refresh rate of the output reported by the compositor
+     * 4. a made-up 60 Hz interval if nothing above gave a positive value */
+    int64_t wait_ns = 0;
+
+    if (wl->use_present && wl->present->head)
+        wait_ns = wl->present->head->vsync_duration;
+
+    if (wait_ns <= 0 && wl->refresh_interval > 0)
+        wait_ns = wl->refresh_interval;
+
+    if (wait_ns <= 0 && wl->current_output->refresh_rate > 0)
+        wait_ns = 1e9 / wl->current_output->refresh_rate;
+
+    // Ideally you should never reach this point.
+    if (wait_ns <= 0)
+        wait_ns = 1e9 / 60;
+
+    // Completely arbitrary amount of additional time to wait (5%).
+    wait_ns += 0.05 * wait_ns;
+    int64_t deadline = mp_time_ns() + wait_ns;
+
+    while (wl->frame_wait && deadline > mp_time_ns()) {
+        int64_t remaining = deadline - mp_time_ns();
+        if (remaining < 0)
+            remaining = 0;
+        wayland_dispatch_events(wl, 1, remaining);
+    }
+
+    /* If the compositor does not have presentation time, we cannot be sure
+     * that this wait is accurate. Do a hacky block with wl_display_roundtrip. */
+    if (!wl->use_present && !wl_display_get_error(wl->display))
+        wl_display_roundtrip(wl->display);
+
+    /* Only use this heuristic if the compositor doesn't support the suspended state. */
+    bool missed_callback = wl->frame_wait &&
+                           xdg_toplevel_get_version(wl->xdg_toplevel) < 6;
+    if (!missed_callback) {
+        wl->timeout_count = 0;
+        return;
+    }
+
+    // Only consider consecutive missed callbacks.
+    if (wl->timeout_count > 1) {
+        wl->hidden = true;
+    } else {
+        wl->timeout_count += 1;
+    }
+}
+
+/* Wait for Wayland events or a wakeup until until_time_ns.
+ * The wait is clamped to the range [0, 10 seconds]. */
+void vo_wayland_wait_events(struct vo *vo, int64_t until_time_ns)
+{
+    int64_t remaining_ns = until_time_ns - mp_time_ns();
+    int64_t clamped_ns = MPCLAMP(remaining_ns, 0, MP_TIME_S_TO_NS(10));
+
+    // 2 fds: the display fd and the wakeup pipe.
+    wayland_dispatch_events(vo->wl, 2, clamped_ns);
+}
+
+/* Interrupt a wait in vo_wayland_wait_events() by writing one byte to the
+ * wakeup pipe. The result of the write is deliberately ignored. */
+void vo_wayland_wakeup(struct vo *vo)
+{
+    struct vo_wayland_state *wl = vo->wl;
+    const char token = 0;
+    (void)write(wl->wakeup_pipe[1], &token, 1);
+}
diff --git a/video/out/wayland_common.h b/video/out/wayland_common.h
new file mode 100644
index 0000000..adbcca6
--- /dev/null
+++ b/video/out/wayland_common.h
@@ -0,0 +1,189 @@
+/*
+ * This file is part of mpv video player.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_WAYLAND_COMMON_H
+#define MPLAYER_WAYLAND_COMMON_H
+
+#include <wayland-client.h>
+#include "input/event.h"
+#include "vo.h"
+
+/* One entry of vo_wayland_state.format_map: a format code together with a
+ * modifier. NOTE(review): presumably this mirrors the entry layout of the
+ * linux-dmabuf feedback format table, so the field order and the explicit
+ * padding should not be changed; confirm against the code that maps it. */
+typedef struct {
+    uint32_t format;
+    uint32_t padding;
+    uint64_t modifier;
+} wayland_format;
+
+/* Wayland-specific options, read by the backend through
+ * vo_wayland_state.opts. */
+struct wayland_opts {
+    int configure_bounds;       // -1 is treated as "auto", 0 disables bounds
+    int content_type;
+    bool disable_vsync;
+    int edge_pixels_pointer;
+    int edge_pixels_touch;
+};
+
+/* Complete state of the Wayland backend for one VO. Allocated and filled in
+ * by vo_wayland_init(), released by vo_wayland_uninit(). Pointers to
+ * optional protocol objects stay NULL when the compositor does not provide
+ * them. */
+struct vo_wayland_state {
+    struct m_config_cache   *vo_opts_cache;
+    struct mp_log           *log;
+    struct mp_vo_opts       *vo_opts;           // points into vo_opts_cache
+    struct vo               *vo;
+    struct wayland_opts     *opts;
+    struct wl_callback      *frame_callback;
+    struct wl_compositor    *compositor;
+    struct wl_subcompositor *subcompositor;
+    struct wl_display       *display;
+    struct wl_registry      *registry;
+    struct wl_shm           *shm;
+    struct wl_surface       *surface;           // main (toplevel) surface
+    struct wl_surface       *osd_surface;
+    struct wl_subsurface    *osd_subsurface;    // osd_surface on top of video_surface
+    struct wl_surface       *video_surface;
+    struct wl_surface       *callback_surface;  // alias of surface or video_surface
+    struct wl_subsurface    *video_subsurface;  // video_surface on top of surface
+
+    /* Geometry */
+    struct mp_rect geometry;        // current size of the window
+    struct mp_rect window_size;     // size reported/set as unfullscreened window size
+    struct wl_list output_list;
+    struct vo_wayland_output *current_output;
+    int bounded_height;             // upper bound for window_size; 0 = none
+    int bounded_width;              // upper bound for window_size; 0 = none
+    int reduced_height;             // vo->dheight divided by gcd(dwidth, dheight)
+    int reduced_width;              // vo->dwidth divided by gcd(dwidth, dheight)
+    int toplevel_width;
+    int toplevel_height;
+
+    /* State */
+    bool activated;
+    bool configured;
+    bool focused;
+    bool frame_wait;                // armed by check_visible, waited on by wait_frame
+    bool has_keyboard_input;
+    bool hidden;                    // set after consecutive missed frame callbacks
+    bool initial_size_hint;
+    bool locked_size;
+    bool state_change;
+    bool tiled;
+    bool toplevel_configured;
+    int display_fd;                 // -1 while unset and after a display error
+    int mouse_x;
+    int mouse_y;
+    int pending_vo_events;          // VO_EVENT_* flags handed out by VOCTRL_CHECK_EVENTS
+    double scaling;
+    int timeout_count;              // consecutive missed frame callbacks
+    int wakeup_pipe[2];             // [0] is polled, [1] is written by vo_wayland_wakeup
+
+    /* content-type */
+    /* TODO: unvoid these if required wayland protocols is bumped to 1.27+ */
+    void *content_type_manager;
+    void *content_type;
+    int current_content_type;       // used when the content-type option is auto (-1)
+
+    /* cursor-shape */
+    /* TODO: unvoid these if required wayland protocols is bumped to 1.32+ */
+    void *cursor_shape_manager;
+    void *cursor_shape_device;
+
+    /* fractional-scale */
+    /* TODO: unvoid these if required wayland protocols is bumped to 1.31+ */
+    void *fractional_scale_manager;
+    void *fractional_scale;
+
+    /* idle-inhibit */
+    struct zwp_idle_inhibit_manager_v1 *idle_inhibit_manager;
+    struct zwp_idle_inhibitor_v1 *idle_inhibitor;   // non-NULL while inhibiting
+
+    /* linux-dmabuf */
+    struct zwp_linux_dmabuf_v1 *dmabuf;
+    struct zwp_linux_dmabuf_feedback_v1 *dmabuf_feedback;
+    wayland_format *format_map;     // released with munmap() in vo_wayland_uninit
+    uint32_t format_size;           // length passed to that munmap()
+    bool using_dmabuf_wayland;      // true when the VO driver is "dmabuf-wayland"
+
+    /* presentation-time */
+    struct wp_presentation  *presentation;
+    struct vo_wayland_feedback_pool *fback_pool;
+    struct mp_present *present;
+    int64_t refresh_interval;
+    bool use_present;
+
+    /* single-pixel-buffer */
+    /* TODO: unvoid this if required wayland-protocols is bumped to 1.27+ */
+    void *single_pixel_manager;
+
+    /* xdg-decoration */
+    struct zxdg_decoration_manager_v1 *xdg_decoration_manager;
+    struct zxdg_toplevel_decoration_v1 *xdg_toplevel_decoration;
+    int requested_decoration;
+
+    /* xdg-shell */
+    struct xdg_wm_base      *wm_base;
+    struct xdg_surface      *xdg_surface;
+    struct xdg_toplevel     *xdg_toplevel;
+
+    /* viewporter */
+    struct wp_viewporter *viewporter;
+    struct wp_viewport   *viewport;
+    struct wp_viewport   *osd_viewport;
+    struct wp_viewport   *video_viewport;
+
+    /* Input */
+    struct wl_keyboard *keyboard;
+    struct wl_pointer  *pointer;
+    struct wl_seat     *seat;
+    struct wl_touch    *touch;
+    struct xkb_context *xkb_context;
+    struct xkb_keymap  *xkb_keymap;
+    struct xkb_state   *xkb_state;
+    uint32_t keyboard_code;
+    int mpkey;
+    int mpmod;
+
+    /* DND */
+    struct wl_data_device *dnd_ddev;
+    struct wl_data_device_manager *dnd_devman;
+    struct wl_data_offer *dnd_offer;
+    enum mp_dnd_action dnd_action;
+    char *dnd_mime_type;
+    int dnd_fd;                     // -1 while unset
+    int dnd_mime_score;
+
+    /* Cursor */
+    struct wl_cursor_theme *cursor_theme;
+    struct wl_cursor       *default_cursor;     // belongs to cursor_theme
+    struct wl_surface      *cursor_surface;
+    bool                    cursor_visible;
+    // Scale the cursor theme was last loaded for. NOTE(review): this is an
+    // int but gets compared against the double `scaling` in spawn_cursor().
+    int                     allocated_cursor_scale;
+    uint32_t                pointer_id;
+};
+
+/* Arms frame_wait; returns whether a frame should be rendered. */
+bool vo_wayland_check_visible(struct vo *vo);
+/* Allocates vo->wl and sets up the connection; false on failure. */
+bool vo_wayland_init(struct vo *vo);
+/* Re-applies geometry and window state; false if no output is found. */
+bool vo_wayland_reconfig(struct vo *vo);
+
+/* Returns a memfd with `size` bytes reserved, or VO_ERROR. */
+int vo_wayland_allocate_memfd(struct vo *vo, size_t size);
+/* VOCTRL_* dispatcher; returns VO_TRUE, VO_NOTAVAIL or VO_NOTIMPL. */
+int vo_wayland_control(struct vo *vo, int *events, int request, void *arg);
+
+void vo_wayland_handle_fractional_scale(struct vo_wayland_state *wl);
+void vo_wayland_set_opaque_region(struct vo_wayland_state *wl, bool alpha);
+void vo_wayland_sync_swap(struct vo_wayland_state *wl);
+/* Destroys all Wayland objects and frees vo->wl (sets it to NULL). */
+void vo_wayland_uninit(struct vo *vo);
+void vo_wayland_wait_events(struct vo *vo, int64_t until_time_ns);
+void vo_wayland_wait_frame(struct vo_wayland_state *wl);
+void vo_wayland_wakeup(struct vo *vo);
+
+#endif /* MPLAYER_WAYLAND_COMMON_H */
diff --git a/video/out/win32/displayconfig.c b/video/out/win32/displayconfig.c
new file mode 100644
index 0000000..9844afd
--- /dev/null
+++ b/video/out/win32/displayconfig.c
@@ -0,0 +1,140 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <windows.h>
+#include <stdbool.h>
+#include <string.h>
+
+#include "displayconfig.h"
+
+#include "mpv_talloc.h"
+
+static bool is_valid_refresh_rate(DISPLAYCONFIG_RATIONAL rr)
+{
+    // An integer rate of 1 or less is treated as unknown (DisplayConfig quirk)
+    return rr.Denominator ? rr.Numerator / rr.Denominator > 1 : false;
+}
+
+// Query the active display paths and modes into arrays allocated under ctx.
+// Returns 0 on success; on failure returns -1 and leaves *paths/*modes NULL.
+static int get_config(void *ctx,
+                      UINT32 *num_paths, DISPLAYCONFIG_PATH_INFO** paths,
+                      UINT32 *num_modes, DISPLAYCONFIG_MODE_INFO** modes)
+{
+    LONG res;
+    *paths = NULL;
+    *modes = NULL;
+
+    // The display configuration can change between GetDisplayConfigBufferSizes
+    // and QueryDisplayConfig, so retry until the buffers are large enough
+    do {
+        res = GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, num_paths,
+                                          num_modes);
+        if (res != ERROR_SUCCESS)
+            goto fail;
+
+        // Free old buffers if they exist and allocate new ones
+        talloc_free(*paths);
+        talloc_free(*modes);
+        *paths = talloc_array(ctx, DISPLAYCONFIG_PATH_INFO, *num_paths);
+        *modes = talloc_array(ctx, DISPLAYCONFIG_MODE_INFO, *num_modes);
+        res = QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS, num_paths, *paths,
+                                 num_modes, *modes, NULL);
+    } while (res == ERROR_INSUFFICIENT_BUFFER);
+    if (res == ERROR_SUCCESS)
+        return 0;
+fail:
+    talloc_free(*paths);
+    talloc_free(*modes);
+    *paths = NULL;
+    *modes = NULL;
+    return -1;
+}
+
+// Return the entry of paths whose GDI source device name equals device, or
+// NULL if there is none or a source name query fails.
+static DISPLAYCONFIG_PATH_INFO *get_path(UINT32 num_paths,
+                                         DISPLAYCONFIG_PATH_INFO* paths,
+                                         const wchar_t *device)
+{
+    for (UINT32 n = 0; n < num_paths; n++) {
+        DISPLAYCONFIG_PATH_INFO *cur = &paths[n];
+
+        // Ask Windows for the GDI device name of this path's source
+        DISPLAYCONFIG_SOURCE_DEVICE_NAME name = {0};
+        name.header.size = sizeof name;
+        name.header.type = DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME;
+        name.header.adapterId = cur->sourceInfo.adapterId;
+        name.header.id = cur->sourceInfo.id;
+        if (DisplayConfigGetDeviceInfo(&name.header) != ERROR_SUCCESS)
+            return NULL;
+
+        // The first path whose source name matches is the result
+        if (wcscmp(device, name.viewGdiDeviceName) == 0)
+            return cur;
+    }
+
+    return NULL;
+}
+
+// Vertical refresh rate of a target mode entry, or 0.0 if the entry is not a
+// target mode or its vSyncFreq fails is_valid_refresh_rate().
+static double get_refresh_rate_from_mode(DISPLAYCONFIG_MODE_INFO *mode)
+{
+    double rate = 0.0;
+    if (mode->infoType == DISPLAYCONFIG_MODE_INFO_TYPE_TARGET) {
+        DISPLAYCONFIG_RATIONAL vsync =
+            mode->targetMode.targetVideoSignalInfo.vSyncFreq;
+        if (is_valid_refresh_rate(vsync))
+            rate = (double)vsync.Numerator / (double)vsync.Denominator;
+    }
+    return rate;
+}
+
+// Refresh rate of the display with the given GDI device name; 0.0 on failure
+double mp_w32_displayconfig_get_refresh_rate(const wchar_t *device)
+{
+    void *ctx = talloc_new(NULL);
+    double freq = 0.0;
+
+    // Get the current display configuration
+    UINT32 num_paths, num_modes;
+    DISPLAYCONFIG_PATH_INFO* paths;
+    DISPLAYCONFIG_MODE_INFO* modes;
+    if (get_config(ctx, &num_paths, &paths, &num_modes, &modes))
+        goto end;
+
+    // Get the path for the specified monitor
+    DISPLAYCONFIG_PATH_INFO* path;
+    if (!(path = get_path(num_paths, paths, device)))
+        goto end;
+
+    // Try the mode first; its value overrides the one in the path. Only use
+    // the index if it is valid and actually inside the modes array.
+    UINT32 idx = path->targetInfo.modeInfoIdx;
+    if (idx != DISPLAYCONFIG_PATH_MODE_IDX_INVALID && idx < num_modes)
+        freq = get_refresh_rate_from_mode(&modes[idx]);
+
+    // If the mode didn't contain a valid refresh rate, try the path
+    DISPLAYCONFIG_RATIONAL rr = path->targetInfo.refreshRate;
+    if (freq == 0.0 && is_valid_refresh_rate(rr))
+        freq = (double)rr.Numerator / (double)rr.Denominator;
+
+end:
+    talloc_free(ctx);
+    return freq;
+}
diff --git a/video/out/win32/displayconfig.h b/video/out/win32/displayconfig.h
new file mode 100644
index 0000000..ee6cd03
--- /dev/null
+++ b/video/out/win32/displayconfig.h
@@ -0,0 +1,27 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_WIN32_DISPLAYCONFIG_H_
+#define MP_WIN32_DISPLAYCONFIG_H_
+
+#include <wchar.h>
+
+// Given a GDI monitor device name, get the precise refresh rate using the
+// Windows 7 DisplayConfig API. Returns 0.0 on failure.
+double mp_w32_displayconfig_get_refresh_rate(const wchar_t *device);
+
+#endif
diff --git a/video/out/win32/droptarget.c b/video/out/win32/droptarget.c
new file mode 100644
index 0000000..8a33522
--- /dev/null
+++ b/video/out/win32/droptarget.c
@@ -0,0 +1,227 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+#include <stdatomic.h>
+
+#include <windows.h>
+#include <ole2.h>
+#include <shobjidl.h>
+
+#include "common/msg.h"
+#include "common/common.h"
+#include "input/input.h"
+#include "input/event.h"
+#include "osdep/io.h"
+#include "osdep/windows_utils.h"
+#include "mpv_talloc.h"
+
+#include "droptarget.h"
+
+struct droptarget {
+    IDropTarget iface;              // first member; methods cast self to this
+    atomic_int ref_cnt;             // COM reference count (AddRef/Release)
+    struct mp_log *log;             // child log named "droptarget"
+    struct input_ctx *input_ctx;    // receives the dropped files/URLs
+    struct mp_vo_opts *opts;        // source of the drag_and_drop option
+    DWORD last_effect;              // effect reported by DragOver and Drop
+    IDataObject *data_obj;          // object held from DragEnter until Leave/Drop
+};
+
+static FORMATETC fmtetc_file = {    // format request for a dropped file list
+    .cfFormat = CF_HDROP,
+    .dwAspect = DVASPECT_CONTENT,
+    .lindex = -1,
+    .tymed = TYMED_HGLOBAL,
+};
+
+static FORMATETC fmtetc_url = {     // .cfFormat: mp_w32_droptarget_create()
+    .dwAspect = DVASPECT_CONTENT,
+    .lindex = -1,
+    .tymed = TYMED_HGLOBAL,
+};
+
+static void DropTarget_Destroy(struct droptarget *t)
+{
+    SAFE_RELEASE(t->data_obj);  // drop any data object still held from a drag
+    talloc_free(t);             // the struct itself (its log was created under it)
+}
+
+// QueryInterface: hand out self (with a new reference) for IUnknown and
+// IDropTarget, and fail with E_NOINTERFACE for anything else.
+static STDMETHODIMP DropTarget_QueryInterface(IDropTarget *self, REFIID riid,
+                                              void **ppvObject)
+{
+    *ppvObject = NULL;
+    if (!IsEqualIID(riid, &IID_IUnknown) && !IsEqualIID(riid, &IID_IDropTarget))
+        return E_NOINTERFACE;
+    *ppvObject = self;
+    IDropTarget_AddRef(self);
+    return S_OK;
+}
+
+static STDMETHODIMP_(ULONG) DropTarget_AddRef(IDropTarget *self)
+{
+    struct droptarget *t = (struct droptarget *)self;
+    return atomic_fetch_add(&t->ref_cnt, 1) + 1; // old value + 1 = new count
+}
+
+// IUnknown::Release: drop one reference; destroy the object on the last one.
+static STDMETHODIMP_(ULONG) DropTarget_Release(IDropTarget *self)
+{
+    struct droptarget *target = (struct droptarget *)self;
+    ULONG remaining = atomic_fetch_sub(&target->ref_cnt, 1) - 1;
+    if (!remaining)
+        DropTarget_Destroy(target);
+    return remaining;
+}
+
+// IDropTarget::DragEnter: remember the dragged object, and refuse the drag
+// (DROPEFFECT_NONE) if it offers neither the file-list nor the URL format.
+static STDMETHODIMP DropTarget_DragEnter(IDropTarget *self,
+                                         IDataObject *pDataObj,
+                                         DWORD grfKeyState, POINTL pt,
+                                         DWORD *pdwEffect)
+{
+    struct droptarget *target = (struct droptarget *)self;
+    // Take our reference before releasing any previously stored object
+    IDataObject_AddRef(pDataObj);
+    if (FAILED(IDataObject_QueryGetData(pDataObj, &fmtetc_file))) {
+        if (FAILED(IDataObject_QueryGetData(pDataObj, &fmtetc_url)))
+            *pdwEffect = DROPEFFECT_NONE;
+    }
+    SAFE_RELEASE(target->data_obj);
+    target->data_obj = pDataObj;
+    target->last_effect = *pdwEffect;
+    return S_OK;
+}
+
+static STDMETHODIMP DropTarget_DragOver(IDropTarget *self, DWORD grfKeyState,
+                                        POINTL pt, DWORD *pdwEffect)
+{
+    struct droptarget *t = (struct droptarget *)self;
+    // Keep reporting the effect that DragEnter stored for this drag
+    *pdwEffect = t->last_effect;
+    return S_OK;
+}
+
+static STDMETHODIMP DropTarget_DragLeave(IDropTarget *self)
+{
+    struct droptarget *t = (struct droptarget *)self;
+    // The drag is over without a drop: release the object kept by DragEnter
+    SAFE_RELEASE(t->data_obj);
+    return S_OK;
+}
+
+static STDMETHODIMP DropTarget_Drop(IDropTarget *self, IDataObject *pDataObj,
+                                    DWORD grfKeyState, POINTL pt,
+                                    DWORD *pdwEffect)
+{
+    struct droptarget *t = (struct droptarget *)self;
+    // Handle a drop: pick the action, then forward files or a URL to input_ctx
+    enum mp_dnd_action action;  // option >= 0 forces it, else Shift decides
+    if (t->opts->drag_and_drop >= 0) {
+        action = t->opts->drag_and_drop;
+    } else {
+        action = (grfKeyState & MK_SHIFT) ? DND_APPEND : DND_REPLACE;
+    }
+
+    SAFE_RELEASE(t->data_obj);  // release the object kept since DragEnter
+
+    STGMEDIUM medium;
+    if (t->opts->drag_and_drop == -2) { // -2: refuse the drop
+        t->last_effect = DROPEFFECT_NONE;
+    } else if (SUCCEEDED(IDataObject_GetData(pDataObj, &fmtetc_file, &medium))) {
+        if (GlobalLock(medium.hGlobal)) {
+            HDROP drop = medium.hGlobal;
+            // Index 0xFFFFFFFF yields the number of dropped files
+            UINT files_num = DragQueryFileW(drop, 0xFFFFFFFF, NULL, 0);
+            char **files = talloc_zero_array(NULL, char*, files_num);
+            // recvd_files only counts names that were fetched successfully
+            UINT recvd_files = 0;
+            for (UINT i = 0; i < files_num; i++) {
+                UINT len = DragQueryFileW(drop, i, NULL, 0);
+                wchar_t *buf = talloc_array(NULL, wchar_t, len + 1);
+                // Fetch into a len + 1 buffer and expect exactly len back
+                if (DragQueryFileW(drop, i, buf, len + 1) == len) {
+                    char *fname = mp_to_utf8(files, buf);
+                    files[recvd_files++] = fname;
+
+                    MP_VERBOSE(t, "received dropped file: %s\n", fname);
+                } else {
+                    MP_ERR(t, "error getting dropped file name\n");
+                }
+
+                talloc_free(buf);
+            }
+
+            GlobalUnlock(medium.hGlobal);
+            mp_event_drop_files(t->input_ctx, recvd_files, files, action);
+            talloc_free(files);
+        }
+
+        ReleaseStgMedium(&medium);
+    } else if (SUCCEEDED(IDataObject_GetData(pDataObj, &fmtetc_url, &medium))) {
+        wchar_t *wurl = GlobalLock(medium.hGlobal);
+        if (wurl) {
+            char *url = mp_to_utf8(NULL, wurl);
+            if (mp_event_drop_mime_data(t->input_ctx, "text/uri-list",
+                                        bstr0(url), action) > 0)
+            {
+                MP_VERBOSE(t, "received dropped URL: %s\n", url);
+            } else {
+                MP_ERR(t, "error getting dropped URL\n");
+            }
+
+            talloc_free(url);
+            GlobalUnlock(medium.hGlobal);
+        }
+
+        ReleaseStgMedium(&medium);
+    } else {
+        t->last_effect = DROPEFFECT_NONE;
+    }
+    // Report the effect kept in last_effect (NONE if the drop was refused)
+    *pdwEffect = t->last_effect;
+    return S_OK;
+}
+
+static IDropTargetVtbl idroptarget_vtbl = { // set as iface.lpVtbl on creation
+    .QueryInterface = DropTarget_QueryInterface,
+    .AddRef = DropTarget_AddRef,
+    .Release = DropTarget_Release,
+    .DragEnter = DropTarget_DragEnter,
+    .DragOver = DropTarget_DragOver,
+    .DragLeave = DropTarget_DragLeave,
+    .Drop = DropTarget_Drop,
+};
+
+IDropTarget *mp_w32_droptarget_create(struct mp_log *log,
+                                      struct mp_vo_opts *opts,
+                                      struct input_ctx *input_ctx)
+{
+    fmtetc_url.cfFormat = RegisterClipboardFormatW(L"UniformResourceLocatorW");
+    // The count starts at 0; presumably the caller takes the first reference
+    struct droptarget *dt = talloc(NULL, struct droptarget);
+    dt->iface.lpVtbl = &idroptarget_vtbl;
+    atomic_store(&dt->ref_cnt, 0);
+    dt->last_effect = 0;
+    dt->data_obj = NULL;
+    dt->log = mp_log_new(dt, log, "droptarget");
+    dt->opts = opts;
+    dt->input_ctx = input_ctx;
+    // Callers only ever see the embedded IDropTarget interface
+    return &dt->iface;
+}
diff --git a/video/out/win32/droptarget.h b/video/out/win32/droptarget.h
new file mode 100644
index 0000000..1c18c06
--- /dev/null
+++ b/video/out/win32/droptarget.h
@@ -0,0 +1,35 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MP_WIN32_DROPTARGET_H_
+#define MP_WIN32_DROPTARGET_H_
+
+#include <windows.h>
+#include <ole2.h>
+#include <shobjidl.h>
+
+#include "input/input.h"
+#include "common/msg.h"
+#include "common/common.h"
+#include "options/options.h"
+
+// Create an IDropTarget implementation that sends dropped files to input_ctx
+IDropTarget *mp_w32_droptarget_create(struct mp_log *log,
+                                      struct mp_vo_opts *opts,
+                                      struct input_ctx *input_ctx);
+
+#endif
diff --git a/video/out/win_state.c b/video/out/win_state.c
new file mode 100644
index 0000000..b4bc9fd
--- /dev/null
+++ b/video/out/win_state.c
@@ -0,0 +1,155 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "win_state.h"
+#include "vo.h"
+
+#include "video/mp_image.h"
+
+// Store the monitor pixel aspect in *pixelaspect and apply it to the size:
+// *h is enlarged if the aspect is below 1, otherwise *w is scaled by it.
+static void calc_monitor_aspect(struct mp_vo_opts *opts, int scr_w, int scr_h,
+                                double *pixelaspect, int *w, int *h)
+{
+    double par = 1.0 / opts->monitor_pixel_aspect;
+    if (opts->force_monitor_aspect && scr_w > 0 && scr_h > 0)
+        par = 1.0 / (opts->force_monitor_aspect * scr_h / scr_w);
+    *pixelaspect = par;
+    if (par < 1)
+        *h = *h / par;
+    else
+        *w = *w * par;
+}
+
+// Fit *w/*h into the size specified by geo, keeping the aspect ratio.
+// allow_up/allow_down say whether the size may be enlarged/reduced to fit.
+static void apply_autofit(int *w, int *h, int scr_w, int scr_h,
+                          struct m_geometry *geo, bool allow_up, bool allow_down)
+{
+    if (!geo->wh_valid)
+        return;
+
+    // Only the size part of the geometry matters; the position is discarded
+    int pos = 0;
+    int box_w = *w, box_h = *h;
+    m_geometry_apply(&pos, &pos, &box_w, &box_h, scr_w, scr_h, geo);
+
+    // Nothing to do if the size is already on the permitted side of the box
+    bool fits = *w <= box_w && *h <= box_h;
+    bool covers = *w >= box_w && *h >= box_h;
+    if ((!allow_up && fits) || (!allow_down && covers))
+        return;
+
+    // If aspect mismatches, always make the window smaller than the fit box
+    // (Or larger, if allow_down==false.)
+    double asp = (double)*w / *h;
+    double box_asp = (double)box_w / box_h;
+
+    bool match_width = (box_asp <= asp) == allow_down;
+    *w = match_width ? box_w : box_h * asp;
+    *h = match_width ? box_w / asp : box_h;
+}
+
+// Compute the "suggested" window size and position and return it in *out_geo.
+// screen is the bounding box of the current screen within the virtual desktop.
+// Does not change *vo.
+//  screen: position of the area on virtual desktop on which the video-content
+//          should be placed (maybe after excluding decorations, taskbars, etc)
+//  monitor: position of the monitor on virtual desktop (used for pixelaspect).
+//  dpi_scale: the DPI multiplier to get from virtual to real coordinates
+//             (>1 for "hidpi")
+// Use vo_apply_window_geometry() to copy the result into the vo.
+// NOTE: currently, all windowing backends do their own handling of window
+//       geometry additional to this code. This is to deal with initial window
+//       placement, fullscreen handling, avoiding resize on reconfig() with no
+//       size change, multi-monitor stuff, and possibly more.
+void vo_calc_window_geometry3(struct vo *vo, const struct mp_rect *screen,
+                              const struct mp_rect *monitor,
+                              double dpi_scale, struct vo_win_geometry *out_geo)
+{
+    struct mp_vo_opts *opts = vo->opts;
+
+    *out_geo = (struct vo_win_geometry){0};
+
+    // The case of calling this function even though no video was configured
+    // yet (i.e. vo->params==NULL) happens when vo_gpu creates a hidden window
+    // in order to create a rendering context.
+    struct mp_image_params params = { .w = 320, .h = 200 };
+    if (vo->params)
+        params = *vo->params;
+
+    if (!opts->hidpi_window_scale)
+        dpi_scale = 1;  // option off: ignore the DPI multiplier
+    // Start from the video's display size (swapped if the VO rotates by 90)
+    int d_w, d_h;
+    mp_image_params_get_dsize(&params, &d_w, &d_h);
+    if ((vo->driver->caps & VO_CAP_ROTATE90) && params.rotate % 180 == 90)
+        MPSWAP(int, d_w, d_h);
+    d_w = MPCLAMP(d_w * opts->window_scale * dpi_scale, 1, 16000);
+    d_h = MPCLAMP(d_h * opts->window_scale * dpi_scale, 1, 16000);
+
+    int scr_w = screen->x1 - screen->x0;
+    int scr_h = screen->y1 - screen->y0;
+
+    int mon_w = monitor->x1 - monitor->x0;
+    int mon_h = monitor->y1 - monitor->y0;
+
+    MP_DBG(vo, "max content size: %dx%d\n", scr_w, scr_h);
+    MP_DBG(vo, "monitor size: %dx%d\n", mon_w, mon_h);
+
+    calc_monitor_aspect(opts, mon_w, mon_h, &out_geo->monitor_par, &d_w, &d_h);
+    // autofit: up and down; autofit_smaller: up only; autofit_larger: down only
+    apply_autofit(&d_w, &d_h, scr_w, scr_h, &opts->autofit, true, true);
+    apply_autofit(&d_w, &d_h, scr_w, scr_h, &opts->autofit_smaller, true, false);
+    apply_autofit(&d_w, &d_h, scr_w, scr_h, &opts->autofit_larger, false, true);
+    // Default to centered within the screen area, then apply opts->geometry
+    out_geo->win.x0 = (int)(scr_w - d_w) / 2;
+    out_geo->win.y0 = (int)(scr_h - d_h) / 2;
+    m_geometry_apply(&out_geo->win.x0, &out_geo->win.y0, &d_w, &d_h,
+                     scr_w, scr_h, &opts->geometry);
+    // Convert from screen-relative to virtual desktop coordinates
+    out_geo->win.x0 += screen->x0;
+    out_geo->win.y0 += screen->y0;
+    out_geo->win.x1 = out_geo->win.x0 + d_w;
+    out_geo->win.y1 = out_geo->win.y0 + d_h;
+
+    if (opts->geometry.xy_valid || opts->force_window_position)
+        out_geo->flags |= VO_WIN_FORCE_POS;
+}
+
+// Same as vo_calc_window_geometry3(), with the screen rect used as the monitor
+void vo_calc_window_geometry2(struct vo *vo, const struct mp_rect *screen,
+                              double dpi_scale, struct vo_win_geometry *out_geo)
+{
+    vo_calc_window_geometry3(vo, screen, screen, dpi_scale, out_geo);
+}
+
+void vo_calc_window_geometry(struct vo *vo, const struct mp_rect *screen,
+                             struct vo_win_geometry *out_geo)
+{
+    vo_calc_window_geometry2(vo, screen, 1.0, out_geo); // dpi_scale fixed at 1.0
+}
+
+// Copy the window size and monitor pixel aspect from *geo to the vo fields.
+// (Doesn't do anything else - windowing backends should trigger VO_EVENT_RESIZE
+//  to ensure that the VO reinitializes rendering properly.)
+void vo_apply_window_geometry(struct vo *vo, const struct vo_win_geometry *geo)
+{
+    vo->dwidth = geo->win.x1 - geo->win.x0;
+    vo->dheight = geo->win.y1 - geo->win.y0;
+    vo->monitor_par = geo->monitor_par;
+}
diff --git a/video/out/win_state.h b/video/out/win_state.h
new file mode 100644
index 0000000..a253efa
--- /dev/null
+++ b/video/out/win_state.h
@@ -0,0 +1,35 @@
+#ifndef MP_WIN_STATE_H_
+#define MP_WIN_STATE_H_
+
+#include "common/common.h"
+
+struct vo;
+
+enum {
+    // Set when user options (an x/y position in the geometry option, or
+    // force_window_position) require overriding the window manager's position.
+    VO_WIN_FORCE_POS = (1 << 0),
+};
+
+struct vo_win_geometry {
+    // Bitfield of VO_WIN_* flags
+    int flags;
+    // Position & size of the window. In xinerama coordinates, i.e. they're
+    // relative to the virtual desktop encompassing all screens, not the
+    // current screen.
+    struct mp_rect win;
+    // Pixel aspect ratio of the current monitor.
+    // (calculated from monitor size and options.)
+    double monitor_par;
+};
+
+void vo_calc_window_geometry(struct vo *vo, const struct mp_rect *screen,
+                             struct vo_win_geometry *out_geo);
+void vo_calc_window_geometry2(struct vo *vo, const struct mp_rect *screen,
+                              double dpi_scale, struct vo_win_geometry *out_geo);
+void vo_calc_window_geometry3(struct vo *vo, const struct mp_rect *screen,
+                              const struct mp_rect *monitor,
+                              double dpi_scale, struct vo_win_geometry *out_geo);
+void vo_apply_window_geometry(struct vo *vo, const struct vo_win_geometry *geo);
+
+#endif
diff --git a/video/out/wldmabuf/context_wldmabuf.c b/video/out/wldmabuf/context_wldmabuf.c
new file mode 100644
index 0000000..c494575
--- /dev/null
+++ b/video/out/wldmabuf/context_wldmabuf.c
@@ -0,0 +1,43 @@
+/*
+ * This file is part of mpv video player.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/wayland_common.h"
+#include "video/out/opengl/context.h"
+#include "ra_wldmabuf.h"
+
+static void uninit(struct ra_ctx *ctx)
+{
+    ra_free(&ctx->ra);          // free the ra before the Wayland state it uses
+    vo_wayland_uninit(ctx->vo);
+}
+
+// Bring up the Wayland VO state and, if that worked, create the wldmabuf ra.
+static bool init(struct ra_ctx *ctx)
+{
+    bool ok = vo_wayland_init(ctx->vo);
+    if (ok)
+        ctx->ra = ra_create_wayland(ctx->log, ctx->vo);
+    return ok;
+}
+
+const struct ra_ctx_fns ra_ctx_wldmabuf = {
+    .type               = "none",
+    .name               = "wldmabuf",
+    .hidden             = true,
+    .init               = init,      // vo_wayland_init() + ra_create_wayland()
+    .uninit             = uninit,    // frees the ra, then the Wayland state
+};
diff --git a/video/out/wldmabuf/ra_wldmabuf.c b/video/out/wldmabuf/ra_wldmabuf.c
new file mode 100644
index 0000000..3f27314
--- /dev/null
+++ b/video/out/wldmabuf/ra_wldmabuf.c
@@ -0,0 +1,66 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "video/out/wayland_common.h"
+#include "video/out/gpu/ra.h"
+#include "ra_wldmabuf.h"
+
+struct priv {
+    struct vo *vo;  // VO whose Wayland state (vo->wl) holds the format table
+};
+
+static void destroy(struct ra *ra)
+{
+    talloc_free(ra->priv);  // allocated without a parent in ra_create_wayland()
+}
+
+// True if the wl format table (format_size bytes) lists this format/modifier
+bool ra_compatible_format(struct ra* ra, uint32_t drm_format, uint64_t modifier)
+{
+    struct priv* p = ra->priv;
+    struct vo_wayland_state *wl = p->vo->wl;
+    const wayland_format *formats = wl->format_map;
+    const size_t count = wl->format_size / sizeof(wayland_format);
+    for (size_t i = 0; i < count; i++) {
+        if (drm_format == formats[i].format && modifier == formats[i].modifier)
+            return true;
+    }
+    return false;
+}
+
+static struct ra_fns ra_fns_wldmabuf = { // its address is what ra_is_wldmabuf() tests
+    .destroy                = destroy,
+};
+
+// Create a ra that only carries the "wl" native resource and a vo reference.
+struct ra *ra_create_wayland(struct mp_log *log, struct vo* vo)
+{
+    struct ra *ra = talloc_zero(NULL, struct ra);
+    ra->log = log;
+    ra->fns = &ra_fns_wldmabuf;
+    ra_add_native_resource(ra, "wl", vo->wl->display);
+    // priv has no talloc parent; destroy() frees it explicitly
+    struct priv *priv = talloc_zero(NULL, struct priv);
+    priv->vo = vo;
+    ra->priv = priv;
+    return ra;
+}
+
+bool ra_is_wldmabuf(struct ra *ra)
+{
+    return (ra->fns == &ra_fns_wldmabuf); // identity check on the function table
+}
diff --git a/video/out/wldmabuf/ra_wldmabuf.h b/video/out/wldmabuf/ra_wldmabuf.h
new file mode 100644
index 0000000..8e20173
--- /dev/null
+++ b/video/out/wldmabuf/ra_wldmabuf.h
@@ -0,0 +1,23 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+#include "video/out/wayland_common.h"
+
+struct ra *ra_create_wayland(struct mp_log *log, struct vo *vo);
+bool ra_compatible_format(struct ra* ra, uint32_t drm_format, uint64_t modifier);
+bool ra_is_wldmabuf(struct ra *ra);
diff --git a/video/out/x11_common.c b/video/out/x11_common.c
new file mode 100644
index 0000000..b4605bf
--- /dev/null
+++ b/video/out/x11_common.c
@@ -0,0 +1,2291 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <unistd.h>
+#include <poll.h>
+#include <string.h>
+#include <assert.h>
+
+#include <X11/Xmd.h>
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+#include <X11/Xatom.h>
+#include <X11/keysym.h>
+#include <X11/XKBlib.h>
+#include <X11/XF86keysym.h>
+
+#include <X11/extensions/scrnsaver.h>
+#include <X11/extensions/dpms.h>
+#include <X11/extensions/shape.h>
+#include <X11/extensions/Xpresent.h>
+#include <X11/extensions/Xrandr.h>
+
+#include "misc/bstr.h"
+#include "options/options.h"
+#include "options/m_config.h"
+#include "common/common.h"
+#include "common/msg.h"
+#include "input/input.h"
+#include "input/event.h"
+#include "video/image_loader.h"
+#include "video/mp_image.h"
+#include "present_sync.h"
+#include "x11_common.h"
+#include "mpv_talloc.h"
+
+#include "vo.h"
+#include "win_state.h"
+#include "osdep/io.h"
+#include "osdep/poll_wrapper.h"
+#include "osdep/timer.h"
+#include "osdep/subprocess.h"
+
+#include "input/input.h"
+#include "input/keycodes.h"
+
+// Bit flags describing detected window manager capabilities; they are OR-ed
+// together by vo_wm_detect() and produced by net_wm_support_state_test().
+#define vo_wm_LAYER 1
+#define vo_wm_FULLSCREEN 2
+#define vo_wm_STAYS_ON_TOP 4
+#define vo_wm_ABOVE 8
+#define vo_wm_BELOW 16
+#define vo_wm_STICKY 32
+
+/* EWMH state actions, see
+         http://freedesktop.org/Standards/wm-spec/index.html#id2768769 */
+#define NET_WM_STATE_REMOVE        0    /* remove/unset property */
+#define NET_WM_STATE_ADD           1    /* add/set property */
+#define NET_WM_STATE_TOGGLE        2    /* toggle property  */
+
+// Layer values (presumably for the _WIN_LAYER hint probed in vo_wm_detect();
+// their use is outside this part of the file).
+#define WIN_LAYER_ONBOTTOM               2
+#define WIN_LAYER_NORMAL                 4
+#define WIN_LAYER_ONTOP                  6
+#define WIN_LAYER_ABOVE_DOCK             10
+
+// Version number advertised in the XdndAware window property.
+#define DND_VERSION 5
+
+// XEmbed protocol constants.
+#define XEMBED_VERSION              0
+#define XEMBED_MAPPED               (1 << 0)
+#define XEMBED_EMBEDDED_NOTIFY      0
+#define XEMBED_REQUEST_FOCUS        3
+
+// ----- Motif header: -------
+
+// Bits for MotifWmHints.flags: which of the other fields are meaningful.
+#define MWM_HINTS_FUNCTIONS     (1L << 0)
+#define MWM_HINTS_DECORATIONS   (1L << 1)
+
+// Bits for MotifWmHints.functions.
+#define MWM_FUNC_RESIZE         (1L << 1)
+#define MWM_FUNC_MOVE           (1L << 2)
+#define MWM_FUNC_MINIMIZE       (1L << 3)
+#define MWM_FUNC_MAXIMIZE       (1L << 4)
+#define MWM_FUNC_CLOSE          (1L << 5)
+
+// Bit for MotifWmHints.decorations.
+#define MWM_DECOR_ALL           (1L << 0)
+
+// In-memory layout of the _MOTIF_WM_HINTS property. It is read and written as
+// 5 items of format 32, which Xlib represents as long (even on LP64 systems),
+// hence the five long fields.
+typedef struct
+{
+    long flags;         // MWM_HINTS_* bits
+    long functions;     // MWM_FUNC_* bits
+    long decorations;   // MWM_DECOR_* bits
+    long input_mode;
+    long state;
+} MotifWmHints;
+
+// Embedded icon data at several sizes; the contents come from the generated
+// .inc files included as the array initializers.
+static const char x11_icon_16[] =
+#include "etc/mpv-icon-8bit-16x16.png.inc"
+;
+
+static const char x11_icon_32[] =
+#include "etc/mpv-icon-8bit-32x32.png.inc"
+;
+
+static const char x11_icon_64[] =
+#include "etc/mpv-icon-8bit-64x64.png.inc"
+;
+
+static const char x11_icon_128[] =
+#include "etc/mpv-icon-8bit-128x128.png.inc"
+;
+
+// Wrap an icon array as a bstr initializer: { data pointer, size in bytes }.
+#define ICON_ENTRY(var) { (char *)var, sizeof(var) }
+// All embedded icons, terminated by a zeroed entry.
+static const struct bstr x11_icons[] = {
+    ICON_ENTRY(x11_icon_16),
+    ICON_ENTRY(x11_icon_32),
+    ICON_ENTRY(x11_icon_64),
+    ICON_ENTRY(x11_icon_128),
+    {0}
+};
+
+// Log used by x11_errorhandler(); while it is NULL the handler does nothing.
+static struct mp_log *x11_error_output;
+// While non-zero, x11_errorhandler() logs at verbose level instead of error
+// level. Adjusted through vo_x11_silence_xlib().
+static atomic_int x11_error_silence;
+
+// Forward declarations for helpers defined later in this file.
+static bool rc_overlaps(struct mp_rect rc1, struct mp_rect rc2);
+static void vo_x11_update_geometry(struct vo *vo);
+static void vo_x11_fullscreen(struct vo *vo);
+static void xscreensaver_heartbeat(struct vo_x11_state *x11);
+static void set_screensaver(struct vo_x11_state *x11, bool enabled);
+static void vo_x11_selectinput_witherr(struct vo *vo, Display *display,
+                                       Window w, long event_mask);
+static void vo_x11_setlayer(struct vo *vo, bool ontop);
+static void vo_x11_xembed_handle_message(struct vo *vo, XClientMessageEvent *ce);
+static void vo_x11_xembed_send_message(struct vo_x11_state *x11, long m[4]);
+static void vo_x11_move_resize(struct vo *vo, bool move, bool resize,
+                               struct mp_rect rc);
+static void vo_x11_maximize(struct vo *vo);
+static void vo_x11_minimize(struct vo *vo);
+static void vo_x11_set_input_region(struct vo *vo, bool passthrough);
+static void vo_x11_sticky(struct vo *vo, bool sticky);
+
+// Intern an atom on the x11 state's display. XA takes a bare token and
+// stringifies it; XAs takes a string expression.
+#define XA(x11, s) (XInternAtom((x11)->display, # s, False))
+#define XAs(x11, s) XInternAtom((x11)->display, s, False)
+
+// Width and height of an mp_rect-style rectangle (x0/y0/x1/y1 members).
+#define RC_W(rc) ((rc).x1 - (rc).x0)
+#define RC_H(rc) ((rc).y1 - (rc).y0)
+
+// Write the name of atom into buf (at most buf_size bytes including the
+// terminator) and return buf. If the name cannot be obtained, buf holds an
+// empty string.
+static char *x11_atom_name_buf(struct vo_x11_state *x11, Atom atom,
+                               char *buf, size_t buf_size)
+{
+    char *xname;
+
+    // Start empty so a failed lookup still yields a valid string.
+    *buf = '\0';
+
+    xname = XGetAtomName(x11->display, atom);
+    if (!xname)
+        return buf;
+
+    snprintf(buf, buf_size, "%s", xname);
+    XFree(xname);
+    return buf;
+}
+
+// Same as x11_atom_name_buf(), but formats into an anonymous 80-byte buffer.
+#define x11_atom_name(x11, atom) x11_atom_name_buf(x11, atom, (char[80]){0}, 80)
+
+// Read a complete window property. The result must be released with XFree();
+// NULL is returned if w is 0, the request fails, the stored format differs
+// from the requested one, the property is empty, or it did not fit within
+// the static length limit.
+// format = 8 (unsigned char), 16 (short), 32 (long, even on LP64 systems)
+// *out_nitems = returned number of items of requested format
+static void *x11_get_property(struct vo_x11_state *x11, Window w, Atom property,
+                              Atom type, int format, int *out_nitems)
+{
+    assert(format == 8 || format == 16 || format == 32);
+    *out_nitems = 0;
+    if (!w)
+        return NULL;
+
+    const long length_limit = 128 * 1024 * 1024; // static maximum limit
+    Atom actual_type = 0;
+    int actual_format = 0;
+    unsigned long item_count = 0;
+    unsigned long bytes_remaining = 0;
+    unsigned char *data = NULL;
+
+    int status = XGetWindowProperty(x11->display, w, property, 0, length_limit,
+                                    False, type, &actual_type, &actual_format,
+                                    &item_count, &bytes_remaining, &data);
+    if (status != Success)
+        return NULL;
+
+    bool usable = actual_format == format && item_count >= 1 &&
+                  !bytes_remaining;
+    if (!usable) {
+        // *out_nitems is still 0 from above.
+        XFree(data);
+        return NULL;
+    }
+
+    *out_nitems = item_count;
+    return data;
+}
+
+// Read a property and copy its first dst_size bytes into dst. Returns true
+// only if the property exists in the requested format and holds at least
+// dst_size bytes; dst is left untouched otherwise.
+static bool x11_get_property_copy(struct vo_x11_state *x11, Window w,
+                                  Atom property, Atom type, int format,
+                                  void *dst, size_t dst_size)
+{
+    int nitems;
+    void *prop = x11_get_property(x11, w, property, type, format, &nitems);
+    if (!prop)
+        return false;
+
+    // Format-32 items are handed back as longs, whatever sizeof(long) is.
+    size_t item_bytes = format == 32 ? sizeof(long) : format / 8;
+    bool fits = dst_size <= nitems * item_bytes;
+    if (fits)
+        memcpy(dst, prop, dst_size);
+
+    XFree(prop);
+    return fits;
+}
+
+// Send a format-32 client message about our window to the root window.
+// message_type is the atom name of the message; params supplies the five
+// data longs. Does nothing if no window exists yet.
+static void x11_send_ewmh_msg(struct vo_x11_state *x11, char *message_type,
+                              long params[5])
+{
+    if (!x11->window)
+        return;
+
+    Atom type_atom = XInternAtom(x11->display, message_type, False);
+    XEvent ev = {
+        .xclient = {
+            .type = ClientMessage,
+            .send_event = True,
+            .message_type = type_atom,
+            .window = x11->window,
+            .format = 32,
+        },
+    };
+    memcpy(ev.xclient.data.l, params, 5 * sizeof(long));
+
+    long mask = SubstructureRedirectMask | SubstructureNotifyMask;
+    Status sent = XSendEvent(x11->display, x11->rootwin, False, mask, &ev);
+    if (!sent)
+        MP_ERR(x11, "Couldn't send EWMH %s message!\n", message_type);
+}
+
+// Change the _NET_WM_STATE hint: request that the named state atom be added
+// when "set" is true, or removed when it is false.
+static void x11_set_ewmh_state(struct vo_x11_state *x11, char *state, bool set)
+{
+    long params[5] = {0};
+
+    params[0] = set ? NET_WM_STATE_ADD : NET_WM_STATE_REMOVE;
+    params[1] = XInternAtom(x11->display, state, False);
+    params[2] = 0; // No second state
+    params[3] = 1; // source indication: normal
+
+    x11_send_ewmh_msg(x11, "_NET_WM_STATE", params);
+}
+
+// Bring the X cursor in line with the wanted visibility: hidden only while
+// the window has focus and the cursor is marked as not visible. Hiding is
+// done by installing an empty 8x8 pixmap cursor.
+static void vo_update_cursor(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    Display *dpy = x11->display;
+    Window target = x11->window;
+
+    bool hide = x11->has_focus && !x11->mouse_cursor_visible;
+    if (hide == x11->mouse_cursor_set)
+        return;
+    x11->mouse_cursor_set = hide;
+
+    // do not hide if playing on the root window
+    if (!target || x11->parent == x11->rootwin)
+        return;
+
+    if (!hide) {
+        // Back to the default cursor.
+        XDefineCursor(dpy, target, 0);
+        return;
+    }
+
+    Colormap cmap = DefaultColormap(dpy, DefaultScreen(dpy));
+    XColor black, unused;
+    if (!XAllocNamedColor(dpy, cmap, "black", &black, &unused))
+        return; // color alloc failed, give up
+
+    const char empty_bits[] = {0, 0, 0, 0, 0, 0, 0, 0};
+    Pixmap blank = XCreateBitmapFromData(dpy, target, empty_bits, 8, 8);
+    Cursor invisible = XCreatePixmapCursor(dpy, blank, blank, &black, &black,
+                                           0, 0);
+    XDefineCursor(dpy, target, invisible);
+    XFreeCursor(dpy, invisible);
+    if (blank != None)
+        XFreePixmap(dpy, blank);
+    XFreeColors(dpy, cmap, &black.pixel, 1, 0);
+}
+
+static int x11_errorhandler(Display *display, XErrorEvent *event)
+{
+    struct mp_log *log = x11_error_output;
+    if (!log)
+        return 0;
+
+    char msg[60];
+    XGetErrorText(display, event->error_code, (char *) &msg, sizeof(msg));
+
+    int lev = atomic_load(&x11_error_silence) ? MSGL_V : MSGL_ERR;
+    mp_msg(log, lev, "X11 error: %s\n", msg);
+    mp_msg(log, lev, "Type: %x, display: %p, resourceid: %lx, serial: %lx\n",
+               event->type, event->display, event->resourceid, event->serial);
+    mp_msg(log, lev, "Error code: %x, request code: %x, minor code: %x\n",
+           event->error_code, event->request_code, event->minor_code);
+
+    return 0;
+}
+
+// Add dir to the silence counter read by x11_errorhandler(). While the
+// counter is non-zero, X errors are logged at verbose level only.
+void vo_x11_silence_xlib(int dir)
+{
+    (void)atomic_fetch_add(&x11_error_silence, dir);
+}
+
+// Map a _NET_WM_STATE_* atom to the matching vo_wm_* capability bit.
+// Returns 0 for atoms that are not one of the states tested below.
+static int net_wm_support_state_test(struct vo_x11_state *x11, Atom atom)
+{
+// Return vo_wm_<x> from the enclosing function if atom is _NET_WM_STATE_<x>.
+#define NET_WM_STATE_TEST(x) \
+    do { \
+        if (XA(x11, _NET_WM_STATE_##x) == atom) { \
+            MP_DBG(x11, "Detected wm supports " #x " state.\n" ); \
+            return vo_wm_##x; \
+        } \
+    } while (0)
+
+    NET_WM_STATE_TEST(FULLSCREEN);
+    NET_WM_STATE_TEST(ABOVE);
+    NET_WM_STATE_TEST(STAYS_ON_TOP);
+    NET_WM_STATE_TEST(BELOW);
+    NET_WM_STATE_TEST(STICKY);
+
+    return 0;
+}
+
+// Probe the root window for window manager capabilities and return them as
+// a set of vo_wm_* bits. Returns 0 without probing when a parent window is
+// set. The x11_netwm option disables NetWM state detection when negative and
+// forces the FULLSCREEN bit when positive.
+static int vo_wm_detect(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    if (x11->parent)
+        return 0;
+
+    Window root = x11->rootwin;
+    int caps = 0;
+    int count;
+
+    // -- supports layers
+    Atom *protocols = x11_get_property(x11, root, XA(x11, _WIN_PROTOCOLS),
+                                       XA_ATOM, 32, &count);
+    if (protocols) {
+        for (int n = 0; n < count; n++) {
+            if (protocols[n] != XA(x11, _WIN_LAYER))
+                continue;
+            MP_DBG(x11, "Detected wm supports layers.\n");
+            caps |= vo_wm_LAYER;
+        }
+        XFree(protocols);
+    }
+
+    // --- netwm
+    Atom *supported = x11_get_property(x11, root, XA(x11, _NET_SUPPORTED),
+                                       XA_ATOM, 32, &count);
+    if (supported) {
+        MP_DBG(x11, "Detected wm supports NetWM.\n");
+        if (x11->opts->x11_netwm < 0) {
+            MP_DBG(x11, "NetWM usage disabled by user.\n");
+        } else {
+            for (int n = 0; n < count; n++)
+                caps |= net_wm_support_state_test(x11, supported[n]);
+        }
+        XFree(supported);
+    }
+
+    if (!caps)
+        MP_DBG(x11, "Unknown wm type...\n");
+
+    bool force_fs = x11->opts->x11_netwm > 0;
+    if (force_fs && !(caps & vo_wm_FULLSCREEN)) {
+        MP_WARN(x11, "Forcing NetWM FULLSCREEN support.\n");
+        caps |= vo_wm_FULLSCREEN;
+    }
+
+    return caps;
+}
+
+// Decide whether XPresent is used. It needs a non-zero present_code, and
+// then either x11_present == 2, or a non-zero x11_present together with a
+// Mesa provider and no Nvidia provider.
+static void xpresent_set(struct vo_x11_state *x11)
+{
+    int mode = x11->opts->x11_present;
+    bool forced = mode == 2;
+    bool driver_ok = x11->has_mesa && !x11->has_nvidia && mode;
+
+    x11->use_present = x11->present_code && (driver_ok || forced);
+
+    if (!x11->use_present) {
+        MP_VERBOSE(x11, "XPresent disabled.\n");
+        return;
+    }
+    MP_VERBOSE(x11, "XPresent enabled.\n");
+}
+
+// Rebuild x11->displays from the current RandR state.
+//
+// Frees the previous display names, initializes the RandR extension and
+// selects its change notifications on first use, updates has_mesa/has_nvidia
+// from the provider names, and records rectangle, refresh rate and name of
+// every output with an active mode (up to MAX_DISPLAYS). Finally assigns
+// screen and atom_id for each display, giving atom_id 0 to the primary
+// output.
+static void xrandr_read(struct vo_x11_state *x11)
+{
+    for(int i = 0; i < x11->num_displays; i++)
+        talloc_free(x11->displays[i].name);
+
+    x11->num_displays = 0;
+
+    if (x11->xrandr_event < 0) {
+        int event_base, error_base;
+        if (!XRRQueryExtension(x11->display, &event_base, &error_base)) {
+            MP_VERBOSE(x11, "Couldn't init Xrandr.\n");
+            return;
+        }
+        x11->xrandr_event = event_base + RRNotify;
+        XRRSelectInput(x11->display, x11->rootwin, RRScreenChangeNotifyMask |
+                       RRCrtcChangeNotifyMask | RROutputChangeNotifyMask);
+    }
+
+    XRRScreenResources *r = XRRGetScreenResourcesCurrent(x11->display, x11->rootwin);
+    if (!r) {
+        MP_VERBOSE(x11, "Xrandr doesn't work.\n");
+        return;
+    }
+
+    /* Look at the available providers on the current screen and try to determine
+     * the driver. If amd/intel/radeon, assume this is mesa. If nvidia is found,
+     * assume nvidia. Because the same screen can have multiple providers (e.g.
+     * a laptop with switchable graphics), we need to know both of these things.
+     * In practice, this is used for determining whether or not to use XPresent
+     * (i.e. needs to be Mesa and not Nvidia). Requires Randr 1.4. */
+    XRRProviderResources *pr = XRRGetProviderResources(x11->display, x11->rootwin);
+    // The provider query can fail (e.g. no Randr 1.4); treat that as
+    // "no providers" instead of dereferencing NULL.
+    for (int i = 0; pr && i < pr->nproviders; i++) {
+        XRRProviderInfo *info = XRRGetProviderInfo(x11->display, r, pr->providers[i]);
+        if (!info)
+            continue;
+        struct bstr provider_name = bstrdup(x11, bstr0(info->name));
+        bstr_lower(provider_name);
+        int amd = bstr_find0(provider_name, "amd");
+        int intel = bstr_find0(provider_name, "intel");
+        int modesetting = bstr_find0(provider_name, "modesetting");
+        int nouveau = bstr_find0(provider_name, "nouveau");
+        int nvidia = bstr_find0(provider_name, "nvidia");
+        int radeon = bstr_find0(provider_name, "radeon");
+        x11->has_mesa = x11->has_mesa || amd >= 0 || intel >= 0 ||
+                        modesetting >= 0 || nouveau >= 0 || radeon >= 0;
+        x11->has_nvidia = x11->has_nvidia || nvidia >= 0;
+        // The lowercase copy is parented to x11; release it now, otherwise
+        // every re-read would pile up another copy until uninit.
+        talloc_free(provider_name.start);
+        XRRFreeProviderInfo(info);
+    }
+    if (x11->present_code)
+        xpresent_set(x11);
+    if (pr)
+        XRRFreeProviderResources(pr);
+
+    int primary_id = -1;
+    RROutput primary = XRRGetOutputPrimary(x11->display, x11->rootwin);
+    for (int o = 0; o < r->noutput; o++) {
+        RROutput output = r->outputs[o];
+        XRRCrtcInfo *crtc = NULL;
+        XRROutputInfo *out = XRRGetOutputInfo(x11->display, r, output);
+        if (!out || !out->crtc)
+            goto next;
+        crtc = XRRGetCrtcInfo(x11->display, r, out->crtc);
+        if (!crtc)
+            goto next;
+        for (int om = 0; om < out->nmode; om++) {
+            RRMode xm = out->modes[om];
+            for (int n = 0; n < r->nmode; n++) {
+                XRRModeInfo m = r->modes[n];
+                // Only the mode currently set on the CRTC is of interest.
+                if (m.id != xm || crtc->mode != xm)
+                    continue;
+                if (x11->num_displays >= MAX_DISPLAYS)
+                    continue;
+                double vTotal = m.vTotal;
+                if (m.modeFlags & RR_DoubleScan)
+                    vTotal *= 2;
+                if (m.modeFlags & RR_Interlace)
+                    vTotal /= 2;
+                struct xrandr_display d = {
+                    .rc = { crtc->x, crtc->y,
+                            crtc->x + crtc->width, crtc->y + crtc->height },
+                    .fps = m.dotClock / (m.hTotal * vTotal),
+                    .name = talloc_strdup(x11, out->name),
+                };
+                int num = x11->num_displays++;
+                MP_VERBOSE(x11, "Display %d (%s): [%d, %d, %d, %d] @ %f FPS\n",
+                           num, d.name, d.rc.x0, d.rc.y0, d.rc.x1, d.rc.y1, d.fps);
+                x11->displays[num] = d;
+                if (output == primary)
+                    primary_id = num;
+            }
+        }
+    next:
+        if (crtc)
+            XRRFreeCrtcInfo(crtc);
+        if (out)
+            XRRFreeOutputInfo(out);
+    }
+
+    // The primary display gets atom_id 0; displays listed before it are
+    // shifted up by one so the ids stay unique.
+    for (int i = 0; i < x11->num_displays; i++) {
+        struct xrandr_display *d = &(x11->displays[i]);
+        d->screen = i;
+
+        if (i == primary_id) {
+            d->atom_id = 0;
+            continue;
+        }
+        if (primary_id > 0 && i < primary_id) {
+            d->atom_id = i+1;
+            continue;
+        }
+        d->atom_id = i;
+    }
+
+    XRRFreeScreenResources(r);
+}
+
+// Work out which display the options ask for. Returns -2 for "all displays"
+// (fullscreen with fsscreen_id == -2), -1 when nothing specific was selected,
+// otherwise a display index clamped to the last known display. In fullscreen
+// the fs-screen id/name are used, with the id falling back to screen_id when
+// it is -1; a name is only looked up when the id is -1.
+static int vo_x11_select_screen(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+
+    if (opts->fullscreen && opts->fsscreen_id == -2)
+        return -2; // all displays
+
+    int screen = opts->screen_id;
+    if (opts->fullscreen && opts->fsscreen_id != -1)
+        screen = opts->fsscreen_id;
+
+    char *wanted = opts->fullscreen ? opts->fsscreen_name : opts->screen_name;
+    if (screen == -1 && wanted) {
+        int match = -1;
+        for (int n = 0; n < x11->num_displays && match < 0; n++) {
+            if (strcmp(x11->displays[n].name, wanted) == 0)
+                match = n;
+        }
+        if (match >= 0) {
+            screen = match;
+        } else {
+            MP_WARN(x11, "Screen name %s not found!\n", wanted);
+        }
+    }
+
+    if (screen >= x11->num_displays)
+        screen = x11->num_displays - 1;
+
+    return screen;
+}
+
+// Recompute x11->screenrc. It is the whole workspace when all displays are
+// selected, otherwise the rectangle of the selected display. With no explicit
+// selection, the display containing the window center is used, searching
+// from the last display down and falling back to display 0.
+static void vo_x11_update_screeninfo(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    x11->screenrc = (struct mp_rect){.x1 = x11->ws_width, .y1 = x11->ws_height};
+
+    int screen = vo_x11_select_screen(vo);
+    if (screen < -1)
+        return;
+
+    if (screen == -1) {
+        int cx = x11->winrc.x0 + RC_W(x11->winrc) / 2;
+        int cy = x11->winrc.y0 + RC_H(x11->winrc) / 2;
+
+        screen = x11->num_displays - 1;
+        while (screen > 0) {
+            const struct mp_rect *rc = &x11->displays[screen].rc;
+            bool inside = rc->x0 <= cx && cx <= rc->x1 &&
+                          rc->y0 <= cy && cy <= rc->y1;
+            if (inside)
+                break;
+            screen--;
+        }
+    }
+
+    if (screen < 0)
+        screen = 0;
+
+    const struct mp_rect *chosen = &x11->displays[screen].rc;
+    x11->screenrc = (struct mp_rect){
+        .x0 = chosen->x0,
+        .y0 = chosen->y0,
+        .x1 = chosen->x1,
+        .y1 = chosen->y1,
+    };
+}
+
+// Refresh the "overlaps" flag of every display against the window rectangle
+// and return the overlapping display with the lowest fps (the first one wins
+// on ties), or NULL if the window overlaps none.
+static struct xrandr_display *get_current_display(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct xrandr_display *best = NULL;
+
+    for (int idx = 0; idx < x11->num_displays; idx++) {
+        struct xrandr_display *cur = &x11->displays[idx];
+
+        cur->overlaps = rc_overlaps(cur->rc, x11->winrc);
+        if (!cur->overlaps)
+            continue;
+        if (best && !(cur->fps < best->fps))
+            continue;
+        best = cur;
+    }
+
+    return best;
+}
+
+// Get the monitors for the 4 edges of the rectangle spanning all screens.
+// b[] receives display indices in the order top, bottom, left, right: the
+// display with the smallest y0, the largest y1, the smallest x0 and the
+// largest x1 respectively. All four are 0 when there are no displays.
+static void vo_x11_get_bounding_monitors(struct vo_x11_state *x11, long b[4])
+{
+    //top  bottom left   right
+    b[0] = b[1] = b[2] = b[3] = 0;
+    for (int n = 0; n < x11->num_displays; n++) {
+        struct xrandr_display *d = &x11->displays[n];
+        if (d->rc.y0 < x11->displays[b[0]].rc.y0)
+            b[0] = n;
+        // Bottom and right are the far edges, so look for the maximum here;
+        // taking the minimum would pick an inner edge of the spanning rect.
+        if (d->rc.y1 > x11->displays[b[1]].rc.y1)
+            b[1] = n;
+        if (d->rc.x0 < x11->displays[b[2]].rc.x0)
+            b[2] = n;
+        if (d->rc.x1 > x11->displays[b[3]].rc.x1)
+            b[3] = n;
+    }
+}
+
+// Create the X11 state for a VO and connect to the X server.
+//
+// Allocates vo->x11, installs the process-wide Xlib error handler, opens the
+// default display, queries screen size and DPI, detects window manager
+// capabilities, creates the wakeup pipe and reads the RandR configuration.
+// Returns true on success. If the display cannot be opened, everything is
+// torn down again through vo_x11_uninit() and false is returned.
+bool vo_x11_init(struct vo *vo)
+{
+    char *dispName;
+
+    assert(!vo->x11);
+
+    XInitThreads();
+
+    struct vo_x11_state *x11 = talloc_ptrtype(NULL, x11);
+    *x11 = (struct vo_x11_state){
+        .log = mp_log_new(x11, vo->log, "x11"),
+        .input_ctx = vo->input_ctx,
+        .screensaver_enabled = true,
+        .xrandr_event = -1,
+        .wakeup_pipe = {-1, -1},
+        .dpi_scale = 1,
+        .opts_cache = m_config_cache_alloc(x11, vo->global, &vo_sub_opts),
+    };
+    x11->opts = x11->opts_cache->opts;
+    vo->x11 = x11;
+
+    // Route Xlib errors to our log. Note this happens before the display is
+    // opened.
+    x11_error_output = x11->log;
+    XSetErrorHandler(x11_errorhandler);
+    x11->present = mp_present_initialize(x11, x11->opts, VO_MAX_SWAPCHAIN_DEPTH);
+
+    dispName = XDisplayName(NULL);
+
+    MP_VERBOSE(x11, "X11 opening display: %s\n", dispName);
+
+    x11->display = XOpenDisplay(dispName);
+    if (!x11->display) {
+        // When only probing, a missing display is not reported as an error.
+        MP_MSG(x11, vo->probing ? MSGL_V : MSGL_ERR,
+               "couldn't open the X11 display (%s)!\n", dispName);
+        goto error;
+    }
+    x11->screen = DefaultScreen(x11->display);  // screen ID
+    x11->rootwin = RootWindow(x11->display, x11->screen);   // root window ID
+
+    // WinID >= 0 selects a parent window; a WinID of 0 means the root window.
+    if (x11->opts->WinID >= 0)
+        x11->parent = x11->opts->WinID ? x11->opts->WinID : x11->rootwin;
+
+    if (!x11->opts->native_keyrepeat) {
+        Bool ok = False;
+        XkbSetDetectableAutoRepeat(x11->display, True, &ok);
+        x11->no_autorepeat = ok;
+    }
+
+    x11->xim = XOpenIM(x11->display, NULL, NULL, NULL);
+    if (!x11->xim)
+        MP_WARN(x11, "XOpenIM() failed. Unicode input will not work.\n");
+
+    x11->ws_width = DisplayWidth(x11->display, x11->screen);
+    x11->ws_height = DisplayHeight(x11->display, x11->screen);
+
+    // Skip a "unix" or "localhost" host part so dispName starts at the ':'.
+    // The display counts as local if it then reads ":N" with N < 10.
+    if (strncmp(dispName, "unix:", 5) == 0)
+        dispName += 4;
+    else if (strncmp(dispName, "localhost:", 10) == 0)
+        dispName += 9;
+    x11->display_is_local = dispName[0] == ':' &&
+                            strtoul(dispName + 1, NULL, 10) < 10;
+    MP_DBG(x11, "X11 running at %dx%d (\"%s\" => %s display)\n",
+           x11->ws_width, x11->ws_height, dispName,
+           x11->display_is_local ? "local" : "remote");
+
+    // Derive an integer scale factor from the reported physical size. A
+    // reported size of 0 mm gives a non-finite DPI, which isfinite() rejects.
+    int w_mm = DisplayWidthMM(x11->display, x11->screen);
+    int h_mm = DisplayHeightMM(x11->display, x11->screen);
+    double dpi_x = x11->ws_width * 25.4 / w_mm;
+    double dpi_y = x11->ws_height * 25.4 / h_mm;
+    double base_dpi = 96;
+    if (isfinite(dpi_x) && isfinite(dpi_y) && x11->opts->hidpi_window_scale) {
+        int s_x = lrint(MPCLAMP(dpi_x / base_dpi, 0, 10));
+        int s_y = lrint(MPCLAMP(dpi_y / base_dpi, 0, 10));
+        // Only accept a scale that agrees on both axes and lies in 2..9.
+        if (s_x == s_y && s_x > 1 && s_x < 10) {
+            x11->dpi_scale = s_x;
+            MP_VERBOSE(x11, "Assuming DPI scale %d for prescaling. This can "
+                       "be disabled with --hidpi-window-scale=no.\n",
+                       x11->dpi_scale);
+        }
+    }
+
+    x11->wm_type = vo_wm_detect(vo);
+
+    x11->event_fd = ConnectionNumber(x11->display);
+    mp_make_wakeup_pipe(x11->wakeup_pipe);
+
+    xrandr_read(x11);
+
+    vo_x11_update_geometry(vo);
+
+    return true;
+
+error:
+    vo_x11_uninit(vo);
+    return false;
+}
+
+// Translation table from X11 keysyms to mpv key codes, consulted by
+// vo_x11_lookupkey() for keys that are not passed through as plain
+// characters. Terminated by a {0, 0} entry.
+static const struct mp_keymap keymap[] = {
+    // special keys
+    {XK_Pause, MP_KEY_PAUSE}, {XK_Escape, MP_KEY_ESC},
+    {XK_BackSpace, MP_KEY_BS}, {XK_Tab, MP_KEY_TAB}, {XK_Return, MP_KEY_ENTER},
+    {XK_Menu, MP_KEY_MENU}, {XK_Print, MP_KEY_PRINT},
+    {XK_Cancel, MP_KEY_CANCEL}, {XK_ISO_Left_Tab, MP_KEY_TAB},
+
+    // cursor keys
+    {XK_Left, MP_KEY_LEFT}, {XK_Right, MP_KEY_RIGHT}, {XK_Up, MP_KEY_UP},
+    {XK_Down, MP_KEY_DOWN},
+
+    // navigation block
+    {XK_Insert, MP_KEY_INSERT}, {XK_Delete, MP_KEY_DELETE},
+    {XK_Home, MP_KEY_HOME}, {XK_End, MP_KEY_END}, {XK_Page_Up, MP_KEY_PAGE_UP},
+    {XK_Page_Down, MP_KEY_PAGE_DOWN},
+
+    // F-keys
+    {XK_F1, MP_KEY_F+1}, {XK_F2, MP_KEY_F+2}, {XK_F3, MP_KEY_F+3},
+    {XK_F4, MP_KEY_F+4}, {XK_F5, MP_KEY_F+5}, {XK_F6, MP_KEY_F+6},
+    {XK_F7, MP_KEY_F+7}, {XK_F8, MP_KEY_F+8}, {XK_F9, MP_KEY_F+9},
+    {XK_F10, MP_KEY_F+10}, {XK_F11, MP_KEY_F+11}, {XK_F12, MP_KEY_F+12},
+    {XK_F13, MP_KEY_F+13}, {XK_F14, MP_KEY_F+14}, {XK_F15, MP_KEY_F+15},
+    {XK_F16, MP_KEY_F+16}, {XK_F17, MP_KEY_F+17}, {XK_F18, MP_KEY_F+18},
+    {XK_F19, MP_KEY_F+19}, {XK_F20, MP_KEY_F+20}, {XK_F21, MP_KEY_F+21},
+    {XK_F22, MP_KEY_F+22}, {XK_F23, MP_KEY_F+23}, {XK_F24, MP_KEY_F+24},
+
+    // numpad independent of numlock
+    {XK_KP_Subtract, '-'}, {XK_KP_Add, '+'}, {XK_KP_Multiply, '*'},
+    {XK_KP_Divide, '/'}, {XK_KP_Enter, MP_KEY_KPENTER},
+
+    // numpad with numlock
+    {XK_KP_0, MP_KEY_KP0}, {XK_KP_1, MP_KEY_KP1}, {XK_KP_2, MP_KEY_KP2},
+    {XK_KP_3, MP_KEY_KP3}, {XK_KP_4, MP_KEY_KP4}, {XK_KP_5, MP_KEY_KP5},
+    {XK_KP_6, MP_KEY_KP6}, {XK_KP_7, MP_KEY_KP7}, {XK_KP_8, MP_KEY_KP8},
+    {XK_KP_9, MP_KEY_KP9}, {XK_KP_Decimal, MP_KEY_KPDEC},
+    {XK_KP_Separator, MP_KEY_KPDEC},
+
+    // numpad without numlock
+    {XK_KP_Insert, MP_KEY_KPINS}, {XK_KP_End, MP_KEY_KPEND},
+    {XK_KP_Down, MP_KEY_KPDOWN}, {XK_KP_Page_Down, MP_KEY_KPPGDOWN},
+    {XK_KP_Left, MP_KEY_KPLEFT}, {XK_KP_Begin, MP_KEY_KP5},
+    {XK_KP_Right, MP_KEY_KPRIGHT}, {XK_KP_Home, MP_KEY_KPHOME}, {XK_KP_Up, MP_KEY_KPUP},
+    {XK_KP_Page_Up, MP_KEY_KPPGUP}, {XK_KP_Delete, MP_KEY_KPDEL},
+
+    // XFree86 multimedia and application keys
+    {XF86XK_MenuKB, MP_KEY_MENU},
+    {XF86XK_AudioPlay, MP_KEY_PLAY}, {XF86XK_AudioPause, MP_KEY_PAUSE},
+    {XF86XK_AudioStop, MP_KEY_STOP},
+    {XF86XK_AudioPrev, MP_KEY_PREV}, {XF86XK_AudioNext, MP_KEY_NEXT},
+    {XF86XK_AudioRewind, MP_KEY_REWIND}, {XF86XK_AudioForward, MP_KEY_FORWARD},
+    {XF86XK_AudioMute, MP_KEY_MUTE},
+    {XF86XK_AudioLowerVolume, MP_KEY_VOLUME_DOWN},
+    {XF86XK_AudioRaiseVolume, MP_KEY_VOLUME_UP},
+    {XF86XK_HomePage, MP_KEY_HOMEPAGE}, {XF86XK_WWW, MP_KEY_WWW},
+    {XF86XK_Mail, MP_KEY_MAIL}, {XF86XK_Favorites, MP_KEY_FAVORITES},
+    {XF86XK_Search, MP_KEY_SEARCH}, {XF86XK_Sleep, MP_KEY_SLEEP},
+    {XF86XK_Back, MP_KEY_BACK}, {XF86XK_Tools, MP_KEY_TOOLS},
+    {XF86XK_ZoomIn, MP_KEY_ZOOMIN}, {XF86XK_ZoomOut, MP_KEY_ZOOMOUT},
+
+    {0, 0}
+};
+
+// Translate an X11 keysym into an mpv key code. ASCII letters, digits and
+// the listed punctuation map to themselves; other keys go through the keymap
+// table; remaining keysyms in the XFree86 range are mapped into the
+// reserved "unknown key" range. Returns 0 if there is no mapping.
+static int vo_x11_lookupkey(int key)
+{
+    const char *passthrough_keys = " -+*/<>`~!@#$%^&()_{}:;\"\',.?\\|=[]";
+
+    bool is_letter = (key >= 'a' && key <= 'z') || (key >= 'A' && key <= 'Z');
+    bool is_digit = key >= '0' && key <= '9';
+    bool is_punct = key > 0 && key < 256 && strchr(passthrough_keys, key);
+
+    int mpkey = (is_letter || is_digit || is_punct) ? key : 0;
+    if (!mpkey)
+        mpkey = lookup_keymap_table(keymap, key);
+    if (mpkey)
+        return mpkey;
+
+    // XFree86 keysym range; typically contains obscure "extra" keys
+    if (key < 0x10080001 || key > 0x1008FFFF)
+        return 0;
+
+    mpkey = MP_KEY_UNKNOWN_RESERVED_START + (key - 0x10080000);
+    return mpkey > MP_KEY_UNKNOWN_RESERVED_LAST ? 0 : mpkey;
+}
+
+// Turn window decorations on (d == true) or off through _MOTIF_WM_HINTS.
+// Does nothing when a parent window is set or no window exists. Existing
+// hints are kept and only the decoration fields are changed.
+static void vo_x11_decoration(struct vo *vo, bool d)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (x11->parent || !x11->window)
+        return;
+
+    Atom hints_atom = XA(x11, _MOTIF_WM_HINTS);
+    MotifWmHints hints = {0};
+    bool have_hints = x11_get_property_copy(x11, x11->window, hints_atom,
+                                            hints_atom, 32, &hints,
+                                            sizeof(hints));
+    if (!have_hints) {
+        // hints weren't set, and decorations requested -> assume WM displays them
+        if (d)
+            return;
+        hints.flags = MWM_HINTS_FUNCTIONS;
+        hints.functions = MWM_FUNC_MOVE | MWM_FUNC_CLOSE | MWM_FUNC_MINIMIZE |
+                          MWM_FUNC_MAXIMIZE | MWM_FUNC_RESIZE;
+    }
+
+    hints.flags |= MWM_HINTS_DECORATIONS;
+    if (d) {
+        hints.decorations = MWM_DECOR_ALL;
+    } else {
+        hints.decorations = 0;
+    }
+
+    // The property consists of the 5 longs of MotifWmHints.
+    XChangeProperty(x11->display, x11->window, hints_atom, hints_atom, 32,
+                    PropModeReplace, (unsigned char *) &hints, 5);
+}
+
+// Set the basic WM hints on a window: it accepts input and starts in the
+// normal state.
+static void vo_x11_wm_hints(struct vo *vo, Window window)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    XWMHints hints = {
+        .flags = InputHint | StateHint,
+        .input = 1,
+        .initial_state = NormalState,
+    };
+
+    XSetWMHints(x11->display, window, &hints);
+}
+
+// Set the class hint and _NET_WM_PID on a window. The resource name is the
+// winname option if set, otherwise name; the class is always "mpv".
+static void vo_x11_classhint(struct vo *vo, Window window, const char *name)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+
+    XClassHint hint = {
+        .res_name = opts->winname ? opts->winname : (char *)name,
+        .res_class = "mpv",
+    };
+    XSetClassHint(x11->display, window, &hint);
+
+    // Format-32 property data is passed to Xlib as long.
+    long pid = getpid();
+    XChangeProperty(x11->display, window, XA(x11, _NET_WM_PID), XA_CARDINAL,
+                    32, PropModeReplace, (unsigned char *) &pid, 1);
+}
+
+// Tear down the X11 state of a VO; a no-op if vo->x11 is not set.
+//
+// Releases all held keys, re-enables the screensaver, destroys the window,
+// input context and colormap, closes the input method and the display,
+// closes the wakeup pipe and frees the state. Also used by vo_x11_init() to
+// clean up after a failed display open.
+void vo_x11_uninit(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (!x11)
+        return;
+
+    mp_input_put_key(x11->input_ctx, MP_INPUT_RELEASE_ALL);
+
+    set_screensaver(x11, true);
+
+    if (x11->window != None && x11->window != x11->rootwin)
+        XDestroyWindow(x11->display, x11->window);
+    if (x11->xic)
+        XDestroyIC(x11->xic);
+    if (x11->colormap != None)
+        XFreeColormap(vo->x11->display, x11->colormap);
+
+    MP_DBG(x11, "uninit ...\n");
+    if (x11->xim)
+        XCloseIM(x11->xim);
+
+    // vo_x11_init() installs the error handler and its log pointer before it
+    // tries to open the display. Undo both unconditionally, so a failed open
+    // does not leave x11_error_output pointing at the log freed below.
+    XSetErrorHandler(NULL);
+    x11_error_output = NULL;
+    if (x11->display)
+        XCloseDisplay(x11->display);
+
+    if (x11->wakeup_pipe[0] >= 0) {
+        close(x11->wakeup_pipe[0]);
+        close(x11->wakeup_pipe[1]);
+    }
+
+    talloc_free(x11);
+    vo->x11 = NULL;
+}
+
+// Name of the window property that receives converted drag & drop data.
+#define DND_PROPERTY "mpv_dnd_selection"
+
+// Advertise XDND support on our window by setting XdndAware to DND_VERSION.
+static void vo_x11_dnd_init_window(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    Atom dnd_version = DND_VERSION;
+    Atom aware = XA(x11, XdndAware);
+
+    XChangeProperty(x11->display, x11->window, aware, XA_ATOM, 32,
+                    PropModeReplace, (unsigned char *)&dnd_version, 1);
+}
+
+// Return a mime type string for a DnD target atom. UTF8_STRING is reported
+// as "text"; any other atom yields its name, written into buf.
+// The Atom does not always map to a mime type, but often.
+static char *x11_dnd_mime_type_buf(struct vo_x11_state *x11, Atom atom,
+                                   char *buf, size_t buf_size)
+{
+    Atom utf8 = XInternAtom(x11->display, "UTF8_STRING", False);
+    if (atom != utf8)
+        return x11_atom_name_buf(x11, atom, buf, buf_size);
+    return "text";
+}
+
+// Same as x11_dnd_mime_type_buf(), using an anonymous 80-byte buffer.
+#define x11_dnd_mime_type(x11, atom) \
+    x11_dnd_mime_type_buf(x11, atom, (char[80]){0}, 80)
+
+// Return true if DnD format "new" has a non-negative mime type score that is
+// strictly higher than that of "cur". A cur of 0 (no format yet) counts as
+// score -1.
+static bool dnd_format_is_better(struct vo_x11_state *x11, Atom cur, Atom new)
+{
+    int score_new = mp_event_get_mime_type_score(x11->input_ctx,
+                                                 x11_dnd_mime_type(x11, new));
+    int score_cur = cur
+        ? mp_event_get_mime_type_score(x11->input_ctx,
+                                       x11_dnd_mime_type(x11, cur))
+        : -1;
+
+    if (score_new < 0)
+        return false;
+    return score_new > score_cur;
+}
+
+// Choose the best-scoring format among the offered DnD types and store it
+// in x11->dnd_requested_format (0 if none is acceptable).
+static void dnd_select_format(struct vo_x11_state *x11, Atom *args, int items)
+{
+    Atom best = 0;
+    x11->dnd_requested_format = 0;
+
+    for (Atom *offer = args; offer < args + items; offer++) {
+        MP_VERBOSE(x11, "DnD type: '%s'\n", x11_atom_name(x11, *offer));
+        // There are other types; possibly not worth supporting.
+        if (dnd_format_is_better(x11, best, *offer))
+            best = *offer;
+    }
+    x11->dnd_requested_format = best;
+
+    MP_VERBOSE(x11, "Selected DnD type: %s\n", x11->dnd_requested_format ?
+                    x11_atom_name(x11, x11->dnd_requested_format) : "(none)");
+}
+
+// Forget the drag & drop state: the requested format and the source window.
+static void dnd_reset(struct vo *vo)
+{
+    struct vo_x11_state *state = vo->x11;
+
+    state->dnd_requested_format = 0;
+    state->dnd_src_window = 0;
+}
+
+// Handle the drag-and-drop client messages sent by a drag source (XdndEnter,
+// XdndPosition, XdndDrop, XdndLeave). Any other message type is ignored, and
+// nothing is done while we have no window.
+static void vo_x11_dnd_handle_message(struct vo *vo, XClientMessageEvent *ce)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    if (!x11->window)
+        return;
+
+    if (ce->message_type == XA(x11, XdndEnter)) {
+        x11->dnd_requested_format = 0;
+
+        Window src = ce->data.l[0];
+        // Bit 0 of data.l[1] set: the offered types are read from the
+        // XdndTypeList property of the source window. Otherwise the three
+        // slots data.l[2..4] of the message itself are used as type list.
+        if (ce->data.l[1] & 1) {
+            int nitems;
+            Atom *args = x11_get_property(x11, src, XA(x11, XdndTypeList),
+                                          XA_ATOM, 32, &nitems);
+            if (args) {
+                dnd_select_format(x11, args, nitems);
+                XFree(args);
+            }
+        } else {
+            Atom args[3];
+            for (int n = 2; n <= 4; n++)
+                args[n - 2] = ce->data.l[n];
+            dnd_select_format(x11, args, 3);
+        }
+    } else if (ce->message_type == XA(x11, XdndPosition)) {
+        // Remember the action the source asks for; it is evaluated later in
+        // vo_x11_dnd_handle_selection().
+        x11->dnd_requested_action = ce->data.l[4];
+
+        Window src = ce->data.l[0];
+        XEvent xev;
+
+        // Reply with XdndStatus. data.l[1] is 1 only if a usable format was
+        // selected on XdndEnter; data.l[2] and data.l[3] are left at 0, and
+        // XdndActionCopy is always reported as the action.
+        xev.xclient.type = ClientMessage;
+        xev.xclient.serial = 0;
+        xev.xclient.send_event = True;
+        xev.xclient.message_type = XA(x11, XdndStatus);
+        xev.xclient.window = src;
+        xev.xclient.format = 32;
+        xev.xclient.data.l[0] = x11->window;
+        xev.xclient.data.l[1] = x11->dnd_requested_format ? 1 : 0;
+        xev.xclient.data.l[2] = 0;
+        xev.xclient.data.l[3] = 0;
+        xev.xclient.data.l[4] = XA(x11, XdndActionCopy);
+
+        XSendEvent(x11->display, src, False, 0, &xev);
+    } else if (ce->message_type == XA(x11, XdndDrop)) {
+        // Ask for the dropped data in the selected format. It is delivered
+        // into the DND_PROPERTY property of our window and picked up in
+        // vo_x11_dnd_handle_selection(). data.l[2] is passed as the time
+        // argument of XConvertSelection().
+        x11->dnd_src_window = ce->data.l[0];
+        XConvertSelection(x11->display, XA(x11, XdndSelection),
+                          x11->dnd_requested_format, XAs(x11, DND_PROPERTY),
+                          x11->window, ce->data.l[2]);
+    } else if (ce->message_type == XA(x11, XdndLeave)) {
+        dnd_reset(vo);
+    }
+}
+
+// Handle the SelectionNotify that answers the XConvertSelection() request
+// issued on XdndDrop: read the dropped data, hand it to the input layer, and
+// report the outcome to the drag source with XdndFinished.
+static void vo_x11_dnd_handle_selection(struct vo *vo, XSelectionEvent *se)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    // Without a window or without a drop in progress there is nothing to do
+    // and nobody to answer.
+    if (!x11->window || !x11->dnd_src_window)
+        return;
+
+    bool success = false;
+
+    // Only accept the event if it matches our own request, and if the
+    // drag_and_drop option is not -2.
+    if (se->selection == XA(x11, XdndSelection) &&
+        se->property == XAs(x11, DND_PROPERTY) &&
+        se->target == x11->dnd_requested_format &&
+        x11->opts->drag_and_drop != -2)
+    {
+        int nitems;
+        void *prop = x11_get_property(x11, x11->window, XAs(x11, DND_PROPERTY),
+                                      x11->dnd_requested_format, 8, &nitems);
+        if (prop) {
+            // A non-negative option value is used directly as the action.
+            // Otherwise the action follows what the source requested in
+            // XdndPosition: copy replaces, anything else appends.
+            enum mp_dnd_action action;
+            if (x11->opts->drag_and_drop >= 0) {
+                action = x11->opts->drag_and_drop;
+            } else {
+                action = x11->dnd_requested_action == XA(x11, XdndActionCopy) ?
+                         DND_REPLACE : DND_APPEND;
+            }
+
+            char *mime_type = x11_dnd_mime_type(x11, x11->dnd_requested_format);
+            MP_VERBOSE(x11, "Dropping type: %s (%s)\n",
+                       x11_atom_name(x11, x11->dnd_requested_format), mime_type);
+
+            // No idea if this is guaranteed to be \0-padded, so use bstr.
+            success = mp_event_drop_mime_data(x11->input_ctx, mime_type,
+                                              (bstr){prop, nitems}, action) > 0;
+            XFree(prop);
+        }
+    }
+
+    XEvent xev;
+
+    // XdndFinished is sent in any case (also if the data was rejected), with
+    // data.l[1] telling the source whether the drop was accepted.
+    xev.xclient.type = ClientMessage;
+    xev.xclient.serial = 0;
+    xev.xclient.send_event = True;
+    xev.xclient.message_type = XA(x11, XdndFinished);
+    xev.xclient.window = x11->dnd_src_window;
+    xev.xclient.format = 32;
+    xev.xclient.data.l[0] = x11->window;
+    xev.xclient.data.l[1] = success ? 1 : 0;
+    xev.xclient.data.l[2] = success ? XA(x11, XdndActionCopy) : 0;
+    xev.xclient.data.l[3] = 0;
+    xev.xclient.data.l[4] = 0;
+
+    XSendEvent(x11->display, x11->dnd_src_window, False, 0, &xev);
+
+    dnd_reset(vo);
+}
+
+// Copy the size of x11->winrc to vo->dwidth/vo->dheight. If either dimension
+// differs from the stored one, a VO_EVENT_RESIZE is queued.
+static void update_vo_size(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    bool same_w = RC_W(x11->winrc) == vo->dwidth;
+    bool same_h = RC_H(x11->winrc) == vo->dheight;
+
+    if (same_w && same_h)
+        return;
+
+    vo->dwidth = RC_W(x11->winrc);
+    vo->dheight = RC_H(x11->winrc);
+    x11->pending_vo_events |= VO_EVENT_RESIZE;
+}
+
+// Convert an X11 modifier state mask (as found in key/button events) to the
+// corresponding combination of MP_KEY_MODIFIER_* flags. Modifier bits that
+// are not listed in the table are ignored.
+static int get_mods(unsigned int state)
+{
+    static const struct {
+        unsigned int x11_mask;
+        int mp_flag;
+    } mod_map[] = {
+        {ShiftMask,   MP_KEY_MODIFIER_SHIFT},
+        {ControlMask, MP_KEY_MODIFIER_CTRL},
+        {Mod1Mask,    MP_KEY_MODIFIER_ALT},
+        {Mod4Mask,    MP_KEY_MODIFIER_META},
+    };
+
+    int modifiers = 0;
+    for (size_t i = 0; i < sizeof(mod_map) / sizeof(mod_map[0]); i++) {
+        if (state & mod_map[i].x11_mask)
+            modifiers |= mod_map[i].mp_flag;
+    }
+    return modifiers;
+}
+
+// Set the _NET_WM_BYPASS_COMPOSITOR property of the window according to the
+// x11_bypass_compositor option and the current fullscreen state.
+static void vo_x11_update_composition_hint(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    // Option value 0 (and any value not handled below): leave default.
+    long hint = 0;
+    if (x11->opts->x11_bypass_compositor == 1) {
+        hint = 1; // always bypass
+    } else if (x11->opts->x11_bypass_compositor == 2) {
+        hint = x11->fs ? 1 : 0; // bypass in FS
+    } else if (x11->opts->x11_bypass_compositor == 3) {
+        hint = 2; // always enable
+    }
+
+    XChangeProperty(x11->display, x11->window, XA(x11,_NET_WM_BYPASS_COMPOSITOR),
+                    XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&hint, 1);
+}
+
+// Re-read _NET_WM_STATE of our window and synchronize the fullscreen,
+// minimized and maximized options (and the matching x11 fields) with what the
+// property reports. Does nothing for embedded windows, or if the
+// vo_wm_FULLSCREEN flag is not set in x11->wm_type.
+static void vo_x11_check_net_wm_state_change(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+
+    if (x11->parent)
+        return;
+
+    if (x11->wm_type & vo_wm_FULLSCREEN) {
+        int num_elems;
+        long *elems = x11_get_property(x11, x11->window, XA(x11, _NET_WM_STATE),
+                                       XA_ATOM, 32, &num_elems);
+        // If the property can't be read, all three states count as unset.
+        int is_fullscreen = 0, is_minimized = 0, is_maximized = 0;
+        if (elems) {
+            Atom fullscreen_prop = XA(x11, _NET_WM_STATE_FULLSCREEN);
+            Atom hidden = XA(x11, _NET_WM_STATE_HIDDEN);
+            Atom max_vert = XA(x11, _NET_WM_STATE_MAXIMIZED_VERT);
+            Atom max_horiz = XA(x11, _NET_WM_STATE_MAXIMIZED_HORZ);
+            for (int n = 0; n < num_elems; n++) {
+                if (elems[n] == fullscreen_prop)
+                    is_fullscreen = 1;
+                if (elems[n] == hidden)
+                    is_minimized = 1;
+                // Maximized in either direction counts as maximized.
+                if (elems[n] == max_vert || elems[n] == max_horiz)
+                    is_maximized = 1;
+            }
+            XFree(elems);
+        }
+
+        // The window just left the maximized state while a geometry change
+        // was pending (vo_x11_config_vo_window() refuses to apply those on
+        // maximized windows), so apply it now. opts->window_maximized still
+        // holds the old state at this point.
+        if (opts->window_maximized && !is_maximized && x11->geometry_change) {
+            x11->geometry_change = false;
+            vo_x11_config_vo_window(vo);
+        }
+
+        opts->window_minimized = is_minimized;
+        x11->hidden = is_minimized;
+        m_config_cache_write_opt(x11->opts_cache, &opts->window_minimized);
+        opts->window_maximized = is_maximized;
+        m_config_cache_write_opt(x11->opts_cache, &opts->window_maximized);
+
+        // Only act if the reported fullscreen state differs from the option.
+        if ((x11->opts->fullscreen && !is_fullscreen) ||
+            (!x11->opts->fullscreen && is_fullscreen))
+        {
+            x11->opts->fullscreen = is_fullscreen;
+            x11->fs = is_fullscreen;
+            m_config_cache_write_opt(x11->opts_cache, &x11->opts->fullscreen);
+
+            // When leaving fullscreen, apply position/size changes that were
+            // recorded while fullscreen, using the stored windowed rectangle.
+            if (!is_fullscreen && (x11->pos_changed_during_fs ||
+                                   x11->size_changed_during_fs))
+            {
+                vo_x11_move_resize(vo, x11->pos_changed_during_fs,
+                                       x11->size_changed_during_fs,
+                                       x11->nofsrc);
+            }
+
+            x11->size_changed_during_fs = false;
+            x11->pos_changed_during_fs = false;
+
+            // The hint may depend on x11->fs, which was just changed.
+            vo_x11_update_composition_hint(vo);
+        }
+    }
+}
+
+// Re-read _NET_WM_DESKTOP of our window and mirror "value is -1" into the
+// all_workspaces option. Does nothing for embedded windows, or if the
+// property can't be read.
+static void vo_x11_check_net_wm_desktop_change(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    long desktop = 0;
+
+    if (x11->parent)
+        return;
+
+    if (!x11_get_property_copy(x11, x11->window, XA(x11, _NET_WM_DESKTOP),
+                               XA_CARDINAL, 32, &desktop, sizeof(desktop)))
+        return;
+
+    x11->opts->all_workspaces = desktop == -1; // (gets sign-extended?)
+    m_config_cache_write_opt(x11->opts_cache, &x11->opts->all_workspaces);
+}
+
+// Release every key at once. Doing this on any key-up or on focus loss is
+// simpler than tracking individual keys, and makes sure no key can remain
+// "stuck". The release is only sent if x11->no_autorepeat is set; the
+// button-1 drag state is cleared in any case.
+static void release_all_keys(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    x11->win_drag_button1_down = false;
+    if (!x11->no_autorepeat)
+        return;
+    mp_input_put_key(x11->input_ctx, MP_INPUT_RELEASE_ALL);
+}
+
+// Drain the X event queue without blocking and dispatch every pending event:
+// input events are forwarded to the input context, window state changes
+// update the x11 state and queue VO events in x11->pending_vo_events.
+// Afterwards the VO size is synchronized with the window size.
+void vo_x11_check_events(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    Display *display = vo->x11->display;
+    XEvent Event;
+
+    xscreensaver_heartbeat(vo->x11);
+
+    while (XPending(display)) {
+        XNextEvent(display, &Event);
+        MP_TRACE(x11, "XEvent: %d\n", Event.type);
+        switch (Event.type) {
+        case Expose:
+            x11->pending_vo_events |= VO_EVENT_EXPOSE;
+            break;
+        case ConfigureNotify:
+            if (x11->window == None)
+                break;
+            vo_x11_update_geometry(vo);
+            // When embedded, our window is resized to the size in winrc
+            // whenever the parent window is reconfigured.
+            if (x11->parent && Event.xconfigure.window == x11->parent) {
+                MP_TRACE(x11, "adjusting embedded window position\n");
+                XMoveResizeWindow(x11->display, x11->window,
+                                  0, 0, RC_W(x11->winrc), RC_H(x11->winrc));
+            }
+            break;
+        case KeyPress: {
+            char buf[100];
+            KeySym keySym = 0;
+            int modifiers = get_mods(Event.xkey.state);
+            // With no_autorepeat, keys are sent as explicit "down" events;
+            // the matching release happens in release_all_keys().
+            if (x11->no_autorepeat)
+                modifiers |= MP_KEY_STATE_DOWN;
+            if (x11->xic) {
+                // Input context available: a keysym known to
+                // vo_x11_lookupkey() takes precedence, otherwise the looked
+                // up UTF-8 text (if any) is sent.
+                Status status;
+                int len = Xutf8LookupString(x11->xic, &Event.xkey, buf,
+                                            sizeof(buf), &keySym, &status);
+                int mpkey = vo_x11_lookupkey(keySym);
+                if (mpkey) {
+                    mp_input_put_key(x11->input_ctx, mpkey | modifiers);
+                } else if (status == XLookupChars || status == XLookupBoth) {
+                    struct bstr t = { buf, len };
+                    mp_input_put_key_utf8(x11->input_ctx, modifiers, t);
+                }
+            } else {
+                // No input context: only keysyms known to
+                // vo_x11_lookupkey() produce input; the text is discarded.
+                XLookupString(&Event.xkey, buf, sizeof(buf), &keySym,
+                              &x11->compose_status);
+                int mpkey = vo_x11_lookupkey(keySym);
+                if (mpkey)
+                    mp_input_put_key(x11->input_ctx, mpkey | modifiers);
+            }
+            break;
+        }
+        case FocusIn:
+            x11->has_focus = true;
+            vo_update_cursor(vo);
+            x11->pending_vo_events |= VO_EVENT_FOCUS;
+            break;
+        case FocusOut:
+            release_all_keys(vo);
+            x11->has_focus = false;
+            vo_update_cursor(vo);
+            x11->pending_vo_events |= VO_EVENT_FOCUS;
+            break;
+        case KeyRelease:
+            release_all_keys(vo);
+            break;
+        case MotionNotify:
+            // Motion while button 1 is held in windowed mode, and with
+            // mp_input_test_dragging() returning false for the position:
+            // release all keys, drop the pointer grab and ask the window
+            // manager to start moving the window.
+            // NOTE(review): the exact meaning of mp_input_test_dragging()'s
+            // result is not visible here - confirm.
+            if (x11->win_drag_button1_down && !x11->fs &&
+                !mp_input_test_dragging(x11->input_ctx, Event.xmotion.x,
+                                                        Event.xmotion.y))
+            {
+                mp_input_put_key(x11->input_ctx, MP_INPUT_RELEASE_ALL);
+                XUngrabPointer(x11->display, CurrentTime);
+
+                long params[5] = {
+                    Event.xmotion.x_root, Event.xmotion.y_root,
+                    8, // _NET_WM_MOVERESIZE_MOVE
+                    1, // button 1
+                    1, // source indication: normal
+                };
+                x11_send_ewmh_msg(x11, "_NET_WM_MOVERESIZE", params);
+            } else {
+                mp_input_set_mouse_pos(x11->input_ctx, Event.xmotion.x,
+                                                       Event.xmotion.y);
+            }
+            // Any motion event ends the "button 1 just pressed" state.
+            x11->win_drag_button1_down = false;
+            break;
+        case LeaveNotify:
+            // Crossing events with a mode other than NotifyNormal are
+            // ignored.
+            if (Event.xcrossing.mode != NotifyNormal)
+                break;
+            x11->win_drag_button1_down = false;
+            mp_input_put_key(x11->input_ctx, MP_KEY_MOUSE_LEAVE);
+            break;
+        case EnterNotify:
+            if (Event.xcrossing.mode != NotifyNormal)
+                break;
+            mp_input_put_key(x11->input_ctx, MP_KEY_MOUSE_ENTER);
+            break;
+        case ButtonPress:
+            // Ignore buttons outside of the range mpv has key codes for.
+            if (Event.xbutton.button - 1 >= MP_KEY_MOUSE_BTN_COUNT)
+                break;
+            if (Event.xbutton.button == 1)
+                x11->win_drag_button1_down = true;
+            mp_input_put_key(x11->input_ctx,
+                             (MP_MBTN_BASE + Event.xbutton.button - 1) |
+                             get_mods(Event.xbutton.state) | MP_KEY_STATE_DOWN);
+            // Send XEMBED_REQUEST_FOCUS to the embedder; this is a no-op
+            // unless the window is embedded.
+            long msg[4] = {XEMBED_REQUEST_FOCUS};
+            vo_x11_xembed_send_message(x11, msg);
+            break;
+        case ButtonRelease:
+            if (Event.xbutton.button - 1 >= MP_KEY_MOUSE_BTN_COUNT)
+                break;
+            if (Event.xbutton.button == 1)
+                x11->win_drag_button1_down = false;
+            mp_input_put_key(x11->input_ctx,
+                             (MP_MBTN_BASE + Event.xbutton.button - 1) |
+                             get_mods(Event.xbutton.state) | MP_KEY_STATE_UP);
+            break;
+        case MapNotify:
+            x11->window_hidden = false;
+            x11->pseudo_mapped = true;
+            x11->current_screen = -1;
+            vo_x11_update_geometry(vo);
+            break;
+        case DestroyNotify:
+            MP_WARN(x11, "Our window was destroyed, exiting\n");
+            mp_input_put_key(x11->input_ctx, MP_KEY_CLOSE_WIN);
+            x11->window = 0;
+            break;
+        case ClientMessage:
+            if (Event.xclient.message_type == XA(x11, WM_PROTOCOLS) &&
+                Event.xclient.data.l[0] == XA(x11, WM_DELETE_WINDOW))
+                mp_input_put_key(x11->input_ctx, MP_KEY_CLOSE_WIN);
+            // Both handlers check the message type themselves and ignore
+            // messages that are not meant for them.
+            vo_x11_dnd_handle_message(vo, &Event.xclient);
+            vo_x11_xembed_handle_message(vo, &Event.xclient);
+            break;
+        case SelectionNotify:
+            vo_x11_dnd_handle_selection(vo, &Event.xselection);
+            break;
+        case PropertyNotify:
+            if (Event.xproperty.atom == XA(x11, _NET_FRAME_EXTENTS) ||
+                Event.xproperty.atom == XA(x11, WM_STATE))
+            {
+                // A change of either property is taken as sign that the
+                // window is (about to be) mapped; see wait_until_mapped().
+                if (!x11->pseudo_mapped && !x11->parent) {
+                    MP_VERBOSE(x11, "not waiting for MapNotify\n");
+                    x11->pseudo_mapped = true;
+                }
+            } else if (Event.xproperty.atom == XA(x11, _NET_WM_STATE)) {
+                vo_x11_check_net_wm_state_change(vo);
+            } else if (Event.xproperty.atom == XA(x11, _NET_WM_DESKTOP)) {
+                vo_x11_check_net_wm_desktop_change(vo);
+            } else if (Event.xproperty.atom == x11->icc_profile_property) {
+                x11->pending_vo_events |= VO_EVENT_ICC_PROFILE_CHANGED;
+            }
+            break;
+        case GenericEvent: {
+            // Only events of the Present extension are of interest, and only
+            // while x11->use_present is set.
+            XGenericEventCookie *cookie = (XGenericEventCookie *)&Event.xcookie;
+            if (cookie->extension == x11->present_code && x11->use_present)
+            {
+                XGetEventData(x11->display, cookie);
+                if (cookie->evtype == PresentCompleteNotify) {
+                    XPresentCompleteNotifyEvent *present_event;
+                    present_event = (XPresentCompleteNotifyEvent *)cookie->data;
+                    // NOTE(review): the factor 1000 presumably converts ust
+                    // from microseconds to nanoseconds - confirm.
+                    present_sync_update_values(x11->present,
+                                               present_event->ust * 1000,
+                                               present_event->msc);
+                }
+            }
+            XFreeEventData(x11->display, cookie);
+            break;
+        }
+        default:
+            // These event types are only known at runtime (they are stored
+            // in x11), so they can't be case labels.
+            if (Event.type == x11->ShmCompletionEvent) {
+                if (x11->ShmCompletionWaitCount > 0)
+                    x11->ShmCompletionWaitCount--;
+            }
+            if (Event.type == x11->xrandr_event) {
+                xrandr_read(x11);
+                vo_x11_update_geometry(vo);
+            }
+            break;
+        }
+    }
+
+    update_vo_size(vo);
+}
+
+// Set WM_NORMAL_HINTS of the window from the rectangle rc: size, minimum
+// size, gravity, and optionally position and a fixed aspect ratio.
+// override_pos forces the position to be included in the hints. Does nothing
+// if there is no window or if the window is embedded.
+static void vo_x11_sizehint(struct vo *vo, struct mp_rect rc, bool override_pos)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+
+    if (x11->parent || !x11->window)
+        return;
+
+    bool has_screen = opts->screen_id >= 0 ||
+                      (opts->screen_name && opts->screen_name[0]);
+    bool has_fsscreen = opts->fsscreen_id >= 0 ||
+                        (opts->fsscreen_name && opts->fsscreen_name[0]);
+
+    // Reasons for which the position hint is set.
+    bool force_pos = false;
+    if (opts->geometry.xy_valid)        // explicitly forced by user
+        force_pos = true;
+    if (opts->force_window_position)    // resize -> reset position
+        force_pos = true;
+    if (has_screen || has_fsscreen)     // force onto screen area
+        force_pos = true;
+    if (opts->screen_name)              // also force onto screen area
+        force_pos = true;
+    if (x11->parent)                    // force to fill parent
+        force_pos = true;
+    if (override_pos)                   // for fullscreen and such
+        force_pos = true;
+
+    XSizeHints *hint = XAllocSizeHints();
+    if (!hint)
+        return; // OOM
+
+    long flags = PSize;
+    if (force_pos)
+        flags |= PPosition;
+
+    hint->x = rc.x0;
+    hint->y = rc.y0;
+    hint->width = RC_W(rc);
+    hint->height = RC_H(rc);
+    hint->max_width = 0;
+    hint->max_height = 0;
+
+    // Restrict the window to the aspect ratio of the requested size.
+    if (opts->keepaspect && opts->keepaspect_window) {
+        flags |= PAspect;
+        hint->min_aspect.x = hint->max_aspect.x = hint->width;
+        hint->min_aspect.y = hint->max_aspect.y = hint->height;
+    }
+
+    // Set minimum height/width to 4 to avoid off-by-one errors.
+    flags |= PMinSize;
+    hint->min_width = 4;
+    hint->min_height = 4;
+
+    flags |= PWinGravity;
+    hint->win_gravity = StaticGravity;
+
+    hint->flags |= flags;
+
+    XSetWMNormalHints(x11->display, x11->window, hint);
+    XFree(hint);
+}
+
+// Move and/or resize the window to rc, depending on the move/resize flags,
+// and update the size hints to rc afterwards (the hints are updated even if
+// both flags are false). Does nothing without a window.
+static void vo_x11_move_resize(struct vo *vo, bool move, bool resize,
+                               struct mp_rect rc)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (!x11->window)
+        return;
+
+    XWindowChanges req = {
+        .x = rc.x0,
+        .y = rc.y0,
+        .width = RC_W(rc),
+        .height = RC_H(rc),
+    };
+
+    unsigned mask = 0;
+    if (move)
+        mask |= CWX | CWY;
+    if (resize)
+        mask |= CWWidth | CWHeight;
+
+    if (mask != 0)
+        XConfigureWindow(x11->display, x11->window, mask, &req);
+    vo_x11_sizehint(vo, rc, false);
+}
+
+// Set a X text property that expects a UTF8_STRING type.
+// t must be a \0-terminated string; its bytes (without the terminator) become
+// the value of the property "name" on our window.
+static void vo_x11_set_property_utf8(struct vo *vo, Atom name, const char *t)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    // XChangeProperty() takes the data as pointer to unsigned char and the
+    // element count as int, so convert both explicitly instead of relying on
+    // implicit conversions between incompatible pointer types.
+    XChangeProperty(x11->display, x11->window, name, XA(x11, UTF8_STRING), 8,
+                    PropModeReplace, (const unsigned char *)t, (int)strlen(t));
+}
+
+// Set a X text property that expects a STRING or COMPOUND_TEXT type.
+// t is converted with Xutf8TextListToTextProperty() (XStdICCTextStyle). If
+// that does not return Success, the unconverted string is stored as
+// UTF8_STRING instead.
+static void vo_x11_set_property_string(struct vo *vo, Atom name, const char *t)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    XTextProperty prop = {0};
+
+    if (Xutf8TextListToTextProperty(x11->display, (char **)&t, 1,
+                                    XStdICCTextStyle, &prop) == Success)
+    {
+        XSetTextProperty(x11->display, x11->window, &prop, name);
+    } else {
+        // Strictly speaking this violates the ICCCM, but there's no way we
+        // can do this correctly.
+        vo_x11_set_property_utf8(vo, name, t);
+    }
+    // prop.value keeps its initial NULL if the conversion failed without
+    // producing a value, and XFree() must not be called with a NULL pointer.
+    if (prop.value)
+        XFree(prop.value);
+}
+
+// Apply x11->window_title to the window: WM_NAME/WM_ICON_NAME get the title
+// as text property, _NET_WM_NAME/_NET_WM_ICON_NAME get a sanitized UTF-8
+// version. Does nothing without a window or without a title.
+static void vo_x11_update_window_title(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    if (!x11->window_title || !x11->window)
+        return;
+
+    vo_x11_set_property_string(vo, XA_WM_NAME, x11->window_title);
+    vo_x11_set_property_string(vo, XA_WM_ICON_NAME, x11->window_title);
+
+    /* _NET_WM_NAME and _NET_WM_ICON_NAME must be sanitized to UTF-8. */
+    void *tmp = talloc_new(NULL);
+    struct bstr sanitized = bstr_sanitize_utf8_latin1(tmp,
+                                                      bstr0(x11->window_title));
+    char *utf8_title = bstrto0(tmp, sanitized);
+    vo_x11_set_property_utf8(vo, XA(x11, _NET_WM_NAME), utf8_title);
+    vo_x11_set_property_utf8(vo, XA(x11, _NET_WM_ICON_NAME), utf8_title);
+    talloc_free(tmp);
+}
+
+// Publish the _XEMBED_INFO property (XEMBED_VERSION plus the given flags) on
+// our window. Only done for embedded windows; does nothing otherwise.
+static void vo_x11_xembed_update(struct vo_x11_state *x11, int flags)
+{
+    if (!x11->window || !x11->parent)
+        return;
+
+    long xembed_info[] = {XEMBED_VERSION, flags};
+    Atom name = XA(x11, _XEMBED_INFO);
+    // The property uses its own atom as type. XChangeProperty() expects the
+    // data as pointer to unsigned char, also for format 32 (array of long).
+    XChangeProperty(x11->display, x11->window, name, name, 32,
+                    PropModeReplace, (unsigned char *)xembed_info, 2);
+}
+
+// Handle _XEMBED client messages for an embedded window. The only message
+// acted upon is XEMBED_EMBEDDED_NOTIFY, which is merely logged.
+static void vo_x11_xembed_handle_message(struct vo *vo, XClientMessageEvent *ce)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    bool relevant = x11->window && x11->parent &&
+                    ce->message_type == XA(x11, _XEMBED);
+    if (!relevant)
+        return;
+
+    // The XEmbed message code is stored in the second data slot.
+    if (ce->data.l[1] == XEMBED_EMBEDDED_NOTIFY)
+        MP_VERBOSE(x11, "Parent windows supports XEmbed.\n");
+}
+
+// Send a _XEMBED client message to the parent (embedder) window. m[0..3]
+// are placed after a CurrentTime timestamp in the message data. Does nothing
+// unless the window exists and is embedded.
+static void vo_x11_xembed_send_message(struct vo_x11_state *x11, long m[4])
+{
+    bool embedded = x11->window && x11->parent;
+    if (!embedded)
+        return;
+
+    XClientMessageEvent msg = {
+        .type = ClientMessage,
+        .window = x11->parent,
+        .message_type = XA(x11, _XEMBED),
+        .format = 32,
+        .data = {.l = { CurrentTime, m[0], m[1], m[2], m[3] }},
+    };
+    XEvent ev = {.xclient = msg};
+    XSendEvent(x11->display, x11->parent, False, NoEventMask, &ev);
+}
+
+// Build the _NET_WM_ICON property from the built-in PNG icons in x11_icons[]
+// and set it on the window. Each icon that can be decoded contributes its
+// width, its height, and then one packed 32 bit value per pixel; icons that
+// fail to load are skipped.
+static void vo_x11_set_wm_icon(struct vo_x11_state *x11)
+{
+    long *icon_data = talloc_array(NULL, long, 0);
+    int total = 0;
+
+    for (int n = 0; x11_icons[n].start; n++) {
+        struct mp_image *img = load_image_png_buf(x11_icons[n].start,
+                                                  x11_icons[n].len,
+                                                  IMGFMT_RGBA);
+        if (!img)
+            continue;
+
+        // Grow the array by one entry: 2 header values + all pixels.
+        int entry_len = 2 + img->w * img->h;
+        MP_RESIZE_ARRAY(NULL, icon_data, total + entry_len);
+        long *out = &icon_data[total];
+        total += entry_len;
+
+        out[0] = img->w;
+        out[1] = img->h;
+        out += 2;
+
+        for (int y = 0; y < img->h; y++) {
+            uint8_t *row = (uint8_t *)img->planes[0] + img->stride[0] * y;
+            for (int x = 0; x < img->w; x++) {
+                uint8_t *px = row + x * 4;
+                // Byte 0 ends up in the lowest 8 bits, byte 3 in bits 24-31.
+                unsigned packed = px[0] | (px[1] << 8) | (px[2] << 16) |
+                                  ((unsigned)px[3] << 24);
+                *out++ = packed;
+            }
+        }
+        talloc_free(img);
+    }
+
+    XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_ICON),
+                    XA_CARDINAL, 32, PropModeReplace,
+                    (unsigned char *)icon_data, total);
+    talloc_free(icon_data);
+}
+
+// Create the (still unmapped) X window with the initial rectangle rc and set
+// up everything attached to it: colormap, WM_DELETE_WINDOW protocol, Present
+// event selection, cursor, input context, and the window properties.
+// vis can be NULL, in which case a TrueColor visual with the depth of the
+// root window is looked up. Must only be called while no window and no input
+// context exist (asserted).
+static void vo_x11_create_window(struct vo *vo, XVisualInfo *vis,
+                                 struct mp_rect rc)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    assert(x11->window == None);
+    assert(!x11->xic);
+
+    XVisualInfo vinfo_storage;
+    if (!vis) {
+        vis = &vinfo_storage;
+        XWindowAttributes att;
+        XGetWindowAttributes(x11->display, x11->rootwin, &att);
+        XMatchVisualInfo(x11->display, x11->screen, att.depth, TrueColor, vis);
+    }
+
+    // The colormap is created once and reused if one already exists.
+    if (x11->colormap == None) {
+        x11->colormap = XCreateColormap(x11->display, x11->rootwin,
+                                        vis->visual, AllocNone);
+    }
+
+    unsigned long xswamask = CWBorderPixel | CWColormap;
+    XSetWindowAttributes xswa = {
+        .border_pixel = 0,
+        .colormap = x11->colormap,
+    };
+
+    // Embedded windows are created as child of the given parent, normal
+    // windows as child of the root window.
+    Window parent = x11->parent;
+    if (!parent)
+        parent = x11->rootwin;
+
+    x11->window =
+        XCreateWindow(x11->display, parent, rc.x0, rc.y0, RC_W(rc), RC_H(rc), 0,
+                      vis->depth, CopyFromParent, vis->visual, xswamask, &xswa);
+    Atom protos[1] = {XA(x11, WM_DELETE_WINDOW)};
+    XSetWMProtocols(x11->display, x11->window, protos, 1);
+
+    // Completion events are only requested if the Present extension exists;
+    // xpresent_set() is called in either case.
+    if (!XPresentQueryExtension(x11->display, &x11->present_code, NULL, NULL)) {
+        MP_VERBOSE(x11, "The XPresent extension is not supported.\n");
+    } else {
+        MP_VERBOSE(x11, "The XPresent extension was found.\n");
+        XPresentSelectInput(x11->display, x11->window, PresentCompleteNotifyMask);
+    }
+    xpresent_set(x11);
+
+    // Reset the cursor state, since it refers to the new window now.
+    x11->mouse_cursor_set = false;
+    x11->mouse_cursor_visible = true;
+    vo_update_cursor(vo);
+
+    if (x11->xim) {
+        x11->xic = XCreateIC(x11->xim,
+                             XNInputStyle, XIMPreeditNone | XIMStatusNone,
+                             XNClientWindow, x11->window,
+                             XNFocusWindow, x11->window,
+                             NULL);
+    }
+
+    // These properties are only set on non-embedded windows.
+    if (!x11->parent) {
+        vo_x11_update_composition_hint(vo);
+        vo_x11_set_wm_icon(x11);
+        vo_x11_dnd_init_window(vo);
+        vo_x11_set_property_utf8(vo, XA(x11, _GTK_THEME_VARIANT), "dark");
+    }
+    // Embedded windows get a title only if the x11_wid_title option is set.
+    if (!x11->parent || x11->opts->x11_wid_title)
+        vo_x11_update_window_title(vo);
+    vo_x11_xembed_update(x11, 0);
+}
+
+// Map the (so far hidden) window with the geometry rc. Before mapping, the
+// initial window manager state is set as properties on the window
+// (fullscreen, fullscreen monitors, sticky/desktop, compositor hint) and the
+// input events are selected. After mapping, maximize/minimize/fullscreen are
+// requested according to the options.
+static void vo_x11_map_window(struct vo *vo, struct mp_rect rc)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    vo_x11_move_resize(vo, true, true, rc);
+    vo_x11_decoration(vo, x11->opts->border);
+
+    if (x11->opts->fullscreen && (x11->wm_type & vo_wm_FULLSCREEN)) {
+        Atom state = XA(x11, _NET_WM_STATE_FULLSCREEN);
+        XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_STATE), XA_ATOM,
+                        32, PropModeAppend, (unsigned char *)&state, 1);
+        x11->fs = 1;
+        // The "saved" positions are bogus, so reset them when leaving FS again.
+        x11->size_changed_during_fs = true;
+        x11->pos_changed_during_fs = true;
+    }
+
+    if (x11->opts->fsscreen_id != -1) {
+        long params[5] = {0};
+        if (x11->opts->fsscreen_id >= 0) {
+            // A specific screen: use it for all four monitor slots.
+            for (int n = 0; n < 4; n++)
+                params[n] = x11->opts->fsscreen_id;
+        } else {
+            vo_x11_get_bounding_monitors(x11, &params[0]);
+        }
+        params[4] = 1; // source indication: normal
+        x11_send_ewmh_msg(x11, "_NET_WM_FULLSCREEN_MONITORS", params);
+    }
+
+    if (x11->opts->all_workspaces) {
+        if (x11->wm_type & vo_wm_STICKY) {
+            Atom state = XA(x11, _NET_WM_STATE_STICKY);
+            // Append instead of replace: _NET_WM_STATE may already contain
+            // the fullscreen atom added above, which must not get lost.
+            XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_STATE), XA_ATOM,
+                            32, PropModeAppend, (unsigned char *)&state, 1);
+        } else {
+            // No sticky state available: use the "all desktops" value of
+            // _NET_WM_DESKTOP instead.
+            long v = 0xFFFFFFFF;
+            XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_DESKTOP),
+                            XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&v, 1);
+        }
+    } else if (x11->opts->geometry.ws > 0) {
+        // The option counts workspaces from 1, the property from 0.
+        long v = x11->opts->geometry.ws - 1;
+        XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_DESKTOP),
+                        XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&v, 1);
+    }
+
+    vo_x11_update_composition_hint(vo);
+
+    // map window
+    int events = StructureNotifyMask | ExposureMask | PropertyChangeMask |
+                 LeaveWindowMask | EnterWindowMask | FocusChangeMask;
+    if (mp_input_mouse_enabled(x11->input_ctx))
+        events |= PointerMotionMask | ButtonPressMask | ButtonReleaseMask;
+    if (mp_input_vo_keyboard_enabled(x11->input_ctx))
+        events |= KeyPressMask | KeyReleaseMask;
+    vo_x11_selectinput_witherr(vo, x11->display, x11->window, events);
+    XMapWindow(x11->display, x11->window);
+
+    if (x11->opts->cursor_passthrough)
+        vo_x11_set_input_region(vo, true);
+
+    if (x11->opts->window_maximized) // don't override WM default on "no"
+        vo_x11_maximize(vo);
+    if (x11->opts->window_minimized) // don't override WM default on "no"
+        vo_x11_minimize(vo);
+
+    if (x11->opts->fullscreen && (x11->wm_type & vo_wm_FULLSCREEN))
+        x11_set_ewmh_state(x11, "_NET_WM_STATE_FULLSCREEN", 1);
+
+    vo_x11_xembed_update(x11, XEMBED_MAPPED);
+}
+
+// Apply a new window rectangle rc requested by the player. The position is
+// only taken over if the force_window_position option is set; otherwise only
+// the size of the stored windowed rectangle (nofsrc) is updated. While in
+// fullscreen the change is just recorded (plus size hints), to be applied
+// when fullscreen is left.
+static void vo_x11_highlevel_resize(struct vo *vo, struct mp_rect rc)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+    bool reset_pos = opts->force_window_position;
+
+    if (!reset_pos) {
+        // Keep the stored top-left corner, adopt only the new size.
+        x11->nofsrc.x1 = x11->nofsrc.x0 + RC_W(rc);
+        x11->nofsrc.y1 = x11->nofsrc.y0 + RC_H(rc);
+    } else {
+        x11->nofsrc = rc;
+    }
+
+    if (!opts->fullscreen) {
+        vo_x11_move_resize(vo, reset_pos, true, rc);
+        return;
+    }
+
+    x11->size_changed_during_fs = true;
+    x11->pos_changed_during_fs = reset_pos;
+    vo_x11_sizehint(vo, rc, false);
+}
+
+// Block until the window counts as mapped (x11->pseudo_mapped) or is gone.
+// Events are processed while waiting; the flag is set either by the event
+// handling or here, once the window attributes no longer report IsUnmapped.
+static void wait_until_mapped(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    if (!x11->pseudo_mapped)
+        x11_send_ewmh_msg(x11, "_NET_REQUEST_FRAME_EXTENTS", (long[5]){0});
+
+    for (;;) {
+        if (x11->pseudo_mapped || !x11->window)
+            return;
+
+        XWindowAttributes att;
+        XGetWindowAttributes(x11->display, x11->window, &att);
+        if (att.map_state != IsUnmapped) {
+            x11->pseudo_mapped = true;
+            return;
+        }
+
+        // XPeekEvent() blocks until an event is queued, without removing it;
+        // the event is then processed normally.
+        XEvent unused;
+        XPeekEvent(x11->display, &unused);
+        vo_x11_check_events(vo);
+    }
+}
+
+// Create the X11 window. Only one window exists per VO, and this must be
+// called before vo_x11_config_vo_window(). vis may be NULL to use a default
+// visual. If the root window was given as parent, the root window itself is
+// used instead of creating a new one. Returns whether a window is available.
+bool vo_x11_create_vo_window(struct vo *vo, XVisualInfo *vis,
+                             const char *classname)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    assert(!x11->window);
+
+    if (x11->parent && x11->parent == x11->rootwin) {
+        x11->window = x11->rootwin;
+        x11->pseudo_mapped = true;
+        XSelectInput(x11->display, x11->window, StructureNotifyMask);
+    } else if (x11->parent) {
+        XSelectInput(x11->display, x11->parent, StructureNotifyMask);
+    }
+
+    bool need_new_window = x11->window == None;
+    if (need_new_window) {
+        // Placeholder size; the real geometry is applied when mapping.
+        struct mp_rect initial = {.x1 = 320, .y1 = 200 };
+        vo_x11_create_window(vo, vis, initial);
+        vo_x11_classhint(vo, x11->window, classname);
+        vo_x11_wm_hints(vo, x11->window);
+        x11->window_hidden = true;
+    }
+
+    return x11->window != None;
+}
+
+// Resize the window (e.g. new file, or video resolution change)
+// On the first call after window creation this also maps the window. The
+// function returns only once the window counts as mapped, with
+// vo->dwidth/dheight updated. The window must exist already (asserted).
+void vo_x11_config_vo_window(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+
+    assert(x11->window);
+
+    // Don't attempt to change autofit/geometry on maximized windows.
+    if (x11->geometry_change && opts->window_maximized)
+        return;
+
+    vo_x11_update_screeninfo(vo);
+
+    struct vo_win_geometry geo;
+    vo_calc_window_geometry2(vo, &x11->screenrc, x11->dpi_scale, &geo);
+    vo_apply_window_geometry(vo, &geo);
+
+    struct mp_rect rc = geo.win;
+
+    // Embedded windows don't use the computed geometry, but the current size
+    // in x11->winrc at position 0/0.
+    if (x11->parent) {
+        vo_x11_update_geometry(vo);
+        rc = (struct mp_rect){0, 0, RC_W(x11->winrc), RC_H(x11->winrc)};
+    }
+
+    // Resize only if the wanted size differs from the one of the previous
+    // call, and resizing is either enabled or explicitly requested.
+    bool reset_size = (x11->old_dw != RC_W(rc) || x11->old_dh != RC_H(rc)) &&
+                      (opts->auto_window_resize || x11->geometry_change);
+
+    x11->old_dw = RC_W(rc);
+    x11->old_dh = RC_H(rc);
+
+    if (x11->window_hidden) {
+        x11->nofsrc = rc;
+        vo_x11_map_window(vo, rc);
+    } else if (reset_size) {
+        vo_x11_highlevel_resize(vo, rc);
+    }
+
+    x11->geometry_change = false;
+
+    if (opts->ontop)
+        vo_x11_setlayer(vo, opts->ontop);
+
+    vo_x11_fullscreen(vo);
+
+    wait_until_mapped(vo);
+    vo_x11_update_geometry(vo);
+    update_vo_size(vo);
+    x11->pending_vo_events &= ~VO_EVENT_RESIZE; // implicitly done by the VO
+}
+
+// Show the window on all desktops (sticky) or stop doing so. If the
+// vo_wm_STICKY flag is set, the _NET_WM_STATE_STICKY state is used.
+// Otherwise a _NET_WM_DESKTOP message is sent, with 0xFFFFFFFF as desktop
+// for sticky, or the value of _NET_CURRENT_DESKTOP of the root window for
+// non-sticky (if that can't be read, 0xFFFFFFFF remains).
+static void vo_x11_sticky(struct vo *vo, bool sticky)
+{
+    struct vo_x11_state *x11 = vo->x11;
+
+    if (x11->wm_type & vo_wm_STICKY) {
+        x11_set_ewmh_state(x11, "_NET_WM_STATE_STICKY", sticky);
+        return;
+    }
+
+    long params[5] = {0xFFFFFFFF, 1};
+    if (!sticky) {
+        x11_get_property_copy(x11, x11->rootwin, XA(x11, _NET_CURRENT_DESKTOP),
+                              XA_CARDINAL, 32, &params[0], sizeof(params[0]));
+    }
+    x11_send_ewmh_msg(x11, "_NET_WM_DESKTOP", params);
+}
+
+// Set or clear "stay on top", using whichever mechanism the WM advertises.
+static void vo_x11_setlayer(struct vo *vo, bool ontop)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (x11->parent || !x11->window)
+        return;
+
+    if (x11->wm_type & (vo_wm_STAYS_ON_TOP | vo_wm_ABOVE)) {
+        // STAYS_ON_TOP: not in EWMH, but the old code preferred it ("better"?)
+        char *state = (x11->wm_type & vo_wm_STAYS_ON_TOP)
+            ? "_NET_WM_STATE_STAYS_ON_TOP" : "_NET_WM_STATE_ABOVE";
+        x11_set_ewmh_state(x11, state, ontop);
+        MP_VERBOSE(x11, "NET style stay on top (%d). Using state %s.\n",
+                   ontop, state);
+        return;
+    }
+    if (!(x11->wm_type & vo_wm_LAYER))
+        return;
+
+    // Lazily record the window's own layer (0 means not recorded yet).
+    if (!x11->orig_layer) {
+        x11->orig_layer = WIN_LAYER_NORMAL;
+        x11_get_property_copy(x11, x11->window, XA(x11, _WIN_LAYER),
+                              XA_CARDINAL, 32, &x11->orig_layer, sizeof(long));
+        MP_VERBOSE(x11, "original window layer is %ld.\n", x11->orig_layer);
+    }
+    // Without ontop, go back to the layer the window had originally.
+    long params[5] = {
+        ontop ? WIN_LAYER_ABOVE_DOCK : x11->orig_layer, CurrentTime,
+    };
+    MP_VERBOSE(x11, "Layered style stay on top (layer %ld).\n", params[0]);
+    x11_send_ewmh_msg(x11, "_WIN_LAYER", params);
+}
+
+static bool rc_overlaps(struct mp_rect rc1, struct mp_rect rc2)
+{
+    return !!mp_rect_intersection(&rc1, &rc2); // clips rc1, our by-value copy
+}
+
+// Update x11->winrc with the current root-relative boundaries of the window
+// (the parent window if embedded), then refresh the display/FPS state.
+static void vo_x11_update_geometry(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    Window win = x11->parent ? x11->parent : x11->window;
+    x11->winrc = (struct mp_rect){0, 0, 0, 0};
+    if (win) {
+        int x = 0, y = 0, dummy_int;
+        // Zero-init: a failed XGetGeometry() may leave its outputs unwritten.
+        unsigned w = 0, h = 0, dummy_uint;
+        Window dummy_win;
+        XGetGeometry(x11->display, win, &dummy_win, &dummy_int, &dummy_int,
+                     &w, &h, &dummy_int, &dummy_uint);
+        if (w > INT_MAX || h > INT_MAX)
+            w = h = 0;
+        XTranslateCoordinates(x11->display, win, x11->rootwin, 0, 0,
+                              &x, &y, &dummy_win);
+        x11->winrc = (struct mp_rect){x, y, x + w, y + h};
+    }
+    struct xrandr_display *disp = get_current_display(vo);
+    // Try to fallback to something reasonable if we have no disp yet
+    if (!disp) {
+        int screen = vo_x11_select_screen(vo);
+        if (screen > -1)
+            disp = &x11->displays[screen];
+        else if (x11->current_screen > -1)
+            disp = &x11->displays[x11->current_screen];
+    }
+    double fps = disp ? disp->fps : 0;
+    if (fps != x11->current_display_fps)
+        MP_VERBOSE(x11, "Current display FPS: %f\n", fps);
+    x11->current_display_fps = fps;
+    if (disp && x11->current_screen != disp->screen) {
+        x11->current_screen = disp->screen;
+        x11->pending_vo_events |= VO_EVENT_ICC_PROFILE_CHANGED;
+    }
+    x11->pending_vo_events |= VO_EVENT_WIN_STATE;
+}
+
+// Bring the window's fullscreen state in line with opts->fullscreen, via
+// _NET_WM_STATE_FULLSCREEN if the WM supports it, else by resizing manually.
+static void vo_x11_fullscreen(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+
+    if (opts->fullscreen == x11->fs)
+        return;
+    x11->fs = opts->fullscreen; // from here on, x11->fs is the new state
+    if (x11->parent || !x11->window)
+        return;
+
+    // Entering fullscreen: remember the windowed rectangle first.
+    if (x11->fs) {
+        vo_x11_update_geometry(vo);
+        x11->nofsrc = x11->winrc;
+    }
+    struct mp_rect rc = x11->nofsrc;
+
+    if (x11->wm_type & vo_wm_FULLSCREEN) {
+        x11_set_ewmh_state(x11, "_NET_WM_STATE_FULLSCREEN", x11->fs);
+        bool moved = x11->pos_changed_during_fs;
+        bool resized = x11->size_changed_during_fs;
+        if (!x11->fs && (moved || resized)) {
+            struct mp_rect scr = x11->screenrc;
+            if (scr.x0 == rc.x0 && scr.x1 == rc.x1 &&
+                scr.y0 == rc.y0 && scr.y1 == rc.y1)
+            {
+                // Workaround for some WMs switching back to FS in this case.
+                MP_VERBOSE(x11, "avoiding triggering old-style fullscreen\n");
+                rc.x1 -= 1;
+                rc.y1 -= 1;
+            }
+            vo_x11_move_resize(vo, moved, resized, rc);
+        }
+    } else {
+        // No EWMH fullscreen: cover the screen (or restore nofsrc) ourselves.
+        if (x11->fs) {
+            vo_x11_update_screeninfo(vo);
+            rc = x11->screenrc;
+        }
+        // Decorations are only shown when windowed and the border option is set.
+        vo_x11_decoration(vo, opts->border && !x11->fs);
+        vo_x11_sizehint(vo, rc, true);
+
+        XMoveResizeWindow(x11->display, x11->window, rc.x0, rc.y0,
+                          RC_W(rc), RC_H(rc));
+        vo_x11_setlayer(vo, x11->fs || opts->ontop);
+        XRaiseWindow(x11->display, x11->window);
+        XFlush(x11->display);
+    }
+
+    x11->size_changed_during_fs = false;
+    x11->pos_changed_during_fs = false;
+
+    vo_x11_update_composition_hint(vo);
+}
+
+// Ask the WM to (un)maximize the window to match opts->window_maximized.
+static void vo_x11_maximize(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    bool maximize = x11->opts->window_maximized;
+    long params[5] = {
+        maximize ? NET_WM_STATE_ADD : NET_WM_STATE_REMOVE,
+        XA(x11, _NET_WM_STATE_MAXIMIZED_VERT),
+        XA(x11, _NET_WM_STATE_MAXIMIZED_HORZ), 1, // source indication: normal
+    };
+    x11_send_ewmh_msg(x11, "_NET_WM_STATE", params);
+}
+
+// Iconify the window, or send _NET_ACTIVE_WINDOW to bring it back, per option.
+static void vo_x11_minimize(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (x11->opts->window_minimized) {
+        XIconifyWindow(x11->display, x11->window, x11->screen);
+        return;
+    }
+    long zero_args[5] = {0}; // all-zero message payload
+    x11_send_ewmh_msg(x11, "_NET_ACTIVE_WINDOW", zero_args);
+}
+
+// Re-run the window configuration after a geometry/autofit option change.
+static void vo_x11_set_geometry(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (x11->window) {
+        // Flag is read by vo_x11_config_vo_window().
+        x11->geometry_change = true;
+        vo_x11_config_vo_window(vo);
+    }
+}
+
+// Whether rendering should happen: the window is not hidden, rendering is
+// forced, or VS_IS_DISP() holds for the selected video-sync mode.
+bool vo_x11_check_visible(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (!x11->hidden || x11->opts->force_render)
+        return true;
+    return VS_IS_DISP(x11->opts->video_sync);
+}
+
+// Empty the window's input shape (pointer passthrough) or reset it to default.
+static void vo_x11_set_input_region(struct vo *vo, bool passthrough)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    if (!passthrough) {
+        XShapeCombineMask(x11->display, x11->window, ShapeInput, 0, 0,
+                          0, ShapeSet);
+        return;
+    }
+    XRectangle empty_rect = {0, 0, 0, 0};
+    Region empty = XCreateRegion();
+    XUnionRectWithRegion(&empty_rect, empty, empty);
+    XShapeCombineRegion(x11->display, x11->window, ShapeInput, 0, 0,
+                        empty, ShapeSet);
+    XDestroyRegion(empty);
+}
+
+// Handle the VOCTRL requests common to the X11 VOs. Requests not handled
+// here (or a display FPS that is not known) yield VO_NOTIMPL.
+int vo_x11_control(struct vo *vo, int *events, int request, void *arg)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    struct mp_vo_opts *opts = x11->opts;
+    switch (request) {
+    case VOCTRL_CHECK_EVENTS:
+        vo_x11_check_events(vo);
+        *events |= x11->pending_vo_events;
+        x11->pending_vo_events = 0;
+        return VO_TRUE;
+    case VOCTRL_VO_OPTS_CHANGED: {
+        void *opt;
+        while (m_config_cache_get_next_changed(x11->opts_cache, &opt)) {
+            if (opt == &opts->fullscreen)
+                vo_x11_fullscreen(vo);
+            if (opt == &opts->ontop)
+                vo_x11_setlayer(vo, opts->ontop);
+            if (opt == &opts->border)
+                vo_x11_decoration(vo, opts->border);
+            if (opt == &opts->all_workspaces)
+                vo_x11_sticky(vo, opts->all_workspaces);
+            if (opt == &opts->window_minimized)
+                vo_x11_minimize(vo);
+            if (opt == &opts->window_maximized)
+                vo_x11_maximize(vo);
+            if (opt == &opts->cursor_passthrough)
+                vo_x11_set_input_region(vo, opts->cursor_passthrough);
+            if (opt == &opts->x11_present)
+                xpresent_set(x11);
+            if (opt == &opts->geometry || opt == &opts->autofit ||
+                opt == &opts->autofit_smaller || opt == &opts->autofit_larger)
+            {
+                vo_x11_set_geometry(vo);
+            }
+        }
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_UNFS_WINDOW_SIZE: {
+        int *s = arg;
+        if (!x11->window || x11->parent)
+            return VO_FALSE;
+        s[0] = (x11->fs ? RC_W(x11->nofsrc) : RC_W(x11->winrc)) / x11->dpi_scale;
+        s[1] = (x11->fs ? RC_H(x11->nofsrc) : RC_H(x11->winrc)) / x11->dpi_scale;
+        return VO_TRUE;
+    }
+    case VOCTRL_SET_UNFS_WINDOW_SIZE: {
+        int *s = arg;
+        if (!x11->window || x11->parent)
+            return VO_FALSE;
+        int w = s[0] * x11->dpi_scale;
+        int h = s[1] * x11->dpi_scale;
+        struct mp_rect rc = x11->winrc;
+        rc.x1 = rc.x0 + w;
+        rc.y1 = rc.y0 + h;
+        if (opts->window_maximized) {
+            opts->window_maximized = false;
+            m_config_cache_write_opt(x11->opts_cache, &opts->window_maximized);
+            vo_x11_maximize(vo);
+        }
+        vo_x11_highlevel_resize(vo, rc);
+        if (!x11->fs) { // guess new window size, instead of waiting for X
+            x11->winrc.x1 = x11->winrc.x0 + w;
+            x11->winrc.y1 = x11->winrc.y0 + h;
+        }
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_FOCUSED: {
+        *(bool *)arg = x11->has_focus;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_DISPLAY_NAMES: {
+        if (!x11->pseudo_mapped)
+            return VO_FALSE;
+        char **names = NULL;
+        int displays_spanned = 0;
+        for (int n = 0; n < x11->num_displays; n++) {
+            if (rc_overlaps(x11->displays[n].rc, x11->winrc))
+                MP_TARRAY_APPEND(NULL, names, displays_spanned,
+                                 talloc_strdup(NULL, x11->displays[n].name));
+        }
+        MP_TARRAY_APPEND(NULL, names, displays_spanned, NULL);
+        *(char ***)arg = names;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_ICC_PROFILE: {
+        // current_screen can be negative (no display yet); never index with it.
+        if (!x11->pseudo_mapped || x11->current_screen < 0)
+            return VO_NOTAVAIL;
+        int atom_id = x11->displays[x11->current_screen].atom_id;
+        char prop[80];
+        snprintf(prop, sizeof(prop), "_ICC_PROFILE");
+        if (atom_id > 0)
+            mp_snprintf_cat(prop, sizeof(prop), "_%d", atom_id);
+        x11->icc_profile_property = XAs(x11, prop);
+        int len;
+        MP_VERBOSE(x11, "Retrieving ICC profile for display: %d\n", x11->current_screen);
+        void *icc = x11_get_property(x11, x11->rootwin, x11->icc_profile_property,
+                                     XA_CARDINAL, 8, &len);
+        if (!icc)
+            return VO_FALSE;
+        *(bstr *)arg = bstrdup(NULL, (bstr){icc, len});
+        XFree(icc);
+        // Watch x11->icc_profile_property
+        XSelectInput(x11->display, x11->rootwin, PropertyChangeMask);
+        return VO_TRUE;
+    }
+    case VOCTRL_SET_CURSOR_VISIBILITY:
+        x11->mouse_cursor_visible = *(bool *)arg;
+        vo_update_cursor(vo);
+        return VO_TRUE;
+    case VOCTRL_KILL_SCREENSAVER:
+        set_screensaver(x11, false);
+        return VO_TRUE;
+    case VOCTRL_RESTORE_SCREENSAVER:
+        set_screensaver(x11, true);
+        return VO_TRUE;
+    case VOCTRL_UPDATE_WINDOW_TITLE:
+        talloc_free(x11->window_title);
+        x11->window_title = talloc_strdup(x11, (char *)arg);
+        if (!x11->parent || x11->opts->x11_wid_title)
+            vo_x11_update_window_title(vo);
+        return VO_TRUE;
+    case VOCTRL_GET_DISPLAY_FPS: {
+        double fps = x11->current_display_fps;
+        if (fps <= 0)
+            break;
+        *(double *)arg = fps;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_DISPLAY_RES: {
+        if (!x11->window || x11->parent || x11->current_screen < 0)
+            return VO_NOTAVAIL;
+        struct mp_rect screen_rc = x11->displays[x11->current_screen].rc;
+        ((int *)arg)[0] = mp_rect_w(screen_rc);
+        ((int *)arg)[1] = mp_rect_h(screen_rc);
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_WINDOW_ID: {
+        if (!x11->window)
+            return VO_NOTAVAIL;
+        *(int64_t *)arg = x11->window;
+        return VO_TRUE;
+    }
+    case VOCTRL_GET_HIDPI_SCALE:
+        *(double *)arg = x11->dpi_scale;
+        return VO_TRUE;
+    }
+    return VO_NOTIMPL;
+}
+
+// Ask the Present extension for an MSC notification on our window.
+void vo_x11_present(struct vo *vo)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    XPresentNotifyMSC(x11->display, x11->window, 0, 0, 1, 0);
+}
+
+// Interrupt vo_x11_wait_events() by writing one byte into the wakeup pipe.
+void vo_x11_wakeup(struct vo *vo)
+{
+    const char token = 0;
+    (void)write(vo->x11->wakeup_pipe[1], &token, 1); // best effort
+}
+
+// Sleep until x11->event_fd is readable, vo_x11_wakeup() was called, or
+// until_time_ns is reached; a single wait is capped at 10 seconds.
+void vo_x11_wait_events(struct vo *vo, int64_t until_time_ns)
+{
+    struct vo_x11_state *x11 = vo->x11;
+    enum { FD_X11, FD_WAKEUP, FD_COUNT };
+    struct pollfd fds[FD_COUNT] = {
+        [FD_X11]    = { .fd = x11->event_fd,       .events = POLLIN },
+        [FD_WAKEUP] = { .fd = x11->wakeup_pipe[0], .events = POLLIN },
+    };
+    int64_t remaining_ns = until_time_ns - mp_time_ns();
+    mp_poll(fds, FD_COUNT, MPCLAMP(remaining_ns, 0, MP_TIME_S_TO_NS(10)));
+    // Consume pending wakeup bytes.
+    if (fds[FD_WAKEUP].revents & POLLIN)
+        mp_flush_wakeup_pipe(x11->wakeup_pipe[0]);
+}
+
+// Reset the screensaver idle timer (at most every 10s) while it is disabled.
+static void xscreensaver_heartbeat(struct vo_x11_state *x11)
+{
+    double now = mp_time_sec();
+    if (!x11->display || x11->screensaver_enabled)
+        return;
+    if (!(now - x11->screensaver_time_last >= 10))
+        return;
+    x11->screensaver_time_last = now;
+    XResetScreenSaver(x11->display);
+}
+
+// XScreenSaverSuspend() wrapper; returns 0 if the extension is missing or < 1.1.
+static int xss_suspend(Display *mDisplay, Bool suspend)
+{
+    int ev, err, vmaj, vmin;
+    int ok = XScreenSaverQueryExtension(mDisplay, &ev, &err) == True &&
+             XScreenSaverQueryVersion(mDisplay, &vmaj, &vmin) == True;
+    if (!ok || vmaj < 1 || (vmaj == 1 && vmin < 1))
+        return 0;
+    XScreenSaverSuspend(mDisplay, suspend);
+    return 1;
+}
+
+// Enable or disable the screensaver (XScreenSaver suspend, else via DPMS).
+static void set_screensaver(struct vo_x11_state *x11, bool enabled)
+{
+    Display *mDisplay = x11->display;
+    if (!mDisplay || x11->screensaver_enabled == enabled)
+        return;
+    MP_VERBOSE(x11, "%s screensaver.\n", enabled ? "Enabling" : "Disabling");
+    x11->screensaver_enabled = enabled;
+    if (xss_suspend(mDisplay, !enabled))
+        return;
+    // Fallback if XScreenSaver suspend is unavailable: toggle DPMS directly.
+    int nothing;
+    if (!DPMSQueryExtension(mDisplay, &nothing, &nothing))
+        return;
+    BOOL onoff = 0;
+    CARD16 state;
+    DPMSInfo(mDisplay, &state, &onoff);
+    if ((!x11->dpms_touched && enabled) || enabled == !!onoff)
+        return; // only re-enable DPMS if we disabled it; skip no-op changes
+    MP_VERBOSE(x11, "Setting DPMS: %s.\n", enabled ? "on" : "off");
+    if (enabled) {
+        DPMSEnable(mDisplay);
+    } else {
+        DPMSDisable(mDisplay);
+        x11->dpms_touched = true;
+    }
+    DPMSInfo(mDisplay, &state, &onoff);
+    if (enabled != !!onoff)
+        MP_WARN(x11, "DPMS state could not be set.\n");
+}
+
+// Select event_mask on w; if the mouse-event bits apparently did not stick,
+// log an error and select the mask again without mouse events.
+static void vo_x11_selectinput_witherr(struct vo *vo, Display *display,
+                                       Window w, long event_mask)
+{
+    const long mouse = ButtonPressMask | ButtonReleaseMask | PointerMotionMask;
+
+    XSelectInput(display, w, NoEventMask);
+    // NOTE: this can raise BadAccess, which should be ignored by the X error
+    //       handler; also see below
+    XSelectInput(display, w, event_mask);
+
+    // We don't know whether BadAccess really happened, so compare the mask
+    // the server reports for us with the requested one. This is needed for
+    // obscure situations like using --rootwin with a window manager active.
+    XWindowAttributes attr;
+    if (!XGetWindowAttributes(display, w, &attr))
+        return;
+    bool wanted = event_mask & mouse;
+    bool taken = attr.all_event_masks & mouse;
+    bool mismatch = (attr.your_event_mask & mouse) != (event_mask & mouse);
+    if (wanted && taken && mismatch) {
+        MP_ERR(vo->x11, "X11 error: error during XSelectInput "
+               "call, trying without mouse events\n");
+        XSelectInput(display, w, event_mask & ~mouse);
+    }
+}
+
+// Whether some client owns the _NET_WM_CM_S<screen> selection of our screen.
+bool vo_x11_screen_is_composited(struct vo *vo)
+{
+    char sel_name[50];
+    snprintf(sel_name, sizeof(sel_name), "_NET_WM_CM_S%d", vo->x11->screen);
+    Atom sel = XInternAtom(vo->x11->display, sel_name, False);
+    return XGetSelectionOwner(vo->x11->display, sel) != None;
+}
+
+// Return whether the given visual has alpha (when compositing is used).
+// This is a heuristic at best: any depth bit not claimed by the R/G/B masks
+// is taken to be alpha (normal 8 bit visuals use a depth of 24, even if the
+// pixels are padded to 32 bit). vinfo->bits_per_rgb appears to be useless.
+bool vo_x11_is_rgba_visual(XVisualInfo *v)
+{
+    unsigned long depth_bits = ~0UL; // avoids a full-width shift below
+    if (v->depth != sizeof(unsigned long) * 8)
+        depth_bits = (1UL << v->depth) - 1;
+    return depth_bits & ~(v->red_mask | v->green_mask | v->blue_mask);
+}
diff --git a/video/out/x11_common.h b/video/out/x11_common.h
new file mode 100644
index 0000000..62a96d7
--- /dev/null
+++ b/video/out/x11_common.h
@@ -0,0 +1,164 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPLAYER_X11_COMMON_H
+#define MPLAYER_X11_COMMON_H
+
+#include <stdatomic.h>
+#include <stdbool.h>
+#include <stdint.h>
+
+#include <X11/Xlib.h>
+#include <X11/Xutil.h>
+
+#include "common/common.h"
+
+#include "config.h"
+#if !HAVE_GPL
+#error GPL only
+#endif
+
+struct vo;
+struct mp_log;
+
+#define MAX_DISPLAYS 32 // ought to be enough for everyone
+
+struct xrandr_display {
+    struct mp_rect rc; // display bounds; tested for overlap with the window
+    double fps; // reported as the current display FPS when selected
+    char *name; // returned by VOCTRL_GET_DISPLAY_NAMES
+    bool overlaps;
+    int atom_id; // offset by location of primary; > 0 selects _ICC_PROFILE_<n>
+    int screen; // stored into vo_x11_state.current_screen when selected
+};
+
+struct vo_x11_state {
+    struct mp_log *log;
+    struct input_ctx *input_ctx;
+    struct m_config_cache *opts_cache;
+    struct mp_vo_opts *opts;
+    Display *display;
+    int event_fd; // polled for POLLIN by vo_x11_wait_events()
+    int wakeup_pipe[2]; // [1] written by vo_x11_wakeup(), [0] polled/flushed
+    Window window;
+    Window rootwin;
+    Window parent;  // embedded in this foreign window
+    int screen;
+    int display_is_local;
+    int ws_width;
+    int ws_height;
+    int dpi_scale; // factor applied to VOCTRL_*_UNFS_WINDOW_SIZE values
+    struct mp_rect screenrc;
+    char *window_title;
+
+    struct xrandr_display displays[MAX_DISPLAYS];
+    int num_displays; // number of valid entries in displays[]
+    int current_screen; // index into displays[]; negative if none yet
+
+    int xrandr_event;
+    bool has_mesa;
+    bool has_nvidia;
+
+    bool screensaver_enabled; // state last requested via set_screensaver()
+    bool dpms_touched; // set once we disabled DPMS ourselves
+    double screensaver_time_last; // mp_time_sec() of last XResetScreenSaver()
+
+    struct mp_present *present;
+    bool use_present;
+    int present_code;
+
+    XIM xim;
+    XIC xic;
+    bool no_autorepeat;
+
+    Colormap colormap;
+
+    int wm_type;
+    bool hidden; // _NET_WM_STATE_HIDDEN
+    bool window_hidden; // the window was created, but not mapped yet
+    bool pseudo_mapped; // not necessarily mapped, but known window size
+    int fs;     // whether we assume the window is in fullscreen mode
+
+    bool mouse_cursor_visible; // whether we want the cursor to be visible (only
+                               // takes effect when the window is focused)
+    bool mouse_cursor_set; // whether the cursor is *currently* *hidden*
+    bool has_focus;
+    long orig_layer; // _WIN_LAYER before we changed it (0 = not read yet)
+
+    // Current actual window position (updated on window move/resize events).
+    struct mp_rect winrc;
+    double current_display_fps;
+
+    int pending_vo_events;
+
+    // last non-fullscreen extents (updated on fullscreen or reinitialization)
+    struct mp_rect nofsrc;
+
+    /* Keep track of original video width/height to determine when to
+     * resize window when reconfiguring. Resize window when video size
+     * changes, but don't force window size changes as long as video size
+     * stays the same (even if that size is different from the current
+     * window size after the user modified the latter). */
+    int old_dw, old_dh;
+    /* Video size changed during fullscreen when we couldn't tell the new
+     * size to the window manager. Must set window size when turning
+     * fullscreen off. */
+    bool size_changed_during_fs;
+    bool pos_changed_during_fs;
+
+    /* One of the autofit/geometry options changed at runtime. */
+    bool geometry_change;
+
+    XComposeStatus compose_status;
+
+    /* XShm stuff */
+    int ShmCompletionEvent;
+    /* Number of outstanding XShmPutImage requests */
+    /* Decremented when ShmCompletionEvent is received */
+    /* Increment it before XShmPutImage */
+    int ShmCompletionWaitCount;
+
+    /* drag and drop */
+    Atom dnd_requested_format;
+    Atom dnd_requested_action;
+    Window dnd_src_window;
+
+    /* dragging the window */
+    bool win_drag_button1_down;
+
+    Atom icc_profile_property; // root window property last read for the ICC profile
+};
+
+bool vo_x11_init(struct vo *vo);
+void vo_x11_uninit(struct vo *vo);
+void vo_x11_check_events(struct vo *vo);
+bool vo_x11_screen_is_composited(struct vo *vo);
+bool vo_x11_create_vo_window(struct vo *vo, XVisualInfo *vis,
+                             const char *classname);
+void vo_x11_config_vo_window(struct vo *vo);
+bool vo_x11_check_visible(struct vo *vo);
+int vo_x11_control(struct vo *vo, int *events, int request, void *arg);
+void vo_x11_present(struct vo *vo);
+void vo_x11_sync_swap(struct vo *vo);
+void vo_x11_wakeup(struct vo *vo);
+void vo_x11_wait_events(struct vo *vo, int64_t until_time_ns);
+
+void vo_x11_silence_xlib(int dir);
+
+bool vo_x11_is_rgba_visual(XVisualInfo *v);
+
+#endif /* MPLAYER_X11_COMMON_H */
diff --git a/video/repack.c b/video/repack.c
new file mode 100644
index 0000000..ce3703a
--- /dev/null
+++ b/video/repack.c
@@ -0,0 +1,1203 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+
+#include <libavutil/bswap.h>
+#include <libavutil/pixfmt.h>
+
+#include "common/common.h"
+#include "repack.h"
+#include "video/csputils.h"
+#include "video/fmt-conversion.h"
+#include "video/img_format.h"
+#include "video/mp_image.h"
+
+enum repack_step_type {
+    REPACK_STEP_FLOAT,  // presumably the F32 conversion step - TODO confirm
+    REPACK_STEP_REPACK, // presumably packed <-> planar repacking - TODO confirm
+    REPACK_STEP_ENDIAN, // presumably the endian swap step - TODO confirm
+};
+
+struct repack_step {
+    enum repack_step_type type; // kind of processing done by this step
+    // 0=input, 1=output
+    struct mp_image *buf[2];
+    bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer
+    struct mp_imgfmt_desc fmt[2]; // presumably formats of buf[0]/buf[1] - TODO confirm
+    struct mp_image *tmp; // output buffer, if needed
+};
+
+struct mp_repack {
+    bool pack;                  // if false, this is for unpacking
+    int flags;
+    int imgfmt_user;            // original mp format (unchanged endian)
+    int imgfmt_a;               // original mp format (possibly packed format,
+                                // swapped endian)
+    int imgfmt_b;               // equivalent unpacked/planar format
+    struct mp_imgfmt_desc fmt_a;// ==imgfmt_a
+    struct mp_imgfmt_desc fmt_b;// ==imgfmt_b
+
+    void (*repack)(struct mp_repack *rp,
+                   struct mp_image *a, int a_x, int a_y,
+                   struct mp_image *b, int b_x, int b_y, int w);
+
+    bool passthrough_y;         // possible luma plane optimization for e.g. nv12
+    int endian_size;            // endian swap; 0=none, 2/4=swap word size
+
+    // For packed_repack.
+    int components[4];          // b[n] = mp_image.planes[components[n]]
+    //  pack:   a is dst, b is src
+    //  unpack: a is src, b is dst
+    void (*packed_repack_scanline)(void *a, void *b[], int w);
+
+    // Fringe RGB/YUV.
+    uint8_t comp_size;
+    uint8_t comp_map[6];
+    uint8_t comp_shifts[3];
+    uint8_t *comp_lut;
+    void (*repack_fringe_yuv)(void *dst, void *src[], int w, uint8_t *c);
+
+    // F32 repacking.
+    int f32_comp_size;
+    float f32_m[4], f32_o[4];
+    uint32_t f32_pmax[4];
+    enum mp_csp f32_csp_space;
+    enum mp_csp_levels f32_csp_levels;
+
+    // REPACK_STEP_REPACK: if true, need to copy this plane
+    bool copy_buf[4];
+
+    struct repack_step steps[4]; // processing steps (see struct repack_step)
+    int num_steps;               // presumably the used steps[] count - TODO confirm
+
+    bool configured;
+};
+
+// depth = number of LSB in use; num_planes must be 3 or 4 (else returns 0)
+static int find_gbrp_format(int depth, int num_planes)
+{
+    if (num_planes < 3 || num_planes > 4)
+        return 0;
+    struct mp_regular_imgfmt desc = {
+        .component_type = MP_COMPONENT_TYPE_UINT,
+        .forced_csp = MP_CSP_RGB,
+        .component_size = 1 + (depth > 8),
+        .component_pad = depth - (depth > 8 ? 16 : 8),
+        .num_planes = num_planes,
+        .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} },
+    };
+    return mp_find_regular_imgfmt(&desc);
+}
+
+// depth = number of LSB in use; num_planes must be 1..4 (else returns 0)
+static int find_yuv_format(int depth, int num_planes)
+{
+    if (num_planes <= 0 || num_planes >= 5)
+        return 0;
+    // With exactly two planes, the second plane carries component 4.
+    int second = num_planes == 2 ? 4 : 2;
+    struct mp_regular_imgfmt desc = {
+        .component_type = MP_COMPONENT_TYPE_UINT,
+        .component_size = 1 + (depth > 8),
+        .component_pad = depth - (depth > 8 ? 16 : 8),
+        .num_planes = num_planes,
+        .planes = { {1, {1}}, {1, {second}}, {1, {3}}, {1, {4}} },
+    };
+    return mp_find_regular_imgfmt(&desc);
+}
+
+// Copy one line on the plane p (w pixels, starting at dst_x/src_x).
+static void copy_plane(struct mp_image *dst, int dst_x, int dst_y,
+                       struct mp_image *src, int src_x, int src_y,
+                       int w, int p)
+{
+    // Plane rows per line: 1 << chroma_ys if ys[p] == 0, 1 if ys[p] == chroma_ys.
+    int rows = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
+    size_t row_bytes = mp_image_plane_bytes(dst, p, dst_x, w);
+
+    assert(dst->fmt.bpp[p] == src->fmt.bpp[p]);
+
+    for (int row = 0; row < rows; row++) {
+        void *to = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + row);
+        const void *from = mp_image_pixel_ptr_ny(src, p, src_x, src_y + row);
+        memcpy(to, from, row_bytes);
+    }
+}
+
+// Byte-swap every endian_size-byte (2 or 4) word of one line, src -> dst.
+static void swap_endian(struct mp_image *dst, int dst_x, int dst_y,
+                        struct mp_image *src, int src_x, int src_y,
+                        int w, int endian_size)
+{
+    assert(src->fmt.num_planes == dst->fmt.num_planes);
+
+    for (int p = 0; p < dst->fmt.num_planes; p++) {
+        int sub_x = dst->fmt.xs[p];
+        int bytes_pp = dst->fmt.bpp[p] / 8;
+        // Pixels on this plane (w rounded up to the subsampling), in words.
+        int plane_w = (w + (1 << sub_x) - 1) >> sub_x;
+        int num_words = plane_w * (bytes_pp / endian_size);
+        // Number of lines on this plane.
+        int rows = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1;
+
+        assert(src->fmt.bpp[p] == bytes_pp * 8);
+
+        for (int row = 0; row < rows; row++) {
+            void *in = mp_image_pixel_ptr_ny(src, p, src_x, src_y + row);
+            void *out = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + row);
+            if (endian_size == 2) {
+                uint16_t *in16 = in, *out16 = out;
+                for (int i = 0; i < num_words; i++)
+                    out16[i] = av_bswap16(in16[i]);
+            } else if (endian_size == 4) {
+                uint32_t *in32 = in, *out32 = out;
+                for (int i = 0; i < num_words; i++)
+                    out32[i] = av_bswap32(in32[i]);
+            } else {
+                MP_ASSERT_UNREACHABLE();
+            }
+        }
+    }
+}
+
+// PA = PAck, copy planar input to single packed array
+// UN = UNpack, copy packed input to planar output
+// Naming convention:
+//  pa_/un_ prefix to identify conversion direction.
+//  Left (LSB, lowest byte address) -> Right (MSB, highest byte address).
+//      (This is unusual; MSB to LSB is more commonly used to describe formats,
+//       but our convention makes more sense for byte access in little endian.)
+//  "c" identifies a color component.
+//  "z" identifies known zero padding.
+//  "x" identifies uninitialized padding.
+//  A component is followed by its size in bits.
+//  Size can be omitted for multiple uniform components (c8c8c8 == ccc8).
+// Unpackers will often use "x" for padding, because they ignore it, while
+// packers will use "z" because they write zero.
+
+#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
+    static void name(void *dst, void *src[], int w) {                       \
+        for (int x = 0; x < w; x++) {                                       \
+            ((packed_t *)dst)[x] =                                          \
+                ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) |             \
+                ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) |             \
+                ((packed_t)((plane_t *)src[2])[x] << (sh_c2)) |             \
+                ((packed_t)((plane_t *)src[3])[x] << (sh_c3));              \
+        }                                                                   \
+    }
+
+#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
+    static void name(void *src, void *dst[], int w) {                       \
+        for (int x = 0; x < w; x++) {                                       \
+            packed_t c = ((packed_t *)src)[x];                              \
+            ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask);               \
+            ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask);               \
+            ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask);               \
+            ((plane_t *)dst[3])[x] = (c >> (sh_c3)) & (mask);               \
+        }                                                                   \
+    }
+
+
+#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
+    static void name(void *dst, void *src[], int w) {                       \
+        for (int x = 0; x < w; x++) {                                       \
+            ((packed_t *)dst)[x] = (pad) |                                  \
+                ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) |             \
+                ((packed_t)((plane_t *)src[1])[x] << (sh_c1)) |             \
+                ((packed_t)((plane_t *)src[2])[x] << (sh_c2));              \
+        }                                                                   \
+    }
+
+UN_WORD_4(un_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24, 0xFFu)
+PA_WORD_4(pa_cccc8,  uint32_t, uint8_t,  0, 8,  16, 24)
+// Not sure if this is a good idea; there may be no alignment guarantee.
+UN_WORD_4(un_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48, 0xFFFFu)
+PA_WORD_4(pa_cccc16,  uint64_t, uint16_t,  0, 16,  32, 48)
+
+// Defines a scanline unpacker called "name" for pixels stored as one packed_t
+// word holding 3 components. For each of the w pixels, component n is
+// extracted as (word >> sh_cN) & mask and stored as plane_t into dst[n]; all
+// other bits of the word are ignored.
+#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask)       \
+    static void name(void *src, void *dst[], int w) {                       \
+        const packed_t *in = src;                                           \
+        plane_t *out0 = dst[0];                                             \
+        plane_t *out1 = dst[1];                                             \
+        plane_t *out2 = dst[2];                                             \
+        for (int i = 0; i < w; i++) {                                       \
+            const packed_t word = in[i];                                    \
+            out0[i] = (word >> (sh_c0)) & (mask);                           \
+            out1[i] = (word >> (sh_c1)) & (mask);                           \
+            out2[i] = (word >> (sh_c2)) & (mask);                           \
+        }                                                                   \
+    }
+
+// 3-component word formats. The un_* functions mask each component out of the
+// word and ignore the remaining (padding) bits; the pa_* functions are given
+// pad == 0, so every bit not covered by a component is written as 0.
+// 3x8 bit components in bits 0..23 of a 32 bit word, padding in bits 24..31.
+UN_WORD_3(un_ccc8x8,  uint32_t, uint8_t,  0, 8,  16, 0xFFu)
+PA_WORD_3(pa_ccc8z8,  uint32_t, uint8_t,  0, 8,  16, 0)
+// 3x8 bit components in bits 8..31 of a 32 bit word, padding in bits 0..7.
+UN_WORD_3(un_x8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0xFFu)
+PA_WORD_3(pa_z8ccc8,  uint32_t, uint8_t,  8, 16, 24, 0)
+// 3x10 bit components in bits 0..29 of a 32 bit word, 2 bits of padding on
+// top; the planar side uses 16 bit samples.
+UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu)
+PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 0, 10, 20, 0)
+// 3x16 bit components in bits 0..47 of a 64 bit word, padding in bits 48..63.
+UN_WORD_3(un_ccc16x16, uint64_t, uint16_t, 0, 16, 32, 0xFFFFu)
+PA_WORD_3(pa_ccc16z16, uint64_t, uint16_t, 0, 16, 32, 0)
+
+// Defines a scanline packer called "name" that combines 2 planes of plane_t
+// samples into one packed_t word per pixel. Component n is shifted left by
+// sh_cN; "pad" is OR-ed into every output word.
+#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad)               \
+    static void name(void *dst, void *src[], int w) {                       \
+        packed_t *out = dst;                                                \
+        const plane_t *in0 = src[0];                                        \
+        const plane_t *in1 = src[1];                                        \
+        for (int i = 0; i < w; i++) {                                       \
+            out[i] = (pad) |                                                \
+                ((packed_t)in0[i] << (sh_c0)) |                             \
+                ((packed_t)in1[i] << (sh_c1));                              \
+        }                                                                   \
+    }
+
+// Defines a scanline unpacker called "name" for pixels stored as one packed_t
+// word holding 2 components. For each of the w pixels, component n is
+// extracted as (word >> sh_cN) & mask and stored as plane_t into dst[n].
+#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask)              \
+    static void name(void *src, void *dst[], int w) {                       \
+        const packed_t *in = src;                                           \
+        plane_t *out0 = dst[0];                                             \
+        plane_t *out1 = dst[1];                                             \
+        for (int i = 0; i < w; i++) {                                       \
+            const packed_t word = in[i];                                    \
+            out0[i] = (word >> (sh_c0)) & (mask);                           \
+            out1[i] = (word >> (sh_c1)) & (mask);                           \
+        }                                                                   \
+    }
+
+// 2x8 bit components in a 16 bit word (component 0 in the low byte).
+UN_WORD_2(un_cc8,  uint16_t, uint8_t,  0, 8,  0xFFu)
+PA_WORD_2(pa_cc8,  uint16_t, uint8_t,  0, 8,  0)
+// 2x16 bit components in a 32 bit word (component 0 in the low half).
+UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu)
+PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0)
+
+// Defines a scanline packer called "name" that interleaves 3 planes of comp_t
+// samples: output pixel i consists of src[0][i], src[1][i], src[2][i] stored
+// back to back, without any padding.
+#define PA_SEQ_3(name, comp_t)                                              \
+    static void name(void *dst, void *src[], int w) {                       \
+        comp_t *out = dst;                                                  \
+        const comp_t *in0 = src[0];                                         \
+        const comp_t *in1 = src[1];                                         \
+        const comp_t *in2 = src[2];                                         \
+        for (int i = 0; i < w; i++) {                                       \
+            out[3 * i + 0] = in0[i];                                        \
+            out[3 * i + 1] = in1[i];                                        \
+            out[3 * i + 2] = in2[i];                                        \
+        }                                                                   \
+    }
+
+// Defines a scanline unpacker called "name" that splits pixels made of 3
+// consecutive comp_t samples into 3 planes: dst[n][i] receives the n-th
+// sample of pixel i.
+#define UN_SEQ_3(name, comp_t)                                              \
+    static void name(void *src, void *dst[], int w) {                       \
+        const comp_t *in = src;                                             \
+        comp_t *out0 = dst[0];                                              \
+        comp_t *out1 = dst[1];                                              \
+        comp_t *out2 = dst[2];                                              \
+        for (int i = 0; i < w; i++) {                                       \
+            out0[i] = in[3 * i + 0];                                        \
+            out1[i] = in[3 * i + 1];                                        \
+            out2[i] = in[3 * i + 2];                                        \
+        }                                                                   \
+    }
+
+// 3 components stored sequentially with no padding: 3 bytes per pixel ...
+UN_SEQ_3(un_ccc8,  uint8_t)
+PA_SEQ_3(pa_ccc8,  uint8_t)
+// ... and 3 16 bit samples (6 bytes) per pixel.
+UN_SEQ_3(un_ccc16, uint16_t)
+PA_SEQ_3(pa_ccc16, uint16_t)
+
+// "regular": single packed plane, all components have same width (except padding)
+// One entry describes a packed pixel layout together with the scanline
+// functions that convert it to/from separate planes. Entries are matched
+// against a format's properties by setup_packed_packer() and setup_nv_packer().
+struct regular_repacker {
+    int packed_width;       // number of bits of the packed pixel
+    int component_width;    // number of bits for a single component
+    int prepadding;         // number of bits of LSB padding
+    int num_components;     // number of components that can be accessed
+    // Packer: a is the packed destination scanline, b[] the source planes,
+    // w the number of pixels.
+    void (*pa_scanline)(void *a, void *b[], int w);
+    // Unpacker: a is the packed source scanline, b[] the destination planes,
+    // w the number of pixels.
+    void (*un_scanline)(void *a, void *b[], int w);
+};
+
+// Table of all supported "regular" packed layouts. Columns:
+//  packed_width, component_width, prepadding, num_components, packer, unpacker
+// The table is searched linearly and the first matching entry is used.
+static const struct regular_repacker regular_repackers[] = {
+    {32, 8,  0, 3, pa_ccc8z8,   un_ccc8x8},
+    {32, 8,  8, 3, pa_z8ccc8,   un_x8ccc8},
+    {32, 8,  0, 4, pa_cccc8,    un_cccc8},
+    {64, 16, 0, 4, pa_cccc16,   un_cccc16},
+    {64, 16, 0, 3, pa_ccc16z16, un_ccc16x16},
+    {24, 8,  0, 3, pa_ccc8,     un_ccc8},
+    {48, 16, 0, 3, pa_ccc16,    un_ccc16},
+    {16, 8,  0, 2, pa_cc8,      un_cc8},
+    {32, 16, 0, 2, pa_cc16,     un_cc16},
+    {32, 10, 0, 3, pa_ccc10z2,  un_ccc10x2},
+};
+
+// rp->repack callback for formats handled by regular_repackers[]: converts w
+// pixels of one scanline between the packed image a (plane 0, starting at
+// a_x/a_y) and the planar image b (starting at b_x/b_y). The direction is
+// determined by which scanline function was stored at setup time.
+static void packed_repack(struct mp_repack *rp,
+                          struct mp_image *a, int a_x, int a_y,
+                          struct mp_image *b, int b_x, int b_y, int w)
+{
+    uint32_t *packed = mp_image_pixel_ptr(a, 0, a_x, a_y);
+
+    // planes[n] is the plane of b that holds the n-th packed component.
+    void *planes[4] = {0};
+    for (int n = 0; n < b->num_planes; n++)
+        planes[n] = mp_image_pixel_ptr(b, rp->components[n], b_x, b_y);
+
+    rp->packed_repack_scanline(packed, planes, w);
+}
+
+// Tries to set a packer/unpacker for component-wise byte aligned formats.
+// On success, sets rp->repack, rp->packed_repack_scanline, rp->imgfmt_b and
+// rp->components[]. If rp->imgfmt_a is not such a format, or no entry of
+// regular_repackers[] matches it, rp is left unmodified.
+static void setup_packed_packer(struct mp_repack *rp)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
+
+    // Require known components, unsigned integer samples, native endian and
+    // a single plane.
+    if (!(desc.flags & MP_IMGFLAG_HAS_COMPS))
+        return;
+    if (!(desc.flags & MP_IMGFLAG_TYPE_UINT))
+        return;
+    if (!(desc.flags & MP_IMGFLAG_NE))
+        return;
+    if (desc.num_planes != 1)
+        return;
+
+    // slot_comp[i] is the 1-based number of the component stored in the i-th
+    // component-sized slot of the packed pixel, or 0 if nothing is stored
+    // there.
+    int slot_comp[4] = {0};
+    int num_comps = 0;
+    for (int n = 0; n < MP_NUM_COMPONENTS; n++) {
+        if (!desc.comps[n].size)
+            continue;
+        // Every component must have the size and padding of component 0 and
+        // start at a multiple of that size.
+        if (desc.comps[n].size != desc.comps[0].size)
+            return;
+        if (desc.comps[n].pad != desc.comps[0].pad)
+            return;
+        if (desc.comps[n].offset % desc.comps[0].size)
+            return;
+        int slot = desc.comps[n].offset / desc.comps[0].size;
+        if (slot >= 4)
+            return;
+        slot_comp[slot] = n + 1;
+        num_comps++;
+    }
+
+    // A negative pad value reduces the depth; a positive one does not.
+    int depth = desc.comps[0].size + MPMIN(0, desc.comps[0].pad);
+
+    // Indexed by 1-based component number; the result is again 1-based, with
+    // 4 getting special treatment below.
+    static const int reorder_gbrp[] = {0, 3, 1, 2, 4};
+    static const int reorder_yuv[] = {0, 1, 2, 3, 4};
+    bool is_yuv = desc.flags & MP_IMGFLAG_COLOR_YUV;
+    const int *reorder = is_yuv ? reorder_yuv : reorder_gbrp;
+    int planar_fmt = is_yuv ? find_yuv_format(depth, num_comps)
+                            : find_gbrp_format(depth, num_comps);
+    if (!planar_fmt)
+        return;
+
+    // If the first slot is unused, treat it as 8 bits of leading padding and
+    // start reading components from the second slot.
+    int first_comp = slot_comp[0] ? 0 : 1;
+    int prepad = slot_comp[0] ? 0 : 8;
+
+    for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
+        const struct regular_repacker *cand = &regular_repackers[i];
+
+        // The following may assume little endian (because some repack backends
+        // use word access, while the metadata here uses byte access).
+
+        void (*scanline)(void *pa, void *pb[], int w) =
+            rp->pack ? cand->pa_scanline : cand->un_scanline;
+
+        bool usable = cand->packed_width == desc.bpp[0] &&
+                      cand->component_width == depth &&
+                      cand->num_components == num_comps &&
+                      cand->prepadding == prepad &&
+                      scanline;
+        if (!usable)
+            continue;
+
+        rp->repack = packed_repack;
+        rp->packed_repack_scanline = scanline;
+        rp->imgfmt_b = planar_fmt;
+        for (int n = 0; n < num_comps; n++) {
+            // Determine permutation that maps component order between the two
+            // formats. Component 4 always maps to the last plane of the
+            // planar format.
+            int c = reorder[slot_comp[first_comp + n]];
+            rp->components[n] = c == 4 ? num_comps - 1 : c - 1;
+        }
+        return;
+    }
+}
+
+// Defines a scanline packer called "name" for 3 components of 8 bit planar
+// input. Each sample of plane n is translated through the n-th 256 entry
+// section of lut, shifted left by sN, and the three results are OR-ed into
+// one packed_t pixel.
+#define PA_SHIFT_LUT8(name, packed_t)                                       \
+    static void name(void *dst, void *src[], int w, uint8_t *lut,           \
+                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
+        packed_t *out = dst;                                                \
+        const uint8_t *in0 = src[0];                                        \
+        const uint8_t *in1 = src[1];                                        \
+        const uint8_t *in2 = src[2];                                        \
+        for (int i = 0; i < w; i++) {                                       \
+            out[i] = (lut[256 * 0 + in0[i]] << s0) |                        \
+                     (lut[256 * 1 + in1[i]] << s1) |                        \
+                     (lut[256 * 2 + in2[i]] << s2);                         \
+        }                                                                   \
+    }
+
+
+// Defines a scanline unpacker called "name" for packed_t pixels with 3
+// components. Component n is taken as the low 8 bits of (pixel >> sN),
+// translated through the n-th 256 entry section of lut, and stored as a byte
+// into dst[n].
+#define UN_SHIFT_LUT8(name, packed_t)                                       \
+    static void name(void *src, void *dst[], int w, uint8_t *lut,           \
+                     uint8_t s0, uint8_t s1, uint8_t s2) {                  \
+        const packed_t *in = src;                                           \
+        uint8_t *out0 = dst[0];                                             \
+        uint8_t *out1 = dst[1];                                             \
+        uint8_t *out2 = dst[2];                                             \
+        for (int i = 0; i < w; i++) {                                       \
+            const packed_t word = in[i];                                    \
+            out0[i] = lut[256 * 0 + ((word >> s0) & 0xFF)];                 \
+            out1[i] = lut[256 * 1 + ((word >> s1) & 0xFF)];                 \
+            out2[i] = lut[256 * 2 + ((word >> s2) & 0xFF)];                 \
+        }                                                                   \
+    }
+
+// Shift/LUT packers and unpackers for pixels of 1 byte (_8) and 2 bytes (_16);
+// fringe_rgb_repack() chooses between them based on rp->comp_size.
+PA_SHIFT_LUT8(pa_shift_lut8_8,  uint8_t)
+PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t)
+UN_SHIFT_LUT8(un_shift_lut8_8,  uint8_t)
+UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t)
+
+// rp->repack callback installed by setup_fringe_rgb_packer(): converts w
+// pixels of one scanline between the packed image a (plane 0) and the planar
+// image b, using rp->comp_lut and rp->comp_shifts[]. rp->pack selects the
+// direction, rp->comp_size (1 or 2 bytes) the packed pixel type.
+static void fringe_rgb_repack(struct mp_repack *rp,
+                              struct mp_image *a, int a_x, int a_y,
+                              struct mp_image *b, int b_x, int b_y, int w)
+{
+    void *packed = mp_image_pixel_ptr(a, 0, a_x, a_y);
+
+    // planes[n] is the plane of b that holds the n-th packed component.
+    void *planes[4] = {0};
+    for (int n = 0; n < b->num_planes; n++)
+        planes[n] = mp_image_pixel_ptr(b, rp->components[n], b_x, b_y);
+
+    assert(rp->comp_size == 1 || rp->comp_size == 2);
+    bool one_byte = rp->comp_size == 1;
+
+    void (*scanline)(void *pa, void *pb[], int w, uint8_t *lut,
+                     uint8_t s0, uint8_t s1, uint8_t s2) =
+        rp->pack ? (one_byte ? pa_shift_lut8_8 : pa_shift_lut8_16)
+                 : (one_byte ? un_shift_lut8_8 : un_shift_lut8_16);
+
+    scanline(packed, planes, w, rp->comp_lut,
+             rp->comp_shifts[0], rp->comp_shifts[1], rp->comp_shifts[2]);
+}
+
+// Tries to set up repacking for single-plane RGB formats of 8 or 16 bits per
+// pixel whose 3 components have 1 to 8 bits each (sizes may differ between
+// components). Samples are converted through a 3x256 entry LUT to/from a
+// 3 plane format returned by find_gbrp_format(), in which all components
+// share one depth. Returns without setting rp->imgfmt_b if the format does
+// not qualify.
+static void setup_fringe_rgb_packer(struct mp_repack *rp)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
+    if (!(desc.flags & MP_IMGFLAG_HAS_COMPS))
+        return;
+
+    // Whole bytes only, and not more than two of them.
+    if (desc.bpp[0] > 16 || (desc.bpp[0] % 8u))
+        return;
+    if (mp_imgfmt_get_forced_csp(rp->imgfmt_a) != MP_CSP_RGB)
+        return;
+    // One plane, and no 4th component.
+    if (desc.num_planes != 1 || desc.comps[3].size)
+        return;
+
+    // The common depth is the smallest or the largest component size,
+    // depending on REPACK_CREATE_ROUND_DOWN.
+    bool round_down = rp->flags & REPACK_CREATE_ROUND_DOWN;
+    int depth = desc.comps[0].size;
+    for (int n = 0; n < 3; n++) {
+        const struct mp_imgfmt_comp_desc *comp = &desc.comps[n];
+
+        if (comp->size < 1 || comp->size > 8 || comp->pad)
+            return;
+
+        depth = round_down ? MPMIN(depth, comp->size)
+                           : MPMAX(depth, comp->size);
+    }
+    if (rp->flags & REPACK_CREATE_EXPAND_8BIT)
+        depth = 8;
+
+    rp->imgfmt_b = find_gbrp_format(depth, 3);
+    if (!rp->imgfmt_b)
+        return;
+    rp->comp_lut = talloc_array(rp, uint8_t, 256 * 3);
+    rp->repack = fringe_rgb_repack;
+
+    // Plane of the planar image that holds packed component n.
+    static const int comp_plane[3] = {2, 0, 1};
+    // Largest sample value at the common depth.
+    uint8_t zmax = (1 << depth) - 1;
+
+    for (int n = 0; n < 3; n++) {
+        rp->components[n] = comp_plane[n];
+        rp->comp_shifts[n] = desc.comps[n].offset;
+        if (!rp->comp_lut)
+            continue;
+
+        // Largest sample value of this component in the packed format.
+        uint8_t cmax = (1 << desc.comps[n].size) - 1;
+        uint8_t *lut = rp->comp_lut + 256 * n;
+        for (int v = 0; v < 256; v++) {
+            if (rp->pack) {
+                // Scale 0..zmax to 0..cmax with rounding.
+                lut[v] = (v * cmax + zmax / 2) / zmax;
+            } else {
+                // Drop bits of neighboring components, then scale 0..cmax to
+                // 0..zmax.
+                lut[v] = (v & cmax) * zmax / cmax;
+            }
+        }
+    }
+
+    rp->comp_size = (desc.bpp[0] + 7) / 8;
+    assert(rp->comp_size == 1 || rp->comp_size == 2);
+
+    if (desc.endian_shift) {
+        assert(rp->comp_size == 2 && (1 << desc.endian_shift) == 2);
+        rp->endian_size = 2;
+    }
+}
+
+// rp->repack callback for paletted input: looks up each of the w index bytes
+// of the scanline in plane 0 of a in the table of 32 bit entries at
+// a->planes[1], and writes the four bytes of the entry to the planes of b
+// (bits 8-15 -> plane 0, bits 0-7 -> plane 1, bits 16-23 -> plane 2,
+// bits 24-31 -> plane 3).
+static void unpack_pal(struct mp_repack *rp,
+                       struct mp_image *a, int a_x, int a_y,
+                       struct mp_image *b, int b_x, int b_y, int w)
+{
+    const uint8_t *index = mp_image_pixel_ptr(a, 0, a_x, a_y);
+    const uint32_t *palette = (void *)a->planes[1];
+
+    uint8_t *out[4] = {0};
+    for (int n = 0; n < b->num_planes; n++)
+        out[n] = mp_image_pixel_ptr(b, n, b_x, b_y);
+
+    for (int i = 0; i < w; i++) {
+        const uint32_t entry = palette[index[i]];
+        out[0][i] = (entry >>  8) & 0xFF; // G
+        out[1][i] = entry & 0xFF;         // B
+        out[2][i] = (entry >> 16) & 0xFF; // R
+        out[3][i] = (entry >> 24) & 0xFF; // A
+    }
+}
+
+// rp->repack callback for 1 bit per pixel formats: converts one scanline
+// between the bitmap image a (8 pixels per byte, first pixel in the most
+// significant bit) and the byte-per-pixel image b. All values are translated
+// through rp->comp_lut; rp->pack selects the direction.
+static void bitmap_repack(struct mp_repack *rp,
+                          struct mp_image *a, int a_x, int a_y,
+                          struct mp_image *b, int b_x, int b_y, int w)
+{
+    uint8_t *bits = mp_image_pixel_ptr(a, 0, a_x, a_y);
+    uint8_t *bytes = mp_image_pixel_ptr(b, 0, b_x, b_y);
+
+    // One iteration per bitmap byte; the last one may cover less than 8
+    // pixels.
+    for (unsigned x0 = 0; x0 < w; x0 += 8) {
+        int count = MPMIN(8, w - x0);
+        if (rp->pack) {
+            uint8_t acc = 0;
+            for (int k = 0; k < count; k++)
+                acc |= (rp->comp_lut[bytes[x0 + k]]) << (7 - k);
+            bits[x0 / 8] = acc;
+        } else {
+            uint8_t cur = bits[x0 / 8];
+            // The LUT is indexed with the isolated bit (0 or a power of 2),
+            // not with 0/1.
+            for (int k = 0; k < count; k++)
+                bytes[x0 + k] = rp->comp_lut[cur & (1 << (7 - k))];
+        }
+    }
+}
+
+// Tries to set up repacking for the special cases: unpacking of IMGFMT_PAL8
+// (to the format returned by find_gbrp_format(8, 4)), and packing/unpacking
+// of the 1 bit AV_PIX_FMT_MONOWHITE / AV_PIX_FMT_MONOBLACK formats (to
+// IMGFMT_Y1, or IMGFMT_Y8 with REPACK_CREATE_EXPAND_8BIT). Leaves
+// rp->imgfmt_b unset for every other format.
+static void setup_misc_packer(struct mp_repack *rp)
+{
+    if (rp->imgfmt_a == IMGFMT_PAL8 && !rp->pack) {
+        int grap_fmt = find_gbrp_format(8, 4);
+        if (!grap_fmt)
+            return;
+        rp->imgfmt_b = grap_fmt;
+        rp->repack = unpack_pal;
+        return;
+    }
+
+    enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a);
+    if (avfmt != AV_PIX_FMT_MONOWHITE && avfmt != AV_PIX_FMT_MONOBLACK)
+        return;
+
+    rp->comp_lut = talloc_array(rp, uint8_t, 256);
+
+    // max is the largest sample value of the unpacked format.
+    bool expand = rp->flags & REPACK_CREATE_EXPAND_8BIT;
+    rp->imgfmt_b = expand ? IMGFMT_Y8 : IMGFMT_Y1;
+    int max = expand ? 255 : 1;
+
+    // MONOWHITE inverts the meaning of a set bit.
+    bool inv = avfmt == AV_PIX_FMT_MONOWHITE;
+    for (int n = 0; n < 256; n++) {
+        if (rp->pack) {
+            // Sample value -> bit: threshold at half of the range.
+            bool upper_half = n >= (max + 1) / 2;
+            rp->comp_lut[n] = inv ^ upper_half;
+        } else {
+            // Isolated bit (0 or non-0) -> sample value 0 or max.
+            bool bit_set = !!n;
+            rp->comp_lut[n] = (inv ^ bit_set) ? max : 0;
+        }
+    }
+    rp->repack = bitmap_repack;
+}
+
+// Defines a scanline packer called "name" for packed formats that store 2
+// pixels in a group of 4 comp_t samples. c[0] and c[1] are the positions of
+// the two src[0] samples within the group, c[4] and c[5] the positions of the
+// single src[1] and src[2] sample. w is stepped in units of 2 pixels.
+#define PA_P422(name, comp_t)                                               \
+    static void name(void *dst, void *src[], int w, uint8_t *c) {           \
+        comp_t *out = dst;                                                  \
+        const comp_t *in0 = src[0];                                         \
+        const comp_t *in1 = src[1];                                         \
+        const comp_t *in2 = src[2];                                         \
+        for (int x = 0; x < w; x += 2) {                                    \
+            comp_t *group = out + x * 2;                                    \
+            group[c[0]] = in0[x + 0];                                       \
+            group[c[1]] = in0[x + 1];                                       \
+            group[c[4]] = in1[x / 2];                                       \
+            group[c[5]] = in2[x / 2];                                       \
+        }                                                                   \
+    }
+
+
+// Defines a scanline unpacker called "name" for packed formats that store 2
+// pixels in a group of 4 comp_t samples. c[0] and c[1] are the positions of
+// the two dst[0] samples within the group, c[4] and c[5] the positions of the
+// single dst[1] and dst[2] sample. w is stepped in units of 2 pixels.
+#define UN_P422(name, comp_t)                                               \
+    static void name(void *src, void *dst[], int w, uint8_t *c) {           \
+        const comp_t *in = src;                                             \
+        comp_t *out0 = dst[0];                                              \
+        comp_t *out1 = dst[1];                                              \
+        comp_t *out2 = dst[2];                                              \
+        for (int x = 0; x < w; x += 2) {                                    \
+            const comp_t *group = in + x * 2;                               \
+            out0[x + 0] = group[c[0]];                                      \
+            out0[x + 1] = group[c[1]];                                      \
+            out1[x / 2] = group[c[4]];                                      \
+            out2[x / 2] = group[c[5]];                                      \
+        }                                                                   \
+    }
+
+// 2-pixel group packers/unpackers for 8 bit and 16 bit samples; selected by
+// setup_fringe_yuv_packer() when the format's align_x is 2.
+PA_P422(pa_p422_8,  uint8_t)
+PA_P422(pa_p422_16, uint16_t)
+UN_P422(un_p422_8,  uint8_t)
+UN_P422(un_p422_16, uint16_t)
+
+// Scanline packer for 8 bit packed formats that store 4 pixels in a group of
+// 6 bytes. c[0..3] are the byte positions of the four src[0] samples within
+// the group, c[4] and c[5] the positions of the single src[1] and src[2]
+// sample. w is stepped in units of 4 pixels.
+static void pa_p411_8(void *dst, void *src[], int w, uint8_t *c)
+{
+    uint8_t *out = dst;
+    const uint8_t *in0 = src[0];
+    const uint8_t *in1 = src[1];
+    const uint8_t *in2 = src[2];
+
+    for (int x = 0; x < w; x += 4) {
+        uint8_t *group = out + x / 4 * 6;
+        for (int k = 0; k < 4; k++)
+            group[c[k]] = in0[x + k];
+        group[c[4]] = in1[x / 4];
+        group[c[5]] = in2[x / 4];
+    }
+}
+
+
+// Scanline unpacker for 8 bit packed formats that store 4 pixels in a group
+// of 6 bytes. c[0..3] are the byte positions of the four dst[0] samples
+// within the group, c[4] and c[5] the positions of the single dst[1] and
+// dst[2] sample. w is stepped in units of 4 pixels.
+static void un_p411_8(void *src, void *dst[], int w, uint8_t *c)
+{
+    const uint8_t *in = src;
+    uint8_t *out0 = dst[0];
+    uint8_t *out1 = dst[1];
+    uint8_t *out2 = dst[2];
+
+    for (int x = 0; x < w; x += 4) {
+        const uint8_t *group = in + x / 4 * 6;
+        for (int k = 0; k < 4; k++)
+            out0[x + k] = group[c[k]];
+        out1[x / 4] = group[c[4]];
+        out2[x / 4] = group[c[5]];
+    }
+}
+
+// rp->repack callback installed by setup_fringe_yuv_packer(): converts w
+// pixels of one scanline between the packed image a (plane 0) and the planes
+// of b, using the scanline function in rp->repack_fringe_yuv and the sample
+// positions in rp->comp_map.
+static void fringe_yuv_repack(struct mp_repack *rp,
+                              struct mp_image *a, int a_x, int a_y,
+                              struct mp_image *b, int b_x, int b_y, int w)
+{
+    void *packed = mp_image_pixel_ptr(a, 0, a_x, a_y);
+
+    void *planes[4] = {0};
+    for (int n = 0; n < b->num_planes; n++)
+        planes[n] = mp_image_pixel_ptr(b, n, b_x, b_y);
+
+    rp->repack_fringe_yuv(packed, planes, w, rp->comp_map);
+}
+
+// Tries to set up repacking for packed, horizontally subsampled 3 component
+// formats (MP_IMGFLAG_PACKED_SS_YUV) with at most 4 pixels per group. On
+// success sets rp->repack_fringe_yuv, rp->imgfmt_b, rp->repack and, for
+// formats with an endian shift, rp->endian_size. rp->comp_map is used as
+// scratch space and may be overwritten even on failure.
+//
+// The callback is selected into a local and all other state is committed only
+// after the planar counterpart is known to exist. Otherwise a failed attempt
+// would leave rp->repack/rp->endian_size set for the caller's fallback path,
+// and a callback left over from a previous attempt could be mistaken for a
+// match.
+static void setup_fringe_yuv_packer(struct mp_repack *rp)
+{
+    struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a);
+    if (!(desc.flags & MP_IMGFLAG_PACKED_SS_YUV) ||
+        mp_imgfmt_desc_get_num_comps(&desc) != 3 ||
+        desc.align_x > 4)
+        return;
+
+    uint8_t y_loc[4];
+    if (!mp_imgfmt_get_packed_yuv_locations(desc.id, y_loc))
+        return;
+
+    // All components must have the same size, no negative padding, and start
+    // at a multiple of the component size. The positions of components 1 and
+    // 2 (in units of components) go to comp_map[4] and comp_map[5].
+    for (int n = 0; n < MP_NUM_COMPONENTS; n++) {
+        if (!desc.comps[n].size)
+            continue;
+        if (desc.comps[n].size != desc.comps[0].size ||
+            desc.comps[n].pad < 0 ||
+            desc.comps[n].offset % desc.comps[0].size)
+            return;
+        if (n == 1 || n == 2) {
+            rp->comp_map[4 + (n - 1)] =
+                desc.comps[n].offset / desc.comps[0].size;
+        }
+    }
+    // Positions of the align_x samples of component 0 go to comp_map[0..].
+    for (int n = 0; n < desc.align_x; n++) {
+        if (y_loc[n] % desc.comps[0].size)
+            return;
+        rp->comp_map[n] = y_loc[n] / desc.comps[0].size;
+    }
+
+    void (*scanline)(void *pa, void *pb[], int w, uint8_t *c) = NULL;
+    if (desc.comps[0].size == 8 && desc.align_x == 2) {
+        scanline = rp->pack ? pa_p422_8 : un_p422_8;
+    } else if (desc.comps[0].size == 16 && desc.align_x == 2) {
+        scanline = rp->pack ? pa_p422_16 : un_p422_16;
+    } else if (desc.comps[0].size == 8 && desc.align_x == 4) {
+        scanline = rp->pack ? pa_p411_8 : un_p411_8;
+    }
+
+    if (!scanline)
+        return;
+
+    struct mp_regular_imgfmt yuvfmt = {
+        .component_type = MP_COMPONENT_TYPE_UINT,
+        // NB: same problem with P010 and not clearing padding.
+        .component_size = desc.comps[0].size / 8u,
+        .num_planes = 3,
+        .planes = { {1, {1}}, {1, {2}}, {1, {3}} },
+        .chroma_xs = desc.chroma_xs,
+        .chroma_ys = 0,
+    };
+    int planar_fmt = mp_find_regular_imgfmt(&yuvfmt);
+    if (!planar_fmt)
+        return;
+
+    rp->repack_fringe_yuv = scanline;
+    rp->imgfmt_b = planar_fmt;
+    rp->repack = fringe_yuv_repack;
+
+    if (desc.endian_shift) {
+        rp->endian_size = 1 << desc.endian_shift;
+        assert(rp->endian_size == 2);
+    }
+}
+
+// rp->repack callback installed by setup_nv_packer(): converts the
+// interleaved 2 component plane 1 of image a to/from two separate planes of
+// image b (selected by rp->components[0..1]). w is given in full-resolution
+// pixels and is converted to the number of samples in the subsampled plane,
+// rounding up.
+static void repack_nv(struct mp_repack *rp,
+                      struct mp_image *a, int a_x, int a_y,
+                      struct mp_image *b, int b_x, int b_y, int w)
+{
+    int xs = a->fmt.chroma_xs;
+
+    uint32_t *interleaved = mp_image_pixel_ptr(a, 1, a_x, a_y);
+
+    void *planes[2];
+    for (int n = 0; n < 2; n++)
+        planes[n] = mp_image_pixel_ptr(b, rp->components[n], b_x, b_y);
+
+    int plane_w = (w + (1 << xs) - 1) >> xs;
+    rp->packed_repack_scanline(interleaved, planes, plane_w);
+}
+
+// Tries to set up repacking for 2 plane formats in which plane 0 holds
+// component 1 alone and plane 1 holds components 2 and 3 interleaved (in
+// either order). The unpacked counterpart is the equivalent format with 3
+// single-component planes. On success sets rp->repack, rp->passthrough_y,
+// rp->packed_repack_scanline, rp->imgfmt_b and rp->components[0..1];
+// otherwise rp is left unmodified.
+static void setup_nv_packer(struct mp_repack *rp)
+{
+    struct mp_regular_imgfmt desc;
+    if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a))
+        return;
+
+    // Check for NV.
+    if (desc.num_planes != 2)
+        return;
+    if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1)
+        return;
+    if (desc.planes[1].num_components != 2)
+        return;
+    int first = desc.planes[1].components[0];
+    int second = desc.planes[1].components[1];
+    bool is_2_and_3 = (first == 2 && second == 3) ||
+                      (first == 3 && second == 2);
+    if (!is_2_and_3)
+        return;
+
+    // Construct equivalent planar format.
+    struct mp_regular_imgfmt planar = desc;
+    planar.num_planes = 3;
+    planar.planes[1].num_components = 1;
+    planar.planes[1].components[0] = 2;
+    planar.planes[2].num_components = 1;
+    planar.planes[2].components[0] = 3;
+    // For P010. Strangely this concept exists only for the NV format.
+    if (planar.component_pad > 0)
+        planar.component_pad = 0;
+
+    int planar_fmt = mp_find_regular_imgfmt(&planar);
+    if (!planar_fmt)
+        return;
+
+    // Look for a 2 component word repacker without padding whose component
+    // width equals the full sample size.
+    for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) {
+        const struct regular_repacker *cand = &regular_repackers[i];
+
+        void (*scanline)(void *pa, void *pb[], int w) =
+            rp->pack ? cand->pa_scanline : cand->un_scanline;
+
+        bool usable = cand->packed_width == desc.component_size * 2 * 8 &&
+                      cand->component_width == desc.component_size * 8 &&
+                      cand->num_components == 2 &&
+                      cand->prepadding == 0 &&
+                      scanline;
+        if (!usable)
+            continue;
+
+        rp->repack = repack_nv;
+        rp->passthrough_y = true;
+        rp->packed_repack_scanline = scanline;
+        rp->imgfmt_b = planar_fmt;
+        // Plane index (0-based) of the planar format for each of the two
+        // interleaved samples, in their stored order.
+        rp->components[0] = desc.planes[1].components[0] - 1;
+        rp->components[1] = desc.planes[1].components[1] - 1;
+        return;
+    }
+}
+
+// Defines a scanline converter called "name" from float samples to packed_t
+// integers: each output is lrint((src + o) * m), clamped to 0..p_max (p_max
+// is converted to packed_t first).
+#define PA_F32(name, packed_t)                                              \
+    static void name(void *dst, float *src, int w, float m, float o,        \
+                     uint32_t p_max) {                                      \
+        packed_t *out = dst;                                                \
+        for (int i = 0; i < w; i++) {                                       \
+            long rounded = lrint((src[i] + o) * m);                         \
+            out[i] = MPCLAMP(rounded, 0, (packed_t)p_max);                  \
+        }                                                                   \
+    }
+
+// Defines a scanline converter called "name" from packed_t integers to float
+// samples: each output is src * m + o. The last parameter exists only so that
+// the signature matches the functions generated by PA_F32.
+#define UN_F32(name, packed_t)                                              \
+    static void name(void *src, float *dst, int w, float m, float o,        \
+                     uint32_t unused) {                                     \
+        const packed_t *in = src;                                           \
+        for (int i = 0; i < w; i++)                                         \
+            dst[i] = in[i] * m + o;                                         \
+    }
+
+// Float converters for 1 byte (_8) and 2 byte (_16) integer samples;
+// repack_float() chooses between them based on rp->f32_comp_size.
+PA_F32(pa_f32_8, uint8_t)
+UN_F32(un_f32_8, uint8_t)
+PA_F32(pa_f32_16, uint16_t)
+UN_F32(un_f32_16, uint16_t)
+
+// In all this, float counts as "unpacked".
+// Converts all planes between the integer image a and the float image b,
+// using the per-plane factors in rp->f32_m[], rp->f32_o[] and rp->f32_pmax[].
+// For each plane, (1 << chroma_ys) - (1 << ys[p]) + 1 lines are processed
+// starting at a_y/b_y, and w is scaled down by the plane's xs shift.
+static void repack_float(struct mp_repack *rp,
+                         struct mp_image *a, int a_x, int a_y,
+                         struct mp_image *b, int b_x, int b_y, int w)
+{
+    assert(rp->f32_comp_size == 1 || rp->f32_comp_size == 2);
+
+    void (*convert)(void *a, float *b, int w, float fm, float fb, uint32_t max);
+    if (rp->pack) {
+        convert = rp->f32_comp_size == 1 ? pa_f32_8 : pa_f32_16;
+    } else {
+        convert = rp->f32_comp_size == 1 ? un_f32_8 : un_f32_16;
+    }
+
+    for (int p = 0; p < b->num_planes; p++) {
+        int lines = (1 << b->fmt.chroma_ys) - (1 << b->fmt.ys[p]) + 1;
+        for (int line = 0; line < lines; line++) {
+            void *fixed = mp_image_pixel_ptr_ny(a, p, a_x, a_y + line);
+            void *flt = mp_image_pixel_ptr_ny(b, p, b_x, b_y + line);
+
+            convert(fixed, flt, w >> b->fmt.xs[p], rp->f32_m[p], rp->f32_o[p],
+                    rp->f32_pmax[p]);
+        }
+    }
+}
+
+// Recomputes the float conversion factors (rp->f32_m[], rp->f32_o[],
+// rp->f32_pmax[]) if the colorspace or levels of the user image differ from
+// the ones the factors were last computed for. Does nothing if no float
+// conversion is configured (rp->f32_comp_size == 0).
+static void update_repack_float(struct mp_repack *rp)
+{
+    if (!rp->f32_comp_size)
+        return;
+
+    // The image whose color parameters are used: the output of the last step
+    // when packing, the input of the first step when unpacking.
+    struct mp_image *ui;
+    if (rp->pack) {
+        ui = rp->steps[rp->num_steps - 1].buf[1];
+    } else {
+        ui = rp->steps[0].buf[0];
+    }
+    enum mp_csp csp = ui->params.color.space;
+    enum mp_csp_levels levels = ui->params.color.levels;
+    bool up_to_date = rp->f32_csp_space == csp && rp->f32_csp_levels == levels;
+    if (up_to_date)
+        return;
+
+    // The fixed point format.
+    struct mp_regular_imgfmt desc = {0};
+    mp_get_regular_imgfmt(&desc, rp->imgfmt_b);
+    assert(desc.component_size);
+
+    // Negative padding reduces the number of used bits.
+    int comp_bits = desc.component_size * 8 + MPMIN(desc.component_pad, 0);
+    for (int p = 0; p < desc.num_planes; p++) {
+        double m, o;
+        mp_get_csp_uint_mul(csp, levels, comp_bits, desc.planes[p].components[0],
+                            &m, &o);
+        // m/o describe integer -> float; packing applies the inverse.
+        if (rp->pack) {
+            rp->f32_m[p] = 1.0 / m;
+            rp->f32_o[p] = -o;
+        } else {
+            rp->f32_m[p] = m;
+            rp->f32_o[p] = o;
+        }
+        rp->f32_pmax[p] = (1u << comp_bits) - 1;
+    }
+
+    rp->f32_csp_space = csp;
+    rp->f32_csp_levels = levels;
+}
+
+// Runs all configured steps for w pixels, reading from position src_x/src_y
+// of the source image and writing to dst_x/dst_y of the destination image.
+// The buffers must have been configured before (rp->configured), and the
+// positions and w must satisfy the alignment requirements asserted below.
+void repack_line(struct mp_repack *rp, int dst_x, int dst_y,
+                 int src_x, int src_y, int w)
+{
+    assert(rp->configured);
+
+    struct repack_step *first = &rp->steps[0];
+    struct repack_step *last = &rp->steps[rp->num_steps - 1];
+
+    // Everything must be non-negative and within the (aligned) image size.
+    assert(dst_x >= 0 && dst_y >= 0 && src_x >= 0 && src_y >= 0 && w >= 0);
+    assert(dst_x + w <= MP_ALIGN_UP(last->buf[1]->w, last->fmt[1].align_x));
+    assert(src_x + w <= MP_ALIGN_UP(first->buf[0]->w, first->fmt[0].align_x));
+    assert(dst_y < last->buf[1]->h);
+    assert(src_y < first->buf[0]->h);
+    // Positions must be aligned to the formats' pixel groups.
+    assert(!(dst_x & (last->fmt[1].align_x - 1)));
+    assert(!(src_x & (first->fmt[0].align_x - 1)));
+    assert(!(w & ((1 << first->fmt[0].chroma_xs) - 1)));
+    assert(!(dst_y & (last->fmt[1].align_y - 1)));
+    assert(!(src_y & (first->fmt[0].align_y - 1)));
+
+    for (int n = 0; n < rp->num_steps; n++) {
+        struct repack_step *step = &rp->steps[n];
+
+        // When writing to temporary buffers, always write to the start (maybe
+        // helps with locality).
+        int sx = 0, sy = 0, dx = 0, dy = 0;
+        if (step->user_buf[0]) {
+            sx = src_x;
+            sy = src_y;
+        }
+        if (step->user_buf[1]) {
+            dx = dst_x;
+            dy = dst_y;
+        }
+
+        // "a" is the packed side and "b" the unpacked side: when packing, a
+        // is the step's output, otherwise its input.
+        struct mp_image *buf_a = step->buf[rp->pack];
+        struct mp_image *buf_b = step->buf[!rp->pack];
+        int a_x, a_y, b_x, b_y;
+        if (rp->pack) {
+            a_x = dx; a_y = dy;
+            b_x = sx; b_y = sy;
+        } else {
+            a_x = sx; a_y = sy;
+            b_x = dx; b_y = dy;
+        }
+
+        switch (step->type) {
+        case REPACK_STEP_FLOAT:
+            repack_float(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
+            break;
+        case REPACK_STEP_ENDIAN:
+            swap_endian(step->buf[1], dx, dy, step->buf[0], sx, sy, w,
+                        rp->endian_size);
+            break;
+        case REPACK_STEP_REPACK: {
+            if (rp->repack)
+                rp->repack(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
+
+            // Planes flagged in copy_buf[] are copied as they are.
+            for (int p = 0; p < step->fmt[0].num_planes; p++) {
+                if (rp->copy_buf[p])
+                    copy_plane(step->buf[1], dx, dy, step->buf[0], sx, sy, w, p);
+            }
+            break;
+        }
+        }
+    }
+}
+
+// Sets up the conversion for rp->imgfmt_a, which is either rp->imgfmt_user or
+// its other-endian variant (see setup_format()). Determines the unpacked
+// format rp->imgfmt_b and builds the list of steps in rp->steps[]. Returns
+// false if the format is not supported; rp may be partially modified in that
+// case.
+static bool setup_format_ne(struct mp_repack *rp)
+{
+    // Try the specialized setups in order; each one sets rp->imgfmt_b on
+    // success, which skips the remaining ones.
+    if (!rp->imgfmt_b)
+        setup_nv_packer(rp);
+    if (!rp->imgfmt_b)
+        setup_misc_packer(rp);
+    if (!rp->imgfmt_b)
+        setup_packed_packer(rp);
+    if (!rp->imgfmt_b)
+        setup_fringe_rgb_packer(rp);
+    if (!rp->imgfmt_b)
+        setup_fringe_yuv_packer(rp);
+    if (!rp->imgfmt_b)
+        rp->imgfmt_b = rp->imgfmt_a; // maybe it was planar after all
+
+    struct mp_regular_imgfmt desc;
+    if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_b))
+        return false;
+
+    // no weird stuff
+    if (desc.num_planes > 4)
+        return false;
+
+    // Endian swapping.
+    // Only applies if imgfmt_a is the other-endian variant of the user format.
+    if (rp->imgfmt_a != rp->imgfmt_user &&
+        rp->imgfmt_a == mp_find_other_endian(rp->imgfmt_user))
+    {
+        struct mp_imgfmt_desc desc_a = mp_imgfmt_get_desc(rp->imgfmt_a);
+        struct mp_imgfmt_desc desc_u = mp_imgfmt_get_desc(rp->imgfmt_user);
+        rp->endian_size = 1 << desc_u.endian_shift;
+        // Only 2 and 4 byte swaps are accepted here.
+        if (!desc_a.endian_shift && rp->endian_size != 2 && rp->endian_size != 4)
+            return false;
+    }
+
+    // Accept only true planar formats (with known components and no padding).
+    for (int n = 0; n < desc.num_planes; n++) {
+        if (desc.planes[n].num_components != 1)
+            return false;
+        int c = desc.planes[n].components[0];
+        if (c < 1 || c > 4)
+            return false;
+    }
+
+    rp->fmt_a = mp_imgfmt_get_desc(rp->imgfmt_a);
+    rp->fmt_b = mp_imgfmt_get_desc(rp->imgfmt_b);
+
+    // The steps below are added in packing order (unpacked -> packed); for
+    // unpacking the list is reversed further down.
+
+    if (rp->flags & REPACK_CREATE_PLANAR_F32) {
+        // imgfmt_b with float32 component type.
+        struct mp_regular_imgfmt fdesc = desc;
+        fdesc.component_type = MP_COMPONENT_TYPE_FLOAT;
+        fdesc.component_size = 4;
+        fdesc.component_pad = 0;
+        int ffmt = mp_find_regular_imgfmt(&fdesc);
+        if (!ffmt)
+            return false;
+        // A conversion step is needed only if imgfmt_b is not already the
+        // float format; it supports 1 and 2 byte unsigned integers only.
+        if (ffmt != rp->imgfmt_b) {
+            if (desc.component_type != MP_COMPONENT_TYPE_UINT ||
+                (desc.component_size != 1 && desc.component_size != 2))
+                return false;
+            rp->f32_comp_size = desc.component_size;
+            // Values that never equal a real colorspace/levels setting, so
+            // update_repack_float() recomputes the factors on first use.
+            rp->f32_csp_space = MP_CSP_COUNT;
+            rp->f32_csp_levels = MP_CSP_LEVELS_COUNT;
+            rp->steps[rp->num_steps++] = (struct repack_step) {
+                .type = REPACK_STEP_FLOAT,
+                .fmt = {
+                    mp_imgfmt_get_desc(ffmt),
+                    rp->fmt_b,
+                },
+            };
+        }
+    }
+
+    // The repack step itself is always present (even if rp->repack is NULL).
+    rp->steps[rp->num_steps++] = (struct repack_step) {
+        .type = REPACK_STEP_REPACK,
+        .fmt = { rp->fmt_b, rp->fmt_a },
+    };
+
+    if (rp->endian_size) {
+        rp->steps[rp->num_steps++] = (struct repack_step) {
+            .type = REPACK_STEP_ENDIAN,
+            .fmt = {
+                rp->fmt_a,
+                mp_imgfmt_get_desc(rp->imgfmt_user),
+            },
+        };
+    }
+
+    // Reverse if unpack (to reflect actual data flow)
+    if (!rp->pack) {
+        for (int n = 0; n < rp->num_steps / 2; n++) {
+            MPSWAP(struct repack_step, rp->steps[n],
+                   rp->steps[rp->num_steps - 1 - n]);
+        }
+        // Each step's input and output formats trade places as well.
+        for (int n = 0; n < rp->num_steps; n++) {
+            struct repack_step *rs = &rp->steps[n];
+            MPSWAP(struct mp_imgfmt_desc, rs->fmt[0], rs->fmt[1]);
+        }
+    }
+
+    // The output format of every step must be the input format of the next.
+    for (int n = 0; n < rp->num_steps - 1; n++)
+        assert(rp->steps[n].fmt[1].id == rp->steps[n + 1].fmt[0].id);
+
+    return true;
+}
+
+// Clears all state that a format setup attempt derives from rp->imgfmt_a, so
+// that the next attempt (see setup_format()) starts from a clean slate. The
+// user-provided fields (imgfmt_user, pack, flags) are kept.
+static void reset_params(struct mp_repack *rp)
+{
+    rp->num_steps = 0;
+    rp->imgfmt_b = 0;
+    rp->repack = NULL;
+    rp->passthrough_y = false;
+    rp->endian_size = 0;
+    rp->packed_repack_scanline = NULL;
+    // Selected by setup_fringe_yuv_packer(); must not leak from one attempt
+    // into the next.
+    rp->repack_fringe_yuv = NULL;
+    rp->comp_size = 0;
+    // Non-zero enables the float conversion in update_repack_float().
+    rp->f32_comp_size = 0;
+    talloc_free(rp->comp_lut);
+    rp->comp_lut = NULL;
+}
+
+// Sets up the conversion for rp->imgfmt_user. The format is first tried as
+// it is; if that fails, its other-endian variant is tried (which adds an
+// endian swapping step). Returns false if neither works.
+static bool setup_format(struct mp_repack *rp)
+{
+    reset_params(rp);
+    rp->imgfmt_a = rp->imgfmt_user;
+    if (setup_format_ne(rp))
+        return true;
+
+    // Try reverse endian.
+    reset_params(rp);
+    rp->imgfmt_a = mp_find_other_endian(rp->imgfmt_user);
+    if (!rp->imgfmt_a)
+        return false;
+    return setup_format_ne(rp);
+}
+
+// Creates a repacker for the given image format. pack selects the direction
+// (true: unpacked -> imgfmt, false: imgfmt -> unpacked), flags is a
+// combination of REPACK_CREATE_* flags. Returns NULL if the format is not
+// supported; otherwise the result is a talloc allocation without parent.
+struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags)
+{
+    struct mp_repack *rp = talloc_zero(NULL, struct mp_repack);
+    rp->imgfmt_user = imgfmt;
+    rp->pack = pack;
+    rp->flags = flags;
+
+    if (setup_format(rp))
+        return rp;
+
+    talloc_free(rp);
+    return NULL;
+}
+
+int mp_repack_get_format_src(struct mp_repack *rp)
+{
+    return rp->steps[0].fmt[0].id; // input side of the first step
+}
+
+int mp_repack_get_format_dst(struct mp_repack *rp)
+{
+    return rp->steps[rp->num_steps - 1].fmt[1].id; // output of the last step
+}
+
+int mp_repack_get_align_x(struct mp_repack *rp)
+{
+    // We really want the LCM of both formats' alignments, but since only one
+    // of them is packed (or they're the same format), and the chroma
+    // subsampling is the same for both, only the packed one (fmt_a) matters.
+    return rp->fmt_a.align_x;
+}
+
+int mp_repack_get_align_y(struct mp_repack *rp)
+{
+    return rp->fmt_a.align_y; // should be the same for packed/planar formats
+}
+
+static void image_realloc(struct mp_image **img, int fmt, int w, int h)
+{
+    if (*img && (*img)->imgfmt == fmt && (*img)->w == w && (*img)->h == h)
+        return; // existing image already has the requested format and size
+    talloc_free(*img);
+    *img = mp_image_alloc(fmt, w, h); // caller checks *img for failure
+}
+
+bool repack_config_buffers(struct mp_repack *rp,
+                           int dst_flags, struct mp_image *dst,
+                           int src_flags, struct mp_image *src,
+                           bool *enable_passthrough)
+{
+    struct repack_step *rs_first = &rp->steps[0];
+    struct repack_step *rs_last = &rp->steps[rp->num_steps - 1];
+    // NOTE: dst_flags and src_flags are not referenced in this function.
+    rp->configured = false; // only set back to true once setup completes
+
+    assert(dst && src);
+
+    int buf_w = MPMAX(dst->w, src->w); // width used for temporary buffers
+
+    assert(dst->imgfmt == rs_last->fmt[1].id);
+    assert(src->imgfmt == rs_first->fmt[0].id);
+
+    // Chain/allocate buffers.
+
+    for (int n = 0; n < rp->num_steps; n++)
+        rp->steps[n].buf[0] = rp->steps[n].buf[1] = NULL;
+
+    rs_first->buf[0] = src; // the user images form the two ends of the chain
+    rs_last->buf[1] = dst;
+
+    for (int n = 0; n < rp->num_steps; n++) {
+        struct repack_step *rs = &rp->steps[n];
+
+        if (!rs->buf[0]) {
+            assert(n > 0);
+            rs->buf[0] = rp->steps[n - 1].buf[1]; // input = previous output
+        }
+
+        if (rs->buf[1])
+            continue; // output already assigned (the last step writes dst)
+
+        // Note: since repack_line() can have different src/dst offsets, we
+        //       can't do true in-place in general.
+        bool can_inplace = rs->type == REPACK_STEP_ENDIAN &&
+                           rs->buf[0] != src && rs->buf[0] != dst;
+        if (can_inplace) {
+            rs->buf[1] = rs->buf[0];
+            continue;
+        }
+
+        if (rs != rs_last) {
+            struct repack_step *next = &rp->steps[n + 1];
+            if (next->buf[0]) { // NOTE(review): looks always NULL here; confirm
+                rs->buf[1] = next->buf[0];
+                continue;
+            }
+        }
+        // Otherwise use a private temp buffer: buf_w wide, align_y lines high.
+        image_realloc(&rs->tmp, rs->fmt[1].id, buf_w, rs->fmt[1].align_y);
+        if (!rs->tmp)
+            return false; // allocation failed; rp stays unconfigured
+        talloc_steal(rp, rs->tmp);
+        rs->buf[1] = rs->tmp;
+    }
+
+    for (int n = 0; n < rp->num_steps; n++) {
+        struct repack_step *rs = &rp->steps[n];
+        // Record which sides of the step are the caller's src/dst images.
+        rs->user_buf[0] = rs->buf[0] == src || rs->buf[0] == dst;
+        rs->user_buf[1] = rs->buf[1] == src || rs->buf[1] == dst;
+    }
+
+    // If repacking is the only operation. It's also responsible for simply
+    // copying src to dst if absolutely no filtering is done.
+    bool may_passthrough =
+        rp->num_steps == 1 && rp->steps[0].type == REPACK_STEP_REPACK;
+
+    for (int p = 0; p < rp->fmt_b.num_planes; p++) {
+        // (All repack callbacks copy, except nv12 does not copy luma.)
+        bool repack_copies_plane = rp->repack && !(rp->passthrough_y && p == 0);
+
+        bool can_pt = may_passthrough && !repack_copies_plane &&
+                      enable_passthrough && enable_passthrough[p];
+
+        // Copy if needed, unless the repack callback does it anyway.
+        rp->copy_buf[p] = !repack_copies_plane && !can_pt;
+
+        if (enable_passthrough) // report back what was actually granted
+            enable_passthrough[p] = can_pt && !rp->copy_buf[p];
+    }
+
+    if (enable_passthrough) {
+        // Planes beyond fmt_b.num_planes can never be passed through.
+        for (int n = rp->fmt_b.num_planes; n < MP_MAX_PLANES; n++)
+            enable_passthrough[n] = false;
+    }
+
+    update_repack_float(rp);
+
+    rp->configured = true;
+
+    return true;
+}
diff --git a/video/repack.h b/video/repack.h
new file mode 100644
index 0000000..7afe7ed
--- /dev/null
+++ b/video/repack.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include <stdbool.h>
+
+enum {
+    // This controls behavior with different bit widths per component (like
+    // RGB565). If ROUND_DOWN is specified, the planar format will use the min.
+    // bit width of all components, otherwise the transformation is lossless.
+    REPACK_CREATE_ROUND_DOWN    = (1 << 0),
+
+    // Expand some (not all) low bit depth fringe formats to 8 bit on unpack.
+    REPACK_CREATE_EXPAND_8BIT   = (1 << 1),
+
+    // For mp_repack_create_planar(). If specified, the planar format uses a
+    // float 32 bit sample format. No range expansion is done.
+    REPACK_CREATE_PLANAR_F32    = (1 << 2),
+};
+
+struct mp_repack;
+struct mp_image;
+
+// Create a repacker between any format (imgfmt parameter) and an equivalent
+// planar format (that is native endian). If pack==true, imgfmt is the output,
+// otherwise it is the input. The respective other input/output is the planar
+// format. The planar format can be queried with mp_repack_get_format_*().
+// Note that some formats may change the "implied" colorspace (for example,
+// packed xyz unpacks as rgb).
+// If imgfmt is already planar, a passthrough repacker may be created.
+//  imgfmt: src or dst format (usually packed, non-planar, etc.)
+//  pack: true if imgfmt is dst, false if imgfmt is src
+//  flags: any of REPACK_CREATE_* flags
+//  returns: NULL on failure, otherwise free with talloc_free().
+struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags);
+
+// Return input and output formats for which rp was created.
+int mp_repack_get_format_src(struct mp_repack *rp);
+int mp_repack_get_format_dst(struct mp_repack *rp);
+
+// Return pixel alignment. For x, this is a lowest pixel count at which there is
+// a byte boundary and a full chroma pixel (horizontal subsampling) on src/dst.
+// For y, this is the pixel height of the vertical subsampling.
+// Always returns a power of 2.
+int mp_repack_get_align_x(struct mp_repack *rp);
+int mp_repack_get_align_y(struct mp_repack *rp);
+
+// Repack a single line from src to dst, as set in repack_config_buffers().
+// For subsampled chroma formats, this copies as many luma/alpha rows as needed
+// for a complete line (e.g. 2 luma lines, 1 chroma line for 4:2:0).
+// dst_x, src_x, y must be aligned to the pixel alignment. w may be unaligned
+// if at the right crop-border of the image, but must be always aligned to
+// horiz. sub-sampling. y is subject to hslice.
+void repack_line(struct mp_repack *rp, int dst_x, int dst_y,
+                 int src_x, int src_y, int w);
+
+// Configure with a source and target buffer. The rp instance will keep the
+// mp_image pointers and access them on repack_line() calls. Refcounting is
+// not respected - the caller needs to make sure dst is always writable.
+// The images can have different sizes (as repack_line() lets you use different
+// target coordinates for dst/src).
+// This also allocates potentially required temporary buffers.
+//  dst_flags: REPACK_BUF_* flags for dst
+//  dst: where repack_line() writes to
+//  src_flags: REPACK_BUF_* flags for src
+//  src: where repack_line() reads from
+//  enable_passthrough: if non-NULL, a bool array of size MP_MAX_PLANES indexed
+//                      by plane; a true entry requests disabling copying the
+//                      plane data to the dst plane. The function will write to
+//                      this array whether the plane can really be passed through
+//                      (i.e. will set array entries from true to false if pass-
+//                      through is not possible). It writes to all MP_MAX_PLANES
+//                      entries. If NULL, all entries are implicitly false.
+//  returns: success (fails on OOM)
+bool repack_config_buffers(struct mp_repack *rp,
+                           int dst_flags, struct mp_image *dst,
+                           int src_flags, struct mp_image *src,
+                           bool *enable_passthrough);
diff --git a/video/sws_utils.c b/video/sws_utils.c
new file mode 100644
index 0000000..5e9c358
--- /dev/null
+++ b/video/sws_utils.c
@@ -0,0 +1,496 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <libswscale/swscale.h>
+#include <libavcodec/avcodec.h>
+#include <libavutil/bswap.h>
+#include <libavutil/opt.h>
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
+#include <libavutil/pixdesc.h>
+#endif
+
+#include "config.h"
+
+#include "sws_utils.h"
+
+#include "common/common.h"
+#include "options/m_config.h"
+#include "options/m_option.h"
+#include "video/mp_image.h"
+#include "video/img_format.h"
+#include "fmt-conversion.h"
+#include "csputils.h"
+#include "common/msg.h"
+#include "osdep/endian.h"
+
+#if HAVE_ZIMG
+#include "zimg.h"
+#endif
+
+// Global libswscale settings from the command line (see sws_conf below).
+struct sws_opts {
+    int scaler;         // "scaler": one of the SWS_* scaler flags
+    float lum_gblur;    // "lgb"
+    float chr_gblur;    // "cgb"
+    int chr_vshift;     // "cvs"
+    int chr_hshift;     // "chs"
+    float chr_sharpen;  // "cs"
+    float lum_sharpen;  // "ls"
+    bool fast;          // "fast": if set, mp_sws_hq_flags are not applied
+    bool bitexact;      // "bitexact": adds SWS_BITEXACT
+    bool zimg;          // "allow-zimg"
+};
+
+#define OPT_BASE_STRUCT struct sws_opts
+const struct m_sub_options sws_conf = {
+    .opts = (const m_option_t[]) {
+        {"scaler", OPT_CHOICE(scaler,
+            {"fast-bilinear",   SWS_FAST_BILINEAR},
+            {"bilinear",        SWS_BILINEAR},
+            {"bicubic",         SWS_BICUBIC},
+            {"x",               SWS_X},
+            {"point",           SWS_POINT},
+            {"area",            SWS_AREA},
+            {"bicublin",        SWS_BICUBLIN},
+            {"gauss",           SWS_GAUSS},
+            {"sinc",            SWS_SINC},
+            {"lanczos",         SWS_LANCZOS},
+            {"spline",          SWS_SPLINE})},
+        {"lgb", OPT_FLOAT(lum_gblur), M_RANGE(0, 100.0)},
+        {"cgb", OPT_FLOAT(chr_gblur), M_RANGE(0, 100.0)},
+        {"cvs", OPT_INT(chr_vshift)},
+        {"chs", OPT_INT(chr_hshift)},
+        {"ls", OPT_FLOAT(lum_sharpen), M_RANGE(-100.0, 100.0)},
+        {"cs", OPT_FLOAT(chr_sharpen), M_RANGE(-100.0, 100.0)},
+        {"fast", OPT_BOOL(fast)},
+        {"bitexact", OPT_BOOL(bitexact)},
+        {"allow-zimg", OPT_BOOL(zimg)},
+        {0} // end of option list
+    },
+    .size = sizeof(struct sws_opts),
+    .defaults = &(const struct sws_opts){ // unlisted fields default to 0/false
+        .scaler = SWS_LANCZOS,
+        .zimg = true,
+    },
+};
+
+// Highest quality, but also slowest. Added unless the "fast" option is set.
+static const int mp_sws_hq_flags = SWS_FULL_CHR_H_INT | SWS_FULL_CHR_H_INP |
+                                   SWS_ACCURATE_RND;
+
+// Fast, lossy. Exported through sws_utils.h.
+const int mp_sws_fast_flags = SWS_BILINEAR;
+
+// Refresh ctx (filter, flags, zimg permission) from the cached sws options.
+static void mp_sws_update_from_cmdline(struct mp_sws_context *ctx)
+{
+    m_config_cache_update(ctx->opts_cache);
+    const struct sws_opts *o = ctx->opts_cache->opts;
+
+    // Rebuild the source filter; the previous one is freed first.
+    sws_freeFilter(ctx->src_filter);
+    ctx->src_filter = sws_getDefaultFilter(o->lum_gblur, o->chr_gblur,
+                                           o->lum_sharpen, o->chr_sharpen,
+                                           o->chr_hshift, o->chr_vshift, 0);
+    ctx->force_reload = true;
+
+    // Compose the libswscale flag word from the individual options.
+    int flags = SWS_PRINT_INFO | o->scaler;
+    flags |= o->fast ? 0 : mp_sws_hq_flags;
+    flags |= o->bitexact ? SWS_BITEXACT : 0;
+    ctx->flags = flags;
+
+    ctx->allow_zimg = o->zimg;
+}
+
+bool mp_sws_supported_format(int imgfmt)
+{
+    enum AVPixelFormat pixfmt = imgfmt2pixfmt(imgfmt);
+    if (pixfmt == AV_PIX_FMT_NONE) // no libav equivalent for this imgfmt
+        return false;
+    return sws_isSupportedInput(pixfmt) && sws_isSupportedOutput(pixfmt);
+}
+
+#if HAVE_ZIMG
+static bool allow_zimg(struct mp_sws_context *ctx)
+{
+    return ctx->force_scaler == MP_SWS_AUTO ? ctx->allow_zimg // user option
+                                            : ctx->force_scaler == MP_SWS_ZIMG;
+}
+#endif
+
+static bool allow_sws(struct mp_sws_context *ctx) // swscale: forced or auto
+{
+    return ctx->force_scaler == MP_SWS_SWS || ctx->force_scaler == MP_SWS_AUTO;
+}
+
+// True if an enabled backend (zimg first, then swscale) can convert in -> out.
+bool mp_sws_supports_formats(struct mp_sws_context *ctx,
+                             int imgfmt_out, int imgfmt_in)
+{
+#if HAVE_ZIMG
+    if (allow_zimg(ctx) && mp_zimg_supports_in_format(imgfmt_in) &&
+        mp_zimg_supports_out_format(imgfmt_out))
+        return true;
+#endif
+
+    if (!allow_sws(ctx))
+        return false;
+    return sws_isSupportedInput(imgfmt2pixfmt(imgfmt_in)) &&
+           sws_isSupportedOutput(imgfmt2pixfmt(imgfmt_out));
+}
+
+static int mp_csp_to_sws_colorspace(enum mp_csp csp)
+{
+    // The SWS_CS_* macros are just convenience redefinitions of the
+    // AVCOL_SPC_* macros, inside swscale.h.
+    return mp_csp_to_avcol_spc(csp); // so the AVCOL_SPC_* value is used as-is
+}
+
+static bool cache_valid(struct mp_sws_context *ctx)
+{
+    struct mp_sws_context *old = ctx->cached; // copy made by the last setup
+    if (ctx->force_reload)
+        return false;
+    return mp_image_params_equal(&ctx->src, &old->src) &&
+           mp_image_params_equal(&ctx->dst, &old->dst) &&
+           ctx->flags == old->flags && // operands are evaluated in order...
+           ctx->allow_zimg == old->allow_zimg &&
+           ctx->force_scaler == old->force_scaler && // ...option poll is last
+           (!ctx->opts_cache || !m_config_cache_update(ctx->opts_cache));
+}
+
+static void free_mp_sws(void *p)
+{
+    struct mp_sws_context *sws_ctx = p; // installed as talloc destructor
+    TA_FREEP(&sws_ctx->aligned_dst);
+    TA_FREEP(&sws_ctx->aligned_src);
+    sws_freeFilter(sws_ctx->dst_filter);
+    sws_freeFilter(sws_ctx->src_filter);
+    sws_freeContext(sws_ctx->sws);
+}
+
+// You're supposed to set your scaling parameters on the returned context.
+// Free the context with talloc_free().
+struct mp_sws_context *mp_sws_alloc(void *talloc_ctx)
+{
+    struct mp_sws_context *ctx = talloc_ptrtype(talloc_ctx, ctx);
+    *ctx = (struct mp_sws_context) {
+        .log = mp_null_log,
+        .flags = SWS_BILINEAR,
+        .force_reload = true, // the first mp_sws_reinit() always sets up
+        .params = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT},
+        .cached = talloc_zero(ctx, struct mp_sws_context), // child of ctx
+    };
+    talloc_set_destructor(ctx, free_mp_sws);
+
+#if HAVE_ZIMG
+    ctx->zimg = mp_zimg_alloc();
+    talloc_steal(ctx, ctx->zimg); // reparent to ctx
+#endif
+
+    return ctx;
+}
+
+// Make ctx track the global scaler options from now on. Custom settings on
+// ctx (other than possibly .src/.dst) might be overwritten whenever the user
+// changes any options, so don't try to set them afterwards.
+void mp_sws_enable_cmdline_opts(struct mp_sws_context *ctx, struct mpv_global *g)
+{
+    // g should only ever be NULL for tests. A repeated call on a ctx that
+    // already has an option cache does nothing.
+    if (!g || ctx->opts_cache)
+        return;
+
+    ctx->opts_cache = m_config_cache_alloc(ctx, g, &sws_conf);
+    ctx->force_reload = true; // (the update call below sets this as well)
+    mp_sws_update_from_cmdline(ctx);
+
+#if HAVE_ZIMG
+    // Forward the global to the zimg wrapper too.
+    mp_zimg_enable_cmdline_opts(ctx->zimg, g);
+#endif
+}
+
+// Reinitialize if needed. Returns 0 (cache valid), 1 (reinitialized) or <0.
+// Optional, but possibly useful to avoid having to handle mp_sws_scale errors.
+int mp_sws_reinit(struct mp_sws_context *ctx)
+{
+    struct mp_image_params src = ctx->src;
+    struct mp_image_params dst = ctx->dst;
+
+    if (cache_valid(ctx))
+        return 0;
+
+    if (ctx->opts_cache)
+        mp_sws_update_from_cmdline(ctx);
+
+    // Old scaler state dies below; a failed attempt must not look cacheable.
+    ctx->force_reload = true; // cleared again once setup succeeds
+    sws_freeContext(ctx->sws);
+    ctx->sws = NULL;
+    ctx->zimg_ok = false;
+    TA_FREEP(&ctx->aligned_src);
+    TA_FREEP(&ctx->aligned_dst);
+
+#if HAVE_ZIMG
+    if (allow_zimg(ctx)) {
+        ctx->zimg->log = ctx->log;
+        ctx->zimg->src = src;
+        ctx->zimg->dst = dst;
+        if (ctx->zimg_opts)
+            ctx->zimg->opts = *ctx->zimg_opts;
+        if (mp_zimg_config(ctx->zimg)) {
+            ctx->zimg_ok = true;
+            MP_VERBOSE(ctx, "Using zimg.\n");
+            goto success;
+        }
+        MP_WARN(ctx, "Not using zimg, falling back to swscale.\n");
+    }
+#endif
+
+    if (!allow_sws(ctx)) {
+        MP_ERR(ctx, "No scaler.\n");
+        return -1;
+    }
+
+    ctx->sws = sws_alloc_context();
+    if (!ctx->sws)
+        return -1;
+
+    mp_image_params_guess_csp(&src); // sanitize colorspace/colorlevels
+    mp_image_params_guess_csp(&dst);
+
+    enum AVPixelFormat s_fmt = imgfmt2pixfmt(src.imgfmt);
+    if (s_fmt == AV_PIX_FMT_NONE || sws_isSupportedInput(s_fmt) < 1) {
+        MP_ERR(ctx, "Input image format %s not supported by libswscale.\n",
+               mp_imgfmt_to_name(src.imgfmt));
+        return -1;
+    }
+
+    enum AVPixelFormat d_fmt = imgfmt2pixfmt(dst.imgfmt);
+    if (d_fmt == AV_PIX_FMT_NONE || sws_isSupportedOutput(d_fmt) < 1) {
+        MP_ERR(ctx, "Output image format %s not supported by libswscale.\n",
+               mp_imgfmt_to_name(dst.imgfmt));
+        return -1;
+    }
+
+    int s_csp = mp_csp_to_sws_colorspace(src.color.space);
+    int s_range = src.color.levels == MP_CSP_LEVELS_PC;
+    int d_csp = mp_csp_to_sws_colorspace(dst.color.space);
+    int d_range = dst.color.levels == MP_CSP_LEVELS_PC;
+
+    av_opt_set_int(ctx->sws, "sws_flags", ctx->flags, 0);
+
+    av_opt_set_int(ctx->sws, "srcw", src.w, 0);
+    av_opt_set_int(ctx->sws, "srch", src.h, 0);
+    av_opt_set_int(ctx->sws, "src_format", s_fmt, 0);
+
+    av_opt_set_int(ctx->sws, "dstw", dst.w, 0);
+    av_opt_set_int(ctx->sws, "dsth", dst.h, 0);
+    av_opt_set_int(ctx->sws, "dst_format", d_fmt, 0);
+
+    av_opt_set_double(ctx->sws, "param0", ctx->params[0], 0);
+    av_opt_set_double(ctx->sws, "param1", ctx->params[1], 0);
+
+    int cr_src = mp_chroma_location_to_av(src.chroma_location);
+    int cr_dst = mp_chroma_location_to_av(dst.chroma_location);
+    int cr_xpos, cr_ypos;
+#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 37, 100)
+    if (av_chroma_location_enum_to_pos(&cr_xpos, &cr_ypos, cr_src) >= 0) {
+        av_opt_set_int(ctx->sws, "src_h_chr_pos", cr_xpos, 0);
+        av_opt_set_int(ctx->sws, "src_v_chr_pos", cr_ypos, 0);
+    }
+    if (av_chroma_location_enum_to_pos(&cr_xpos, &cr_ypos, cr_dst) >= 0) {
+        av_opt_set_int(ctx->sws, "dst_h_chr_pos", cr_xpos, 0);
+        av_opt_set_int(ctx->sws, "dst_v_chr_pos", cr_ypos, 0);
+    }
+#else
+    if (avcodec_enum_to_chroma_pos(&cr_xpos, &cr_ypos, cr_src) >= 0) {
+        av_opt_set_int(ctx->sws, "src_h_chr_pos", cr_xpos, 0);
+        av_opt_set_int(ctx->sws, "src_v_chr_pos", cr_ypos, 0);
+    }
+    if (avcodec_enum_to_chroma_pos(&cr_xpos, &cr_ypos, cr_dst) >= 0) {
+        av_opt_set_int(ctx->sws, "dst_h_chr_pos", cr_xpos, 0);
+        av_opt_set_int(ctx->sws, "dst_v_chr_pos", cr_ypos, 0);
+    }
+#endif
+
+    // This can fail even with normal operation, e.g. if a conversion path
+    // simply does not support these settings.
+    ctx->supports_csp =
+        sws_setColorspaceDetails(ctx->sws, sws_getCoefficients(s_csp), s_range,
+                                 sws_getCoefficients(d_csp), d_range,
+                                 0, 1 << 16, 1 << 16) >= 0;
+
+    if (sws_init_context(ctx->sws, ctx->src_filter, ctx->dst_filter) < 0)
+        return -1;
+
+#if HAVE_ZIMG
+success:
+#endif
+
+    ctx->force_reload = false;
+    *ctx->cached = *ctx;
+    return 1;
+}
+
+static struct mp_image *check_alignment(struct mp_log *log,
+                                        struct mp_image **alloc,
+                                        struct mp_image *img)
+{
+    // It's completely unclear which alignment libswscale wants (for performance)
+    // or requires (for avoiding crashes and memory corruption).
+    // Is it av_cpu_max_align()? Is it the hardcoded AVFrame "default" of 32
+    // in get_video_buffer()? Is it whatever avcodec_align_dimensions2()
+    // determines? It's like you can't win if you try to prevent libswscale from
+    // corrupting memory...
+    // So use 32, a value that has been experimentally determined to be safe,
+    // and which in most cases is not larger than decoder output. It is smaller
+    // or equal to what most image allocators in mpv/ffmpeg use.
+    size_t align = 32;
+    assert(align <= MP_IMAGE_BYTE_ALIGN); // or mp_image_alloc will not cut it
+
+    bool is_aligned = true; // requires every plane pointer and stride aligned
+    for (int p = 0; p < img->num_planes; p++) {
+        is_aligned &= MP_IS_ALIGNED((uintptr_t)img->planes[p], align);
+        is_aligned &= MP_IS_ALIGNED(labs(img->stride[p]), align);
+    }
+
+    if (is_aligned)
+        return img; // already aligned: use the caller's image directly
+    // Otherwise hand out a substitute image, allocated once and kept in *alloc.
+    if (!*alloc) {
+        mp_verbose(log, "unaligned libswscale parameter; using slow copy.\n");
+        *alloc = mp_image_alloc(img->imgfmt, img->w, img->h);
+        if (!*alloc)
+            return NULL; // allocation failed
+    }
+
+    mp_image_copy_attributes(*alloc, img);
+    return *alloc; // the caller copies pixel data to/from it as needed
+}
+
+// Scale from src to dst - if src/dst have different parameters from previous
+// calls, the context is reinitialized. Return error code. (It can fail if
+// reinitialization was necessary, and swscale returned an error.)
+int mp_sws_scale(struct mp_sws_context *ctx, struct mp_image *dst,
+                 struct mp_image *src)
+{
+    ctx->src = src->params;
+    ctx->dst = dst->params;
+
+    int r = mp_sws_reinit(ctx);
+    if (r < 0) {
+        MP_ERR(ctx, "libswscale initialization failed.\n");
+        return r;
+    }
+
+#if HAVE_ZIMG
+    if (ctx->zimg_ok) // the current setup uses zimg instead of libswscale
+        return mp_zimg_convert(ctx->zimg, dst, src) ? 0 : -1;
+#endif
+
+    if (src->params.color.space == MP_CSP_XYZ && dst->params.color.space != MP_CSP_XYZ) {
+        // swscale has hardcoded gamma 2.2 internally and 2.6 for XYZ
+        dst->params.color.gamma = MP_CSP_TRC_GAMMA22;
+        // and sRGB primaries...
+        dst->params.color.primaries = MP_CSP_PRIM_BT_709;
+        // it doesn't adjust white point though, but it is not worth to support
+        // this case. It would require custom prim with equal energy white point
+        // and sRGB primaries.
+    }
+
+    struct mp_image *a_src = check_alignment(ctx->log, &ctx->aligned_src, src);
+    struct mp_image *a_dst = check_alignment(ctx->log, &ctx->aligned_dst, dst);
+    if (!a_src || !a_dst) {
+        MP_ERR(ctx, "image allocation failed.\n");
+        return -1;
+    }
+
+    if (a_src != src)
+        mp_image_copy(a_src, src); // fill the aligned substitute
+
+    sws_scale(ctx->sws, (const uint8_t *const *) a_src->planes, a_src->stride,
+              0, a_src->h, a_dst->planes, a_dst->stride); // result not checked
+
+    if (a_dst != dst)
+        mp_image_copy(dst, a_dst); // copy the result back to the caller's image
+
+    return 0;
+}
+
+int mp_image_swscale(struct mp_image *dst, struct mp_image *src,
+                     int my_sws_flags)
+{
+    struct mp_sws_context *scaler = mp_sws_alloc(NULL); // one-shot context
+    scaler->flags = my_sws_flags;
+    int status = mp_sws_scale(scaler, dst, src);
+    talloc_free(scaler);
+    return status;
+}
+
+int mp_image_sw_blur_scale(struct mp_image *dst, struct mp_image *src,
+                           float gblur)
+{
+    struct mp_sws_context *scaler = mp_sws_alloc(NULL); // one-shot context
+    scaler->src_filter = sws_getDefaultFilter(gblur, gblur, 0, 0, 0, 0, 0);
+    scaler->flags = SWS_LANCZOS | mp_sws_hq_flags;
+    scaler->force_reload = true;
+    int status = mp_sws_scale(scaler, dst, src);
+    talloc_free(scaler); // the destructor also frees src_filter
+    return status;
+}
+
+static const int endian_swaps[][2] = { // {from, to} AVPixelFormat pairs
+#if BYTE_ORDER == LITTLE_ENDIAN
+#if defined(AV_PIX_FMT_YA16) && defined(AV_PIX_FMT_RGBA64)
+    {AV_PIX_FMT_YA16BE,     AV_PIX_FMT_YA16LE},
+    {AV_PIX_FMT_RGBA64BE,   AV_PIX_FMT_RGBA64LE},
+    {AV_PIX_FMT_GRAY16BE,   AV_PIX_FMT_GRAY16LE},
+    {AV_PIX_FMT_RGB48BE,    AV_PIX_FMT_RGB48LE},
+#endif
+#endif
+    {AV_PIX_FMT_NONE,       AV_PIX_FMT_NONE} // terminator (sole entry otherwise)
+};
+
+// Convert _some_ non-native endian formats to native endian in place. This is
+// done specifically for the pixel formats used by PNG, so they need not go
+// through libswscale, which might reduce the effective bit depth.
+struct mp_image *mp_img_swap_to_native(struct mp_image *img)
+{
+    int src_pixfmt = imgfmt2pixfmt(img->imgfmt);
+    int native_pixfmt = AV_PIX_FMT_NONE;
+    for (int i = 0; endian_swaps[i][0] != AV_PIX_FMT_NONE; i++) {
+        if (endian_swaps[i][0] == src_pixfmt)
+            native_pixfmt = endian_swaps[i][1];
+    }
+    if (native_pixfmt == AV_PIX_FMT_NONE || !mp_image_make_writeable(img))
+        return img; // not in the table, or cannot be modified: leave as is
+    int words_per_row = img->fmt.bpp[0] / 8 / 2 * img->w; // 16-bit units
+    for (int y = 0; y < img->h; y++) {
+        uint16_t *row = (uint16_t *)(img->planes[0] + y * img->stride[0]);
+        for (int x = 0; x < words_per_row; x++)
+            row[x] = av_be2ne16(row[x]);
+    }
+    mp_image_setfmt(img, pixfmt2imgfmt(native_pixfmt));
+    return img;
+}
+
+// vim: ts=4 sw=4 et tw=80
diff --git a/video/sws_utils.h b/video/sws_utils.h
new file mode 100644
index 0000000..24bec07
--- /dev/null
+++ b/video/sws_utils.h
@@ -0,0 +1,82 @@
+#ifndef MPLAYER_SWS_UTILS_H
+#define MPLAYER_SWS_UTILS_H
+
+#include <stdbool.h>
+
+#include "mp_image.h"
+
+struct mp_image;
+struct mpv_global;
+
+// libswscale currently requires 16 bytes alignment for row pointers and
+// strides. Otherwise, it will print warnings and use slow codepaths.
+// Guaranteed to be a power of 2 and > 1.
+#define SWS_MIN_BYTE_ALIGN MP_IMAGE_BYTE_ALIGN
+
+extern const int mp_sws_fast_flags;
+
+bool mp_sws_supported_format(int imgfmt);
+
+int mp_image_swscale(struct mp_image *dst, struct mp_image *src,
+                     int my_sws_flags);
+
+int mp_image_sw_blur_scale(struct mp_image *dst, struct mp_image *src,
+                           float gblur);
+
+enum mp_sws_scaler {
+    MP_SWS_AUTO = 0, // use command line
+    MP_SWS_SWS,      // force libswscale (zimg is not tried)
+    MP_SWS_ZIMG,     // force zimg (no libswscale fallback)
+};
+
+struct mp_sws_context {
+    // Can be set for verbose error printing.
+    struct mp_log *log;
+    // User configuration. These can be changed freely, at any time.
+    // mp_sws_scale() will handle the changes transparently.
+    int flags;
+    bool allow_zimg; // use zimg if available (ignores filters and all)
+    bool force_reload; // if set, the next mp_sws_reinit() redoes the setup
+    // These are also implicitly set by mp_sws_scale(), and thus optional.
+    // Setting them before that call makes sense when using mp_sws_reinit().
+    struct mp_image_params src, dst;
+
+    // This is unfortunately a hack: bypass command line choice
+    enum mp_sws_scaler force_scaler;
+
+    // If zimg is used. Need to manually invalidate cache (set force_reload).
+    // Conflicts with enabling command line opts.
+    struct zimg_opts *zimg_opts;
+
+    // Changing these requires setting force_reload=true.
+    // By default, they are NULL.
+    // Freeing the mp_sws_context will deallocate these if set.
+    struct SwsFilter *src_filter, *dst_filter;
+    double params[2];
+
+    // Cached context (if any)
+    struct SwsContext *sws;
+    bool supports_csp; // whether sws_setColorspaceDetails() succeeded
+
+    // Private.
+    struct m_config_cache *opts_cache;
+    struct mp_sws_context *cached; // contains parameters for which sws is valid
+    struct mp_zimg_context *zimg; // only allocated in HAVE_ZIMG builds
+    bool zimg_ok; // true if the current setup uses zimg
+    struct mp_image *aligned_src, *aligned_dst; // substitutes for unaligned images
+};
+
+struct mp_sws_context *mp_sws_alloc(void *talloc_ctx);
+void mp_sws_enable_cmdline_opts(struct mp_sws_context *ctx, struct mpv_global *g);
+int mp_sws_reinit(struct mp_sws_context *ctx);
+int mp_sws_scale(struct mp_sws_context *ctx, struct mp_image *dst,
+                 struct mp_image *src);
+
+bool mp_sws_supports_formats(struct mp_sws_context *ctx,
+                             int imgfmt_out, int imgfmt_in);
+
+struct mp_image *mp_img_swap_to_native(struct mp_image *img);
+
+#endif /* MPLAYER_SWS_UTILS_H */
+
+// vim: ts=4 sw=4 et tw=80
diff --git a/video/vaapi.c b/video/vaapi.c
new file mode 100644
index 0000000..08248a7
--- /dev/null
+++ b/video/vaapi.c
@@ -0,0 +1,288 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include "config.h"
+
+#include "vaapi.h"
+#include "common/common.h"
+#include "common/msg.h"
+#include "osdep/threads.h"
+#include "mp_image.h"
+#include "img_format.h"
+#include "mp_image_pool.h"
+#include "options/m_config.h"
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_vaapi.h>
+
+struct vaapi_opts {
+    char *path; // device path handed to each native display's create()
+};
+
+#define OPT_BASE_STRUCT struct vaapi_opts
+const struct m_sub_options vaapi_conf = {
+    .opts = (const struct m_option[]) {
+        {"device", OPT_STRING(path)}, // the "device" option stores into path
+        {0},
+    },
+    .defaults = &(const struct vaapi_opts) {
+        .path = "/dev/dri/renderD128", // default value of path
+    },
+    .size = sizeof(struct vaapi_opts),
+};
+
+int va_get_colorspace_flag(enum mp_csp csp)
+{
+    // Only these three colorspaces have a VA_SRC_* flag; others yield 0.
+    if (csp == MP_CSP_BT_601)
+        return VA_SRC_BT601;
+    if (csp == MP_CSP_BT_709)
+        return VA_SRC_BT709;
+    return csp == MP_CSP_SMPTE_240M ? VA_SRC_SMPTE_240 : 0;
+}
+
+static void va_message_callback(void *context, const char *msg, int mp_level)
+{
+    struct mp_vaapi_ctx *res = context; // user data set in va_initialize()
+    mp_msg(res->log, mp_level, "libva: %s", msg);
+}
+
+static void va_error_callback(void *context, const char *msg)
+{
+    va_message_callback(context, msg, MSGL_ERR); // libva errors -> MSGL_ERR
+}
+
+static void va_info_callback(void *context, const char *msg)
+{
+    va_message_callback(context, msg, MSGL_DEBUG); // libva info -> MSGL_DEBUG
+}
+
+static void free_device_ref(struct AVHWDeviceContext *hwctx)
+{
+    struct mp_vaapi_ctx *ctx = hwctx->user_opaque; // set in va_initialize()
+    // display is NULL if va_initialize() failed; its caller cleans up then.
+    if (ctx->display)
+        vaTerminate(ctx->display);
+    // Only set for contexts made by va_create_standalone().
+    if (ctx->destroy_native_ctx)
+        ctx->destroy_native_ctx(ctx->native_ctx);
+
+    talloc_free(ctx);
+}
+
+struct mp_vaapi_ctx *va_initialize(VADisplay *display, struct mp_log *plog,
+                                   bool probing)
+{
+    AVBufferRef *avref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VAAPI);
+    if (!avref)
+        return NULL;
+
+    AVHWDeviceContext *hwctx = (void *)avref->data;
+    AVVAAPIDeviceContext *vactx = hwctx->hwctx;
+    // Freed by free_device_ref() once the last reference to avref is gone.
+    struct mp_vaapi_ctx *res = talloc_ptrtype(NULL, res);
+    *res = (struct mp_vaapi_ctx) {
+        .log = mp_log_new(res, plog, "/vaapi"),
+        .display = display,
+        .av_device_ref = avref,
+        .hwctx = {
+            .av_device_ref = avref,
+        },
+    };
+
+    hwctx->free = free_device_ref;
+    hwctx->user_opaque = res;
+    // Route libva messages to res->log before initializing the display.
+    vaSetErrorCallback(display, va_error_callback, res);
+    vaSetInfoCallback(display,  va_info_callback,  res);
+
+    int major, minor;
+    int status = vaInitialize(display, &major, &minor);
+    if (status != VA_STATUS_SUCCESS) {
+        if (!probing) // stay quiet when merely probing for VAAPI
+            MP_ERR(res, "Failed to initialize VAAPI: %s\n", vaErrorStr(status));
+        goto error;
+    }
+    MP_VERBOSE(res, "Initialized VAAPI: version %d.%d\n", major, minor);
+
+    vactx->display = res->display;
+
+    if (av_hwdevice_ctx_init(res->av_device_ref) < 0)
+        goto error;
+
+    return res;
+
+error: // on failure the display is left to the caller
+    res->display = NULL; // do not vaTerminate this
+    va_destroy(res);
+    return NULL;
+}
+
+// Undo va_initialize, and close the VADisplay. A NULL ctx is a no-op.
+void va_destroy(struct mp_vaapi_ctx *ctx)
+{
+    if (ctx) {
+        // Unref a local copy; the free callback also releases ctx itself.
+        AVBufferRef *ref = ctx->av_device_ref;
+        av_buffer_unref(&ref);
+    }
+}
+
+VASurfaceID va_surface_id(struct mp_image *mpi)
+{
+    bool is_va = mpi && mpi->imgfmt == IMGFMT_VAAPI; // id is in planes[3]
+    return is_va ? (VASurfaceID)(uintptr_t)mpi->planes[3] : VA_INVALID_ID;
+}
+
+static bool is_emulated(struct AVBufferRef *hw_device_ctx)
+{
+    AVHWDeviceContext *hwctx = (void *)hw_device_ctx->data;
+    AVVAAPIDeviceContext *vactx = hwctx->hwctx;
+    // "Emulated" here means the vendor string mentions "VDPAU backend".
+    const char *s = vaQueryVendorString(vactx->display);
+    return s && strstr(s, "VDPAU backend");
+}
+
+// Same check as is_emulated(), for callers that hold a mp_vaapi_ctx.
+bool va_guess_if_emulated(struct mp_vaapi_ctx *ctx)
+{
+    return is_emulated(ctx->av_device_ref);
+}
+
+struct va_native_display {
+    void (*create)(VADisplay **out_display, void **out_native_ctx,
+                   const char *path); // *out_native_ctx is set on success only
+    void (*destroy)(void *native_ctx); // frees what create() handed out
+};
+
+#if HAVE_VAAPI_X11
+#include <X11/Xlib.h>
+#include <va/va_x11.h>
+
+static void x11_destroy(void *native_ctx)
+{
+    XCloseDisplay(native_ctx); // native_ctx is the Display from x11_create()
+}
+
+static void x11_create(VADisplay **out_display, void **out_native_ctx,
+                       const char *path)
+{
+    void *xdpy = XOpenDisplay(NULL); // default display; path is not used
+    if (!xdpy)
+        return;
+    *out_display = vaGetDisplay(xdpy);
+    if (!*out_display) {
+        XCloseDisplay(xdpy); // no VADisplay: close the connection again
+        return;
+    }
+    *out_native_ctx = xdpy;
+}
+
+static const struct va_native_display disp_x11 = {
+    .create = x11_create,   // opens the default X display
+    .destroy = x11_destroy, // closes it again
+};
+#endif
+
+#if HAVE_VAAPI_DRM
+#include <unistd.h>
+#include <fcntl.h>
+#include <va/va_drm.h>
+
+struct va_native_display_drm {
+    int drm_fd; // opened by drm_create(), closed by drm_destroy()
+};
+
+static void drm_destroy(void *native_ctx)
+{
+    struct va_native_display_drm *ctx = native_ctx; // made by drm_create()
+    close(ctx->drm_fd);
+    talloc_free(ctx);
+}
+
+static void drm_create(VADisplay **out_display, void **out_native_ctx,
+                       const char *path)
+{
+    int drm_fd = open(path, O_RDWR | O_CLOEXEC); // keep fd out of children
+    if (drm_fd < 0)
+        return;
+    // ctx only carries the fd so that drm_destroy() can close it later.
+    struct va_native_display_drm *ctx = talloc_ptrtype(NULL, ctx);
+    ctx->drm_fd = drm_fd;
+    *out_display = vaGetDisplayDRM(drm_fd);
+    if (*out_display) {
+        *out_native_ctx = ctx;
+        return;
+    }
+    // No VADisplay for this node: undo it all, *out_native_ctx stays unset.
+    close(drm_fd);
+    talloc_free(ctx);
+}
+
+static const struct va_native_display disp_drm = {
+    .create = drm_create,   // opens the device node given by path
+    .destroy = drm_destroy, // closes the fd and frees the wrapper
+};
+#endif
+
+static const struct va_native_display *const native_displays[] = {
+#if HAVE_VAAPI_DRM
+    &disp_drm,
+#endif
+#if HAVE_VAAPI_X11
+    &disp_x11,
+#endif
+    NULL // terminator; va_create_standalone() walks the list in order
+};
+
+static struct AVBufferRef *va_create_standalone(struct mpv_global *global,
+        struct mp_log *log, struct hwcontext_create_dev_params *params)
+{
+    struct AVBufferRef *ret = NULL;
+    struct vaapi_opts *opts = mp_get_config_group(NULL, global, &vaapi_conf);
+    // Stop at the first backend that yields a VADisplay, even if init fails.
+    for (int n = 0; native_displays[n]; n++) {
+        VADisplay *display = NULL;
+        void *native_ctx = NULL;
+        native_displays[n]->create(&display, &native_ctx, opts->path);
+        if (display) {
+            struct mp_vaapi_ctx *ctx =
+                va_initialize(display, log, params->probing);
+            if (!ctx) { // init failed and left the display to us
+                vaTerminate(display);
+                native_displays[n]->destroy(native_ctx);
+                goto end;
+            }
+            ctx->native_ctx = native_ctx; // released by free_device_ref()
+            ctx->destroy_native_ctx = native_displays[n]->destroy;
+            ret = ctx->hwctx.av_device_ref;
+            goto end;
+        }
+    }
+
+end:
+    talloc_free(opts);
+    return ret; // NULL if no backend produced a working device
+}
+
+const struct hwcontext_fns hwcontext_fns_vaapi = {
+    .av_hwdevice_type = AV_HWDEVICE_TYPE_VAAPI,
+    .create_dev = va_create_standalone, // opens its own DRM/X11 display
+    .is_emulated = is_emulated,         // vendor-string based guess
+};
diff --git a/video/vaapi.h b/video/vaapi.h
new file mode 100644
index 0000000..56235bc
--- /dev/null
+++ b/video/vaapi.h
@@ -0,0 +1,54 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#ifndef MPV_VAAPI_H
+#define MPV_VAAPI_H
+
+#include <stdbool.h>
+#include <inttypes.h>
+#include <va/va.h>
+
+#include "mp_image.h"
+#include "hwdec.h"
+
+struct mp_vaapi_ctx {
+    struct mp_hwdec_ctx hwctx; // hwctx.av_device_ref mirrors av_device_ref
+    struct mp_log *log;        // child log created in va_initialize()
+    VADisplay display;         // vaTerminate()d on free unless NULL
+    struct AVBufferRef *av_device_ref; // AVVAAPIDeviceContext*
+    // Internal, for va_create_standalone()
+    void *native_ctx;
+    void (*destroy_native_ctx)(void *native_ctx);
+};
+// Test the caller's local "status": true on success, else log and false.
+#define CHECK_VA_STATUS_LEVEL(ctx, msg, level) \
+    (status == VA_STATUS_SUCCESS ? true \
+        : (MP_MSG(ctx, level, "%s failed (%s)\n", msg, vaErrorStr(status)), false))
+// As above, with the failure logged at MSGL_ERR.
+#define CHECK_VA_STATUS(ctx, msg) \
+    CHECK_VA_STATUS_LEVEL(ctx, msg, MSGL_ERR)
+
+int                      va_get_colorspace_flag(enum mp_csp csp);
+
+struct mp_vaapi_ctx *    va_initialize(VADisplay *display, struct mp_log *plog, bool probing);
+void                     va_destroy(struct mp_vaapi_ctx *ctx);
+
+VASurfaceID va_surface_id(struct mp_image *mpi);
+
+bool va_guess_if_emulated(struct mp_vaapi_ctx *ctx);
+
+#endif
diff --git a/video/vdpau.c b/video/vdpau.c
new file mode 100644
index 0000000..15985d6
--- /dev/null
+++ b/video/vdpau.c
@@ -0,0 +1,574 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <libavutil/hwcontext.h>
+#include <libavutil/hwcontext_vdpau.h>
+
+#include "vdpau.h"
+
+#include "osdep/threads.h"
+#include "osdep/timer.h"
+
+#include "video/out/x11_common.h"
+#include "img_format.h"
+#include "mp_image.h"
+#include "mp_image_pool.h"
+#include "vdpau_mixer.h"
+
+static void mark_vdpau_objects_uninitialized(struct mp_vdpau_ctx *ctx)
+{
+    // Only resets the bookkeeping; no VDPAU destroy function is called.
+    for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
+        struct surface_entry *e = &ctx->video_surfaces[n];
+        e->surface = e->osurface = VDP_INVALID_HANDLE;
+        e->allocated = false;
+    }
+    ctx->vdp_device = ctx->preemption_obj = VDP_INVALID_HANDLE;
+}
+
+static void preemption_callback(VdpDevice device, void *context)
+{
+    struct mp_vdpau_ctx *ctx = context; // registered with this ctx pointer
+    // Only flag the event here; handle_preemption() does the recovery.
+    mp_mutex_lock(&ctx->preempt_lock);
+    ctx->is_preempted = true;
+    mp_mutex_unlock(&ctx->preempt_lock);
+}
+
+static int win_x11_init_vdpau_procs(struct mp_vdpau_ctx *ctx, bool probing)
+{
+    Display *x11 = ctx->x11;
+    VdpStatus vdp_st;
+
+    // Don't operate on ctx->vdp directly, so that even if init fails, ctx->vdp
+    // will have the function pointers from the previous successful init, and
+    // won't randomly make other code crash on calling NULL pointers.
+    struct vdp_functions vdp = {0};
+
+    if (!x11)
+        return -1;
+
+    struct vdp_function {
+        const int id;
+        int offset;
+    };
+
+    static const struct vdp_function vdp_func[] = {
+#define VDP_FUNCTION(_, macro_name, mp_name) {macro_name, offsetof(struct vdp_functions, mp_name)},
+#include "video/vdpau_functions.inc"
+#undef VDP_FUNCTION
+        {0, -1} // negative offset ends the table
+    };
+
+    VdpGetProcAddress *get_proc_address;
+    vdp_st = vdp_device_create_x11(x11, DefaultScreen(x11), &ctx->vdp_device,
+                                   &get_proc_address);
+    if (vdp_st != VDP_STATUS_OK) {
+        if (ctx->is_preempted) {
+            MP_DBG(ctx, "Error calling vdp_device_create_x11 while preempted: %d\n",
+                   vdp_st);
+        } else {
+            int lev = probing ? MSGL_V : MSGL_ERR;
+            mp_msg(ctx->log, lev, "Error when calling vdp_device_create_x11: %d\n",
+                   vdp_st);
+        }
+        return -1;
+    }
+    // Resolve every listed function into the local vdp table.
+    for (const struct vdp_function *dsc = vdp_func; dsc->offset >= 0; dsc++) {
+        vdp_st = get_proc_address(ctx->vdp_device, dsc->id,
+                                  (void **)((char *)&vdp + dsc->offset));
+        if (vdp_st != VDP_STATUS_OK) {
+            MP_ERR(ctx, "Error when calling vdp_get_proc_address(function "
+                   "id %d): %s\n",  dsc->id,
+                   vdp.get_error_string ? vdp.get_error_string(vdp_st) : "?");
+            return -1;
+        }
+    }
+    // Everything resolved: only now publish the new table in ctx.
+    ctx->vdp = vdp;
+    ctx->get_proc_address = get_proc_address;
+    // Point the libavutil device context (if any) at the new device.
+    if (ctx->av_device_ref) {
+        AVHWDeviceContext *hwctx = (void *)ctx->av_device_ref->data;
+        AVVDPAUDeviceContext *vdctx = hwctx->hwctx;
+
+        vdctx->device = ctx->vdp_device;
+        vdctx->get_proc_address = ctx->get_proc_address;
+    }
+    // 1x1 dummy surface, used by mp_vdpau_handle_preemption() as a probe.
+    vdp_st = vdp.output_surface_create(ctx->vdp_device, VDP_RGBA_FORMAT_B8G8R8A8,
+                                       1, 1, &ctx->preemption_obj);
+    if (vdp_st != VDP_STATUS_OK) {
+        MP_ERR(ctx, "Could not create dummy object: %s\n",
+               vdp.get_error_string(vdp_st));
+        return -1;
+    }
+
+    vdp.preemption_callback_register(ctx->vdp_device, preemption_callback, ctx);
+    return 0; // 0 on success; every failure path above returns -1
+}
+
+static int handle_preemption(struct mp_vdpau_ctx *ctx)
+{
+    if (!ctx->is_preempted)
+        return 0; // not preempted: nothing to do
+    mark_vdpau_objects_uninitialized(ctx); // forget every old handle
+    if (!ctx->preemption_user_notified) { // report the event only once
+        MP_ERR(ctx, "Got display preemption notice! Will attempt to recover.\n");
+        ctx->preemption_user_notified = true;
+    }
+    /* Trying to initialize seems to be quite slow, so only try once a
+     * second to avoid using 100% CPU. */
+    if (ctx->last_preemption_retry_fail &&
+        mp_time_sec() - ctx->last_preemption_retry_fail < 1.0)
+        return -1; // last failed retry was under a second ago
+    if (win_x11_init_vdpau_procs(ctx, false) < 0) {
+        ctx->last_preemption_retry_fail = mp_time_sec();
+        return -1; // still preempted
+    }
+    ctx->preemption_user_notified = false;
+    ctx->last_preemption_retry_fail = 0;
+    ctx->is_preempted = false;
+    ctx->preemption_counter++; // tells counter-holding callers to recover
+    MP_INFO(ctx, "Recovered from display preemption.\n");
+    return 1; // recovered during this call
+}
+
+// Check whether vdpau display preemption happened. The caller provides a
+// preemption counter, which contains the logical timestamp of the last
+// preemption handled by the caller. The counter can be 0 for init.
+// If counter is NULL, only ever return -1 or 1.
+// Return values:
+//  -1: the display is currently preempted, and vdpau can't be used
+//   0: a preemption event happened, and the caller must recover
+//      (*counter is updated, and a second call will report status ok)
+//   1: everything is fine, no preemption happened
+int mp_vdpau_handle_preemption(struct mp_vdpau_ctx *ctx, uint64_t *counter)
+{
+    int r = 1;
+    mp_mutex_lock(&ctx->preempt_lock);
+    // Unchecked dummy write, presumably to provoke the preemption callback.
+    const void *p[4] = {&(uint32_t){0}};
+    uint32_t stride[4] = {4};
+    VdpRect rc = {0};
+    ctx->vdp.output_surface_put_bits_native(ctx->preemption_obj, p, stride, &rc);
+
+    // First time init: adopt the current counter value
+    if (counter && !*counter)
+        *counter = ctx->preemption_counter;
+
+    if (handle_preemption(ctx) < 0)
+        r = -1;
+    // The counter moved since the caller last looked: report the recovery.
+    if (counter && r > 0 && *counter < ctx->preemption_counter) {
+        *counter = ctx->preemption_counter;
+        r = 0; // signal recovery after preemption
+    }
+
+    mp_mutex_unlock(&ctx->preempt_lock);
+    return r;
+}
+
+struct surface_ref {
+    struct mp_vdpau_ctx *ctx; // owner of the surface pool
+    int index;                // slot in ctx->video_surfaces[]
+};
+
+static void release_decoder_surface(void *ptr)
+{
+    struct surface_ref *r = ptr; // allocated in create_ref()
+    struct mp_vdpau_ctx *ctx = r->ctx;
+    // Return the slot to the pool; the VDPAU surface itself is kept.
+    mp_mutex_lock(&ctx->pool_lock);
+    assert(ctx->video_surfaces[r->index].in_use);
+    ctx->video_surfaces[r->index].in_use = false;
+    mp_mutex_unlock(&ctx->pool_lock);
+
+    talloc_free(r);
+}
+
+static struct mp_image *create_ref(struct mp_vdpau_ctx *ctx, int index)
+{
+    struct surface_entry *e = &ctx->video_surfaces[index];
+    assert(!e->in_use); // mp_vdpau_get_surface() calls this with pool_lock held
+    e->in_use = true;
+    e->age = ctx->age_counter++; // larger age == handed out more recently
+    struct surface_ref *ref = talloc_ptrtype(NULL, ref);
+    *ref = (struct surface_ref){ctx, index};
+    struct mp_image *res =
+        mp_image_new_custom_ref(NULL, ref, release_decoder_surface);
+    if (res) { // NOTE(review): if NULL, in_use stays true - confirm intent
+        mp_image_setfmt(res, e->rgb ? IMGFMT_VDPAU_OUTPUT : IMGFMT_VDPAU);
+        mp_image_set_size(res, e->w, e->h);
+        res->planes[0] = (void *)"dummy"; // must be non-NULL, otherwise arbitrary
+        res->planes[3] = (void *)(intptr_t)(e->rgb ? e->osurface : e->surface);
+    }
+    return res; // the surface handle travels in planes[3]
+}
+
+static struct mp_image *mp_vdpau_get_surface(struct mp_vdpau_ctx *ctx,
+                                             VdpChromaType chroma,
+                                             VdpRGBAFormat rgb_format,
+                                             bool rgb, int w, int h)
+{
+    struct vdp_functions *vdp = &ctx->vdp;
+    int surface_index = -1;
+    VdpStatus vdp_st;
+    // Wipe the parameter that does not apply to the requested surface kind.
+    if (rgb) {
+        chroma = (VdpChromaType)-1;
+    } else {
+        rgb_format = (VdpRGBAFormat)-1;
+    }
+
+    mp_mutex_lock(&ctx->pool_lock);
+
+    // Destroy all unused surfaces that don't have matching parameters
+    for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
+        struct surface_entry *e = &ctx->video_surfaces[n];
+        if (!e->in_use && e->allocated) {
+            if (e->w != w || e->h != h || e->rgb != rgb ||
+                e->chroma != chroma || e->rgb_format != rgb_format)
+            {
+                if (e->rgb) {
+                    vdp_st = vdp->output_surface_destroy(e->osurface);
+                } else {
+                    vdp_st = vdp->video_surface_destroy(e->surface);
+                }
+                CHECK_VDP_WARNING(ctx, "Error when destroying surface");
+                e->surface = e->osurface = VDP_INVALID_HANDLE;
+                e->allocated = false;
+            }
+        }
+    }
+
+    // Try to find an existing unused surface
+    for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
+        struct surface_entry *e = &ctx->video_surfaces[n];
+        if (!e->in_use && e->allocated) {
+            assert(e->w == w && e->h == h);
+            assert(e->chroma == chroma);
+            assert(e->rgb_format == rgb_format);
+            assert(e->rgb == rgb);
+            if (surface_index >= 0) {
+                struct surface_entry *other = &ctx->video_surfaces[surface_index];
+                if (other->age < e->age)
+                    continue; // prefer the entry with the lowest age
+            }
+            surface_index = n;
+        }
+    }
+
+    if (surface_index >= 0)
+        goto done;
+
+    // Allocate new surface
+    for (int n = 0; n < MAX_VIDEO_SURFACES; n++) {
+        struct surface_entry *e = &ctx->video_surfaces[n];
+        if (!e->in_use) {
+            assert(e->surface == VDP_INVALID_HANDLE);
+            assert(e->osurface == VDP_INVALID_HANDLE);
+            assert(!e->allocated);
+            e->chroma = chroma;
+            e->rgb_format = rgb_format;
+            e->rgb = rgb;
+            e->w = w;
+            e->h = h;
+            if (mp_vdpau_handle_preemption(ctx, NULL) >= 0) {
+                if (rgb) {
+                    vdp_st = vdp->output_surface_create(ctx->vdp_device, rgb_format,
+                                                        w, h, &e->osurface);
+                    e->allocated = e->osurface != VDP_INVALID_HANDLE;
+                } else {
+                    vdp_st = vdp->video_surface_create(ctx->vdp_device, chroma,
+                                                    w, h, &e->surface);
+                    e->allocated = e->surface != VDP_INVALID_HANDLE;
+                }
+                CHECK_VDP_WARNING(ctx, "Error when allocating surface");
+            } else { // preempted: hand the slot out without a surface
+                e->allocated = false;
+                e->osurface = VDP_INVALID_HANDLE;
+                e->surface = VDP_INVALID_HANDLE;
+            }
+            surface_index = n; // the slot is used even if allocation failed
+            goto done;
+        }
+    }
+
+done: ;
+    struct mp_image *mpi = NULL;
+    if (surface_index >= 0)
+        mpi = create_ref(ctx, surface_index);
+
+    mp_mutex_unlock(&ctx->pool_lock);
+
+    if (!mpi)
+        MP_ERR(ctx, "no surfaces available in mp_vdpau_get_video_surface\n");
+    return mpi;
+}
+
+struct mp_image *mp_vdpau_get_video_surface(struct mp_vdpau_ctx *ctx,
+                                            VdpChromaType chroma, int w, int h)
+{
+    return mp_vdpau_get_surface(ctx, chroma, 0, false, w, h); // rgb=false
+}
+
+static void free_device_ref(struct AVHWDeviceContext *hwctx)
+{
+    struct mp_vdpau_ctx *ctx = hwctx->user_opaque;
+    // vdp and vdp_st are also what CHECK_VDP_WARNING() refers to.
+    struct vdp_functions *vdp = &ctx->vdp;
+    VdpStatus vdp_st;
+    // Release pooled surfaces, then the preemption probe, then the device.
+    for (int i = 0; i < MAX_VIDEO_SURFACES; i++) {
+        // can't hold references past context lifetime
+        assert(!ctx->video_surfaces[i].in_use);
+        if (ctx->video_surfaces[i].surface != VDP_INVALID_HANDLE) {
+            vdp_st = vdp->video_surface_destroy(ctx->video_surfaces[i].surface);
+            CHECK_VDP_WARNING(ctx, "Error when calling vdp_video_surface_destroy");
+        }
+        if (ctx->video_surfaces[i].osurface != VDP_INVALID_HANDLE) {
+            vdp_st = vdp->output_surface_destroy(ctx->video_surfaces[i].osurface);
+            CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
+        }
+    }
+
+    if (ctx->preemption_obj != VDP_INVALID_HANDLE) {
+        vdp_st = vdp->output_surface_destroy(ctx->preemption_obj);
+        CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy");
+    }
+    // device_destroy is NULL if the function table was never loaded.
+    if (vdp->device_destroy && ctx->vdp_device != VDP_INVALID_HANDLE) {
+        vdp_st = vdp->device_destroy(ctx->vdp_device);
+        CHECK_VDP_WARNING(ctx, "Error when calling vdp_device_destroy");
+    }
+    // Set only by vdpau_create_standalone(), which opened the display.
+    if (ctx->close_display)
+        XCloseDisplay(ctx->x11);
+
+    mp_mutex_destroy(&ctx->pool_lock);
+    mp_mutex_destroy(&ctx->preempt_lock);
+    talloc_free(ctx);
+}
+
+struct mp_vdpau_ctx *mp_vdpau_create_device_x11(struct mp_log *log, Display *x11,
+                                                bool probing)
+{
+    AVBufferRef *avref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VDPAU);
+    if (!avref)
+        return NULL;
+
+    AVHWDeviceContext *hwctx = (void *)avref->data;
+    AVVDPAUDeviceContext *vdctx = hwctx->hwctx;
+    // Freed by free_device_ref() once the last reference to avref is gone.
+    struct mp_vdpau_ctx *ctx = talloc_ptrtype(NULL, ctx);
+    *ctx = (struct mp_vdpau_ctx) {
+        .log = log,
+        .x11 = x11,
+        .preemption_counter = 1, // 0 means "not initialized" to callers
+        .av_device_ref = avref,
+        .hwctx = {
+            .av_device_ref = avref,
+        },
+    };
+    mp_mutex_init_type(&ctx->preempt_lock, MP_MUTEX_RECURSIVE);
+    mp_mutex_init(&ctx->pool_lock);
+
+    hwctx->free = free_device_ref;
+    hwctx->user_opaque = ctx;
+    // Start with all handles invalid so a failed init can be torn down.
+    mark_vdpau_objects_uninitialized(ctx);
+
+    if (win_x11_init_vdpau_procs(ctx, probing) < 0) {
+        mp_vdpau_destroy(ctx); // x11 is not closed: close_display is unset
+        return NULL;
+    }
+    // Hand the device to libavutil before initializing its context.
+    vdctx->device = ctx->vdp_device;
+    vdctx->get_proc_address = ctx->get_proc_address;
+
+    if (av_hwdevice_ctx_init(ctx->av_device_ref) < 0) {
+        mp_vdpau_destroy(ctx);
+        return NULL;
+    }
+
+    return ctx;
+}
+
+void mp_vdpau_destroy(struct mp_vdpau_ctx *ctx)
+{
+    if (ctx) {
+        // Unref a local copy; the free callback also releases ctx itself.
+        AVBufferRef *ref = ctx->av_device_ref;
+        av_buffer_unref(&ref);
+    }
+}
+
+// Map an mpv image format to the VDPAU chroma type and YCbCr format.
+// Returns false (leaving the out parameters alone) for unknown formats.
+// IMGFMT_VDPAU maps to 4:2:0 with a YCbCr format of -1.
+bool mp_vdpau_get_format(int imgfmt, VdpChromaType *out_chroma_type,
+                         VdpYCbCrFormat *out_pixel_format)
+{
+    // These defaults are what IMGFMT_VDPAU ends up with.
+    VdpChromaType chroma_type = VDP_CHROMA_TYPE_420;
+    VdpYCbCrFormat pixel_format = (VdpYCbCrFormat)-1;
+
+    if (imgfmt == IMGFMT_420P) {
+        pixel_format = VDP_YCBCR_FORMAT_YV12;
+    } else if (imgfmt == IMGFMT_NV12) {
+        pixel_format = VDP_YCBCR_FORMAT_NV12;
+    } else if (imgfmt == IMGFMT_UYVY) {
+        pixel_format = VDP_YCBCR_FORMAT_UYVY;
+        chroma_type = VDP_CHROMA_TYPE_422;
+    } else if (imgfmt != IMGFMT_VDPAU) {
+        // Unsupported format: the out parameters stay untouched.
+        return false;
+    }
+
+    // Both out parameters are optional.
+    if (out_chroma_type)
+        *out_chroma_type = chroma_type;
+    if (out_pixel_format)
+        *out_pixel_format = pixel_format;
+    return true;
+}
+
+// Map an mpv image format to a VDPAU RGBA format. Only IMGFMT_BGRA is
+// supported; anything else returns false and leaves *out_rgba_format alone.
+bool mp_vdpau_get_rgb_format(int imgfmt, VdpRGBAFormat *out_rgba_format)
+{
+    if (imgfmt != IMGFMT_BGRA)
+        return false;
+
+    // The only mapping: IMGFMT_BGRA -> VDP_RGBA_FORMAT_B8G8R8A8.
+    VdpRGBAFormat rgba_format = VDP_RGBA_FORMAT_B8G8R8A8;
+
+    // The out parameter is optional.
+    if (out_rgba_format)
+        *out_rgba_format = rgba_format;
+    return true;
+}
+
+// Upload mpi to a pooled vdpau surface and return it (NULL on failure).
+// If the image is already a vdpau surface, just return a new reference.
+struct mp_image *mp_vdpau_upload_video_surface(struct mp_vdpau_ctx *ctx,
+                                               struct mp_image *mpi)
+{
+    struct vdp_functions *vdp = &ctx->vdp;
+    VdpStatus vdp_st;
+
+    if (mpi->imgfmt == IMGFMT_VDPAU || mpi->imgfmt == IMGFMT_VDPAU_OUTPUT)
+        return mp_image_new_ref(mpi);
+
+    VdpChromaType chroma = (VdpChromaType)-1;
+    VdpYCbCrFormat ycbcr = (VdpYCbCrFormat)-1;
+    VdpRGBAFormat rgbafmt = (VdpRGBAFormat)-1;
+    bool rgb = !mp_vdpau_get_format(mpi->imgfmt, &chroma, &ycbcr);
+    if (rgb && !mp_vdpau_get_rgb_format(mpi->imgfmt, &rgbafmt))
+        return NULL;
+
+    struct mp_image *hwmpi =
+        mp_vdpau_get_surface(ctx, chroma, rgbafmt, rgb, mpi->w, mpi->h);
+    if (!hwmpi)
+        return NULL;
+
+    struct mp_image *src = mpi->stride[0] < 0 ? mp_image_new_copy(mpi) : mpi;
+    if (!src) { // copying (which unflips a flipped image) failed
+        talloc_free(hwmpi);
+        return NULL;
+    }
+
+    if (hwmpi->imgfmt == IMGFMT_VDPAU) {
+        VdpVideoSurface surface = (intptr_t)hwmpi->planes[3];
+        const void *destdata[3] = {src->planes[0], src->planes[2], src->planes[1]};
+        if (src->imgfmt == IMGFMT_NV12)
+            destdata[1] = destdata[2];
+        vdp_st = vdp->video_surface_put_bits_y_cb_cr(surface,
+            ycbcr, destdata, src->stride);
+    } else {
+        VdpOutputSurface rgb_surface = (intptr_t)hwmpi->planes[3];
+        vdp_st = vdp->output_surface_put_bits_native(rgb_surface,
+                                    &(const void *){src->planes[0]},
+                                    &(uint32_t){src->stride[0]},
+                                    NULL);
+    }
+    CHECK_VDP_WARNING(ctx, "Error when uploading surface"); // warn only
+    if (src != mpi)
+        talloc_free(src); // drop the temporary unflipped copy
+    mp_image_copy_attributes(hwmpi, mpi);
+    return hwmpi;
+}
+
+bool mp_vdpau_guess_if_emulated(struct mp_vdpau_ctx *ctx)
+{
+    struct vdp_functions *vdp = &ctx->vdp; // also used by CHECK_VDP_WARNING
+    VdpStatus vdp_st;
+    char const* info = NULL; // "VAAPI" in this string counts as emulated
+    vdp_st = vdp->get_information_string(&info);
+    CHECK_VDP_WARNING(ctx, "Error when calling vdp_get_information_string");
+    return vdp_st == VDP_STATUS_OK && info && strstr(info, "VAAPI");
+}
+
+// Return the mp_vdpau_ctx behind hw_device_ctx, or NULL if the device was
+// not created by this file. Our devices are recognized by their free
+// callback, which is free_device_ref().
+struct mp_vdpau_ctx *mp_vdpau_get_ctx_from_av(AVBufferRef *hw_device_ctx)
+{
+    AVHWDeviceContext *dev = (void *)hw_device_ctx->data;
+    bool is_ours = dev->free == free_device_ref;
+
+    return is_ours ? dev->user_opaque : NULL;
+}
+
+// hwcontext_fns.is_emulated hook: defer to mp_vdpau_guess_if_emulated() for
+// contexts created by this file; anything else is reported as not emulated.
+static bool is_emulated(struct AVBufferRef *hw_device_ctx)
+{
+    struct mp_vdpau_ctx *ctx = mp_vdpau_get_ctx_from_av(hw_device_ctx);
+
+    return ctx && mp_vdpau_guess_if_emulated(ctx);
+}
+
+static struct AVBufferRef *vdpau_create_standalone(struct mpv_global *global,
+        struct mp_log *log, struct hwcontext_create_dev_params *params)
+{
+    XInitThreads();
+    // Standalone device: open our own connection to the default X display.
+    Display *display = XOpenDisplay(NULL);
+    if (!display)
+        return NULL;
+
+    struct mp_vdpau_ctx *vdp =
+        mp_vdpau_create_device_x11(log, display, params->probing);
+    if (!vdp) {
+        XCloseDisplay(display); // close_display was not set yet: close here
+        return NULL;
+    }
+    // From here on free_device_ref() closes the display along with the ctx.
+    vdp->close_display = true;
+    return vdp->hwctx.av_device_ref;
+}
+
+const struct hwcontext_fns hwcontext_fns_vdpau = {
+    .av_hwdevice_type = AV_HWDEVICE_TYPE_VDPAU,
+    .create_dev = vdpau_create_standalone, // opens its own X11 display
+    .is_emulated = is_emulated,            // information-string based guess
+};
diff --git a/video/vdpau.h b/video/vdpau.h
new file mode 100644
index 0000000..a30f478
--- /dev/null
+++ b/video/vdpau.h
@@ -0,0 +1,109 @@
+#ifndef MPV_VDPAU_H
+#define MPV_VDPAU_H
+
+#include <stdbool.h>
+#include <inttypes.h>
+
+#include <vdpau/vdpau.h>
+#include <vdpau/vdpau_x11.h>
+
+#include "common/msg.h"
+#include "hwdec.h"
+#include "osdep/threads.h"
+
+#include "config.h"
+#if !HAVE_GPL
+#error GPL only
+#endif
+// All CHECK_VDP_* macros expect locals named vdp_st and vdp in scope.
+#define CHECK_VDP_ERROR_ST(ctx, message, statement) \
+    do { \
+        if (vdp_st != VDP_STATUS_OK) { \
+            MP_ERR(ctx, "%s: %s\n", message, vdp->get_error_string(vdp_st)); \
+            statement \
+        } \
+    } while (0)
+// On error: log at error level and return -1 from the calling function.
+#define CHECK_VDP_ERROR(ctx, message) \
+    CHECK_VDP_ERROR_ST(ctx, message, return -1;)
+// On error: log at error level and carry on.
+#define CHECK_VDP_ERROR_NORETURN(ctx, message) \
+    CHECK_VDP_ERROR_ST(ctx, message, ;)
+// On error: log a warning and carry on.
+#define CHECK_VDP_WARNING(ctx, message) \
+    do { \
+        if (vdp_st != VDP_STATUS_OK) \
+            MP_WARN(ctx, "%s: %s\n", message, vdp->get_error_string(vdp_st)); \
+    } while (0)
+// One function pointer per VDP_FUNCTION() entry in vdpau_functions.inc.
+struct vdp_functions {
+#define VDP_FUNCTION(vdp_type, _, mp_name) vdp_type *mp_name;
+#include "video/vdpau_functions.inc"
+#undef VDP_FUNCTION
+};
+
+
+#define MAX_VIDEO_SURFACES 50 // size of mp_vdpau_ctx.video_surfaces[]
+
+// Shared state. Objects created from different VdpDevices are often (always?)
+// incompatible to each other, so all code must use a shared VdpDevice.
+struct mp_vdpau_ctx {
+    struct mp_log *log;
+    Display *x11;
+    bool close_display; // if set, the display is closed when ctx is freed
+
+    struct mp_hwdec_ctx hwctx; // hwctx.av_device_ref mirrors av_device_ref
+    struct AVBufferRef *av_device_ref;
+
+    // These are mostly immutable, except on preemption. We don't really care
+    // to synchronize the preemption case fully correctly, because it's an
+    // extremely obscure corner case, and basically a vdpau API design bug.
+    // What we do will sort-of work anyway (no memory errors are possible).
+    struct vdp_functions vdp;
+    VdpGetProcAddress *get_proc_address;
+    VdpDevice vdp_device;
+
+    mp_mutex preempt_lock;
+    bool is_preempted;                  // set to true during unavailability
+    uint64_t preemption_counter;        // incremented after _restoring_
+    bool preemption_user_notified;
+    double last_preemption_retry_fail;
+    VdpOutputSurface preemption_obj;    // dummy for reliable preempt. check
+
+    // Surface pool
+    mp_mutex pool_lock;
+    int64_t age_counter; // source of surface_entry.age values
+    struct surface_entry {
+        VdpVideoSurface surface;   // used if !rgb
+        VdpOutputSurface osurface; // used if rgb
+        bool allocated;
+        int w, h;
+        VdpRGBAFormat rgb_format;  // (VdpRGBAFormat)-1 if !rgb
+        VdpChromaType chroma;      // (VdpChromaType)-1 if rgb
+        bool rgb;
+        bool in_use;               // currently handed out as an mp_image
+        int64_t age;               // age_counter value at the last hand-out
+    } video_surfaces[MAX_VIDEO_SURFACES];
+};
+
+struct mp_vdpau_ctx *mp_vdpau_create_device_x11(struct mp_log *log, Display *x11,
+                                                bool probing);
+void mp_vdpau_destroy(struct mp_vdpau_ctx *ctx);
+
+int mp_vdpau_handle_preemption(struct mp_vdpau_ctx *ctx, uint64_t *counter);
+
+struct mp_image *mp_vdpau_get_video_surface(struct mp_vdpau_ctx *ctx,
+                                            VdpChromaType chroma, int w, int h);
+
+bool mp_vdpau_get_format(int imgfmt, VdpChromaType *out_chroma_type,
+                         VdpYCbCrFormat *out_pixel_format);
+bool mp_vdpau_get_rgb_format(int imgfmt, VdpRGBAFormat *out_rgba_format);
+
+struct mp_image *mp_vdpau_upload_video_surface(struct mp_vdpau_ctx *ctx,
+                                               struct mp_image *mpi);
+
+struct mp_vdpau_ctx *mp_vdpau_get_ctx_from_av(struct AVBufferRef *hw_device_ctx);
+
+bool mp_vdpau_guess_if_emulated(struct mp_vdpau_ctx *ctx);
+
+#endif
diff --git a/video/vdpau_functions.inc b/video/vdpau_functions.inc
new file mode 100644
index 0000000..22c612c
--- /dev/null
+++ b/video/vdpau_functions.inc
@@ -0,0 +1,50 @@
+/* Lists the VDPAU functions used by mpv, one VDP_FUNCTION() entry per line.
+ * First argument on each line is the VDPAU function pointer type name,
+ * second is the VDP_FUNC_ID_* constant needed to get the function address,
+ * third is the name mpv uses for the function.
+ */
+
+VDP_FUNCTION(VdpGetErrorString, VDP_FUNC_ID_GET_ERROR_STRING, get_error_string)
+VDP_FUNCTION(VdpBitmapSurfaceCreate, VDP_FUNC_ID_BITMAP_SURFACE_CREATE, bitmap_surface_create)
+VDP_FUNCTION(VdpBitmapSurfaceDestroy, VDP_FUNC_ID_BITMAP_SURFACE_DESTROY, bitmap_surface_destroy)
+VDP_FUNCTION(VdpBitmapSurfacePutBitsNative, VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE, bitmap_surface_put_bits_native)
+VDP_FUNCTION(VdpBitmapSurfaceQueryCapabilities, VDP_FUNC_ID_BITMAP_SURFACE_QUERY_CAPABILITIES, bitmap_surface_query_capabilities)
+VDP_FUNCTION(VdpDecoderCreate, VDP_FUNC_ID_DECODER_CREATE, decoder_create)
+VDP_FUNCTION(VdpDecoderDestroy, VDP_FUNC_ID_DECODER_DESTROY, decoder_destroy)
+VDP_FUNCTION(VdpDecoderRender, VDP_FUNC_ID_DECODER_RENDER, decoder_render)
+VDP_FUNCTION(VdpDecoderQueryCapabilities, VDP_FUNC_ID_DECODER_QUERY_CAPABILITIES, decoder_query_capabilities)
+VDP_FUNCTION(VdpDeviceDestroy, VDP_FUNC_ID_DEVICE_DESTROY, device_destroy)
+VDP_FUNCTION(VdpGetInformationString, VDP_FUNC_ID_GET_INFORMATION_STRING, get_information_string)
+VDP_FUNCTION(VdpGenerateCSCMatrix, VDP_FUNC_ID_GENERATE_CSC_MATRIX, generate_csc_matrix)
+VDP_FUNCTION(VdpOutputSurfaceCreate, VDP_FUNC_ID_OUTPUT_SURFACE_CREATE, output_surface_create)
+VDP_FUNCTION(VdpOutputSurfaceDestroy, VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY, output_surface_destroy)
+VDP_FUNCTION(VdpOutputSurfaceGetBitsNative, VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE, output_surface_get_bits_native)
+VDP_FUNCTION(VdpOutputSurfacePutBitsIndexed, VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED, output_surface_put_bits_indexed)
+VDP_FUNCTION(VdpOutputSurfacePutBitsNative, VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE, output_surface_put_bits_native)
+VDP_FUNCTION(VdpOutputSurfaceRenderBitmapSurface, VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE, output_surface_render_bitmap_surface)
+VDP_FUNCTION(VdpOutputSurfaceRenderOutputSurface, VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_OUTPUT_SURFACE, output_surface_render_output_surface)
+VDP_FUNCTION(VdpPreemptionCallbackRegister, VDP_FUNC_ID_PREEMPTION_CALLBACK_REGISTER, preemption_callback_register)
+VDP_FUNCTION(VdpPresentationQueueBlockUntilSurfaceIdle, VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE, presentation_queue_block_until_surface_idle)
+VDP_FUNCTION(VdpPresentationQueueCreate, VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE, presentation_queue_create)
+VDP_FUNCTION(VdpPresentationQueueDestroy, VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY, presentation_queue_destroy)
+VDP_FUNCTION(VdpPresentationQueueDisplay, VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY, presentation_queue_display)
+VDP_FUNCTION(VdpPresentationQueueGetTime, VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME, presentation_queue_get_time)
+VDP_FUNCTION(VdpPresentationQueueQuerySurfaceStatus, VDP_FUNC_ID_PRESENTATION_QUEUE_QUERY_SURFACE_STATUS, presentation_queue_query_surface_status)
+VDP_FUNCTION(VdpPresentationQueueSetBackgroundColor, VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR, presentation_queue_set_background_color)
+VDP_FUNCTION(VdpPresentationQueueGetBackgroundColor, VDP_FUNC_ID_PRESENTATION_QUEUE_GET_BACKGROUND_COLOR, presentation_queue_get_background_color)
+VDP_FUNCTION(VdpPresentationQueueTargetCreateX11, VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_CREATE_X11, presentation_queue_target_create_x11)
+VDP_FUNCTION(VdpPresentationQueueTargetDestroy, VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY, presentation_queue_target_destroy)
+VDP_FUNCTION(VdpVideoMixerCreate, VDP_FUNC_ID_VIDEO_MIXER_CREATE, video_mixer_create)
+VDP_FUNCTION(VdpVideoMixerDestroy, VDP_FUNC_ID_VIDEO_MIXER_DESTROY, video_mixer_destroy)
+VDP_FUNCTION(VdpVideoMixerQueryFeatureSupport, VDP_FUNC_ID_VIDEO_MIXER_QUERY_FEATURE_SUPPORT, video_mixer_query_feature_support)
+VDP_FUNCTION(VdpVideoMixerRender, VDP_FUNC_ID_VIDEO_MIXER_RENDER, video_mixer_render)
+VDP_FUNCTION(VdpVideoMixerSetAttributeValues, VDP_FUNC_ID_VIDEO_MIXER_SET_ATTRIBUTE_VALUES, video_mixer_set_attribute_values)
+VDP_FUNCTION(VdpVideoMixerSetFeatureEnables, VDP_FUNC_ID_VIDEO_MIXER_SET_FEATURE_ENABLES, video_mixer_set_feature_enables)
+VDP_FUNCTION(VdpVideoSurfaceCreate, VDP_FUNC_ID_VIDEO_SURFACE_CREATE, video_surface_create)
+VDP_FUNCTION(VdpVideoSurfaceDestroy, VDP_FUNC_ID_VIDEO_SURFACE_DESTROY, video_surface_destroy)
+VDP_FUNCTION(VdpVideoSurfacePutBitsYCbCr, VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR, video_surface_put_bits_y_cb_cr)
+VDP_FUNCTION(VdpVideoSurfaceGetBitsYCbCr, VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR, video_surface_get_bits_y_cb_cr)
+VDP_FUNCTION(VdpVideoSurfaceGetParameters, VDP_FUNC_ID_VIDEO_SURFACE_GET_PARAMETERS, video_surface_get_parameters)
+VDP_FUNCTION(VdpVideoSurfaceQueryCapabilities, VDP_FUNC_ID_VIDEO_SURFACE_QUERY_CAPABILITIES, video_surface_query_capabilities)
+VDP_FUNCTION(VdpOutputSurfaceQueryCapabilities, VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_CAPABILITIES, output_surface_query_capabilities)
+VDP_FUNCTION(VdpOutputSurfaceGetParameters, VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS, output_surface_get_parameters)
diff --git a/video/vdpau_mixer.c b/video/vdpau_mixer.c
new file mode 100644
index 0000000..b1aed70
--- /dev/null
+++ b/video/vdpau_mixer.c
@@ -0,0 +1,306 @@
+/*
+ * This file is part of mpv.
+ *
+ * Parts of video mixer creation code:
+ * Copyright (C) 2008 NVIDIA (Rajib Mahapatra <rmahapatra@nvidia.com>)
+ * Copyright (C) 2009 Uoti Urpala
+ *
+ * mpv is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include "vdpau_mixer.h"
+
+// Release callback for mixed-frame images; arg is the mp_vdpau_mixer_frame.
+static void free_mixed_frame(void *arg)
+{
+    talloc_free(arg);
+}
+
+// Allocate an IMGFMT_VDPAU image that carries a mp_vdpau_mixer_frame. All
+// surface handles start out as VDP_INVALID_HANDLE; the caller fetches the
+// struct with mp_vdpau_mixed_frame_get() and fills it in.
+// Only the parameters of "base" are used, none of its image data.
+struct mp_image *mp_vdpau_mixed_frame_create(struct mp_image *base)
+{
+    assert(base->imgfmt == IMGFMT_VDPAU);
+
+    struct mp_vdpau_mixer_frame *mf =
+        talloc_zero(NULL, struct mp_vdpau_mixer_frame);
+    mf->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME;
+    mf->current = VDP_INVALID_HANDLE;
+    for (int i = 0; i < MP_VDP_HISTORY_FRAMES; i++)
+        mf->past[i] = mf->future[i] = VDP_INVALID_HANDLE;
+
+    struct mp_image *res = mp_image_new_custom_ref(base, mf, free_mixed_frame);
+    if (!res)
+        return NULL;
+    res->planes[2] = (void *)mf;
+    res->planes[3] = (void *)(uintptr_t)VDP_INVALID_HANDLE;
+    return res;
+}
+
+// Return the pointer stored in planes[2], or NULL for non-VDPAU images.
+struct mp_vdpau_mixer_frame *mp_vdpau_mixed_frame_get(struct mp_image *mpi)
+{
+    bool is_vdpau = mpi->imgfmt == IMGFMT_VDPAU;
+    return is_vdpau ? (void *)mpi->planes[2] : NULL;
+}
+
+// Allocate a mixer bound to vdp_ctx. video_mixer starts as VDP_INVALID_HANDLE;
+// preemption_counter is passed to mp_vdpau_handle_preemption() once.
+struct mp_vdpau_mixer *mp_vdpau_mixer_create(struct mp_vdpau_ctx *vdp_ctx,
+                                             struct mp_log *log)
+{
+    struct mp_vdpau_mixer *res = talloc_zero(NULL, struct mp_vdpau_mixer);
+    res->video_mixer = VDP_INVALID_HANDLE;
+    res->log = log;
+    res->ctx = vdp_ctx;
+    mp_vdpau_handle_preemption(vdp_ctx, &res->preemption_counter);
+    return res;
+}
+
+// Destroy the VDPAU video mixer (if one exists) and free the mixer struct.
+void mp_vdpau_mixer_destroy(struct mp_vdpau_mixer *mixer)
+{
+    struct vdp_functions *vdp = &mixer->ctx->vdp;
+    if (mixer->video_mixer != VDP_INVALID_HANDLE) {
+        VdpStatus vdp_st = vdp->video_mixer_destroy(mixer->video_mixer);
+        CHECK_VDP_WARNING(mixer, "Error when calling vdp_video_mixer_destroy");
+    }
+    talloc_free(mixer);
+}
+
+static bool opts_equal(const struct mp_vdpau_mixer_opts *a,
+                       const struct mp_vdpau_mixer_opts *b)
+{
+    return !(a->deint != b->deint || a->chroma_deint != b->chroma_deint ||
+             a->pullup != b->pullup || a->hqscaling != b->hqscaling ||
+             a->sharpen != b->sharpen || a->denoise != b->denoise); // exact
+}
+
+// Set one attribute on mixer->video_mixer. "value" points to the attribute's
+// data; attr_name is used for the error message only. Returns 0, or -1 (after
+// logging) if VDPAU reports a failure.
+static int set_video_attribute(struct mp_vdpau_mixer *mixer,
+                               VdpVideoMixerAttribute attr,
+                               const void *value, char *attr_name)
+{
+    struct vdp_functions *fns = &mixer->ctx->vdp;
+    VdpStatus st = fns->video_mixer_set_attribute_values(mixer->video_mixer,
+                                                         1, &attr, &value);
+    if (st == VDP_STATUS_OK)
+        return 0;
+    MP_ERR(mixer, "Error setting video mixer attribute %s: %s\n", attr_name,
+           fns->get_error_string(st));
+    return -1;
+}
+
+#define SET_VIDEO_ATTR(attr_name, attr_type, value) set_video_attribute(mixer, \
+                 VDP_VIDEO_MIXER_ATTRIBUTE_ ## attr_name, &(attr_type){value},\
+                 # attr_name) // expects a variable named "mixer" in scope
+static int create_vdp_mixer(struct mp_vdpau_mixer *mixer,
+                            VdpChromaType chroma_type, uint32_t w, uint32_t h)
+{
+    struct vdp_functions *vdp = &mixer->ctx->vdp;
+    VdpDevice vdp_device = mixer->ctx->vdp_device;
+    struct mp_vdpau_mixer_opts *opts = &mixer->opts;
+#define VDP_NUM_MIXER_PARAMETER 3 // size of parameters[] / parameter_values[]
+#define MAX_NUM_FEATURES 6 // capacity of features[] / feature_enables[]
+    int i;
+    VdpStatus vdp_st;
+    // Creates mixer->video_mixer for the given surface type/size from opts.
+    MP_VERBOSE(mixer, "Recreating vdpau video mixer.\n");
+
+    int feature_count = 0; // number of used entries in features[]
+    VdpVideoMixerFeature features[MAX_NUM_FEATURES];
+    VdpBool feature_enables[MAX_NUM_FEATURES];
+    static const VdpVideoMixerParameter parameters[VDP_NUM_MIXER_PARAMETER] = {
+        VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_WIDTH,
+        VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_HEIGHT,
+        VDP_VIDEO_MIXER_PARAMETER_CHROMA_TYPE,
+    };
+    const void *const parameter_values[VDP_NUM_MIXER_PARAMETER] = {
+        &(uint32_t){w},
+        &(uint32_t){h},
+        &(VdpChromaType){chroma_type},
+    };
+    if (opts->deint >= 3) // 3 and 4 both request the temporal deinterlacer
+        features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL;
+    if (opts->deint == 4)
+        features[feature_count++] =
+            VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL;
+    if (opts->pullup)
+        features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE;
+    if (opts->denoise)
+        features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION;
+    if (opts->sharpen)
+        features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_SHARPNESS;
+    if (opts->hqscaling) { // level n selects HIGH_QUALITY_SCALING_L1 + (n - 1)
+        VdpVideoMixerFeature hqscaling_feature =
+            VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + opts->hqscaling - 1;
+        VdpBool hqscaling_available;
+        vdp_st = vdp->video_mixer_query_feature_support(vdp_device,
+                                                        hqscaling_feature,
+                                                        &hqscaling_available);
+        CHECK_VDP_ERROR(mixer, "Error when calling video_mixer_query_feature_support");
+        if (hqscaling_available) {
+            features[feature_count++] = hqscaling_feature;
+        } else {
+            MP_ERR(mixer, "Your hardware or VDPAU library does not support "
+                   "requested hqscaling.\n");
+        }
+    }
+
+    vdp_st = vdp->video_mixer_create(vdp_device, feature_count, features,
+                                     VDP_NUM_MIXER_PARAMETER,
+                                     parameters, parameter_values,
+                                     &mixer->video_mixer);
+    if (vdp_st != VDP_STATUS_OK) // never keep a handle from a failed create
+        mixer->video_mixer = VDP_INVALID_HANDLE;
+
+    CHECK_VDP_ERROR(mixer, "Error when calling vdp_video_mixer_create");
+
+    mixer->initialized = true;
+    mixer->current_chroma_type = chroma_type;
+    mixer->current_w = w;
+    mixer->current_h = h;
+
+    for (i = 0; i < feature_count; i++) // every requested feature is enabled
+        feature_enables[i] = VDP_TRUE;
+    if (feature_count) {
+        vdp_st = vdp->video_mixer_set_feature_enables(mixer->video_mixer,
+                                                      feature_count, features,
+                                                      feature_enables);
+        CHECK_VDP_WARNING(mixer, "Error calling vdp_video_mixer_set_feature_enables");
+    }
+    if (opts->denoise) // return values of SET_VIDEO_ATTR are ignored here
+        SET_VIDEO_ATTR(NOISE_REDUCTION_LEVEL, float, opts->denoise);
+    if (opts->sharpen)
+        SET_VIDEO_ATTR(SHARPNESS_LEVEL, float, opts->sharpen);
+    if (!opts->chroma_deint)
+        SET_VIDEO_ATTR(SKIP_CHROMA_DEINTERLACE, uint8_t, 1);
+
+    struct mp_cmat yuv2rgb;
+    VdpCSCMatrix matrix;
+
+    struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS;
+    mp_csp_set_image_params(&cparams, &mixer->image_params);
+    if (mixer->video_eq) // equalizer is optional
+        mp_csp_equalizer_state_get(mixer->video_eq, &cparams);
+    mp_get_csp_matrix(&cparams, &yuv2rgb);
+
+    for (int r = 0; r < 3; r++) {
+        for (int c = 0; c < 3; c++)
+            matrix[r][c] = yuv2rgb.m[r][c];
+        matrix[r][3] = yuv2rgb.c[r]; // 4th column takes the constant vector
+    }
+
+    set_video_attribute(mixer, VDP_VIDEO_MIXER_ATTRIBUTE_CSC_MATRIX,
+                        &matrix, "CSC matrix");
+
+    return 0;
+}
+
+// Renders video to output. NULL opts: use the opts implied by the image.
+int mp_vdpau_mixer_render(struct mp_vdpau_mixer *mixer,
+                          struct mp_vdpau_mixer_opts *opts,
+                          VdpOutputSurface output, VdpRect *output_rect,
+                          struct mp_image *video, VdpRect *video_rect)
+{
+    struct vdp_functions *vdp = &mixer->ctx->vdp;
+    VdpStatus vdp_st;
+    VdpRect fallback_rect = {0, 0, video->w, video->h};
+
+    if (!video_rect) // default source rectangle: the whole image
+        video_rect = &fallback_rect;
+
+    int pe = mp_vdpau_handle_preemption(mixer->ctx, &mixer->preemption_counter);
+    if (pe < 1) { // the handle is dropped here without being destroyed
+        mixer->video_mixer = VDP_INVALID_HANDLE;
+        if (pe < 0)
+            return -1;
+    }
+
+    if (video->imgfmt == IMGFMT_VDPAU_OUTPUT) { // no video mixer involved
+        VdpOutputSurface surface = (uintptr_t)video->planes[3];
+        int flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_0;
+        vdp_st = vdp->output_surface_render_output_surface(output,
+                                                           output_rect,
+                                                           surface,
+                                                           video_rect,
+                                                           NULL, NULL, flags);
+        CHECK_VDP_WARNING(mixer, "Error when calling "
+                          "vdp_output_surface_render_output_surface");
+        return 0;
+    }
+
+    if (video->imgfmt != IMGFMT_VDPAU)
+        return -1;
+
+    struct mp_vdpau_mixer_frame *frame = mp_vdpau_mixed_frame_get(video);
+    struct mp_vdpau_mixer_frame fallback = {{0}};
+    if (!frame) { // planes[2] is NULL: treat planes[3] as a lone surface
+        frame = &fallback;
+        frame->current = (uintptr_t)video->planes[3];
+        for (int n = 0; n < MP_VDP_HISTORY_FRAMES; n++)
+            frame->past[n] = frame->future[n] = VDP_INVALID_HANDLE;
+        frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME;
+    }
+
+    if (!opts) // fall back to the options stored with the frame
+        opts = &frame->opts;
+
+    if (mixer->video_mixer == VDP_INVALID_HANDLE)
+        mixer->initialized = false;
+
+    if (mixer->video_eq && mp_csp_equalizer_state_changed(mixer->video_eq))
+        mixer->initialized = false;
+
+    VdpChromaType s_chroma_type;
+    uint32_t s_w, s_h;
+
+    vdp_st = vdp->video_surface_get_parameters(frame->current, &s_chroma_type,
+                                               &s_w, &s_h);
+    CHECK_VDP_ERROR(mixer, "Error when calling vdp_video_surface_get_parameters");
+
+    if (!mixer->initialized || !opts_equal(opts, &mixer->opts) ||
+        !mp_image_params_equal(&video->params, &mixer->image_params) ||
+        mixer->current_w != s_w || mixer->current_h != s_h ||
+        mixer->current_chroma_type != s_chroma_type)
+    { // anything the mixer depends on changed: destroy it and build a new one
+        mixer->opts = *opts;
+        mixer->image_params = video->params;
+        if (mixer->video_mixer != VDP_INVALID_HANDLE) {
+            vdp_st = vdp->video_mixer_destroy(mixer->video_mixer);
+            CHECK_VDP_WARNING(mixer, "Error when calling vdp_video_mixer_destroy");
+        }
+        mixer->video_mixer = VDP_INVALID_HANDLE;
+        mixer->initialized = false;
+        if (create_vdp_mixer(mixer, s_chroma_type, s_w, s_h) < 0)
+            return -1;
+    }
+
+    vdp_st = vdp->video_mixer_render(mixer->video_mixer, VDP_INVALID_HANDLE,
+                                     0, frame->field,
+                                     MP_VDP_HISTORY_FRAMES, frame->past,
+                                     frame->current,
+                                     MP_VDP_HISTORY_FRAMES, frame->future,
+                                     video_rect,
+                                     output, NULL, output_rect,
+                                     0, NULL);
+    CHECK_VDP_WARNING(mixer, "Error when calling vdp_video_mixer_render");
+    return 0;
+}
diff --git a/video/vdpau_mixer.h b/video/vdpau_mixer.h
new file mode 100644
index 0000000..4abe87e
--- /dev/null
+++ b/video/vdpau_mixer.h
@@ -0,0 +1,61 @@
+#ifndef MP_VDPAU_MIXER_H_
+#define MP_VDPAU_MIXER_H_
+
+#include <stdbool.h>
+
+#include "csputils.h"
+#include "mp_image.h"
+#include "vdpau.h"
+
+struct mp_vdpau_mixer_opts {
+    int deint;          // >= 3: temporal deinterlacer; 4: temporal-spatial too
+    bool chroma_deint;  // false: SKIP_CHROMA_DEINTERLACE attribute is set
+    bool pullup;        // true: inverse telecine feature is requested
+    float denoise;      // non-zero: noise reduction feature, used as its level
+    float sharpen;      // non-zero: sharpness feature, used as its level
+    int hqscaling;      // n != 0: HIGH_QUALITY_SCALING_L1 + (n - 1) is requested
+};
+
+#define MP_VDP_HISTORY_FRAMES 2
+
+struct mp_vdpau_mixer_frame {
+    // settings (used by mp_vdpau_mixer_render() when it gets no opts)
+    struct mp_vdpau_mixer_opts opts;
+    // video data, passed on to the video_mixer_render call
+    VdpVideoMixerPictureStructure field;
+    VdpVideoSurface past[MP_VDP_HISTORY_FRAMES];
+    VdpVideoSurface current;
+    VdpVideoSurface future[MP_VDP_HISTORY_FRAMES];
+};
+
+struct mp_vdpau_mixer {
+    struct mp_log *log;
+    struct mp_vdpau_ctx *ctx;
+    uint64_t preemption_counter;    // passed to mp_vdpau_handle_preemption()
+    bool initialized;               // false: next render recreates video_mixer
+
+    struct mp_image_params image_params;    // params video_mixer was made for
+    struct mp_vdpau_mixer_opts opts;        // opts video_mixer was made for
+
+    VdpChromaType current_chroma_type;      // surface parameters that
+    int current_w, current_h;               // video_mixer was created with
+
+    struct mp_csp_equalizer_state *video_eq; // may be NULL; feeds CSC matrix
+
+    VdpVideoMixer video_mixer;      // VDP_INVALID_HANDLE if none exists
+};
+
+struct mp_image *mp_vdpau_mixed_frame_create(struct mp_image *base);
+
+struct mp_vdpau_mixer_frame *mp_vdpau_mixed_frame_get(struct mp_image *mpi);
+
+struct mp_vdpau_mixer *mp_vdpau_mixer_create(struct mp_vdpau_ctx *vdp_ctx,
+                                             struct mp_log *log);
+void mp_vdpau_mixer_destroy(struct mp_vdpau_mixer *mixer);
+
+int mp_vdpau_mixer_render(struct mp_vdpau_mixer *mixer,
+                          struct mp_vdpau_mixer_opts *opts,
+                          VdpOutputSurface output, VdpRect *output_rect,
+                          struct mp_image *video, VdpRect *video_rect);
+
+#endif
diff --git a/video/zimg.c b/video/zimg.c
new file mode 100644
index 0000000..5ff300c
--- /dev/null
+++ b/video/zimg.c
@@ -0,0 +1,730 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv.  If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <math.h>
+
+#include <libavutil/cpu.h>
+
+#include "common/common.h"
+#include "common/msg.h"
+#include "csputils.h"
+#include "misc/thread_pool.h"
+#include "misc/thread_tools.h"
+#include "options/m_config.h"
+#include "options/m_option.h"
+#include "repack.h"
+#include "video/fmt-conversion.h"
+#include "video/img_format.h"
+#include "zimg.h"
+#include "config.h"
+
+static_assert(MP_IMAGE_BYTE_ALIGN >= ZIMG_ALIGN, "");
+
+#define HAVE_ZIMG_ALPHA (ZIMG_API_VERSION >= ZIMG_MAKE_API_VERSION(2, 4))
+
+static const struct m_opt_choice_alternatives mp_zimg_scalers[] = {
+    {"point",           ZIMG_RESIZE_POINT},
+    {"bilinear",        ZIMG_RESIZE_BILINEAR},  // default for scaler-chroma
+    {"bicubic",         ZIMG_RESIZE_BICUBIC},
+    {"spline16",        ZIMG_RESIZE_SPLINE16},
+    {"spline36",        ZIMG_RESIZE_SPLINE36},
+    {"lanczos",         ZIMG_RESIZE_LANCZOS},   // default for scaler
+    {0}
+};
+
+const struct zimg_opts zimg_opts_defaults = {
+    .scaler = ZIMG_RESIZE_LANCZOS,
+    .scaler_params = {NAN, NAN},        // NAN presumably = unset, TODO confirm
+    .scaler_chroma_params = {NAN, NAN},
+    .scaler_chroma = ZIMG_RESIZE_BILINEAR,
+    .dither = ZIMG_DITHER_RANDOM,
+    .fast = true,                       // .threads stays 0, listed as "auto"
+};
+
+#define OPT_PARAM(var) OPT_DOUBLE(var), .flags = M_OPT_DEFAULT_NAN
+
+#define OPT_BASE_STRUCT struct zimg_opts
+const struct m_sub_options zimg_conf = {
+    .opts = (struct m_option[]) {       // fields refer to OPT_BASE_STRUCT
+        {"scaler", OPT_CHOICE_C(scaler, mp_zimg_scalers)},
+        {"scaler-param-a", OPT_PARAM(scaler_params[0])},
+        {"scaler-param-b", OPT_PARAM(scaler_params[1])},
+        {"scaler-chroma", OPT_CHOICE_C(scaler_chroma, mp_zimg_scalers)},
+        {"scaler-chroma-param-a", OPT_PARAM(scaler_chroma_params[0])},
+        {"scaler-chroma-param-b", OPT_PARAM(scaler_chroma_params[1])},
+        {"dither", OPT_CHOICE(dither,
+            {"no",              ZIMG_DITHER_NONE},
+            {"ordered",         ZIMG_DITHER_ORDERED},
+            {"random",          ZIMG_DITHER_RANDOM},
+            {"error-diffusion", ZIMG_DITHER_ERROR_DIFFUSION})},
+        {"fast", OPT_BOOL(fast)},
+        {"threads", OPT_CHOICE(threads, {"auto", 0}), M_RANGE(1, 64)},
+        {0}
+    },
+    .size = sizeof(struct zimg_opts),
+    .defaults = &zimg_opts_defaults,    // also read by mp_zimg_alloc()
+};
+
+struct mp_zimg_state {
+    zimg_filter_graph *graph;   // released with zimg_filter_graph_free()
+    void *tmp;
+    void *tmp_alloc;            // freed with talloc_free() in destroy_zimg()
+    struct mp_zimg_repack *src;
+    struct mp_zimg_repack *dst; // setup_format() reads this when unpacking
+    int slice_y, slice_h; // y start position, height of target slice
+    double scale_y;             // target rows * scale_y = source rows
+    struct mp_waiter thread_waiter;
+};
+
+struct mp_zimg_repack {
+    bool pack;                  // true: packing; if false, this is for unpacking
+    struct mp_image_params fmt; // original mp format (possibly packed format,
+                                // swapped endian)
+    int zimgfmt;                // zimg equivalent unpacked format
+    int num_planes;             // number of planes involved
+    unsigned zmask[4];          // zmask[mp_index] = zimg mask (using mp index!)
+    int z_planes[4];            // z_planes[zimg_index] = mp_index (or -1)
+
+    struct mp_repack *repack;   // converting to/from planar
+
+    // Temporary memory for slice-wise repacking. This may be set even if repack
+    // is not set (then it may be used to avoid alignment issues). This has
+    // about one slice worth of data.
+    struct mp_image *tmp;
+
+    // zimg buffer descriptor; (re)filled by wrap_buffer().
+    zimg_image_buffer zbuf;
+    struct mp_image cropped_tmp; // slice-cropped image copy (wrap_buffer, pack)
+
+    int real_w, real_h;         // size aligned up to the chroma subsampling
+};
+
+// Call m_config_cache_update() on the option cache, then copy the cached
+// option struct into ctx->opts.
+static void mp_zimg_update_from_cmdline(struct mp_zimg_context *ctx)
+{
+    m_config_cache_update(ctx->opts_cache);
+    ctx->opts = *(struct zimg_opts *)ctx->opts_cache->opts;
+}
+
+// Map an mpv chroma location to zimg; values not listed are treated as left.
+static zimg_chroma_location_e mp_to_z_chroma(enum mp_chroma_location cl)
+{
+    if (cl == MP_CHROMA_TOPLEFT)
+        return ZIMG_CHROMA_TOP_LEFT;
+    if (cl == MP_CHROMA_CENTER)
+        return ZIMG_CHROMA_CENTER;
+    return ZIMG_CHROMA_LEFT; // MP_CHROMA_LEFT and everything else
+}
+
+// mpv colorspace -> zimg matrix; XYZ maps to RGB, unlisted values to BT.709.
+static zimg_matrix_coefficients_e mp_to_z_matrix(enum mp_csp csp)
+{
+    switch (csp) {
+    case MP_CSP_RGB:
+    case MP_CSP_XYZ:            return ZIMG_MATRIX_RGB;
+    case MP_CSP_YCGCO:          return ZIMG_MATRIX_YCGCO;
+    case MP_CSP_BT_601:         return ZIMG_MATRIX_BT470_BG;
+    case MP_CSP_SMPTE_240M:     return ZIMG_MATRIX_ST240_M;
+    case MP_CSP_BT_2020_NC:     return ZIMG_MATRIX_BT2020_NCL;
+    case MP_CSP_BT_2020_C:      return ZIMG_MATRIX_BT2020_CL;
+    default:                    return ZIMG_MATRIX_BT709; // incl. MP_CSP_BT_709
+    }
+}
+
+// mpv transfer curve -> zimg; curves without a mapping here yield BT.709.
+static zimg_transfer_characteristics_e mp_to_z_trc(enum mp_csp_trc trc)
+{
+    switch (trc) {
+    case MP_CSP_TRC_SRGB:       return ZIMG_TRANSFER_IEC_61966_2_1;
+    case MP_CSP_TRC_LINEAR:     return ZIMG_TRANSFER_LINEAR;
+    case MP_CSP_TRC_GAMMA22:    return ZIMG_TRANSFER_BT470_M;
+    case MP_CSP_TRC_GAMMA28:    return ZIMG_TRANSFER_BT470_BG;
+    case MP_CSP_TRC_PQ:         return ZIMG_TRANSFER_ST2084;
+    case MP_CSP_TRC_HLG:        return ZIMG_TRANSFER_ARIB_B67;
+#if HAVE_ZIMG_ST428
+    case MP_CSP_TRC_ST428:      return ZIMG_TRANSFER_ST428;
+#endif
+    case MP_CSP_TRC_GAMMA18:    // no dedicated mapping from here on
+    case MP_CSP_TRC_GAMMA20:
+    case MP_CSP_TRC_GAMMA24:
+    case MP_CSP_TRC_GAMMA26:
+    case MP_CSP_TRC_PRO_PHOTO:
+    case MP_CSP_TRC_V_LOG:
+    case MP_CSP_TRC_S_LOG1:
+    case MP_CSP_TRC_S_LOG2:
+    default:                    return ZIMG_TRANSFER_BT709; // incl. BT.1886
+    }
+}
+
+// mpv primaries -> zimg primaries; values without a mapping here use BT.709.
+static zimg_color_primaries_e mp_to_z_prim(enum mp_csp_prim prim)
+{
+    switch (prim) {
+    case MP_CSP_PRIM_BT_2020:       return ZIMG_PRIMARIES_BT2020;
+    case MP_CSP_PRIM_BT_470M:       return ZIMG_PRIMARIES_BT470_M;
+    case MP_CSP_PRIM_BT_601_525:    return ZIMG_PRIMARIES_ST170_M;
+    case MP_CSP_PRIM_BT_601_625:    return ZIMG_PRIMARIES_BT470_BG;
+    case MP_CSP_PRIM_DCI_P3:        return ZIMG_PRIMARIES_ST431_2;
+    case MP_CSP_PRIM_DISPLAY_P3:    return ZIMG_PRIMARIES_ST432_1;
+    case MP_CSP_PRIM_EBU_3213:      return ZIMG_PRIMARIES_EBU3213_E;
+    case MP_CSP_PRIM_FILM_C:        return ZIMG_PRIMARIES_FILM;
+    case MP_CSP_PRIM_CIE_1931:      // no dedicated mapping from here on
+    case MP_CSP_PRIM_APPLE:
+    case MP_CSP_PRIM_ADOBE:
+    case MP_CSP_PRIM_PRO_PHOTO:
+    case MP_CSP_PRIM_V_GAMUT:
+    case MP_CSP_PRIM_S_GAMUT:
+    case MP_CSP_PRIM_ACES_AP0:
+    case MP_CSP_PRIM_ACES_AP1:
+    default:                        return ZIMG_PRIMARIES_BT709; // incl. BT.709
+    }
+}
+
+static void destroy_zimg(struct mp_zimg_context *ctx)
+{
+    for (int i = 0; i < ctx->num_states; i++) { // free each state's members
+        struct mp_zimg_state *state = ctx->states[i];
+        talloc_free(state->tmp_alloc);
+        zimg_filter_graph_free(state->graph);
+        talloc_free(state->src);
+        talloc_free(state->dst);
+        talloc_free(state);
+    }
+    ctx->num_states = 0; // the states array itself is not freed here
+}
+
+// talloc destructor of mp_zimg_context: frees the zimg states, then ctx->tp.
+static void free_mp_zimg(void *p)
+{
+    struct mp_zimg_context *zctx = p;
+    destroy_zimg(zctx);
+    TA_FREEP(&zctx->tp);
+}
+
+// Allocate a context that logs to mp_null_log and starts with the default
+// zimg options; free_mp_zimg is installed as its talloc destructor.
+struct mp_zimg_context *mp_zimg_alloc(void)
+{
+    struct mp_zimg_context *res = talloc_ptrtype(NULL, res);
+    *res = (struct mp_zimg_context){ .log = mp_null_log,
+                                     .opts = zimg_opts_defaults };
+    talloc_set_destructor(res, free_mp_zimg);
+    return res;
+}
+
+// Attach an option cache for zimg_conf to ctx; does nothing if one exists.
+void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx,
+                                 struct mpv_global *g)
+{
+    if (!ctx->opts_cache) {
+        ctx->opts_cache = m_config_cache_alloc(ctx, g, &zimg_conf);
+        destroy_zimg(ctx);                // discard states (forces update)
+        mp_zimg_update_from_cmdline(ctx); // first copy of the option values
+    }
+}
+
+// Repack columns x0..x1 of line i with r->repack; always returns 0.
+static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1)
+{
+    struct mp_zimg_repack *rp = user;
+
+    // Unpacking tolerates unaligned reads: round x0 down and read a bit more.
+    if (!rp->pack)
+        x0 &= ~(unsigned)(mp_repack_get_align_x(rp->repack) - 1);
+
+    // mp_repack requires this alignment; zimg is relied on to provide it.
+    assert(!(i & (mp_repack_get_align_y(rp->repack) - 1)));
+    assert(!(x0 & (mp_repack_get_align_x(rp->repack) - 1)));
+
+    // zmask[0] is applied to the source line when packing, else to the dest.
+    unsigned src_mask = rp->pack ? rp->zmask[0] : ZIMG_BUFFER_MAX;
+    unsigned dst_mask = rp->pack ? ZIMG_BUFFER_MAX : rp->zmask[0];
+    repack_line(rp->repack, x0, i & dst_mask, x0, i & src_mask, x1 - x0);
+    return 0;
+}
+
+// Point r->zbuf at the planes zimg should use for image a_mpi. Well-aligned
+// planes are used directly; the rest go through r->tmp with the zmask line
+// mask. For packing, a_mpi is first cropped to the slice described by st.
+// Returns false if repack_config_buffers() fails.
+static bool wrap_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r,
+                        struct mp_image *a_mpi)
+{
+    r->zbuf = (zimg_image_buffer){ZIMG_API_VERSION};
+
+    struct mp_image *img = a_mpi;
+    if (r->pack) {
+        img = &r->cropped_tmp;
+        *img = *a_mpi;
+        int slice_end = st->slice_y + st->slice_h;
+        // Chroma subsampling can make the slice extend past the nominal
+        // image height (see real_h in setup_format); grow the copy to fit.
+        if (slice_end > img->h) {
+            assert(slice_end - img->h < 4);
+            mp_image_set_size(img, img->w, slice_end);
+        }
+        mp_image_crop(img, 0, st->slice_y, img->w, slice_end);
+    }
+
+    // Planes whose pointer and stride satisfy ZIMG_ALIGN need no copy.
+    bool direct[MP_MAX_PLANES] = {0};
+    for (int p = 0; p < img->num_planes; p++) {
+        bool ptr_ok = (uintptr_t)img->planes[p] % ZIMG_ALIGN == 0;
+        direct[p] = ptr_ok && img->stride[p] % ZIMG_ALIGN == 0;
+    }
+
+    struct mp_image *rp_src = r->pack ? img : r->tmp;
+    struct mp_image *rp_dst = r->pack ? r->tmp : img;
+    if (!repack_config_buffers(r->repack, 0, rp_src, 0, rp_dst, direct))
+        return false;
+
+    // This loop is the one place where zimg's plane order matters: z_planes
+    // translates a zimg plane index into the mp_image plane index, which may
+    // be permuted relative to r->tmp / img.
+    for (int z = 0; z < MP_ARRAY_SIZE(r->zbuf.plane); z++) {
+        int p = r->z_planes[z];
+        if (p >= 0) {
+            struct mp_image *from = direct[p] ? img : r->tmp;
+            r->zbuf.plane[z].data = from->planes[p];
+            r->zbuf.plane[z].stride = from->stride[p];
+            r->zbuf.plane[z].mask = direct[p] ? ZIMG_BUFFER_MAX : r->zmask[p];
+        }
+    }
+    return true;
+}
+
+// Fill *zfmt with the zimg description of user_fmt and set up the repack
+// state r to match.
+//  zfmt:     output; reset to zimg's defaults, then filled in field by field.
+//  r:        repack state; its fmt, pack, repack, zimgfmt, z_planes,
+//            num_planes, real_w and real_h fields are overwritten.
+//  pack:     true if r is the conversion output, false if it is the input.
+//  user_fmt: image parameters as provided by the caller (copied to r->fmt).
+//  ctx, st:  can be NULL for probing. Without ctx, the repacker is freed
+//            again as soon as it has been queried, and the "fast" tweak is
+//            skipped; without st, no slice/active region setup is done.
+// Returns false if the format cannot be handled.
+static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r,
+                         bool pack, struct mp_image_params *user_fmt,
+                         struct mp_zimg_context *ctx,
+                         struct mp_zimg_state *st)
+{
+    r->fmt = *user_fmt;
+    r->pack = pack;
+
+    zimg_image_format_default(zfmt, ZIMG_API_VERSION);
+
+    int rp_flags = 0;
+
+    // For e.g. RGB565, go to lowest depth on pack for less weird dithering.
+    if (r->pack) {
+        rp_flags |= REPACK_CREATE_ROUND_DOWN;
+    } else {
+        rp_flags |= REPACK_CREATE_EXPAND_8BIT;
+    }
+
+    r->repack = mp_repack_create_planar(r->fmt.imgfmt, r->pack, rp_flags);
+    if (!r->repack)
+        return false;
+
+    // Queried now, because the repacker may be freed again just below.
+    int align_x = mp_repack_get_align_x(r->repack);
+
+    // The planar format handed to zimg: the repacker's source format when
+    // packing, its destination format otherwise.
+    r->zimgfmt = r->pack ? mp_repack_get_format_src(r->repack)
+                         : mp_repack_get_format_dst(r->repack);
+
+    // With a real context, r keeps the repacker (as its talloc child); when
+    // probing, only the information queried above was needed.
+    if (ctx) {
+        talloc_steal(r, r->repack);
+    } else {
+        TA_FREEP(&r->repack);
+    }
+
+    // Work on a copy from here on, so r->fmt stays exactly what the caller
+    // passed in.
+    struct mp_image_params fmt = r->fmt;
+    mp_image_params_guess_csp(&fmt);
+
+    struct mp_regular_imgfmt desc;
+    if (!mp_get_regular_imgfmt(&desc, r->zimgfmt))
+        return false;
+
+    // Relies on zimg callbacks reading on 64 byte alignment.
+    if (!MP_IS_POWER_OF_2(align_x) || align_x > 64 / desc.component_size)
+        return false;
+
+    // no weird stuff
+    if (desc.num_planes > 4)
+        return false;
+
+    // z_planes[i] is the index of the zimgfmt plane used as zimg plane i, or
+    // -1 if there is no such plane.
+    for (int n = 0; n < 4; n++)
+        r->z_planes[n] = -1;
+
+    for (int n = 0; n < desc.num_planes; n++) {
+        // Only planes carrying exactly one component are supported.
+        if (desc.planes[n].num_components != 1)
+            return false;
+        // Component IDs 1-3 map to zimg planes 0-2; 4 is alpha.
+        int c = desc.planes[n].components[0];
+        if (c < 1 || c > 4)
+            return false;
+        if (c < 4) {
+            // Unfortunately, ffmpeg prefers GBR order for planar RGB, while zimg
+            // is sane. This makes it necessary to determine and fix the order.
+            r->z_planes[c - 1] = n;
+        } else {
+            r->z_planes[3] = n; // alpha, always plane 4 in zimg
+
+#if HAVE_ZIMG_ALPHA
+            zfmt->alpha = fmt.alpha == MP_ALPHA_PREMUL
+                ? ZIMG_ALPHA_PREMULTIPLIED : ZIMG_ALPHA_STRAIGHT;
+#else
+            // Formats with alpha are rejected if HAVE_ZIMG_ALPHA is not set.
+            return false;
+#endif
+        }
+    }
+
+    r->num_planes = desc.num_planes;
+
+    // Take care of input/output size, including slicing.
+    // Note: formats with subsampled chroma may have odd width or height in
+    // mpv and FFmpeg. This is because the width/height is actually a cropping
+    // rectangle. Reconstruct the image allocation size and set the cropping.
+    zfmt->width = r->real_w = MP_ALIGN_UP(fmt.w, 1 << desc.chroma_xs);
+    zfmt->height = r->real_h = MP_ALIGN_UP(fmt.h, 1 << desc.chroma_ys);
+    if (st) {
+        if (r->pack) {
+            // Output side: this state only produces its own slice. Clamp the
+            // slice height to what is left of the image below slice_y; note
+            // that st->slice_h is updated to the clamped value as well.
+            zfmt->height = r->real_h = st->slice_h =
+                MPMIN(st->slice_y + st->slice_h, r->real_h) - st->slice_y;
+
+            assert(MP_IS_ALIGNED(r->real_h, 1 << desc.chroma_ys));
+        } else {
+            // Input side: the image keeps its full size; the active region
+            // selects the source area that maps to the destination slice.
+            // Relies on st->dst being initialized first.
+            struct mp_zimg_repack *dst = st->dst;
+
+            zfmt->active_region.width = dst->real_w * (double)fmt.w / dst->fmt.w;
+            zfmt->active_region.height = dst->real_h * st->scale_y;
+
+            zfmt->active_region.top = st->slice_y * st->scale_y;
+        }
+    }
+
+    zfmt->subsample_w = desc.chroma_xs;
+    zfmt->subsample_h = desc.chroma_ys;
+
+    // YUV unless proven otherwise: formats with at most 2 planes are treated
+    // as gray, and RGB/XYZ colorspaces use the RGB family.
+    zfmt->color_family = ZIMG_COLOR_YUV;
+    if (desc.num_planes <= 2) {
+        zfmt->color_family = ZIMG_COLOR_GREY;
+    } else if (fmt.color.space == MP_CSP_RGB || fmt.color.space == MP_CSP_XYZ) {
+        zfmt->color_family = ZIMG_COLOR_RGB;
+    }
+
+    // Map component type and size to a zimg pixel type; every other
+    // combination is unsupported.
+    if (desc.component_type == MP_COMPONENT_TYPE_UINT &&
+        desc.component_size == 1)
+    {
+        zfmt->pixel_type = ZIMG_PIXEL_BYTE;
+    } else if (desc.component_type == MP_COMPONENT_TYPE_UINT &&
+               desc.component_size == 2)
+    {
+        zfmt->pixel_type = ZIMG_PIXEL_WORD;
+    } else if (desc.component_type == MP_COMPONENT_TYPE_FLOAT &&
+               desc.component_size == 2)
+    {
+        zfmt->pixel_type = ZIMG_PIXEL_HALF;
+    } else if (desc.component_type == MP_COMPONENT_TYPE_FLOAT &&
+               desc.component_size == 4)
+    {
+        zfmt->pixel_type = ZIMG_PIXEL_FLOAT;
+    } else {
+        return false;
+    }
+
+    // (Formats like P010 are basically reported as P016.)
+    // Only a negative component_pad reduces the depth; a positive one is
+    // ignored because of the MPMIN() with 0.
+    zfmt->depth = desc.component_size * 8 + MPMIN(0, desc.component_pad);
+
+    // Everything that is not explicitly PC levels is treated as limited range.
+    zfmt->pixel_range = fmt.color.levels == MP_CSP_LEVELS_PC ?
+                        ZIMG_RANGE_FULL : ZIMG_RANGE_LIMITED;
+
+    zfmt->matrix_coefficients = mp_to_z_matrix(fmt.color.space);
+    zfmt->transfer_characteristics = mp_to_z_trc(fmt.color.gamma);
+    // For MP_CSP_XYZ only valid primaries are defined in ST 428-1
+    zfmt->color_primaries = fmt.color.space == MP_CSP_XYZ
+                                ? ZIMG_PRIMARIES_ST428
+                                : mp_to_z_prim(fmt.color.primaries);
+    zfmt->chroma_location = mp_to_z_chroma(fmt.chroma_location);
+
+    // The "fast" option substitutes the BT.709 transfer function for sRGB on
+    // RGB formats (never applied when probing, since ctx is NULL then).
+    if (ctx && ctx->opts.fast) {
+        // mpv's default for RGB output slows down zimg significantly.
+        if (zfmt->transfer_characteristics == ZIMG_TRANSFER_IEC_61966_2_1 &&
+            zfmt->color_family == ZIMG_COLOR_RGB)
+            zfmt->transfer_characteristics = ZIMG_TRANSFER_BT709;
+    }
+
+    // mpv treats _some_ gray formats as RGB; zimg doesn't like this.
+    if (zfmt->color_family == ZIMG_COLOR_GREY &&
+        zfmt->matrix_coefficients == ZIMG_MATRIX_RGB)
+        zfmt->matrix_coefficients = ZIMG_MATRIX_BT470_BG;
+
+    return true;
+}
+
+// Allocate r->tmp, the intermediate planar image sitting between zimg and the
+// repacker, and compute the per-plane zimg buffer masks in r->zmask[].
+// Needs st->graph to be built and r->zimgfmt/real_w/real_h to be set.
+// Returns false if zimg cannot report its buffering needs, or if the image
+// allocation fails.
+static bool allocate_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r)
+{
+    // Ask zimg how many lines it needs buffered on this side of the graph.
+    unsigned lines = 0;
+    int err = r->pack
+        ? zimg_filter_graph_get_output_buffering(st->graph, &lines)
+        : zimg_filter_graph_get_input_buffering(st->graph, &lines);
+    if (err)
+        return false;
+
+    r->zmask[0] = zimg_select_buffer_mask(lines);
+
+    // The mask is either ZIMG_BUFFER_MAX, or 2^k - 1 for a buffer of 2^k
+    // lines.
+    assert(r->zmask[0] == ZIMG_BUFFER_MAX || MP_IS_POWER_OF_2(r->zmask[0] + 1));
+
+    // A partial buffer that is not smaller than the image itself is replaced
+    // by a full-size one.
+    int buf_h = r->real_h;
+    if (r->zmask[0] != ZIMG_BUFFER_MAX)
+        buf_h = r->zmask[0] + 1;
+    if (buf_h >= r->real_h) {
+        buf_h = r->real_h;
+        r->zmask[0] = ZIMG_BUFFER_MAX;
+    }
+
+    r->tmp = mp_image_alloc(r->zimgfmt, r->real_w, buf_h);
+    talloc_steal(r, r->tmp);
+    if (!r->tmp)
+        return false;
+
+    // Note: although zimg doesn't require that the chroma plane's zmask is
+    //       divided by the full size zmask, the repack callback requires it,
+    //       since mp_repack can handle only proper slices.
+    int num_planes = r->tmp->fmt.num_planes;
+    for (int p = 1; p < num_planes; p++) {
+        r->zmask[p] = r->zmask[0] == ZIMG_BUFFER_MAX
+            ? r->zmask[0]
+            : r->zmask[0] >> r->tmp->fmt.ys[p];
+    }
+
+    return true;
+}
+
+// Initialize one per-slice conversion state: set up the destination and
+// source formats, build the zimg filter graph, and allocate the scratch
+// memory and intermediate buffers. slice_y/slice_h select the destination
+// rows this state is responsible for.
+// Returns false on failure; whatever was allocated up to that point stays
+// referenced by st.
+static bool mp_zimg_state_init(struct mp_zimg_context *ctx,
+                               struct mp_zimg_state *st,
+                               int slice_y, int slice_h)
+{
+    struct zimg_opts *o = &ctx->opts;
+
+    st->src = talloc_zero(NULL, struct mp_zimg_repack);
+    st->dst = talloc_zero(NULL, struct mp_zimg_repack);
+
+    st->slice_y = slice_y;
+    st->slice_h = slice_h;
+    st->scale_y = ctx->src.h / (double)ctx->dst.h;
+
+    // The destination has to come first: setting up the source reads fields
+    // of st->dst.
+    zimg_image_format zsrc, zdst;
+    if (!setup_format(&zdst, st->dst, true, &ctx->dst, ctx, st))
+        return false;
+    if (!setup_format(&zsrc, st->src, false, &ctx->src, ctx, st))
+        return false;
+
+    zimg_graph_builder_params gp;
+    zimg_graph_builder_params_default(&gp, ZIMG_API_VERSION);
+
+    // Luma scaler.
+    gp.resample_filter = o->scaler;
+    gp.filter_param_a = o->scaler_params[0];
+    gp.filter_param_b = o->scaler_params[1];
+
+    // Chroma scaler.
+    gp.resample_filter_uv = o->scaler_chroma;
+    gp.filter_param_a_uv = o->scaler_chroma_params[0];
+    gp.filter_param_b_uv = o->scaler_chroma_params[1];
+
+    gp.dither_type = o->dither;
+    gp.cpu_type = ZIMG_CPU_AUTO_64B;
+    if (o->fast)
+        gp.allow_approximate_gamma = 1;
+
+    // leave at default for SDR, which means 100 cd/m^2 for zimg
+    if (ctx->dst.color.hdr.max_luma > 0 && mp_trc_is_hdr(ctx->dst.color.gamma))
+        gp.nominal_peak_luminance = ctx->dst.color.hdr.max_luma;
+
+    st->graph = zimg_filter_graph_build(&zsrc, &zdst, &gp);
+    if (!st->graph) {
+        // The buffer is zeroed and one byte is held back, so the message is
+        // always terminated.
+        char msg[128] = {0};
+        zimg_get_last_error(msg, sizeof(msg) - 1);
+        MP_ERR(ctx, "zimg_filter_graph_build: %s \n", msg);
+        return false;
+    }
+
+    size_t scratch_size;
+    if (zimg_filter_graph_get_tmp_size(st->graph, &scratch_size) == 0) {
+        // Over-allocate by ZIMG_ALIGN, so the start can be aligned up by hand.
+        scratch_size = MP_ALIGN_UP(scratch_size, ZIMG_ALIGN) + ZIMG_ALIGN;
+        st->tmp_alloc = ta_alloc_size(NULL, scratch_size);
+        if (st->tmp_alloc) {
+            uintptr_t base = (uintptr_t)st->tmp_alloc;
+            st->tmp = (void *)MP_ALIGN_UP(base, ZIMG_ALIGN);
+        }
+    }
+    if (!st->tmp_alloc)
+        return false;
+
+    return allocate_buffer(st, st->src) && allocate_buffer(st, st->dst);
+}
+
+// (Re)build the complete conversion state from ctx->src, ctx->dst and
+// ctx->opts. Any previous state is destroyed first. The destination is split
+// into horizontal slices, each with its own filter graph; one slice is
+// processed by the calling thread, the others by a thread pool.
+// Returns false on error, in which case no state is left configured.
+bool mp_zimg_config(struct mp_zimg_context *ctx)
+{
+    destroy_zimg(ctx);
+
+    if (ctx->opts_cache)
+        mp_zimg_update_from_cmdline(ctx);
+
+    // A non-positive destination height would make slice_h 0 below, and the
+    // final slice count computation would divide by zero.
+    if (ctx->dst.h < 1)
+        goto fail;
+
+    // threads < 1 means auto-detection; never use more than 64 slices.
+    int slices = ctx->opts.threads;
+    if (slices < 1)
+        slices = av_cpu_count();
+    slices = MPCLAMP(slices, 1, 64);
+
+    struct mp_imgfmt_desc dstfmt = mp_imgfmt_get_desc(ctx->dst.imgfmt);
+    if (!dstfmt.align_y)
+        goto fail;
+    int full_h = MP_ALIGN_UP(ctx->dst.h, dstfmt.align_y);
+    int slice_h = (full_h + slices - 1) / slices;
+    slice_h = MP_ALIGN_UP(slice_h, dstfmt.align_y);
+    slice_h = MP_ALIGN_UP(slice_h, 64); // for dithering and minimum slice size
+    // Rounding up slice_h may have reduced the number of slices needed.
+    slices = (full_h + slice_h - 1) / slice_h;
+
+    // The calling thread processes one slice itself.
+    int threads = slices - 1;
+    if (threads != ctx->current_thread_count) {
+        // Just destroy and recreate all - dumb and costly, but rarely happens.
+        TA_FREEP(&ctx->tp);
+        ctx->current_thread_count = 0;
+        if (threads) {
+            MP_VERBOSE(ctx, "using %d threads for scaling\n", threads);
+            ctx->tp = mp_thread_pool_create(NULL, threads, threads, threads);
+            if (!ctx->tp)
+                goto fail;
+            ctx->current_thread_count = threads;
+        }
+    }
+
+    for (int n = 0; n < slices; n++) {
+        // Append before initializing, so the fail path also sees partially
+        // initialized states.
+        struct mp_zimg_state *st = talloc_zero(NULL, struct mp_zimg_state);
+        MP_TARRAY_APPEND(ctx, ctx->states, ctx->num_states, st);
+
+        if (!mp_zimg_state_init(ctx, st, n * slice_h, slice_h))
+            goto fail;
+    }
+
+    assert(ctx->num_states == slices);
+
+    return true;
+
+fail:
+    destroy_zimg(ctx);
+    return false;
+}
+
+// Make sure ctx is configured for the current ctx->src/ctx->dst. The existing
+// state is reused if the image parameters still match, the option cache (if
+// any) reports no update, and a filter graph exists; otherwise everything is
+// rebuilt with mp_zimg_config(). Returns false if reconfiguration fails.
+bool mp_zimg_config_image_params(struct mp_zimg_context *ctx)
+{
+    if (!ctx->num_states)
+        return mp_zimg_config(ctx);
+
+    // All states are the same, so checking only one of them is sufficient.
+    struct mp_zimg_state *first = ctx->states[0];
+
+    // The checks are chained so that each one only runs if all earlier ones
+    // passed; in particular the option cache is only polled if the image
+    // parameters are unchanged.
+    bool same_src = first->src &&
+                    mp_image_params_equal(&ctx->src, &first->src->fmt);
+    bool same_dst = same_src && first->dst &&
+                    mp_image_params_equal(&ctx->dst, &first->dst->fmt);
+    bool same_opts = same_dst &&
+        (!ctx->opts_cache || !m_config_cache_update(ctx->opts_cache));
+
+    if (same_opts && first->graph)
+        return true;
+
+    return mp_zimg_config(ctx);
+}
+
+// Run the filter graph of a single state (i.e. convert one slice). The
+// buffers of st->src and st->dst must have been set up already.
+static void do_convert(struct mp_zimg_state *st)
+{
+    assert(st->graph);
+
+    // zimg takes the input as zimg_image_buffer_const, which is a distinct
+    // type from zimg_image_buffer, so the descriptor is copied field by field.
+    const zimg_image_buffer *in = &st->src->zbuf;
+    zimg_image_buffer_const in_c = {ZIMG_API_VERSION};
+    for (int p = 0; p < MP_ARRAY_SIZE(in_c.plane); p++) {
+        in_c.plane[p].data = in->plane[p].data;
+        in_c.plane[p].stride = in->plane[p].stride;
+        in_c.plane[p].mask = in->plane[p].mask;
+    }
+
+    // (The API promises to succeed if no user callbacks fail, so no need
+    // to check the return value.)
+    zimg_filter_graph_process(st->graph, &in_c, &st->dst->zbuf, st->tmp,
+                              repack_entrypoint, st->src,
+                              repack_entrypoint, st->dst);
+}
+
+// Thread pool entry point: convert the slice of the state passed as ptr, then
+// wake up whoever is blocked on its thread_waiter.
+static void do_convert_thread(void *ptr)
+{
+    struct mp_zimg_state *state = ptr;
+    do_convert(state);
+
+    mp_waiter_wakeup(&state->thread_waiter, 0);
+}
+
+// Convert/scale src into dst. ctx->src/ctx->dst are set from the images'
+// parameters, and the context is reconfigured if needed. The first slice is
+// converted on the calling thread, all others on the thread pool; the function
+// returns once every slice is done.
+// Returns false if configuration or buffer setup fails; no conversion has
+// been started in that case.
+bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst,
+                     struct mp_image *src)
+{
+    ctx->src = src->params;
+    ctx->dst = dst->params;
+
+    if (!mp_zimg_config_image_params(ctx)) {
+        MP_ERR(ctx, "zimg initialization failed.\n");
+        return false;
+    }
+
+    // Bind every state to the images before any work is started.
+    for (int i = 0; i < ctx->num_states; i++) {
+        struct mp_zimg_state *cur = ctx->states[i];
+
+        bool wrapped = wrap_buffer(cur, cur->src, src) &&
+                       wrap_buffer(cur, cur->dst, dst);
+        if (!wrapped) {
+            MP_ERR(ctx, "zimg repacker initialization failed.\n");
+            return false;
+        }
+    }
+
+    // Hand all slices but the first to the thread pool.
+    for (int i = 1; i < ctx->num_states; i++) {
+        struct mp_zimg_state *cur = ctx->states[i];
+        cur->thread_waiter = (struct mp_waiter)MP_WAITER_INITIALIZER;
+
+        bool queued = mp_thread_pool_run(ctx->tp, do_convert_thread, cur);
+        // This is guaranteed by the API; and unrolling would be inconvenient.
+        assert(queued);
+    }
+
+    // The first slice is done right here.
+    do_convert(ctx->states[0]);
+
+    // Wait until the workers have finished their slices.
+    for (int i = 1; i < ctx->num_states; i++)
+        mp_waiter_wait(&ctx->states[i]->thread_waiter);
+
+    return true;
+}
+
+// Probe whether imgfmt can be used as conversion output (out=true) or input
+// (out=false). This runs setup_format() on throwaway state, without a context
+// or slice state.
+static bool supports_format(int imgfmt, bool out)
+{
+    zimg_image_format zfmt;
+    struct mp_zimg_repack probe;
+    struct mp_image_params params = {.imgfmt = imgfmt};
+
+    bool ok = setup_format(&zfmt, &probe, out, &params, NULL, NULL);
+    return ok;
+}
+
+// Return whether imgfmt can be used as input (source) format.
+bool mp_zimg_supports_in_format(int imgfmt)
+{
+    const bool as_output = false;
+    return supports_format(imgfmt, as_output);
+}
+
+// Return whether imgfmt can be used as output (destination) format.
+bool mp_zimg_supports_out_format(int imgfmt)
+{
+    const bool as_output = true;
+    return supports_format(imgfmt, as_output);
+}
diff --git a/video/zimg.h b/video/zimg.h
new file mode 100644
index 0000000..be018ca
--- /dev/null
+++ b/video/zimg.h
@@ -0,0 +1,73 @@
+#pragma once
+
+#include <stdbool.h>
+
+#include <zimg.h>
+
+#include "mp_image.h"
+
+#define ZIMG_ALIGN 64
+
+struct mpv_global;
+
+// Return whether the given mpv image format (IMGFMT_*) can be used as
+// conversion input or output, respectively. No context is needed for this.
+bool mp_zimg_supports_in_format(int imgfmt);
+bool mp_zimg_supports_out_format(int imgfmt);
+
+// User-settable conversion options. They are applied when the filter graph is
+// (re)built by mp_zimg_config().
+struct zimg_opts {
+    // Luma scaler: passed to zimg as resample_filter, with scaler_params[0]
+    // and [1] as filter_param_a and filter_param_b.
+    int scaler;
+    double scaler_params[2];
+    // Chroma scaler: passed to zimg as resample_filter_uv, with the params
+    // as filter_param_a_uv and filter_param_b_uv.
+    int scaler_chroma;
+    double scaler_chroma_params[2];
+    // Passed to zimg as dither_type.
+    int dither;
+    // Trade accuracy for speed: enables zimg's allow_approximate_gamma, and
+    // treats sRGB transfer on RGB formats as BT.709.
+    bool fast;
+    // Number of slices to convert in parallel (including the calling thread).
+    // Values < 1 select the CPU count; the result is clamped to 1..64.
+    int threads;
+};
+
+extern const struct zimg_opts zimg_opts_defaults;
+
+// State for converting/scaling images with zimg. Allocate with
+// mp_zimg_alloc(); the first group of fields may be set by the user.
+struct mp_zimg_context {
+    // Can be set for verbose error printing.
+    struct mp_log *log;
+
+    // User configuration. Note: changing these requires calling mp_zimg_config()
+    // to update the filter graph. The first mp_zimg_convert() call (or if the
+    // image format changes) will do this automatically.
+    struct zimg_opts opts;
+
+    // Input/output parameters. Note: if these mismatch with the
+    // mp_zimg_convert() parameters, mp_zimg_config() will be called
+    // automatically.
+    struct mp_image_params src, dst;
+
+    // Cached zimg state (if any). Private, do not touch.
+    // If opts_cache is set, mp_zimg_config() refreshes opts from the command
+    // line options before building the graph.
+    struct m_config_cache *opts_cache;
+    // One state per slice of the destination image; each has its own filter
+    // graph.
+    struct mp_zimg_state **states;
+    int num_states;
+    // Thread pool running all slices except the first; current_thread_count
+    // is the number of threads it was created with (0 if there is no pool).
+    struct mp_thread_pool *tp;
+    int current_thread_count;
+};
+
+// Allocate a zimg context. Always succeeds. Returns a talloc pointer (use
+// talloc_free() to release it).
+struct mp_zimg_context *mp_zimg_alloc(void);
+
+// Enable auto-update of parameters from command line. Don't try to set custom
+// options (other than possibly .src/.dst), because they might be overwritten
+// if the user changes any options.
+void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx,
+                                 struct mpv_global *g);
+
+// Try to build the conversion chain using the parameters currently set in ctx.
+// Any previously built chain is destroyed first, even if this fails.
+// If this succeeds, mp_zimg_convert() will always succeed (probably), as long
+// as the input has the same parameters.
+// Returns false on error.
+bool mp_zimg_config(struct mp_zimg_context *ctx);
+
+// Similar to mp_zimg_config(), but assume none of the user parameters changed,
+// except possibly .src and .dst. This essentially checks whether src/dst
+// changed, and if so, calls mp_zimg_config(). It is also called if nothing
+// was configured yet, or if command line options are enabled and changed.
+// Returns false on error.
+bool mp_zimg_config_image_params(struct mp_zimg_context *ctx);
+
+// Convert/scale src to dst. On failure, the data in dst is not touched.
+// This overwrites ctx->src and ctx->dst with the parameters of the images,
+// and reconfigures the context if needed.
+// Returns false on error.
+bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst,
+                     struct mp_image *src);