diff options
Diffstat (limited to '')
213 files changed, 79523 insertions, 0 deletions
diff --git a/video/csputils.c b/video/csputils.c new file mode 100644 index 0000000..59200c5 --- /dev/null +++ b/video/csputils.c @@ -0,0 +1,1020 @@ +/* + * Common code related to colorspaces and conversion + * + * Copyleft (C) 2009 Reimar Döffinger <Reimar.Doeffinger@gmx.de> + * + * mp_invert_cmat based on DarkPlaces engine (relicensed from GPL to LGPL) + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdint.h> +#include <math.h> +#include <assert.h> +#include <libavutil/common.h> +#include <libavcodec/avcodec.h> + +#include "mp_image.h" +#include "csputils.h" +#include "options/m_config.h" +#include "options/m_option.h" + +const struct m_opt_choice_alternatives mp_csp_names[] = { + {"auto", MP_CSP_AUTO}, + {"bt.601", MP_CSP_BT_601}, + {"bt.709", MP_CSP_BT_709}, + {"smpte-240m", MP_CSP_SMPTE_240M}, + {"bt.2020-ncl", MP_CSP_BT_2020_NC}, + {"bt.2020-cl", MP_CSP_BT_2020_C}, + {"rgb", MP_CSP_RGB}, + {"xyz", MP_CSP_XYZ}, + {"ycgco", MP_CSP_YCGCO}, + {0} +}; + +const struct m_opt_choice_alternatives mp_csp_levels_names[] = { + {"auto", MP_CSP_LEVELS_AUTO}, + {"limited", MP_CSP_LEVELS_TV}, + {"full", MP_CSP_LEVELS_PC}, + {0} +}; + +const struct m_opt_choice_alternatives mp_csp_prim_names[] = { + {"auto", MP_CSP_PRIM_AUTO}, + {"bt.601-525", MP_CSP_PRIM_BT_601_525}, + {"bt.601-625", MP_CSP_PRIM_BT_601_625}, + {"bt.709", MP_CSP_PRIM_BT_709}, + {"bt.2020", MP_CSP_PRIM_BT_2020}, + {"bt.470m", MP_CSP_PRIM_BT_470M}, + {"apple", MP_CSP_PRIM_APPLE}, + {"adobe", MP_CSP_PRIM_ADOBE}, + {"prophoto", MP_CSP_PRIM_PRO_PHOTO}, + {"cie1931", MP_CSP_PRIM_CIE_1931}, + {"dci-p3", MP_CSP_PRIM_DCI_P3}, + {"display-p3", MP_CSP_PRIM_DISPLAY_P3}, + {"v-gamut", MP_CSP_PRIM_V_GAMUT}, + {"s-gamut", MP_CSP_PRIM_S_GAMUT}, + {"ebu3213", MP_CSP_PRIM_EBU_3213}, + {"film-c", MP_CSP_PRIM_FILM_C}, + {"aces-ap0", MP_CSP_PRIM_ACES_AP0}, + {"aces-ap1", MP_CSP_PRIM_ACES_AP1}, + {0} +}; + +const struct m_opt_choice_alternatives mp_csp_trc_names[] = { + {"auto", MP_CSP_TRC_AUTO}, + {"bt.1886", MP_CSP_TRC_BT_1886}, + {"srgb", MP_CSP_TRC_SRGB}, + {"linear", MP_CSP_TRC_LINEAR}, + {"gamma1.8", MP_CSP_TRC_GAMMA18}, + {"gamma2.0", MP_CSP_TRC_GAMMA20}, + {"gamma2.2", MP_CSP_TRC_GAMMA22}, + {"gamma2.4", MP_CSP_TRC_GAMMA24}, + {"gamma2.6", MP_CSP_TRC_GAMMA26}, + {"gamma2.8", MP_CSP_TRC_GAMMA28}, + {"prophoto", MP_CSP_TRC_PRO_PHOTO}, + {"pq", MP_CSP_TRC_PQ}, + {"hlg", MP_CSP_TRC_HLG}, + {"v-log", 
MP_CSP_TRC_V_LOG}, + {"s-log1", MP_CSP_TRC_S_LOG1}, + {"s-log2", MP_CSP_TRC_S_LOG2}, + {"st428", MP_CSP_TRC_ST428}, + {0} +}; + +const struct m_opt_choice_alternatives mp_csp_light_names[] = { + {"auto", MP_CSP_LIGHT_AUTO}, + {"display", MP_CSP_LIGHT_DISPLAY}, + {"hlg", MP_CSP_LIGHT_SCENE_HLG}, + {"709-1886", MP_CSP_LIGHT_SCENE_709_1886}, + {"gamma1.2", MP_CSP_LIGHT_SCENE_1_2}, + {0} +}; + +const struct m_opt_choice_alternatives mp_chroma_names[] = { + {"unknown", MP_CHROMA_AUTO}, + {"uhd", MP_CHROMA_TOPLEFT}, + {"mpeg2/4/h264",MP_CHROMA_LEFT}, + {"mpeg1/jpeg", MP_CHROMA_CENTER}, + {0} +}; + +const struct m_opt_choice_alternatives mp_alpha_names[] = { + {"auto", MP_ALPHA_AUTO}, + {"straight", MP_ALPHA_STRAIGHT}, + {"premul", MP_ALPHA_PREMUL}, + {0} +}; + +void mp_colorspace_merge(struct mp_colorspace *orig, struct mp_colorspace *new) +{ + if (!orig->space) + orig->space = new->space; + if (!orig->levels) + orig->levels = new->levels; + if (!orig->primaries) + orig->primaries = new->primaries; + if (!orig->gamma) + orig->gamma = new->gamma; + if (!orig->light) + orig->light = new->light; + pl_hdr_metadata_merge(&orig->hdr, &new->hdr); +} + +// The short name _must_ match with what vf_stereo3d accepts (if supported). +// The long name in comments is closer to the Matroska spec (StereoMode element). +// The numeric index matches the Matroska StereoMode value. If you add entries +// that don't match Matroska, make sure demux_mkv.c rejects them properly. 
+const struct m_opt_choice_alternatives mp_stereo3d_names[] = { + {"no", -1}, // disable/invalid + {"mono", 0}, + {"sbs2l", 1}, // "side_by_side_left" + {"ab2r", 2}, // "top_bottom_right" + {"ab2l", 3}, // "top_bottom_left" + {"checkr", 4}, // "checkboard_right" (unsupported by vf_stereo3d) + {"checkl", 5}, // "checkboard_left" (unsupported by vf_stereo3d) + {"irr", 6}, // "row_interleaved_right" + {"irl", 7}, // "row_interleaved_left" + {"icr", 8}, // "column_interleaved_right" (unsupported by vf_stereo3d) + {"icl", 9}, // "column_interleaved_left" (unsupported by vf_stereo3d) + {"arcc", 10}, // "anaglyph_cyan_red" (Matroska: unclear which mode) + {"sbs2r", 11}, // "side_by_side_right" + {"agmc", 12}, // "anaglyph_green_magenta" (Matroska: unclear which mode) + {"al", 13}, // "alternating frames left first" + {"ar", 14}, // "alternating frames right first" + {0} +}; + +enum mp_csp avcol_spc_to_mp_csp(int avcolorspace) +{ + switch (avcolorspace) { + case AVCOL_SPC_BT709: return MP_CSP_BT_709; + case AVCOL_SPC_BT470BG: return MP_CSP_BT_601; + case AVCOL_SPC_BT2020_NCL: return MP_CSP_BT_2020_NC; + case AVCOL_SPC_BT2020_CL: return MP_CSP_BT_2020_C; + case AVCOL_SPC_SMPTE170M: return MP_CSP_BT_601; + case AVCOL_SPC_SMPTE240M: return MP_CSP_SMPTE_240M; + case AVCOL_SPC_RGB: return MP_CSP_RGB; + case AVCOL_SPC_YCOCG: return MP_CSP_YCGCO; + default: return MP_CSP_AUTO; + } +} + +enum mp_csp_levels avcol_range_to_mp_csp_levels(int avrange) +{ + switch (avrange) { + case AVCOL_RANGE_MPEG: return MP_CSP_LEVELS_TV; + case AVCOL_RANGE_JPEG: return MP_CSP_LEVELS_PC; + default: return MP_CSP_LEVELS_AUTO; + } +} + +enum mp_csp_prim avcol_pri_to_mp_csp_prim(int avpri) +{ + switch (avpri) { + case AVCOL_PRI_SMPTE240M: // Same as below + case AVCOL_PRI_SMPTE170M: return MP_CSP_PRIM_BT_601_525; + case AVCOL_PRI_BT470BG: return MP_CSP_PRIM_BT_601_625; + case AVCOL_PRI_BT709: return MP_CSP_PRIM_BT_709; + case AVCOL_PRI_BT2020: return MP_CSP_PRIM_BT_2020; + case AVCOL_PRI_BT470M: return 
MP_CSP_PRIM_BT_470M; + case AVCOL_PRI_SMPTE431: return MP_CSP_PRIM_DCI_P3; + case AVCOL_PRI_SMPTE432: return MP_CSP_PRIM_DISPLAY_P3; + default: return MP_CSP_PRIM_AUTO; + } +} + +enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc) +{ + switch (avtrc) { + case AVCOL_TRC_BT709: + case AVCOL_TRC_SMPTE170M: + case AVCOL_TRC_SMPTE240M: + case AVCOL_TRC_BT1361_ECG: + case AVCOL_TRC_BT2020_10: + case AVCOL_TRC_BT2020_12: return MP_CSP_TRC_BT_1886; + case AVCOL_TRC_IEC61966_2_1: return MP_CSP_TRC_SRGB; + case AVCOL_TRC_LINEAR: return MP_CSP_TRC_LINEAR; + case AVCOL_TRC_GAMMA22: return MP_CSP_TRC_GAMMA22; + case AVCOL_TRC_GAMMA28: return MP_CSP_TRC_GAMMA28; + case AVCOL_TRC_SMPTEST2084: return MP_CSP_TRC_PQ; + case AVCOL_TRC_ARIB_STD_B67: return MP_CSP_TRC_HLG; + case AVCOL_TRC_SMPTE428: return MP_CSP_TRC_ST428; + default: return MP_CSP_TRC_AUTO; + } +} + +int mp_csp_to_avcol_spc(enum mp_csp colorspace) +{ + switch (colorspace) { + case MP_CSP_BT_709: return AVCOL_SPC_BT709; + case MP_CSP_BT_601: return AVCOL_SPC_BT470BG; + case MP_CSP_BT_2020_NC: return AVCOL_SPC_BT2020_NCL; + case MP_CSP_BT_2020_C: return AVCOL_SPC_BT2020_CL; + case MP_CSP_SMPTE_240M: return AVCOL_SPC_SMPTE240M; + case MP_CSP_RGB: return AVCOL_SPC_RGB; + case MP_CSP_YCGCO: return AVCOL_SPC_YCOCG; + default: return AVCOL_SPC_UNSPECIFIED; + } +} + +int mp_csp_levels_to_avcol_range(enum mp_csp_levels range) +{ + switch (range) { + case MP_CSP_LEVELS_TV: return AVCOL_RANGE_MPEG; + case MP_CSP_LEVELS_PC: return AVCOL_RANGE_JPEG; + default: return AVCOL_RANGE_UNSPECIFIED; + } +} + +int mp_csp_prim_to_avcol_pri(enum mp_csp_prim prim) +{ + switch (prim) { + case MP_CSP_PRIM_BT_601_525: return AVCOL_PRI_SMPTE170M; + case MP_CSP_PRIM_BT_601_625: return AVCOL_PRI_BT470BG; + case MP_CSP_PRIM_BT_709: return AVCOL_PRI_BT709; + case MP_CSP_PRIM_BT_2020: return AVCOL_PRI_BT2020; + case MP_CSP_PRIM_BT_470M: return AVCOL_PRI_BT470M; + case MP_CSP_PRIM_DCI_P3: return AVCOL_PRI_SMPTE431; + case MP_CSP_PRIM_DISPLAY_P3: return 
AVCOL_PRI_SMPTE432; + default: return AVCOL_PRI_UNSPECIFIED; + } +} + +int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc) +{ + switch (trc) { + // We just call it BT.1886 since we're decoding, but it's still BT.709 + case MP_CSP_TRC_BT_1886: return AVCOL_TRC_BT709; + case MP_CSP_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1; + case MP_CSP_TRC_LINEAR: return AVCOL_TRC_LINEAR; + case MP_CSP_TRC_GAMMA22: return AVCOL_TRC_GAMMA22; + case MP_CSP_TRC_GAMMA28: return AVCOL_TRC_GAMMA28; + case MP_CSP_TRC_PQ: return AVCOL_TRC_SMPTEST2084; + case MP_CSP_TRC_HLG: return AVCOL_TRC_ARIB_STD_B67; + case MP_CSP_TRC_ST428: return AVCOL_TRC_SMPTE428; + default: return AVCOL_TRC_UNSPECIFIED; + } +} + +enum mp_csp mp_csp_guess_colorspace(int width, int height) +{ + return width >= 1280 || height > 576 ? MP_CSP_BT_709 : MP_CSP_BT_601; +} + +enum mp_csp_prim mp_csp_guess_primaries(int width, int height) +{ + // HD content + if (width >= 1280 || height > 576) + return MP_CSP_PRIM_BT_709; + + switch (height) { + case 576: // Typical PAL content, including anamorphic/squared + return MP_CSP_PRIM_BT_601_625; + + case 480: // Typical NTSC content, including squared + case 486: // NTSC Pro or anamorphic NTSC + return MP_CSP_PRIM_BT_601_525; + + default: // No good metric, just pick BT.709 to minimize damage + return MP_CSP_PRIM_BT_709; + } +} + +enum mp_chroma_location avchroma_location_to_mp(int avloc) +{ + switch (avloc) { + case AVCHROMA_LOC_TOPLEFT: return MP_CHROMA_TOPLEFT; + case AVCHROMA_LOC_LEFT: return MP_CHROMA_LEFT; + case AVCHROMA_LOC_CENTER: return MP_CHROMA_CENTER; + default: return MP_CHROMA_AUTO; + } +} + +int mp_chroma_location_to_av(enum mp_chroma_location mploc) +{ + switch (mploc) { + case MP_CHROMA_TOPLEFT: return AVCHROMA_LOC_TOPLEFT; + case MP_CHROMA_LEFT: return AVCHROMA_LOC_LEFT; + case MP_CHROMA_CENTER: return AVCHROMA_LOC_CENTER; + default: return AVCHROMA_LOC_UNSPECIFIED; + } +} + +// Return location of chroma samples relative to luma samples. 0/0 means +// centered. 
Other possible values are -1 (top/left) and +1 (right/bottom). +void mp_get_chroma_location(enum mp_chroma_location loc, int *x, int *y) +{ + *x = 0; + *y = 0; + if (loc == MP_CHROMA_LEFT || loc == MP_CHROMA_TOPLEFT) + *x = -1; + if (loc == MP_CHROMA_TOPLEFT) + *y = -1; +} + +void mp_invert_matrix3x3(float m[3][3]) +{ + float m00 = m[0][0], m01 = m[0][1], m02 = m[0][2], + m10 = m[1][0], m11 = m[1][1], m12 = m[1][2], + m20 = m[2][0], m21 = m[2][1], m22 = m[2][2]; + + // calculate the adjoint + m[0][0] = (m11 * m22 - m21 * m12); + m[0][1] = -(m01 * m22 - m21 * m02); + m[0][2] = (m01 * m12 - m11 * m02); + m[1][0] = -(m10 * m22 - m20 * m12); + m[1][1] = (m00 * m22 - m20 * m02); + m[1][2] = -(m00 * m12 - m10 * m02); + m[2][0] = (m10 * m21 - m20 * m11); + m[2][1] = -(m00 * m21 - m20 * m01); + m[2][2] = (m00 * m11 - m10 * m01); + + // calculate the determinant (as inverse == 1/det * adjoint, + // adjoint * m == identity * det, so this calculates the det) + float det = m00 * m[0][0] + m10 * m[0][1] + m20 * m[0][2]; + det = 1.0f / det; + + for (int i = 0; i < 3; i++) { + for (int j = 0; j < 3; j++) + m[i][j] *= det; + } +} + +// A := A * B +static void mp_mul_matrix3x3(float a[3][3], float b[3][3]) +{ + float a00 = a[0][0], a01 = a[0][1], a02 = a[0][2], + a10 = a[1][0], a11 = a[1][1], a12 = a[1][2], + a20 = a[2][0], a21 = a[2][1], a22 = a[2][2]; + + for (int i = 0; i < 3; i++) { + a[0][i] = a00 * b[0][i] + a01 * b[1][i] + a02 * b[2][i]; + a[1][i] = a10 * b[0][i] + a11 * b[1][i] + a12 * b[2][i]; + a[2][i] = a20 * b[0][i] + a21 * b[1][i] + a22 * b[2][i]; + } +} + +// return the primaries associated with a certain mp_csp_primaries val +struct mp_csp_primaries mp_get_csp_primaries(enum mp_csp_prim spc) +{ + /* + Values from: ITU-R Recommendations BT.470-6, BT.601-7, BT.709-5, BT.2020-0 + + https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.470-6-199811-S!!PDF-E.pdf + https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.601-7-201103-I!!PDF-E.pdf + 
https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.709-5-200204-I!!PDF-E.pdf + https://www.itu.int/dms_pubrec/itu-r/rec/bt/R-REC-BT.2020-0-201208-I!!PDF-E.pdf + + Other colorspaces from https://en.wikipedia.org/wiki/RGB_color_space#Specifications + */ + + // CIE standard illuminant series + static const struct mp_csp_col_xy + d50 = {0.34577, 0.35850}, + d65 = {0.31271, 0.32902}, + c = {0.31006, 0.31616}, + dci = {0.31400, 0.35100}, + e = {1.0/3.0, 1.0/3.0}; + + switch (spc) { + case MP_CSP_PRIM_BT_470M: + return (struct mp_csp_primaries) { + .red = {0.670, 0.330}, + .green = {0.210, 0.710}, + .blue = {0.140, 0.080}, + .white = c + }; + case MP_CSP_PRIM_BT_601_525: + return (struct mp_csp_primaries) { + .red = {0.630, 0.340}, + .green = {0.310, 0.595}, + .blue = {0.155, 0.070}, + .white = d65 + }; + case MP_CSP_PRIM_BT_601_625: + return (struct mp_csp_primaries) { + .red = {0.640, 0.330}, + .green = {0.290, 0.600}, + .blue = {0.150, 0.060}, + .white = d65 + }; + // This is the default assumption if no colorspace information could + // be determined, eg. for files which have no video channel. 
+ case MP_CSP_PRIM_AUTO: + case MP_CSP_PRIM_BT_709: + return (struct mp_csp_primaries) { + .red = {0.640, 0.330}, + .green = {0.300, 0.600}, + .blue = {0.150, 0.060}, + .white = d65 + }; + case MP_CSP_PRIM_BT_2020: + return (struct mp_csp_primaries) { + .red = {0.708, 0.292}, + .green = {0.170, 0.797}, + .blue = {0.131, 0.046}, + .white = d65 + }; + case MP_CSP_PRIM_APPLE: + return (struct mp_csp_primaries) { + .red = {0.625, 0.340}, + .green = {0.280, 0.595}, + .blue = {0.115, 0.070}, + .white = d65 + }; + case MP_CSP_PRIM_ADOBE: + return (struct mp_csp_primaries) { + .red = {0.640, 0.330}, + .green = {0.210, 0.710}, + .blue = {0.150, 0.060}, + .white = d65 + }; + case MP_CSP_PRIM_PRO_PHOTO: + return (struct mp_csp_primaries) { + .red = {0.7347, 0.2653}, + .green = {0.1596, 0.8404}, + .blue = {0.0366, 0.0001}, + .white = d50 + }; + case MP_CSP_PRIM_CIE_1931: + return (struct mp_csp_primaries) { + .red = {0.7347, 0.2653}, + .green = {0.2738, 0.7174}, + .blue = {0.1666, 0.0089}, + .white = e + }; + // From SMPTE RP 431-2 and 432-1 + case MP_CSP_PRIM_DCI_P3: + case MP_CSP_PRIM_DISPLAY_P3: + return (struct mp_csp_primaries) { + .red = {0.680, 0.320}, + .green = {0.265, 0.690}, + .blue = {0.150, 0.060}, + .white = spc == MP_CSP_PRIM_DCI_P3 ? dci : d65 + }; + // From Panasonic VARICAM reference manual + case MP_CSP_PRIM_V_GAMUT: + return (struct mp_csp_primaries) { + .red = {0.730, 0.280}, + .green = {0.165, 0.840}, + .blue = {0.100, -0.03}, + .white = d65 + }; + // From Sony S-Log reference manual + case MP_CSP_PRIM_S_GAMUT: + return (struct mp_csp_primaries) { + .red = {0.730, 0.280}, + .green = {0.140, 0.855}, + .blue = {0.100, -0.05}, + .white = d65 + }; + // from EBU Tech. 
3213-E + case MP_CSP_PRIM_EBU_3213: + return (struct mp_csp_primaries) { + .red = {0.630, 0.340}, + .green = {0.295, 0.605}, + .blue = {0.155, 0.077}, + .white = d65 + }; + // From H.273, traditional film with Illuminant C + case MP_CSP_PRIM_FILM_C: + return (struct mp_csp_primaries) { + .red = {0.681, 0.319}, + .green = {0.243, 0.692}, + .blue = {0.145, 0.049}, + .white = c + }; + // From libplacebo source code + case MP_CSP_PRIM_ACES_AP0: + return (struct mp_csp_primaries) { + .red = {0.7347, 0.2653}, + .green = {0.0000, 1.0000}, + .blue = {0.0001, -0.0770}, + .white = {0.32168, 0.33767}, + }; + // From libplacebo source code + case MP_CSP_PRIM_ACES_AP1: + return (struct mp_csp_primaries) { + .red = {0.713, 0.293}, + .green = {0.165, 0.830}, + .blue = {0.128, 0.044}, + .white = {0.32168, 0.33767}, + }; + default: + return (struct mp_csp_primaries) {{0}}; + } +} + +// Get the nominal peak for a given colorspace, relative to the reference white +// level. In other words, this returns the brightest encodable value that can +// be represented by a given transfer curve. +float mp_trc_nom_peak(enum mp_csp_trc trc) +{ + switch (trc) { + case MP_CSP_TRC_PQ: return 10000.0 / MP_REF_WHITE; + case MP_CSP_TRC_HLG: return 12.0 / MP_REF_WHITE_HLG; + case MP_CSP_TRC_V_LOG: return 46.0855; + case MP_CSP_TRC_S_LOG1: return 6.52; + case MP_CSP_TRC_S_LOG2: return 9.212; + } + + return 1.0; +} + +bool mp_trc_is_hdr(enum mp_csp_trc trc) +{ + return mp_trc_nom_peak(trc) > 1.0; +} + +// Compute the RGB/XYZ matrix as described here: +// http://www.brucelindbloom.com/index.html?Eqn_RGB_XYZ_Matrix.html +void mp_get_rgb2xyz_matrix(struct mp_csp_primaries space, float m[3][3]) +{ + float S[3], X[4], Z[4]; + + // Convert from CIE xyY to XYZ. 
Note that Y=1 holds true for all primaries + X[0] = space.red.x / space.red.y; + X[1] = space.green.x / space.green.y; + X[2] = space.blue.x / space.blue.y; + X[3] = space.white.x / space.white.y; + + Z[0] = (1 - space.red.x - space.red.y) / space.red.y; + Z[1] = (1 - space.green.x - space.green.y) / space.green.y; + Z[2] = (1 - space.blue.x - space.blue.y) / space.blue.y; + Z[3] = (1 - space.white.x - space.white.y) / space.white.y; + + // S = XYZ^-1 * W + for (int i = 0; i < 3; i++) { + m[0][i] = X[i]; + m[1][i] = 1; + m[2][i] = Z[i]; + } + + mp_invert_matrix3x3(m); + + for (int i = 0; i < 3; i++) + S[i] = m[i][0] * X[3] + m[i][1] * 1 + m[i][2] * Z[3]; + + // M = [Sc * XYZc] + for (int i = 0; i < 3; i++) { + m[0][i] = S[i] * X[i]; + m[1][i] = S[i] * 1; + m[2][i] = S[i] * Z[i]; + } +} + +// M := M * XYZd<-XYZs +static void mp_apply_chromatic_adaptation(struct mp_csp_col_xy src, + struct mp_csp_col_xy dest, float m[3][3]) +{ + // If the white points are nearly identical, this is a wasteful identity + // operation. + if (fabs(src.x - dest.x) < 1e-6 && fabs(src.y - dest.y) < 1e-6) + return; + + // XYZd<-XYZs = Ma^-1 * (I*[Cd/Cs]) * Ma + // http://www.brucelindbloom.com/index.html?Eqn_ChromAdapt.html + float C[3][2], tmp[3][3] = {{0}}; + + // Ma = Bradford matrix, arguably most popular method in use today. + // This is derived experimentally and thus hard-coded. 
+ float bradford[3][3] = { + { 0.8951, 0.2664, -0.1614 }, + { -0.7502, 1.7135, 0.0367 }, + { 0.0389, -0.0685, 1.0296 }, + }; + + for (int i = 0; i < 3; i++) { + // source cone + C[i][0] = bradford[i][0] * mp_xy_X(src) + + bradford[i][1] * 1 + + bradford[i][2] * mp_xy_Z(src); + + // dest cone + C[i][1] = bradford[i][0] * mp_xy_X(dest) + + bradford[i][1] * 1 + + bradford[i][2] * mp_xy_Z(dest); + } + + // tmp := I * [Cd/Cs] * Ma + for (int i = 0; i < 3; i++) + tmp[i][i] = C[i][1] / C[i][0]; + + mp_mul_matrix3x3(tmp, bradford); + + // M := M * Ma^-1 * tmp + mp_invert_matrix3x3(bradford); + mp_mul_matrix3x3(m, bradford); + mp_mul_matrix3x3(m, tmp); +} + +// get the coefficients of the source -> dest cms matrix +void mp_get_cms_matrix(struct mp_csp_primaries src, struct mp_csp_primaries dest, + enum mp_render_intent intent, float m[3][3]) +{ + float tmp[3][3]; + + // In saturation mapping, we don't care about accuracy and just want + // primaries to map to primaries, making this an identity transformation. + if (intent == MP_INTENT_SATURATION) { + for (int i = 0; i < 3; i++) + m[i][i] = 1; + return; + } + + // RGBd<-RGBs = RGBd<-XYZd * XYZd<-XYZs * XYZs<-RGBs + // Equations from: http://www.brucelindbloom.com/index.html?Math.html + // Note: Perceptual is treated like relative colorimetric. There's no + // definition for perceptual other than "make it look good". 
+ + // RGBd<-XYZd, inverted from XYZd<-RGBd + mp_get_rgb2xyz_matrix(dest, m); + mp_invert_matrix3x3(m); + + // Chromatic adaptation, except in absolute colorimetric intent + if (intent != MP_INTENT_ABSOLUTE_COLORIMETRIC) + mp_apply_chromatic_adaptation(src.white, dest.white, m); + + // XYZs<-RGBs + mp_get_rgb2xyz_matrix(src, tmp); + mp_mul_matrix3x3(m, tmp); +} + +// get the coefficients of an ST 428-1 xyz -> rgb conversion matrix +// intent = the rendering intent used to convert to the target primaries +static void mp_get_xyz2rgb_coeffs(struct mp_csp_params *params, + enum mp_render_intent intent, struct mp_cmat *m) +{ + // Convert to DCI-P3 + struct mp_csp_primaries prim = mp_get_csp_primaries(MP_CSP_PRIM_DCI_P3); + float brightness = params->brightness; + mp_get_rgb2xyz_matrix(prim, m->m); + mp_invert_matrix3x3(m->m); + + // All non-absolute mappings want to map source white to target white + if (intent != MP_INTENT_ABSOLUTE_COLORIMETRIC) { + // SMPTE EG 432-1 Annex H defines the white point as equal energy + static const struct mp_csp_col_xy smpte432 = {1.0/3.0, 1.0/3.0}; + mp_apply_chromatic_adaptation(smpte432, prim.white, m->m); + } + + // Since this outputs linear RGB rather than companded RGB, we + // want to linearize any brightness additions. 2 is a reasonable + // approximation for any sort of gamma function that could be in use. + // As this is an aesthetic setting only, any exact values do not matter. + brightness *= fabs(brightness); + + for (int i = 0; i < 3; i++) + m->c[i] = brightness; +} + +// Get multiplication factor required if image data is fit within the LSBs of a +// higher smaller bit depth fixed-point texture data. +// This is broken. Use mp_get_csp_uint_mul(). +double mp_get_csp_mul(enum mp_csp csp, int input_bits, int texture_bits) +{ + assert(texture_bits >= input_bits); + + // Convenience for some irrelevant cases, e.g. rgb565 or disabling expansion. + if (!input_bits) + return 1; + + // RGB always uses the full range available. 
+ if (csp == MP_CSP_RGB) + return ((1LL << input_bits) - 1.) / ((1LL << texture_bits) - 1.); + + if (csp == MP_CSP_XYZ) + return 1; + + // High bit depth YUV uses a range shifted from 8 bit. + return (1LL << input_bits) / ((1LL << texture_bits) - 1.) * 255 / 256; +} + +// Return information about color fixed point representation.his is needed for +// converting color from integer formats to or from float. Use as follows: +// float_val = uint_val * m + o +// uint_val = clamp(round((float_val - o) / m)) +// See H.264/5 Annex E. +// csp: colorspace +// levels: full range flag +// component: ID of the channel, as in mp_regular_imgfmt: +// 1 is red/luminance/gray, 2 is green/Cb, 3 is blue/Cr, 4 is alpha. +// bits: number of significant bits, e.g. 10 for yuv420p10, 16 for p010 +// out_m: returns factor to multiply the uint number with +// out_o: returns offset to add after multiplication +void mp_get_csp_uint_mul(enum mp_csp csp, enum mp_csp_levels levels, + int bits, int component, double *out_m, double *out_o) +{ + uint16_t i_min = 0; + uint16_t i_max = (1u << bits) - 1; + double f_min = 0; // min. float value + + if (csp != MP_CSP_RGB && component != 4) { + if (component == 2 || component == 3) { + f_min = (1u << (bits - 1)) / -(double)i_max; // force center => 0 + + if (levels != MP_CSP_LEVELS_PC && bits >= 8) { + i_min = 16 << (bits - 8); // => -0.5 + i_max = 240 << (bits - 8); // => 0.5 + f_min = -0.5; + } + } else { + if (levels != MP_CSP_LEVELS_PC && bits >= 8) { + i_min = 16 << (bits - 8); // => 0 + i_max = 235 << (bits - 8); // => 1 + } + } + } + + *out_m = 1.0 / (i_max - i_min); + *out_o = (1 + f_min) - i_max * *out_m; +} + +/* Fill in the Y, U, V vectors of a yuv-to-rgb conversion matrix + * based on the given luma weights of the R, G and B components (lr, lg, lb). + * lr+lg+lb is assumed to equal 1. 
+ * This function is meant for colorspaces satisfying the following + * conditions (which are true for common YUV colorspaces): + * - The mapping from input [Y, U, V] to output [R, G, B] is linear. + * - Y is the vector [1, 1, 1]. (meaning input Y component maps to 1R+1G+1B) + * - U maps to a value with zero R and positive B ([0, x, y], y > 0; + * i.e. blue and green only). + * - V maps to a value with zero B and positive R ([x, y, 0], x > 0; + * i.e. red and green only). + * - U and V are orthogonal to the luma vector [lr, lg, lb]. + * - The magnitudes of the vectors U and V are the minimal ones for which + * the image of the set Y=[0...1],U=[-0.5...0.5],V=[-0.5...0.5] under the + * conversion function will cover the set R=[0...1],G=[0...1],B=[0...1] + * (the resulting matrix can be converted for other input/output ranges + * outside this function). + * Under these conditions the given parameters lr, lg, lb uniquely + * determine the mapping of Y, U, V to R, G, B. + */ +static void luma_coeffs(struct mp_cmat *mat, float lr, float lg, float lb) +{ + assert(fabs(lr+lg+lb - 1) < 1e-6); + *mat = (struct mp_cmat) { + { {1, 0, 2 * (1-lr) }, + {1, -2 * (1-lb) * lb/lg, -2 * (1-lr) * lr/lg }, + {1, 2 * (1-lb), 0 } }, + // Constant coefficients (mat->c) not set here + }; +} + +// get the coefficients of the yuv -> rgb conversion matrix +void mp_get_csp_matrix(struct mp_csp_params *params, struct mp_cmat *m) +{ + enum mp_csp colorspace = params->color.space; + if (colorspace <= MP_CSP_AUTO || colorspace >= MP_CSP_COUNT) + colorspace = MP_CSP_BT_601; + enum mp_csp_levels levels_in = params->color.levels; + if (levels_in <= MP_CSP_LEVELS_AUTO || levels_in >= MP_CSP_LEVELS_COUNT) + levels_in = MP_CSP_LEVELS_TV; + + switch (colorspace) { + case MP_CSP_BT_601: luma_coeffs(m, 0.299, 0.587, 0.114 ); break; + case MP_CSP_BT_709: luma_coeffs(m, 0.2126, 0.7152, 0.0722); break; + case MP_CSP_SMPTE_240M: luma_coeffs(m, 0.2122, 0.7013, 0.0865); break; + case MP_CSP_BT_2020_NC: 
luma_coeffs(m, 0.2627, 0.6780, 0.0593); break; + case MP_CSP_BT_2020_C: { + // Note: This outputs into the [-0.5,0.5] range for chroma information. + // If this clips on any VO, a constant 0.5 coefficient can be added + // to the chroma channels to normalize them into [0,1]. This is not + // currently needed by anything, though. + *m = (struct mp_cmat){{{0, 0, 1}, {1, 0, 0}, {0, 1, 0}}}; + break; + } + case MP_CSP_RGB: { + *m = (struct mp_cmat){{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}}; + levels_in = -1; + break; + } + case MP_CSP_XYZ: { + // The vo should probably not be using a matrix generated by this + // function for XYZ sources, but if it does, let's just convert it to + // an equivalent RGB space based on the colorimetry metadata it + // provided in mp_csp_params. (At the risk of clipping, if the + // chosen primaries are too small to fit the actual data) + mp_get_xyz2rgb_coeffs(params, MP_INTENT_RELATIVE_COLORIMETRIC, m); + levels_in = -1; + break; + } + case MP_CSP_YCGCO: { + *m = (struct mp_cmat) { + {{1, -1, 1}, + {1, 1, 0}, + {1, -1, -1}}, + }; + break; + } + default: + MP_ASSERT_UNREACHABLE(); + }; + + if (params->is_float) + levels_in = -1; + + if ((colorspace == MP_CSP_BT_601 || colorspace == MP_CSP_BT_709 || + colorspace == MP_CSP_SMPTE_240M || colorspace == MP_CSP_BT_2020_NC)) + { + // Hue is equivalent to rotating input [U, V] subvector around the origin. + // Saturation scales [U, V]. + float huecos = params->gray ? 0 : params->saturation * cos(params->hue); + float huesin = params->gray ? 0 : params->saturation * sin(params->hue); + for (int i = 0; i < 3; i++) { + float u = m->m[i][1], v = m->m[i][2]; + m->m[i][1] = huecos * u - huesin * v; + m->m[i][2] = huesin * u + huecos * v; + } + } + + // The values below are written in 0-255 scale - thus bring s into range. 
+ double s = + mp_get_csp_mul(colorspace, params->input_bits, params->texture_bits) / 255; + // NOTE: The yuvfull ranges as presented here are arguably ambiguous, + // and conflict with at least the full-range YCbCr/ICtCp values as defined + // by ITU-R BT.2100. If somebody ever complains about full-range YUV looking + // different from their reference display, this comment is probably why. + struct yuvlevels { double ymin, ymax, cmax, cmid; } + yuvlim = { 16*s, 235*s, 240*s, 128*s }, + yuvfull = { 0*s, 255*s, 255*s, 128*s }, + anyfull = { 0*s, 255*s, 255*s/2, 0 }, // cmax picked to make cmul=ymul + yuvlev; + switch (levels_in) { + case MP_CSP_LEVELS_TV: yuvlev = yuvlim; break; + case MP_CSP_LEVELS_PC: yuvlev = yuvfull; break; + case -1: yuvlev = anyfull; break; + default: + MP_ASSERT_UNREACHABLE(); + } + + int levels_out = params->levels_out; + if (levels_out <= MP_CSP_LEVELS_AUTO || levels_out >= MP_CSP_LEVELS_COUNT) + levels_out = MP_CSP_LEVELS_PC; + struct rgblevels { double min, max; } + rgblim = { 16/255., 235/255. }, + rgbfull = { 0, 1 }, + rgblev; + switch (levels_out) { + case MP_CSP_LEVELS_TV: rgblev = rgblim; break; + case MP_CSP_LEVELS_PC: rgblev = rgbfull; break; + default: + MP_ASSERT_UNREACHABLE(); + } + + double ymul = (rgblev.max - rgblev.min) / (yuvlev.ymax - yuvlev.ymin); + double cmul = (rgblev.max - rgblev.min) / (yuvlev.cmax - yuvlev.cmid) / 2; + + // Contrast scales the output value range (gain) + ymul *= params->contrast; + cmul *= params->contrast; + + for (int i = 0; i < 3; i++) { + m->m[i][0] *= ymul; + m->m[i][1] *= cmul; + m->m[i][2] *= cmul; + // Set c so that Y=umin,UV=cmid maps to RGB=min (black to black), + // also add brightness offset (black lift) + m->c[i] = rgblev.min - m->m[i][0] * yuvlev.ymin + - (m->m[i][1] + m->m[i][2]) * yuvlev.cmid + + params->brightness; + } +} + +// Set colorspace related fields in p from f. Don't touch other fields. 
+void mp_csp_set_image_params(struct mp_csp_params *params, + const struct mp_image_params *imgparams) +{ + struct mp_image_params p = *imgparams; + mp_image_params_guess_csp(&p); // ensure consistency + params->color = p.color; +} + +bool mp_colorspace_equal(struct mp_colorspace c1, struct mp_colorspace c2) +{ + return c1.space == c2.space && + c1.levels == c2.levels && + c1.primaries == c2.primaries && + c1.gamma == c2.gamma && + c1.light == c2.light && + pl_hdr_metadata_equal(&c1.hdr, &c2.hdr); +} + +enum mp_csp_equalizer_param { + MP_CSP_EQ_BRIGHTNESS, + MP_CSP_EQ_CONTRAST, + MP_CSP_EQ_HUE, + MP_CSP_EQ_SATURATION, + MP_CSP_EQ_GAMMA, + MP_CSP_EQ_COUNT, +}; + +// Default initialization with 0 is enough, except for the capabilities field +struct mp_csp_equalizer_opts { + // Value for each property is in the range [-100.0, 100.0]. + // 0.0 is default, meaning neutral or no change. + float values[MP_CSP_EQ_COUNT]; + int output_levels; +}; + +#define OPT_BASE_STRUCT struct mp_csp_equalizer_opts + +const struct m_sub_options mp_csp_equalizer_conf = { + .opts = (const m_option_t[]) { + {"brightness", OPT_FLOAT(values[MP_CSP_EQ_BRIGHTNESS]), + M_RANGE(-100, 100)}, + {"saturation", OPT_FLOAT(values[MP_CSP_EQ_SATURATION]), + M_RANGE(-100, 100)}, + {"contrast", OPT_FLOAT(values[MP_CSP_EQ_CONTRAST]), + M_RANGE(-100, 100)}, + {"hue", OPT_FLOAT(values[MP_CSP_EQ_HUE]), + M_RANGE(-100, 100)}, + {"gamma", OPT_FLOAT(values[MP_CSP_EQ_GAMMA]), + M_RANGE(-100, 100)}, + {"video-output-levels", + OPT_CHOICE_C(output_levels, mp_csp_levels_names)}, + {0} + }, + .size = sizeof(struct mp_csp_equalizer_opts), +}; + +// Copy settings from eq into params. 
+static void mp_csp_copy_equalizer_values(struct mp_csp_params *params, + const struct mp_csp_equalizer_opts *eq) +{ + params->brightness = eq->values[MP_CSP_EQ_BRIGHTNESS] / 100.0; + params->contrast = (eq->values[MP_CSP_EQ_CONTRAST] + 100) / 100.0; + params->hue = eq->values[MP_CSP_EQ_HUE] / 100.0 * M_PI; + params->saturation = (eq->values[MP_CSP_EQ_SATURATION] + 100) / 100.0; + params->gamma = exp(log(8.0) * eq->values[MP_CSP_EQ_GAMMA] / 100.0); + params->levels_out = eq->output_levels; +} + +struct mp_csp_equalizer_state *mp_csp_equalizer_create(void *ta_parent, + struct mpv_global *global) +{ + struct m_config_cache *c = m_config_cache_alloc(ta_parent, global, + &mp_csp_equalizer_conf); + // The terrible, terrible truth. + return (struct mp_csp_equalizer_state *)c; +} + +bool mp_csp_equalizer_state_changed(struct mp_csp_equalizer_state *state) +{ + struct m_config_cache *c = (struct m_config_cache *)state; + return m_config_cache_update(c); +} + +void mp_csp_equalizer_state_get(struct mp_csp_equalizer_state *state, + struct mp_csp_params *params) +{ + struct m_config_cache *c = (struct m_config_cache *)state; + m_config_cache_update(c); + struct mp_csp_equalizer_opts *opts = c->opts; + mp_csp_copy_equalizer_values(params, opts); +} + +void mp_invert_cmat(struct mp_cmat *out, struct mp_cmat *in) +{ + *out = *in; + mp_invert_matrix3x3(out->m); + + // fix the constant coefficient + // rgb = M * yuv + C + // M^-1 * rgb = yuv + M^-1 * C + // yuv = M^-1 * rgb - M^-1 * C + // ^^^^^^^^^^ + out->c[0] = -(out->m[0][0] * in->c[0] + out->m[0][1] * in->c[1] + out->m[0][2] * in->c[2]); + out->c[1] = -(out->m[1][0] * in->c[0] + out->m[1][1] * in->c[1] + out->m[1][2] * in->c[2]); + out->c[2] = -(out->m[2][0] * in->c[0] + out->m[2][1] * in->c[1] + out->m[2][2] * in->c[2]); +} + +// Multiply the color in c with the given matrix. 
+// i/o is {R, G, B} or {Y, U, V} (depending on input/output and matrix), using +// a fixed point representation with the given number of bits (so for bits==8, +// [0,255] maps to [0,1]). The output is clipped to the range as needed. +void mp_map_fixp_color(struct mp_cmat *matrix, int ibits, int in[3], + int obits, int out[3]) +{ + for (int i = 0; i < 3; i++) { + double val = matrix->c[i]; + for (int x = 0; x < 3; x++) + val += matrix->m[i][x] * in[x] / ((1 << ibits) - 1); + int ival = lrint(val * ((1 << obits) - 1)); + out[i] = av_clip(ival, 0, (1 << obits) - 1); + } +} diff --git a/video/csputils.h b/video/csputils.h new file mode 100644 index 0000000..3a904cb --- /dev/null +++ b/video/csputils.h @@ -0,0 +1,290 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPLAYER_CSPUTILS_H +#define MPLAYER_CSPUTILS_H + +#include <stdbool.h> +#include <stdint.h> + +#include <libplacebo/colorspace.h> + +#include "options/m_option.h" + +/* NOTE: the csp and levels AUTO values are converted to specific ones + * above vf/vo level. At least vf_scale relies on all valid settings being + * nonzero at vf/vo level. 
+ */ + +enum mp_csp { + MP_CSP_AUTO, + MP_CSP_BT_601, + MP_CSP_BT_709, + MP_CSP_SMPTE_240M, + MP_CSP_BT_2020_NC, + MP_CSP_BT_2020_C, + MP_CSP_RGB, + MP_CSP_XYZ, + MP_CSP_YCGCO, + MP_CSP_COUNT +}; + +extern const struct m_opt_choice_alternatives mp_csp_names[]; + +enum mp_csp_levels { + MP_CSP_LEVELS_AUTO, + MP_CSP_LEVELS_TV, + MP_CSP_LEVELS_PC, + MP_CSP_LEVELS_COUNT, +}; + +extern const struct m_opt_choice_alternatives mp_csp_levels_names[]; + +enum mp_csp_prim { + MP_CSP_PRIM_AUTO, + MP_CSP_PRIM_BT_601_525, + MP_CSP_PRIM_BT_601_625, + MP_CSP_PRIM_BT_709, + MP_CSP_PRIM_BT_2020, + MP_CSP_PRIM_BT_470M, + MP_CSP_PRIM_APPLE, + MP_CSP_PRIM_ADOBE, + MP_CSP_PRIM_PRO_PHOTO, + MP_CSP_PRIM_CIE_1931, + MP_CSP_PRIM_DCI_P3, + MP_CSP_PRIM_DISPLAY_P3, + MP_CSP_PRIM_V_GAMUT, + MP_CSP_PRIM_S_GAMUT, + MP_CSP_PRIM_EBU_3213, + MP_CSP_PRIM_FILM_C, + MP_CSP_PRIM_ACES_AP0, + MP_CSP_PRIM_ACES_AP1, + MP_CSP_PRIM_COUNT +}; + +extern const struct m_opt_choice_alternatives mp_csp_prim_names[]; + +enum mp_csp_trc { + MP_CSP_TRC_AUTO, + MP_CSP_TRC_BT_1886, + MP_CSP_TRC_SRGB, + MP_CSP_TRC_LINEAR, + MP_CSP_TRC_GAMMA18, + MP_CSP_TRC_GAMMA20, + MP_CSP_TRC_GAMMA22, + MP_CSP_TRC_GAMMA24, + MP_CSP_TRC_GAMMA26, + MP_CSP_TRC_GAMMA28, + MP_CSP_TRC_PRO_PHOTO, + MP_CSP_TRC_PQ, + MP_CSP_TRC_HLG, + MP_CSP_TRC_V_LOG, + MP_CSP_TRC_S_LOG1, + MP_CSP_TRC_S_LOG2, + MP_CSP_TRC_ST428, + MP_CSP_TRC_COUNT +}; + +extern const struct m_opt_choice_alternatives mp_csp_trc_names[]; + +enum mp_csp_light { + MP_CSP_LIGHT_AUTO, + MP_CSP_LIGHT_DISPLAY, + MP_CSP_LIGHT_SCENE_HLG, + MP_CSP_LIGHT_SCENE_709_1886, + MP_CSP_LIGHT_SCENE_1_2, + MP_CSP_LIGHT_COUNT +}; + +extern const struct m_opt_choice_alternatives mp_csp_light_names[]; + +// These constants are based on the ICC specification (Table 23) and match +// up with the API of LittleCMS, which treats them as integers. 
enum mp_render_intent {
    MP_INTENT_PERCEPTUAL = 0,
    MP_INTENT_RELATIVE_COLORIMETRIC = 1,
    MP_INTENT_SATURATION = 2,
    MP_INTENT_ABSOLUTE_COLORIMETRIC = 3
};

// The numeric values (except -1) match the Matroska StereoMode element value.
// Values between the listed ones are valid modes as well; they just have no
// named enumerator here.
enum mp_stereo3d_mode {
    MP_STEREO3D_INVALID = -1,
    /* only modes explicitly referenced in the code are listed */
    MP_STEREO3D_MONO = 0,
    MP_STEREO3D_SBS2L = 1,
    MP_STEREO3D_AB2R = 2,
    MP_STEREO3D_AB2L = 3,
    MP_STEREO3D_SBS2R = 11,
    /* no explicit enum entries for most valid values */
    MP_STEREO3D_COUNT = 15, // 14 is last valid mode
};

// Option-choice table mapping stereo mode names to mp_stereo3d_mode values.
extern const struct m_opt_choice_alternatives mp_stereo3d_names[];

// Look up the name of stereo mode x in mp_stereo3d_names.
#define MP_STEREO3D_NAME(x) m_opt_choice_str(mp_stereo3d_names, x)

// Like MP_STEREO3D_NAME(), but evaluates to def when the lookup result is
// false (presumably NULL for a value without a table entry - confirm against
// m_opt_choice_str). NOTE: x is expanded, and the lookup performed, twice;
// do not pass an expression with side effects.
#define MP_STEREO3D_NAME_DEF(x, def) \
    (MP_STEREO3D_NAME(x) ? MP_STEREO3D_NAME(x) : (def))

// Full description of how pixel values are to be interpreted. All fields are
// compared by mp_colorspace_equal().
struct mp_colorspace {
    enum mp_csp space;
    enum mp_csp_levels levels;
    enum mp_csp_prim primaries;
    enum mp_csp_trc gamma;      // holds an mp_csp_trc value despite the name
    enum mp_csp_light light;
    struct pl_hdr_metadata hdr; // libplacebo HDR metadata
};

// For many colorspace conversions, in particular those involving HDR, an
// implicit reference white level is needed. Since this magic constant shows up
// a lot, give it an explicit name. The value of 203 cd/m² comes from ITU-R
// Report BT.2408, and the value for HLG comes from the cited HLG 75% level
// (transferred to scene space).
+#define MP_REF_WHITE 203.0 +#define MP_REF_WHITE_HLG 3.17955 + +// Replaces unknown values in the first struct by those of the second struct +void mp_colorspace_merge(struct mp_colorspace *orig, struct mp_colorspace *new); + +struct mp_csp_params { + struct mp_colorspace color; // input colorspace + enum mp_csp_levels levels_out; // output device + float brightness; + float contrast; + float hue; + float saturation; + float gamma; + // discard U/V components + bool gray; + // input is already centered and range-expanded + bool is_float; + // texture_bits/input_bits is for rescaling fixed point input to range [0,1] + int texture_bits; + int input_bits; +}; + +#define MP_CSP_PARAMS_DEFAULTS { \ + .color = { .space = MP_CSP_BT_601, \ + .levels = MP_CSP_LEVELS_TV }, \ + .levels_out = MP_CSP_LEVELS_PC, \ + .brightness = 0, .contrast = 1, .hue = 0, .saturation = 1, \ + .gamma = 1, .texture_bits = 8, .input_bits = 8} + +struct mp_image_params; +void mp_csp_set_image_params(struct mp_csp_params *params, + const struct mp_image_params *imgparams); + +bool mp_colorspace_equal(struct mp_colorspace c1, struct mp_colorspace c2); + +enum mp_chroma_location { + MP_CHROMA_AUTO, + MP_CHROMA_TOPLEFT, // uhd + MP_CHROMA_LEFT, // mpeg2/4, h264 + MP_CHROMA_CENTER, // mpeg1, jpeg + MP_CHROMA_COUNT, +}; + +extern const struct m_opt_choice_alternatives mp_chroma_names[]; + +enum mp_alpha_type { + MP_ALPHA_AUTO, + MP_ALPHA_STRAIGHT, + MP_ALPHA_PREMUL, +}; + +extern const struct m_opt_choice_alternatives mp_alpha_names[]; + +extern const struct m_sub_options mp_csp_equalizer_conf; + +struct mpv_global; +struct mp_csp_equalizer_state *mp_csp_equalizer_create(void *ta_parent, + struct mpv_global *global); +bool mp_csp_equalizer_state_changed(struct mp_csp_equalizer_state *state); +void mp_csp_equalizer_state_get(struct mp_csp_equalizer_state *state, + struct mp_csp_params *params); + +struct mp_csp_col_xy { + float x, y; +}; + +static inline float mp_xy_X(struct mp_csp_col_xy xy) { + return 
xy.x / xy.y; +} + +static inline float mp_xy_Z(struct mp_csp_col_xy xy) { + return (1 - xy.x - xy.y) / xy.y; +} + +struct mp_csp_primaries { + struct mp_csp_col_xy red, green, blue, white; +}; + +enum mp_csp avcol_spc_to_mp_csp(int avcolorspace); +enum mp_csp_levels avcol_range_to_mp_csp_levels(int avrange); +enum mp_csp_prim avcol_pri_to_mp_csp_prim(int avpri); +enum mp_csp_trc avcol_trc_to_mp_csp_trc(int avtrc); + +int mp_csp_to_avcol_spc(enum mp_csp colorspace); +int mp_csp_levels_to_avcol_range(enum mp_csp_levels range); +int mp_csp_prim_to_avcol_pri(enum mp_csp_prim prim); +int mp_csp_trc_to_avcol_trc(enum mp_csp_trc trc); + +enum mp_csp mp_csp_guess_colorspace(int width, int height); +enum mp_csp_prim mp_csp_guess_primaries(int width, int height); + +enum mp_chroma_location avchroma_location_to_mp(int avloc); +int mp_chroma_location_to_av(enum mp_chroma_location mploc); +void mp_get_chroma_location(enum mp_chroma_location loc, int *x, int *y); + +struct mp_csp_primaries mp_get_csp_primaries(enum mp_csp_prim csp); +float mp_trc_nom_peak(enum mp_csp_trc trc); +bool mp_trc_is_hdr(enum mp_csp_trc trc); + +/* Color conversion matrix: RGB = m * YUV + c + * m is in row-major matrix, with m[row][col], e.g.: + * [ a11 a12 a13 ] float m[3][3] = { { a11, a12, a13 }, + * [ a21 a22 a23 ] { a21, a22, a23 }, + * [ a31 a32 a33 ] { a31, a32, a33 } }; + * This is accessed as e.g.: m[2-1][1-1] = a21 + * In particular, each row contains all the coefficients for one of R, G, B, + * while each column contains all the coefficients for one of Y, U, V: + * m[r,g,b][y,u,v] = ... + * The matrix could also be viewed as group of 3 vectors, e.g. the 1st column + * is the Y vector (1, 1, 1), the 2nd is the U vector, the 3rd the V vector. + * The matrix might also be used for other conversions and colorspaces. 
+ */ +struct mp_cmat { + float m[3][3]; + float c[3]; +}; + +void mp_get_rgb2xyz_matrix(struct mp_csp_primaries space, float m[3][3]); +void mp_get_cms_matrix(struct mp_csp_primaries src, struct mp_csp_primaries dest, + enum mp_render_intent intent, float cms_matrix[3][3]); + +double mp_get_csp_mul(enum mp_csp csp, int input_bits, int texture_bits); +void mp_get_csp_uint_mul(enum mp_csp csp, enum mp_csp_levels levels, + int bits, int component, double *out_m, double *out_o); +void mp_get_csp_matrix(struct mp_csp_params *params, struct mp_cmat *out); + +void mp_invert_matrix3x3(float m[3][3]); +void mp_invert_cmat(struct mp_cmat *out, struct mp_cmat *in); +void mp_map_fixp_color(struct mp_cmat *matrix, int ibits, int in[3], + int obits, int out[3]); + +#endif /* MPLAYER_CSPUTILS_H */ diff --git a/video/cuda.c b/video/cuda.c new file mode 100644 index 0000000..3b7a2d8 --- /dev/null +++ b/video/cuda.c @@ -0,0 +1,44 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "hwdec.h" +#include "options/m_config.h" +#include "options/options.h" + +#include <libavutil/hwcontext.h> + +static struct AVBufferRef *cuda_create_standalone(struct mpv_global *global, + struct mp_log *log, struct hwcontext_create_dev_params *params) +{ + struct cuda_opts *opts = mp_get_config_group(NULL, global, &cuda_conf); + + char *decode_dev = NULL; + if (opts->cuda_device != -1) + decode_dev = talloc_asprintf(NULL, "%d", opts->cuda_device); + + AVBufferRef* ref = NULL; + av_hwdevice_ctx_create(&ref, AV_HWDEVICE_TYPE_CUDA, decode_dev, NULL, 0); + + talloc_free(decode_dev); + talloc_free(opts); + return ref; +} + +const struct hwcontext_fns hwcontext_fns_cuda = { + .av_hwdevice_type = AV_HWDEVICE_TYPE_CUDA, + .create_dev = cuda_create_standalone, +}; diff --git a/video/d3d.c b/video/d3d.c new file mode 100644 index 0000000..ceddcf3 --- /dev/null +++ b/video/d3d.c @@ -0,0 +1,273 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "config.h" + +#include <libavcodec/avcodec.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_d3d11va.h> + +#if HAVE_D3D9_HWACCEL +#include <libavutil/hwcontext_dxva2.h> +#endif + +#include "common/av_common.h" +#include "common/common.h" +#include "osdep/threads.h" +#include "osdep/windows_utils.h" +#include "video/fmt-conversion.h" +#include "video/hwdec.h" +#include "video/mp_image_pool.h" +#include "video/mp_image.h" + +#include "d3d.h" + +HMODULE d3d11_dll, d3d9_dll, dxva2_dll; +PFN_D3D11_CREATE_DEVICE d3d11_D3D11CreateDevice; + +static mp_once d3d_load_once = MP_STATIC_ONCE_INITIALIZER; + +#if !HAVE_UWP +static void d3d_do_load(void) +{ + d3d11_dll = LoadLibrary(L"d3d11.dll"); + d3d9_dll = LoadLibrary(L"d3d9.dll"); + dxva2_dll = LoadLibrary(L"dxva2.dll"); + + if (d3d11_dll) { + d3d11_D3D11CreateDevice = + (void *)GetProcAddress(d3d11_dll, "D3D11CreateDevice"); + } +} +#else +static void d3d_do_load(void) +{ + + d3d11_D3D11CreateDevice = D3D11CreateDevice; +} +#endif + +void d3d_load_dlls(void) +{ + mp_exec_once(&d3d_load_once, d3d_do_load); +} + +// Test if Direct3D11 can be used by us. Basically, this prevents trying to use +// D3D11 on Win7, and then failing somewhere in the process. +bool d3d11_check_decoding(ID3D11Device *dev) +{ + HRESULT hr; + // We assume that NV12 is always supported, if hw decoding is supported at + // all. + UINT supported = 0; + hr = ID3D11Device_CheckFormatSupport(dev, DXGI_FORMAT_NV12, &supported); + return !FAILED(hr) && (supported & D3D11_BIND_DECODER); +} + +static void d3d11_refine_hwframes(AVBufferRef *hw_frames_ctx) +{ + AVHWFramesContext *fctx = (void *)hw_frames_ctx->data; + + if (fctx->format == AV_PIX_FMT_D3D11) { + AVD3D11VAFramesContext *hwctx = fctx->hwctx; + + // According to hwcontex_d3d11va.h, yuv420p means DXGI_FORMAT_420_OPAQUE, + // which has no shader support. 
+ if (fctx->sw_format != AV_PIX_FMT_YUV420P) + hwctx->BindFlags |= D3D11_BIND_SHADER_RESOURCE; + } +} + +AVBufferRef *d3d11_wrap_device_ref(ID3D11Device *device) +{ + AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_D3D11VA); + if (!device_ref) + return NULL; + + AVHWDeviceContext *ctx = (void *)device_ref->data; + AVD3D11VADeviceContext *hwctx = ctx->hwctx; + + ID3D11Device_AddRef(device); + hwctx->device = device; + + if (av_hwdevice_ctx_init(device_ref) < 0) + av_buffer_unref(&device_ref); + + return device_ref; +} + +static struct AVBufferRef *d3d11_create_standalone(struct mpv_global *global, + struct mp_log *plog, struct hwcontext_create_dev_params *params) +{ + ID3D11Device *device = NULL; + HRESULT hr; + + d3d_load_dlls(); + if (!d3d11_D3D11CreateDevice) { + mp_err(plog, "Failed to load D3D11 library\n"); + return NULL; + } + + hr = d3d11_D3D11CreateDevice(NULL, D3D_DRIVER_TYPE_HARDWARE, NULL, + D3D11_CREATE_DEVICE_VIDEO_SUPPORT, NULL, 0, + D3D11_SDK_VERSION, &device, NULL, NULL); + if (FAILED(hr)) { + mp_err(plog, "Failed to create D3D11 Device: %s\n", + mp_HRESULT_to_str(hr)); + return NULL; + } + + AVBufferRef *avref = d3d11_wrap_device_ref(device); + ID3D11Device_Release(device); + if (!avref) + mp_err(plog, "Failed to allocate AVHWDeviceContext.\n"); + + return avref; +} + +const struct hwcontext_fns hwcontext_fns_d3d11 = { + .av_hwdevice_type = AV_HWDEVICE_TYPE_D3D11VA, + .refine_hwframes = d3d11_refine_hwframes, + .create_dev = d3d11_create_standalone, +}; + +#if HAVE_D3D9_HWACCEL + +#define DXVA2API_USE_BITFIELDS +#include <libavutil/common.h> + +#include <libavutil/hwcontext_dxva2.h> + +static void d3d9_free_av_device_ref(AVHWDeviceContext *ctx) +{ + AVDXVA2DeviceContext *hwctx = ctx->hwctx; + + if (hwctx->devmgr) + IDirect3DDeviceManager9_Release(hwctx->devmgr); +} + +AVBufferRef *d3d9_wrap_device_ref(IDirect3DDevice9 *device) +{ + HRESULT hr; + + d3d_load_dlls(); + if (!dxva2_dll) + return NULL; + + HRESULT (WINAPI 
*DXVA2CreateDirect3DDeviceManager9)(UINT *, IDirect3DDeviceManager9 **) = + (void *)GetProcAddress(dxva2_dll, "DXVA2CreateDirect3DDeviceManager9"); + if (!DXVA2CreateDirect3DDeviceManager9) + return NULL; + + AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_DXVA2); + if (!device_ref) + return NULL; + + AVHWDeviceContext *ctx = (void *)device_ref->data; + AVDXVA2DeviceContext *hwctx = ctx->hwctx; + + UINT reset_token = 0; + hr = DXVA2CreateDirect3DDeviceManager9(&reset_token, &hwctx->devmgr); + if (FAILED(hr)) + goto fail; + + hr = IDirect3DDeviceManager9_ResetDevice(hwctx->devmgr, device, reset_token); + if (FAILED(hr)) + goto fail; + + ctx->free = d3d9_free_av_device_ref; + + if (av_hwdevice_ctx_init(device_ref) < 0) + goto fail; + + return device_ref; + +fail: + d3d9_free_av_device_ref(ctx); + av_buffer_unref(&device_ref); + return NULL; +} + +static struct AVBufferRef *d3d9_create_standalone(struct mpv_global *global, + struct mp_log *plog, struct hwcontext_create_dev_params *params) +{ + d3d_load_dlls(); + if (!d3d9_dll || !dxva2_dll) { + mp_err(plog, "Failed to load D3D9 library\n"); + return NULL; + } + + HRESULT (WINAPI *Direct3DCreate9Ex)(UINT, IDirect3D9Ex **) = + (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex"); + if (!Direct3DCreate9Ex) { + mp_err(plog, "Failed to locate Direct3DCreate9Ex\n"); + return NULL; + } + + IDirect3D9Ex *d3d9ex = NULL; + HRESULT hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9ex); + if (FAILED(hr)) { + mp_err(plog, "Failed to create IDirect3D9Ex object\n"); + return NULL; + } + + UINT adapter = D3DADAPTER_DEFAULT; + D3DDISPLAYMODEEX modeex = {0}; + IDirect3D9Ex_GetAdapterDisplayModeEx(d3d9ex, adapter, &modeex, NULL); + + D3DPRESENT_PARAMETERS present_params = { + .Windowed = TRUE, + .BackBufferWidth = 640, + .BackBufferHeight = 480, + .BackBufferCount = 0, + .BackBufferFormat = modeex.Format, + .SwapEffect = D3DSWAPEFFECT_DISCARD, + .Flags = D3DPRESENTFLAG_VIDEO, + }; + + IDirect3DDevice9Ex *exdev = NULL; + hr 
= IDirect3D9Ex_CreateDeviceEx(d3d9ex, adapter, + D3DDEVTYPE_HAL, + GetShellWindow(), + D3DCREATE_SOFTWARE_VERTEXPROCESSING | + D3DCREATE_MULTITHREADED | + D3DCREATE_FPU_PRESERVE, + &present_params, + NULL, + &exdev); + IDirect3D9_Release(d3d9ex); + if (FAILED(hr)) { + mp_err(plog, "Failed to create Direct3D device: %s\n", + mp_HRESULT_to_str(hr)); + return NULL; + } + + AVBufferRef *avref = d3d9_wrap_device_ref((IDirect3DDevice9 *)exdev); + IDirect3DDevice9Ex_Release(exdev); + if (!avref) + mp_err(plog, "Failed to allocate AVHWDeviceContext.\n"); + + return avref; +} + +const struct hwcontext_fns hwcontext_fns_dxva2 = { + .av_hwdevice_type = AV_HWDEVICE_TYPE_DXVA2, + .create_dev = d3d9_create_standalone, +}; + +#endif /* HAVE_D3D9_HWACCEL */ diff --git a/video/d3d.h b/video/d3d.h new file mode 100644 index 0000000..0058905 --- /dev/null +++ b/video/d3d.h @@ -0,0 +1,42 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPV_DECODE_D3D_H +#define MPV_DECODE_D3D_H + +#include <windows.h> +#include <d3d11.h> + +#include <stdbool.h> +#include <inttypes.h> + +// Must call d3d_load_dlls() before accessing. Once this is done, the DLLs +// remain loaded forever. 
+extern HMODULE d3d11_dll, d3d9_dll, dxva2_dll; +extern PFN_D3D11_CREATE_DEVICE d3d11_D3D11CreateDevice; + +void d3d_load_dlls(void); + +bool d3d11_check_decoding(ID3D11Device *dev); + +struct AVBufferRef; +struct IDirect3DDevice9; + +struct AVBufferRef *d3d11_wrap_device_ref(ID3D11Device *device); +struct AVBufferRef *d3d9_wrap_device_ref(struct IDirect3DDevice9 *device); + +#endif diff --git a/video/decode/vd_lavc.c b/video/decode/vd_lavc.c new file mode 100644 index 0000000..b971d26 --- /dev/null +++ b/video/decode/vd_lavc.c @@ -0,0 +1,1457 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <float.h> +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <stdbool.h> + +#include <libavcodec/avcodec.h> +#include <libavformat/version.h> +#include <libavutil/common.h> +#include <libavutil/hwcontext.h> +#include <libavutil/opt.h> +#include <libavutil/intreadwrite.h> +#include <libavutil/pixdesc.h> + +#include "mpv_talloc.h" +#include "common/global.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "options/options.h" +#include "osdep/threads.h" +#include "misc/bstr.h" +#include "common/av_common.h" +#include "common/codecs.h" + +#include "video/fmt-conversion.h" + +#include "filters/f_decoder_wrapper.h" +#include "filters/filter_internal.h" +#include "video/hwdec.h" +#include "video/img_format.h" +#include "video/mp_image.h" +#include "video/mp_image_pool.h" +#include "demux/demux.h" +#include "demux/stheader.h" +#include "demux/packet.h" +#include "video/csputils.h" +#include "video/sws_utils.h" +#include "video/out/vo.h" + +#include "options/m_option.h" + +static void init_avctx(struct mp_filter *vd); +static void uninit_avctx(struct mp_filter *vd); + +static int get_buffer2_direct(AVCodecContext *avctx, AVFrame *pic, int flags); +static enum AVPixelFormat get_format_hwdec(struct AVCodecContext *avctx, + const enum AVPixelFormat *pix_fmt); +static int hwdec_opt_help(struct mp_log *log, const m_option_t *opt, + struct bstr name); + +#define HWDEC_DELAY_QUEUE_COUNT 2 + +#define OPT_BASE_STRUCT struct vd_lavc_params + +struct vd_lavc_params { + bool fast; + int film_grain; + bool show_all; + int skip_loop_filter; + int skip_idct; + int skip_frame; + int framedrop; + int threads; + bool bitexact; + bool old_x264; + bool apply_cropping; + bool check_hw_profile; + int software_fallback; + char **avopts; + int dr; + char **hwdec_api; + char *hwdec_codecs; + int hwdec_image_format; + int hwdec_extra_frames; +}; + +static const struct m_opt_choice_alternatives discard_names[] = { + {"none", AVDISCARD_NONE}, + 
{"default", AVDISCARD_DEFAULT}, + {"nonref", AVDISCARD_NONREF}, + {"bidir", AVDISCARD_BIDIR}, + {"nonkey", AVDISCARD_NONKEY}, + {"all", AVDISCARD_ALL}, + {0} +}; +#define OPT_DISCARD(field) OPT_CHOICE_C(field, discard_names) + +const struct m_sub_options vd_lavc_conf = { + .opts = (const m_option_t[]){ + {"vd-lavc-fast", OPT_BOOL(fast)}, + {"vd-lavc-film-grain", OPT_CHOICE(film_grain, + {"auto", -1}, {"cpu", 0}, {"gpu", 1})}, + {"vd-lavc-show-all", OPT_BOOL(show_all)}, + {"vd-lavc-skiploopfilter", OPT_DISCARD(skip_loop_filter)}, + {"vd-lavc-skipidct", OPT_DISCARD(skip_idct)}, + {"vd-lavc-skipframe", OPT_DISCARD(skip_frame)}, + {"vd-lavc-framedrop", OPT_DISCARD(framedrop)}, + {"vd-lavc-threads", OPT_INT(threads), M_RANGE(0, DBL_MAX)}, + {"vd-lavc-bitexact", OPT_BOOL(bitexact)}, + {"vd-lavc-assume-old-x264", OPT_BOOL(old_x264)}, + {"vd-lavc-check-hw-profile", OPT_BOOL(check_hw_profile)}, + {"vd-lavc-software-fallback", OPT_CHOICE(software_fallback, + {"no", INT_MAX}, {"yes", 1}), M_RANGE(1, INT_MAX)}, + {"vd-lavc-o", OPT_KEYVALUELIST(avopts)}, + {"vd-lavc-dr", OPT_CHOICE(dr, + {"auto", -1}, {"no", 0}, {"yes", 1})}, + {"vd-apply-cropping", OPT_BOOL(apply_cropping)}, + {"hwdec", OPT_STRINGLIST(hwdec_api), + .help = hwdec_opt_help, + .flags = M_OPT_OPTIONAL_PARAM | UPDATE_HWDEC}, + {"hwdec-codecs", OPT_STRING(hwdec_codecs)}, + {"hwdec-image-format", OPT_IMAGEFORMAT(hwdec_image_format)}, + {"hwdec-extra-frames", OPT_INT(hwdec_extra_frames), M_RANGE(0, 256)}, + {0} + }, + .size = sizeof(struct vd_lavc_params), + .defaults = &(const struct vd_lavc_params){ + .film_grain = -1 /*auto*/, + .check_hw_profile = true, + .software_fallback = 3, + .skip_loop_filter = AVDISCARD_DEFAULT, + .skip_idct = AVDISCARD_DEFAULT, + .skip_frame = AVDISCARD_DEFAULT, + .framedrop = AVDISCARD_NONREF, + .dr = -1, + .hwdec_api = (char *[]){"no", NULL,}, + .hwdec_codecs = "h264,vc1,hevc,vp8,vp9,av1,prores", + // Maximum number of surfaces the player wants to buffer. 
This number + // might require adjustment depending on whatever the player does; + // for example, if vo_gpu increases the number of reference surfaces for + // interpolation, this value has to be increased too. + .hwdec_extra_frames = 6, + .apply_cropping = true, + }, +}; + +struct hwdec_info { + char name[64]; + char method_name[24]; // non-unique name describing the hwdec method + const AVCodec *codec; // implemented by this codec + enum AVHWDeviceType lavc_device; // if not NONE, get a hwdevice + bool copying; // if true, outputs sw frames, or copy to sw ourselves + enum AVPixelFormat pix_fmt; // if not NONE, select in get_format + bool use_hw_frames; // set AVCodecContext.hw_frames_ctx + bool use_hw_device; // set AVCodecContext.hw_device_ctx + unsigned int flags; // HWDEC_FLAG_* + + // for internal sorting + int auto_pos; + int rank; +}; + +typedef struct lavc_ctx { + struct mp_log *log; + struct m_config_cache *opts_cache; + struct vd_lavc_params *opts; + struct mp_codec_params *codec; + AVCodecContext *avctx; + AVFrame *pic; + AVPacket *avpkt; + bool use_hwdec; + struct hwdec_info hwdec; // valid only if use_hwdec==true + bstr *attempted_hwdecs; + int num_attempted_hwdecs; + AVRational codec_timebase; + enum AVDiscard skip_frame; + bool flushing; + struct lavc_state state; + const char *decoder; + bool hwdec_failed; + bool hwdec_notified; + bool force_eof; + + bool intra_only; + int framedrop_flags; + + bool hw_probing; + struct demux_packet **sent_packets; + int num_sent_packets; + + struct demux_packet **requeue_packets; + int num_requeue_packets; + + struct mp_image **delay_queue; + int num_delay_queue; + int max_delay_queue; + + // From VO + struct vo *vo; + struct mp_hwdec_devices *hwdec_devs; + + // Wrapped AVHWDeviceContext* used for decoding. 
+ AVBufferRef *hwdec_dev; + + bool hwdec_request_reinit; + int hwdec_fail_count; + + struct mp_image_pool *hwdec_swpool; + + AVBufferRef *cached_hw_frames_ctx; + + // --- The following fields are protected by dr_lock. + mp_mutex dr_lock; + bool dr_failed; + struct mp_image_pool *dr_pool; + int dr_imgfmt, dr_w, dr_h, dr_stride_align; + + struct mp_decoder public; +} vd_ffmpeg_ctx; + +enum { + HWDEC_FLAG_AUTO = (1 << 0), // prioritize in autoprobe order + HWDEC_FLAG_WHITELIST = (1 << 1), // whitelist for auto-safe +}; + +struct autoprobe_info { + const char *method_name; + unsigned int flags; // HWDEC_FLAG_* +}; + +// Things not included in this list will be tried last, in random order. +const struct autoprobe_info hwdec_autoprobe_info[] = { + {"d3d11va", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"dxva2", HWDEC_FLAG_AUTO}, + {"d3d11va-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"dxva2-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"nvdec", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"nvdec-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"vaapi", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"vaapi-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"vdpau", HWDEC_FLAG_AUTO}, + {"vdpau-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"drm", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"drm-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"mmal", HWDEC_FLAG_AUTO}, + {"mmal-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"mediacodec", HWDEC_FLAG_AUTO}, + {"mediacodec-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"videotoolbox", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {"videotoolbox-copy", HWDEC_FLAG_AUTO | HWDEC_FLAG_WHITELIST}, + {0} +}; + +static int hwdec_compare(const void *p1, const void *p2) +{ + struct hwdec_info *h1 = (void *)p1; + struct hwdec_info *h2 = (void *)p2; + + if (h1 == h2) + return 0; + + // Strictly put non-preferred hwdecs to the end of the list. 
+ if ((h1->auto_pos == INT_MAX) != (h2->auto_pos == INT_MAX)) + return h1->auto_pos == INT_MAX ? 1 : -1; + // List non-copying entries first, so --hwdec=auto takes them. + if (h1->copying != h2->copying) + return h1->copying ? 1 : -1; + // Order by autoprobe preference order. + if (h1->auto_pos != h2->auto_pos) + return h1->auto_pos > h2->auto_pos ? 1 : -1; + // Put hwdecs without hw_device_ctx last + if ((!!h1->lavc_device) != (!!h2->lavc_device)) + return h1->lavc_device ? -1 : 1; + // Fallback sort order to make sorting stable. + return h1->rank > h2->rank ? 1 :-1; +} + +// (This takes care of some bookkeeping too, like setting info.name) +static void add_hwdec_item(struct hwdec_info **infos, int *num_infos, + struct hwdec_info info) +{ + if (info.copying) + mp_snprintf_cat(info.method_name, sizeof(info.method_name), "-copy"); + + // (Including the codec name in case this is a wrapper looks pretty dumb, + // but better not have them clash with hwaccels and others.) + snprintf(info.name, sizeof(info.name), "%s-%s", + info.codec->name, info.method_name); + + info.rank = *num_infos; + info.auto_pos = INT_MAX; + + for (int x = 0; hwdec_autoprobe_info[x].method_name; x++) { + const struct autoprobe_info *entry = &hwdec_autoprobe_info[x]; + if (strcmp(entry->method_name, info.method_name) == 0) { + info.flags |= entry->flags; + if (info.flags & HWDEC_FLAG_AUTO) + info.auto_pos = x; + } + } + + MP_TARRAY_APPEND(NULL, *infos, *num_infos, info); +} + +static void add_all_hwdec_methods(struct hwdec_info **infos, int *num_infos) +{ + const AVCodec *codec = NULL; + void *iter = NULL; + while (1) { + codec = av_codec_iterate(&iter); + if (!codec) + break; + if (codec->type != AVMEDIA_TYPE_VIDEO || !av_codec_is_decoder(codec)) + continue; + + struct hwdec_info info_template = { + .pix_fmt = AV_PIX_FMT_NONE, + .codec = codec, + }; + + const char *wrapper = NULL; + if (codec->capabilities & (AV_CODEC_CAP_HARDWARE | AV_CODEC_CAP_HYBRID)) + wrapper = codec->wrapper_name; + + // A 
decoder can provide multiple methods. In particular, hwaccels + // provide various methods (e.g. native h264 with vaapi & d3d11), but + // even wrapper decoders could provide multiple methods. + bool found_any = false; + for (int n = 0; ; n++) { + const AVCodecHWConfig *cfg = avcodec_get_hw_config(codec, n); + if (!cfg) + break; + + if ((cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) || + (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX)) + { + struct hwdec_info info = info_template; + info.lavc_device = cfg->device_type; + info.pix_fmt = cfg->pix_fmt; + + const char *name = av_hwdevice_get_type_name(cfg->device_type); + assert(name); // API violation by libavcodec + + // nvdec hwaccels and the cuvid full decoder clash with their + // naming, so fix it here; we also prefer nvdec for the hwaccel. + if (strcmp(name, "cuda") == 0 && !wrapper) + name = "nvdec"; + + snprintf(info.method_name, sizeof(info.method_name), "%s", name); + + // Usually we want to prefer using hw_frames_ctx for true + // hwaccels only, but we actually don't have any way to detect + // those, so always use hw_frames_ctx if offered. + if (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_FRAMES_CTX) { + info.use_hw_frames = true; + } else { + info.use_hw_device = true; + } + + // Direct variant. + add_hwdec_item(infos, num_infos, info); + + // Copy variant. + info.copying = true; + if (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_HW_DEVICE_CTX) { + info.use_hw_frames = false; + info.use_hw_device = true; + } + add_hwdec_item(infos, num_infos, info); + + found_any = true; + } else if (cfg->methods & AV_CODEC_HW_CONFIG_METHOD_INTERNAL) { + struct hwdec_info info = info_template; + info.pix_fmt = cfg->pix_fmt; + + const char *name = wrapper; + if (!name) + name = av_get_pix_fmt_name(info.pix_fmt); + assert(name); // API violation by libavcodec + + snprintf(info.method_name, sizeof(info.method_name), "%s", name); + + // Direct variant. 
+ add_hwdec_item(infos, num_infos, info); + + // Copy variant. + info.copying = true; + info.pix_fmt = AV_PIX_FMT_NONE; // trust it can do sw output + add_hwdec_item(infos, num_infos, info); + + found_any = true; + } + } + + if (!found_any && wrapper) { + // We _know_ there's something supported here, usually outputting + // sw surfaces. E.g. mediacodec (before hw_device_ctx support). + + struct hwdec_info info = info_template; + info.copying = true; // probably + + snprintf(info.method_name, sizeof(info.method_name), "%s", wrapper); + add_hwdec_item(infos, num_infos, info); + } + } + + qsort(*infos, *num_infos, sizeof(struct hwdec_info), hwdec_compare); +} + +static bool hwdec_codec_allowed(struct mp_filter *vd, const char *codec) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + bstr s = bstr0(ctx->opts->hwdec_codecs); + while (s.len) { + bstr item; + bstr_split_tok(s, ",", &item, &s); + if (bstr_equals0(item, "all") || bstr_equals0(item, codec)) + return true; + } + return false; +} + +static AVBufferRef *hwdec_create_dev(struct mp_filter *vd, + struct hwdec_info *hwdec, + bool autoprobe) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + assert(hwdec->lavc_device); + + if (hwdec->copying) { + const struct hwcontext_fns *fns = + hwdec_get_hwcontext_fns(hwdec->lavc_device); + if (fns && fns->create_dev) { + struct hwcontext_create_dev_params params = { + .probing = autoprobe, + }; + return fns->create_dev(vd->global, vd->log, ¶ms); + } else { + AVBufferRef* ref = NULL; + av_hwdevice_ctx_create(&ref, hwdec->lavc_device, NULL, NULL, 0); + return ref; + } + } else if (ctx->hwdec_devs) { + int imgfmt = pixfmt2imgfmt(hwdec->pix_fmt); + struct hwdec_imgfmt_request params = { + .imgfmt = imgfmt, + .probing = autoprobe, + }; + hwdec_devices_request_for_img_fmt(ctx->hwdec_devs, ¶ms); + + const struct mp_hwdec_ctx *hw_ctx = + hwdec_devices_get_by_imgfmt(ctx->hwdec_devs, imgfmt); + + if (hw_ctx && hw_ctx->av_device_ref) + return av_buffer_ref(hw_ctx->av_device_ref); + } + + return NULL; +} + +// 
Select if and which hwdec to use. Also makes sure to get the decode device. +static void select_and_set_hwdec(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + const char *codec = ctx->codec->codec; + + m_config_cache_update(ctx->opts_cache); + + struct hwdec_info *hwdecs = NULL; + int num_hwdecs = 0; + add_all_hwdec_methods(&hwdecs, &num_hwdecs); + + char **hwdec_api = ctx->opts->hwdec_api; + for (int i = 0; hwdec_api[i]; i++) { + bstr opt = bstr0(hwdec_api[i]); + + bool hwdec_requested = !bstr_equals0(opt, "no"); + bool hwdec_auto_all = bstr_equals0(opt, "auto") || + bstr_equals0(opt, ""); + bool hwdec_auto_safe = bstr_equals0(opt, "auto-safe") || + bstr_equals0(opt, "auto-copy-safe") || + bstr_equals0(opt, "yes"); + bool hwdec_auto_copy = bstr_equals0(opt, "auto-copy") || + bstr_equals0(opt, "auto-copy-safe"); + bool hwdec_auto = hwdec_auto_all || hwdec_auto_copy || hwdec_auto_safe; + + if (!hwdec_requested) { + MP_VERBOSE(vd, "No hardware decoding requested.\n"); + break; + } else if (!hwdec_codec_allowed(vd, codec)) { + MP_VERBOSE(vd, "Not trying to use hardware decoding: codec %s is not " + "on whitelist.\n", codec); + break; + } else { + bool hwdec_name_supported = false; // relevant only if !hwdec_auto + for (int n = 0; n < num_hwdecs; n++) { + struct hwdec_info *hwdec = &hwdecs[n]; + + if (!hwdec_auto && !(bstr_equals0(opt, hwdec->method_name) || + bstr_equals0(opt, hwdec->name))) + continue; + hwdec_name_supported = true; + + bool already_attempted = false; + for (int j = 0; j < ctx->num_attempted_hwdecs; j++) { + if (bstr_equals0(ctx->attempted_hwdecs[j], hwdec->name)) { + MP_DBG(vd, "Skipping previously attempted hwdec: %s\n", + hwdec->name); + already_attempted = true; + break; + } + } + if (already_attempted) + continue; + + const char *hw_codec = mp_codec_from_av_codec_id(hwdec->codec->id); + if (!hw_codec || strcmp(hw_codec, codec) != 0) + continue; + + if (hwdec_auto_safe && !(hwdec->flags & HWDEC_FLAG_WHITELIST)) + continue; + + 
MP_VERBOSE(vd, "Looking at hwdec %s...\n", hwdec->name); + + /* + * Past this point, any kind of failure that results in us + * looking for a new hwdec should not lead to use trying this + * hwdec again - so add it to the list, regardless of whether + * initialisation will succeed or not. + */ + MP_TARRAY_APPEND(ctx, ctx->attempted_hwdecs, + ctx->num_attempted_hwdecs, + bstrdup(ctx, bstr0(hwdec->name))); + + if (hwdec_auto_copy && !hwdec->copying) { + MP_VERBOSE(vd, "Not using this for auto-copy.\n"); + continue; + } + + if (hwdec->lavc_device) { + ctx->hwdec_dev = hwdec_create_dev(vd, hwdec, hwdec_auto); + if (!ctx->hwdec_dev) { + MP_VERBOSE(vd, "Could not create device.\n"); + continue; + } + + const struct hwcontext_fns *fns = + hwdec_get_hwcontext_fns(hwdec->lavc_device); + if (fns && fns->is_emulated && fns->is_emulated(ctx->hwdec_dev)) { + if (hwdec_auto) { + MP_VERBOSE(vd, "Not using emulated API.\n"); + av_buffer_unref(&ctx->hwdec_dev); + continue; + } + MP_WARN(vd, "Using emulated hardware decoding API.\n"); + } + } else if (!hwdec->copying) { + // Most likely METHOD_INTERNAL, which often use delay-loaded + // VO support as well. + if (ctx->hwdec_devs) { + struct hwdec_imgfmt_request params = { + .imgfmt = pixfmt2imgfmt(hwdec->pix_fmt), + .probing = hwdec_auto, + }; + hwdec_devices_request_for_img_fmt( + ctx->hwdec_devs, ¶ms); + } + } + + ctx->use_hwdec = true; + ctx->hwdec = *hwdec; + break; + } + if (ctx->use_hwdec) + break; + else if (!hwdec_auto && !hwdec_name_supported) + MP_WARN(vd, "Unsupported hwdec: %.*s\n", BSTR_P(opt)); + } + } + talloc_free(hwdecs); + + + if (ctx->use_hwdec) { + MP_VERBOSE(vd, "Trying hardware decoding via %s.\n", ctx->hwdec.name); + if (strcmp(ctx->decoder, ctx->hwdec.codec->name) != 0) + MP_VERBOSE(vd, "Using underlying hw-decoder '%s'\n", + ctx->hwdec.codec->name); + } else { + // If software fallback is disabled and we get here, all hwdec must + // have failed. Tell the ctx to always force an eof. 
+ if (ctx->opts->software_fallback == INT_MAX) { + MP_WARN(ctx, "Software decoding fallback is disabled.\n"); + ctx->force_eof = true; + } else { + MP_VERBOSE(vd, "Using software decoding.\n"); + } + } +} + +static int hwdec_opt_help(struct mp_log *log, const m_option_t *opt, + struct bstr name) +{ + struct hwdec_info *hwdecs = NULL; + int num_hwdecs = 0; + add_all_hwdec_methods(&hwdecs, &num_hwdecs); + + mp_info(log, "Valid values (with alternative full names):\n"); + + for (int n = 0; n < num_hwdecs; n++) { + struct hwdec_info *hwdec = &hwdecs[n]; + + mp_info(log, " %s (%s)\n", hwdec->method_name, hwdec->name); + } + + talloc_free(hwdecs); + + mp_info(log, " auto (yes '')\n"); + mp_info(log, " no\n"); + mp_info(log, " auto-safe\n"); + mp_info(log, " auto-copy\n"); + mp_info(log, " auto-copy-safe\n"); + + return M_OPT_EXIT; +} + +static void force_fallback(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + uninit_avctx(vd); + int lev = ctx->hwdec_notified ? MSGL_WARN : MSGL_V; + mp_msg(vd->log, lev, "Attempting next decoding method after failure of %.*s.\n", + BSTR_P(ctx->attempted_hwdecs[ctx->num_attempted_hwdecs - 1])); + select_and_set_hwdec(vd); + init_avctx(vd); +} + +static void reinit(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + uninit_avctx(vd); + + /* + * Reset attempted hwdecs so that if the hwdec list is reconfigured + * we attempt all of them from the beginning. The most practical + * reason for this is that ctrl+h toggles between `no` and + * `auto-safe`, and we want to reevaluate from a clean slate each time. 
+ */ + TA_FREEP(&ctx->attempted_hwdecs); + ctx->num_attempted_hwdecs = 0; + ctx->hwdec_notified = false; + + select_and_set_hwdec(vd); + + bool use_hwdec = ctx->use_hwdec; + init_avctx(vd); + if (!ctx->avctx && use_hwdec) { + do { + force_fallback(vd); + } while (!ctx->avctx); + } +} + +static void init_avctx(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + struct vd_lavc_params *lavc_param = ctx->opts; + struct mp_codec_params *c = ctx->codec; + + m_config_cache_update(ctx->opts_cache); + + assert(!ctx->avctx); + + const AVCodec *lavc_codec = NULL; + + if (ctx->use_hwdec) { + lavc_codec = ctx->hwdec.codec; + } else { + lavc_codec = avcodec_find_decoder_by_name(ctx->decoder); + } + if (!lavc_codec) + return; + + const AVCodecDescriptor *desc = avcodec_descriptor_get(lavc_codec->id); + ctx->intra_only = desc && (desc->props & AV_CODEC_PROP_INTRA_ONLY); + + ctx->codec_timebase = mp_get_codec_timebase(ctx->codec); + + // This decoder does not read pkt_timebase correctly yet. + if (strstr(lavc_codec->name, "_mmal")) + ctx->codec_timebase = (AVRational){1, 1000000}; + + ctx->hwdec_failed = false; + ctx->hwdec_request_reinit = false; + ctx->avctx = avcodec_alloc_context3(lavc_codec); + AVCodecContext *avctx = ctx->avctx; + if (!ctx->avctx) + goto error; + avctx->codec_type = AVMEDIA_TYPE_VIDEO; + avctx->codec_id = lavc_codec->id; + avctx->pkt_timebase = ctx->codec_timebase; + + ctx->pic = av_frame_alloc(); + if (!ctx->pic) + goto error; + + ctx->avpkt = av_packet_alloc(); + if (!ctx->avpkt) + goto error; + + if (ctx->use_hwdec) { + avctx->opaque = vd; + avctx->thread_count = 1; + avctx->hwaccel_flags |= AV_HWACCEL_FLAG_IGNORE_LEVEL; + if (!lavc_param->check_hw_profile) + avctx->hwaccel_flags |= AV_HWACCEL_FLAG_ALLOW_PROFILE_MISMATCH; + +#ifdef AV_HWACCEL_FLAG_UNSAFE_OUTPUT + /* + * This flag primarily exists for nvdec which has a very limited + * output frame pool, which can get exhausted if consumers don't + * release frames quickly. 
However, as an implementation + * requirement, we have to copy the frames anyway, so we don't + * need this extra implicit copy. + */ + avctx->hwaccel_flags |= AV_HWACCEL_FLAG_UNSAFE_OUTPUT; +#endif + + if (ctx->hwdec.use_hw_device) { + if (ctx->hwdec_dev) + avctx->hw_device_ctx = av_buffer_ref(ctx->hwdec_dev); + if (!avctx->hw_device_ctx) + goto error; + } + if (ctx->hwdec.use_hw_frames) { + if (!ctx->hwdec_dev) + goto error; + } + + if (ctx->hwdec.pix_fmt != AV_PIX_FMT_NONE) + avctx->get_format = get_format_hwdec; + + // Some APIs benefit from this, for others it's additional bloat. + if (ctx->hwdec.copying) + ctx->max_delay_queue = HWDEC_DELAY_QUEUE_COUNT; + ctx->hw_probing = true; + } else { + mp_set_avcodec_threads(vd->log, avctx, lavc_param->threads); + } + + if (!ctx->use_hwdec && ctx->vo && lavc_param->dr) { + avctx->opaque = vd; + avctx->get_buffer2 = get_buffer2_direct; +#if LIBAVCODEC_VERSION_MAJOR < 60 + AV_NOWARN_DEPRECATED({ + avctx->thread_safe_callbacks = 1; + }); +#endif + } + + avctx->flags |= lavc_param->bitexact ? AV_CODEC_FLAG_BITEXACT : 0; + avctx->flags2 |= lavc_param->fast ? AV_CODEC_FLAG2_FAST : 0; + + if (lavc_param->show_all) + avctx->flags |= AV_CODEC_FLAG_OUTPUT_CORRUPT; + + avctx->skip_loop_filter = lavc_param->skip_loop_filter; + avctx->skip_idct = lavc_param->skip_idct; + avctx->skip_frame = lavc_param->skip_frame; + avctx->apply_cropping = lavc_param->apply_cropping; + + if (lavc_codec->id == AV_CODEC_ID_H264 && lavc_param->old_x264) + av_opt_set(avctx, "x264_build", "150", AV_OPT_SEARCH_CHILDREN); + +#ifndef AV_CODEC_EXPORT_DATA_FILM_GRAIN + if (ctx->opts->film_grain == 1) + MP_WARN(vd, "GPU film grain requested, but FFmpeg too old to expose " + "film grain parameters. Please update to latest master, " + "or at least to release 4.4.\n"); +#else + switch(ctx->opts->film_grain) { + case 0: /*CPU*/ + // default lavc flags handle film grain within the decoder. 
+ break; + case 1: /*GPU*/ + if (!ctx->vo || + (ctx->vo && !(ctx->vo->driver->caps & VO_CAP_FILM_GRAIN))) { + MP_MSG(vd, ctx->vo ? MSGL_WARN : MSGL_V, + "GPU film grain requested, but VO %s, expect wrong output.\n", + ctx->vo ? + "does not support applying film grain" : + "is not available at decoder initialization to verify support"); + } + + avctx->export_side_data |= AV_CODEC_EXPORT_DATA_FILM_GRAIN; + break; + default: + if (ctx->vo && (ctx->vo->driver->caps & VO_CAP_FILM_GRAIN)) + avctx->export_side_data |= AV_CODEC_EXPORT_DATA_FILM_GRAIN; + + break; + } +#endif + + mp_set_avopts(vd->log, avctx, lavc_param->avopts); + + // Do this after the above avopt handling in case it changes values + ctx->skip_frame = avctx->skip_frame; + + if (mp_set_avctx_codec_headers(avctx, c) < 0) { + MP_ERR(vd, "Could not set codec parameters.\n"); + goto error; + } + + /* open it */ + if (avcodec_open2(avctx, lavc_codec, NULL) < 0) + goto error; + + // Sometimes, the first packet contains information required for correct + // decoding of the rest of the stream. The only currently known case is the + // x264 build number (encoded in a SEI element), needed to enable a + // workaround for broken 4:4:4 streams produced by older x264 versions. 
+ if (lavc_codec->id == AV_CODEC_ID_H264 && c->first_packet) { + mp_set_av_packet(ctx->avpkt, c->first_packet, &ctx->codec_timebase); + avcodec_send_packet(avctx, ctx->avpkt); + avcodec_receive_frame(avctx, ctx->pic); + av_frame_unref(ctx->pic); + avcodec_flush_buffers(ctx->avctx); + } + + return; + +error: + MP_ERR(vd, "Could not open codec.\n"); + uninit_avctx(vd); +} + +static void reset_avctx(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + if (ctx->avctx && avcodec_is_open(ctx->avctx)) + avcodec_flush_buffers(ctx->avctx); + ctx->flushing = false; + ctx->hwdec_request_reinit = false; +} + +static void flush_all(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + for (int n = 0; n < ctx->num_delay_queue; n++) + talloc_free(ctx->delay_queue[n]); + ctx->num_delay_queue = 0; + + for (int n = 0; n < ctx->num_sent_packets; n++) + talloc_free(ctx->sent_packets[n]); + ctx->num_sent_packets = 0; + + for (int n = 0; n < ctx->num_requeue_packets; n++) + talloc_free(ctx->requeue_packets[n]); + ctx->num_requeue_packets = 0; + + reset_avctx(vd); +} + +static void uninit_avctx(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + flush_all(vd); + av_frame_free(&ctx->pic); + mp_free_av_packet(&ctx->avpkt); + av_buffer_unref(&ctx->cached_hw_frames_ctx); + + avcodec_free_context(&ctx->avctx); + + av_buffer_unref(&ctx->hwdec_dev); + + ctx->hwdec_failed = false; + ctx->hwdec_fail_count = 0; + ctx->max_delay_queue = 0; + ctx->hw_probing = false; + ctx->hwdec = (struct hwdec_info){0}; + ctx->use_hwdec = false; +} + +static int init_generic_hwaccel(struct mp_filter *vd, enum AVPixelFormat hw_fmt) +{ + struct lavc_ctx *ctx = vd->priv; + AVBufferRef *new_frames_ctx = NULL; + + if (!ctx->hwdec.use_hw_frames) + return 0; + + if (!ctx->hwdec_dev) { + MP_ERR(ctx, "Missing device context.\n"); + goto error; + } + + if (avcodec_get_hw_frames_parameters(ctx->avctx, + ctx->hwdec_dev, hw_fmt, &new_frames_ctx) < 0) + { + MP_VERBOSE(ctx, "Hardware decoding of this 
stream is unsupported?\n"); + goto error; + } + + AVHWFramesContext *new_fctx = (void *)new_frames_ctx->data; + + if (ctx->opts->hwdec_image_format) + new_fctx->sw_format = imgfmt2pixfmt(ctx->opts->hwdec_image_format); + + // 1 surface is already included by libavcodec. The field is 0 if the + // hwaccel supports dynamic surface allocation. + if (new_fctx->initial_pool_size) + new_fctx->initial_pool_size += ctx->opts->hwdec_extra_frames - 1; + + const struct hwcontext_fns *fns = + hwdec_get_hwcontext_fns(new_fctx->device_ctx->type); + + if (fns && fns->refine_hwframes) + fns->refine_hwframes(new_frames_ctx); + + // We might be able to reuse a previously allocated frame pool. + if (ctx->cached_hw_frames_ctx) { + AVHWFramesContext *old_fctx = (void *)ctx->cached_hw_frames_ctx->data; + + if (new_fctx->format != old_fctx->format || + new_fctx->sw_format != old_fctx->sw_format || + new_fctx->width != old_fctx->width || + new_fctx->height != old_fctx->height || + new_fctx->initial_pool_size != old_fctx->initial_pool_size) + av_buffer_unref(&ctx->cached_hw_frames_ctx); + } + + if (!ctx->cached_hw_frames_ctx) { + if (av_hwframe_ctx_init(new_frames_ctx) < 0) { + MP_ERR(ctx, "Failed to allocate hw frames.\n"); + goto error; + } + + ctx->cached_hw_frames_ctx = new_frames_ctx; + new_frames_ctx = NULL; + } + + ctx->avctx->hw_frames_ctx = av_buffer_ref(ctx->cached_hw_frames_ctx); + if (!ctx->avctx->hw_frames_ctx) + goto error; + + av_buffer_unref(&new_frames_ctx); + return 0; + +error: + av_buffer_unref(&new_frames_ctx); + av_buffer_unref(&ctx->cached_hw_frames_ctx); + return -1; +} + +static enum AVPixelFormat get_format_hwdec(struct AVCodecContext *avctx, + const enum AVPixelFormat *fmt) +{ + struct mp_filter *vd = avctx->opaque; + vd_ffmpeg_ctx *ctx = vd->priv; + + MP_VERBOSE(vd, "Pixel formats supported by decoder:"); + for (int i = 0; fmt[i] != AV_PIX_FMT_NONE; i++) + MP_VERBOSE(vd, " %s", av_get_pix_fmt_name(fmt[i])); + MP_VERBOSE(vd, "\n"); + + const char *profile = 
avcodec_profile_name(avctx->codec_id, avctx->profile); + MP_VERBOSE(vd, "Codec profile: %s (0x%x)\n", profile ? profile : "unknown", + avctx->profile); + + assert(ctx->use_hwdec); + + ctx->hwdec_request_reinit |= ctx->hwdec_failed; + ctx->hwdec_failed = false; + + enum AVPixelFormat select = AV_PIX_FMT_NONE; + for (int i = 0; fmt[i] != AV_PIX_FMT_NONE; i++) { + if (ctx->hwdec.pix_fmt == fmt[i]) { + if (init_generic_hwaccel(vd, fmt[i]) < 0) + break; + select = fmt[i]; + break; + } + } + + if (select == AV_PIX_FMT_NONE) { + ctx->hwdec_failed = true; + select = avcodec_default_get_format(avctx, fmt); + } + + const char *name = av_get_pix_fmt_name(select); + MP_VERBOSE(vd, "Requesting pixfmt '%s' from decoder.\n", name ? name : "-"); + return select; +} + +static int get_buffer2_direct(AVCodecContext *avctx, AVFrame *pic, int flags) +{ + struct mp_filter *vd = avctx->opaque; + vd_ffmpeg_ctx *p = vd->priv; + + mp_mutex_lock(&p->dr_lock); + + int w = pic->width; + int h = pic->height; + int linesize_align[AV_NUM_DATA_POINTERS] = {0}; + avcodec_align_dimensions2(avctx, &w, &h, linesize_align); + + // We assume that different alignments are just different power-of-2s. + // Thus, a higher alignment always satisfies a lower alignment. + int stride_align = MP_IMAGE_BYTE_ALIGN; + for (int n = 0; n < AV_NUM_DATA_POINTERS; n++) + stride_align = MPMAX(stride_align, linesize_align[n]); + + // Note: texel sizes may be NPOT, so use full lcm instead of max + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format); + if (!(desc->flags & AV_PIX_FMT_FLAG_BITSTREAM)) { + for (int n = 0; n < desc->nb_components; n++) + stride_align = mp_lcm(stride_align, desc->comp[n].step); + } + + int imgfmt = pixfmt2imgfmt(pic->format); + if (!imgfmt) + goto fallback; + + if (p->dr_failed) + goto fallback; + + // (For simplicity, we realloc on any parameter change, instead of trying + // to be clever.) 
+ if (stride_align != p->dr_stride_align || w != p->dr_w || h != p->dr_h || + imgfmt != p->dr_imgfmt) + { + mp_image_pool_clear(p->dr_pool); + p->dr_imgfmt = imgfmt; + p->dr_w = w; + p->dr_h = h; + p->dr_stride_align = stride_align; + MP_DBG(p, "DR parameter change to %dx%d %s align=%d\n", w, h, + mp_imgfmt_to_name(imgfmt), stride_align); + } + + struct mp_image *img = mp_image_pool_get_no_alloc(p->dr_pool, imgfmt, w, h); + if (!img) { + bool host_cached = p->opts->dr == -1; // auto + int dr_flags = host_cached ? VO_DR_FLAG_HOST_CACHED : 0; + MP_DBG(p, "Allocating new%s DR image...\n", host_cached ? " (host-cached)" : ""); + img = vo_get_image(p->vo, imgfmt, w, h, stride_align, dr_flags); + if (!img) { + MP_DBG(p, "...failed..\n"); + goto fallback; + } + + // Now make the mp_image part of the pool. This requires doing magic to + // the image, so just add it to the pool and get it back to avoid + // dealing with magic ourselves. (Normally this never fails.) + mp_image_pool_add(p->dr_pool, img); + img = mp_image_pool_get_no_alloc(p->dr_pool, imgfmt, w, h); + if (!img) + goto fallback; + } + + // get_buffer2 callers seem very unappreciative of overwriting pic with a + // new reference. The AVCodecContext.get_buffer2 comments tell us exactly + // what we should do, so follow that. 
+ for (int n = 0; n < 4; n++) { + pic->data[n] = img->planes[n]; + pic->linesize[n] = img->stride[n]; + pic->buf[n] = img->bufs[n]; + img->bufs[n] = NULL; + } + talloc_free(img); + + mp_mutex_unlock(&p->dr_lock); + + return 0; + +fallback: + if (!p->dr_failed) + MP_VERBOSE(p, "DR failed - disabling.\n"); + p->dr_failed = true; + mp_mutex_unlock(&p->dr_lock); + + return avcodec_default_get_buffer2(avctx, pic, flags); +} + +static void prepare_decoding(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + AVCodecContext *avctx = ctx->avctx; + struct vd_lavc_params *opts = ctx->opts; + + if (!avctx) + return; + + int drop = ctx->framedrop_flags; + if (drop == 1) { + avctx->skip_frame = opts->framedrop; // normal framedrop + } else if (drop == 2) { + avctx->skip_frame = AVDISCARD_NONREF; // hr-seek framedrop + // Can be much more aggressive for true intra codecs. + if (ctx->intra_only) + avctx->skip_frame = AVDISCARD_ALL; + } else { + avctx->skip_frame = ctx->skip_frame; // normal playback + } + + if (ctx->hwdec_request_reinit) + reset_avctx(vd); +} + +static void handle_err(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + struct vd_lavc_params *opts = ctx->opts; + + MP_WARN(vd, "Error while decoding frame%s!\n", + ctx->use_hwdec ? " (hardware decoding)" : ""); + + if (ctx->use_hwdec) { + ctx->hwdec_fail_count += 1; + if (ctx->hwdec_fail_count >= opts->software_fallback) + ctx->hwdec_failed = true; + } +} + +static int send_packet(struct mp_filter *vd, struct demux_packet *pkt) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + AVCodecContext *avctx = ctx->avctx; + + if (ctx->num_requeue_packets && ctx->requeue_packets[0] != pkt) + return AVERROR(EAGAIN); // cannot consume the packet + + if (ctx->hwdec_failed) + return AVERROR(EAGAIN); + + if (!ctx->avctx) + return AVERROR_EOF; + + prepare_decoding(vd); + + if (avctx->skip_frame == AVDISCARD_ALL) + return 0; + + mp_set_av_packet(ctx->avpkt, pkt, &ctx->codec_timebase); + + int ret = avcodec_send_packet(avctx, pkt ? 
ctx->avpkt : NULL); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) + return ret; + + if (ctx->hw_probing && ctx->num_sent_packets < 32 && + ctx->opts->software_fallback <= 32) + { + pkt = pkt ? demux_copy_packet(pkt) : NULL; + MP_TARRAY_APPEND(ctx, ctx->sent_packets, ctx->num_sent_packets, pkt); + } + + if (ret < 0) + handle_err(vd); + return ret; +} + +static void send_queued_packet(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + assert(ctx->num_requeue_packets); + + if (send_packet(vd, ctx->requeue_packets[0]) != AVERROR(EAGAIN)) { + talloc_free(ctx->requeue_packets[0]); + MP_TARRAY_REMOVE_AT(ctx->requeue_packets, ctx->num_requeue_packets, 0); + } +} + +// Returns whether decoder is still active (!EOF state). +static int decode_frame(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + AVCodecContext *avctx = ctx->avctx; + + if (!avctx || ctx->force_eof) + return AVERROR_EOF; + + prepare_decoding(vd); + + // Re-send old packets (typically after a hwdec fallback during init). + if (ctx->num_requeue_packets) + send_queued_packet(vd); + + int ret = avcodec_receive_frame(avctx, ctx->pic); + if (ret < 0) { + if (ret == AVERROR_EOF) { + // If flushing was initialized earlier and has ended now, make it + // start over in case we get new packets at some point in the future. + // This must take the delay queue into account, so avctx returns EOF + // until the delay queue has been drained. + if (!ctx->num_delay_queue) + reset_avctx(vd); + } else if (ret == AVERROR(EAGAIN)) { + // just retry after caller writes a packet + } else { + handle_err(vd); + } + return ret; + } + + // If something was decoded successfully, it must return a frame with valid + // data. + assert(ctx->pic->buf[0]); + + struct mp_image *mpi = mp_image_from_av_frame(ctx->pic); + if (!mpi) { + av_frame_unref(ctx->pic); + return ret; + } + + if (mpi->imgfmt == IMGFMT_CUDA && !mpi->planes[0]) { + MP_ERR(vd, "CUDA frame without data. 
This is a FFmpeg bug.\n"); + talloc_free(mpi); + handle_err(vd); + return AVERROR_BUG; + } + + ctx->hwdec_fail_count = 0; + + mpi->pts = mp_pts_from_av(ctx->pic->pts, &ctx->codec_timebase); + mpi->dts = mp_pts_from_av(ctx->pic->pkt_dts, &ctx->codec_timebase); + + mpi->pkt_duration = +#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(59, 30, 100) + mp_pts_from_av(ctx->pic->duration, &ctx->codec_timebase); +#else + mp_pts_from_av(ctx->pic->pkt_duration, &ctx->codec_timebase); +#endif + + av_frame_unref(ctx->pic); + + MP_TARRAY_APPEND(ctx, ctx->delay_queue, ctx->num_delay_queue, mpi); + return ret; +} + +static int receive_frame(struct mp_filter *vd, struct mp_frame *out_frame) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + int ret = decode_frame(vd); + + if (ctx->hwdec_failed) { + // Failed hardware decoding? Try the next one, and eventually software. + struct demux_packet **pkts = ctx->sent_packets; + int num_pkts = ctx->num_sent_packets; + ctx->sent_packets = NULL; + ctx->num_sent_packets = 0; + + /* + * We repeatedly force_fallback until we get an avctx, because there are + * certain hwdecs that are really full decoders, and so if these fail, + * they also fail to give us a valid avctx, and the early return path + * here will simply give up on decoding completely if there is no + * decoder. We should never hit an infinite loop as the hwdec list is + * finite and we will eventually exhaust it and fall back to software + * decoding (and in practice, most hwdecs are hwaccels and so the + * decoder will successfully init even if the hwaccel fails later.) 
+ */ + do { + force_fallback(vd); + } while (!ctx->avctx); + + ctx->requeue_packets = pkts; + ctx->num_requeue_packets = num_pkts; + + return 0; // force retry + } + + if (ret == AVERROR(EAGAIN) && ctx->num_requeue_packets) + return 0; // force retry, so send_queued_packet() gets called + + if (ctx->num_delay_queue <= ctx->max_delay_queue && ret != AVERROR_EOF) + return ret; + + if (!ctx->num_delay_queue) + return ret; + + struct mp_image *res = ctx->delay_queue[0]; + MP_TARRAY_REMOVE_AT(ctx->delay_queue, ctx->num_delay_queue, 0); + + res = res ? mp_img_swap_to_native(res) : NULL; + if (!res) + return AVERROR_UNKNOWN; + + if (ctx->use_hwdec && ctx->hwdec.copying && res->hwctx) { + struct mp_image *sw = mp_image_hw_download(res, ctx->hwdec_swpool); + mp_image_unrefp(&res); + res = sw; + if (!res) { + MP_ERR(vd, "Could not copy back hardware decoded frame.\n"); + ctx->hwdec_fail_count = INT_MAX - 1; // force fallback + handle_err(vd); + return AVERROR_UNKNOWN; + } + } + + if (!ctx->hwdec_notified) { + if (ctx->use_hwdec) { + MP_INFO(vd, "Using hardware decoding (%s).\n", + ctx->hwdec.method_name); + } else { + MP_VERBOSE(vd, "Using software decoding.\n"); + } + ctx->hwdec_notified = true; + } + + if (ctx->hw_probing) { + for (int n = 0; n < ctx->num_sent_packets; n++) + talloc_free(ctx->sent_packets[n]); + ctx->num_sent_packets = 0; + ctx->hw_probing = false; + } + + *out_frame = MAKE_FRAME(MP_FRAME_VIDEO, res); + return 0; +} + +static int control(struct mp_filter *vd, enum dec_ctrl cmd, void *arg) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + switch (cmd) { + case VDCTRL_SET_FRAMEDROP: + ctx->framedrop_flags = *(int *)arg; + return CONTROL_TRUE; + case VDCTRL_CHECK_FORCED_EOF: { + *(bool *)arg = ctx->force_eof; + return CONTROL_TRUE; + } + case VDCTRL_GET_BFRAMES: { + AVCodecContext *avctx = ctx->avctx; + if (!avctx) + break; + if (ctx->use_hwdec && strcmp(ctx->hwdec.method_name, "mmal") == 0) + break; // MMAL has arbitrary buffering, thus unknown + *(int *)arg = 
avctx->has_b_frames; + return CONTROL_TRUE; + } + case VDCTRL_GET_HWDEC: { + *(char **)arg = ctx->use_hwdec ? ctx->hwdec.method_name : NULL; + return CONTROL_TRUE; + } + case VDCTRL_FORCE_HWDEC_FALLBACK: + if (ctx->use_hwdec) { + force_fallback(vd); + return ctx->avctx ? CONTROL_OK : CONTROL_ERROR; + } + return CONTROL_FALSE; + case VDCTRL_REINIT: + reinit(vd); + return CONTROL_TRUE; + } + return CONTROL_UNKNOWN; +} + +static void process(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + lavc_process(vd, &ctx->state, send_packet, receive_frame); +} + +static void reset(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + flush_all(vd); + + ctx->state = (struct lavc_state){0}; + ctx->framedrop_flags = 0; +} + +static void destroy(struct mp_filter *vd) +{ + vd_ffmpeg_ctx *ctx = vd->priv; + + uninit_avctx(vd); + + mp_mutex_destroy(&ctx->dr_lock); +} + +static const struct mp_filter_info vd_lavc_filter = { + .name = "vd_lavc", + .priv_size = sizeof(vd_ffmpeg_ctx), + .process = process, + .reset = reset, + .destroy = destroy, +}; + +static struct mp_decoder *create(struct mp_filter *parent, + struct mp_codec_params *codec, + const char *decoder) +{ + struct mp_filter *vd = mp_filter_create(parent, &vd_lavc_filter); + if (!vd) + return NULL; + + mp_filter_add_pin(vd, MP_PIN_IN, "in"); + mp_filter_add_pin(vd, MP_PIN_OUT, "out"); + + vd->log = mp_log_new(vd, parent->log, NULL); + + vd_ffmpeg_ctx *ctx = vd->priv; + ctx->log = vd->log; + ctx->opts_cache = m_config_cache_alloc(ctx, vd->global, &vd_lavc_conf); + ctx->opts = ctx->opts_cache->opts; + ctx->codec = codec; + ctx->decoder = talloc_strdup(ctx, decoder); + ctx->hwdec_swpool = mp_image_pool_new(ctx); + ctx->dr_pool = mp_image_pool_new(ctx); + + ctx->public.f = vd; + ctx->public.control = control; + + mp_mutex_init(&ctx->dr_lock); + + // hwdec/DR + struct mp_stream_info *info = mp_filter_find_stream_info(vd); + if (info) { + ctx->hwdec_devs = info->hwdec_devs; + ctx->vo = info->dr_vo; + } + + 
reinit(vd); + + if (!ctx->avctx) { + talloc_free(vd); + return NULL; + } + return &ctx->public; +} + +static void add_decoders(struct mp_decoder_list *list) +{ + mp_add_lavc_decoders(list, AVMEDIA_TYPE_VIDEO); +} + +const struct mp_decoder_fns vd_lavc = { + .create = create, + .add_decoders = add_decoders, +}; diff --git a/video/drmprime.c b/video/drmprime.c new file mode 100644 index 0000000..64d793f --- /dev/null +++ b/video/drmprime.c @@ -0,0 +1,43 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <libavutil/hwcontext.h> + +#include "hwdec.h" +#include "options/m_config.h" +#include "video/out/drm_common.h" + +extern const struct m_sub_options drm_conf; +static struct AVBufferRef *drm_create_standalone(struct mpv_global *global, + struct mp_log *log, struct hwcontext_create_dev_params *params) +{ + void *tmp = talloc_new(NULL); + struct drm_opts *drm_opts = mp_get_config_group(tmp, global, &drm_conf); + const char *opt_path = drm_opts->device_path; + + const char *device_path = opt_path ? 
opt_path : "/dev/dri/renderD128"; + AVBufferRef* ref = NULL; + av_hwdevice_ctx_create(&ref, AV_HWDEVICE_TYPE_DRM, device_path, NULL, 0); + + talloc_free(tmp); + return ref; +} + +const struct hwcontext_fns hwcontext_fns_drmprime = { + .av_hwdevice_type = AV_HWDEVICE_TYPE_DRM, + .create_dev = drm_create_standalone, +}; diff --git a/video/filter/refqueue.c b/video/filter/refqueue.c new file mode 100644 index 0000000..d018e38 --- /dev/null +++ b/video/filter/refqueue.c @@ -0,0 +1,356 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> + +#include <libavutil/buffer.h> + +#include "common/common.h" +#include "filters/f_autoconvert.h" +#include "filters/filter_internal.h" +#include "video/mp_image.h" + +#include "refqueue.h" + +struct mp_refqueue { + struct mp_filter *filter; + struct mp_autoconvert *conv; + struct mp_pin *in, *out; + + struct mp_image *in_format; + + // Buffered frame in case of format changes. + struct mp_image *next; + + int needed_past_frames; + int needed_future_frames; + int flags; + + bool second_field; // current frame has to output a second field yet + bool eof; + + // Queue of input frames, used to determine past/current/future frames. + // queue[0] is the newest frame, queue[num_queue - 1] the oldest. 
+ struct mp_image **queue; + int num_queue; + // queue[pos] is the current frame, unless pos is an invalid index. + int pos; +}; + +static bool mp_refqueue_has_output(struct mp_refqueue *q); + +static void refqueue_dtor(void *p) +{ + struct mp_refqueue *q = p; + mp_refqueue_flush(q); + mp_image_unrefp(&q->in_format); + talloc_free(q->conv->f); +} + +struct mp_refqueue *mp_refqueue_alloc(struct mp_filter *f) +{ + struct mp_refqueue *q = talloc_zero(f, struct mp_refqueue); + talloc_set_destructor(q, refqueue_dtor); + q->filter = f; + + q->conv = mp_autoconvert_create(f); + MP_HANDLE_OOM(q->conv); + + q->in = q->conv->f->pins[1]; + mp_pin_connect(q->conv->f->pins[0], f->ppins[0]); + q->out = f->ppins[1]; + + mp_refqueue_flush(q); + return q; +} + +void mp_refqueue_add_in_format(struct mp_refqueue *q, int fmt, int subfmt) +{ + mp_autoconvert_add_imgfmt(q->conv, fmt, subfmt); +} + +// The minimum number of frames required before and after the current frame. +void mp_refqueue_set_refs(struct mp_refqueue *q, int past, int future) +{ + assert(past >= 0 && future >= 0); + q->needed_past_frames = past; + q->needed_future_frames = MPMAX(future, 1); // at least 1 for determining PTS +} + +// MP_MODE_* flags +void mp_refqueue_set_mode(struct mp_refqueue *q, int flags) +{ + q->flags = flags; +} + +// Whether the current frame should be deinterlaced. +bool mp_refqueue_should_deint(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q) || !(q->flags & MP_MODE_DEINT)) + return false; + + return (q->queue[q->pos]->fields & MP_IMGFIELD_INTERLACED) || + !(q->flags & MP_MODE_INTERLACED_ONLY); +} + +// Whether the current output frame (field) is the top field, bottom field +// otherwise. (Assumes the caller forces deinterlacing.) +bool mp_refqueue_is_top_field(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return false; + + return !!(q->queue[q->pos]->fields & MP_IMGFIELD_TOP_FIRST) ^ q->second_field; +} + +// Whether top-field-first mode is enabled. 
+bool mp_refqueue_top_field_first(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return false; + + return q->queue[q->pos]->fields & MP_IMGFIELD_TOP_FIRST; +} + +// Discard all state. +void mp_refqueue_flush(struct mp_refqueue *q) +{ + for (int n = 0; n < q->num_queue; n++) + talloc_free(q->queue[n]); + q->num_queue = 0; + q->pos = -1; + q->second_field = false; + q->eof = false; + mp_image_unrefp(&q->next); +} + +static void mp_refqueue_add_input(struct mp_refqueue *q, struct mp_image *img) +{ + assert(img); + + MP_TARRAY_INSERT_AT(q, q->queue, q->num_queue, 0, img); + q->pos++; + + assert(q->pos >= 0 && q->pos < q->num_queue); +} + +static bool mp_refqueue_need_input(struct mp_refqueue *q) +{ + return q->pos < q->needed_future_frames && !q->eof; +} + +static bool mp_refqueue_has_output(struct mp_refqueue *q) +{ + return q->pos >= 0 && !mp_refqueue_need_input(q); +} + +static bool output_next_field(struct mp_refqueue *q) +{ + if (q->second_field) + return false; + if (!(q->flags & MP_MODE_OUTPUT_FIELDS)) + return false; + if (!mp_refqueue_should_deint(q)) + return false; + + assert(q->pos >= 0); + + // If there's no (reasonable) timestamp, also skip the field. + if (q->pos == 0) + return false; + + double pts = q->queue[q->pos]->pts; + double next_pts = q->queue[q->pos - 1]->pts; + if (pts == MP_NOPTS_VALUE || next_pts == MP_NOPTS_VALUE) + return false; + + double frametime = next_pts - pts; + if (frametime <= 0.0 || frametime >= 1.0) + return false; + + q->queue[q->pos]->pts = pts + frametime / 2; + q->second_field = true; + return true; +} + +// Advance to next input frame (skips fields even in field output mode). +static void mp_refqueue_next(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return; + + q->pos--; + q->second_field = false; + + assert(q->pos >= -1 && q->pos < q->num_queue); + + // Discard unneeded past frames. 
+ while (q->num_queue - (q->pos + 1) > q->needed_past_frames) { + assert(q->num_queue > 0); + talloc_free(q->queue[q->num_queue - 1]); + q->num_queue--; + } + + assert(q->pos >= -1 && q->pos < q->num_queue); +} + +// Advance current field, depending on interlace flags. +static void mp_refqueue_next_field(struct mp_refqueue *q) +{ + if (!mp_refqueue_has_output(q)) + return; + + if (!output_next_field(q)) + mp_refqueue_next(q); +} + +// Return a frame by relative position: +// -1: first past frame +// 0: current frame +// 1: first future frame +// Caller doesn't get ownership. Return NULL if unavailable. +struct mp_image *mp_refqueue_get(struct mp_refqueue *q, int pos) +{ + int i = q->pos - pos; + return i >= 0 && i < q->num_queue ? q->queue[i] : NULL; +} + +// Same as mp_refqueue_get(), but return the frame which contains a field +// relative to the current field's position. +struct mp_image *mp_refqueue_get_field(struct mp_refqueue *q, int pos) +{ + // If the current field is the second field (conceptually), then pos=1 + // needs to get the next frame. Similarly, pos=-1 needs to get the current + // frame, so round towards negative infinity. + int round = mp_refqueue_top_field_first(q) != mp_refqueue_is_top_field(q); + int frame = (pos < 0 ? pos - (1 - round) : pos + round) / 2; + return mp_refqueue_get(q, frame); +} + +bool mp_refqueue_is_second_field(struct mp_refqueue *q) +{ + return mp_refqueue_has_output(q) && q->second_field; +} + +// Return non-NULL if a format change happened. A format change is defined by +// a change in image parameters, using broad enough checks that happen to be +// sufficient for all users of refqueue. +// On format change, the refqueue transparently drains remaining frames, and +// once that is done, this function returns a mp_image reference of the new +// frame. Reinit the low level video processor based on it, and then leave the +// reference alone and continue normally. 
+// All frames returned in the future will have a compatible format. +struct mp_image *mp_refqueue_execute_reinit(struct mp_refqueue *q) +{ + if (mp_refqueue_has_output(q) || !q->next) + return NULL; + + struct mp_image *cur = q->next; + q->next = NULL; + + mp_image_unrefp(&q->in_format); + mp_refqueue_flush(q); + + q->in_format = mp_image_new_ref(cur); + mp_image_unref_data(q->in_format); + + mp_refqueue_add_input(q, cur); + return cur; +} + +// Main processing function. Call this in the filter process function. +// Returns if enough input frames are available for filtering, and output pin +// needs data; in other words, if this returns true, you render a frame and +// output it. +// If this returns true, you must call mp_refqueue_write_out_pin() to make +// progress. +bool mp_refqueue_can_output(struct mp_refqueue *q) +{ + if (!mp_pin_in_needs_data(q->out)) + return false; + + // Strictly return any output first to reduce latency. + if (mp_refqueue_has_output(q)) + return true; + + if (q->next) { + // Make it call again for mp_refqueue_execute_reinit(). 
+ mp_filter_internal_mark_progress(q->filter); + return false; + } + + struct mp_frame frame = mp_pin_out_read(q->in); + if (frame.type == MP_FRAME_NONE) + return false; + + if (frame.type == MP_FRAME_EOF) { + q->eof = true; + if (mp_refqueue_has_output(q)) { + mp_pin_out_unread(q->in, frame); + return true; + } + mp_pin_in_write(q->out, frame); + mp_refqueue_flush(q); + return false; + } + + if (frame.type != MP_FRAME_VIDEO) { + MP_ERR(q->filter, "unsupported frame type\n"); + mp_frame_unref(&frame); + mp_filter_internal_mark_failed(q->filter); + return false; + } + + struct mp_image *img = frame.data; + + if (!q->in_format || !!q->in_format->hwctx != !!img->hwctx || + (img->hwctx && img->hwctx->data != q->in_format->hwctx->data) || + !mp_image_params_equal(&q->in_format->params, &img->params)) + { + q->next = img; + q->eof = true; + mp_filter_internal_mark_progress(q->filter); + return false; + } + + mp_refqueue_add_input(q, img); + + if (mp_refqueue_has_output(q)) + return true; + + mp_pin_out_request_data(q->in); + return false; +} + +// (Accepts NULL for generic errors.) +void mp_refqueue_write_out_pin(struct mp_refqueue *q, struct mp_image *mpi) +{ + if (mpi) { + mp_pin_in_write(q->out, MAKE_FRAME(MP_FRAME_VIDEO, mpi)); + } else { + MP_WARN(q->filter, "failed to output frame\n"); + mp_filter_internal_mark_failed(q->filter); + } + mp_refqueue_next_field(q); +} + +// Return frame for current format (without data). Reference is owned by q, +// might go away on further queue accesses. NULL if none yet. +struct mp_image *mp_refqueue_get_format(struct mp_refqueue *q) +{ + return q->in_format; +} diff --git a/video/filter/refqueue.h b/video/filter/refqueue.h new file mode 100644 index 0000000..0a8ace0 --- /dev/null +++ b/video/filter/refqueue.h @@ -0,0 +1,39 @@ +#ifndef MP_REFQUEUE_H_ +#define MP_REFQUEUE_H_ + +#include <stdbool.h> + +#include "filters/filter.h" + +// A helper for deinterlacers which require past/future reference frames. 
+ +struct mp_refqueue; + +struct mp_refqueue *mp_refqueue_alloc(struct mp_filter *f); + +void mp_refqueue_add_in_format(struct mp_refqueue *q, int fmt, int subfmt); + +void mp_refqueue_set_refs(struct mp_refqueue *q, int past, int future); +void mp_refqueue_flush(struct mp_refqueue *q); +struct mp_image *mp_refqueue_get(struct mp_refqueue *q, int pos); + +struct mp_image *mp_refqueue_execute_reinit(struct mp_refqueue *q); +bool mp_refqueue_can_output(struct mp_refqueue *q); +void mp_refqueue_write_out_pin(struct mp_refqueue *q, struct mp_image *mpi); + +struct mp_image *mp_refqueue_get_format(struct mp_refqueue *q); + +enum { + MP_MODE_DEINT = (1 << 0), // deinterlacing enabled + MP_MODE_OUTPUT_FIELDS = (1 << 1), // output fields separately + MP_MODE_INTERLACED_ONLY = (1 << 2), // only deinterlace marked frames +}; + +void mp_refqueue_set_mode(struct mp_refqueue *q, int flags); +bool mp_refqueue_should_deint(struct mp_refqueue *q); +bool mp_refqueue_is_top_field(struct mp_refqueue *q); +bool mp_refqueue_top_field_first(struct mp_refqueue *q); +bool mp_refqueue_is_second_field(struct mp_refqueue *q); +struct mp_image *mp_refqueue_get_field(struct mp_refqueue *q, int pos); + +#endif diff --git a/video/filter/vf_d3d11vpp.c b/video/filter/vf_d3d11vpp.c new file mode 100644 index 0000000..3f00c5a --- /dev/null +++ b/video/filter/vf_d3d11vpp.c @@ -0,0 +1,506 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <windows.h> +#include <d3d11.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_d3d11va.h> + +#include "common/common.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "filters/f_autoconvert.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "refqueue.h" +#include "video/hwdec.h" +#include "video/mp_image.h" +#include "video/mp_image_pool.h" + +// missing in MinGW +#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BLEND 0x1 +#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB 0x2 +#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_ADAPTIVE 0x4 +#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_MOTION_COMPENSATION 0x8 +#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_INVERSE_TELECINE 0x10 +#define D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_FRAME_RATE_CONVERSION 0x20 + +struct opts { + bool deint_enabled; + bool interlaced_only; + int mode; +}; + +struct priv { + struct opts *opts; + + ID3D11Device *vo_dev; + + ID3D11DeviceContext *device_ctx; + ID3D11VideoDevice *video_dev; + ID3D11VideoContext *video_ctx; + + ID3D11VideoProcessor *video_proc; + ID3D11VideoProcessorEnumerator *vp_enum; + D3D11_VIDEO_FRAME_FORMAT d3d_frame_format; + + DXGI_FORMAT out_format; + + bool require_filtering; + + struct mp_image_params params, out_params; + int c_w, c_h; + + struct mp_image_pool *pool; + + struct mp_refqueue *queue; +}; + +static void release_tex(void *arg) +{ + ID3D11Texture2D *texture = arg; + + ID3D11Texture2D_Release(texture); +} + +static struct mp_image *alloc_pool(void *pctx, int fmt, int w, int h) +{ + struct mp_filter *vf = pctx; + struct priv *p = vf->priv; + HRESULT hr; + + ID3D11Texture2D *texture = NULL; + D3D11_TEXTURE2D_DESC texdesc = { + .Width = w, + 
.Height = h, + .Format = p->out_format, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc = { .Count = 1 }, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_RENDER_TARGET | D3D11_BIND_SHADER_RESOURCE, + }; + hr = ID3D11Device_CreateTexture2D(p->vo_dev, &texdesc, NULL, &texture); + if (FAILED(hr)) + return NULL; + + struct mp_image *mpi = mp_image_new_custom_ref(NULL, texture, release_tex); + MP_HANDLE_OOM(mpi); + + mp_image_setfmt(mpi, IMGFMT_D3D11); + mp_image_set_size(mpi, w, h); + mpi->params.hw_subfmt = p->out_params.hw_subfmt; + + mpi->planes[0] = (void *)texture; + mpi->planes[1] = (void *)(intptr_t)0; + + return mpi; +} + +static void flush_frames(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + mp_refqueue_flush(p->queue); +} + +static void destroy_video_proc(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + + if (p->video_proc) + ID3D11VideoProcessor_Release(p->video_proc); + p->video_proc = NULL; + + if (p->vp_enum) + ID3D11VideoProcessorEnumerator_Release(p->vp_enum); + p->vp_enum = NULL; +} + +static int recreate_video_proc(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + HRESULT hr; + + destroy_video_proc(vf); + + D3D11_VIDEO_PROCESSOR_CONTENT_DESC vpdesc = { + .InputFrameFormat = p->d3d_frame_format, + .InputWidth = p->c_w, + .InputHeight = p->c_h, + .OutputWidth = p->params.w, + .OutputHeight = p->params.h, + }; + hr = ID3D11VideoDevice_CreateVideoProcessorEnumerator(p->video_dev, &vpdesc, + &p->vp_enum); + if (FAILED(hr)) + goto fail; + + D3D11_VIDEO_PROCESSOR_CAPS caps; + hr = ID3D11VideoProcessorEnumerator_GetVideoProcessorCaps(p->vp_enum, &caps); + if (FAILED(hr)) + goto fail; + + MP_VERBOSE(vf, "Found %d rate conversion caps. 
Looking for caps=0x%x.\n", + (int)caps.RateConversionCapsCount, p->opts->mode); + + int rindex = -1; + for (int n = 0; n < caps.RateConversionCapsCount; n++) { + D3D11_VIDEO_PROCESSOR_RATE_CONVERSION_CAPS rcaps; + hr = ID3D11VideoProcessorEnumerator_GetVideoProcessorRateConversionCaps + (p->vp_enum, n, &rcaps); + if (FAILED(hr)) + goto fail; + MP_VERBOSE(vf, " - %d: 0x%08x\n", n, (unsigned)rcaps.ProcessorCaps); + if (rcaps.ProcessorCaps & p->opts->mode) { + MP_VERBOSE(vf, " (matching)\n"); + if (rindex < 0) + rindex = n; + } + } + + if (rindex < 0) { + MP_WARN(vf, "No fitting video processor found, picking #0.\n"); + rindex = 0; + } + + // TODO: so, how do we select which rate conversion mode the processor uses? + + hr = ID3D11VideoDevice_CreateVideoProcessor(p->video_dev, p->vp_enum, rindex, + &p->video_proc); + if (FAILED(hr)) { + MP_ERR(vf, "Failed to create D3D11 video processor.\n"); + goto fail; + } + + // Note: libavcodec does not support cropping left/top with hwaccel. + RECT src_rc = { + .right = p->params.w, + .bottom = p->params.h, + }; + ID3D11VideoContext_VideoProcessorSetStreamSourceRect(p->video_ctx, + p->video_proc, + 0, TRUE, &src_rc); + + // This is supposed to stop drivers from fucking up the video quality. + ID3D11VideoContext_VideoProcessorSetStreamAutoProcessingMode(p->video_ctx, + p->video_proc, + 0, FALSE); + + ID3D11VideoContext_VideoProcessorSetStreamOutputRate(p->video_ctx, + p->video_proc, + 0, + D3D11_VIDEO_PROCESSOR_OUTPUT_RATE_NORMAL, + FALSE, 0); + + D3D11_VIDEO_PROCESSOR_COLOR_SPACE csp = { + .YCbCr_Matrix = p->params.color.space != MP_CSP_BT_601, + .Nominal_Range = p->params.color.levels == MP_CSP_LEVELS_TV ? 
1 : 2, + }; + ID3D11VideoContext_VideoProcessorSetStreamColorSpace(p->video_ctx, + p->video_proc, + 0, &csp); + ID3D11VideoContext_VideoProcessorSetOutputColorSpace(p->video_ctx, + p->video_proc, + &csp); + + return 0; +fail: + destroy_video_proc(vf); + return -1; +} + +static struct mp_image *render(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + int res = -1; + HRESULT hr; + ID3D11VideoProcessorInputView *in_view = NULL; + ID3D11VideoProcessorOutputView *out_view = NULL; + struct mp_image *in = NULL, *out = NULL; + out = mp_image_pool_get(p->pool, IMGFMT_D3D11, p->params.w, p->params.h); + if (!out) { + MP_WARN(vf, "failed to allocate frame\n"); + goto cleanup; + } + + ID3D11Texture2D *d3d_out_tex = (void *)out->planes[0]; + + in = mp_refqueue_get(p->queue, 0); + if (!in) + goto cleanup; + ID3D11Texture2D *d3d_tex = (void *)in->planes[0]; + int d3d_subindex = (intptr_t)in->planes[1]; + + mp_image_copy_attributes(out, in); + + D3D11_VIDEO_FRAME_FORMAT d3d_frame_format; + if (!mp_refqueue_should_deint(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE; + } else if (mp_refqueue_top_field_first(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST; + } else { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_BOTTOM_FIELD_FIRST; + } + + D3D11_TEXTURE2D_DESC texdesc; + ID3D11Texture2D_GetDesc(d3d_tex, &texdesc); + if (!p->video_proc || p->c_w != texdesc.Width || p->c_h != texdesc.Height || + p->d3d_frame_format != d3d_frame_format) + { + p->c_w = texdesc.Width; + p->c_h = texdesc.Height; + p->d3d_frame_format = d3d_frame_format; + if (recreate_video_proc(vf) < 0) + goto cleanup; + } + + if (!mp_refqueue_should_deint(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_PROGRESSIVE; + } else if (mp_refqueue_is_top_field(p->queue)) { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_TOP_FIELD_FIRST; + } else { + d3d_frame_format = D3D11_VIDEO_FRAME_FORMAT_INTERLACED_BOTTOM_FIELD_FIRST; + } + + 
ID3D11VideoContext_VideoProcessorSetStreamFrameFormat(p->video_ctx, + p->video_proc, + 0, d3d_frame_format); + + D3D11_VIDEO_PROCESSOR_INPUT_VIEW_DESC indesc = { + .ViewDimension = D3D11_VPIV_DIMENSION_TEXTURE2D, + .Texture2D = { + .ArraySlice = d3d_subindex, + }, + }; + hr = ID3D11VideoDevice_CreateVideoProcessorInputView(p->video_dev, + (ID3D11Resource *)d3d_tex, + p->vp_enum, &indesc, + &in_view); + if (FAILED(hr)) { + MP_ERR(vf, "Could not create ID3D11VideoProcessorInputView\n"); + goto cleanup; + } + + D3D11_VIDEO_PROCESSOR_OUTPUT_VIEW_DESC outdesc = { + .ViewDimension = D3D11_VPOV_DIMENSION_TEXTURE2D, + }; + hr = ID3D11VideoDevice_CreateVideoProcessorOutputView(p->video_dev, + (ID3D11Resource *)d3d_out_tex, + p->vp_enum, &outdesc, + &out_view); + if (FAILED(hr)) { + MP_ERR(vf, "Could not create ID3D11VideoProcessorOutputView\n"); + goto cleanup; + } + + D3D11_VIDEO_PROCESSOR_STREAM stream = { + .Enable = TRUE, + .pInputSurface = in_view, + }; + int frame = mp_refqueue_is_second_field(p->queue); + hr = ID3D11VideoContext_VideoProcessorBlt(p->video_ctx, p->video_proc, + out_view, frame, 1, &stream); + if (FAILED(hr)) { + MP_ERR(vf, "VideoProcessorBlt failed.\n"); + goto cleanup; + } + + res = 0; +cleanup: + if (in_view) + ID3D11VideoProcessorInputView_Release(in_view); + if (out_view) + ID3D11VideoProcessorOutputView_Release(out_view); + if (res < 0) + TA_FREEP(&out); + return out; +} + +static void vf_d3d11vpp_process(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + + struct mp_image *in_fmt = mp_refqueue_execute_reinit(p->queue); + if (in_fmt) { + mp_image_pool_clear(p->pool); + + destroy_video_proc(vf); + + p->params = in_fmt->params; + p->out_params = p->params; + + p->out_params.hw_subfmt = IMGFMT_NV12; + p->out_format = DXGI_FORMAT_NV12; + + p->require_filtering = p->params.hw_subfmt != p->out_params.hw_subfmt; + } + + if (!mp_refqueue_can_output(p->queue)) + return; + + if (!mp_refqueue_should_deint(p->queue) && !p->require_filtering) { + // no 
filtering + struct mp_image *in = mp_image_new_ref(mp_refqueue_get(p->queue, 0)); + if (!in) { + mp_filter_internal_mark_failed(vf); + return; + } + mp_refqueue_write_out_pin(p->queue, in); + } else { + mp_refqueue_write_out_pin(p->queue, render(vf)); + } +} + +static void uninit(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + + destroy_video_proc(vf); + + flush_frames(vf); + talloc_free(p->queue); + talloc_free(p->pool); + + if (p->video_ctx) + ID3D11VideoContext_Release(p->video_ctx); + + if (p->video_dev) + ID3D11VideoDevice_Release(p->video_dev); + + if (p->device_ctx) + ID3D11DeviceContext_Release(p->device_ctx); + + if (p->vo_dev) + ID3D11Device_Release(p->vo_dev); +} + +static const struct mp_filter_info vf_d3d11vpp_filter = { + .name = "d3d11vpp", + .process = vf_d3d11vpp_process, + .reset = flush_frames, + .destroy = uninit, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter *vf_d3d11vpp_create(struct mp_filter *parent, + void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &vf_d3d11vpp_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + + // Special path for vf_d3d11_create_outconv(): disable all processing except + // possibly surface format conversions. 
+ if (!p->opts) { + static const struct opts opts = {0}; + p->opts = (struct opts *)&opts; + } + + p->queue = mp_refqueue_alloc(f); + + struct mp_stream_info *info = mp_filter_find_stream_info(f); + if (!info || !info->hwdec_devs) + goto fail; + + struct hwdec_imgfmt_request params = { + .imgfmt = IMGFMT_D3D11, + .probing = false, + }; + hwdec_devices_request_for_img_fmt(info->hwdec_devs, ¶ms); + + struct mp_hwdec_ctx *hwctx = + hwdec_devices_get_by_imgfmt(info->hwdec_devs, IMGFMT_D3D11); + if (!hwctx || !hwctx->av_device_ref) + goto fail; + AVHWDeviceContext *avhwctx = (void *)hwctx->av_device_ref->data; + AVD3D11VADeviceContext *d3dctx = avhwctx->hwctx; + + p->vo_dev = d3dctx->device; + ID3D11Device_AddRef(p->vo_dev); + + HRESULT hr; + + hr = ID3D11Device_QueryInterface(p->vo_dev, &IID_ID3D11VideoDevice, + (void **)&p->video_dev); + if (FAILED(hr)) + goto fail; + + ID3D11Device_GetImmediateContext(p->vo_dev, &p->device_ctx); + if (!p->device_ctx) + goto fail; + hr = ID3D11DeviceContext_QueryInterface(p->device_ctx, &IID_ID3D11VideoContext, + (void **)&p->video_ctx); + if (FAILED(hr)) + goto fail; + + p->pool = mp_image_pool_new(f); + mp_image_pool_set_allocator(p->pool, alloc_pool, f); + mp_image_pool_set_lru(p->pool); + + mp_refqueue_add_in_format(p->queue, IMGFMT_D3D11, 0); + + mp_refqueue_set_refs(p->queue, 0, 0); + mp_refqueue_set_mode(p->queue, + (p->opts->deint_enabled ? MP_MODE_DEINT : 0) | + MP_MODE_OUTPUT_FIELDS | + (p->opts->interlaced_only ? 
MP_MODE_INTERLACED_ONLY : 0)); + + return f; + +fail: + talloc_free(f); + return NULL; +} + +#define OPT_BASE_STRUCT struct opts +static const m_option_t vf_opts_fields[] = { + {"deint", OPT_BOOL(deint_enabled)}, + {"interlaced-only", OPT_BOOL(interlaced_only)}, + {"mode", OPT_CHOICE(mode, + {"blend", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BLEND}, + {"bob", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB}, + {"adaptive", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_ADAPTIVE}, + {"mocomp", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_MOTION_COMPENSATION}, + {"ivctc", D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_INVERSE_TELECINE}, + {"none", 0})}, + {0} +}; + +const struct mp_user_filter_entry vf_d3d11vpp = { + .desc = { + .description = "D3D11 Video Post-Process Filter", + .name = "d3d11vpp", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT) { + .deint_enabled = true, + .mode = D3D11_VIDEO_PROCESSOR_PROCESSOR_CAPS_DEINTERLACE_BOB, + }, + .options = vf_opts_fields, + }, + .create = vf_d3d11vpp_create, +}; diff --git a/video/filter/vf_fingerprint.c b/video/filter/vf_fingerprint.c new file mode 100644 index 0000000..8714382 --- /dev/null +++ b/video/filter/vf_fingerprint.c @@ -0,0 +1,229 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <math.h> + +#include "common/common.h" +#include "common/tags.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_option.h" +#include "video/img_format.h" +#include "video/sws_utils.h" +#include "video/zimg.h" + +#include "osdep/timer.h" + +#define PRINT_ENTRY_NUM 10 + +struct f_opts { + int type; + bool clear; + bool print; +}; + +const struct m_opt_choice_alternatives type_names[] = { + {"gray-hex-8x8", 8}, + {"gray-hex-16x16", 16}, + {0} +}; + +#define OPT_BASE_STRUCT struct f_opts +static const struct m_option f_opts_list[] = { + {"type", OPT_CHOICE_C(type, type_names)}, + {"clear-on-query", OPT_BOOL(clear)}, + {"print", OPT_BOOL(print)}, + {0} +}; + +static const struct f_opts f_opts_def = { + .type = 16, + .clear = true, +}; + +struct print_entry { + double pts; + char *print; +}; + +struct priv { + struct f_opts *opts; + struct mp_image *scaled; + struct mp_sws_context *sws; + struct mp_zimg_context *zimg; + struct print_entry entries[PRINT_ENTRY_NUM]; + int num_entries; + bool fallback_warning; +}; + +// (Other code internal to this filter also calls this to reset the frame list.) +static void f_reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + + for (int n = 0; n < p->num_entries; n++) + talloc_free(p->entries[n].print); + p->num_entries = 0; +} + +static void f_process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0])) + return; + + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + + if (mp_frame_is_signaling(frame)) { + mp_pin_in_write(f->ppins[1], frame); + return; + } + + if (frame.type != MP_FRAME_VIDEO) + goto error; + + struct mp_image *mpi = frame.data; + + // Try to achieve minimum conversion, even if it makes the fingerprints less + // "portable" across source video. + p->scaled->params.color = mpi->params.color; + // Make output always full range; no reason to lose precision. 
+ p->scaled->params.color.levels = MP_CSP_LEVELS_PC; + + if (!mp_zimg_convert(p->zimg, p->scaled, mpi)) { + if (!p->fallback_warning) { + MP_WARN(f, "Falling back to libswscale.\n"); + p->fallback_warning = true; + } + if (mp_sws_scale(p->sws, p->scaled, mpi) < 0) + goto error; + } + + if (p->num_entries >= PRINT_ENTRY_NUM) { + talloc_free(p->entries[0].print); + MP_TARRAY_REMOVE_AT(p->entries, p->num_entries, 0); + } + + int size = p->scaled->w; + + struct print_entry *e = &p->entries[p->num_entries++]; + e->pts = mpi->pts; + e->print = talloc_array(p, char, size * size * 2 + 1); + + for (int y = 0; y < size; y++) { + for (int x = 0; x < size; x++) { + char *offs = &e->print[(y * size + x) * 2]; + uint8_t v = p->scaled->planes[0][y * p->scaled->stride[0] + x]; + snprintf(offs, 3, "%02x", v); + } + } + + if (p->opts->print) + MP_INFO(f, "%f: %s\n", e->pts, e->print); + + mp_pin_in_write(f->ppins[1], frame); + return; + +error: + MP_ERR(f, "unsupported video format\n"); + mp_pin_in_write(f->ppins[1], frame); + mp_filter_internal_mark_failed(f); +} + +static bool f_command(struct mp_filter *f, struct mp_filter_command *cmd) +{ + struct priv *p = f->priv; + + switch (cmd->type) { + case MP_FILTER_COMMAND_GET_META: { + struct mp_tags *t = talloc_zero(NULL, struct mp_tags); + + for (int n = 0; n < p->num_entries; n++) { + struct print_entry *e = &p->entries[n]; + + if (e->pts != MP_NOPTS_VALUE) { + mp_tags_set_str(t, mp_tprintf(80, "fp%d.pts", n), + mp_tprintf(80, "%f", e->pts)); + } + mp_tags_set_str(t, mp_tprintf(80, "fp%d.hex", n), e->print); + } + + mp_tags_set_str(t, "type", m_opt_choice_str(type_names, p->opts->type)); + + if (p->opts->clear) + f_reset(f); + + *(struct mp_tags **)cmd->res = t; + return true; + } + default: + return false; + } +} + +static const struct mp_filter_info filter = { + .name = "fingerprint", + .process = f_process, + .command = f_command, + .reset = f_reset, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter 
*f_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + int size = p->opts->type; + p->scaled = mp_image_alloc(IMGFMT_Y8, size, size); + MP_HANDLE_OOM(p->scaled); + talloc_steal(p, p->scaled); + p->sws = mp_sws_alloc(p); + MP_HANDLE_OOM(p->sws); + p->zimg = mp_zimg_alloc(); + talloc_steal(p, p->zimg); + p->zimg->opts = (struct zimg_opts){ + .scaler = ZIMG_RESIZE_BILINEAR, + .scaler_params = {NAN, NAN}, + .scaler_chroma_params = {NAN, NAN}, + .scaler_chroma = ZIMG_RESIZE_BILINEAR, + .dither = ZIMG_DITHER_NONE, + .fast = true, + }; + return f; +} + +const struct mp_user_filter_entry vf_fingerprint = { + .desc = { + .description = "Compute video frame fingerprints", + .name = "fingerprint", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &f_opts_def, + .options = f_opts_list, + }, + .create = f_create, +}; diff --git a/video/filter/vf_format.c b/video/filter/vf_format.c new file mode 100644 index 0000000..4997d6f --- /dev/null +++ b/video/filter/vf_format.c @@ -0,0 +1,245 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <math.h> + +#include <libavutil/rational.h> +#include <libavutil/buffer.h> + +#include "common/msg.h" +#include "common/common.h" +#include "filters/f_autoconvert.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "video/img_format.h" +#include "video/mp_image.h" + +#include "options/m_option.h" + +struct priv { + struct vf_format_opts *opts; + struct mp_autoconvert *conv; +}; + +struct vf_format_opts { + int fmt; + int colormatrix; + int colorlevels; + int primaries; + int gamma; + float sig_peak; + int light; + int chroma_location; + int stereo_in; + int rotate; + int alpha; + int w, h; + int dw, dh; + double dar; + bool convert; + int force_scaler; + bool dovi; + bool film_grain; +}; + +static void set_params(struct vf_format_opts *p, struct mp_image_params *out, + bool set_size) +{ + if (p->colormatrix) + out->color.space = p->colormatrix; + if (p->colorlevels) + out->color.levels = p->colorlevels; + if (p->primaries) + out->color.primaries = p->primaries; + if (p->gamma) { + enum mp_csp_trc in_gamma = p->gamma; + out->color.gamma = p->gamma; + if (in_gamma != out->color.gamma) { + // When changing the gamma function explicitly, also reset stuff + // related to the gamma function since that information will almost + // surely be false now and have to be re-inferred + out->color.hdr = (struct pl_hdr_metadata){0}; + out->color.light = MP_CSP_LIGHT_AUTO; + } + } + if (p->sig_peak) + out->color.hdr = (struct pl_hdr_metadata){ .max_luma = p->sig_peak * MP_REF_WHITE }; + if (p->light) + out->color.light = p->light; + if (p->chroma_location) + out->chroma_location = p->chroma_location; + if (p->stereo_in) + out->stereo3d = p->stereo_in; + if (p->rotate >= 0) + out->rotate = p->rotate; + if (p->alpha) + out->alpha = p->alpha; + + if (p->w > 0 && set_size) + out->w = p->w; + if (p->h > 0 && set_size) + 
out->h = p->h; + AVRational dsize; + mp_image_params_get_dsize(out, &dsize.num, &dsize.den); + if (p->dw > 0) + dsize.num = p->dw; + if (p->dh > 0) + dsize.den = p->dh; + if (p->dar > 0) + dsize = av_d2q(p->dar, INT_MAX); + mp_image_params_set_dsize(out, dsize.num, dsize.den); +} + +static void vf_format_process(struct mp_filter *f) +{ + struct priv *priv = f->priv; + + if (mp_pin_can_transfer_data(priv->conv->f->pins[0], f->ppins[0])) { + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + + if (priv->opts->convert && frame.type == MP_FRAME_VIDEO) { + struct mp_image *img = frame.data; + struct mp_image_params par = img->params; + int outfmt = priv->opts->fmt; + + // If we convert from RGB to YUV, default to limited range. + if (mp_imgfmt_get_forced_csp(img->imgfmt) == MP_CSP_RGB && + outfmt && mp_imgfmt_get_forced_csp(outfmt) == MP_CSP_AUTO) + { + par.color.levels = MP_CSP_LEVELS_TV; + } + + set_params(priv->opts, &par, true); + + if (outfmt && par.imgfmt != outfmt) { + par.imgfmt = outfmt; + par.hw_subfmt = 0; + } + mp_image_params_guess_csp(&par); + + mp_autoconvert_set_target_image_params(priv->conv, &par); + } + + mp_pin_in_write(priv->conv->f->pins[0], frame); + } + + if (mp_pin_can_transfer_data(f->ppins[1], priv->conv->f->pins[1])) { + struct mp_frame frame = mp_pin_out_read(priv->conv->f->pins[1]); + struct mp_image *img = frame.data; + + if (frame.type != MP_FRAME_VIDEO) + goto write_out; + + if (!priv->opts->convert) { + set_params(priv->opts, &img->params, false); + mp_image_params_guess_csp(&img->params); + } + + if (!priv->opts->dovi) { + av_buffer_unref(&img->dovi); + av_buffer_unref(&img->dovi_buf); + } + + if (!priv->opts->film_grain) + av_buffer_unref(&img->film_grain); + +write_out: + mp_pin_in_write(f->ppins[1], frame); + } +} + +static const struct mp_filter_info vf_format_filter = { + .name = "format", + .process = vf_format_process, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter *vf_format_create(struct mp_filter 
*parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &vf_format_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + struct priv *priv = f->priv; + priv->opts = talloc_steal(priv, options); + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + priv->conv = mp_autoconvert_create(f); + if (!priv->conv) { + talloc_free(f); + return NULL; + } + + priv->conv->force_scaler = priv->opts->force_scaler; + + if (priv->opts->fmt) + mp_autoconvert_add_imgfmt(priv->conv, priv->opts->fmt, 0); + + return f; +} + +#define OPT_BASE_STRUCT struct vf_format_opts +static const m_option_t vf_opts_fields[] = { + {"fmt", OPT_IMAGEFORMAT(fmt)}, + {"colormatrix", OPT_CHOICE_C(colormatrix, mp_csp_names)}, + {"colorlevels", OPT_CHOICE_C(colorlevels, mp_csp_levels_names)}, + {"primaries", OPT_CHOICE_C(primaries, mp_csp_prim_names)}, + {"gamma", OPT_CHOICE_C(gamma, mp_csp_trc_names)}, + {"sig-peak", OPT_FLOAT(sig_peak)}, + {"light", OPT_CHOICE_C(light, mp_csp_light_names)}, + {"chroma-location", OPT_CHOICE_C(chroma_location, mp_chroma_names)}, + {"stereo-in", OPT_CHOICE_C(stereo_in, mp_stereo3d_names)}, + {"rotate", OPT_INT(rotate), M_RANGE(-1, 359)}, + {"alpha", OPT_CHOICE_C(alpha, mp_alpha_names)}, + {"w", OPT_INT(w)}, + {"h", OPT_INT(h)}, + {"dw", OPT_INT(dw)}, + {"dh", OPT_INT(dh)}, + {"dar", OPT_DOUBLE(dar)}, + {"convert", OPT_BOOL(convert)}, + {"dolbyvision", OPT_BOOL(dovi)}, + {"film-grain", OPT_BOOL(film_grain)}, + {"force-scaler", OPT_CHOICE(force_scaler, + {"auto", MP_SWS_AUTO}, + {"sws", MP_SWS_SWS}, + {"zimg", MP_SWS_ZIMG})}, + {0} +}; + +const struct mp_user_filter_entry vf_format = { + .desc = { + .description = "force output format", + .name = "format", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT){ + .rotate = -1, + .dovi = true, + .film_grain = true, + }, + .options = vf_opts_fields, + }, + .create = vf_format_create, +}; diff --git a/video/filter/vf_gpu.c 
b/video/filter/vf_gpu.c new file mode 100644 index 0000000..fb11941 --- /dev/null +++ b/video/filter/vf_gpu.c @@ -0,0 +1,373 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "common/common.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/m_config.h" +#include "options/m_option.h" +#include "options/options.h" +#include "video/out/aspect.h" +#include "video/out/gpu/video.h" +#include "video/out/opengl/egl_helpers.h" +#include "video/out/opengl/ra_gl.h" + +struct offscreen_ctx { + struct mp_log *log; + struct ra *ra; + void *priv; + + void (*set_context)(struct offscreen_ctx *ctx, bool enable); +}; + +struct gl_offscreen_ctx { + GL gl; + EGLDisplay egl_display; + EGLContext egl_context; +}; + +static void gl_ctx_destroy(void *p) +{ + struct offscreen_ctx *ctx = p; + struct gl_offscreen_ctx *gl = ctx->priv; + + ra_free(&ctx->ra); + + if (gl->egl_context) + eglDestroyContext(gl->egl_display, gl->egl_context); +} + +static void gl_ctx_set_context(struct offscreen_ctx *ctx, bool enable) +{ + struct gl_offscreen_ctx *gl = ctx->priv; + EGLContext c = enable ? 
gl->egl_context : EGL_NO_CONTEXT; + + if (!eglMakeCurrent(gl->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, c)) + MP_ERR(ctx, "Could not make EGL context current.\n"); +} + +static struct offscreen_ctx *gl_offscreen_ctx_create(struct mpv_global *global, + struct mp_log *log) +{ + struct offscreen_ctx *ctx = talloc_zero(NULL, struct offscreen_ctx); + struct gl_offscreen_ctx *gl = talloc_zero(ctx, struct gl_offscreen_ctx); + talloc_set_destructor(ctx, gl_ctx_destroy); + *ctx = (struct offscreen_ctx){ + .log = log, + .priv = gl, + .set_context = gl_ctx_set_context, + }; + + // This appears to work with Mesa. EGL 1.5 doesn't specify what a "default + // display" is at all. + gl->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + if (!eglInitialize(gl->egl_display, NULL, NULL)) { + MP_ERR(ctx, "Could not initialize EGL.\n"); + goto error; + } + + // Unfortunately, mpegl_create_context() is entangled with ra_ctx. + // Fortunately, it does not need much, and we can provide a stub. + struct ra_ctx ractx = { + .log = ctx->log, + .global = global, + }; + EGLConfig config; + if (!mpegl_create_context(&ractx, gl->egl_display, &gl->egl_context, &config)) + { + MP_ERR(ctx, "Could not create EGL context.\n"); + goto error; + } + + if (!eglMakeCurrent(gl->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + gl->egl_context)) + { + MP_ERR(ctx, "Could not make EGL context current.\n"); + goto error; + } + + mpegl_load_functions(&gl->gl, ctx->log); + ctx->ra = ra_create_gl(&gl->gl, ctx->log); + + if (!ctx->ra) + goto error; + + gl_ctx_set_context(ctx, false); + + return ctx; + +error: + talloc_free(ctx); + return NULL; +} + +static void offscreen_ctx_set_current(struct offscreen_ctx *ctx, bool enable) +{ + if (ctx->set_context) + ctx->set_context(ctx, enable); +} + +struct gpu_opts { + int w, h; +}; + +struct priv { + struct gpu_opts *opts; + struct m_config_cache *vo_opts_cache; + struct mp_vo_opts *vo_opts; + + struct offscreen_ctx *ctx; + struct gl_video *renderer; + struct ra_tex 
*target; + + struct mp_image_params img_params; + uint64_t next_frame_id; +}; + +static struct mp_image *gpu_render_frame(struct mp_filter *f, struct mp_image *in) +{ + struct priv *priv = f->priv; + bool ok = false; + struct mp_image *res = NULL; + struct ra *ra = priv->ctx->ra; + + if (priv->opts->w <= 0) + priv->opts->w = in->w; + if (priv->opts->h <= 0) + priv->opts->h = in->h; + + int w = priv->opts->w; + int h = priv->opts->h; + + struct vo_frame frame = { + .pts = in->pts, + .duration = -1, + .num_vsyncs = 1, + .current = in, + .num_frames = 1, + .frames = {in}, + .frame_id = ++(priv->next_frame_id), + }; + + bool need_reconfig = m_config_cache_update(priv->vo_opts_cache); + + if (!mp_image_params_equal(&priv->img_params, &in->params)) { + priv->img_params = in->params; + gl_video_config(priv->renderer, &in->params); + need_reconfig = true; + } + + if (need_reconfig) { + struct mp_rect src, dst; + struct mp_osd_res osd; + + struct mp_stream_info *info = mp_filter_find_stream_info(f); + struct osd_state *osd_state = info ? info->osd : NULL; + if (osd_state) { + osd_set_render_subs_in_filter(osd_state, true); + // Assume the osd_state doesn't somehow disappear. 
+ gl_video_set_osd_source(priv->renderer, osd_state); + } + + mp_get_src_dst_rects(f->log, priv->vo_opts, VO_CAP_ROTATE90, &in->params, + w, h, 1, &src, &dst, &osd); + + gl_video_resize(priv->renderer, &src, &dst, &osd); + } + + if (!priv->target) { + struct ra_tex_params params = { + .dimensions = 2, + .downloadable = true, + .w = w, + .h = h, + .d = 1, + .render_dst = true, + }; + + params.format = ra_find_unorm_format(ra, 1, 4); + + if (!params.format || !params.format->renderable) + goto done; + + priv->target = ra_tex_create(ra, ¶ms); + if (!priv->target) + goto done; + } + + // (it doesn't have access to the OSD though) + int flags = RENDER_FRAME_SUBS | RENDER_FRAME_VF_SUBS; + gl_video_render_frame(priv->renderer, &frame, (struct ra_fbo){priv->target}, + flags); + + res = mp_image_alloc(IMGFMT_RGB0, w, h); + if (!res) + goto done; + + struct ra_tex_download_params download_params = { + .tex = priv->target, + .dst = res->planes[0], + .stride = res->stride[0], + }; + if (!ra->fns->tex_download(ra, &download_params)) + goto done; + + ok = true; +done: + if (!ok) + TA_FREEP(&res); + return res; +} + +static void gpu_process(struct mp_filter *f) +{ + struct priv *priv = f->priv; + + if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0])) + return; + + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + + if (mp_frame_is_signaling(frame)) { + mp_pin_in_write(f->ppins[1], frame); + return; + } + + if (frame.type != MP_FRAME_VIDEO) + goto error; + + offscreen_ctx_set_current(priv->ctx, true); + + struct mp_image *mpi = frame.data; + struct mp_image *res = gpu_render_frame(f, mpi); + if (!res) { + MP_ERR(f, "Could not render or retrieve frame.\n"); + goto error; + } + + // It's not clear which parameters to copy. 
+ res->pts = mpi->pts; + res->dts = mpi->dts; + res->nominal_fps = mpi->nominal_fps; + + talloc_free(mpi); + + mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_VIDEO, res)); + return; + +error: + mp_frame_unref(&frame); + mp_filter_internal_mark_failed(f); + offscreen_ctx_set_current(priv->ctx, false); +} + +static void gpu_reset(struct mp_filter *f) +{ + struct priv *priv = f->priv; + + offscreen_ctx_set_current(priv->ctx, true); + gl_video_reset(priv->renderer); + offscreen_ctx_set_current(priv->ctx, false); +} + +static void gpu_destroy(struct mp_filter *f) +{ + struct priv *priv = f->priv; + + if (priv->ctx) { + offscreen_ctx_set_current(priv->ctx, true); + + gl_video_uninit(priv->renderer); + ra_tex_free(priv->ctx->ra, &priv->target); + + offscreen_ctx_set_current(priv->ctx, false); + } + + talloc_free(priv->ctx); +} + +static const struct mp_filter_info gpu_filter = { + .name = "gpu", + .process = gpu_process, + .reset = gpu_reset, + .destroy = gpu_destroy, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter *gpu_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &gpu_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *priv = f->priv; + priv->opts = talloc_steal(priv, options); + priv->vo_opts_cache = m_config_cache_alloc(f, f->global, &vo_sub_opts); + priv->vo_opts = priv->vo_opts_cache->opts; + + priv->ctx = gl_offscreen_ctx_create(f->global, f->log); + if (!priv->ctx) { + MP_FATAL(f, "Could not create offscreen ra context.\n"); + goto error; + } + + if (!priv->ctx->ra->fns->tex_download) { + MP_FATAL(f, "Offscreen ra context does not support image retrieval.\n"); + goto error; + } + + offscreen_ctx_set_current(priv->ctx, true); + + priv->renderer = gl_video_init(priv->ctx->ra, f->log, f->global); + assert(priv->renderer); // can't fail (strangely) + + 
offscreen_ctx_set_current(priv->ctx, false); + + MP_WARN(f, "This is experimental. Keep in mind:\n"); + MP_WARN(f, " - OSD rendering is done in software.\n"); + MP_WARN(f, " - Encoding will convert the RGB output to yuv420p in software.\n"); + MP_WARN(f, " - Using this with --vo=gpu will filter the video twice!\n"); + MP_WARN(f, " (And you can't prevent this; they use the same options.)\n"); + MP_WARN(f, " - Some features are simply not supported.\n"); + + return f; + +error: + talloc_free(f); + return NULL; +} + +#define OPT_BASE_STRUCT struct gpu_opts +const struct mp_user_filter_entry vf_gpu = { + .desc = { + .description = "vo_gpu as filter", + .name = "gpu", + .priv_size = sizeof(OPT_BASE_STRUCT), + .options = (const struct m_option[]){ + {"w", OPT_INT(w)}, + {"h", OPT_INT(h)}, + {0} + }, + }, + .create = gpu_create, +}; diff --git a/video/filter/vf_sub.c b/video/filter/vf_sub.c new file mode 100644 index 0000000..de7f787 --- /dev/null +++ b/video/filter/vf_sub.c @@ -0,0 +1,164 @@ +/* + * Copyright (C) 2006 Evgeniy Stepanov <eugeni.stepanov@gmail.com> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <assert.h> +#include <libavutil/common.h> + +#include "common/msg.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "options/options.h" +#include "video/img_format.h" +#include "video/mp_image.h" +#include "video/mp_image_pool.h" +#include "sub/osd.h" +#include "sub/dec_sub.h" + +#include "video/sws_utils.h" + +#include "options/m_option.h" + +struct vf_sub_opts { + int top_margin, bottom_margin; +}; + +struct priv { + struct vf_sub_opts *opts; + struct mp_image_pool *pool; +}; + +static void vf_sub_process(struct mp_filter *f) +{ + struct priv *priv = f->priv; + + if (!mp_pin_can_transfer_data(f->ppins[1], f->ppins[0])) + return; + + struct mp_frame frame = mp_pin_out_read(f->ppins[0]); + + if (mp_frame_is_signaling(frame)) { + mp_pin_in_write(f->ppins[1], frame); + return; + } + + struct mp_stream_info *info = mp_filter_find_stream_info(f); + struct osd_state *osd = info ? 
info->osd : NULL; + + if (!osd) + goto error; + + osd_set_render_subs_in_filter(osd, true); + + if (frame.type != MP_FRAME_VIDEO) + goto error; + + struct mp_image *mpi = frame.data; + + struct mp_osd_res dim = { + .w = mpi->w, + .h = mpi->h + priv->opts->top_margin + priv->opts->bottom_margin, + .mt = priv->opts->top_margin, + .mb = priv->opts->bottom_margin, + .display_par = mpi->params.p_w / (double)mpi->params.p_h, + }; + + if (dim.w != mpi->w || dim.h != mpi->h) { + struct mp_image *dmpi = + mp_image_pool_get(priv->pool, mpi->imgfmt, dim.w, dim.h); + if (!dmpi) + goto error; + mp_image_copy_attributes(dmpi, mpi); + int y1 = MP_ALIGN_DOWN(priv->opts->top_margin, mpi->fmt.align_y); + int y2 = MP_ALIGN_DOWN(y1 + mpi->h, mpi->fmt.align_y); + struct mp_image cropped = *dmpi; + mp_image_crop(&cropped, 0, y1, mpi->w, y1 + mpi->h); + mp_image_copy(&cropped, mpi); + mp_image_clear(dmpi, 0, 0, dmpi->w, y1); + mp_image_clear(dmpi, 0, y2, dmpi->w, dim.h); + mp_frame_unref(&frame); + mpi = dmpi; + frame = (struct mp_frame){MP_FRAME_VIDEO, mpi}; + } + + osd_draw_on_image_p(osd, dim, mpi->pts, OSD_DRAW_SUB_FILTER, priv->pool, mpi); + + mp_pin_in_write(f->ppins[1], frame); + return; + +error: + MP_ERR(f, "unsupported format, missing OSD, or failed allocation\n"); + mp_frame_unref(&frame); + mp_filter_internal_mark_failed(f); +} + +static void vf_sub_destroy(struct mp_filter *f) +{ + struct mp_stream_info *info = mp_filter_find_stream_info(f); + struct osd_state *osd = info ? 
info->osd : NULL; + if (osd) + osd_set_render_subs_in_filter(osd, false); +} + +static const struct mp_filter_info vf_sub_filter = { + .name = "sub", + .process = vf_sub_process, + .destroy = vf_sub_destroy, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter *vf_sub_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &vf_sub_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *priv = f->priv; + priv->opts = talloc_steal(priv, options); + priv->pool = mp_image_pool_new(priv); + + return f; +} + +#define OPT_BASE_STRUCT struct vf_sub_opts +static const m_option_t vf_opts_fields[] = { + {"bottom-margin", OPT_INT(bottom_margin), M_RANGE(0, 2000)}, + {"top-margin", OPT_INT(top_margin), M_RANGE(0, 2000)}, + {0} +}; + +const struct mp_user_filter_entry vf_sub = { + .desc = { + .description = "Render subtitles", + .name = "sub", + .priv_size = sizeof(OPT_BASE_STRUCT), + .options = vf_opts_fields, + }, + .create = vf_sub_create, +}; diff --git a/video/filter/vf_vapoursynth.c b/video/filter/vf_vapoursynth.c new file mode 100644 index 0000000..583a196 --- /dev/null +++ b/video/filter/vf_vapoursynth.c @@ -0,0 +1,892 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <limits.h> +#include <assert.h> + +#include <VapourSynth.h> +#include <VSHelper.h> + +#include <libavutil/rational.h> +#include <libavutil/cpu.h> + +#include "common/msg.h" +#include "filters/f_autoconvert.h" +#include "filters/f_utils.h" +#include "filters/filter_internal.h" +#include "filters/filter.h" +#include "filters/user_filters.h" +#include "options/m_option.h" +#include "options/path.h" +#include "osdep/threads.h" +#include "video/img_format.h" +#include "video/mp_image.h" +#include "video/sws_utils.h" + +struct vapoursynth_opts { + char *file; + int maxbuffer; + int maxrequests; + + const struct script_driver *drv; +}; + +struct priv { + struct mp_log *log; + struct vapoursynth_opts *opts; + char *script_path; + + VSCore *vscore; + const VSAPI *vsapi; + VSNodeRef *out_node; + VSNodeRef *in_node; + + const struct script_driver *drv; + // drv_vss + bool vs_initialized; + struct VSScript *se; + + struct mp_filter *f; + struct mp_pin *in_pin; + + // Format for which VS is currently configured. + struct mp_image_params fmt_in; + + mp_mutex lock; + mp_cond wakeup; + + // --- the following members are all protected by lock + struct mp_image **buffered; // oldest image first + int num_buffered; + int in_frameno; // frame number of buffered[0] (the oldest) + int requested_frameno; // last frame number for which we woke up core + int out_frameno; // frame number of first requested/ready frame + double out_pts; // pts corresponding to first requested/ready frame + struct mp_image **requested;// frame callback results (can point to dummy_img) + // requested[0] is the frame to return first + int max_requests; // upper bound for requested[] array + bool failed; // frame callback returned with an error + bool shutdown; // ask node to return + bool eof; // drain remaining data + int64_t frames_sent; // total nr. 
of frames ever added to input queue + bool initializing; // filters are being built + bool in_node_active; // node might still be called +}; + +// priv->requested[n] points to this if a request for frame n is in-progress +static const struct mp_image dummy_img; +// or if a request failed during EOF/reinit draining +static const struct mp_image dummy_img_eof; + +static void destroy_vs(struct priv *p); +static int reinit_vs(struct priv *p, struct mp_image *input); + +struct script_driver { + int (*init)(struct priv *p); // first time init + void (*uninit)(struct priv *p); // last time uninit + int (*load_core)(struct priv *p); // make vsapi/vscore available + int (*load)(struct priv *p, VSMap *vars); // also sets p->out_node + void (*unload)(struct priv *p); // unload script and maybe vs +}; + +struct mpvs_fmt { + VSPresetFormat vs; + int bits, xs, ys; +}; + +static const struct mpvs_fmt mpvs_fmt_table[] = { + {pfYUV420P8, 8, 1, 1}, + {pfYUV420P9, 9, 1, 1}, + {pfYUV420P10, 10, 1, 1}, + {pfYUV420P16, 16, 1, 1}, + {pfYUV422P8, 8, 1, 0}, + {pfYUV422P9, 9, 1, 0}, + {pfYUV422P10, 10, 1, 0}, + {pfYUV422P16, 16, 1, 0}, + {pfYUV410P8, 8, 2, 2}, + {pfYUV411P8, 8, 2, 0}, + {pfYUV440P8, 8, 0, 1}, + {pfYUV444P8, 8, 0, 0}, + {pfYUV444P9, 9, 0, 0}, + {pfYUV444P10, 10, 0, 0}, + {pfYUV444P16, 16, 0, 0}, + {pfNone} +}; + +static bool compare_fmt(int imgfmt, const struct mpvs_fmt *vs) +{ + struct mp_regular_imgfmt rfmt; + if (!mp_get_regular_imgfmt(&rfmt, imgfmt)) + return false; + if (rfmt.component_pad > 0) + return false; + if (rfmt.chroma_xs != vs->xs || rfmt.chroma_ys != vs->ys) + return false; + if (rfmt.component_size * 8 + rfmt.component_pad != vs->bits) + return false; + if (rfmt.num_planes != 3) + return false; + for (int n = 0; n < 3; n++) { + if (rfmt.planes[n].num_components != 1) + return false; + if (rfmt.planes[n].components[0] != n + 1) + return false; + } + return true; +} + +static VSPresetFormat mp_to_vs(int imgfmt) +{ + for (int n = 0; mpvs_fmt_table[n].bits; n++) 
{ + const struct mpvs_fmt *vsentry = &mpvs_fmt_table[n]; + if (compare_fmt(imgfmt, vsentry)) + return vsentry->vs; + } + return pfNone; +} + +static int mp_from_vs(VSPresetFormat vs) +{ + for (int n = 0; mpvs_fmt_table[n].bits; n++) { + const struct mpvs_fmt *vsentry = &mpvs_fmt_table[n]; + if (vsentry->vs == vs) { + for (int imgfmt = IMGFMT_START; imgfmt < IMGFMT_END; imgfmt++) { + if (compare_fmt(imgfmt, vsentry)) + return imgfmt; + } + break; + } + } + return 0; +} + +static void copy_mp_to_vs_frame_props_map(struct priv *p, VSMap *map, + struct mp_image *img) +{ + struct mp_image_params *params = &img->params; + p->vsapi->propSetInt(map, "_SARNum", params->p_w, 0); + p->vsapi->propSetInt(map, "_SARDen", params->p_h, 0); + if (params->color.levels) { + p->vsapi->propSetInt(map, "_ColorRange", + params->color.levels == MP_CSP_LEVELS_TV, 0); + } + // The docs explicitly say it uses libavcodec values. + p->vsapi->propSetInt(map, "_ColorSpace", + mp_csp_to_avcol_spc(params->color.space), 0); + if (params->chroma_location) { + p->vsapi->propSetInt(map, "_ChromaLocation", + params->chroma_location == MP_CHROMA_CENTER, 0); + } + char pict_type = 0; + switch (img->pict_type) { + case 1: pict_type = 'I'; break; + case 2: pict_type = 'P'; break; + case 3: pict_type = 'B'; break; + } + if (pict_type) + p->vsapi->propSetData(map, "_PictType", &pict_type, 1, 0); + int field = 0; + if (img->fields & MP_IMGFIELD_INTERLACED) + field = img->fields & MP_IMGFIELD_TOP_FIRST ? 
2 : 1; + p->vsapi->propSetInt(map, "_FieldBased", field, 0); +} + +static int set_vs_frame_props(struct priv *p, VSFrameRef *frame, + struct mp_image *img, int dur_num, int dur_den) +{ + VSMap *map = p->vsapi->getFramePropsRW(frame); + if (!map) + return -1; + p->vsapi->propSetInt(map, "_DurationNum", dur_num, 0); + p->vsapi->propSetInt(map, "_DurationDen", dur_den, 0); + copy_mp_to_vs_frame_props_map(p, map, img); + return 0; +} + +static VSFrameRef *alloc_vs_frame(struct priv *p, struct mp_image_params *fmt) +{ + const VSFormat *vsfmt = + p->vsapi->getFormatPreset(mp_to_vs(fmt->imgfmt), p->vscore); + return p->vsapi->newVideoFrame(vsfmt, fmt->w, fmt->h, NULL, p->vscore); +} + +static struct mp_image map_vs_frame(struct priv *p, const VSFrameRef *ref, + bool w) +{ + const VSFormat *fmt = p->vsapi->getFrameFormat(ref); + + struct mp_image img = {0}; + mp_image_setfmt(&img, mp_from_vs(fmt->id)); + mp_image_set_size(&img, p->vsapi->getFrameWidth(ref, 0), + p->vsapi->getFrameHeight(ref, 0)); + + for (int n = 0; n < img.num_planes; n++) { + if (w) { + img.planes[n] = p->vsapi->getWritePtr((VSFrameRef *)ref, n); + } else { + img.planes[n] = (uint8_t *)p->vsapi->getReadPtr(ref, n); + } + img.stride[n] = p->vsapi->getStride(ref, n); + } + + return img; +} + +static void drain_oldest_buffered_frame(struct priv *p) +{ + if (!p->num_buffered) + return; + talloc_free(p->buffered[0]); + for (int n = 0; n < p->num_buffered - 1; n++) + p->buffered[n] = p->buffered[n + 1]; + p->num_buffered--; + p->in_frameno++; +} + +static void VS_CC vs_frame_done(void *userData, const VSFrameRef *f, int n, + VSNodeRef *node, const char *errorMsg) +{ + struct priv *p = userData; + + struct mp_image *res = NULL; + if (f) { + struct mp_image img = map_vs_frame(p, f, false); + struct mp_image dummy = {.params = p->fmt_in}; + if (p->fmt_in.w != img.w || p->fmt_in.h != img.h) + dummy.params.crop = (struct mp_rect){0, 0, img.w, img.h}; + mp_image_copy_attributes(&img, &dummy); + img.pkt_duration = 
-1; + const VSMap *map = p->vsapi->getFramePropsRO(f); + if (map) { + int err1, err2; + int num = p->vsapi->propGetInt(map, "_DurationNum", 0, &err1); + int den = p->vsapi->propGetInt(map, "_DurationDen", 0, &err2); + if (!err1 && !err2) + img.pkt_duration = num / (double)den; + } + if (img.pkt_duration < 0) { + MP_ERR(p, "No PTS after filter at frame %d!\n", n); + } else { + img.nominal_fps = 1.0 / img.pkt_duration; + } + res = mp_image_new_copy(&img); + p->vsapi->freeFrame(f); + } + + mp_mutex_lock(&p->lock); + + // If these assertions fail, n is an unrequested frame (or filtered twice). + assert(n >= p->out_frameno && n < p->out_frameno + p->max_requests); + int index = n - p->out_frameno; + MP_TRACE(p, "filtered frame %d (%d)\n", n, index); + assert(p->requested[index] == &dummy_img); + + if (!res && !p->shutdown) { + if (p->eof) { + res = (struct mp_image *)&dummy_img_eof; + } else { + p->failed = true; + MP_ERR(p, "Filter error at frame %d: %s\n", n, errorMsg); + } + } + p->requested[index] = res; + mp_cond_broadcast(&p->wakeup); + mp_mutex_unlock(&p->lock); + mp_filter_wakeup(p->f); +} + +static void vf_vapoursynth_process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + mp_mutex_lock(&p->lock); + + if (p->failed) { + // Not sure what we do on errors, but at least don't deadlock. + MP_ERR(f, "failed, no action taken\n"); + mp_filter_internal_mark_failed(f); + goto done; + } + + // Read input and pass it to the input queue VS reads. + while (p->num_buffered < MP_TALLOC_AVAIL(p->buffered) && !p->eof) { + // Note: this requests new input frames even if no output was ever + // requested. Normally this is not how mp_filter works, but since VS + // works asynchronously, it's probably ok. 
+ struct mp_frame frame = mp_pin_out_read(p->in_pin); + if (frame.type == MP_FRAME_EOF) { + if (p->out_node && !p->eof) { + MP_VERBOSE(p, "initiate EOF\n"); + p->eof = true; + mp_cond_broadcast(&p->wakeup); + } + if (!p->out_node && mp_pin_in_needs_data(f->ppins[1])) { + MP_VERBOSE(p, "return EOF\n"); + mp_pin_in_write(f->ppins[1], frame); + } else { + // Keep it until we can propagate it. + mp_pin_out_unread(p->in_pin, frame); + break; + } + } else if (frame.type == MP_FRAME_VIDEO) { + struct mp_image *mpi = frame.data; + // Init VS script, or reinit it to change video format. (This + // includes derived parameters we pass manually to the script.) + if (!p->out_node || mpi->imgfmt != p->fmt_in.imgfmt || + mpi->w != p->fmt_in.w || mpi->h != p->fmt_in.h || + mpi->params.p_w != p->fmt_in.p_w || + mpi->params.p_h != p->fmt_in.p_h) + { + if (p->out_node) { + // Drain still buffered frames. + MP_VERBOSE(p, "draining VS for format change\n"); + mp_pin_out_unread(p->in_pin, frame); + p->eof = true; + mp_cond_broadcast(&p->wakeup); + mp_filter_internal_mark_progress(f); + goto done; + } + mp_mutex_unlock(&p->lock); + if (p->out_node) + destroy_vs(p); + p->fmt_in = mpi->params; + if (reinit_vs(p, mpi) < 0) { + MP_ERR(p, "could not init VS\n"); + mp_frame_unref(&frame); + mp_filter_internal_mark_failed(f); + return; + } + mp_mutex_lock(&p->lock); + } + if (p->out_pts == MP_NOPTS_VALUE) + p->out_pts = mpi->pts; + p->frames_sent++; + p->buffered[p->num_buffered++] = mpi; + mp_cond_broadcast(&p->wakeup); + } else if (frame.type != MP_FRAME_NONE) { + MP_ERR(p, "discarding unknown frame type\n"); + mp_frame_unref(&frame); + goto done; + } else { + break; // no new data available + } + } + + // Read output and return them from the VS output queue. 
+ if (mp_pin_in_needs_data(f->ppins[1]) && p->requested[0] && + p->requested[0] != &dummy_img && + p->requested[0] != &dummy_img_eof) + { + struct mp_image *out = p->requested[0]; + + out->pts = p->out_pts; + if (p->out_pts != MP_NOPTS_VALUE && out->pkt_duration >= 0) + p->out_pts += out->pkt_duration; + + mp_pin_in_write(f->ppins[1], MAKE_FRAME(MP_FRAME_VIDEO, out)); + + for (int n = 0; n < p->max_requests - 1; n++) + p->requested[n] = p->requested[n + 1]; + p->requested[p->max_requests - 1] = NULL; + p->out_frameno++; + } + + // This happens on EOF draining and format changes. + if (p->requested[0] == &dummy_img_eof) { + MP_VERBOSE(p, "finishing up\n"); + assert(p->eof); + mp_mutex_unlock(&p->lock); + destroy_vs(p); + mp_filter_internal_mark_progress(f); + return; + } + + // Don't request frames if we haven't sent any input yet. + if (p->frames_sent && p->out_node) { + // Request new future frames as far as possible. + for (int n = 0; n < p->max_requests; n++) { + if (!p->requested[n]) { + // Note: this assumes getFrameAsync() will never call + // infiltGetFrame (if it does, we would deadlock) + p->requested[n] = (struct mp_image *)&dummy_img; + p->failed = false; + MP_TRACE(p, "requesting frame %d (%d)\n", p->out_frameno + n, n); + p->vsapi->getFrameAsync(p->out_frameno + n, p->out_node, + vs_frame_done, p); + } + } + } + +done: + mp_mutex_unlock(&p->lock); +} + +static void VS_CC infiltInit(VSMap *in, VSMap *out, void **instanceData, + VSNode *node, VSCore *core, const VSAPI *vsapi) +{ + struct priv *p = *instanceData; + // The number of frames of our input node is obviously unknown. The user + // could for example seek any time, randomly "ending" the clip. + // This specific value was suggested by the VapourSynth developer. + int enough_for_everyone = INT_MAX / 16; + + // Note: this is called from createFilter, so no need for locking. 
+ + VSVideoInfo fmt = { + .format = p->vsapi->getFormatPreset(mp_to_vs(p->fmt_in.imgfmt), p->vscore), + .width = p->fmt_in.w, + .height = p->fmt_in.h, + .numFrames = enough_for_everyone, + }; + if (!fmt.format) { + p->vsapi->setError(out, "Unsupported input format.\n"); + return; + } + + p->vsapi->setVideoInfo(&fmt, 1, node); + p->in_node_active = true; +} + +static const VSFrameRef *VS_CC infiltGetFrame(int frameno, int activationReason, + void **instanceData, void **frameData, + VSFrameContext *frameCtx, VSCore *core, + const VSAPI *vsapi) +{ + struct priv *p = *instanceData; + VSFrameRef *ret = NULL; + + mp_mutex_lock(&p->lock); + MP_TRACE(p, "VS asking for frame %d (at %d)\n", frameno, p->in_frameno); + while (1) { + if (p->shutdown) { + p->vsapi->setFilterError("EOF or filter reset/uninit", frameCtx); + MP_DBG(p, "returning error on reset/uninit\n"); + break; + } + if (p->initializing) { + MP_WARN(p, "Frame requested during init! This is unsupported.\n" + "Returning black dummy frame with 0 duration.\n"); + ret = alloc_vs_frame(p, &p->fmt_in); + if (!ret) { + p->vsapi->setFilterError("Could not allocate VS frame", frameCtx); + break; + } + struct mp_image vsframe = map_vs_frame(p, ret, true); + mp_image_clear(&vsframe, 0, 0, p->fmt_in.w, p->fmt_in.h); + struct mp_image dummy = {0}; + mp_image_set_params(&dummy, &p->fmt_in); + set_vs_frame_props(p, ret, &dummy, 0, 1); + break; + } + if (frameno < p->in_frameno) { + char msg[180]; + snprintf(msg, sizeof(msg), + "Frame %d requested, but only have frames starting from %d. " + "Try increasing the buffered-frames suboption.", + frameno, p->in_frameno); + MP_FATAL(p, "%s\n", msg); + p->vsapi->setFilterError(msg, frameCtx); + break; + } + if (frameno >= p->in_frameno + MP_TALLOC_AVAIL(p->buffered)) { + // Too far in the future. Remove frames, so that the main thread can + // queue new frames. 
+ if (p->num_buffered) { + drain_oldest_buffered_frame(p); + mp_cond_broadcast(&p->wakeup); + mp_filter_wakeup(p->f); + continue; + } + } + if (frameno >= p->in_frameno + p->num_buffered) { + // If there won't be any new frames, abort the request. + if (p->eof) { + p->vsapi->setFilterError("EOF or filter EOF/reinit", frameCtx); + MP_DBG(p, "returning error on EOF/reinit\n"); + break; + } + // Request more frames. + if (p->requested_frameno <= p->in_frameno + p->num_buffered) { + p->requested_frameno = p->in_frameno + p->num_buffered + 1; + mp_filter_wakeup(p->f); + } + } else { + struct mp_image *img = p->buffered[frameno - p->in_frameno]; + ret = alloc_vs_frame(p, &img->params); + if (!ret) { + p->vsapi->setFilterError("Could not allocate VS frame", frameCtx); + break; + } + + mp_mutex_unlock(&p->lock); + struct mp_image vsframe = map_vs_frame(p, ret, true); + mp_image_copy(&vsframe, img); + int res = 1e6; + int dur = img->pkt_duration * res + 0.5; + set_vs_frame_props(p, ret, img, dur, res); + mp_mutex_lock(&p->lock); + break; + } + mp_cond_wait(&p->wakeup, &p->lock); + } + mp_cond_broadcast(&p->wakeup); + mp_mutex_unlock(&p->lock); + return ret; +} + +static void VS_CC infiltFree(void *instanceData, VSCore *core, const VSAPI *vsapi) +{ + struct priv *p = instanceData; + + mp_mutex_lock(&p->lock); + p->in_node_active = false; + mp_cond_broadcast(&p->wakeup); + mp_mutex_unlock(&p->lock); +} + +// number of getFrameAsync calls in progress +// must be called with p->lock held +static int num_requested(struct priv *p) +{ + int r = 0; + for (int n = 0; n < p->max_requests; n++) + r += p->requested[n] == &dummy_img; + return r; +} + +static void destroy_vs(struct priv *p) +{ + if (!p->out_node && !p->initializing) + return; + + MP_DBG(p, "destroying VS filters\n"); + + // Wait until our frame callbacks return. 
+ mp_mutex_lock(&p->lock); + p->initializing = false; + p->shutdown = true; + mp_cond_broadcast(&p->wakeup); + while (num_requested(p)) + mp_cond_wait(&p->wakeup, &p->lock); + mp_mutex_unlock(&p->lock); + + MP_DBG(p, "all requests terminated\n"); + + if (p->in_node) + p->vsapi->freeNode(p->in_node); + if (p->out_node) + p->vsapi->freeNode(p->out_node); + p->in_node = p->out_node = NULL; + + p->drv->unload(p); + + assert(!p->in_node_active); + assert(num_requested(p) == 0); // async callback didn't return? + + p->shutdown = false; + p->eof = false; + p->frames_sent = 0; + // Kill filtered images that weren't returned yet + for (int n = 0; n < p->max_requests; n++) { + if (p->requested[n] != &dummy_img_eof) + mp_image_unrefp(&p->requested[n]); + p->requested[n] = NULL; + } + // Kill queued frames too + for (int n = 0; n < p->num_buffered; n++) + talloc_free(p->buffered[n]); + p->num_buffered = 0; + p->out_frameno = p->in_frameno = 0; + p->requested_frameno = 0; + p->failed = false; + + MP_DBG(p, "uninitialized.\n"); +} + +static int reinit_vs(struct priv *p, struct mp_image *input) +{ + VSMap *vars = NULL, *in = NULL, *out = NULL; + int res = -1; + + destroy_vs(p); + + MP_DBG(p, "initializing...\n"); + + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(p->fmt_in.imgfmt); + if (p->fmt_in.w % desc.align_x || p->fmt_in.h % desc.align_y) { + MP_FATAL(p, "VapourSynth does not allow unaligned/cropped video sizes.\n"); + return -1; + } + + p->initializing = true; + p->out_pts = MP_NOPTS_VALUE; + + if (p->drv->load_core(p) < 0 || !p->vsapi || !p->vscore) { + MP_FATAL(p, "Could not get vapoursynth API handle.\n"); + goto error; + } + + in = p->vsapi->createMap(); + out = p->vsapi->createMap(); + vars = p->vsapi->createMap(); + if (!in || !out || !vars) + goto error; + + p->vsapi->createFilter(in, out, "Input", infiltInit, infiltGetFrame, + infiltFree, fmSerial, 0, p, p->vscore); + int vserr; + p->in_node = p->vsapi->propGetNode(out, "clip", 0, &vserr); + if (!p->in_node) { + 
MP_FATAL(p, "Could not get our own input node.\n"); + goto error; + } + + if (p->vsapi->propSetNode(vars, "video_in", p->in_node, 0)) + goto error; + + int d_w, d_h; + mp_image_params_get_dsize(&p->fmt_in, &d_w, &d_h); + + p->vsapi->propSetInt(vars, "video_in_dw", d_w, 0); + p->vsapi->propSetInt(vars, "video_in_dh", d_h, 0); + + struct mp_stream_info *info = mp_filter_find_stream_info(p->f); + double container_fps = input->nominal_fps; + double display_fps = 0; + int64_t display_res[2] = {0}; + if (info) { + if (info->get_display_fps) + display_fps = info->get_display_fps(info); + if (info->get_display_res) { + int tmp[2] = {0}; + info->get_display_res(info, tmp); + display_res[0] = tmp[0]; + display_res[1] = tmp[1]; + } + } + p->vsapi->propSetFloat(vars, "container_fps", container_fps, 0); + p->vsapi->propSetFloat(vars, "display_fps", display_fps, 0); + p->vsapi->propSetIntArray(vars, "display_res", display_res, 2); + + if (p->drv->load(p, vars) < 0) + goto error; + if (!p->out_node) { + MP_FATAL(p, "Could not get script output node.\n"); + goto error; + } + + const VSVideoInfo *vi = p->vsapi->getVideoInfo(p->out_node); + if (!mp_from_vs(vi->format->id)) { + MP_FATAL(p, "Unsupported output format.\n"); + goto error; + } + + mp_mutex_lock(&p->lock); + p->initializing = false; + mp_mutex_unlock(&p->lock); + MP_DBG(p, "initialized.\n"); + res = 0; +error: + if (p->vsapi) { + p->vsapi->freeMap(in); + p->vsapi->freeMap(out); + p->vsapi->freeMap(vars); + } + if (res < 0) + destroy_vs(p); + return res; +} + +static void vf_vapoursynth_reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + + destroy_vs(p); +} + +static void vf_vapoursynth_destroy(struct mp_filter *f) +{ + struct priv *p = f->priv; + + destroy_vs(p); + p->drv->uninit(p); + + mp_cond_destroy(&p->wakeup); + mp_mutex_destroy(&p->lock); + + mp_filter_free_children(f); +} + +static const struct mp_filter_info vf_vapoursynth_filter = { + .name = "vapoursynth", + .process = vf_vapoursynth_process, + .reset = 
vf_vapoursynth_reset, + .destroy = vf_vapoursynth_destroy, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter *vf_vapoursynth_create(struct mp_filter *parent, + void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &vf_vapoursynth_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + // In theory, we could allow multiple inputs and outputs, but since this + // wrapper is for --vf only, we don't. + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + p->log = f->log; + p->drv = p->opts->drv; + p->f = f; + + mp_mutex_init(&p->lock); + mp_cond_init(&p->wakeup); + + if (!p->opts->file || !p->opts->file[0]) { + MP_FATAL(p, "'file' parameter must be set.\n"); + goto error; + } + p->script_path = mp_get_user_path(p, f->global, p->opts->file); + + p->max_requests = p->opts->maxrequests; + if (p->max_requests < 0) + p->max_requests = av_cpu_count(); + MP_VERBOSE(p, "using %d concurrent requests.\n", p->max_requests); + int maxbuffer = p->opts->maxbuffer * p->max_requests; + p->buffered = talloc_array(p, struct mp_image *, maxbuffer); + p->requested = talloc_zero_array(p, struct mp_image *, p->max_requests); + + struct mp_autoconvert *conv = mp_autoconvert_create(f); + if (!conv) + goto error; + + for (int n = 0; mpvs_fmt_table[n].bits; n++) { + int imgfmt = mp_from_vs(mpvs_fmt_table[n].vs); + if (imgfmt) + mp_autoconvert_add_imgfmt(conv, imgfmt, 0); + } + + struct mp_filter *dur = mp_compute_frame_duration_create(f); + if (!dur) + goto error; + + mp_pin_connect(conv->f->pins[0], f->ppins[0]); + mp_pin_connect(dur->pins[0], conv->f->pins[1]); + p->in_pin = dur->pins[1]; + + if (p->drv->init(p) < 0) + goto error; + + return f; + +error: + talloc_free(f); + return NULL; +} + + +#define OPT_BASE_STRUCT struct vapoursynth_opts +static const m_option_t vf_opts_fields[] = { + {"file", OPT_STRING(file), .flags = M_OPT_FILE}, + 
{"buffered-frames", OPT_INT(maxbuffer), M_RANGE(1, 9999), + OPTDEF_INT(4)}, + {"concurrent-frames", OPT_CHOICE(maxrequests, {"auto", -1}), + M_RANGE(1, 99), OPTDEF_INT(-1)}, + {0} +}; + +#include <VSScript.h> + +static int drv_vss_init(struct priv *p) +{ + if (!vsscript_init()) { + MP_FATAL(p, "Could not initialize VapourSynth scripting.\n"); + return -1; + } + p->vs_initialized = true; + return 0; +} + +static void drv_vss_uninit(struct priv *p) +{ + if (p->vs_initialized) + vsscript_finalize(); + p->vs_initialized = false; +} + +static int drv_vss_load_core(struct priv *p) +{ + // First load an empty script to get a VSScript, so that we get the vsapi + // and vscore. + if (vsscript_createScript(&p->se)) + return -1; + p->vsapi = vsscript_getVSApi(); + p->vscore = vsscript_getCore(p->se); + return 0; +} + +static int drv_vss_load(struct priv *p, VSMap *vars) +{ + vsscript_setVariable(p->se, vars); + + if (vsscript_evaluateFile(&p->se, p->script_path, 0)) { + MP_FATAL(p, "Script evaluation failed:\n%s\n", vsscript_getError(p->se)); + return -1; + } + p->out_node = vsscript_getOutput(p->se, 0); + return 0; +} + +static void drv_vss_unload(struct priv *p) +{ + if (p->se) + vsscript_freeScript(p->se); + p->se = NULL; + p->vsapi = NULL; + p->vscore = NULL; +} + +static const struct script_driver drv_vss = { + .init = drv_vss_init, + .uninit = drv_vss_uninit, + .load_core = drv_vss_load_core, + .load = drv_vss_load, + .unload = drv_vss_unload, +}; + +const struct mp_user_filter_entry vf_vapoursynth = { + .desc = { + .description = "VapourSynth bridge", + .name = "vapoursynth", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT){ + .drv = &drv_vss, + }, + .options = vf_opts_fields, + }, + .create = vf_vapoursynth_create, +}; diff --git a/video/filter/vf_vavpp.c b/video/filter/vf_vavpp.c new file mode 100644 index 0000000..52be148 --- /dev/null +++ b/video/filter/vf_vavpp.c @@ -0,0 +1,503 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> + +#include <va/va.h> +#include <va/va_vpp.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_vaapi.h> + +#include "options/options.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "refqueue.h" + +#include "video/fmt-conversion.h" +#include "video/vaapi.h" +#include "video/hwdec.h" +#include "video/mp_image_pool.h" + +struct surface_refs { + VASurfaceID *surfaces; + int num_surfaces; + int max_surfaces; +}; + +struct pipeline { + VABufferID *filters; + int num_filters; + VAProcColorStandardType input_colors[VAProcColorStandardCount]; + VAProcColorStandardType output_colors[VAProcColorStandardCount]; + int num_input_colors, num_output_colors; + struct surface_refs forward, backward; +}; + +struct opts { + int deint_type; + bool interlaced_only; + bool reversal_bug; +}; + +struct priv { + struct opts *opts; + bool do_deint; + VABufferID buffers[VAProcFilterCount]; + int num_buffers; + VAConfigID config; + VAContextID context; + struct mp_image_params params; + VADisplay display; + AVBufferRef *av_device_ref; + struct pipeline pipe; + AVBufferRef *hw_pool; + + struct mp_refqueue *queue; +}; + +static void add_surfaces(struct priv *p, struct surface_refs *refs, int dir) +{ + for (int n = 0; n < refs->max_surfaces; 
n++) { + struct mp_image *s = mp_refqueue_get(p->queue, (1 + n) * dir); + if (!s) + break; + VASurfaceID id = va_surface_id(s); + if (id == VA_INVALID_ID) + break; + MP_TARRAY_APPEND(p, refs->surfaces, refs->num_surfaces, id); + } +} + +// The array items must match with the "deint" suboption values. +// They're also sorted by quality. +static const int deint_algorithm[] = { + [0] = VAProcDeinterlacingNone, + [1] = VAProcDeinterlacingBob, // first-field, special-cased + [2] = VAProcDeinterlacingBob, + [3] = VAProcDeinterlacingWeave, + [4] = VAProcDeinterlacingMotionAdaptive, + [5] = VAProcDeinterlacingMotionCompensated, +}; + +static void flush_frames(struct mp_filter *f) +{ + struct priv *p = f->priv; + mp_refqueue_flush(p->queue); +} + +static void update_pipeline(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + VABufferID *filters = p->buffers; + int num_filters = p->num_buffers; + if (p->opts->deint_type && !p->do_deint) { + filters++; + num_filters--; + } + p->pipe.forward.num_surfaces = p->pipe.backward.num_surfaces = 0; + p->pipe.num_input_colors = p->pipe.num_output_colors = 0; + p->pipe.num_filters = 0; + p->pipe.filters = NULL; + if (!num_filters) + goto nodeint; + VAProcPipelineCaps caps = { + .input_color_standards = p->pipe.input_colors, + .output_color_standards = p->pipe.output_colors, + .num_input_color_standards = VAProcColorStandardCount, + .num_output_color_standards = VAProcColorStandardCount, + }; + VAStatus status = vaQueryVideoProcPipelineCaps(p->display, p->context, + filters, num_filters, &caps); + if (!CHECK_VA_STATUS(vf, "vaQueryVideoProcPipelineCaps()")) + goto nodeint; + p->pipe.filters = filters; + p->pipe.num_filters = num_filters; + p->pipe.num_input_colors = caps.num_input_color_standards; + p->pipe.num_output_colors = caps.num_output_color_standards; + p->pipe.forward.max_surfaces = caps.num_forward_references; + p->pipe.backward.max_surfaces = caps.num_backward_references; + if (p->opts->reversal_bug) { + int max = 
MPMAX(caps.num_forward_references, caps.num_backward_references); + mp_refqueue_set_refs(p->queue, max, max); + } else { + mp_refqueue_set_refs(p->queue, p->pipe.backward.max_surfaces, + p->pipe.forward.max_surfaces); + } + mp_refqueue_set_mode(p->queue, + (p->do_deint ? MP_MODE_DEINT : 0) | + (p->opts->deint_type >= 2 ? MP_MODE_OUTPUT_FIELDS : 0) | + (p->opts->interlaced_only ? MP_MODE_INTERLACED_ONLY : 0)); + return; + +nodeint: + mp_refqueue_set_refs(p->queue, 0, 0); + mp_refqueue_set_mode(p->queue, 0); +} + +static struct mp_image *alloc_out(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + + struct mp_image *fmt = mp_refqueue_get_format(p->queue); + if (!fmt || !fmt->hwctx) + return NULL; + + AVHWFramesContext *hw_frames = (void *)fmt->hwctx->data; + // VAAPI requires the full surface size to match for input and output. + int src_w = hw_frames->width; + int src_h = hw_frames->height; + + if (!mp_update_av_hw_frames_pool(&p->hw_pool, p->av_device_ref, + IMGFMT_VAAPI, IMGFMT_NV12, src_w, src_h, + false)) + { + MP_ERR(vf, "Failed to create hw pool.\n"); + return NULL; + } + + AVFrame *av_frame = av_frame_alloc(); + MP_HANDLE_OOM(av_frame); + if (av_hwframe_get_buffer(p->hw_pool, av_frame, 0) < 0) { + MP_ERR(vf, "Failed to allocate frame from hw pool.\n"); + av_frame_free(&av_frame); + return NULL; + } + struct mp_image *img = mp_image_from_av_frame(av_frame); + av_frame_free(&av_frame); + if (!img) { + MP_ERR(vf, "Unknown error.\n"); + return NULL; + } + mp_image_set_size(img, fmt->w, fmt->h); + return img; +} + +static struct mp_image *render(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + + struct mp_image *in = mp_refqueue_get(p->queue, 0); + struct mp_image *img = NULL; + bool need_end_picture = false; + bool success = false; + VABufferID buffer = VA_INVALID_ID; + + VASurfaceID in_id = va_surface_id(in); + if (!p->pipe.filters || in_id == VA_INVALID_ID) + goto cleanup; + + img = alloc_out(vf); + if (!img) + goto cleanup; + + 
mp_image_copy_attributes(img, in); + + unsigned int flags = va_get_colorspace_flag(p->params.color.space); + if (!mp_refqueue_should_deint(p->queue)) { + flags |= VA_FRAME_PICTURE; + } else if (mp_refqueue_is_top_field(p->queue)) { + flags |= VA_TOP_FIELD; + } else { + flags |= VA_BOTTOM_FIELD; + } + + VASurfaceID id = va_surface_id(img); + if (id == VA_INVALID_ID) + goto cleanup; + + VAStatus status = vaBeginPicture(p->display, p->context, id); + if (!CHECK_VA_STATUS(vf, "vaBeginPicture()")) + goto cleanup; + + need_end_picture = true; + + VAProcPipelineParameterBuffer *param = NULL; + status = vaCreateBuffer(p->display, p->context, + VAProcPipelineParameterBufferType, + sizeof(*param), 1, NULL, &buffer); + if (!CHECK_VA_STATUS(vf, "vaCreateBuffer()")) + goto cleanup; + + VAProcFilterParameterBufferDeinterlacing *filter_params; + status = vaMapBuffer(p->display, *(p->pipe.filters), (void**)&filter_params); + if (!CHECK_VA_STATUS(vf, "vaMapBuffer()")) + goto cleanup; + + filter_params->flags = flags & VA_TOP_FIELD ? 0 : VA_DEINTERLACING_BOTTOM_FIELD; + if (!mp_refqueue_top_field_first(p->queue)) + filter_params->flags |= VA_DEINTERLACING_BOTTOM_FIELD_FIRST; + + vaUnmapBuffer(p->display, *(p->pipe.filters)); + + status = vaMapBuffer(p->display, buffer, (void**)&param); + if (!CHECK_VA_STATUS(vf, "vaMapBuffer()")) + goto cleanup; + + *param = (VAProcPipelineParameterBuffer){0}; + param->surface = in_id; + param->surface_region = &(VARectangle){0, 0, in->w, in->h}; + param->output_region = &(VARectangle){0, 0, img->w, img->h}; + param->output_background_color = 0; + param->filter_flags = flags; + param->filters = p->pipe.filters; + param->num_filters = p->pipe.num_filters; + + int dir = p->opts->reversal_bug ? 
-1 : 1; + + add_surfaces(p, &p->pipe.forward, 1 * dir); + param->forward_references = p->pipe.forward.surfaces; + param->num_forward_references = p->pipe.forward.num_surfaces; + + add_surfaces(p, &p->pipe.backward, -1 * dir); + param->backward_references = p->pipe.backward.surfaces; + param->num_backward_references = p->pipe.backward.num_surfaces; + + MP_TRACE(vf, "in=0x%x\n", (unsigned)in_id); + for (int n = 0; n < param->num_backward_references; n++) + MP_TRACE(vf, " b%d=0x%x\n", n, (unsigned)param->backward_references[n]); + for (int n = 0; n < param->num_forward_references; n++) + MP_TRACE(vf, " f%d=0x%x\n", n, (unsigned)param->forward_references[n]); + + vaUnmapBuffer(p->display, buffer); + + status = vaRenderPicture(p->display, p->context, &buffer, 1); + if (!CHECK_VA_STATUS(vf, "vaRenderPicture()")) + goto cleanup; + + success = true; + +cleanup: + if (need_end_picture) + vaEndPicture(p->display, p->context); + vaDestroyBuffer(p->display, buffer); + if (success) + return img; + talloc_free(img); + return NULL; +} + +static void vf_vavpp_process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + update_pipeline(f); + + mp_refqueue_execute_reinit(p->queue); + + if (!mp_refqueue_can_output(p->queue)) + return; + + if (!p->pipe.num_filters || !mp_refqueue_should_deint(p->queue)) { + // no filtering + struct mp_image *in = mp_refqueue_get(p->queue, 0); + mp_refqueue_write_out_pin(p->queue, mp_image_new_ref(in)); + } else { + mp_refqueue_write_out_pin(p->queue, render(f)); + } +} + +static void uninit(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + for (int i = 0; i < p->num_buffers; i++) + vaDestroyBuffer(p->display, p->buffers[i]); + if (p->context != VA_INVALID_ID) + vaDestroyContext(p->display, p->context); + if (p->config != VA_INVALID_ID) + vaDestroyConfig(p->display, p->config); + av_buffer_unref(&p->hw_pool); + flush_frames(vf); + talloc_free(p->queue); + av_buffer_unref(&p->av_device_ref); +} + +static int va_query_filter_caps(struct 
mp_filter *vf, VAProcFilterType type, + void *caps, unsigned int count) +{ + struct priv *p = vf->priv; + VAStatus status = vaQueryVideoProcFilterCaps(p->display, p->context, type, + caps, &count); + return CHECK_VA_STATUS(vf, "vaQueryVideoProcFilterCaps()") ? count : 0; +} + +static VABufferID va_create_filter_buffer(struct mp_filter *vf, int bytes, + int num, void *data) +{ + struct priv *p = vf->priv; + VABufferID buffer; + VAStatus status = vaCreateBuffer(p->display, p->context, + VAProcFilterParameterBufferType, + bytes, num, data, &buffer); + return CHECK_VA_STATUS(vf, "vaCreateBuffer()") ? buffer : VA_INVALID_ID; +} + +static bool initialize(struct mp_filter *vf) +{ + struct priv *p = vf->priv; + VAStatus status; + + VAConfigID config; + status = vaCreateConfig(p->display, VAProfileNone, VAEntrypointVideoProc, + NULL, 0, &config); + if (!CHECK_VA_STATUS(vf, "vaCreateConfig()")) // no entrypoint for video proc + return false; + p->config = config; + + VAContextID context; + status = vaCreateContext(p->display, p->config, 0, 0, 0, NULL, 0, &context); + if (!CHECK_VA_STATUS(vf, "vaCreateContext()")) + return false; + p->context = context; + + VAProcFilterType filters[VAProcFilterCount]; + int num_filters = VAProcFilterCount; + status = vaQueryVideoProcFilters(p->display, p->context, filters, &num_filters); + if (!CHECK_VA_STATUS(vf, "vaQueryVideoProcFilters()")) + return false; + + VABufferID buffers[VAProcFilterCount]; + for (int i = 0; i < VAProcFilterCount; i++) + buffers[i] = VA_INVALID_ID; + for (int i = 0; i < num_filters; i++) { + if (filters[i] == VAProcFilterDeinterlacing) { + VAProcFilterCapDeinterlacing caps[VAProcDeinterlacingCount]; + int num = va_query_filter_caps(vf, VAProcFilterDeinterlacing, caps, + VAProcDeinterlacingCount); + if (!num) + continue; + if (p->opts->deint_type < 0) { + for (int n = MP_ARRAY_SIZE(deint_algorithm) - 1; n > 0; n--) { + for (int x = 0; x < num; x++) { + if (caps[x].type == deint_algorithm[n]) { + p->opts->deint_type 
= n; + MP_VERBOSE(vf, "Selected deinterlacing algorithm: " + "%d\n", deint_algorithm[n]); + goto found; + } + } + } + found: ; + } + if (p->opts->deint_type <= 0) + continue; + VAProcDeinterlacingType algorithm = + deint_algorithm[p->opts->deint_type]; + for (int n=0; n < num; n++) { // find the algorithm + if (caps[n].type != algorithm) + continue; + VAProcFilterParameterBufferDeinterlacing param = {0}; + param.type = VAProcFilterDeinterlacing; + param.algorithm = algorithm; + buffers[VAProcFilterDeinterlacing] = + va_create_filter_buffer(vf, sizeof(param), 1, &param); + } + if (buffers[VAProcFilterDeinterlacing] == VA_INVALID_ID) + MP_WARN(vf, "Selected deinterlacing algorithm not supported.\n"); + } // check other filters + } + if (p->opts->deint_type < 0) + p->opts->deint_type = 0; + p->num_buffers = 0; + if (buffers[VAProcFilterDeinterlacing] != VA_INVALID_ID) + p->buffers[p->num_buffers++] = buffers[VAProcFilterDeinterlacing]; + p->do_deint = !!p->opts->deint_type; + // next filters: p->buffers[p->num_buffers++] = buffers[next_filter]; + return true; +} + +static const struct mp_filter_info vf_vavpp_filter = { + .name = "vavpp", + .process = vf_vavpp_process, + .reset = flush_frames, + .destroy = uninit, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter *vf_vavpp_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &vf_vavpp_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + p->config = VA_INVALID_ID; + p->context = VA_INVALID_ID; + + p->queue = mp_refqueue_alloc(f); + + struct mp_hwdec_ctx *hwdec_ctx = + mp_filter_load_hwdec_device(f, IMGFMT_VAAPI); + if (!hwdec_ctx || !hwdec_ctx->av_device_ref) + goto error; + p->av_device_ref = av_buffer_ref(hwdec_ctx->av_device_ref); + if (!p->av_device_ref) + goto error; + + AVHWDeviceContext 
*hwctx = (void *)p->av_device_ref->data; + AVVAAPIDeviceContext *vactx = hwctx->hwctx; + + p->display = vactx->display; + + mp_refqueue_add_in_format(p->queue, IMGFMT_VAAPI, 0); + + if (!initialize(f)) + goto error; + + return f; + +error: + talloc_free(f); + return NULL; +} + +#define OPT_BASE_STRUCT struct opts +static const m_option_t vf_opts_fields[] = { + {"deint", OPT_CHOICE(deint_type, + // The values >=0 must match with deint_algorithm[]. + {"auto", -1}, + {"no", 0}, + {"first-field", 1}, + {"bob", 2}, + {"weave", 3}, + {"motion-adaptive", 4}, + {"motion-compensated", 5})}, + {"interlaced-only", OPT_BOOL(interlaced_only)}, + {"reversal-bug", OPT_BOOL(reversal_bug)}, + {0} +}; + +const struct mp_user_filter_entry vf_vavpp = { + .desc = { + .description = "VA-API Video Post-Process Filter", + .name = "vavpp", + .priv_size = sizeof(OPT_BASE_STRUCT), + .priv_defaults = &(const OPT_BASE_STRUCT){ + .deint_type = -1, + .reversal_bug = true, + }, + .options = vf_opts_fields, + }, + .create = vf_vavpp_create, +}; diff --git a/video/filter/vf_vdpaupp.c b/video/filter/vf_vdpaupp.c new file mode 100644 index 0000000..0519f5a --- /dev/null +++ b/video/filter/vf_vdpaupp.c @@ -0,0 +1,195 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include <assert.h> + +#include <libavutil/hwcontext.h> + +#include "common/common.h" +#include "common/msg.h" +#include "options/m_option.h" +#include "filters/filter.h" +#include "filters/filter_internal.h" +#include "filters/user_filters.h" +#include "video/img_format.h" +#include "video/mp_image.h" +#include "video/hwdec.h" +#include "video/vdpau.h" +#include "video/vdpau_mixer.h" +#include "refqueue.h" + +// Note: this filter does no actual filtering; it merely sets appropriate +// flags on vdpau images (mp_vdpau_mixer_frame) to do the appropriate +// processing on the final rendering process in the VO. + +struct opts { + bool deint_enabled; + bool interlaced_only; + struct mp_vdpau_mixer_opts opts; +}; + +struct priv { + struct opts *opts; + struct mp_vdpau_ctx *ctx; + struct mp_refqueue *queue; + struct mp_pin *in_pin; +}; + +static VdpVideoSurface ref_field(struct priv *p, + struct mp_vdpau_mixer_frame *frame, int pos) +{ + struct mp_image *mpi = mp_image_new_ref(mp_refqueue_get_field(p->queue, pos)); + if (!mpi) + return VDP_INVALID_HANDLE; + talloc_steal(frame, mpi); + return (uintptr_t)mpi->planes[3]; +} + +static void vf_vdpaupp_process(struct mp_filter *f) +{ + struct priv *p = f->priv; + + mp_refqueue_execute_reinit(p->queue); + + if (!mp_refqueue_can_output(p->queue)) + return; + + struct mp_image *mpi = + mp_vdpau_mixed_frame_create(mp_refqueue_get_field(p->queue, 0)); + if (!mpi) + return; // OOM + struct mp_vdpau_mixer_frame *frame = mp_vdpau_mixed_frame_get(mpi); + + if (!mp_refqueue_should_deint(p->queue)) { + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME; + } else if (mp_refqueue_is_top_field(p->queue)) { + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_TOP_FIELD; + } else { + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_BOTTOM_FIELD; + } + + frame->future[0] = ref_field(p, frame, 1); + frame->current = ref_field(p, frame, 0); + 
frame->past[0] = ref_field(p, frame, -1); + frame->past[1] = ref_field(p, frame, -2); + + frame->opts = p->opts->opts; + + mpi->planes[3] = (void *)(uintptr_t)frame->current; + + mpi->params.hw_subfmt = 0; // force mixer + + mp_refqueue_write_out_pin(p->queue, mpi); +} + +static void vf_vdpaupp_reset(struct mp_filter *f) +{ + struct priv *p = f->priv; + mp_refqueue_flush(p->queue); +} + +static void vf_vdpaupp_destroy(struct mp_filter *f) +{ + struct priv *p = f->priv; + talloc_free(p->queue); +} + +static const struct mp_filter_info vf_vdpaupp_filter = { + .name = "vdpaupp", + .process = vf_vdpaupp_process, + .reset = vf_vdpaupp_reset, + .destroy = vf_vdpaupp_destroy, + .priv_size = sizeof(struct priv), +}; + +static struct mp_filter *vf_vdpaupp_create(struct mp_filter *parent, void *options) +{ + struct mp_filter *f = mp_filter_create(parent, &vf_vdpaupp_filter); + if (!f) { + talloc_free(options); + return NULL; + } + + mp_filter_add_pin(f, MP_PIN_IN, "in"); + mp_filter_add_pin(f, MP_PIN_OUT, "out"); + + struct priv *p = f->priv; + p->opts = talloc_steal(p, options); + + p->queue = mp_refqueue_alloc(f); + + struct mp_hwdec_ctx *hwdec_ctx = + mp_filter_load_hwdec_device(f, IMGFMT_VDPAU); + if (!hwdec_ctx || !hwdec_ctx->av_device_ref) + goto error; + p->ctx = mp_vdpau_get_ctx_from_av(hwdec_ctx->av_device_ref); + if (!p->ctx) + goto error; + + if (!p->opts->deint_enabled) + p->opts->opts.deint = 0; + + if (p->opts->opts.deint >= 2) { + mp_refqueue_set_refs(p->queue, 1, 1); // 2 past fields, 1 future field + } else { + mp_refqueue_set_refs(p->queue, 0, 0); + } + mp_refqueue_set_mode(p->queue, + (p->opts->deint_enabled ? MP_MODE_DEINT : 0) | + (p->opts->interlaced_only ? MP_MODE_INTERLACED_ONLY : 0) | + (p->opts->opts.deint >= 2 ? 
MP_MODE_OUTPUT_FIELDS : 0)); + + mp_refqueue_add_in_format(p->queue, IMGFMT_VDPAU, 0); + + return f; + +error: + talloc_free(f); + return NULL; +} + +#define OPT_BASE_STRUCT struct opts +static const m_option_t vf_opts_fields[] = { + {"deint-mode", OPT_CHOICE(opts.deint, + {"first-field", 1}, + {"bob", 2}, + {"temporal", 3}, + {"temporal-spatial", 4}), + OPTDEF_INT(3)}, + {"deint", OPT_BOOL(deint_enabled)}, + {"chroma-deint", OPT_BOOL(opts.chroma_deint), OPTDEF_INT(1)}, + {"pullup", OPT_BOOL(opts.pullup)}, + {"denoise", OPT_FLOAT(opts.denoise), M_RANGE(0, 1)}, + {"sharpen", OPT_FLOAT(opts.sharpen), M_RANGE(-1, 1)}, + {"hqscaling", OPT_INT(opts.hqscaling), M_RANGE(0, 9)}, + {"interlaced-only", OPT_BOOL(interlaced_only)}, + {0} +}; + +const struct mp_user_filter_entry vf_vdpaupp = { + .desc = { + .description = "vdpau postprocessing", + .name = "vdpaupp", + .priv_size = sizeof(OPT_BASE_STRUCT), + .options = vf_opts_fields, + }, + .create = vf_vdpaupp_create, +}; diff --git a/video/fmt-conversion.c b/video/fmt-conversion.c new file mode 100644 index 0000000..aa7d857 --- /dev/null +++ b/video/fmt-conversion.c @@ -0,0 +1,112 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <libavutil/pixdesc.h> +#include <libavutil/avutil.h> + +#include "video/img_format.h" +#include "fmt-conversion.h" + +static const struct { + int fmt; + enum AVPixelFormat pix_fmt; +} conversion_map[] = { + {IMGFMT_ARGB, AV_PIX_FMT_ARGB}, + {IMGFMT_BGRA, AV_PIX_FMT_BGRA}, + {IMGFMT_BGR24, AV_PIX_FMT_BGR24}, + {IMGFMT_RGB565, AV_PIX_FMT_RGB565}, + {IMGFMT_ABGR, AV_PIX_FMT_ABGR}, + {IMGFMT_RGBA, AV_PIX_FMT_RGBA}, + {IMGFMT_RGB24, AV_PIX_FMT_RGB24}, + {IMGFMT_PAL8, AV_PIX_FMT_PAL8}, + {IMGFMT_UYVY, AV_PIX_FMT_UYVY422}, + {IMGFMT_NV12, AV_PIX_FMT_NV12}, + {IMGFMT_Y8, AV_PIX_FMT_GRAY8}, + {IMGFMT_Y16, AV_PIX_FMT_GRAY16}, + {IMGFMT_420P, AV_PIX_FMT_YUV420P}, + {IMGFMT_444P, AV_PIX_FMT_YUV444P}, + + // YUVJ are YUV formats that use the full Y range. Decoder color range + // information is used instead. Deprecated in ffmpeg. + {IMGFMT_420P, AV_PIX_FMT_YUVJ420P}, + {IMGFMT_444P, AV_PIX_FMT_YUVJ444P}, + + {IMGFMT_BGR0, AV_PIX_FMT_BGR0}, + {IMGFMT_0RGB, AV_PIX_FMT_0RGB}, + {IMGFMT_RGB0, AV_PIX_FMT_RGB0}, + {IMGFMT_0BGR, AV_PIX_FMT_0BGR}, + + {IMGFMT_RGBA64, AV_PIX_FMT_RGBA64}, + +#ifdef AV_PIX_FMT_X2RGB10 + {IMGFMT_RGB30, AV_PIX_FMT_X2RGB10}, +#endif + + {IMGFMT_VDPAU, AV_PIX_FMT_VDPAU}, + {IMGFMT_VIDEOTOOLBOX, AV_PIX_FMT_VIDEOTOOLBOX}, + {IMGFMT_MEDIACODEC, AV_PIX_FMT_MEDIACODEC}, + {IMGFMT_VAAPI, AV_PIX_FMT_VAAPI}, + {IMGFMT_DXVA2, AV_PIX_FMT_DXVA2_VLD}, + {IMGFMT_D3D11, AV_PIX_FMT_D3D11}, + {IMGFMT_MMAL, AV_PIX_FMT_MMAL}, + {IMGFMT_CUDA, AV_PIX_FMT_CUDA}, + {IMGFMT_P010, AV_PIX_FMT_P010}, + {IMGFMT_DRMPRIME, AV_PIX_FMT_DRM_PRIME}, +#if HAVE_VULKAN_INTEROP + {IMGFMT_VULKAN, AV_PIX_FMT_VULKAN}, +#endif + + {0, AV_PIX_FMT_NONE} +}; + +enum AVPixelFormat imgfmt2pixfmt(int fmt) +{ + if (fmt == IMGFMT_NONE) + return AV_PIX_FMT_NONE; + + if (fmt >= IMGFMT_AVPIXFMT_START && fmt < IMGFMT_AVPIXFMT_END) { + enum AVPixelFormat pixfmt = fmt - IMGFMT_AVPIXFMT_START; + // Avoid duplicate format - each format must be unique. 
+ int mpfmt = pixfmt2imgfmt(pixfmt); + if (mpfmt == fmt && av_pix_fmt_desc_get(pixfmt)) + return pixfmt; + return AV_PIX_FMT_NONE; + } + + for (int i = 0; conversion_map[i].fmt; i++) { + if (conversion_map[i].fmt == fmt) + return conversion_map[i].pix_fmt; + } + return AV_PIX_FMT_NONE; +} + +int pixfmt2imgfmt(enum AVPixelFormat pix_fmt) +{ + if (pix_fmt == AV_PIX_FMT_NONE) + return IMGFMT_NONE; + + for (int i = 0; conversion_map[i].pix_fmt != AV_PIX_FMT_NONE; i++) { + if (conversion_map[i].pix_fmt == pix_fmt) + return conversion_map[i].fmt; + } + + int generic = IMGFMT_AVPIXFMT_START + pix_fmt; + if (generic < IMGFMT_AVPIXFMT_END && av_pix_fmt_desc_get(pix_fmt)) + return generic; + + return 0; +} diff --git a/video/fmt-conversion.h b/video/fmt-conversion.h new file mode 100644 index 0000000..962e4b8 --- /dev/null +++ b/video/fmt-conversion.h @@ -0,0 +1,26 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPLAYER_FMT_CONVERSION_H +#define MPLAYER_FMT_CONVERSION_H + +#include <libavutil/pixfmt.h> + +enum AVPixelFormat imgfmt2pixfmt(int fmt); +int pixfmt2imgfmt(enum AVPixelFormat pix_fmt); + +#endif /* MPLAYER_FMT_CONVERSION_H */ diff --git a/video/hwdec.c b/video/hwdec.c new file mode 100644 index 0000000..f397f3b --- /dev/null +++ b/video/hwdec.c @@ -0,0 +1,140 @@ +#include <assert.h> + +#include <libavutil/hwcontext.h> + +#include "config.h" +#include "hwdec.h" +#include "osdep/threads.h" + +struct mp_hwdec_devices { + mp_mutex lock; + + struct mp_hwdec_ctx **hwctxs; + int num_hwctxs; + + void (*load_api)(void *ctx, + struct hwdec_imgfmt_request *params); + void *load_api_ctx; +}; + +struct mp_hwdec_devices *hwdec_devices_create(void) +{ + struct mp_hwdec_devices *devs = talloc_zero(NULL, struct mp_hwdec_devices); + mp_mutex_init(&devs->lock); + return devs; +} + +void hwdec_devices_destroy(struct mp_hwdec_devices *devs) +{ + if (!devs) + return; + assert(!devs->num_hwctxs); // must have been hwdec_devices_remove()ed + assert(!devs->load_api); // must have been unset + mp_mutex_destroy(&devs->lock); + talloc_free(devs); +} + +struct mp_hwdec_ctx *hwdec_devices_get_by_imgfmt(struct mp_hwdec_devices *devs, + int hw_imgfmt) +{ + struct mp_hwdec_ctx *res = NULL; + mp_mutex_lock(&devs->lock); + for (int n = 0; n < devs->num_hwctxs; n++) { + struct mp_hwdec_ctx *dev = devs->hwctxs[n]; + if (dev->hw_imgfmt == hw_imgfmt) { + res = dev; + break; + } + } + mp_mutex_unlock(&devs->lock); + return res; +} + +struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs) +{ + return hwdec_devices_get_n(devs, 0); +} + +struct mp_hwdec_ctx *hwdec_devices_get_n(struct mp_hwdec_devices *devs, int n) +{ + mp_mutex_lock(&devs->lock); + struct mp_hwdec_ctx *res = n < devs->num_hwctxs ? 
devs->hwctxs[n] : NULL; + mp_mutex_unlock(&devs->lock); + return res; +} + +void hwdec_devices_add(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx) +{ + mp_mutex_lock(&devs->lock); + MP_TARRAY_APPEND(devs, devs->hwctxs, devs->num_hwctxs, ctx); + mp_mutex_unlock(&devs->lock); +} + +void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx) +{ + mp_mutex_lock(&devs->lock); + for (int n = 0; n < devs->num_hwctxs; n++) { + if (devs->hwctxs[n] == ctx) { + MP_TARRAY_REMOVE_AT(devs->hwctxs, devs->num_hwctxs, n); + break; + } + } + mp_mutex_unlock(&devs->lock); +} + +void hwdec_devices_set_loader(struct mp_hwdec_devices *devs, + void (*load_api)(void *ctx, struct hwdec_imgfmt_request *params), + void *load_api_ctx) +{ + devs->load_api = load_api; + devs->load_api_ctx = load_api_ctx; +} + +void hwdec_devices_request_for_img_fmt(struct mp_hwdec_devices *devs, + struct hwdec_imgfmt_request *params) +{ + if (devs->load_api) + devs->load_api(devs->load_api_ctx, params); +} + +char *hwdec_devices_get_names(struct mp_hwdec_devices *devs) +{ + char *res = NULL; + for (int n = 0; n < devs->num_hwctxs; n++) { + if (res) + ta_xstrdup_append(&res, ","); + ta_xstrdup_append(&res, devs->hwctxs[n]->driver_name); + } + return res; +} + +static const struct hwcontext_fns *const hwcontext_fns[] = { +#if HAVE_CUDA_HWACCEL + &hwcontext_fns_cuda, +#endif +#if HAVE_D3D_HWACCEL + &hwcontext_fns_d3d11, +#endif +#if HAVE_D3D9_HWACCEL + &hwcontext_fns_dxva2, +#endif +#if HAVE_DRM + &hwcontext_fns_drmprime, +#endif +#if HAVE_VAAPI + &hwcontext_fns_vaapi, +#endif +#if HAVE_VDPAU + &hwcontext_fns_vdpau, +#endif + NULL, +}; + +const struct hwcontext_fns *hwdec_get_hwcontext_fns(int av_hwdevice_type) +{ + for (int n = 0; hwcontext_fns[n]; n++) { + if (hwcontext_fns[n]->av_hwdevice_type == av_hwdevice_type) + return hwcontext_fns[n]; + } + return NULL; +} diff --git a/video/hwdec.h b/video/hwdec.h new file mode 100644 index 0000000..723c60f --- /dev/null +++ 
b/video/hwdec.h @@ -0,0 +1,108 @@ +#ifndef MP_HWDEC_H_ +#define MP_HWDEC_H_ + +#include <libavutil/buffer.h> + +#include "options/m_option.h" + +struct mp_image_pool; + +struct mp_hwdec_ctx { + const char *driver_name; // NULL if unknown/not loaded + + // libavutil-wrapped context, if available. + struct AVBufferRef *av_device_ref; // AVHWDeviceContext* + + // List of allowed IMGFMT_s, terminated with 0. + // If NULL, all software formats are considered to be supported. + const int *supported_formats; + // HW format used by the hwdec + int hw_imgfmt; + + // The name of this hwdec's matching conversion filter if available. + // This will be used for hardware conversion of frame formats. + // NULL otherwise. + const char *conversion_filter_name; + + // The libavutil hwconfig to be used when querying constraints for the + // conversion filter. Can be NULL if no special config is required. + void *conversion_config; +}; + +// Used to communicate hardware decoder device handles from VO to video decoder. +struct mp_hwdec_devices; + +struct mp_hwdec_devices *hwdec_devices_create(void); +void hwdec_devices_destroy(struct mp_hwdec_devices *devs); + +struct mp_hwdec_ctx *hwdec_devices_get_by_imgfmt(struct mp_hwdec_devices *devs, + int hw_imgfmt); + +// For code which still strictly assumes there is 1 (or none) device. +struct mp_hwdec_ctx *hwdec_devices_get_first(struct mp_hwdec_devices *devs); + +// Return the n-th device. NULL if none. +struct mp_hwdec_ctx *hwdec_devices_get_n(struct mp_hwdec_devices *devs, int n); + +// Add this to the list of internal devices. Adding the same pointer twice must +// be avoided. +void hwdec_devices_add(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx); + +// Remove this from the list of internal devices. Idempotent/ignores entries +// not added yet. This is not thread-safe. 
+void hwdec_devices_remove(struct mp_hwdec_devices *devs, struct mp_hwdec_ctx *ctx); + +struct hwdec_imgfmt_request { + int imgfmt; + bool probing; +}; + +// Can be used to enable lazy loading of an API with hwdec_devices_request_for_img_fmt(). +// If used at all, this must be set/unset during initialization/uninitialization, +// as concurrent use with hwdec_devices_request_for_img_fmt() is a race condition. +void hwdec_devices_set_loader(struct mp_hwdec_devices *devs, + void (*load_api)(void *ctx, struct hwdec_imgfmt_request *params), + void *load_api_ctx); + +// Cause VO to lazily load all devices for a specified img format, and +// block until this is done (even if not available). Pass IMGFMT_NONE to load +// all available devices. +void hwdec_devices_request_for_img_fmt(struct mp_hwdec_devices *devs, + struct hwdec_imgfmt_request *params); + +// Return "," concatenated list (for introspection/debugging). Use talloc_free(). +char *hwdec_devices_get_names(struct mp_hwdec_devices *devs); + +struct mp_image; +struct mpv_global; + +struct hwcontext_create_dev_params { + bool probing; // if true, don't log errors if unavailable +}; + +// Per AV_HWDEVICE_TYPE_* functions, queryable via hwdec_get_hwcontext_fns(). +// All entries are strictly optional. +struct hwcontext_fns { + int av_hwdevice_type; + // Fill in special format-specific requirements. + void (*refine_hwframes)(struct AVBufferRef *hw_frames_ctx); + // Returns an AVHWDeviceContext*. Used for copy hwdecs. + struct AVBufferRef *(*create_dev)(struct mpv_global *global, + struct mp_log *log, + struct hwcontext_create_dev_params *params); + // Return whether this is using some sort of sub-optimal emulation layer. + bool (*is_emulated)(struct AVBufferRef *hw_device_ctx); +}; + +// The parameter is of type enum AVHWDeviceType (as an int to avoid extensive +// recursive includes). May return NULL for unknown device types.
+const struct hwcontext_fns *hwdec_get_hwcontext_fns(int av_hwdevice_type); + +extern const struct hwcontext_fns hwcontext_fns_cuda; +extern const struct hwcontext_fns hwcontext_fns_d3d11; +extern const struct hwcontext_fns hwcontext_fns_drmprime; +extern const struct hwcontext_fns hwcontext_fns_dxva2; +extern const struct hwcontext_fns hwcontext_fns_vaapi; +extern const struct hwcontext_fns hwcontext_fns_vdpau; + +#endif diff --git a/video/image_loader.c b/video/image_loader.c new file mode 100644 index 0000000..ba4d62a --- /dev/null +++ b/video/image_loader.c @@ -0,0 +1,48 @@ +#include <libavcodec/avcodec.h> + +#include "common/common.h" +#include "mp_image.h" +#include "player/screenshot.h" + +#include "image_loader.h" + +struct mp_image *load_image_png_buf(void *buffer, size_t buffer_size, int imgfmt) +{ + const AVCodec *codec = avcodec_find_decoder(AV_CODEC_ID_PNG); + if (!codec) + return NULL; + + AVCodecContext *avctx = avcodec_alloc_context3(codec); + if (!avctx) + return NULL; + + if (avcodec_open2(avctx, codec, NULL) < 0) { + avcodec_free_context(&avctx); + return NULL; + } + + AVPacket *pkt = av_packet_alloc(); + if (pkt) { + if (av_new_packet(pkt, buffer_size) >= 0) + memcpy(pkt->data, buffer, buffer_size); + } + + // (There is only 1 outcome: either it takes it and decodes it, or not.) 
+ avcodec_send_packet(avctx, pkt); + avcodec_send_packet(avctx, NULL); + + av_packet_free(&pkt); + + struct mp_image *res = NULL; + AVFrame *frame = av_frame_alloc(); + if (frame && avcodec_receive_frame(avctx, frame) >= 0) { + struct mp_image *r = mp_image_from_av_frame(frame); + if (r) + res = convert_image(r, imgfmt, NULL, mp_null_log); + talloc_free(r); + } + av_frame_free(&frame); + + avcodec_free_context(&avctx); + return res; +} diff --git a/video/image_loader.h b/video/image_loader.h new file mode 100644 index 0000000..f8b20c8 --- /dev/null +++ b/video/image_loader.h @@ -0,0 +1,9 @@ +#ifndef MP_IMAGE_LOADER_H_ +#define MP_IMAGE_LOADER_H_ + +#include <stddef.h> + +struct mp_image; +struct mp_image *load_image_png_buf(void *buffer, size_t buffer_size, int imgfmt); + +#endif diff --git a/video/image_writer.c b/video/image_writer.c new file mode 100644 index 0000000..288d809 --- /dev/null +++ b/video/image_writer.c @@ -0,0 +1,757 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> + +#include <libavcodec/avcodec.h> +#include <libavformat/avformat.h> +#include <libavutil/mem.h> +#include <libavutil/opt.h> +#include <libavutil/pixdesc.h> + +#include "common/msg.h" +#include "config.h" + +#if HAVE_JPEG +#include <setjmp.h> +#include <jpeglib.h> +#endif + +#include "osdep/io.h" + +#include "common/av_common.h" +#include "common/msg.h" +#include "image_writer.h" +#include "mpv_talloc.h" +#include "video/fmt-conversion.h" +#include "video/img_format.h" +#include "video/mp_image.h" +#include "video/sws_utils.h" + +#include "options/m_option.h" + +const struct image_writer_opts image_writer_opts_defaults = { + .format = AV_CODEC_ID_MJPEG, + .high_bit_depth = true, + .png_compression = 7, + .png_filter = 5, + .jpeg_quality = 90, + .jpeg_source_chroma = true, + .webp_quality = 75, + .webp_compression = 4, + .jxl_distance = 1.0, + .jxl_effort = 4, + .avif_encoder = "libaom-av1", + .avif_pixfmt = "yuv420p", + .avif_opts = (char*[]){ + "usage", "allintra", + "crf", "32", + "cpu-used", "8", + "tune", "ssim", + NULL + }, + .tag_csp = true, +}; + +const struct m_opt_choice_alternatives mp_image_writer_formats[] = { + {"jpg", AV_CODEC_ID_MJPEG}, + {"jpeg", AV_CODEC_ID_MJPEG}, + {"png", AV_CODEC_ID_PNG}, + {"webp", AV_CODEC_ID_WEBP}, +#if HAVE_JPEGXL + {"jxl", AV_CODEC_ID_JPEGXL}, +#endif +#if HAVE_AVIF_MUXER + {"avif", AV_CODEC_ID_AV1}, +#endif + {0} +}; + +#define OPT_BASE_STRUCT struct image_writer_opts + +const struct m_option image_writer_opts[] = { + {"format", OPT_CHOICE_C(format, mp_image_writer_formats)}, + {"jpeg-quality", OPT_INT(jpeg_quality), M_RANGE(0, 100)}, + {"jpeg-source-chroma", OPT_BOOL(jpeg_source_chroma)}, + {"png-compression", OPT_INT(png_compression), M_RANGE(0, 9)}, + {"png-filter", OPT_INT(png_filter), M_RANGE(0, 5)}, + {"webp-lossless", OPT_BOOL(webp_lossless)}, + {"webp-quality", OPT_INT(webp_quality), M_RANGE(0, 100)}, + 
{"webp-compression", OPT_INT(webp_compression), M_RANGE(0, 6)}, +#if HAVE_JPEGXL + {"jxl-distance", OPT_DOUBLE(jxl_distance), M_RANGE(0.0, 15.0)}, + {"jxl-effort", OPT_INT(jxl_effort), M_RANGE(1, 9)}, +#endif +#if HAVE_AVIF_MUXER + {"avif-encoder", OPT_STRING(avif_encoder)}, + {"avif-opts", OPT_KEYVALUELIST(avif_opts)}, + {"avif-pixfmt", OPT_STRING(avif_pixfmt)}, +#endif + {"high-bit-depth", OPT_BOOL(high_bit_depth)}, + {"tag-colorspace", OPT_BOOL(tag_csp)}, + {0}, +}; + +struct image_writer_ctx { + struct mp_log *log; + const struct image_writer_opts *opts; + struct mp_imgfmt_desc original_format; +}; + +static enum AVPixelFormat replace_j_format(enum AVPixelFormat fmt) +{ + switch (fmt) { + case AV_PIX_FMT_YUV420P: return AV_PIX_FMT_YUVJ420P; + case AV_PIX_FMT_YUV422P: return AV_PIX_FMT_YUVJ422P; + case AV_PIX_FMT_YUV444P: return AV_PIX_FMT_YUVJ444P; + } + return fmt; +} + +static void prepare_avframe(AVFrame *pic, AVCodecContext *avctx, + mp_image_t *image, bool tag_csp, + struct mp_log *log) +{ + for (int n = 0; n < 4; n++) { + pic->data[n] = image->planes[n]; + pic->linesize[n] = image->stride[n]; + } + pic->format = avctx->pix_fmt; + pic->width = avctx->width; + pic->height = avctx->height; + avctx->color_range = pic->color_range = + mp_csp_levels_to_avcol_range(image->params.color.levels); + + if (!tag_csp) + return; + avctx->color_primaries = pic->color_primaries = + mp_csp_prim_to_avcol_pri(image->params.color.primaries); + avctx->color_trc = pic->color_trc = + mp_csp_trc_to_avcol_trc(image->params.color.gamma); + avctx->colorspace = pic->colorspace = + mp_csp_to_avcol_spc(image->params.color.space); + avctx->chroma_sample_location = pic->chroma_location = + mp_chroma_location_to_av(image->params.chroma_location); + mp_dbg(log, "mapped color params:\n" + " trc = %s\n" + " primaries = %s\n" + " range = %s\n" + " colorspace = %s\n" + " chroma_location = %s\n", + av_color_transfer_name(avctx->color_trc), + av_color_primaries_name(avctx->color_primaries), + 
av_color_range_name(avctx->color_range), + av_color_space_name(avctx->colorspace), + av_chroma_location_name(avctx->chroma_sample_location) + ); +} + +static bool write_lavc(struct image_writer_ctx *ctx, mp_image_t *image, const char *filename) +{ + FILE *fp = fopen(filename, "wb"); + if (!fp) { + MP_ERR(ctx, "Error opening '%s' for writing!\n", filename); + return false; + } + + bool success = false; + AVFrame *pic = NULL; + AVPacket *pkt = NULL; + + const AVCodec *codec; + if (ctx->opts->format == AV_CODEC_ID_WEBP) { + codec = avcodec_find_encoder_by_name("libwebp"); // non-animated encoder + } else { + codec = avcodec_find_encoder(ctx->opts->format); + } + + AVCodecContext *avctx = NULL; + if (!codec) + goto print_open_fail; + avctx = avcodec_alloc_context3(codec); + if (!avctx) + goto print_open_fail; + + avctx->time_base = AV_TIME_BASE_Q; + avctx->width = image->w; + avctx->height = image->h; + avctx->pix_fmt = imgfmt2pixfmt(image->imgfmt); + if (codec->id == AV_CODEC_ID_MJPEG) { + // Annoying deprecated garbage for the jpg encoder. 
+ if (image->params.color.levels == MP_CSP_LEVELS_PC) + avctx->pix_fmt = replace_j_format(avctx->pix_fmt); + } + if (avctx->pix_fmt == AV_PIX_FMT_NONE) { + MP_ERR(ctx, "Image format %s not supported by lavc.\n", + mp_imgfmt_to_name(image->imgfmt)); + goto error_exit; + } + + if (codec->id == AV_CODEC_ID_MJPEG) { + avctx->flags |= AV_CODEC_FLAG_QSCALE; + // jpeg_quality is set below + } else if (codec->id == AV_CODEC_ID_PNG) { + avctx->compression_level = ctx->opts->png_compression; + av_opt_set_int(avctx, "pred", ctx->opts->png_filter, + AV_OPT_SEARCH_CHILDREN); + } else if (codec->id == AV_CODEC_ID_WEBP) { + avctx->compression_level = ctx->opts->webp_compression; + av_opt_set_int(avctx, "lossless", ctx->opts->webp_lossless, + AV_OPT_SEARCH_CHILDREN); + av_opt_set_int(avctx, "quality", ctx->opts->webp_quality, + AV_OPT_SEARCH_CHILDREN); +#if HAVE_JPEGXL + } else if (codec->id == AV_CODEC_ID_JPEGXL) { + av_opt_set_double(avctx, "distance", ctx->opts->jxl_distance, + AV_OPT_SEARCH_CHILDREN); + av_opt_set_int(avctx, "effort", ctx->opts->jxl_effort, + AV_OPT_SEARCH_CHILDREN); +#endif + } + + if (avcodec_open2(avctx, codec, NULL) < 0) { + print_open_fail: + MP_ERR(ctx, "Could not open libavcodec encoder for saving images\n"); + goto error_exit; + } + + pic = av_frame_alloc(); + if (!pic) + goto error_exit; + prepare_avframe(pic, avctx, image, ctx->opts->tag_csp, ctx->log); + if (codec->id == AV_CODEC_ID_MJPEG) { + int qscale = 1 + (100 - ctx->opts->jpeg_quality) * 30 / 100; + pic->quality = qscale * FF_QP2LAMBDA; + } + + int ret = avcodec_send_frame(avctx, pic); + if (ret < 0) + goto error_exit; + ret = avcodec_send_frame(avctx, NULL); // send EOF + if (ret < 0) + goto error_exit; + pkt = av_packet_alloc(); + if (!pkt) + goto error_exit; + ret = avcodec_receive_packet(avctx, pkt); + if (ret < 0) + goto error_exit; + + success = fwrite(pkt->data, pkt->size, 1, fp) == 1; + +error_exit: + avcodec_free_context(&avctx); + av_frame_free(&pic); + av_packet_free(&pkt); + return 
!fclose(fp) && success; +} + +#if HAVE_JPEG + +static void write_jpeg_error_exit(j_common_ptr cinfo) +{ + // NOTE: do not write error message, too much effort to connect the libjpeg + // log callbacks with mplayer's log function mp_msp() + + // Return control to the setjmp point + longjmp(*(jmp_buf*)cinfo->client_data, 1); +} + +static bool write_jpeg(struct image_writer_ctx *ctx, mp_image_t *image, + const char *filename) +{ + FILE *fp = fopen(filename, "wb"); + if (!fp) { + MP_ERR(ctx, "Error opening '%s' for writing!\n", filename); + return false; + } + + struct jpeg_compress_struct cinfo; + struct jpeg_error_mgr jerr; + + cinfo.err = jpeg_std_error(&jerr); + jerr.error_exit = write_jpeg_error_exit; + + jmp_buf error_return_jmpbuf; + cinfo.client_data = &error_return_jmpbuf; + if (setjmp(cinfo.client_data)) { + jpeg_destroy_compress(&cinfo); + fclose(fp); + return false; + } + + jpeg_create_compress(&cinfo); + jpeg_stdio_dest(&cinfo, fp); + + cinfo.image_width = image->w; + cinfo.image_height = image->h; + cinfo.input_components = 3; + cinfo.in_color_space = JCS_RGB; + + cinfo.write_JFIF_header = TRUE; + cinfo.JFIF_major_version = 1; + cinfo.JFIF_minor_version = 2; + + jpeg_set_defaults(&cinfo); + jpeg_set_quality(&cinfo, ctx->opts->jpeg_quality, 0); + + if (ctx->opts->jpeg_source_chroma) { + cinfo.comp_info[0].h_samp_factor = 1 << ctx->original_format.chroma_xs; + cinfo.comp_info[0].v_samp_factor = 1 << ctx->original_format.chroma_ys; + } + + jpeg_start_compress(&cinfo, TRUE); + + while (cinfo.next_scanline < cinfo.image_height) { + JSAMPROW row_pointer[1]; + row_pointer[0] = image->planes[0] + + (ptrdiff_t)cinfo.next_scanline * image->stride[0]; + jpeg_write_scanlines(&cinfo, row_pointer,1); + } + + jpeg_finish_compress(&cinfo); + + jpeg_destroy_compress(&cinfo); + + return !fclose(fp); +} + +#endif + +#if HAVE_AVIF_MUXER + +static void log_side_data(struct image_writer_ctx *ctx, AVPacketSideData *data, + size_t size) +{ + if (!mp_msg_test(ctx->log, 
MSGL_DEBUG)) + return; + char dbgbuff[129]; + if (size) + MP_DBG(ctx, "write_avif() packet side data:\n"); + for (int i = 0; i < size; i++) { + AVPacketSideData *sd = &data[i]; + for (int k = 0; k < MPMIN(sd->size, 64); k++) + snprintf(dbgbuff + k*2, 3, "%02x", (int)sd->data[k]); + MP_DBG(ctx, " [%d] = {[%s], '%s'}\n", + i, av_packet_side_data_name(sd->type), dbgbuff); + } +} + +static bool write_avif(struct image_writer_ctx *ctx, mp_image_t *image, + const char *filename) +{ + const AVCodec *codec = NULL; + const AVOutputFormat *ofmt = NULL; + AVCodecContext *avctx = NULL; + AVIOContext *avioctx = NULL; + AVFormatContext *fmtctx = NULL; + AVStream *stream = NULL; + AVFrame *pic = NULL; + AVPacket *pkt = NULL; + int ret; + bool success = false; + + codec = avcodec_find_encoder_by_name(ctx->opts->avif_encoder); + if (!codec) { + MP_ERR(ctx, "Could not find encoder '%s', for saving images\n", + ctx->opts->avif_encoder); + goto free_data; + } + + ofmt = av_guess_format("avif", NULL, NULL); + if (!ofmt) { + MP_ERR(ctx, "Could not guess output format 'avif'\n"); + goto free_data; + } + + avctx = avcodec_alloc_context3(codec); + if (!avctx) { + MP_ERR(ctx, "Failed to allocate AVContext.\n"); + goto free_data; + } + + avctx->width = image->w; + avctx->height = image->h; + avctx->time_base = (AVRational){1, 30}; + avctx->pkt_timebase = (AVRational){1, 30}; + avctx->codec_type = AVMEDIA_TYPE_VIDEO; + avctx->pix_fmt = imgfmt2pixfmt(image->imgfmt); + if (avctx->pix_fmt == AV_PIX_FMT_NONE) { + MP_ERR(ctx, "Image format %s not supported by lavc.\n", + mp_imgfmt_to_name(image->imgfmt)); + goto free_data; + } + + av_opt_set_int(avctx, "still-picture", 1, AV_OPT_SEARCH_CHILDREN); + + AVDictionary *avd = NULL; + mp_set_avdict(&avd, ctx->opts->avif_opts); + av_opt_set_dict2(avctx, &avd, AV_OPT_SEARCH_CHILDREN); + av_dict_free(&avd); + + pic = av_frame_alloc(); + if (!pic) { + MP_ERR(ctx, "Could not allocate AVFrame\n"); + goto free_data; + } + + prepare_avframe(pic, avctx, image, 
ctx->opts->tag_csp, ctx->log); + // Not setting this flag caused ffmpeg to output avif that was not passing + // standard checks but ffmpeg would still read and not complain... + avctx->flags |= AV_CODEC_FLAG_GLOBAL_HEADER; + + ret = avcodec_open2(avctx, codec, NULL); + if (ret < 0) { + MP_ERR(ctx, "Could not open libavcodec encoder for saving images\n"); + goto free_data; + } + + ret = avio_open(&avioctx, filename, AVIO_FLAG_WRITE); + if (ret < 0) { + MP_ERR(ctx, "Could not open file '%s' for saving images\n", filename); + goto free_data; + } + + fmtctx = avformat_alloc_context(); + if (!fmtctx) { + MP_ERR(ctx, "Could not allocate format context\n"); + goto free_data; + } + fmtctx->pb = avioctx; + fmtctx->oformat = ofmt; + + stream = avformat_new_stream(fmtctx, codec); + if (!stream) { + MP_ERR(ctx, "Could not allocate stream\n"); + goto free_data; + } + + ret = avcodec_parameters_from_context(stream->codecpar, avctx); + if (ret < 0) { + MP_ERR(ctx, "Could not copy parameters from context\n"); + goto free_data; + } + + ret = avformat_init_output(fmtctx, NULL); + if (ret < 0) { + MP_ERR(ctx, "Could not initialize output\n"); + goto free_data; + } + + ret = avformat_write_header(fmtctx, NULL); + if (ret < 0) { + MP_ERR(ctx, "Could not write format header\n"); + goto free_data; + } + + pkt = av_packet_alloc(); + if (!pkt) { + MP_ERR(ctx, "Could not allocate packet\n"); + goto free_data; + } + + ret = avcodec_send_frame(avctx, pic); + if (ret < 0) { + MP_ERR(ctx, "Error sending frame\n"); + goto free_data; + } + ret = avcodec_send_frame(avctx, NULL); // send EOF + if (ret < 0) + goto free_data; + + int pts = 0; + log_side_data(ctx, avctx->coded_side_data, avctx->nb_coded_side_data); + while (ret >= 0) { + ret = avcodec_receive_packet(avctx, pkt); + if (ret == AVERROR(EAGAIN) || ret == AVERROR_EOF) + break; + if (ret < 0) { + MP_ERR(ctx, "Error receiving packet\n"); + goto free_data; + } + pkt->dts = pkt->pts = ++pts; + pkt->stream_index = stream->index; + 
log_side_data(ctx, pkt->side_data, pkt->side_data_elems); + + ret = av_write_frame(fmtctx, pkt); + if (ret < 0) { + MP_ERR(ctx, "Error writing frame\n"); + goto free_data; + } + av_packet_unref(pkt); + } + + ret = av_write_trailer(fmtctx); + if (ret < 0) { + MP_ERR(ctx, "Could not write trailer\n"); + goto free_data; + } + MP_DBG(ctx, "write_avif(): avio_size() = %"PRIi64"\n", avio_size(avioctx)); + + success = true; + +free_data: + success = !avio_closep(&avioctx) && success; + avformat_free_context(fmtctx); + avcodec_free_context(&avctx); + av_packet_free(&pkt); + av_frame_free(&pic); + + return success; +} + +#endif + +static int get_encoder_format(const AVCodec *codec, int srcfmt, bool highdepth) +{ + const enum AVPixelFormat *pix_fmts = codec->pix_fmts; + int current = 0; + for (int n = 0; pix_fmts && pix_fmts[n] != AV_PIX_FMT_NONE; n++) { + int fmt = pixfmt2imgfmt(pix_fmts[n]); + if (!fmt) + continue; + if (!highdepth) { + // Ignore formats larger than 8 bit per pixel. (Or which are unknown.) + struct mp_regular_imgfmt rdesc; + if (!mp_get_regular_imgfmt(&rdesc, fmt)) { + int ofmt = mp_find_other_endian(fmt); + if (!mp_get_regular_imgfmt(&rdesc, ofmt)) + continue; + } + if (rdesc.component_size > 1) + continue; + } + current = current ? 
mp_imgfmt_select_best(current, fmt, srcfmt) : fmt; + } + return current; +} + +static int get_target_format(struct image_writer_ctx *ctx) +{ + const AVCodec *codec = avcodec_find_encoder(ctx->opts->format); + if (!codec) + goto unknown; + + int srcfmt = ctx->original_format.id; + + int target = get_encoder_format(codec, srcfmt, ctx->opts->high_bit_depth); + if (!target) { + mp_dbg(ctx->log, "Falling back to high-depth format.\n"); + target = get_encoder_format(codec, srcfmt, true); + } + + if (!target) + goto unknown; + + return target; + +unknown: + return IMGFMT_RGB0; +} + +const char *image_writer_file_ext(const struct image_writer_opts *opts) +{ + struct image_writer_opts defs = image_writer_opts_defaults; + + if (!opts) + opts = &defs; + + return m_opt_choice_str(mp_image_writer_formats, opts->format); +} + +bool image_writer_high_depth(const struct image_writer_opts *opts) +{ + return opts->format == AV_CODEC_ID_PNG +#if HAVE_JPEGXL + || opts->format == AV_CODEC_ID_JPEGXL +#endif +#if HAVE_AVIF_MUXER + || opts->format == AV_CODEC_ID_AV1 +#endif + ; +} + +bool image_writer_flexible_csp(const struct image_writer_opts *opts) +{ + if (!opts->tag_csp) + return false; + return false +#if HAVE_JPEGXL + || opts->format == AV_CODEC_ID_JPEGXL +#endif +#if HAVE_AVIF_MUXER + || opts->format == AV_CODEC_ID_AV1 +#endif +#if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 58, 100) + // This version added support for cICP tag writing + || opts->format == AV_CODEC_ID_PNG +#endif + ; +} + +int image_writer_format_from_ext(const char *ext) +{ + for (int n = 0; mp_image_writer_formats[n].name; n++) { + if (ext && strcmp(mp_image_writer_formats[n].name, ext) == 0) + return mp_image_writer_formats[n].value; + } + return 0; +} + +static struct mp_image *convert_image(struct mp_image *image, int destfmt, + enum mp_csp_levels yuv_levels, + const struct image_writer_opts *opts, + struct mpv_global *global, + struct mp_log *log) +{ + int d_w, d_h; + 
mp_image_params_get_dsize(&image->params, &d_w, &d_h); + + struct mp_image_params p = { + .imgfmt = destfmt, + .w = d_w, + .h = d_h, + .p_w = 1, + .p_h = 1, + .color = image->params.color, + }; + mp_image_params_guess_csp(&p); + + if (!image_writer_flexible_csp(opts)) { + // If our format can't tag csps, set something sane + p.color.primaries = MP_CSP_PRIM_BT_709; + p.color.gamma = MP_CSP_TRC_AUTO; + p.color.light = MP_CSP_LIGHT_DISPLAY; + p.color.hdr = (struct pl_hdr_metadata){0}; + if (p.color.space != MP_CSP_RGB) { + p.color.levels = yuv_levels; + p.color.space = MP_CSP_BT_601; + p.chroma_location = MP_CHROMA_CENTER; + } + mp_image_params_guess_csp(&p); + } + + if (mp_image_params_equal(&p, &image->params)) + return mp_image_new_ref(image); + + mp_dbg(log, "will convert image to %s\n", mp_imgfmt_to_name(p.imgfmt)); + + struct mp_image *src = image; + if (mp_image_crop_valid(&src->params) && + (mp_rect_w(src->params.crop) != src->w || + mp_rect_h(src->params.crop) != src->h)) + { + src = mp_image_new_ref(src); + if (!src) { + mp_err(log, "mp_image_new_ref failed!\n"); + return NULL; + } + mp_image_crop_rc(src, src->params.crop); + } + + struct mp_image *dst = mp_image_alloc(p.imgfmt, p.w, p.h); + if (!dst) { + mp_err(log, "Out of memory.\n"); + return NULL; + } + mp_image_copy_attributes(dst, src); + + dst->params = p; + + struct mp_sws_context *sws = mp_sws_alloc(NULL); + sws->log = log; + if (global) + mp_sws_enable_cmdline_opts(sws, global); + bool ok = mp_sws_scale(sws, dst, src) >= 0; + talloc_free(sws); + + if (src != image) + talloc_free(src); + + if (!ok) { + mp_err(log, "Error when converting image.\n"); + talloc_free(dst); + return NULL; + } + + return dst; +} + +bool write_image(struct mp_image *image, const struct image_writer_opts *opts, + const char *filename, struct mpv_global *global, + struct mp_log *log) +{ + struct image_writer_opts defs = image_writer_opts_defaults; + if (!opts) + opts = &defs; + + mp_dbg(log, "input: %s\n", 
mp_image_params_to_str(&image->params)); + + struct image_writer_ctx ctx = { log, opts, image->fmt }; + bool (*write)(struct image_writer_ctx *, mp_image_t *, const char *) = write_lavc; + int destfmt = 0; + +#if HAVE_JPEG + if (opts->format == AV_CODEC_ID_MJPEG) { + write = write_jpeg; + destfmt = IMGFMT_RGB24; + } +#endif +#if HAVE_AVIF_MUXER + if (opts->format == AV_CODEC_ID_AV1) { + write = write_avif; + destfmt = mp_imgfmt_from_name(bstr0(opts->avif_pixfmt)); + } +#endif + if (opts->format == AV_CODEC_ID_WEBP && !opts->webp_lossless) { + // For lossy images, libwebp has its own RGB->YUV conversion. + // We don't want that, so force YUV/YUVA here. + int alpha = image->fmt.flags & MP_IMGFLAG_ALPHA; + destfmt = alpha ? pixfmt2imgfmt(AV_PIX_FMT_YUVA420P) : IMGFMT_420P; + } + + if (!destfmt) + destfmt = get_target_format(&ctx); + + enum mp_csp_levels levels; // Ignored if destfmt is a RGB format + if (opts->format == AV_CODEC_ID_WEBP) { + levels = MP_CSP_LEVELS_TV; + } else { + levels = MP_CSP_LEVELS_PC; + } + + struct mp_image *dst = convert_image(image, destfmt, levels, opts, global, log); + if (!dst) + return false; + + bool success = write(&ctx, dst, filename); + if (!success) + mp_err(log, "Error writing file '%s'!\n", filename); + + talloc_free(dst); + return success; +} + +void dump_png(struct mp_image *image, const char *filename, struct mp_log *log) +{ + struct image_writer_opts opts = image_writer_opts_defaults; + opts.format = AV_CODEC_ID_PNG; + write_image(image, &opts, filename, NULL, log); +} diff --git a/video/image_writer.h b/video/image_writer.h new file mode 100644 index 0000000..72d1602 --- /dev/null +++ b/video/image_writer.h @@ -0,0 +1,74 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "options/m_option.h" + +struct mp_image; +struct mp_log; + +struct image_writer_opts { + int format; + bool high_bit_depth; + int png_compression; + int png_filter; + int jpeg_quality; + bool jpeg_source_chroma; + bool webp_lossless; + int webp_quality; + int webp_compression; + double jxl_distance; + int jxl_effort; + char *avif_encoder; + char *avif_pixfmt; + char **avif_opts; + bool tag_csp; +}; + +extern const struct image_writer_opts image_writer_opts_defaults; + +extern const struct m_option image_writer_opts[]; + +// Return the file extension that will be used, e.g. "png". +const char *image_writer_file_ext(const struct image_writer_opts *opts); + +// Return whether the selected format likely supports >8 bit per component. +bool image_writer_high_depth(const struct image_writer_opts *opts); + +// Return whether the selected format likely supports non-sRGB colorspaces +bool image_writer_flexible_csp(const struct image_writer_opts *opts); + +// Map file extension to format ID - return 0 (which is invalid) if unknown. +int image_writer_format_from_ext(const char *ext); + +/* + * Save the given image under the given filename. The parameters csp and opts + * are optional. All pixel formats supported by swscale are supported. + * + * File format and compression settings are controlled via the opts parameter. + * + * If global!=NULL, use command line scaler options etc. + * + * NOTE: The fields w/h/width/height of the passed mp_image must be all set + * accordingly. 
Setting w and width or h and height to different values + * can be used to store snapshots of anamorphic video. + */ +bool write_image(struct mp_image *image, const struct image_writer_opts *opts, + const char *filename, struct mpv_global *global, + struct mp_log *log); + +// Debugging helper. +void dump_png(struct mp_image *image, const char *filename, struct mp_log *log); diff --git a/video/img_format.c b/video/img_format.c new file mode 100644 index 0000000..6b7857f --- /dev/null +++ b/video/img_format.c @@ -0,0 +1,824 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <string.h> + +#include <libavcodec/avcodec.h> +#include <libavutil/imgutils.h> +#include <libavutil/pixfmt.h> +#include <libavutil/pixdesc.h> + +#include "video/img_format.h" +#include "video/mp_image.h" +#include "video/fmt-conversion.h" + +struct mp_imgfmt_entry { + const char *name; + // Valid if flags!=0. 
+ // This can be incomplete, and missing fields are filled in: + // - sets num_planes and bpp[], derived from comps[] (rounds to bytes) + // - sets MP_IMGFLAG_GRAY, derived from comps[] + // - sets MP_IMGFLAG_ALPHA, derived from comps[] + // - sets align_x/y if 0, derived from chroma shift + // - sets xs[]/ys[] always, derived from num_planes/chroma_shift + // - sets MP_IMGFLAG_HAS_COMPS|MP_IMGFLAG_NE if num_planes>0 + // - sets MP_IMGFLAG_TYPE_UINT if no other type set + // - sets id to mp_imgfmt_list[] implied format + struct mp_imgfmt_desc desc; +}; + +#define FRINGE_GBRP(def, dname, b) \ + [def - IMGFMT_CUST_BASE] = { \ + .name = dname, \ + .desc = { .flags = MP_IMGFLAG_COLOR_RGB, \ + .comps = { {2, 0, 8, (b) - 8}, {0, 0, 8, (b) - 8}, \ + {1, 0, 8, (b) - 8}, }, }} + +#define FLOAT_YUV(def, dname, xs, ys, a) \ + [def - IMGFMT_CUST_BASE] = { \ + .name = dname, \ + .desc = { .flags = MP_IMGFLAG_COLOR_YUV | MP_IMGFLAG_TYPE_FLOAT, \ + .chroma_xs = xs, .chroma_ys = ys, \ + .comps = { {0, 0, 32}, {1, 0, 32}, {2, 0, 32}, \ + {3 * (a), 0, 32 * (a)} }, }} + +static const struct mp_imgfmt_entry mp_imgfmt_list[] = { + // not in ffmpeg + [IMGFMT_VDPAU_OUTPUT - IMGFMT_CUST_BASE] = { + .name = "vdpau_output", + .desc = { + .flags = MP_IMGFLAG_NE | MP_IMGFLAG_RGB | MP_IMGFLAG_HWACCEL, + }, + }, + [IMGFMT_RGB30 - IMGFMT_CUST_BASE] = { + .name = "rgb30", + .desc = { + .flags = MP_IMGFLAG_RGB, + .comps = { {0, 20, 10}, {0, 10, 10}, {0, 0, 10} }, + }, + }, + [IMGFMT_YAP8 - IMGFMT_CUST_BASE] = { + .name = "yap8", + .desc = { + .flags = MP_IMGFLAG_COLOR_YUV, + .comps = { {0, 0, 8}, {0}, {0}, {1, 0, 8} }, + }, + }, + [IMGFMT_YAP16 - IMGFMT_CUST_BASE] = { + .name = "yap16", + .desc = { + .flags = MP_IMGFLAG_COLOR_YUV, + .comps = { {0, 0, 16}, {0}, {0}, {1, 0, 16} }, + }, + }, + [IMGFMT_Y1 - IMGFMT_CUST_BASE] = { + .name = "y1", + .desc = { + .flags = MP_IMGFLAG_COLOR_RGB, + .comps = { {0, 0, 8, -7} }, + }, + }, + [IMGFMT_YAPF - IMGFMT_CUST_BASE] = { + .name = "grayaf32", // try to 
mimic ffmpeg naming convention + .desc = { + .flags = MP_IMGFLAG_COLOR_YUV | MP_IMGFLAG_TYPE_FLOAT, + .comps = { {0, 0, 32}, {0}, {0}, {1, 0, 32} }, + }, + }, + FLOAT_YUV(IMGFMT_444PF, "yuv444pf", 0, 0, 0), + FLOAT_YUV(IMGFMT_444APF, "yuva444pf", 0, 0, 1), + FLOAT_YUV(IMGFMT_420PF, "yuv420pf", 1, 1, 0), + FLOAT_YUV(IMGFMT_420APF, "yuva420pf", 1, 1, 1), + FLOAT_YUV(IMGFMT_422PF, "yuv422pf", 1, 0, 0), + FLOAT_YUV(IMGFMT_422APF, "yuva422pf", 1, 0, 1), + FLOAT_YUV(IMGFMT_440PF, "yuv440pf", 0, 1, 0), + FLOAT_YUV(IMGFMT_440APF, "yuva440pf", 0, 1, 1), + FLOAT_YUV(IMGFMT_410PF, "yuv410pf", 2, 2, 0), + FLOAT_YUV(IMGFMT_410APF, "yuva410pf", 2, 2, 1), + FLOAT_YUV(IMGFMT_411PF, "yuv411pf", 2, 0, 0), + FLOAT_YUV(IMGFMT_411APF, "yuva411pf", 2, 0, 1), + FRINGE_GBRP(IMGFMT_GBRP1, "gbrp1", 1), + FRINGE_GBRP(IMGFMT_GBRP2, "gbrp2", 2), + FRINGE_GBRP(IMGFMT_GBRP3, "gbrp3", 3), + FRINGE_GBRP(IMGFMT_GBRP4, "gbrp4", 4), + FRINGE_GBRP(IMGFMT_GBRP5, "gbrp5", 5), + FRINGE_GBRP(IMGFMT_GBRP6, "gbrp6", 6), + // in FFmpeg, but FFmpeg names have an annoying "_vld" suffix + [IMGFMT_VIDEOTOOLBOX - IMGFMT_CUST_BASE] = { + .name = "videotoolbox", + }, + [IMGFMT_VAAPI - IMGFMT_CUST_BASE] = { + .name = "vaapi", + }, +}; + +static const struct mp_imgfmt_entry *get_mp_desc(int imgfmt) +{ + if (imgfmt < IMGFMT_CUST_BASE) + return NULL; + int index = imgfmt - IMGFMT_CUST_BASE; + if (index >= MP_ARRAY_SIZE(mp_imgfmt_list)) + return NULL; + const struct mp_imgfmt_entry *e = &mp_imgfmt_list[index]; + return e->name ? 
e : NULL; +} + +char **mp_imgfmt_name_list(void) +{ + int count = IMGFMT_END - IMGFMT_START; + char **list = talloc_zero_array(NULL, char *, count + 1); + int num = 0; + for (int n = IMGFMT_START; n < IMGFMT_END; n++) { + const char *name = mp_imgfmt_to_name(n); + if (strcmp(name, "unknown") != 0) + list[num++] = talloc_strdup(list, name); + } + return list; +} + +int mp_imgfmt_from_name(bstr name) +{ + if (bstr_equals0(name, "none")) + return 0; + for (int n = 0; n < MP_ARRAY_SIZE(mp_imgfmt_list); n++) { + const struct mp_imgfmt_entry *p = &mp_imgfmt_list[n]; + if (p->name && bstr_equals0(name, p->name)) + return IMGFMT_CUST_BASE + n; + } + return pixfmt2imgfmt(av_get_pix_fmt(mp_tprintf(80, "%.*s", BSTR_P(name)))); +} + +char *mp_imgfmt_to_name_buf(char *buf, size_t buf_size, int fmt) +{ + const struct mp_imgfmt_entry *p = get_mp_desc(fmt); + const char *name = p ? p->name : NULL; + if (!name) { + const AVPixFmtDescriptor *pixdesc = av_pix_fmt_desc_get(imgfmt2pixfmt(fmt)); + if (pixdesc) + name = pixdesc->name; + } + if (!name) + name = "unknown"; + snprintf(buf, buf_size, "%s", name); + int len = strlen(buf); + if (len > 2 && buf[len - 2] == MP_SELECT_LE_BE('l', 'b') && buf[len - 1] == 'e') + buf[len - 2] = '\0'; + return buf; +} + +static void fill_pixdesc_layout(struct mp_imgfmt_desc *desc, + enum AVPixelFormat fmt, + const AVPixFmtDescriptor *pd) +{ + if (pd->flags & AV_PIX_FMT_FLAG_PAL || + pd->flags & AV_PIX_FMT_FLAG_HWACCEL) + goto fail; + + bool has_alpha = pd->flags & AV_PIX_FMT_FLAG_ALPHA; + if (pd->nb_components != 1 + has_alpha && + pd->nb_components != 3 + has_alpha) + goto fail; + + // Very convenient: we assume we're always on little endian, and FFmpeg + // explicitly marks big endian formats => don't need to guess whether a + // format is little endian, or not affected by byte order. + bool is_be = pd->flags & AV_PIX_FMT_FLAG_BE; + bool is_ne = MP_SELECT_LE_BE(false, true) == is_be; + + // Packed sub-sampled YUV is very... special. 
+ bool is_packed_ss_yuv = pd->log2_chroma_w && !pd->log2_chroma_h && + pd->comp[1].plane == 0 && pd->comp[2].plane == 0 && + pd->nb_components == 3; + + if (is_packed_ss_yuv) + desc->bpp[0] = pd->comp[1].step * 8; + + // Determine if there are any byte overlaps => relevant for determining + // access unit for endian, since pixdesc does not expose this, and assumes + // a weird model where you do separate memory fetches for each component. + bool any_shared_bytes = !!(pd->flags & AV_PIX_FMT_FLAG_BITSTREAM); + for (int c = 0; c < pd->nb_components; c++) { + for (int i = 0; i < c; i++) { + const AVComponentDescriptor *d1 = &pd->comp[c]; + const AVComponentDescriptor *d2 = &pd->comp[i]; + if (d1->plane == d2->plane) { + if (d1->offset + (d1->depth + 7) / 8u > d2->offset && + d2->offset + (d2->depth + 7) / 8u > d1->offset) + any_shared_bytes = true; + } + } + } + + int el_bits = (pd->flags & AV_PIX_FMT_FLAG_BITSTREAM) ? 1 : 8; + for (int c = 0; c < pd->nb_components; c++) { + const AVComponentDescriptor *d = &pd->comp[c]; + if (d->plane >= MP_MAX_PLANES) + goto fail; + + desc->num_planes = MPMAX(desc->num_planes, d->plane + 1); + + int plane_bits = desc->bpp[d->plane]; + int c_bits = d->step * el_bits; + + // The first component wins, because either all components result in + // the same value, or luma wins (luma always comes before chroma). + if (plane_bits) { + if (c_bits > plane_bits) + goto fail; // inconsistent + } else { + desc->bpp[d->plane] = plane_bits = c_bits; + } + + int shift = d->shift; + // What the fuck: for some inexplicable reason, MONOB uses shift=7 + // in pixdesc, which is basically out of bounds. Pixdesc bug? + // Make it behave like MONOW. (No, the bit-order is not different.) 
+ if (fmt == AV_PIX_FMT_MONOBLACK) + shift = 0; + + int offset = d->offset * el_bits; + // The pixdesc logic for reading and endian swapping is as follows + // (reverse engineered from av_read_image_line2()): + // - determine a word size that will include the component fully; + // this includes the "active" bits and the amount "shifted" away + // (for example shift=7/depth=18 => 32 bit word reading [31:0]) + // - the same format can use different word sizes (e.g. bgr565: the R + // component at offset 0 is read as 8 bit; BG is read as 16 bits) + // - if BE flag is set, swap the word before proceeding + // - extract via shift and mask derived by depth + int word = mp_round_next_power_of_2(MPMAX(d->depth + shift, 8)); + // The purpose of this is unknown. It's an absurdity fished out of + // av_read_image_line2()'s implementation. It seems technically + // unnecessary, and provides no information. On the other hand, it + // compensates for seemingly bogus packed integer pixdescs; this + // is "why" some formats use d->offset = -1. + if (is_be && el_bits == 8 && word == 8) + offset += 8; + // Pixdesc's model sometimes requires accesses with varying word-sizes, + // as seen in bgr565 and other formats. Also, it makes you read some + // formats with multiple endian-dependent accesses, where accessing a + // larger unit would make more sense. (Consider X2RGB10BE, for which + // pixdesc wants you to perform 3 * 2 byte accesses, and swap each of + // the read 16 bit words. What you really want is to swap the entire 4 + // byte thing, and then extract the components with bit shifts). + // This is complete bullshit, so we transform it into word swaps before + // further processing. Care needs to be taken to not change formats like + // P010 or YA16 (prefer component accesses for them; P010 isn't even + // representable, because endian_shift is for all planes). 
+ // As a heuristic, assume that if any components share a byte, the whole + // pixel is read as a single memory access and endian swapped at once. + int access_size = 8; + if (plane_bits > 8) { + if (any_shared_bytes) { + access_size = plane_bits; + if (is_be && word != access_size) { + // Before: offset = 8*byte_offset (with word bits of data) + // After: offset = bit_offset into swapped endian_size word + offset = access_size - word - offset; + } + } else { + access_size = word; + } + } + int endian_size = (access_size && !is_ne) ? access_size : 8; + int endian_shift = mp_log2(endian_size) - 3; + if (!MP_IS_POWER_OF_2(endian_size) || endian_shift < 0 || endian_shift > 3) + goto fail; + if (desc->endian_shift && desc->endian_shift != endian_shift) + goto fail; + desc->endian_shift = endian_shift; + + // We always use bit offsets; this doesn't lose any information, + // and pixdesc is merely more redundant. + offset += shift; + if (offset < 0 || offset >= (1 << 6)) + goto fail; + if (offset + d->depth > plane_bits) + goto fail; + if (d->depth < 0 || d->depth >= (1 << 6)) + goto fail; + desc->comps[c] = (struct mp_imgfmt_comp_desc){ + .plane = d->plane, + .offset = offset, + .size = d->depth, + }; + } + + for (int p = 0; p < desc->num_planes; p++) { + if (!desc->bpp[p]) + goto fail; // plane doesn't exist + } + + // What the fuck: this is probably a pixdesc bug, so fix it. + if (fmt == AV_PIX_FMT_RGB8) { + desc->comps[2] = (struct mp_imgfmt_comp_desc){0, 0, 2}; + desc->comps[1] = (struct mp_imgfmt_comp_desc){0, 2, 3}; + desc->comps[0] = (struct mp_imgfmt_comp_desc){0, 5, 3}; + } + + // Overlap test. If any shared bits are happening, this is not a format we + // can represent (or it's something like Bayer: components in the same bits, + // but different alternating lines). 
+ bool any_shared_bits = false; + for (int c = 0; c < pd->nb_components; c++) { + for (int i = 0; i < c; i++) { + struct mp_imgfmt_comp_desc *c1 = &desc->comps[c]; + struct mp_imgfmt_comp_desc *c2 = &desc->comps[i]; + if (c1->plane == c2->plane) { + if (c1->offset + c1->size > c2->offset && + c2->offset + c2->size > c1->offset) + any_shared_bits = true; + } + } + } + + if (any_shared_bits) { + for (int c = 0; c < pd->nb_components; c++) + desc->comps[c] = (struct mp_imgfmt_comp_desc){0}; + } + + // Many important formats have padding within an access word. For example + // yuv420p10 has the upper 6 bit cleared to 0; P010 has the lower 6 bits + // cleared to 0. Pixdesc cannot represent that these bits are 0. There are + // other formats where padding is not guaranteed to be 0, but they are + // described in the same way. + // Apply a heuristic that is supposed to identify formats which use + // guaranteed 0 padding. This could fail, but nobody said this pixdesc crap + // is robust. + for (int c = 0; c < pd->nb_components; c++) { + struct mp_imgfmt_comp_desc *cd = &desc->comps[c]; + // Note: rgb444 would defeat our heuristic if we checked only per comp. + // also, exclude "bitstream" formats due to monow/monob + int fsize = MP_ALIGN_UP(cd->size, 8); + if (!any_shared_bytes && el_bits == 8 && fsize != cd->size && + fsize - cd->size <= (1 << 3)) + { + if (!(cd->offset % 8u)) { + cd->pad = -(fsize - cd->size); + cd->size = fsize; + } else if (!((cd->offset + cd->size) % 8u)) { + cd->pad = fsize - cd->size; + cd->size = fsize; + cd->offset = MP_ALIGN_DOWN(cd->offset, 8); + } + } + } + + // The alpha component always has ID 4 (index 3) in our representation, so + // move the alpha component to there. 
+ if (has_alpha && pd->nb_components < 4) { + desc->comps[3] = desc->comps[pd->nb_components - 1]; + desc->comps[pd->nb_components - 1] = (struct mp_imgfmt_comp_desc){0}; + } + + if (is_packed_ss_yuv) { + desc->flags |= MP_IMGFLAG_PACKED_SS_YUV; + desc->bpp[0] /= 1 << pd->log2_chroma_w; + } else if (!any_shared_bits) { + desc->flags |= MP_IMGFLAG_HAS_COMPS; + } + + return; + +fail: + for (int n = 0; n < 4; n++) + desc->comps[n] = (struct mp_imgfmt_comp_desc){0}; + // Average bit size fallback. + desc->num_planes = av_pix_fmt_count_planes(fmt); + for (int p = 0; p < desc->num_planes; p++) { + int ls = av_image_get_linesize(fmt, 256, p); + desc->bpp[p] = ls > 0 ? ls * 8 / 256 : 0; + } +} + +static bool mp_imgfmt_get_desc_from_pixdesc(int mpfmt, struct mp_imgfmt_desc *out) +{ + enum AVPixelFormat fmt = imgfmt2pixfmt(mpfmt); + const AVPixFmtDescriptor *pd = av_pix_fmt_desc_get(fmt); + if (!pd || pd->nb_components > 4) + return false; + + struct mp_imgfmt_desc desc = { + .id = mpfmt, + .chroma_xs = pd->log2_chroma_w, + .chroma_ys = pd->log2_chroma_h, + }; + + if (pd->flags & AV_PIX_FMT_FLAG_ALPHA) + desc.flags |= MP_IMGFLAG_ALPHA; + + if (pd->flags & AV_PIX_FMT_FLAG_HWACCEL) + desc.flags |= MP_IMGFLAG_TYPE_HW; + + // Pixdesc does not provide a flag for XYZ, so this is the best we can do. + if (strncmp(pd->name, "xyz", 3) == 0) { + desc.flags |= MP_IMGFLAG_COLOR_XYZ; + } else if (pd->flags & AV_PIX_FMT_FLAG_RGB) { + desc.flags |= MP_IMGFLAG_COLOR_RGB; + } else if (fmt == AV_PIX_FMT_MONOBLACK || fmt == AV_PIX_FMT_MONOWHITE) { + desc.flags |= MP_IMGFLAG_COLOR_RGB; + } else if (fmt == AV_PIX_FMT_PAL8) { + desc.flags |= MP_IMGFLAG_COLOR_RGB | MP_IMGFLAG_TYPE_PAL8; + } + + if (pd->flags & AV_PIX_FMT_FLAG_FLOAT) + desc.flags |= MP_IMGFLAG_TYPE_FLOAT; + + // Educated guess. 
+ if (!(desc.flags & MP_IMGFLAG_COLOR_MASK) && + !(desc.flags & MP_IMGFLAG_TYPE_HW)) + desc.flags |= MP_IMGFLAG_COLOR_YUV; + + desc.align_x = 1 << desc.chroma_xs; + desc.align_y = 1 << desc.chroma_ys; + + fill_pixdesc_layout(&desc, fmt, pd); + + if (desc.flags & (MP_IMGFLAG_HAS_COMPS | MP_IMGFLAG_PACKED_SS_YUV)) { + if (!(desc.flags & MP_IMGFLAG_TYPE_MASK)) + desc.flags |= MP_IMGFLAG_TYPE_UINT; + } + + if (desc.bpp[0] % 8u && (pd->flags & AV_PIX_FMT_FLAG_BITSTREAM)) + desc.align_x = 8 / desc.bpp[0]; // expect power of 2 + + // Very heuristical. + bool is_ne = !desc.endian_shift; + bool need_endian = (desc.comps[0].size % 8u && desc.bpp[0] > 8) || + desc.comps[0].size > 8; + + if (need_endian) { + bool is_le = MP_SELECT_LE_BE(is_ne, !is_ne); + desc.flags |= is_le ? MP_IMGFLAG_LE : MP_IMGFLAG_BE; + } else { + desc.flags |= MP_IMGFLAG_LE | MP_IMGFLAG_BE; + } + + *out = desc; + return true; +} + +bool mp_imgfmt_get_packed_yuv_locations(int imgfmt, uint8_t *luma_offsets) +{ + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt); + if (!(desc.flags & MP_IMGFLAG_PACKED_SS_YUV)) + return false; + + assert(desc.num_planes == 1); + + // Guess at which positions the additional luma samples are. We iterate + // starting with the first byte, and then put a luma sample at places + // not covered by other luma/chroma. + // Pixdesc does not and can not provide this information. This heuristic + // may fail in certain cases. What a load of bullshit, right? 
+ int lsize = desc.comps[0].size; + int cur_offset = 0; + for (int lsample = 1; lsample < (1 << desc.chroma_xs); lsample++) { + while (1) { + if (cur_offset + lsize > desc.bpp[0] * desc.align_x) + return false; + bool free = true; + for (int c = 0; c < 3; c++) { + struct mp_imgfmt_comp_desc *cd = &desc.comps[c]; + if (!cd->size) + continue; + if (cd->offset + cd->size > cur_offset && + cur_offset + lsize > cd->offset) + { + free = false; + break; + } + } + if (free) + break; + cur_offset += lsize; + } + luma_offsets[lsample] = cur_offset; + cur_offset += lsize; + } + + luma_offsets[0] = desc.comps[0].offset; + return true; +} + +static bool get_native_desc(int mpfmt, struct mp_imgfmt_desc *desc) +{ + const struct mp_imgfmt_entry *p = get_mp_desc(mpfmt); + if (!p || !p->desc.flags) + return false; + + *desc = p->desc; + + // Fill in some fields mp_imgfmt_entry.desc is not required to set. + + desc->id = mpfmt; + + for (int n = 0; n < MP_NUM_COMPONENTS; n++) { + struct mp_imgfmt_comp_desc *cd = &desc->comps[n]; + if (cd->size) + desc->num_planes = MPMAX(desc->num_planes, cd->plane + 1); + desc->bpp[cd->plane] = + MPMAX(desc->bpp[cd->plane], MP_ALIGN_UP(cd->offset + cd->size, 8)); + } + + if (!desc->align_x && !desc->align_y) { + desc->align_x = 1 << desc->chroma_xs; + desc->align_y = 1 << desc->chroma_ys; + } + + if (desc->num_planes) + desc->flags |= MP_IMGFLAG_HAS_COMPS | MP_IMGFLAG_NE; + + if (!(desc->flags & MP_IMGFLAG_TYPE_MASK)) + desc->flags |= MP_IMGFLAG_TYPE_UINT; + + return true; +} + +int mp_imgfmt_desc_get_num_comps(struct mp_imgfmt_desc *desc) +{ + int flags = desc->flags; + if (!(flags & MP_IMGFLAG_COLOR_MASK)) + return 0; + return 3 + (flags & MP_IMGFLAG_GRAY ? 
-2 : 0) + !!(flags & MP_IMGFLAG_ALPHA); +} + +struct mp_imgfmt_desc mp_imgfmt_get_desc(int mpfmt) +{ + struct mp_imgfmt_desc desc; + + if (!get_native_desc(mpfmt, &desc) && + !mp_imgfmt_get_desc_from_pixdesc(mpfmt, &desc)) + return (struct mp_imgfmt_desc){0}; + + for (int p = 0; p < desc.num_planes; p++) { + desc.xs[p] = (p == 1 || p == 2) ? desc.chroma_xs : 0; + desc.ys[p] = (p == 1 || p == 2) ? desc.chroma_ys : 0; + } + + bool is_ba = desc.num_planes > 0; + for (int p = 0; p < desc.num_planes; p++) + is_ba = !(desc.bpp[p] % 8u); + + if (is_ba) + desc.flags |= MP_IMGFLAG_BYTE_ALIGNED; + + if (desc.flags & MP_IMGFLAG_HAS_COMPS) { + if (desc.comps[3].size) + desc.flags |= MP_IMGFLAG_ALPHA; + + // Assuming all colors are (CCC+[A]) or (C+[A]), the latter being gray. + if (!desc.comps[1].size) + desc.flags |= MP_IMGFLAG_GRAY; + + bool bb = true; + for (int n = 0; n < MP_NUM_COMPONENTS; n++) { + if (desc.comps[n].offset % 8u || desc.comps[n].size % 8u) + bb = false; + } + if (bb) + desc.flags |= MP_IMGFLAG_BYTES; + } + + if ((desc.flags & (MP_IMGFLAG_YUV | MP_IMGFLAG_RGB)) + && (desc.flags & MP_IMGFLAG_HAS_COMPS) + && (desc.flags & MP_IMGFLAG_BYTES) + && ((desc.flags & MP_IMGFLAG_TYPE_MASK) == MP_IMGFLAG_TYPE_UINT)) + { + int cnt = mp_imgfmt_desc_get_num_comps(&desc); + bool same_depth = true; + for (int p = 0; p < desc.num_planes; p++) + same_depth &= desc.bpp[p] == desc.bpp[0]; + if (same_depth && cnt == desc.num_planes) { + if (desc.flags & MP_IMGFLAG_YUV) { + desc.flags |= MP_IMGFLAG_YUV_P; + } else { + desc.flags |= MP_IMGFLAG_RGB_P; + } + } + if (cnt == 3 && desc.num_planes == 2 && + desc.bpp[1] == desc.bpp[0] * 2 && + (desc.flags & MP_IMGFLAG_YUV)) + { + + desc.flags |= MP_IMGFLAG_YUV_NV; + } + } + + return desc; +} + +static bool validate_regular_imgfmt(const struct mp_regular_imgfmt *fmt) +{ + bool present[MP_NUM_COMPONENTS] = {0}; + int n_comp = 0; + + for (int n = 0; n < fmt->num_planes; n++) { + const struct mp_regular_imgfmt_plane *plane = &fmt->planes[n]; 
+ n_comp += plane->num_components; + if (n_comp > MP_NUM_COMPONENTS) + return false; + if (!plane->num_components) + return false; // no empty planes in between allowed + + bool pad_only = true; + int chroma_luma = 0; // luma: 1, chroma: 2, both: 3 + for (int i = 0; i < plane->num_components; i++) { + int comp = plane->components[i]; + if (comp > MP_NUM_COMPONENTS) + return false; + if (comp == 0) + continue; + pad_only = false; + if (present[comp - 1]) + return false; // no duplicates + present[comp - 1] = true; + chroma_luma |= (comp == 2 || comp == 3) ? 2 : 1; + } + if (pad_only) + return false; // no planes with only padding allowed + if ((fmt->chroma_xs > 0 || fmt->chroma_ys > 0) && chroma_luma == 3) + return false; // separate chroma/luma planes required + } + + if (!(present[0] || present[3]) || // at least component 1 or alpha needed + (present[1] && !present[0]) || // component 2 requires component 1 + (present[2] && !present[1])) // component 3 requires component 2 + return false; + + return true; +} + +static enum mp_csp get_forced_csp_from_flags(int flags) +{ + if (flags & MP_IMGFLAG_COLOR_XYZ) + return MP_CSP_XYZ; + + if (flags & MP_IMGFLAG_COLOR_RGB) + return MP_CSP_RGB; + + return MP_CSP_AUTO; +} + +enum mp_csp mp_imgfmt_get_forced_csp(int imgfmt) +{ + return get_forced_csp_from_flags(mp_imgfmt_get_desc(imgfmt).flags); +} + +static enum mp_component_type get_component_type_from_flags(int flags) +{ + if (flags & MP_IMGFLAG_TYPE_UINT) + return MP_COMPONENT_TYPE_UINT; + + if (flags & MP_IMGFLAG_TYPE_FLOAT) + return MP_COMPONENT_TYPE_FLOAT; + + return MP_COMPONENT_TYPE_UNKNOWN; +} + +enum mp_component_type mp_imgfmt_get_component_type(int imgfmt) +{ + return get_component_type_from_flags(mp_imgfmt_get_desc(imgfmt).flags); +} + +int mp_find_other_endian(int imgfmt) +{ + return pixfmt2imgfmt(av_pix_fmt_swap_endianness(imgfmt2pixfmt(imgfmt))); +} + +bool mp_get_regular_imgfmt(struct mp_regular_imgfmt *dst, int imgfmt) +{ + struct mp_regular_imgfmt res = 
{0}; + + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt); + if (!desc.num_planes) + return false; + res.num_planes = desc.num_planes; + + if (desc.endian_shift || !(desc.flags & MP_IMGFLAG_HAS_COMPS)) + return false; + + res.component_type = get_component_type_from_flags(desc.flags); + if (!res.component_type) + return false; + + struct mp_imgfmt_comp_desc *comp0 = &desc.comps[0]; + if (comp0->size < 1 || comp0->size > 64 || (comp0->size % 8u)) + return false; + + res.component_size = comp0->size / 8u; + res.component_pad = comp0->pad; + + for (int n = 0; n < res.num_planes; n++) { + if (desc.bpp[n] % comp0->size) + return false; + res.planes[n].num_components = desc.bpp[n] / comp0->size; + } + + for (int n = 0; n < MP_NUM_COMPONENTS; n++) { + struct mp_imgfmt_comp_desc *comp = &desc.comps[n]; + if (!comp->size) + continue; + + struct mp_regular_imgfmt_plane *plane = &res.planes[comp->plane]; + + res.num_planes = MPMAX(res.num_planes, comp->plane + 1); + + // We support uniform depth only. + if (comp->size != comp0->size || comp->pad != comp0->pad) + return false; + + // Size-aligned only. 
+ int pos = comp->offset / comp->size; + if (comp->offset != pos * comp->size || pos >= MP_NUM_COMPONENTS) + return false; + + if (plane->components[pos]) + return false; + plane->components[pos] = n + 1; + } + + res.chroma_xs = desc.chroma_xs; + res.chroma_ys = desc.chroma_ys; + + res.forced_csp = get_forced_csp_from_flags(desc.flags); + + if (!validate_regular_imgfmt(&res)) + return false; + + *dst = res; + return true; +} + +static bool regular_imgfmt_equals(struct mp_regular_imgfmt *a, + struct mp_regular_imgfmt *b) +{ + if (a->component_type != b->component_type || + a->component_size != b->component_size || + a->num_planes != b->num_planes || + a->component_pad != b->component_pad || + a->forced_csp != b->forced_csp || + a->chroma_xs != b->chroma_xs || + a->chroma_ys != b->chroma_ys) + return false; + + for (int n = 0; n < a->num_planes; n++) { + int num_comps = a->planes[n].num_components; + if (num_comps != b->planes[n].num_components) + return false; + for (int i = 0; i < num_comps; i++) { + if (a->planes[n].components[i] != b->planes[n].components[i]) + return false; + } + } + + return true; +} + +// Find a format that matches this one exactly. +int mp_find_regular_imgfmt(struct mp_regular_imgfmt *src) +{ + for (int n = IMGFMT_START + 1; n < IMGFMT_END; n++) { + struct mp_regular_imgfmt f; + if (mp_get_regular_imgfmt(&f, n) && regular_imgfmt_equals(src, &f)) + return n; + } + return 0; +} + +// Compare the dst image formats, and return the one which can carry more data +// (e.g. higher depth, more color components, lower chroma subsampling, etc.), +// with respect to what is required to keep most of the src format. +// Returns the imgfmt, or 0 on error. 
+int mp_imgfmt_select_best(int dst1, int dst2, int src) +{ + enum AVPixelFormat dst1pxf = imgfmt2pixfmt(dst1); + enum AVPixelFormat dst2pxf = imgfmt2pixfmt(dst2); + enum AVPixelFormat srcpxf = imgfmt2pixfmt(src); + enum AVPixelFormat dstlist[] = {dst1pxf, dst2pxf, AV_PIX_FMT_NONE}; + return pixfmt2imgfmt(avcodec_find_best_pix_fmt_of_list(dstlist, srcpxf, 1, 0)); +} + +// Same as mp_imgfmt_select_best(), but with a list of dst formats. +int mp_imgfmt_select_best_list(int *dst, int num_dst, int src) +{ + int best = 0; + for (int n = 0; n < num_dst; n++) + best = best ? mp_imgfmt_select_best(best, dst[n], src) : dst[n]; + return best; +} diff --git a/video/img_format.h b/video/img_format.h new file mode 100644 index 0000000..0753829 --- /dev/null +++ b/video/img_format.h @@ -0,0 +1,342 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPLAYER_IMG_FORMAT_H +#define MPLAYER_IMG_FORMAT_H + +#include <inttypes.h> + +#include "config.h" +#include "osdep/endian.h" +#include "misc/bstr.h" +#include "video/csputils.h" + +#define MP_MAX_PLANES 4 +#define MP_NUM_COMPONENTS 4 + +// mp_imgfmt_desc.comps[] is set to useful values. Some types of formats will +// use comps[], but not set this flag, because it doesn't cover all requirements +// (for example MP_IMGFLAG_PACKED_SS_YUV). 
#define MP_IMGFLAG_HAS_COMPS (1 << 0)

// all components start on byte boundaries
#define MP_IMGFLAG_BYTES (1 << 1)

// all pixels start on byte boundaries
#define MP_IMGFLAG_BYTE_ALIGNED (1 << 2)

// set if in little endian, or endian independent
#define MP_IMGFLAG_LE (1 << 3)

// set if in big endian, or endian independent
#define MP_IMGFLAG_BE (1 << 4)

// set if in native (host) endian, or endian independent
#define MP_IMGFLAG_NE MP_SELECT_LE_BE(MP_IMGFLAG_LE, MP_IMGFLAG_BE)

// set if an alpha component is included
#define MP_IMGFLAG_ALPHA (1 << 5)

// color class flags - can use via bit tests, or use the mask and compare
#define MP_IMGFLAG_COLOR_MASK (15 << 6)
#define MP_IMGFLAG_COLOR_YUV (1 << 6)
#define MP_IMGFLAG_COLOR_RGB (2 << 6)
#define MP_IMGFLAG_COLOR_XYZ (4 << 6)

// component type flags (same access conventions as MP_IMGFLAG_COLOR_*)
#define MP_IMGFLAG_TYPE_MASK (15 << 10)
#define MP_IMGFLAG_TYPE_UINT (1 << 10)
#define MP_IMGFLAG_TYPE_FLOAT (2 << 10)
#define MP_IMGFLAG_TYPE_PAL8 (4 << 10)
#define MP_IMGFLAG_TYPE_HW (8 << 10)

// Short aliases for the color class / component type flags above.
#define MP_IMGFLAG_YUV MP_IMGFLAG_COLOR_YUV
#define MP_IMGFLAG_RGB MP_IMGFLAG_COLOR_RGB
#define MP_IMGFLAG_PAL MP_IMGFLAG_TYPE_PAL8
#define MP_IMGFLAG_HWACCEL MP_IMGFLAG_TYPE_HW

// 1 component format (or 2 components if MP_IMGFLAG_ALPHA is set).
// This should probably be a separate MP_IMGFLAG_COLOR_GRAY, but for now it
// is too much of a mess.
#define MP_IMGFLAG_GRAY (1 << 14)

// Packed, sub-sampled YUV format. Does not apply to packed non-subsampled YUV.
// These formats pack multiple pixels into one sample with strange organization.
// In this specific case, mp_imgfmt_desc.align_x gives the size of a "full"
// pixel, which has align_x luma samples, and 1 chroma sample of each Cb and Cr.
// mp_imgfmt_desc.comps describes the chroma samples, and the first luma sample.
// All luma samples have the same configuration as the first one, and you can
// get their offsets with mp_imgfmt_get_packed_yuv_locations(). Note that the
// component offsets can be >= bpp[0]; the actual range is bpp[0]*align_x.
// These formats have no alpha.
#define MP_IMGFLAG_PACKED_SS_YUV (1 << 15)

// set if the format is in a standard YUV format:
// - planar and yuv colorspace
// - chroma shift 0-2
// - 1-4 planes (1: gray, 2: gray/alpha, 3: yuv, 4: yuv/alpha)
// - 8-16 bit per pixel/plane, all planes have same depth,
//   each plane has exactly one component
#define MP_IMGFLAG_YUV_P (1 << 16)

// Like MP_IMGFLAG_YUV_P, but RGB. This can be e.g. AV_PIX_FMT_GBRP. The planes
// are always shuffled (G - B - R [- A]).
#define MP_IMGFLAG_RGB_P (1 << 17)

// Semi-planar YUV formats, like AV_PIX_FMT_NV12.
#define MP_IMGFLAG_YUV_NV (1 << 18)

// Location of one component within the pixel data of a format.
struct mp_imgfmt_comp_desc {
    // Plane on which this component is.
    uint8_t plane;
    // Bit offset of first sample, from start of the pixel group (little endian).
    uint8_t offset : 6;
    // Number of bits used by each sample.
    uint8_t size : 6;
    // Internal padding. See mp_regular_imgfmt.component_pad.
    int8_t pad : 4;
};

struct mp_imgfmt_desc {
    int id;                // IMGFMT_*
    int flags;             // MP_IMGFLAG_* bitfield
    int8_t num_planes;
    int8_t chroma_xs, chroma_ys; // chroma shift (i.e. log2 of chroma pixel size)
    int8_t align_x, align_y; // pixel count to get byte alignment and to get
                             // to a pixel pos where luma & chroma aligns
                             // always power of 2
    int8_t bpp[MP_MAX_PLANES]; // bits per pixel (may be "average"; the real
                               // byte value is determined by align_x*bpp/8
                               // for align_x pixels)
    // chroma shifts per plane (provided for convenience with planar formats)
    // Packed YUV always uses xs[0]=ys[0]=0, because plane 0 contains luma in
    // addition to chroma, and thus is not sub-sampled (uses align_x=2 instead).
    int8_t xs[MP_MAX_PLANES];
    int8_t ys[MP_MAX_PLANES];

    // Description for each component. Generally valid only if flags has
    // MP_IMGFLAG_HAS_COMPS set.
    // This is indexed by component_type-1 (so 0=R, 1=G, etc.), see
    // mp_regular_imgfmt_plane.components[x] for component_type. Components not
    // present use size=0. Bits not covered by any component are random and not
    // interpreted by any software.
    // In particular, don't make the mistake of indexing this by plane.
    struct mp_imgfmt_comp_desc comps[MP_NUM_COMPONENTS];

    // log2 of the word size in bytes for endian swapping that needs to be
    // performed for converting to native endian. This is performed before any
    // other unpacking steps, and for all data covered by bits.
    // Always 0 if MP_IMGFLAG_NE is set.
    uint8_t endian_shift : 2;
};

struct mp_imgfmt_desc mp_imgfmt_get_desc(int imgfmt);

// Return the number of component types, or 0 if unknown.
int mp_imgfmt_desc_get_num_comps(struct mp_imgfmt_desc *desc);

// For MP_IMGFLAG_PACKED_SS_YUV formats (packed sub-sampled YUV): positions of
// further luma samples. luma_offsets must be an array of align_x size, and the
// function will return the offset (like in mp_imgfmt_comp_desc.offset) of each
// luma pixel. luma_offsets[0] == mp_imgfmt_desc.comps[0].offset.
bool mp_imgfmt_get_packed_yuv_locations(int imgfmt, uint8_t *luma_offsets);

// MP_CSP_AUTO for YUV, MP_CSP_RGB or MP_CSP_XYZ otherwise.
// (Because IMGFMT/AV_PIX_FMT conflate format and csp for RGB and XYZ.)
enum mp_csp mp_imgfmt_get_forced_csp(int imgfmt);

enum mp_component_type {
    MP_COMPONENT_TYPE_UNKNOWN = 0,
    MP_COMPONENT_TYPE_UINT,
    MP_COMPONENT_TYPE_FLOAT,
};

enum mp_component_type mp_imgfmt_get_component_type(int imgfmt);

struct mp_regular_imgfmt_plane {
    uint8_t num_components;
    // 1 is red/luminance/gray, 2 is green/Cb, 3 is blue/Cr, 4 is alpha.
    // 0 is used for padding (undefined contents).
    // It is guaranteed that non-0 values occur only once in the whole format.
    uint8_t components[MP_NUM_COMPONENTS];
};

// This describes pixel formats that are byte aligned, have byte aligned
// components, native endian, etc.
struct mp_regular_imgfmt {
    // Type of each component.
    enum mp_component_type component_type;

    // See mp_imgfmt_get_forced_csp(). Normally code should use
    // mp_image_params.colors. This field is only needed to map the format
    // unambiguously to FFmpeg formats.
    enum mp_csp forced_csp;

    // Size of each component in bytes.
    uint8_t component_size;

    // If >0, LSB padding, if <0, MSB padding. The padding bits are always 0.
    // This applies: bit_depth = component_size * 8 - abs(component_pad)
    //               bit_size  = component_size * 8 + MPMIN(0, component_pad)
    // E.g. P010: component_pad=6 (LSB always implied 0, all data in MSB)
    //            => has a "depth" of 10 bit, but usually treated as 16 bit value
    //      yuv420p10: component_pad=-6 (like a 10 bit value 0-extended to 16)
    //            => has depth of 10 bit, needs <<6 to get a 16 bit value
    int8_t component_pad;

    uint8_t num_planes;
    struct mp_regular_imgfmt_plane planes[MP_MAX_PLANES];

    // Chroma shifts for chroma planes. 0/0 is 4:4:4 YUV or RGB. If not 0/0,
    // then this is always a yuv format, with components 2/3 on separate planes
    // (reduced by the shift), and planes for components 1/4 are full sized.
    uint8_t chroma_xs, chroma_ys;
};

bool mp_get_regular_imgfmt(struct mp_regular_imgfmt *dst, int imgfmt);
int mp_find_regular_imgfmt(struct mp_regular_imgfmt *src);

// If imgfmt is valid, and there exists a format that is exactly the same, but
// has inverse endianness, return this other format. Otherwise return 0.
int mp_find_other_endian(int imgfmt);

enum mp_imgfmt {
    IMGFMT_NONE = 0,

    // Offset to make it harder to confuse these values with ffmpeg formats
    IMGFMT_START = 1000,

    // Planar YUV formats
    IMGFMT_444P,                // 1x1
    IMGFMT_420P,                // 2x2

    // Gray
    IMGFMT_Y8,
    IMGFMT_Y16,

    // Packed YUV formats (components are byte-accessed)
    IMGFMT_UYVY,                // U  Y0 V  Y1

    // Y plane + packed plane for chroma
    IMGFMT_NV12,

    // Like IMGFMT_NV12, but with 10 bits per component (and 6 bits of padding)
    IMGFMT_P010,

    // RGB/BGR Formats

    // Byte accessed (low address to high address)
    IMGFMT_ARGB,
    IMGFMT_BGRA,
    IMGFMT_ABGR,
    IMGFMT_RGBA,
    IMGFMT_BGR24,               // 3 bytes per pixel
    IMGFMT_RGB24,

    // Like e.g. IMGFMT_ARGB, but has a padding byte instead of alpha
    IMGFMT_0RGB,
    IMGFMT_BGR0,
    IMGFMT_0BGR,
    IMGFMT_RGB0,

    // Like IMGFMT_RGBA, but 2 bytes per component.
    IMGFMT_RGBA64,

    // Accessed with bit-shifts after endian-swapping the uint16_t pixel
    IMGFMT_RGB565,              // 5r 6g 5b (MSB to LSB)

    // AV_PIX_FMT_PAL8
    IMGFMT_PAL8,

    // Hardware accelerated formats. Plane data points to special data
    // structures, instead of pixel data.
    IMGFMT_VDPAU,           // VdpVideoSurface
    // plane 0: ID3D11Texture2D
    // plane 1: slice index cast to pointer
    IMGFMT_D3D11,
    IMGFMT_DXVA2,           // IDirect3DSurface9 (NV12/P010/P016)
    IMGFMT_MMAL,            // MMAL_BUFFER_HEADER_T
    IMGFMT_MEDIACODEC,      // AVMediaCodecBuffer
    IMGFMT_CUDA,            // CUDA Buffer

    // Not an actual format; base for mpv-specific descriptor table.
    // Some may still map to AV_PIX_FMT_*.
    IMGFMT_CUST_BASE,

    // Planar gray/alpha.
    IMGFMT_YAP8,
    IMGFMT_YAP16,

    // Planar YUV/alpha formats. Sometimes useful for internal processing. There
    // should be one for each subsampling factor, with and without alpha, gray.
    IMGFMT_YAPF,  // Note: non-alpha version exists in ffmpeg
    IMGFMT_444PF,
    IMGFMT_444APF,
    IMGFMT_420PF,
    IMGFMT_420APF,
    IMGFMT_422PF,
    IMGFMT_422APF,
    IMGFMT_440PF,
    IMGFMT_440APF,
    IMGFMT_410PF,
    IMGFMT_410APF,
    IMGFMT_411PF,
    IMGFMT_411APF,

    // Accessed with bit-shifts, uint32_t units.
    IMGFMT_RGB30,               // 2pad 10r 10g 10b (MSB to LSB)

    // Fringe formats for fringe RGB format repacking.
    IMGFMT_Y1,      // gray with 1 bit per pixel
    IMGFMT_GBRP1,   // planar RGB with N bits per color component
    IMGFMT_GBRP2,
    IMGFMT_GBRP3,
    IMGFMT_GBRP4,
    IMGFMT_GBRP5,
    IMGFMT_GBRP6,

    // Hardware accelerated formats (again).
    IMGFMT_VDPAU_OUTPUT,    // VdpOutputSurface
    IMGFMT_VAAPI,
    IMGFMT_VIDEOTOOLBOX,    // CVPixelBufferRef
    // Note: this entry only exists in some builds, so the numeric values of
    // all following enumerators depend on HAVE_VULKAN_INTEROP.
#if HAVE_VULKAN_INTEROP
    IMGFMT_VULKAN,          // VKImage
#endif
    IMGFMT_DRMPRIME,        // AVDRMFrameDescriptor

    // Generic pass-through of AV_PIX_FMT_*. Used for formats which don't have
    // a corresponding IMGFMT_ value.
    IMGFMT_AVPIXFMT_START,
    IMGFMT_AVPIXFMT_END = IMGFMT_AVPIXFMT_START + 500,

    IMGFMT_END,
};

// Evaluates to 1 if the descriptor of fmt has MP_IMGFLAG_HWACCEL set, else 0.
#define IMGFMT_IS_HWACCEL(fmt) (!!(mp_imgfmt_get_desc(fmt).flags & MP_IMGFLAG_HWACCEL))

int mp_imgfmt_from_name(bstr name);
char *mp_imgfmt_to_name_buf(char *buf, size_t buf_size, int fmt);
// Convenience wrapper for mp_imgfmt_to_name_buf() that passes a zeroed 16 byte
// compound literal as buffer. The buffer (and thus presumably the returned
// string) only lives until the end of the enclosing block.
#define mp_imgfmt_to_name(fmt) mp_imgfmt_to_name_buf((char[16]){0}, 16, (fmt))

char **mp_imgfmt_name_list(void);

// Alias for mp_imgfmt_to_name.
#define vo_format_name mp_imgfmt_to_name

int mp_imgfmt_select_best(int dst1, int dst2, int src);
int mp_imgfmt_select_best_list(int *dst, int num_dst, int src);

#endif /* MPLAYER_IMG_FORMAT_H */
diff --git a/video/mp_image.c b/video/mp_image.c
new file mode 100644
index 0000000..dff2051
--- /dev/null
+++ b/video/mp_image.c
@@ -0,0 +1,1289 @@
/*
 * This file is part of mpv.
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <limits.h> +#include <assert.h> + +#include <libavutil/mem.h> +#include <libavutil/common.h> +#include <libavutil/display.h> +#include <libavutil/bswap.h> +#include <libavutil/hwcontext.h> +#include <libavutil/intreadwrite.h> +#include <libavutil/rational.h> +#include <libavcodec/avcodec.h> +#include <libavutil/mastering_display_metadata.h> +#include <libplacebo/utils/libav.h> + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100) +# include <libavutil/dovi_meta.h> +#endif + +#include "mpv_talloc.h" + +#include "common/av_common.h" +#include "common/common.h" +#include "fmt-conversion.h" +#include "hwdec.h" +#include "mp_image.h" +#include "osdep/threads.h" +#include "sws_utils.h" +#include "out/placebo/utils.h" + +// Determine strides, plane sizes, and total required size for an image +// allocation. Returns total size on success, <0 on error. Unused planes +// have out_stride/out_plane_size to 0, and out_plane_offset set to -1 up +// until MP_MAX_PLANES-1. 
+static int mp_image_layout(int imgfmt, int w, int h, int stride_align, + int out_stride[MP_MAX_PLANES], + int out_plane_offset[MP_MAX_PLANES], + int out_plane_size[MP_MAX_PLANES]) +{ + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt); + + w = MP_ALIGN_UP(w, desc.align_x); + h = MP_ALIGN_UP(h, desc.align_y); + + struct mp_image_params params = {.imgfmt = imgfmt, .w = w, .h = h}; + + if (!mp_image_params_valid(¶ms) || desc.flags & MP_IMGFLAG_HWACCEL) + return -1; + + // Note: for non-mod-2 4:2:0 YUV frames, we have to allocate an additional + // top/right border. This is needed for correct handling of such + // images in filter and VO code (e.g. vo_vdpau or vo_gpu). + + for (int n = 0; n < MP_MAX_PLANES; n++) { + int alloc_w = mp_chroma_div_up(w, desc.xs[n]); + int alloc_h = MP_ALIGN_UP(h, 32) >> desc.ys[n]; + int line_bytes = (alloc_w * desc.bpp[n] + 7) / 8; + out_stride[n] = MP_ALIGN_NPOT(line_bytes, stride_align); + out_plane_size[n] = out_stride[n] * alloc_h; + } + if (desc.flags & MP_IMGFLAG_PAL) + out_plane_size[1] = AVPALETTE_SIZE; + + int sum = 0; + for (int n = 0; n < MP_MAX_PLANES; n++) { + out_plane_offset[n] = out_plane_size[n] ? sum : -1; + sum += out_plane_size[n]; + } + + return sum; +} + +// Return the total size needed for an image allocation of the given +// configuration (imgfmt, w, h must be set). Returns -1 on error. +// Assumes the allocation is already aligned on stride_align (otherwise you +// need to add padding yourself). +int mp_image_get_alloc_size(int imgfmt, int w, int h, int stride_align) +{ + int stride[MP_MAX_PLANES]; + int plane_offset[MP_MAX_PLANES]; + int plane_size[MP_MAX_PLANES]; + return mp_image_layout(imgfmt, w, h, stride_align, stride, plane_offset, + plane_size); +} + +// Fill the mpi->planes and mpi->stride fields of the given mpi with data +// from buffer according to the mpi's w/h/imgfmt fields. See mp_image_from_buffer +// aboud remarks how to allocate/use buffer/buffer_size. +// This does not free the data. 
You are expected to setup refcounting by +// setting mp_image.bufs before or after this function is called. +// Returns true on success, false on failure. +static bool mp_image_fill_alloc(struct mp_image *mpi, int stride_align, + void *buffer, int buffer_size) +{ + int stride[MP_MAX_PLANES]; + int plane_offset[MP_MAX_PLANES]; + int plane_size[MP_MAX_PLANES]; + int size = mp_image_layout(mpi->imgfmt, mpi->w, mpi->h, stride_align, + stride, plane_offset, plane_size); + if (size < 0 || size > buffer_size) + return false; + + int align = MP_ALIGN_UP((uintptr_t)buffer, stride_align) - (uintptr_t)buffer; + if (buffer_size - size < align) + return false; + uint8_t *s = buffer; + s += align; + + for (int n = 0; n < MP_MAX_PLANES; n++) { + mpi->planes[n] = plane_offset[n] >= 0 ? s + plane_offset[n] : NULL; + mpi->stride[n] = stride[n]; + } + + return true; +} + +// Create a mp_image from the provided buffer. The mp_image is filled according +// to the imgfmt/w/h parameters, and respecting the stride_align parameter to +// align the plane start pointers and strides. Once the last reference to the +// returned image is destroyed, free(free_opaque, buffer) is called. (Be aware +// that this can happen from any thread.) +// The allocated size of buffer must be given by buffer_size. buffer_size should +// be at least the value returned by mp_image_get_alloc_size(). If buffer is not +// already aligned to stride_align, the function will attempt to align the +// pointer itself by incrementing the buffer pointer until their alignment is +// achieved (if buffer_size is not large enough to allow aligning the buffer +// safely, the function fails). To be safe, you may want to overallocate the +// buffer by stride_align bytes, and include the overallocation in buffer_size. +// Returns NULL on failure. On failure, the free() callback is not called. 
+struct mp_image *mp_image_from_buffer(int imgfmt, int w, int h, int stride_align, + uint8_t *buffer, int buffer_size, + void *free_opaque, + void (*free)(void *opaque, uint8_t *data)) +{ + struct mp_image *mpi = mp_image_new_dummy_ref(NULL); + mp_image_setfmt(mpi, imgfmt); + mp_image_set_size(mpi, w, h); + + if (!mp_image_fill_alloc(mpi, stride_align, buffer, buffer_size)) + goto fail; + + mpi->bufs[0] = av_buffer_create(buffer, buffer_size, free, free_opaque, 0); + if (!mpi->bufs[0]) + goto fail; + + return mpi; + +fail: + talloc_free(mpi); + return NULL; +} + +static bool mp_image_alloc_planes(struct mp_image *mpi) +{ + assert(!mpi->planes[0]); + assert(!mpi->bufs[0]); + + int align = MP_IMAGE_BYTE_ALIGN; + + int size = mp_image_get_alloc_size(mpi->imgfmt, mpi->w, mpi->h, align); + if (size < 0) + return false; + + // Note: mp_image_pool assumes this creates only 1 AVBufferRef. + mpi->bufs[0] = av_buffer_alloc(size + align); + if (!mpi->bufs[0]) + return false; + + if (!mp_image_fill_alloc(mpi, align, mpi->bufs[0]->data, mpi->bufs[0]->size)) { + av_buffer_unref(&mpi->bufs[0]); + return false; + } + + return true; +} + +void mp_image_setfmt(struct mp_image *mpi, int out_fmt) +{ + struct mp_image_params params = mpi->params; + struct mp_imgfmt_desc fmt = mp_imgfmt_get_desc(out_fmt); + params.imgfmt = fmt.id; + mpi->fmt = fmt; + mpi->imgfmt = fmt.id; + mpi->num_planes = fmt.num_planes; + mpi->params = params; +} + +static void mp_image_destructor(void *ptr) +{ + mp_image_t *mpi = ptr; + for (int p = 0; p < MP_MAX_PLANES; p++) + av_buffer_unref(&mpi->bufs[p]); + av_buffer_unref(&mpi->hwctx); + av_buffer_unref(&mpi->icc_profile); + av_buffer_unref(&mpi->a53_cc); + av_buffer_unref(&mpi->dovi); + av_buffer_unref(&mpi->film_grain); + av_buffer_unref(&mpi->dovi_buf); + for (int n = 0; n < mpi->num_ff_side_data; n++) + av_buffer_unref(&mpi->ff_side_data[n].buf); + talloc_free(mpi->ff_side_data); +} + +int mp_chroma_div_up(int size, int shift) +{ + return (size + (1 << 
shift) - 1) >> shift; +} + +// Return the storage width in pixels of the given plane. +int mp_image_plane_w(struct mp_image *mpi, int plane) +{ + return mp_chroma_div_up(mpi->w, mpi->fmt.xs[plane]); +} + +// Return the storage height in pixels of the given plane. +int mp_image_plane_h(struct mp_image *mpi, int plane) +{ + return mp_chroma_div_up(mpi->h, mpi->fmt.ys[plane]); +} + +// Caller has to make sure this doesn't exceed the allocated plane data/strides. +void mp_image_set_size(struct mp_image *mpi, int w, int h) +{ + assert(w >= 0 && h >= 0); + mpi->w = mpi->params.w = w; + mpi->h = mpi->params.h = h; +} + +void mp_image_set_params(struct mp_image *image, + const struct mp_image_params *params) +{ + // possibly initialize other stuff + mp_image_setfmt(image, params->imgfmt); + mp_image_set_size(image, params->w, params->h); + image->params = *params; +} + +struct mp_image *mp_image_alloc(int imgfmt, int w, int h) +{ + struct mp_image *mpi = talloc_zero(NULL, struct mp_image); + talloc_set_destructor(mpi, mp_image_destructor); + + mp_image_set_size(mpi, w, h); + mp_image_setfmt(mpi, imgfmt); + if (!mp_image_alloc_planes(mpi)) { + talloc_free(mpi); + return NULL; + } + return mpi; +} + +int mp_image_approx_byte_size(struct mp_image *img) +{ + int total = sizeof(*img); + + for (int n = 0; n < MP_MAX_PLANES; n++) { + struct AVBufferRef *buf = img->bufs[n]; + if (buf) + total += buf->size; + } + + return total; +} + +struct mp_image *mp_image_new_copy(struct mp_image *img) +{ + struct mp_image *new = mp_image_alloc(img->imgfmt, img->w, img->h); + if (!new) + return NULL; + mp_image_copy(new, img); + mp_image_copy_attributes(new, img); + return new; +} + +// Make dst take over the image data of src, and free src. +// This is basically a safe version of *dst = *src; free(src); +// Only works with ref-counted images, and can't change image size/format. 
+void mp_image_steal_data(struct mp_image *dst, struct mp_image *src) +{ + assert(dst->imgfmt == src->imgfmt && dst->w == src->w && dst->h == src->h); + assert(dst->bufs[0] && src->bufs[0]); + + mp_image_destructor(dst); // unref old + talloc_free_children(dst); + + *dst = *src; + + *src = (struct mp_image){0}; + talloc_free(src); +} + +// Unref most data buffer (and clear the data array), but leave other fields +// allocated. In particular, mp_image.hwctx is preserved. +void mp_image_unref_data(struct mp_image *img) +{ + for (int n = 0; n < MP_MAX_PLANES; n++) { + img->planes[n] = NULL; + img->stride[n] = 0; + av_buffer_unref(&img->bufs[n]); + } +} + +static void ref_buffer(AVBufferRef **dst) +{ + if (*dst) { + *dst = av_buffer_ref(*dst); + MP_HANDLE_OOM(*dst); + } +} + +// Return a new reference to img. The returned reference is owned by the caller, +// while img is left untouched. +struct mp_image *mp_image_new_ref(struct mp_image *img) +{ + if (!img) + return NULL; + + if (!img->bufs[0]) + return mp_image_new_copy(img); + + struct mp_image *new = talloc_ptrtype(NULL, new); + talloc_set_destructor(new, mp_image_destructor); + *new = *img; + + for (int p = 0; p < MP_MAX_PLANES; p++) + ref_buffer(&new->bufs[p]); + + ref_buffer(&new->hwctx); + ref_buffer(&new->icc_profile); + ref_buffer(&new->a53_cc); + ref_buffer(&new->dovi); + ref_buffer(&new->film_grain); + ref_buffer(&new->dovi_buf); + + new->ff_side_data = talloc_memdup(NULL, new->ff_side_data, + new->num_ff_side_data * sizeof(new->ff_side_data[0])); + for (int n = 0; n < new->num_ff_side_data; n++) + ref_buffer(&new->ff_side_data[n].buf); + + return new; +} + +struct free_args { + void *arg; + void (*free)(void *arg); +}; + +static void call_free(void *opaque, uint8_t *data) +{ + struct free_args *args = opaque; + args->free(args->arg); + talloc_free(args); +} + +// Create a new mp_image based on img, but don't set any buffers. 
+// Using this is only valid until the original img is unreferenced (including +// implicit unreferencing of the data by mp_image_make_writeable()), unless +// a new reference is set. +struct mp_image *mp_image_new_dummy_ref(struct mp_image *img) +{ + struct mp_image *new = talloc_ptrtype(NULL, new); + talloc_set_destructor(new, mp_image_destructor); + *new = img ? *img : (struct mp_image){0}; + for (int p = 0; p < MP_MAX_PLANES; p++) + new->bufs[p] = NULL; + new->hwctx = NULL; + new->icc_profile = NULL; + new->a53_cc = NULL; + new->dovi = NULL; + new->film_grain = NULL; + new->dovi_buf = NULL; + new->num_ff_side_data = 0; + new->ff_side_data = NULL; + return new; +} + +// Return a reference counted reference to img. If the reference count reaches +// 0, call free(free_arg). The data passed by img must not be free'd before +// that. The new reference will be writeable. +// On allocation failure, unref the frame and return NULL. +// This is only used for hw decoding; this is important, because libav* expects +// all plane data to be accounted for by AVBufferRefs. +struct mp_image *mp_image_new_custom_ref(struct mp_image *img, void *free_arg, + void (*free)(void *arg)) +{ + struct mp_image *new = mp_image_new_dummy_ref(img); + + struct free_args *args = talloc_ptrtype(NULL, args); + *args = (struct free_args){free_arg, free}; + new->bufs[0] = av_buffer_create(NULL, 0, call_free, args, + AV_BUFFER_FLAG_READONLY); + if (new->bufs[0]) + return new; + talloc_free(new); + return NULL; +} + +bool mp_image_is_writeable(struct mp_image *img) +{ + if (!img->bufs[0]) + return true; // not ref-counted => always considered writeable + for (int p = 0; p < MP_MAX_PLANES; p++) { + if (!img->bufs[p]) + break; + if (!av_buffer_is_writable(img->bufs[p])) + return false; + } + return true; +} + +// Make the image data referenced by img writeable. This allocates new data +// if the data wasn't already writeable, and img->planes[] and img->stride[] +// will be set to the copy. 
+// Returns success; if false is returned, the image could not be made writeable. +bool mp_image_make_writeable(struct mp_image *img) +{ + if (mp_image_is_writeable(img)) + return true; + + struct mp_image *new = mp_image_new_copy(img); + if (!new) + return false; + mp_image_steal_data(img, new); + assert(mp_image_is_writeable(img)); + return true; +} + +// Helper function: unrefs *p_img, and sets *p_img to a new ref of new_value. +// Only unrefs *p_img and sets it to NULL if out of memory. +void mp_image_setrefp(struct mp_image **p_img, struct mp_image *new_value) +{ + if (*p_img != new_value) { + talloc_free(*p_img); + *p_img = new_value ? mp_image_new_ref(new_value) : NULL; + } +} + +// Mere helper function (mp_image can be directly free'd with talloc_free) +void mp_image_unrefp(struct mp_image **p_img) +{ + talloc_free(*p_img); + *p_img = NULL; +} + +void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height, + int dstStride, int srcStride) +{ + if (bytesPerLine == dstStride && dstStride == srcStride && height) { + if (srcStride < 0) { + src = (uint8_t*)src + (height - 1) * srcStride; + dst = (uint8_t*)dst + (height - 1) * dstStride; + srcStride = -srcStride; + } + + memcpy(dst, src, srcStride * (height - 1) + bytesPerLine); + } else { + for (int i = 0; i < height; i++) { + memcpy(dst, src, bytesPerLine); + src = (uint8_t*)src + srcStride; + dst = (uint8_t*)dst + dstStride; + } + } +} + +void mp_image_copy(struct mp_image *dst, struct mp_image *src) +{ + assert(dst->imgfmt == src->imgfmt); + assert(dst->w == src->w && dst->h == src->h); + assert(mp_image_is_writeable(dst)); + for (int n = 0; n < dst->num_planes; n++) { + int line_bytes = (mp_image_plane_w(dst, n) * dst->fmt.bpp[n] + 7) / 8; + int plane_h = mp_image_plane_h(dst, n); + memcpy_pic(dst->planes[n], src->planes[n], line_bytes, plane_h, + dst->stride[n], src->stride[n]); + } + if (dst->fmt.flags & MP_IMGFLAG_PAL) + memcpy(dst->planes[1], src->planes[1], AVPALETTE_SIZE); +} + +static enum 
mp_csp mp_image_params_get_forced_csp(struct mp_image_params *params) +{ + int imgfmt = params->hw_subfmt ? params->hw_subfmt : params->imgfmt; + return mp_imgfmt_get_forced_csp(imgfmt); +} + +static void assign_bufref(AVBufferRef **dst, AVBufferRef *new) +{ + av_buffer_unref(dst); + if (new) { + *dst = av_buffer_ref(new); + MP_HANDLE_OOM(*dst); + } +} + +void mp_image_copy_attributes(struct mp_image *dst, struct mp_image *src) +{ + assert(dst != src); + + dst->pict_type = src->pict_type; + dst->fields = src->fields; + dst->pts = src->pts; + dst->dts = src->dts; + dst->pkt_duration = src->pkt_duration; + dst->params.rotate = src->params.rotate; + dst->params.stereo3d = src->params.stereo3d; + dst->params.p_w = src->params.p_w; + dst->params.p_h = src->params.p_h; + dst->params.color = src->params.color; + dst->params.chroma_location = src->params.chroma_location; + dst->params.alpha = src->params.alpha; + dst->params.crop = src->params.crop; + dst->nominal_fps = src->nominal_fps; + + // ensure colorspace consistency + enum mp_csp dst_forced_csp = mp_image_params_get_forced_csp(&dst->params); + if (mp_image_params_get_forced_csp(&src->params) != dst_forced_csp) { + dst->params.color.space = dst_forced_csp != MP_CSP_AUTO ? 
+ dst_forced_csp : + mp_csp_guess_colorspace(src->w, src->h); + } + + if ((dst->fmt.flags & MP_IMGFLAG_PAL) && (src->fmt.flags & MP_IMGFLAG_PAL)) { + if (dst->planes[1] && src->planes[1]) { + if (mp_image_make_writeable(dst)) + memcpy(dst->planes[1], src->planes[1], AVPALETTE_SIZE); + } + } + assign_bufref(&dst->icc_profile, src->icc_profile); + assign_bufref(&dst->dovi, src->dovi); + assign_bufref(&dst->dovi_buf, src->dovi_buf); + assign_bufref(&dst->film_grain, src->film_grain); + assign_bufref(&dst->a53_cc, src->a53_cc); + + for (int n = 0; n < dst->num_ff_side_data; n++) + av_buffer_unref(&dst->ff_side_data[n].buf); + + MP_RESIZE_ARRAY(NULL, dst->ff_side_data, src->num_ff_side_data); + dst->num_ff_side_data = src->num_ff_side_data; + + for (int n = 0; n < dst->num_ff_side_data; n++) { + dst->ff_side_data[n].type = src->ff_side_data[n].type; + dst->ff_side_data[n].buf = av_buffer_ref(src->ff_side_data[n].buf); + MP_HANDLE_OOM(dst->ff_side_data[n].buf); + } +} + +// Crop the given image to (x0, y0)-(x1, y1) (bottom/right border exclusive) +// x0/y0 must be naturally aligned. +void mp_image_crop(struct mp_image *img, int x0, int y0, int x1, int y1) +{ + assert(x0 >= 0 && y0 >= 0); + assert(x0 <= x1 && y0 <= y1); + assert(x1 <= img->w && y1 <= img->h); + assert(!(x0 & (img->fmt.align_x - 1))); + assert(!(y0 & (img->fmt.align_y - 1))); + + for (int p = 0; p < img->num_planes; ++p) { + img->planes[p] += (y0 >> img->fmt.ys[p]) * img->stride[p] + + (x0 >> img->fmt.xs[p]) * img->fmt.bpp[p] / 8; + } + mp_image_set_size(img, x1 - x0, y1 - y0); +} + +void mp_image_crop_rc(struct mp_image *img, struct mp_rect rc) +{ + mp_image_crop(img, rc.x0, rc.y0, rc.x1, rc.y1); +} + +// Repeatedly write count patterns of src[0..src_size] to p. 
+static void memset_pattern(void *p, size_t count, uint8_t *src, size_t src_size) +{ + assert(src_size >= 1); + + if (src_size == 1) { + memset(p, src[0], count); + } else if (src_size == 2) { // >8 bit YUV => common, be slightly less naive + uint16_t val; + memcpy(&val, src, 2); + uint16_t *p16 = p; + while (count--) + *p16++ = val; + } else { + while (count--) { + memcpy(p, src, src_size); + p = (char *)p + src_size; + } + } +} + +static bool endian_swap_bytes(void *d, size_t bytes, size_t word_size) +{ + if (word_size != 2 && word_size != 4) + return false; + + size_t num_words = bytes / word_size; + uint8_t *ud = d; + + switch (word_size) { + case 2: + for (size_t x = 0; x < num_words; x++) + AV_WL16(ud + x * 2, AV_RB16(ud + x * 2)); + break; + case 4: + for (size_t x = 0; x < num_words; x++) + AV_WL32(ud + x * 2, AV_RB32(ud + x * 2)); + break; + default: + MP_ASSERT_UNREACHABLE(); + } + + return true; +} + +// Bottom/right border is allowed not to be aligned, but it might implicitly +// overwrite pixel data until the alignment (align_x/align_y) is reached. +// Alpha is cleared to 0 (fully transparent). +void mp_image_clear(struct mp_image *img, int x0, int y0, int x1, int y1) +{ + assert(x0 >= 0 && y0 >= 0); + assert(x0 <= x1 && y0 <= y1); + assert(x1 <= img->w && y1 <= img->h); + assert(!(x0 & (img->fmt.align_x - 1))); + assert(!(y0 & (img->fmt.align_y - 1))); + + struct mp_image area = *img; + struct mp_imgfmt_desc *fmt = &area.fmt; + mp_image_crop(&area, x0, y0, x1, y1); + + // "Black" color for each plane. + uint8_t plane_clear[MP_MAX_PLANES][8] = {0}; + int plane_size[MP_MAX_PLANES] = {0}; + int misery = 1; // pixel group width + + // YUV integer chroma needs special consideration, and technically luma is + // usually not 0 either. 
+ if ((fmt->flags & (MP_IMGFLAG_HAS_COMPS | MP_IMGFLAG_PACKED_SS_YUV)) && + (fmt->flags & MP_IMGFLAG_TYPE_MASK) == MP_IMGFLAG_TYPE_UINT && + (fmt->flags & MP_IMGFLAG_COLOR_MASK) == MP_IMGFLAG_COLOR_YUV) + { + uint64_t plane_clear_i[MP_MAX_PLANES] = {0}; + + // Need to handle "multiple" pixels with packed YUV. + uint8_t luma_offsets[4] = {0}; + if (fmt->flags & MP_IMGFLAG_PACKED_SS_YUV) { + misery = fmt->align_x; + if (misery <= MP_ARRAY_SIZE(luma_offsets)) // ignore if out of bounds + mp_imgfmt_get_packed_yuv_locations(fmt->id, luma_offsets); + } + + for (int c = 0; c < 4; c++) { + struct mp_imgfmt_comp_desc *cd = &fmt->comps[c]; + int plane_bits = fmt->bpp[cd->plane] * misery; + if (plane_bits <= 64 && plane_bits % 8u == 0 && cd->size) { + plane_size[cd->plane] = plane_bits / 8u; + int depth = cd->size + MPMIN(cd->pad, 0); + double m, o; + mp_get_csp_uint_mul(area.params.color.space, + area.params.color.levels, + depth, c + 1, &m, &o); + uint64_t val = MPCLAMP(lrint((0 - o) / m), 0, 1ull << depth); + plane_clear_i[cd->plane] |= val << cd->offset; + for (int x = 1; x < (c ? 
0 : misery); x++) + plane_clear_i[cd->plane] |= val << luma_offsets[x]; + } + } + + for (int p = 0; p < MP_MAX_PLANES; p++) { + if (!plane_clear_i[p]) + plane_size[p] = 0; + memcpy(&plane_clear[p][0], &plane_clear_i[p], 8); // endian dependent + + if (fmt->endian_shift) { + endian_swap_bytes(&plane_clear[p][0], plane_size[p], + 1 << fmt->endian_shift); + } + } + } + + for (int p = 0; p < area.num_planes; p++) { + int p_h = mp_image_plane_h(&area, p); + int p_w = mp_image_plane_w(&area, p); + for (int y = 0; y < p_h; y++) { + void *ptr = area.planes[p] + (ptrdiff_t)area.stride[p] * y; + if (plane_size[p]) { + memset_pattern(ptr, p_w / misery, plane_clear[p], plane_size[p]); + } else { + memset(ptr, 0, mp_image_plane_bytes(&area, p, 0, area.w)); + } + } + } +} + +void mp_image_clear_rc(struct mp_image *mpi, struct mp_rect rc) +{ + mp_image_clear(mpi, rc.x0, rc.y0, rc.x1, rc.y1); +} + +// Clear the are of the image _not_ covered by rc. +void mp_image_clear_rc_inv(struct mp_image *mpi, struct mp_rect rc) +{ + struct mp_rect clr[4]; + int cnt = mp_rect_subtract(&(struct mp_rect){0, 0, mpi->w, mpi->h}, &rc, clr); + for (int n = 0; n < cnt; n++) + mp_image_clear_rc(mpi, clr[n]); +} + +void mp_image_vflip(struct mp_image *img) +{ + for (int p = 0; p < img->num_planes; p++) { + int plane_h = mp_image_plane_h(img, p); + img->planes[p] = img->planes[p] + img->stride[p] * (plane_h - 1); + img->stride[p] = -img->stride[p]; + } +} + +bool mp_image_crop_valid(const struct mp_image_params *p) +{ + return p->crop.x1 > p->crop.x0 && p->crop.y1 > p->crop.y0 && + p->crop.x0 >= 0 && p->crop.y0 >= 0 && + p->crop.x1 <= p->w && p->crop.y1 <= p->h; +} + +// Display size derived from image size and pixel aspect ratio. 
+void mp_image_params_get_dsize(const struct mp_image_params *p, + int *d_w, int *d_h) +{ + if (mp_image_crop_valid(p)) + { + *d_w = mp_rect_w(p->crop); + *d_h = mp_rect_h(p->crop); + } else { + *d_w = p->w; + *d_h = p->h; + } + + if (p->p_w > p->p_h && p->p_h >= 1) + *d_w = MPCLAMP(*d_w * (int64_t)p->p_w / p->p_h, 1, INT_MAX); + if (p->p_h > p->p_w && p->p_w >= 1) + *d_h = MPCLAMP(*d_h * (int64_t)p->p_h / p->p_w, 1, INT_MAX); +} + +void mp_image_params_set_dsize(struct mp_image_params *p, int d_w, int d_h) +{ + AVRational ds = av_div_q((AVRational){d_w, d_h}, (AVRational){p->w, p->h}); + p->p_w = ds.num; + p->p_h = ds.den; +} + +char *mp_image_params_to_str_buf(char *b, size_t bs, + const struct mp_image_params *p) +{ + if (p && p->imgfmt) { + snprintf(b, bs, "%dx%d", p->w, p->h); + if (p->p_w != p->p_h || !p->p_w) + mp_snprintf_cat(b, bs, " [%d:%d]", p->p_w, p->p_h); + mp_snprintf_cat(b, bs, " %s", mp_imgfmt_to_name(p->imgfmt)); + if (p->hw_subfmt) + mp_snprintf_cat(b, bs, "[%s]", mp_imgfmt_to_name(p->hw_subfmt)); + mp_snprintf_cat(b, bs, " %s/%s/%s/%s/%s", + m_opt_choice_str(mp_csp_names, p->color.space), + m_opt_choice_str(mp_csp_prim_names, p->color.primaries), + m_opt_choice_str(mp_csp_trc_names, p->color.gamma), + m_opt_choice_str(mp_csp_levels_names, p->color.levels), + m_opt_choice_str(mp_csp_light_names, p->color.light)); + mp_snprintf_cat(b, bs, " CL=%s", + m_opt_choice_str(mp_chroma_names, p->chroma_location)); + if (mp_image_crop_valid(p)) { + mp_snprintf_cat(b, bs, " crop=%dx%d+%d+%d", mp_rect_w(p->crop), + mp_rect_h(p->crop), p->crop.x0, p->crop.y0); + } + if (p->rotate) + mp_snprintf_cat(b, bs, " rot=%d", p->rotate); + if (p->stereo3d > 0) { + mp_snprintf_cat(b, bs, " stereo=%s", + MP_STEREO3D_NAME_DEF(p->stereo3d, "?")); + } + if (p->alpha) { + mp_snprintf_cat(b, bs, " A=%s", + m_opt_choice_str(mp_alpha_names, p->alpha)); + } + } else { + snprintf(b, bs, "???"); + } + return b; +} + +// Return whether the image parameters are valid. 
+// Some non-essential fields are allowed to be unset (like colorspace flags). +bool mp_image_params_valid(const struct mp_image_params *p) +{ + // av_image_check_size has similar checks and triggers around 16000*16000 + // It's mostly needed to deal with the fact that offsets are sometimes + // ints. We also should (for now) do the same as FFmpeg, to be sure large + // images don't crash with libswscale or when wrapping with AVFrame and + // passing the result to filters. + if (p->w <= 0 || p->h <= 0 || (p->w + 128LL) * (p->h + 128LL) >= INT_MAX / 8) + return false; + + if (p->p_w < 0 || p->p_h < 0) + return false; + + if (p->rotate < 0 || p->rotate >= 360) + return false; + + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(p->imgfmt); + if (!desc.id) + return false; + + if (p->hw_subfmt && !(desc.flags & MP_IMGFLAG_HWACCEL)) + return false; + + return true; +} + +bool mp_image_params_equal(const struct mp_image_params *p1, + const struct mp_image_params *p2) +{ + return p1->imgfmt == p2->imgfmt && + p1->hw_subfmt == p2->hw_subfmt && + p1->w == p2->w && p1->h == p2->h && + p1->p_w == p2->p_w && p1->p_h == p2->p_h && + p1->force_window == p2->force_window && + mp_colorspace_equal(p1->color, p2->color) && + p1->chroma_location == p2->chroma_location && + p1->rotate == p2->rotate && + p1->stereo3d == p2->stereo3d && + p1->alpha == p2->alpha && + mp_rect_equals(&p1->crop, &p2->crop); +} + +// Set most image parameters, but not image format or size. +// Display size is used to set the PAR. 
+void mp_image_set_attributes(struct mp_image *image, + const struct mp_image_params *params) +{ + struct mp_image_params nparams = *params; + nparams.imgfmt = image->imgfmt; + nparams.w = image->w; + nparams.h = image->h; + if (nparams.imgfmt != params->imgfmt) + nparams.color = (struct mp_colorspace){0}; + mp_image_set_params(image, &nparams); +} + +static enum mp_csp_levels infer_levels(enum mp_imgfmt imgfmt) +{ + switch (imgfmt2pixfmt(imgfmt)) { + case AV_PIX_FMT_YUVJ420P: + case AV_PIX_FMT_YUVJ411P: + case AV_PIX_FMT_YUVJ422P: + case AV_PIX_FMT_YUVJ444P: + case AV_PIX_FMT_YUVJ440P: + case AV_PIX_FMT_GRAY8: + case AV_PIX_FMT_YA8: + case AV_PIX_FMT_GRAY9LE: + case AV_PIX_FMT_GRAY9BE: + case AV_PIX_FMT_GRAY10LE: + case AV_PIX_FMT_GRAY10BE: + case AV_PIX_FMT_GRAY12LE: + case AV_PIX_FMT_GRAY12BE: + case AV_PIX_FMT_GRAY14LE: + case AV_PIX_FMT_GRAY14BE: + case AV_PIX_FMT_GRAY16LE: + case AV_PIX_FMT_GRAY16BE: + case AV_PIX_FMT_YA16BE: + case AV_PIX_FMT_YA16LE: + return MP_CSP_LEVELS_PC; + default: + return MP_CSP_LEVELS_TV; + } +} + +// If details like params->colorspace/colorlevels are missing, guess them from +// the other settings. Also, even if they are set, make them consistent with +// the colorspace as implied by the pixel format. 
+void mp_image_params_guess_csp(struct mp_image_params *params) +{ + enum mp_csp forced_csp = mp_image_params_get_forced_csp(params); + if (forced_csp == MP_CSP_AUTO) { // YUV/other + if (params->color.space != MP_CSP_BT_601 && + params->color.space != MP_CSP_BT_709 && + params->color.space != MP_CSP_BT_2020_NC && + params->color.space != MP_CSP_BT_2020_C && + params->color.space != MP_CSP_SMPTE_240M && + params->color.space != MP_CSP_YCGCO) + { + // Makes no sense, so guess instead + // YCGCO should be separate, but libavcodec disagrees + params->color.space = MP_CSP_AUTO; + } + if (params->color.space == MP_CSP_AUTO) + params->color.space = mp_csp_guess_colorspace(params->w, params->h); + if (params->color.levels == MP_CSP_LEVELS_AUTO) { + if (params->color.gamma == MP_CSP_TRC_V_LOG) { + params->color.levels = MP_CSP_LEVELS_PC; + } else { + params->color.levels = infer_levels(params->imgfmt); + } + } + if (params->color.primaries == MP_CSP_PRIM_AUTO) { + // Guess based on the colormatrix as a first priority + if (params->color.space == MP_CSP_BT_2020_NC || + params->color.space == MP_CSP_BT_2020_C) { + params->color.primaries = MP_CSP_PRIM_BT_2020; + } else if (params->color.space == MP_CSP_BT_709) { + params->color.primaries = MP_CSP_PRIM_BT_709; + } else { + // Ambiguous colormatrix for BT.601, guess based on res + params->color.primaries = mp_csp_guess_primaries(params->w, params->h); + } + } + if (params->color.gamma == MP_CSP_TRC_AUTO) + params->color.gamma = MP_CSP_TRC_BT_1886; + } else if (forced_csp == MP_CSP_RGB) { + params->color.space = MP_CSP_RGB; + params->color.levels = MP_CSP_LEVELS_PC; + + // The majority of RGB content is either sRGB or (rarely) some other + // color space which we don't even handle, like AdobeRGB or + // ProPhotoRGB. The only reasonable thing we can do is assume it's + // sRGB and hope for the best, which should usually just work out fine. 
+ // Note: sRGB primaries = BT.709 primaries + if (params->color.primaries == MP_CSP_PRIM_AUTO) + params->color.primaries = MP_CSP_PRIM_BT_709; + if (params->color.gamma == MP_CSP_TRC_AUTO) + params->color.gamma = MP_CSP_TRC_SRGB; + } else if (forced_csp == MP_CSP_XYZ) { + params->color.space = MP_CSP_XYZ; + params->color.levels = MP_CSP_LEVELS_PC; + // Force gamma to ST428 as this is the only correct for DCDM X'Y'Z' + params->color.gamma = MP_CSP_TRC_ST428; + // Don't care about primaries, they shouldn't be used, or if anything + // MP_CSP_PRIM_ST428 should be defined. + } else { + // We have no clue. + params->color.space = MP_CSP_AUTO; + params->color.levels = MP_CSP_LEVELS_AUTO; + params->color.primaries = MP_CSP_PRIM_AUTO; + params->color.gamma = MP_CSP_TRC_AUTO; + } + + if (!params->color.hdr.max_luma) { + if (params->color.gamma == MP_CSP_TRC_HLG) { + params->color.hdr.max_luma = 1000; // reference display + } else { + // If the signal peak is unknown, we're forced to pick the TRC's + // nominal range as the signal peak to prevent clipping + params->color.hdr.max_luma = mp_trc_nom_peak(params->color.gamma) * MP_REF_WHITE; + } + } + + if (!mp_trc_is_hdr(params->color.gamma)) { + // Some clips have leftover HDR metadata after conversion to SDR, so to + // avoid blowing up the tone mapping code, strip/sanitize it + params->color.hdr = pl_hdr_metadata_empty; + } + + if (params->chroma_location == MP_CHROMA_AUTO) { + if (params->color.levels == MP_CSP_LEVELS_TV) + params->chroma_location = MP_CHROMA_LEFT; + if (params->color.levels == MP_CSP_LEVELS_PC) + params->chroma_location = MP_CHROMA_CENTER; + } + + if (params->color.light == MP_CSP_LIGHT_AUTO) { + // HLG is always scene-referred (using its own OOTF), everything else + // we assume is display-referred by default. 
+ if (params->color.gamma == MP_CSP_TRC_HLG) { + params->color.light = MP_CSP_LIGHT_SCENE_HLG; + } else { + params->color.light = MP_CSP_LIGHT_DISPLAY; + } + } +} + +// Create a new mp_image reference to av_frame. +struct mp_image *mp_image_from_av_frame(struct AVFrame *src) +{ + struct mp_image *dst = &(struct mp_image){0}; + AVFrameSideData *sd; + + for (int p = 0; p < MP_MAX_PLANES; p++) + dst->bufs[p] = src->buf[p]; + + dst->hwctx = src->hw_frames_ctx; + + mp_image_setfmt(dst, pixfmt2imgfmt(src->format)); + mp_image_set_size(dst, src->width, src->height); + + dst->params.p_w = src->sample_aspect_ratio.num; + dst->params.p_h = src->sample_aspect_ratio.den; + + for (int i = 0; i < 4; i++) { + dst->planes[i] = src->data[i]; + dst->stride[i] = src->linesize[i]; + } + + dst->pict_type = src->pict_type; + + dst->params.crop.x0 = src->crop_left; + dst->params.crop.y0 = src->crop_top; + dst->params.crop.x1 = src->width - src->crop_right; + dst->params.crop.y1 = src->height - src->crop_bottom; + + dst->fields = 0; +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 7, 100) + if (src->flags & AV_FRAME_FLAG_INTERLACED) + dst->fields |= MP_IMGFIELD_INTERLACED; + if (src->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) + dst->fields |= MP_IMGFIELD_TOP_FIRST; +#else + if (src->interlaced_frame) + dst->fields |= MP_IMGFIELD_INTERLACED; + if (src->top_field_first) + dst->fields |= MP_IMGFIELD_TOP_FIRST; +#endif + if (src->repeat_pict == 1) + dst->fields |= MP_IMGFIELD_REPEAT_FIRST; + + dst->params.color = (struct mp_colorspace){ + .space = avcol_spc_to_mp_csp(src->colorspace), + .levels = avcol_range_to_mp_csp_levels(src->color_range), + .primaries = avcol_pri_to_mp_csp_prim(src->color_primaries), + .gamma = avcol_trc_to_mp_csp_trc(src->color_trc), + }; + + dst->params.chroma_location = avchroma_location_to_mp(src->chroma_location); + + if (src->opaque_ref) { + struct mp_image_params *p = (void *)src->opaque_ref->data; + dst->params.stereo3d = p->stereo3d; + // Might be incorrect if 
colorspace changes. + dst->params.color.light = p->color.light; + dst->params.alpha = p->alpha; + } + + sd = av_frame_get_side_data(src, AV_FRAME_DATA_DISPLAYMATRIX); + if (sd) { + double r = av_display_rotation_get((int32_t *)(sd->data)); + if (!isnan(r)) + dst->params.rotate = (((int)(-r) % 360) + 360) % 360; + } + + sd = av_frame_get_side_data(src, AV_FRAME_DATA_ICC_PROFILE); + if (sd) + dst->icc_profile = sd->buf; + + AVFrameSideData *mdm = av_frame_get_side_data(src, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); + AVFrameSideData *clm = av_frame_get_side_data(src, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); + AVFrameSideData *dhp = av_frame_get_side_data(src, AV_FRAME_DATA_DYNAMIC_HDR_PLUS); + pl_map_hdr_metadata(&dst->params.color.hdr, &(struct pl_av_hdr_metadata) { + .mdm = (void *)(mdm ? mdm->data : NULL), + .clm = (void *)(clm ? clm->data : NULL), + .dhp = (void *)(dhp ? dhp->data : NULL), + }); + + sd = av_frame_get_side_data(src, AV_FRAME_DATA_A53_CC); + if (sd) + dst->a53_cc = sd->buf; + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100) + sd = av_frame_get_side_data(src, AV_FRAME_DATA_DOVI_METADATA); + if (sd) + dst->dovi = sd->buf; + + sd = av_frame_get_side_data(src, AV_FRAME_DATA_DOVI_RPU_BUFFER); + if (sd) + dst->dovi_buf = sd->buf; +#endif + + sd = av_frame_get_side_data(src, AV_FRAME_DATA_FILM_GRAIN_PARAMS); + if (sd) + dst->film_grain = sd->buf; + + for (int n = 0; n < src->nb_side_data; n++) { + sd = src->side_data[n]; + struct mp_ff_side_data mpsd = { + .type = sd->type, + .buf = sd->buf, + }; + MP_TARRAY_APPEND(NULL, dst->ff_side_data, dst->num_ff_side_data, mpsd); + } + + if (dst->hwctx) { + AVHWFramesContext *fctx = (void *)dst->hwctx->data; + dst->params.hw_subfmt = pixfmt2imgfmt(fctx->sw_format); + } + + struct mp_image *res = mp_image_new_ref(dst); + + // Allocated, but non-refcounted data. + talloc_free(dst->ff_side_data); + + return res; +} + + +// Convert the mp_image reference to a AVFrame reference. 
+struct AVFrame *mp_image_to_av_frame(struct mp_image *src) +{ + struct mp_image *new_ref = mp_image_new_ref(src); + AVFrame *dst = av_frame_alloc(); + if (!dst || !new_ref) { + talloc_free(new_ref); + av_frame_free(&dst); + return NULL; + } + + for (int p = 0; p < MP_MAX_PLANES; p++) { + dst->buf[p] = new_ref->bufs[p]; + new_ref->bufs[p] = NULL; + } + + dst->hw_frames_ctx = new_ref->hwctx; + new_ref->hwctx = NULL; + + dst->format = imgfmt2pixfmt(src->imgfmt); + dst->width = src->w; + dst->height = src->h; + + dst->crop_left = src->params.crop.x0; + dst->crop_top = src->params.crop.y0; + dst->crop_right = dst->width - src->params.crop.x1; + dst->crop_bottom = dst->height - src->params.crop.y1; + + dst->sample_aspect_ratio.num = src->params.p_w; + dst->sample_aspect_ratio.den = src->params.p_h; + + for (int i = 0; i < 4; i++) { + dst->data[i] = src->planes[i]; + dst->linesize[i] = src->stride[i]; + } + dst->extended_data = dst->data; + + dst->pict_type = src->pict_type; +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 7, 100) + if (src->fields & MP_IMGFIELD_INTERLACED) + dst->flags |= AV_FRAME_FLAG_INTERLACED; + if (src->fields & MP_IMGFIELD_TOP_FIRST) + dst->flags |= AV_FRAME_FLAG_TOP_FIELD_FIRST; +#else + if (src->fields & MP_IMGFIELD_INTERLACED) + dst->interlaced_frame = 1; + if (src->fields & MP_IMGFIELD_TOP_FIRST) + dst->top_field_first = 1; +#endif + if (src->fields & MP_IMGFIELD_REPEAT_FIRST) + dst->repeat_pict = 1; + + dst->colorspace = mp_csp_to_avcol_spc(src->params.color.space); + dst->color_range = mp_csp_levels_to_avcol_range(src->params.color.levels); + dst->color_primaries = + mp_csp_prim_to_avcol_pri(src->params.color.primaries); + dst->color_trc = mp_csp_trc_to_avcol_trc(src->params.color.gamma); + + dst->chroma_location = mp_chroma_location_to_av(src->params.chroma_location); + + dst->opaque_ref = av_buffer_alloc(sizeof(struct mp_image_params)); + MP_HANDLE_OOM(dst->opaque_ref); + *(struct mp_image_params *)dst->opaque_ref->data = src->params; + + 
if (src->icc_profile) { + AVFrameSideData *sd = + av_frame_new_side_data_from_buf(dst, AV_FRAME_DATA_ICC_PROFILE, + new_ref->icc_profile); + MP_HANDLE_OOM(sd); + new_ref->icc_profile = NULL; + } + + pl_avframe_set_color(dst, (struct pl_color_space){ + .primaries = mp_prim_to_pl(src->params.color.primaries), + .transfer = mp_trc_to_pl(src->params.color.gamma), + .hdr = src->params.color.hdr, + }); + + { + AVFrameSideData *sd = av_frame_new_side_data(dst, + AV_FRAME_DATA_DISPLAYMATRIX, + sizeof(int32_t) * 9); + MP_HANDLE_OOM(sd); + av_display_rotation_set((int32_t *)sd->data, src->params.rotate); + } + + // Add back side data, but only for types which are not specially handled + // above. Keep in mind that the types above will be out of sync anyway. + for (int n = 0; n < new_ref->num_ff_side_data; n++) { + struct mp_ff_side_data *mpsd = &new_ref->ff_side_data[n]; + if (!av_frame_get_side_data(dst, mpsd->type)) { + AVFrameSideData *sd = av_frame_new_side_data_from_buf(dst, mpsd->type, + mpsd->buf); + MP_HANDLE_OOM(sd); + mpsd->buf = NULL; + } + } + + talloc_free(new_ref); + + if (dst->format == AV_PIX_FMT_NONE) + av_frame_free(&dst); + return dst; +} + +// Same as mp_image_to_av_frame(), but unref img. (It does so even on failure.) 
+struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img) +{ + AVFrame *frame = mp_image_to_av_frame(img); + talloc_free(img); + return frame; +} + +void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride) +{ + if (bytesPerLine == stride && height) { + memset(dst, fill, stride * (height - 1) + bytesPerLine); + } else { + for (int i = 0; i < height; i++) { + memset(dst, fill, bytesPerLine); + dst = (uint8_t *)dst + stride; + } + } +} + +void memset16_pic(void *dst, int fill, int unitsPerLine, int height, int stride) +{ + if (fill == 0) { + memset_pic(dst, 0, unitsPerLine * 2, height, stride); + } else { + for (int i = 0; i < height; i++) { + uint16_t *line = dst; + uint16_t *end = line + unitsPerLine; + while (line < end) + *line++ = fill; + dst = (uint8_t *)dst + stride; + } + } +} + +// Pixel at the given luma position on the given plane. x/y always refer to +// non-subsampled coordinates (even if plane is chroma). +// The coordinates must be aligned to mp_imgfmt_desc.align_x/y (these are byte +// and chroma boundaries). +// You cannot access e.g. individual luma pixels on the luma plane with yuv420p. +void *mp_image_pixel_ptr(struct mp_image *img, int plane, int x, int y) +{ + assert(MP_IS_ALIGNED(x, img->fmt.align_x)); + assert(MP_IS_ALIGNED(y, img->fmt.align_y)); + return mp_image_pixel_ptr_ny(img, plane, x, y); +} + +// Like mp_image_pixel_ptr(), but do not require alignment on Y coordinates if +// the plane does not require it. Use with care. +// Useful for addressing luma rows. +void *mp_image_pixel_ptr_ny(struct mp_image *img, int plane, int x, int y) +{ + assert(MP_IS_ALIGNED(x, img->fmt.align_x)); + assert(MP_IS_ALIGNED(y, 1 << img->fmt.ys[plane])); + return img->planes[plane] + + img->stride[plane] * (ptrdiff_t)(y >> img->fmt.ys[plane]) + + (x >> img->fmt.xs[plane]) * (size_t)img->fmt.bpp[plane] / 8; +} + +// Return size of pixels [x0, x0+w-1] in bytes. 
The coordinates refer to non- +// subsampled pixels (basically plane 0), and the size is rounded to chroma +// and byte alignment boundaries for the entire image, even if plane!=0. +// x0!=0 is useful for rounding (e.g. 8 bpp, x0=7, w=7 => 0..15 => 2 bytes). +size_t mp_image_plane_bytes(struct mp_image *img, int plane, int x0, int w) +{ + int x1 = MP_ALIGN_UP(x0 + w, img->fmt.align_x); + x0 = MP_ALIGN_DOWN(x0, img->fmt.align_x); + size_t bpp = img->fmt.bpp[plane]; + int xs = img->fmt.xs[plane]; + return (x1 >> xs) * bpp / 8 - (x0 >> xs) * bpp / 8; +} diff --git a/video/mp_image.h b/video/mp_image.h new file mode 100644 index 0000000..0408aab --- /dev/null +++ b/video/mp_image.h @@ -0,0 +1,203 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPLAYER_MP_IMAGE_H +#define MPLAYER_MP_IMAGE_H + +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <inttypes.h> +#include "common/common.h" +#include "common/msg.h" +#include "csputils.h" +#include "video/img_format.h" + +// Assumed minimum align needed for image allocation. It's notable that FFmpeg's +// libraries except libavcodec don't really know what alignment they want. +// Things will randomly crash or get slower if the alignment is not satisfied. +// Whatever. This value should be pretty safe with current CPU architectures. 
+#define MP_IMAGE_BYTE_ALIGN 64 + +#define MP_IMGFIELD_TOP_FIRST 0x02 +#define MP_IMGFIELD_REPEAT_FIRST 0x04 +#define MP_IMGFIELD_INTERLACED 0x20 + +// Describes image parameters that usually stay constant. +// New fields can be added in the future. Code changing the parameters should +// usually copy the whole struct, so that fields added later will be preserved. +struct mp_image_params { + enum mp_imgfmt imgfmt; // pixel format + enum mp_imgfmt hw_subfmt; // underlying format for some hwaccel pixfmts + int w, h; // image dimensions + int p_w, p_h; // define pixel aspect ratio (undefined: 0/0) + bool force_window; // fake image created by handle_force_window + struct mp_colorspace color; + enum mp_chroma_location chroma_location; + // The image should be rotated clockwise (0-359 degrees). + int rotate; + enum mp_stereo3d_mode stereo3d; // image is encoded with this mode + enum mp_alpha_type alpha; // usually auto; only set if explicitly known + struct mp_rect crop; // crop applied on image +}; + +/* Memory management: + * - mp_image is a light-weight reference to the actual image data (pixels). + * The actual image data is reference counted and can outlive mp_image + * allocations. mp_image references can be created with mp_image_new_ref() + * and free'd with talloc_free() (the helpers mp_image_setrefp() and + * mp_image_unrefp() can also be used). The actual image data is free'd when + * the last mp_image reference to it is free'd. + * - Each mp_image has a clear owner. The owner can do anything with it, such + * as changing mp_image fields. Instead of making ownership ambiguous by + * sharing a mp_image reference, new references should be created. + * - Write access to the actual image data is allowed only after calling + * mp_image_make_writeable(), or if mp_image_is_writeable() returns true. + * Conceptually, images can be changed by their owner only, and copy-on-write + * is used to ensure that other references do not see any changes to the + * image data. 
mp_image_make_writeable() will do that copy if required. + */ +typedef struct mp_image { + int w, h; // visible dimensions (redundant with params.w/h) + + struct mp_image_params params; + + // fields redundant to params.imgfmt, for convenience or compatibility + struct mp_imgfmt_desc fmt; + enum mp_imgfmt imgfmt; + int num_planes; + + uint8_t *planes[MP_MAX_PLANES]; + int stride[MP_MAX_PLANES]; + + int pict_type; // 0->unknown, 1->I, 2->P, 3->B + int fields; + + /* only inside filter chain */ + double pts; + /* only after decoder */ + double dts, pkt_duration; + /* container reported FPS; can be incorrect, or 0 if unknown */ + double nominal_fps; + /* for private use */ + void* priv; + + // Reference-counted data references. + // These do not necessarily map directly to planes[]. They can have + // different order or count. There shouldn't be more buffers than planes. + // If bufs[n] is NULL, bufs[n+1] must also be NULL. + // All mp_* functions manage this automatically; do not mess with it. + // (See also AVFrame.buf.) + struct AVBufferRef *bufs[MP_MAX_PLANES]; + // Points to AVHWFramesContext* (same as AVFrame.hw_frames_ctx) + struct AVBufferRef *hwctx; + // Embedded ICC profile, if any + struct AVBufferRef *icc_profile; + // Closed captions packet, if any (only after decoder) + struct AVBufferRef *a53_cc; + // Dolby Vision metadata, if any + struct AVBufferRef *dovi; + // Film grain data, if any + struct AVBufferRef *film_grain; + // Dolby Vision RPU buffer, if any + struct AVBufferRef *dovi_buf; + // Other side data we don't care about. 
+ struct mp_ff_side_data *ff_side_data; + int num_ff_side_data; +} mp_image_t; + +struct mp_ff_side_data { + int type; + struct AVBufferRef *buf; +}; + +int mp_chroma_div_up(int size, int shift); + +int mp_image_get_alloc_size(int imgfmt, int w, int h, int stride_align); +struct mp_image *mp_image_from_buffer(int imgfmt, int w, int h, int stride_align, + uint8_t *buffer, int buffer_size, + void *free_opaque, + void (*free)(void *opaque, uint8_t *data)); + +struct mp_image *mp_image_alloc(int fmt, int w, int h); +void mp_image_copy(struct mp_image *dmpi, struct mp_image *mpi); +void mp_image_copy_attributes(struct mp_image *dmpi, struct mp_image *mpi); +struct mp_image *mp_image_new_copy(struct mp_image *img); +struct mp_image *mp_image_new_ref(struct mp_image *img); +bool mp_image_is_writeable(struct mp_image *img); +bool mp_image_make_writeable(struct mp_image *img); +void mp_image_setrefp(struct mp_image **p_img, struct mp_image *new_value); +void mp_image_unrefp(struct mp_image **p_img); + +void mp_image_clear(struct mp_image *mpi, int x0, int y0, int x1, int y1); +void mp_image_clear_rc(struct mp_image *mpi, struct mp_rect rc); +void mp_image_clear_rc_inv(struct mp_image *mpi, struct mp_rect rc); +void mp_image_crop(struct mp_image *img, int x0, int y0, int x1, int y1); +void mp_image_crop_rc(struct mp_image *img, struct mp_rect rc); +void mp_image_vflip(struct mp_image *img); + +void mp_image_set_size(struct mp_image *mpi, int w, int h); +int mp_image_plane_w(struct mp_image *mpi, int plane); +int mp_image_plane_h(struct mp_image *mpi, int plane); + +void mp_image_setfmt(mp_image_t* mpi, int out_fmt); +void mp_image_steal_data(struct mp_image *dst, struct mp_image *src); +void mp_image_unref_data(struct mp_image *img); + +int mp_image_approx_byte_size(struct mp_image *img); + +struct mp_image *mp_image_new_dummy_ref(struct mp_image *img); +struct mp_image *mp_image_new_custom_ref(struct mp_image *img, void *arg, + void (*free)(void *arg)); + +void 
mp_image_params_guess_csp(struct mp_image_params *params); + +char *mp_image_params_to_str_buf(char *b, size_t bs, + const struct mp_image_params *p); +#define mp_image_params_to_str(p) mp_image_params_to_str_buf((char[256]){0}, 256, p) + +bool mp_image_crop_valid(const struct mp_image_params *p); +bool mp_image_params_valid(const struct mp_image_params *p); +bool mp_image_params_equal(const struct mp_image_params *p1, + const struct mp_image_params *p2); + +void mp_image_params_get_dsize(const struct mp_image_params *p, + int *d_w, int *d_h); +void mp_image_params_set_dsize(struct mp_image_params *p, int d_w, int d_h); + +void mp_image_set_params(struct mp_image *image, + const struct mp_image_params *params); + +void mp_image_set_attributes(struct mp_image *image, + const struct mp_image_params *params); + +struct AVFrame; +struct mp_image *mp_image_from_av_frame(struct AVFrame *av_frame); +struct AVFrame *mp_image_to_av_frame(struct mp_image *img); +struct AVFrame *mp_image_to_av_frame_and_unref(struct mp_image *img); + +void memcpy_pic(void *dst, const void *src, int bytesPerLine, int height, + int dstStride, int srcStride); +void memset_pic(void *dst, int fill, int bytesPerLine, int height, int stride); +void memset16_pic(void *dst, int fill, int unitsPerLine, int height, int stride); + +void *mp_image_pixel_ptr(struct mp_image *img, int plane, int x, int y); +void *mp_image_pixel_ptr_ny(struct mp_image *img, int plane, int x, int y); +size_t mp_image_plane_bytes(struct mp_image *img, int plane, int x0, int w); + +#endif /* MPLAYER_MP_IMAGE_H */ diff --git a/video/mp_image_pool.c b/video/mp_image_pool.c new file mode 100644 index 0000000..0b5e520 --- /dev/null +++ b/video/mp_image_pool.c @@ -0,0 +1,472 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "config.h" + +#include <stddef.h> +#include <stdbool.h> +#include <assert.h> + +#include <libavutil/buffer.h> +#include <libavutil/hwcontext.h> +#if HAVE_VULKAN_INTEROP +#include <libavutil/hwcontext_vulkan.h> +#endif +#include <libavutil/mem.h> +#include <libavutil/pixdesc.h> + +#include "mpv_talloc.h" + +#include "common/common.h" + +#include "fmt-conversion.h" +#include "mp_image_pool.h" +#include "mp_image.h" +#include "osdep/threads.h" + +static mp_static_mutex pool_mutex = MP_STATIC_MUTEX_INITIALIZER; +#define pool_lock() mp_mutex_lock(&pool_mutex) +#define pool_unlock() mp_mutex_unlock(&pool_mutex) + +// Thread-safety: the pool itself is not thread-safe, but pool-allocated images +// can be referenced and unreferenced from other threads. (As long as the image +// destructors are thread-safe.) + +struct mp_image_pool { + struct mp_image **images; + int num_images; + + int fmt, w, h; + + mp_image_allocator allocator; + void *allocator_ctx; + + bool use_lru; + unsigned int lru_counter; +}; + +// Used to gracefully handle the case when the pool is freed while image +// references allocated from the image pool are still held by someone. +struct image_flags { + // If both of these are false, the image must be freed. 
+ bool referenced; // outside mp_image reference exists + bool pool_alive; // the mp_image_pool references this + unsigned int order; // for LRU allocation (basically a timestamp) +}; + +static void image_pool_destructor(void *ptr) +{ + struct mp_image_pool *pool = ptr; + mp_image_pool_clear(pool); +} + +// If tparent!=NULL, set it as talloc parent for the pool. +struct mp_image_pool *mp_image_pool_new(void *tparent) +{ + struct mp_image_pool *pool = talloc_ptrtype(tparent, pool); + talloc_set_destructor(pool, image_pool_destructor); + *pool = (struct mp_image_pool) {0}; + return pool; +} + +void mp_image_pool_clear(struct mp_image_pool *pool) +{ + for (int n = 0; n < pool->num_images; n++) { + struct mp_image *img = pool->images[n]; + struct image_flags *it = img->priv; + bool referenced; + pool_lock(); + assert(it->pool_alive); + it->pool_alive = false; + referenced = it->referenced; + pool_unlock(); + if (!referenced) + talloc_free(img); + } + pool->num_images = 0; +} + +// This is the only function that is allowed to run in a different thread. +// (Consider passing an image to another thread, which frees it.) +static void unref_image(void *opaque, uint8_t *data) +{ + struct mp_image *img = opaque; + struct image_flags *it = img->priv; + bool alive; + pool_lock(); + assert(it->referenced); + it->referenced = false; + alive = it->pool_alive; + pool_unlock(); + if (!alive) + talloc_free(img); +} + +// Return a new image of given format/size. Unlike mp_image_pool_get(), this +// returns NULL if there is no free image of this format/size. 
+struct mp_image *mp_image_pool_get_no_alloc(struct mp_image_pool *pool, int fmt, + int w, int h) +{ + struct mp_image *new = NULL; + pool_lock(); + for (int n = 0; n < pool->num_images; n++) { + struct mp_image *img = pool->images[n]; + struct image_flags *img_it = img->priv; + assert(img_it->pool_alive); + if (!img_it->referenced) { + if (img->imgfmt == fmt && img->w == w && img->h == h) { + if (pool->use_lru) { + struct image_flags *new_it = new ? new->priv : NULL; + if (!new_it || new_it->order > img_it->order) + new = img; + } else { + new = img; + break; + } + } + } + } + pool_unlock(); + if (!new) + return NULL; + + // Reference the new image. Since mp_image_pool is not declared thread-safe, + // and unreffing images from other threads does not allocate new images, + // no synchronization is required here. + for (int p = 0; p < MP_MAX_PLANES; p++) + assert(!!new->bufs[p] == !p); // only 1 AVBufferRef + + struct mp_image *ref = mp_image_new_dummy_ref(new); + + // This assumes the buffer is at this point exclusively owned by us: we + // can't track whether the buffer is unique otherwise. + // (av_buffer_is_writable() checks the refcount of the new buffer only.) + int flags = av_buffer_is_writable(new->bufs[0]) ? 0 : AV_BUFFER_FLAG_READONLY; + ref->bufs[0] = av_buffer_create(new->bufs[0]->data, new->bufs[0]->size, + unref_image, new, flags); + if (!ref->bufs[0]) { + talloc_free(ref); + return NULL; + } + + struct image_flags *it = new->priv; + assert(!it->referenced && it->pool_alive); + it->referenced = true; + it->order = ++pool->lru_counter; + return ref; +} + +void mp_image_pool_add(struct mp_image_pool *pool, struct mp_image *new) +{ + struct image_flags *it = talloc_ptrtype(new, it); + *it = (struct image_flags) { .pool_alive = true }; + new->priv = it; + MP_TARRAY_APPEND(pool, pool->images, pool->num_images, new); +} + +// Return a new image of given format/size. 
The only difference to +// mp_image_alloc() is that there is a transparent mechanism to recycle image +// data allocations through this pool. +// If pool==NULL, mp_image_alloc() is called (for convenience). +// The image can be free'd with talloc_free(). +// Returns NULL on OOM. +struct mp_image *mp_image_pool_get(struct mp_image_pool *pool, int fmt, + int w, int h) +{ + if (!pool) + return mp_image_alloc(fmt, w, h); + struct mp_image *new = mp_image_pool_get_no_alloc(pool, fmt, w, h); + if (!new) { + if (fmt != pool->fmt || w != pool->w || h != pool->h) + mp_image_pool_clear(pool); + pool->fmt = fmt; + pool->w = w; + pool->h = h; + if (pool->allocator) { + new = pool->allocator(pool->allocator_ctx, fmt, w, h); + } else { + new = mp_image_alloc(fmt, w, h); + } + if (!new) + return NULL; + mp_image_pool_add(pool, new); + new = mp_image_pool_get_no_alloc(pool, fmt, w, h); + } + return new; +} + +// Like mp_image_new_copy(), but allocate the image out of the pool. +// If pool==NULL, a plain copy is made (for convenience). +// Returns NULL on OOM. +struct mp_image *mp_image_pool_new_copy(struct mp_image_pool *pool, + struct mp_image *img) +{ + struct mp_image *new = mp_image_pool_get(pool, img->imgfmt, img->w, img->h); + if (new) { + mp_image_copy(new, img); + mp_image_copy_attributes(new, img); + } + return new; +} + +// Like mp_image_make_writeable(), but if a copy has to be made, allocate it +// out of the pool. +// If pool==NULL, mp_image_make_writeable() is called (for convenience). +// Returns false on failure (see mp_image_make_writeable()). +bool mp_image_pool_make_writeable(struct mp_image_pool *pool, + struct mp_image *img) +{ + if (mp_image_is_writeable(img)) + return true; + struct mp_image *new = mp_image_pool_new_copy(pool, img); + if (!new) + return false; + mp_image_steal_data(img, new); + assert(mp_image_is_writeable(img)); + return true; +} + +// Call cb(cb_data, fmt, w, h) to allocate an image. 
Note that the resulting +// image must use only 1 AVBufferRef. The returned image must also be owned +// exclusively by the image pool, otherwise mp_image_is_writeable() will not +// work due to FFmpeg restrictions. +void mp_image_pool_set_allocator(struct mp_image_pool *pool, + mp_image_allocator cb, void *cb_data) +{ + pool->allocator = cb; + pool->allocator_ctx = cb_data; +} + +// Put into LRU mode. (Likely better for hwaccel surfaces, but worse for memory.) +void mp_image_pool_set_lru(struct mp_image_pool *pool) +{ + pool->use_lru = true; +} + +// Return the sw image format mp_image_hw_download() would use. This can be +// different from src->params.hw_subfmt in obscure cases. +int mp_image_hw_download_get_sw_format(struct mp_image *src) +{ + if (!src->hwctx) + return 0; + + // Try to find the first format which we can apparently use. + int imgfmt = 0; + enum AVPixelFormat *fmts; + if (av_hwframe_transfer_get_formats(src->hwctx, + AV_HWFRAME_TRANSFER_DIRECTION_FROM, &fmts, 0) < 0) + return 0; + for (int n = 0; fmts[n] != AV_PIX_FMT_NONE; n++) { + imgfmt = pixfmt2imgfmt(fmts[n]); + if (imgfmt) + break; + } + av_free(fmts); + + return imgfmt; +} + +// Copies the contents of the HW surface src to system memory and returns it. +// If swpool is not NULL, it's used to allocate the target image. +// src must be a hw surface with a AVHWFramesContext attached. +// The returned image is cropped as needed. +// Returns NULL on failure. +struct mp_image *mp_image_hw_download(struct mp_image *src, + struct mp_image_pool *swpool) +{ + int imgfmt = mp_image_hw_download_get_sw_format(src); + if (!imgfmt) + return NULL; + + assert(src->hwctx); + AVHWFramesContext *fctx = (void *)src->hwctx->data; + + struct mp_image *dst = + mp_image_pool_get(swpool, imgfmt, fctx->width, fctx->height); + if (!dst) + return NULL; + + // Target image must be writable, so unref it. 
+ AVFrame *dstav = mp_image_to_av_frame_and_unref(dst); + if (!dstav) + return NULL; + + AVFrame *srcav = mp_image_to_av_frame(src); + if (!srcav) { + av_frame_unref(dstav); + return NULL; + } + + int res = av_hwframe_transfer_data(dstav, srcav, 0); + av_frame_free(&srcav); + dst = mp_image_from_av_frame(dstav); + av_frame_free(&dstav); + if (res >= 0 && dst) { + mp_image_set_size(dst, src->w, src->h); + mp_image_copy_attributes(dst, src); + } else { + mp_image_unrefp(&dst); + } + return dst; +} + +bool mp_image_hw_upload(struct mp_image *hw_img, struct mp_image *src) +{ + if (hw_img->w != src->w || hw_img->h != src->h) + return false; + + if (!hw_img->hwctx) + return false; + + bool ok = false; + AVFrame *dstav = NULL; + AVFrame *srcav = NULL; + + // This means the destination image will not be "writable", which would be + // a pain if Libav enforced this - fortunately it doesn't care. We can + // transfer data to it even if there are multiple refs. + dstav = mp_image_to_av_frame(hw_img); + if (!dstav) + goto done; + + srcav = mp_image_to_av_frame(src); + if (!srcav) + goto done; + + ok = av_hwframe_transfer_data(dstav, srcav, 0) >= 0; + +done: + av_frame_free(&srcav); + av_frame_free(&dstav); + + if (ok) + mp_image_copy_attributes(hw_img, src); + return ok; +} + +bool mp_update_av_hw_frames_pool(struct AVBufferRef **hw_frames_ctx, + struct AVBufferRef *hw_device_ctx, + int imgfmt, int sw_imgfmt, int w, int h, + bool disable_multiplane) +{ + enum AVPixelFormat format = imgfmt2pixfmt(imgfmt); + enum AVPixelFormat sw_format = imgfmt2pixfmt(sw_imgfmt); + + if (format == AV_PIX_FMT_NONE || sw_format == AV_PIX_FMT_NONE || + !hw_device_ctx || w < 1 || h < 1) + { + av_buffer_unref(hw_frames_ctx); + return false; + } + + if (*hw_frames_ctx) { + AVHWFramesContext *hw_frames = (void *)(*hw_frames_ctx)->data; + + if (hw_frames->device_ref->data != hw_device_ctx->data || + hw_frames->format != format || hw_frames->sw_format != sw_format || + hw_frames->width != w || 
hw_frames->height != h) + av_buffer_unref(hw_frames_ctx); + } + + if (!*hw_frames_ctx) { + *hw_frames_ctx = av_hwframe_ctx_alloc(hw_device_ctx); + if (!*hw_frames_ctx) + return false; + + AVHWFramesContext *hw_frames = (void *)(*hw_frames_ctx)->data; + hw_frames->format = format; + hw_frames->sw_format = sw_format; + hw_frames->width = w; + hw_frames->height = h; + +#if HAVE_VULKAN_INTEROP + if (format == AV_PIX_FMT_VULKAN && disable_multiplane) { + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(sw_format); + if ((desc->flags & AV_PIX_FMT_FLAG_PLANAR) && + !(desc->flags & AV_PIX_FMT_FLAG_RGB)) { + AVVulkanFramesContext *vk_frames = hw_frames->hwctx; + vk_frames->flags = AV_VK_FRAME_FLAG_DISABLE_MULTIPLANE; + } + } +#endif + + if (av_hwframe_ctx_init(*hw_frames_ctx) < 0) { + av_buffer_unref(hw_frames_ctx); + return false; + } + } + + return true; +} + +struct mp_image *mp_av_pool_image_hw_upload(struct AVBufferRef *hw_frames_ctx, + struct mp_image *src) +{ + AVFrame *av_frame = av_frame_alloc(); + if (!av_frame) + return NULL; + if (av_hwframe_get_buffer(hw_frames_ctx, av_frame, 0) < 0) { + av_frame_free(&av_frame); + return NULL; + } + struct mp_image *dst = mp_image_from_av_frame(av_frame); + av_frame_free(&av_frame); + if (!dst) + return NULL; + + if (dst->w < src->w || dst->h < src->h) { + talloc_free(dst); + return NULL; + } + + mp_image_set_size(dst, src->w, src->h); + + if (!mp_image_hw_upload(dst, src)) { + talloc_free(dst); + return NULL; + } + + mp_image_copy_attributes(dst, src); + return dst; +} + +struct mp_image *mp_av_pool_image_hw_map(struct AVBufferRef *hw_frames_ctx, + struct mp_image *src) +{ + AVFrame *dst_frame = av_frame_alloc(); + if (!dst_frame) + return NULL; + + dst_frame->format = ((AVHWFramesContext*)hw_frames_ctx->data)->format; + dst_frame->hw_frames_ctx = av_buffer_ref(hw_frames_ctx); + + AVFrame *src_frame = mp_image_to_av_frame(src); + if (av_hwframe_map(dst_frame, src_frame, 0) < 0) { + av_frame_free(&src_frame); + 
av_frame_free(&dst_frame); + return NULL; + } + av_frame_free(&src_frame); + + struct mp_image *dst = mp_image_from_av_frame(dst_frame); + av_frame_free(&dst_frame); + if (!dst) + return NULL; + + mp_image_copy_attributes(dst, src); + return dst; +} diff --git a/video/mp_image_pool.h b/video/mp_image_pool.h new file mode 100644 index 0000000..8cb2a5f --- /dev/null +++ b/video/mp_image_pool.h @@ -0,0 +1,47 @@ +#ifndef MPV_MP_IMAGE_POOL_H +#define MPV_MP_IMAGE_POOL_H + +#include <stdbool.h> + +struct mp_image_pool; + +struct mp_image_pool *mp_image_pool_new(void *tparent); +struct mp_image *mp_image_pool_get(struct mp_image_pool *pool, int fmt, + int w, int h); +// the reference to "new" is transferred to the pool +void mp_image_pool_add(struct mp_image_pool *pool, struct mp_image *new); +void mp_image_pool_clear(struct mp_image_pool *pool); + +void mp_image_pool_set_lru(struct mp_image_pool *pool); + +struct mp_image *mp_image_pool_get_no_alloc(struct mp_image_pool *pool, int fmt, + int w, int h); + +typedef struct mp_image *(*mp_image_allocator)(void *data, int fmt, int w, int h); +void mp_image_pool_set_allocator(struct mp_image_pool *pool, + mp_image_allocator cb, void *cb_data); + +struct mp_image *mp_image_pool_new_copy(struct mp_image_pool *pool, + struct mp_image *img); +bool mp_image_pool_make_writeable(struct mp_image_pool *pool, + struct mp_image *img); + +struct mp_image *mp_image_hw_download(struct mp_image *img, + struct mp_image_pool *swpool); + +int mp_image_hw_download_get_sw_format(struct mp_image *img); + +bool mp_image_hw_upload(struct mp_image *hw_img, struct mp_image *src); + +struct AVBufferRef; +bool mp_update_av_hw_frames_pool(struct AVBufferRef **hw_frames_ctx, + struct AVBufferRef *hw_device_ctx, + int imgfmt, int sw_imgfmt, int w, int h, + bool disable_multiplane); + +struct mp_image *mp_av_pool_image_hw_upload(struct AVBufferRef *hw_frames_ctx, + struct mp_image *src); + +struct mp_image *mp_av_pool_image_hw_map(struct AVBufferRef 
*hw_frames_ctx, + struct mp_image *src); +#endif diff --git a/video/out/android_common.c b/video/out/android_common.c new file mode 100644 index 0000000..27e7b5b --- /dev/null +++ b/video/out/android_common.c @@ -0,0 +1,99 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <libavcodec/jni.h> +#include <android/native_window_jni.h> + +#include "android_common.h" +#include "common/msg.h" +#include "misc/jni.h" +#include "options/m_config.h" +#include "vo.h" + +struct vo_android_state { + struct mp_log *log; + ANativeWindow *native_window; +}; + +bool vo_android_init(struct vo *vo) +{ + vo->android = talloc_zero(vo, struct vo_android_state); + struct vo_android_state *ctx = vo->android; + + *ctx = (struct vo_android_state){ + .log = mp_log_new(ctx, vo->log, "android"), + }; + + JNIEnv *env = MP_JNI_GET_ENV(ctx); + if (!env) { + MP_FATAL(ctx, "Could not attach java VM.\n"); + goto fail; + } + + assert(vo->opts->WinID != 0 && vo->opts->WinID != -1); + jobject surface = (jobject)(intptr_t)vo->opts->WinID; + ctx->native_window = ANativeWindow_fromSurface(env, surface); + if (!ctx->native_window) { + MP_FATAL(ctx, "Failed to create ANativeWindow\n"); + goto fail; + } + + return true; +fail: + talloc_free(ctx); + vo->android = NULL; + return false; +} + +void vo_android_uninit(struct vo *vo) +{ + struct vo_android_state *ctx = 
vo->android; + if (!ctx) + return; + + if (ctx->native_window) + ANativeWindow_release(ctx->native_window); + + talloc_free(ctx); + vo->android = NULL; +} + +ANativeWindow *vo_android_native_window(struct vo *vo) +{ + struct vo_android_state *ctx = vo->android; + return ctx->native_window; +} + +bool vo_android_surface_size(struct vo *vo, int *out_w, int *out_h) +{ + struct vo_android_state *ctx = vo->android; + + int w = vo->opts->android_surface_size.w, + h = vo->opts->android_surface_size.h; + if (!w) + w = ANativeWindow_getWidth(ctx->native_window); + if (!h) + h = ANativeWindow_getHeight(ctx->native_window); + + if (w <= 0 || h <= 0) { + MP_ERR(ctx, "Failed to get height and width.\n"); + return false; + } + *out_w = w; + *out_h = h; + return true; +} diff --git a/video/out/android_common.h b/video/out/android_common.h new file mode 100644 index 0000000..7f075ea --- /dev/null +++ b/video/out/android_common.h @@ -0,0 +1,29 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include <android/native_window_jni.h> + +#include "common/common.h" + +struct vo; + +bool vo_android_init(struct vo *vo); +void vo_android_uninit(struct vo *vo); +ANativeWindow *vo_android_native_window(struct vo *vo); +bool vo_android_surface_size(struct vo *vo, int *w, int *h); diff --git a/video/out/aspect.c b/video/out/aspect.c new file mode 100644 index 0000000..6e1cd63 --- /dev/null +++ b/video/out/aspect.c @@ -0,0 +1,216 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +/* Stuff for correct aspect scaling. 
*/ +#include "aspect.h" +#include "math.h" +#include "vo.h" +#include "common/msg.h" +#include "options/options.h" +#include "video/mp_image.h" + +#include "vo.h" +#include "sub/osd.h" + +static void aspect_calc_panscan(struct mp_vo_opts *opts, + int w, int h, int d_w, int d_h, int unscaled, + int window_w, int window_h, double monitor_par, + int *out_w, int *out_h) +{ + int fwidth = window_w; + int fheight = (float)window_w / d_w * d_h / monitor_par; + if (fheight > window_h || fheight < h) { + int tmpw = (float)window_h / d_h * d_w * monitor_par; + if (tmpw <= window_w) { + fheight = window_h; + fwidth = tmpw; + } + } + + int vo_panscan_area = window_h - fheight; + double f_w = fwidth / (double)MPMAX(fheight, 1); + double f_h = 1; + if (vo_panscan_area == 0) { + vo_panscan_area = window_w - fwidth; + f_w = 1; + f_h = fheight / (double)MPMAX(fwidth, 1); + } + + if (unscaled) { + vo_panscan_area = 0; + if (unscaled != 2 || (d_w <= window_w && d_h <= window_h)) { + fwidth = d_w * monitor_par; + fheight = d_h; + } + } + + *out_w = fwidth + vo_panscan_area * opts->panscan * f_w; + *out_h = fheight + vo_panscan_area * opts->panscan * f_h; +} + +// Clamp [start, end) to range [0, size) with various fallbacks. 
+static void clamp_size(int size, int *start, int *end) +{ + *start = MPMAX(0, *start); + *end = MPMIN(size, *end); + if (*start >= *end) { + *start = 0; + *end = 1; + } +} + +static void src_dst_split_scaling(int src_size, int dst_size, + int scaled_src_size, + float zoom, float align, float pan, float scale, + int *src_start, int *src_end, + int *dst_start, int *dst_end, + int *osd_margin_a, int *osd_margin_b) +{ + scaled_src_size *= powf(2, zoom) * scale; + scaled_src_size = MPMAX(scaled_src_size, 1); + align = (align + 1) / 2; + + *dst_start = (dst_size - scaled_src_size) * align + pan * scaled_src_size; + *dst_end = *dst_start + scaled_src_size; + + // Distance of screen frame to video + *osd_margin_a = *dst_start; + *osd_margin_b = dst_size - *dst_end; + + // Clip to screen + int s_src = *src_end - *src_start; + int s_dst = *dst_end - *dst_start; + if (*dst_start < 0) { + int border = -(*dst_start) * s_src / s_dst; + *src_start += border; + *dst_start = 0; + } + if (*dst_end > dst_size) { + int border = (*dst_end - dst_size) * s_src / s_dst; + *src_end -= border; + *dst_end = dst_size; + } + + // For sanity: avoid bothering VOs with corner cases + clamp_size(src_size, src_start, src_end); + clamp_size(dst_size, dst_start, dst_end); +} + +static void calc_margin(float opts[2], int out[2], int size) +{ + out[0] = MPCLAMP((int)(opts[0] * size), 0, size); + out[1] = MPCLAMP((int)(opts[1] * size), 0, size); + + if (out[0] + out[1] >= size) { + // This case is not really supported. Show an error by 1 pixel. 
+ out[0] = 0; + out[1] = MPMAX(0, size - 1); + } +} + +void mp_get_src_dst_rects(struct mp_log *log, struct mp_vo_opts *opts, + int vo_caps, struct mp_image_params *video, + int window_w, int window_h, double monitor_par, + struct mp_rect *out_src, + struct mp_rect *out_dst, + struct mp_osd_res *out_osd) +{ + int src_w = video->w; + int src_h = video->h; + int src_dw, src_dh; + + mp_image_params_get_dsize(video, &src_dw, &src_dh); + window_w = MPMAX(1, window_w); + window_h = MPMAX(1, window_h); + + int margin_x[2] = {0}; + int margin_y[2] = {0}; + if (opts->keepaspect) { + calc_margin(opts->margin_x, margin_x, window_w); + calc_margin(opts->margin_y, margin_y, window_h); + } + + int vid_window_w = window_w - margin_x[0] - margin_x[1]; + int vid_window_h = window_h - margin_y[0] - margin_y[1]; + + struct mp_rect dst = {0, 0, window_w, window_h}; + struct mp_rect src = {0, 0, src_w, src_h}; + if (mp_image_crop_valid(video)) + src = video->crop; + + if (vo_caps & VO_CAP_ROTATE90) { + if (video->rotate % 180 == 90) { + MPSWAP(int, src_w, src_h); + MPSWAP(int, src_dw, src_dh); + } + mp_rect_rotate(&src, src_w, src_h, video->rotate); + } + + struct mp_osd_res osd = { + .w = window_w, + .h = window_h, + .display_par = monitor_par, + }; + + if (opts->keepaspect) { + int scaled_width, scaled_height; + aspect_calc_panscan(opts, src_w, src_h, src_dw, src_dh, opts->unscaled, + vid_window_w, vid_window_h, monitor_par, + &scaled_width, &scaled_height); + src_dst_split_scaling(src_w, vid_window_w, scaled_width, + opts->zoom, opts->align_x, opts->pan_x, opts->scale_x, + &src.x0, &src.x1, &dst.x0, &dst.x1, + &osd.ml, &osd.mr); + src_dst_split_scaling(src_h, vid_window_h, scaled_height, + opts->zoom, opts->align_y, opts->pan_y, opts->scale_y, + &src.y0, &src.y1, &dst.y0, &dst.y1, + &osd.mt, &osd.mb); + } + + dst.x0 += margin_x[0]; + dst.y0 += margin_y[0]; + dst.x1 += margin_x[0]; + dst.y1 += margin_y[0]; + + // OSD really uses the full window, but was computed on the margin-cut + 
// video sub-window. Correct it to the full window. + osd.ml += margin_x[0]; + osd.mr += margin_x[1]; + osd.mt += margin_y[0]; + osd.mb += margin_y[1]; + + *out_src = src; + *out_dst = dst; + *out_osd = osd; + + int sw = src.x1 - src.x0, sh = src.y1 - src.y0; + int dw = dst.x1 - dst.x0, dh = dst.y1 - dst.y0; + + mp_verbose(log, "Window size: %dx%d (Borders: l=%d t=%d r=%d b=%d)\n", + window_w, window_h, + margin_x[0], margin_y[0], margin_x[1], margin_y[1]); + mp_verbose(log, "Video source: %dx%d (%d:%d)\n", + video->w, video->h, video->p_w, video->p_h); + mp_verbose(log, "Video display: (%d, %d) %dx%d -> (%d, %d) %dx%d\n", + src.x0, src.y0, sw, sh, dst.x0, dst.y0, dw, dh); + mp_verbose(log, "Video scale: %f/%f\n", + (double)dw / sw, (double)dh / sh); + mp_verbose(log, "OSD borders: l=%d t=%d r=%d b=%d\n", + osd.ml, osd.mt, osd.mr, osd.mb); + mp_verbose(log, "Video borders: l=%d t=%d r=%d b=%d\n", + dst.x0, dst.y0, window_w - dst.x1, window_h - dst.y1); +} diff --git a/video/out/aspect.h b/video/out/aspect.h new file mode 100644 index 0000000..4123311 --- /dev/null +++ b/video/out/aspect.h @@ -0,0 +1,33 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPLAYER_ASPECT_H +#define MPLAYER_ASPECT_H + +struct mp_log; +struct mp_vo_opts; +struct mp_image_params; +struct mp_rect; +struct mp_osd_res; +void mp_get_src_dst_rects(struct mp_log *log, struct mp_vo_opts *opts, + int vo_caps, struct mp_image_params *video, + int window_w, int window_h, double monitor_par, + struct mp_rect *out_src, + struct mp_rect *out_dst, + struct mp_osd_res *out_osd); + +#endif /* MPLAYER_ASPECT_H */ diff --git a/video/out/bitmap_packer.c b/video/out/bitmap_packer.c new file mode 100644 index 0000000..5ef090b --- /dev/null +++ b/video/out/bitmap_packer.c @@ -0,0 +1,197 @@ +/* + * Calculate how to pack bitmap rectangles into a larger surface + * + * Copyright 2009, 2012 Uoti Urpala + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdlib.h> +#include <assert.h> +#include <stdio.h> +#include <limits.h> + +#include "mpv_talloc.h" +#include "bitmap_packer.h" +#include "common/common.h" + +#define IS_POWER_OF_2(x) (((x) > 0) && !(((x) - 1) & (x))) + +void packer_reset(struct bitmap_packer *packer) +{ + struct bitmap_packer old = *packer; + *packer = (struct bitmap_packer) { + .w_max = old.w_max, + .h_max = old.h_max, + }; + talloc_free_children(packer); +} + +void packer_get_bb(struct bitmap_packer *packer, struct pos out_bb[2]) +{ + out_bb[0] = (struct pos) {0}; + out_bb[1] = (struct pos) {packer->used_width, packer->used_height}; +} + +#define HEIGHT_SORT_BITS 4 +static int size_index(int s) +{ + int n = mp_log2(s); + return (n << HEIGHT_SORT_BITS) + + ((- 1 - (s << HEIGHT_SORT_BITS >> n)) & ((1 << HEIGHT_SORT_BITS) - 1)); +} + +/* Pack the given rectangles into an area of size w * h. + * The size of each rectangle is read from in[i].x / in[i].y. + * The height of each rectangle must be less than 65536. + * 'scratch' must point to work memory for num_rects+16 ints. + * The packed position for rectangle number i is set in out[i]. + * Return 0 on success, -1 if the rectangles did not fit in w*h. + * + * The rectangles are placed in rows in order approximately sorted by + * height (the approximate sorting is simpler than a full one would be, + * and allows the algorithm to work in linear time). Additionally, to + * reduce wasted space when there are a few tall rectangles, empty + * lower-right parts of rows are filled recursively when the size of + * rectangles in the row drops past a power-of-two threshold. So if a + * row starts with rectangles of size 3x50, 10x40 and 5x20 then the + * free rectangle with corners (13, 20)-(w, 50) is filled recursively. 
+ */ +static int pack_rectangles(struct pos *in, struct pos *out, int num_rects, + int w, int h, int *scratch, int *used_width) +{ + int bins[16 << HEIGHT_SORT_BITS]; + int sizes[16 << HEIGHT_SORT_BITS] = { 0 }; + for (int i = 0; i < num_rects; i++) + sizes[size_index(in[i].y)]++; + int idx = 0; + for (int i = 0; i < 16 << HEIGHT_SORT_BITS; i += 1 << HEIGHT_SORT_BITS) { + for (int j = 0; j < 1 << HEIGHT_SORT_BITS; j++) { + bins[i + j] = idx; + idx += sizes[i + j]; + } + scratch[idx++] = -1; + } + for (int i = 0; i < num_rects; i++) + scratch[bins[size_index(in[i].y)]++] = i; + for (int i = 0; i < 16; i++) + bins[i] = bins[i << HEIGHT_SORT_BITS] - sizes[i << HEIGHT_SORT_BITS]; + struct { + int size, x, bottom; + } stack[16] = {{15, 0, h}}, s = {0}; + int stackpos = 1; + int y; + while (stackpos) { + y = s.bottom; + s = stack[--stackpos]; + s.size++; + while (s.size--) { + int maxy = -1; + int obj; + while ((obj = scratch[bins[s.size]]) >= 0) { + int bottom = y + in[obj].y; + if (bottom > s.bottom) + break; + int right = s.x + in[obj].x; + if (right > w) + break; + bins[s.size]++; + out[obj] = (struct pos){s.x, y}; + num_rects--; + if (maxy < 0) + stack[stackpos++] = s; + s.x = right; + maxy = MPMAX(maxy, bottom); + } + *used_width = MPMAX(*used_width, s.x); + if (maxy > 0) + s.bottom = maxy; + } + } + return num_rects ? 
-1 : y; +} + +int packer_pack(struct bitmap_packer *packer) +{ + if (packer->count == 0) + return 0; + int w_orig = packer->w, h_orig = packer->h; + struct pos *in = packer->in; + int xmax = 0, ymax = 0; + for (int i = 0; i < packer->count; i++) { + if (in[i].x <= 0 || in[i].y <= 0) { + in[i] = (struct pos){0, 0}; + } else { + in[i].x += packer->padding * 2; + in[i].y += packer->padding * 2; + } + if (in[i].x < 0 || in [i].x > 65535 || in[i].y < 0 || in[i].y > 65535) { + fprintf(stderr, "Invalid OSD / subtitle bitmap size\n"); + abort(); + } + xmax = MPMAX(xmax, in[i].x); + ymax = MPMAX(ymax, in[i].y); + } + if (xmax > packer->w) + packer->w = 1 << (mp_log2(xmax - 1) + 1); + if (ymax > packer->h) + packer->h = 1 << (mp_log2(ymax - 1) + 1); + while (1) { + int used_width = 0; + int y = pack_rectangles(in, packer->result, packer->count, + packer->w, packer->h, + packer->scratch, &used_width); + if (y >= 0) { + packer->used_width = MPMIN(used_width, packer->w); + packer->used_height = MPMIN(y, packer->h); + assert(packer->w == 0 || IS_POWER_OF_2(packer->w)); + assert(packer->h == 0 || IS_POWER_OF_2(packer->h)); + if (packer->padding) { + for (int i = 0; i < packer->count; i++) { + packer->result[i].x += packer->padding; + packer->result[i].y += packer->padding; + } + } + return packer->w != w_orig || packer->h != h_orig; + } + int w_max = packer->w_max > 0 ? packer->w_max : INT_MAX; + int h_max = packer->h_max > 0 ? 
packer->h_max : INT_MAX; + if (packer->w <= packer->h && packer->w != w_max) + packer->w = MPMIN(packer->w * 2, w_max); + else if (packer->h != h_max) + packer->h = MPMIN(packer->h * 2, h_max); + else { + packer->w = w_orig; + packer->h = h_orig; + return -1; + } + } +} + +void packer_set_size(struct bitmap_packer *packer, int size) +{ + packer->count = size; + if (size <= packer->asize) + return; + packer->asize = MPMAX(packer->asize * 2, size); + talloc_free(packer->result); + talloc_free(packer->scratch); + packer->in = talloc_realloc(packer, packer->in, struct pos, packer->asize); + packer->result = talloc_array_ptrtype(packer, packer->result, + packer->asize); + packer->scratch = talloc_array_ptrtype(packer, packer->scratch, + packer->asize + 16); +} diff --git a/video/out/bitmap_packer.h b/video/out/bitmap_packer.h new file mode 100644 index 0000000..97bf88f --- /dev/null +++ b/video/out/bitmap_packer.h @@ -0,0 +1,51 @@ +#ifndef MPLAYER_PACK_RECTANGLES_H +#define MPLAYER_PACK_RECTANGLES_H + +struct pos { + int x; + int y; +}; + +struct bitmap_packer { + int w; + int h; + int w_max; + int h_max; + int padding; + int count; + struct pos *in; + struct pos *result; + int used_width; + int used_height; + + // internal + int *scratch; + int asize; +}; + +struct sub_bitmaps; + +// Clear all internal state. Leave the following fields: w_max, h_max +void packer_reset(struct bitmap_packer *packer); + +// Get the bounding box used for bitmap data (including padding). +// The bounding box doesn't exceed (0,0)-(packer->w,packer->h). +void packer_get_bb(struct bitmap_packer *packer, struct pos out_bb[2]); + +/* Reallocate packer->in for at least to desired number of items. + * Also sets packer->count to the same value. + */ +void packer_set_size(struct bitmap_packer *packer, int size); + +/* To use this, set packer->count to number of rectangles, w_max and h_max + * to maximum output rectangle size, and w and h to start size (may be 0). + * Write input sizes in packer->in. 
+ * Resulting packing will be written in packer->result. + * w and h will be increased if necessary for successful packing. + * There is a strong guarantee that w and h will be powers of 2 (or set to 0). + * Return value is -1 if packing failed because w and h were set to max + * values but that wasn't enough, 1 if w or h was increased, and 0 otherwise. + */ +int packer_pack(struct bitmap_packer *packer); + +#endif diff --git a/video/out/cocoa_cb_common.swift b/video/out/cocoa_cb_common.swift new file mode 100644 index 0000000..9c0054a --- /dev/null +++ b/video/out/cocoa_cb_common.swift @@ -0,0 +1,230 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa + +class CocoaCB: Common { + var libmpv: LibmpvHelper + var layer: GLLayer? 
+ + @objc var isShuttingDown: Bool = false + + enum State { + case uninitialized + case needsInit + case initialized + } + var backendState: State = .uninitialized + + + @objc init(_ mpvHandle: OpaquePointer) { + let newlog = mp_log_new(UnsafeMutablePointer<MPContext>(mpvHandle), mp_client_get_log(mpvHandle), "cocoacb") + libmpv = LibmpvHelper(mpvHandle, newlog) + super.init(newlog) + layer = GLLayer(cocoaCB: self) + } + + func preinit(_ vo: UnsafeMutablePointer<vo>) { + mpv = MPVHelper(vo, log) + + if backendState == .uninitialized { + backendState = .needsInit + + guard let layer = self.layer else { + log.sendError("Something went wrong, no GLLayer was initialized") + exit(1) + } + + initView(vo, layer) + initMisc(vo) + } + } + + func uninit() { + window?.orderOut(nil) + window?.close() + mpv = nil + } + + func reconfig(_ vo: UnsafeMutablePointer<vo>) { + mpv?.vo = vo + if backendState == .needsInit { + DispatchQueue.main.sync { self.initBackend(vo) } + } else { + DispatchQueue.main.async { + self.updateWindowSize(vo) + self.layer?.update(force: true) + } + } + } + + func initBackend(_ vo: UnsafeMutablePointer<vo>) { + let previousActiveApp = getActiveApp() + initApp() + initWindow(vo, previousActiveApp) + updateICCProfile() + initWindowState() + + backendState = .initialized + } + + func updateWindowSize(_ vo: UnsafeMutablePointer<vo>) { + guard let targetScreen = getTargetScreen(forFullscreen: false) ?? NSScreen.main else + { + log.sendWarning("Couldn't update Window size, no Screen available") + return + } + + let wr = getWindowGeometry(forScreen: targetScreen, videoOut: vo) + if !(window?.isVisible ?? false) && + !(window?.isMiniaturized ?? 
false) && + !NSApp.isHidden + { + window?.makeKeyAndOrderFront(nil) + } + layer?.atomicDrawingStart() + window?.updateSize(wr.size) + } + + override func displayLinkCallback(_ displayLink: CVDisplayLink, + _ inNow: UnsafePointer<CVTimeStamp>, + _ inOutputTime: UnsafePointer<CVTimeStamp>, + _ flagsIn: CVOptionFlags, + _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn + { + libmpv.reportRenderFlip() + return kCVReturnSuccess + } + + override func lightSensorUpdate() { + libmpv.setRenderLux(lmuToLux(lastLmu)) + } + + override func updateICCProfile() { + guard let colorSpace = window?.screen?.colorSpace else { + log.sendWarning("Couldn't update ICC Profile, no color space available") + return + } + + libmpv.setRenderICCProfile(colorSpace) + layer?.colorspace = colorSpace.cgColorSpace + } + + override func windowDidEndAnimation() { + layer?.update() + checkShutdown() + } + + override func windowSetToFullScreen() { + layer?.update(force: true) + } + + override func windowSetToWindow() { + layer?.update(force: true) + } + + override func windowDidUpdateFrame() { + layer?.update(force: true) + } + + override func windowDidChangeScreen() { + layer?.update(force: true) + } + + override func windowDidChangeScreenProfile() { + layer?.needsICCUpdate = true + } + + override func windowDidChangeBackingProperties() { + layer?.contentsScale = window?.backingScaleFactor ?? 
1 + } + + override func windowWillStartLiveResize() { + layer?.inLiveResize = true + } + + override func windowDidEndLiveResize() { + layer?.inLiveResize = false + } + + override func windowDidChangeOcclusionState() { + layer?.update(force: true) + } + + var controlCallback: mp_render_cb_control_fn = { ( v, ctx, e, request, data ) -> Int32 in + let ccb = unsafeBitCast(ctx, to: CocoaCB.self) + + guard let vo = v, let events = e else { + ccb.log.sendWarning("Unexpected nil value in Control Callback") + return VO_FALSE + } + + return ccb.control(vo, events: events, request: request, data: data) + } + + override func control(_ vo: UnsafeMutablePointer<vo>, + events: UnsafeMutablePointer<Int32>, + request: UInt32, + data: UnsafeMutableRawPointer?) -> Int32 + { + switch mp_voctrl(request) { + case VOCTRL_PREINIT: + DispatchQueue.main.sync { self.preinit(vo) } + return VO_TRUE + case VOCTRL_UNINIT: + DispatchQueue.main.async { self.uninit() } + return VO_TRUE + case VOCTRL_RECONFIG: + reconfig(vo) + return VO_TRUE + default: + break + } + + return super.control(vo, events: events, request: request, data: data) + } + + func shutdown(_ destroy: Bool = false) { + isShuttingDown = window?.isAnimating ?? false || + window?.isInFullscreen ?? false && mpv?.opts.native_fs ?? true + if window?.isInFullscreen ?? false && !(window?.isAnimating ?? false) { + window?.close() + } + if isShuttingDown { return } + + uninit() + uninitCommon() + + libmpv.deinitRender() + libmpv.deinitMPV(destroy) + } + + func checkShutdown() { + if isShuttingDown { + shutdown(true) + } + } + + @objc func processEvent(_ event: UnsafePointer<mpv_event>) { + switch event.pointee.event_id { + case MPV_EVENT_SHUTDOWN: + shutdown() + default: + break + } + } +} diff --git a/video/out/d3d11/context.c b/video/out/d3d11/context.c new file mode 100644 index 0000000..05f04fd --- /dev/null +++ b/video/out/d3d11/context.c @@ -0,0 +1,566 @@ +/* + * This file is part of mpv. 
// User-configurable settings of the D3D11 context. Each member is bound to one
// of the "d3d11-*" options declared in d3d11_conf.
struct d3d11_opts {
    int feature_level;   // d3d11-feature-level: a D3D_FEATURE_LEVEL_* value
    int warp;            // d3d11-warp: -1 = auto, 0 = no, 1 = yes
    bool flip;           // d3d11-flip
    int sync_interval;   // d3d11-sync-interval: 0..4, passed to Present()
    char *adapter_name;  // d3d11-adapter: checked by d3d11_validate_adapter()
    int output_format;   // d3d11-output-format: a DXGI_FORMAT_* value
    int color_space;     // d3d11-output-csp: -1 = auto, else DXGI_COLOR_SPACE_*
    bool exclusive_fs;   // d3d11-exclusive-fs
};
// Private state of one D3D11 ra_ctx instance.
struct priv {
    // d3d11-* options; opts points into opts_cache, which is refreshed with
    // m_config_cache_update() before the options are read
    struct d3d11_opts *opts;
    struct m_config_cache *opts_cache;

    // Generic VO options (vo_sub_opts); vo_opts points into vo_opts_cache
    struct mp_vo_opts *vo_opts;
    struct m_config_cache *vo_opts_cache;

    // RA wrapper of the swapchain buffer; set by d3d11_start_frame() and
    // freed again by d3d11_submit_frame()
    struct ra_tex *backbuffer;
    ID3D11Device *device;
    IDXGISwapChain *swapchain;
    // Filled in during swapchain creation and handed out with every ra_fbo
    struct mp_colorspace swapchain_csp;

    // QueryPerformanceFrequency() result: QPC ticks per second
    int64_t perf_freq;
    // Baseline SyncRefreshCount / SyncQPCTime remembered by d3d11_get_vsync();
    // both are reset to 0 when the frame statistics are reported as disjoint
    unsigned sync_refresh_count;
    int64_t sync_qpc_time;
    // Latest estimate of one vsync period, in QPC ticks
    int64_t vsync_duration_qpc;
    // QPC timestamp taken right before the most recent Present() call
    int64_t last_submit_qpc;
};
&listing : NULL); + + if (help) { + mp_info(log, "Available D3D11 adapters:\n%.*s", + BSTR_P(listing)); + talloc_free(listing.start); + return M_OPT_EXIT; + } + + if (!adapter_matched) { + mp_err(log, "No adapter matching '%.*s'!\n", BSTR_P(param)); + } + + return adapter_matched ? 0 : M_OPT_INVALID; +} + +static struct ra_tex *get_backbuffer(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ID3D11Texture2D *backbuffer = NULL; + struct ra_tex *tex = NULL; + HRESULT hr; + + hr = IDXGISwapChain_GetBuffer(p->swapchain, 0, &IID_ID3D11Texture2D, + (void**)&backbuffer); + if (FAILED(hr)) { + MP_ERR(ctx, "Couldn't get swapchain image\n"); + goto done; + } + + tex = ra_d3d11_wrap_tex(ctx->ra, (ID3D11Resource *)backbuffer); +done: + SAFE_RELEASE(backbuffer); + return tex; +} + +static bool resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + HRESULT hr; + + if (p->backbuffer) { + MP_ERR(ctx, "Attempt at resizing while a frame was in progress!\n"); + return false; + } + + hr = IDXGISwapChain_ResizeBuffers(p->swapchain, 0, ctx->vo->dwidth, + ctx->vo->dheight, DXGI_FORMAT_UNKNOWN, 0); + if (FAILED(hr)) { + MP_FATAL(ctx, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr)); + return false; + } + + return true; +} + +static bool d3d11_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + return resize(ctx); +} + +static int d3d11_color_depth(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + DXGI_SWAP_CHAIN_DESC desc; + + HRESULT hr = IDXGISwapChain_GetDesc(p->swapchain, &desc); + if (FAILED(hr)) { + MP_ERR(sw->ctx, "Failed to query swap chain description: %s!\n", + mp_HRESULT_to_str(hr)); + return 0; + } + + const struct ra_format *ra_fmt = + ra_d3d11_get_ra_format(sw->ctx->ra, desc.BufferDesc.Format); + if (!ra_fmt) + return 0; + + return ra_fmt->component_depth[0]; +} + +static bool d3d11_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->priv; + + if (!out_fbo) + return true; + + assert(!p->backbuffer); + + 
p->backbuffer = get_backbuffer(sw->ctx); + if (!p->backbuffer) + return false; + + *out_fbo = (struct ra_fbo) { + .tex = p->backbuffer, + .flip = false, + .color_space = p->swapchain_csp + }; + return true; +} + +static bool d3d11_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) +{ + struct priv *p = sw->priv; + + ra_d3d11_flush(sw->ctx->ra); + ra_tex_free(sw->ctx->ra, &p->backbuffer); + return true; +} + +static int64_t qpc_to_ns(struct ra_swapchain *sw, int64_t qpc) +{ + struct priv *p = sw->priv; + + // Convert QPC units (1/perf_freq seconds) to nanoseconds. This will work + // without overflow because the QPC value is guaranteed not to roll-over + // within 100 years, so perf_freq must be less than 2.9*10^9. + return qpc / p->perf_freq * INT64_C(1000000000) + + qpc % p->perf_freq * INT64_C(1000000000) / p->perf_freq; +} + +static int64_t qpc_ns_now(struct ra_swapchain *sw) +{ + LARGE_INTEGER perf_count; + QueryPerformanceCounter(&perf_count); + return qpc_to_ns(sw, perf_count.QuadPart); +} + +static void d3d11_swap_buffers(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + + m_config_cache_update(p->opts_cache); + + LARGE_INTEGER perf_count; + QueryPerformanceCounter(&perf_count); + p->last_submit_qpc = perf_count.QuadPart; + + IDXGISwapChain_Present(p->swapchain, p->opts->sync_interval, 0); +} + +static void d3d11_get_vsync(struct ra_swapchain *sw, struct vo_vsync_info *info) +{ + struct priv *p = sw->priv; + HRESULT hr; + + m_config_cache_update(p->opts_cache); + + // The calculations below are only valid if mpv presents on every vsync + if (p->opts->sync_interval != 1) + return; + + // They're also only valid for flip model swapchains + DXGI_SWAP_CHAIN_DESC desc; + hr = IDXGISwapChain_GetDesc(p->swapchain, &desc); + if (FAILED(hr) || (desc.SwapEffect != DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL && + desc.SwapEffect != DXGI_SWAP_EFFECT_FLIP_DISCARD)) + { + return; + } + + // GetLastPresentCount returns a sequential ID for the frame 
submitted by + // the last call to IDXGISwapChain::Present() + UINT submit_count; + hr = IDXGISwapChain_GetLastPresentCount(p->swapchain, &submit_count); + if (FAILED(hr)) + return; + + // GetFrameStatistics returns two pairs. The first is (PresentCount, + // PresentRefreshCount) which relates a present ID (on the same timeline as + // GetLastPresentCount) to the physical vsync it was displayed on. The + // second is (SyncRefreshCount, SyncQPCTime), which relates a physical vsync + // to a timestamp on the same clock as QueryPerformanceCounter. + DXGI_FRAME_STATISTICS stats; + hr = IDXGISwapChain_GetFrameStatistics(p->swapchain, &stats); + if (hr == DXGI_ERROR_FRAME_STATISTICS_DISJOINT) { + p->sync_refresh_count = 0; + p->sync_qpc_time = 0; + } + if (FAILED(hr)) + return; + + info->last_queue_display_time = 0; + info->vsync_duration = 0; + // Detecting skipped vsyncs is possible but not supported yet + info->skipped_vsyncs = -1; + + // Get the number of physical vsyncs that have passed since the start of the + // playback or disjoint event. + // Check for 0 here, since sometimes GetFrameStatistics returns S_OK but + // with 0s in some (all?) members of DXGI_FRAME_STATISTICS. 
+ unsigned src_passed = 0; + if (stats.SyncRefreshCount && p->sync_refresh_count) + src_passed = stats.SyncRefreshCount - p->sync_refresh_count; + if (p->sync_refresh_count == 0) + p->sync_refresh_count = stats.SyncRefreshCount; + + // Get the elapsed time passed between the above vsyncs + unsigned sqt_passed = 0; + if (stats.SyncQPCTime.QuadPart && p->sync_qpc_time) + sqt_passed = stats.SyncQPCTime.QuadPart - p->sync_qpc_time; + if (p->sync_qpc_time == 0) + p->sync_qpc_time = stats.SyncQPCTime.QuadPart; + + // If any vsyncs have passed, estimate the physical frame rate + if (src_passed && sqt_passed) + p->vsync_duration_qpc = sqt_passed / src_passed; + if (p->vsync_duration_qpc) + info->vsync_duration = qpc_to_ns(sw, p->vsync_duration_qpc); + + // If the physical frame rate is known and the other members of + // DXGI_FRAME_STATISTICS are non-0, estimate the timing of the next frame + if (p->vsync_duration_qpc && stats.PresentCount && + stats.PresentRefreshCount && stats.SyncRefreshCount && + stats.SyncQPCTime.QuadPart) + { + // It's not clear if PresentRefreshCount and SyncRefreshCount can refer + // to different frames, but in case they can, assuming mpv presents on + // every frame, guess the present count that relates to SyncRefreshCount. + unsigned expected_sync_pc = stats.PresentCount + + (stats.SyncRefreshCount - stats.PresentRefreshCount); + + // Now guess the timestamp of the last submitted frame based on the + // timestamp of the frame at SyncRefreshCount and the frame rate + int queued_frames = submit_count - expected_sync_pc; + int64_t last_queue_display_time_qpc = stats.SyncQPCTime.QuadPart + + queued_frames * p->vsync_duration_qpc; + + // Only set the estimated display time if it's after the last submission + // time. It could be before if mpv skips a lot of frames. 
+ if (last_queue_display_time_qpc >= p->last_submit_qpc) { + info->last_queue_display_time = mp_time_ns() + + (qpc_to_ns(sw, last_queue_display_time_qpc) - qpc_ns_now(sw)); + } + } +} + +static bool d3d11_set_fullscreen(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + HRESULT hr; + + m_config_cache_update(p->opts_cache); + + if (!p->swapchain) { + MP_ERR(ctx, "Full screen configuration was requested before D3D11 " + "swap chain was ready!"); + return false; + } + + // we only want exclusive FS if we are entering FS and + // exclusive FS is enabled. Otherwise disable exclusive FS. + bool enable_exclusive_fs = p->vo_opts->fullscreen && + p->opts->exclusive_fs; + + MP_VERBOSE(ctx, "%s full-screen exclusive mode while %s fullscreen\n", + enable_exclusive_fs ? "Enabling" : "Disabling", + ctx->vo->opts->fullscreen ? "entering" : "leaving"); + + hr = IDXGISwapChain_SetFullscreenState(p->swapchain, + enable_exclusive_fs, NULL); + if (FAILED(hr)) + return false; + + if (!resize(ctx)) + return false; + + return true; +} + +static int d3d11_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + struct priv *p = ctx->priv; + int ret = -1; + bool fullscreen_switch_needed = false; + + switch (request) { + case VOCTRL_VO_OPTS_CHANGED: { + void *changed_option; + + while (m_config_cache_get_next_changed(p->vo_opts_cache, + &changed_option)) + { + struct mp_vo_opts *vo_opts = p->vo_opts_cache->opts; + + if (changed_option == &vo_opts->fullscreen) { + fullscreen_switch_needed = true; + } + } + + break; + } + default: + break; + } + + // if leaving full screen, handle d3d11 stuff first, then general + // windowing + if (fullscreen_switch_needed && !p->vo_opts->fullscreen) { + if (!d3d11_set_fullscreen(ctx)) + return VO_FALSE; + + fullscreen_switch_needed = false; + } + + ret = vo_w32_control(ctx->vo, events, request, arg); + + // if entering full screen, handle d3d11 after general windowing stuff + if (fullscreen_switch_needed && p->vo_opts->fullscreen) { + if 
// Release everything owned by the context. This is also what d3d11_init()
// jumps to on failure, so any of the members may still be unset when it runs.
static void d3d11_uninit(struct ra_ctx *ctx)
{
    struct priv *p = ctx->priv;

    // Switch the swapchain back to windowed mode before it is released
    if (p->swapchain)
        IDXGISwapChain_SetFullscreenState(p->swapchain, FALSE, NULL);

    // The backbuffer wrapper can only be freed through a live RA
    if (ctx->ra)
        ra_tex_free(ctx->ra, &p->backbuffer);
    SAFE_RELEASE(p->swapchain);
    vo_w32_uninit(ctx->vo);
    SAFE_RELEASE(p->device);

    // Destroy the RA last to prevent objects we hold from showing up in D3D's
    // leak checker
    if (ctx->ra)
        ctx->ra->fns->destroy(ctx->ra);
}
(!vo_w32_init(ctx->vo)) + goto error; + + UINT usage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_SHADER_INPUT; + if (ID3D11Device_GetFeatureLevel(p->device) >= D3D_FEATURE_LEVEL_11_0 && + p->opts->output_format != DXGI_FORMAT_B8G8R8A8_UNORM) + { + usage |= DXGI_USAGE_UNORDERED_ACCESS; + } + + struct d3d11_swapchain_opts scopts = { + .window = vo_w32_hwnd(ctx->vo), + .width = ctx->vo->dwidth, + .height = ctx->vo->dheight, + .format = p->opts->output_format, + .color_space = p->opts->color_space, + .configured_csp = &p->swapchain_csp, + .flip = p->opts->flip, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .length = ctx->vo->opts->swapchain_depth + 2, + .usage = usage, + }; + if (!mp_d3d11_create_swapchain(p->device, ctx->log, &scopts, &p->swapchain)) + goto error; + + return true; + +error: + d3d11_uninit(ctx); + return false; +} + +IDXGISwapChain *ra_d3d11_ctx_get_swapchain(struct ra_ctx *ra) +{ + if (ra->swapchain->fns != &d3d11_swapchain) + return NULL; + + struct priv *p = ra->priv; + + IDXGISwapChain_AddRef(p->swapchain); + + return p->swapchain; +} + +const struct ra_ctx_fns ra_ctx_d3d11 = { + .type = "d3d11", + .name = "d3d11", + .reconfig = d3d11_reconfig, + .control = d3d11_control, + .init = d3d11_init, + .uninit = d3d11_uninit, +}; diff --git a/video/out/d3d11/context.h b/video/out/d3d11/context.h new file mode 100644 index 0000000..8a9ef4c --- /dev/null +++ b/video/out/d3d11/context.h @@ -0,0 +1,9 @@ +#pragma once + +#include <dxgi.h> + +#include "video/out/gpu/context.h" + +// Get the underlying D3D11 swap chain from an RA context. The returned swap chain is +// refcounted and must be released by the caller. 
+IDXGISwapChain *ra_d3d11_ctx_get_swapchain(struct ra_ctx *ra); diff --git a/video/out/d3d11/hwdec_d3d11va.c b/video/out/d3d11/hwdec_d3d11va.c new file mode 100644 index 0000000..6aaa12b --- /dev/null +++ b/video/out/d3d11/hwdec_d3d11va.c @@ -0,0 +1,258 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <windows.h> +#include <d3d11.h> +#include <d3d11_1.h> + +#include "common/common.h" +#include "options/m_config.h" +#include "osdep/windows_utils.h" +#include "video/hwdec.h" +#include "video/d3d.h" +#include "video/out/d3d11/ra_d3d11.h" +#include "video/out/gpu/hwdec.h" + +struct d3d11va_opts { + bool zero_copy; +}; + +#define OPT_BASE_STRUCT struct d3d11va_opts +const struct m_sub_options d3d11va_conf = { + .opts = (const struct m_option[]) { + {"d3d11va-zero-copy", OPT_BOOL(zero_copy)}, + {0} + }, + .defaults = &(const struct d3d11va_opts) {0}, + .size = sizeof(struct d3d11va_opts) +}; + +struct priv_owner { + struct d3d11va_opts *opts; + + struct mp_hwdec_ctx hwctx; + ID3D11Device *device; + ID3D11Device1 *device1; +}; + +struct priv { + // 1-copy path + ID3D11DeviceContext1 *ctx; + ID3D11Texture2D *copy_tex; + + // zero-copy path + int num_planes; + const struct ra_format *fmt[4]; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + hwdec_devices_remove(hw->devs, &p->hwctx); + 
av_buffer_unref(&p->hwctx.av_device_ref); + SAFE_RELEASE(p->device); + SAFE_RELEASE(p->device1); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + HRESULT hr; + + if (!ra_is_d3d11(hw->ra_ctx->ra)) + return -1; + p->device = ra_d3d11_get_device(hw->ra_ctx->ra); + if (!p->device) + return -1; + + p->opts = mp_get_config_group(hw->priv, hw->global, &d3d11va_conf); + + // D3D11VA requires Direct3D 11.1, so this should always succeed + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D11Device1, + (void**)&p->device1); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get D3D11.1 interface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D10Multithread_Release(multithread); + + static const int subfmts[] = {IMGFMT_NV12, IMGFMT_P010, 0}; + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .av_device_ref = d3d11_wrap_device_ref(p->device), + .supported_formats = subfmts, + .hw_imgfmt = IMGFMT_D3D11, + }; + + if (!p->hwctx.av_device_ref) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n"); + return -1; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); + SAFE_RELEASE(p->copy_tex); + SAFE_RELEASE(p->ctx); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + HRESULT hr; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + struct ra_imgfmt_desc 
desc = {0}; + + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return -1; + + if (o->opts->zero_copy) { + // In the zero-copy path, we create the ra_tex objects in the map + // operation, so we just need to store the format of each plane + p->num_planes = desc.num_planes; + for (int i = 0; i < desc.num_planes; i++) + p->fmt[i] = desc.planes[i]; + } else { + struct mp_image layout = {0}; + mp_image_set_params(&layout, &mapper->dst_params); + + DXGI_FORMAT copy_fmt; + switch (mapper->dst_params.imgfmt) { + case IMGFMT_NV12: copy_fmt = DXGI_FORMAT_NV12; break; + case IMGFMT_P010: copy_fmt = DXGI_FORMAT_P010; break; + default: return -1; + } + + D3D11_TEXTURE2D_DESC copy_desc = { + .Width = mapper->dst_params.w, + .Height = mapper->dst_params.h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = copy_fmt, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + hr = ID3D11Device_CreateTexture2D(o->device, ©_desc, NULL, + &p->copy_tex); + if (FAILED(hr)) { + MP_FATAL(mapper, "Could not create shader resource texture\n"); + return -1; + } + + for (int i = 0; i < desc.num_planes; i++) { + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, p->copy_tex, + mp_image_plane_w(&layout, i), mp_image_plane_h(&layout, i), 0, + desc.planes[i]); + if (!mapper->tex[i]) { + MP_FATAL(mapper, "Could not create RA texture view\n"); + return -1; + } + } + + // A ref to the immediate context is needed for CopySubresourceRegion + ID3D11Device1_GetImmediateContext1(o->device1, &p->ctx); + } + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + ID3D11Texture2D *tex = (void *)mapper->src->planes[0]; + int subresource = (intptr_t)mapper->src->planes[1]; + + if (p->copy_tex) { + ID3D11DeviceContext1_CopySubresourceRegion1(p->ctx, + (ID3D11Resource *)p->copy_tex, 0, 0, 0, 0, + (ID3D11Resource *)tex, subresource, (&(D3D11_BOX) { + .left = 0, + .top = 0, + .front = 0, + .right = mapper->dst_params.w, 
+ .bottom = mapper->dst_params.h, + .back = 1, + }), D3D11_COPY_DISCARD); + + // We no longer need the original texture after copying it. + mp_image_unrefp(&mapper->src); + } else { + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex, &desc2d); + + for (int i = 0; i < p->num_planes; i++) { + // The video decode texture may include padding, so the size of the + // ra_tex needs to be determined by the actual size of the Tex2D + bool chroma = i >= 1; + int w = desc2d.Width / (chroma ? 2 : 1); + int h = desc2d.Height / (chroma ? 2 : 1); + + mapper->tex[i] = ra_d3d11_wrap_tex_video(mapper->ra, tex, + w, h, subresource, p->fmt[i]); + if (!mapper->tex[i]) + return -1; + } + } + + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + if (p->copy_tex) + return; + for (int i = 0; i < 4; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); +} + +const struct ra_hwdec_driver ra_hwdec_d3d11va = { + .name = "d3d11va", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_D3D11, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/d3d11/hwdec_dxva2dxgi.c b/video/out/d3d11/hwdec_dxva2dxgi.c new file mode 100644 index 0000000..62158d4 --- /dev/null +++ b/video/out/d3d11/hwdec_dxva2dxgi.c @@ -0,0 +1,478 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
// One entry of the surface queue used to hand images from D3D9 to D3D11.
// Entries are built by surf_create() and torn down by surf_destroy().
struct queue_surf {
    ID3D11Texture2D *tex11;     // D3D11 texture created with MISC_SHARED
    ID3D11Query *idle11;        // D3D11_QUERY_EVENT read by surf_is_idle11()
    ID3D11Texture2D *stage11;   // at most 16x16 CPU-readable staging texture
    IDirect3DTexture9 *tex9;    // D3D9 texture opened from tex11's share handle
    IDirect3DSurface9 *surf9;   // level 0 surface of tex9
    IDirect3DSurface9 *stage9;  // at most 16x16 lockable D3D9 render target
    struct ra_tex *tex;         // RA wrapper around tex11

    bool busy11; // The surface is currently being used by D3D11
};
+ struct queue_surf **queue; + int queue_len; + int queue_pos; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); + SAFE_RELEASE(p->dev11); + SAFE_RELEASE(p->dev9); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + IDirect3D9Ex *d3d9ex = NULL; + int ret = -1; + HRESULT hr; + + if (!ra_is_d3d11(hw->ra_ctx->ra)) + goto done; + p->dev11 = ra_d3d11_get_device(hw->ra_ctx->ra); + if (!p->dev11) + goto done; + + d3d_load_dlls(); + if (!d3d9_dll) { + MP_FATAL(hw, "Failed to load \"d3d9.dll\": %s\n", mp_LastError_to_str()); + goto done; + } + if (!dxva2_dll) { + MP_FATAL(hw, "Failed to load \"dxva2.dll\": %s\n", mp_LastError_to_str()); + goto done; + } + + HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D); + Direct3DCreate9Ex = (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex"); + if (!Direct3DCreate9Ex) { + MP_FATAL(hw, "Direct3D 9Ex not supported\n"); + goto done; + } + + hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &d3d9ex); + if (FAILED(hr)) { + MP_FATAL(hw, "Couldn't create Direct3D9Ex: %s\n", mp_HRESULT_to_str(hr)); + goto done; + } + + D3DPRESENT_PARAMETERS pparams = { + .BackBufferWidth = 16, + .BackBufferHeight = 16, + .BackBufferCount = 1, + .SwapEffect = D3DSWAPEFFECT_DISCARD, + .hDeviceWindow = GetDesktopWindow(), + .Windowed = TRUE, + .Flags = D3DPRESENTFLAG_VIDEO, + }; + hr = IDirect3D9Ex_CreateDeviceEx(d3d9ex, D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, GetDesktopWindow(), D3DCREATE_NOWINDOWCHANGES | + D3DCREATE_FPU_PRESERVE | D3DCREATE_HARDWARE_VERTEXPROCESSING | + D3DCREATE_DISABLE_PSGP_THREADING | D3DCREATE_MULTITHREADED, &pparams, + NULL, &p->dev9); + if (FAILED(hr)) { + MP_FATAL(hw, "Failed to create Direct3D9Ex device: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // Check if it's possible to StretchRect() from NV12 to XRGB surfaces + hr = 
IDirect3D9Ex_CheckDeviceFormatConversion(d3d9ex, D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, MAKEFOURCC('N', 'V', '1', '2'), D3DFMT_X8R8G8B8); + if (hr != S_OK) { + MP_FATAL(hw, "Can't StretchRect from NV12 to XRGB surfaces\n"); + goto done; + } + + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->dev9), + .hw_imgfmt = IMGFMT_DXVA2, + }; + + if (!p->hwctx.av_device_ref) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n"); + goto done; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + + ret = 0; +done: + SAFE_RELEASE(d3d9ex); + return ret; +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + + ID3D11Device_AddRef(o->dev11); + p->dev11 = o->dev11; + IDirect3DDevice9Ex_AddRef(o->dev9); + p->dev9 = o->dev9; + ID3D11Device_GetImmediateContext(o->dev11, &p->ctx11); + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = IMGFMT_RGB0; + mapper->dst_params.hw_subfmt = 0; + return 0; +} + +static void surf_destroy(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + if (!surf) + return; + SAFE_RELEASE(surf->tex11); + SAFE_RELEASE(surf->idle11); + SAFE_RELEASE(surf->stage11); + SAFE_RELEASE(surf->tex9); + SAFE_RELEASE(surf->surf9); + SAFE_RELEASE(surf->stage9); + ra_tex_free(mapper->ra, &surf->tex); + talloc_free(surf); +} + +static struct queue_surf *surf_create(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + IDXGIResource *res11 = NULL; + bool success = false; + HRESULT hr; + + struct queue_surf *surf = talloc_ptrtype(p, surf); + + D3D11_TEXTURE2D_DESC desc11 = { + .Width = mapper->src->w, + .Height = mapper->src->h, + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_B8G8R8X8_UNORM, + .SampleDesc.Count = 1, + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_RENDER_TARGET, + .MiscFlags = 
D3D11_RESOURCE_MISC_SHARED, + }; + hr = ID3D11Device_CreateTexture2D(p->dev11, &desc11, NULL, &surf->tex11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D11 texture: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // Try to use a 16x16 staging texture, unless the source surface is + // smaller. Ideally, a 1x1 texture would be sufficient, but Microsoft's + // D3D9ExDXGISharedSurf example uses 16x16 to avoid driver bugs. + D3D11_TEXTURE2D_DESC sdesc11 = { + .Width = MPMIN(16, desc11.Width), + .Height = MPMIN(16, desc11.Height), + .MipLevels = 1, + .ArraySize = 1, + .Format = DXGI_FORMAT_B8G8R8X8_UNORM, + .SampleDesc.Count = 1, + .Usage = D3D11_USAGE_STAGING, + .CPUAccessFlags = D3D11_CPU_ACCESS_READ, + }; + hr = ID3D11Device_CreateTexture2D(p->dev11, &sdesc11, NULL, &surf->stage11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D11 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = ID3D11Texture2D_QueryInterface(surf->tex11, &IID_IDXGIResource, + (void**)&res11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to get share handle: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + HANDLE share_handle; + hr = IDXGIResource_GetSharedHandle(res11, &share_handle); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to get share handle: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = ID3D11Device_CreateQuery(p->dev11, + &(D3D11_QUERY_DESC) { D3D11_QUERY_EVENT }, &surf->idle11); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D11 query: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // Share the D3D11 texture with D3D9Ex + hr = IDirect3DDevice9Ex_CreateTexture(p->dev9, desc11.Width, desc11.Height, + 1, D3DUSAGE_RENDERTARGET, D3DFMT_X8R8G8B8, D3DPOOL_DEFAULT, + &surf->tex9, &share_handle); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D9 texture: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = IDirect3DTexture9_GetSurfaceLevel(surf->tex9, 0, &surf->surf9); + if (FAILED(hr)) { + 
MP_ERR(mapper, "Failed to get D3D9 surface: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + // As above, try to use a 16x16 staging texture to avoid driver bugs + hr = IDirect3DDevice9Ex_CreateRenderTarget(p->dev9, + MPMIN(16, desc11.Width), MPMIN(16, desc11.Height), D3DFMT_X8R8G8B8, + D3DMULTISAMPLE_NONE, 0, TRUE, &surf->stage9, NULL); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create D3D9 staging surface: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + surf->tex = ra_d3d11_wrap_tex(mapper->ra, (ID3D11Resource *)surf->tex11); + if (!surf->tex) + goto done; + + success = true; +done: + if (!success) + surf_destroy(mapper, surf); + SAFE_RELEASE(res11); + return success ? surf : NULL; +} + +// true if the surface is currently in-use by the D3D11 graphics pipeline +static bool surf_is_idle11(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + struct priv *p = mapper->priv; + HRESULT hr; + BOOL idle; + + if (!surf->busy11) + return true; + + hr = ID3D11DeviceContext_GetData(p->ctx11, + (ID3D11Asynchronous *)surf->idle11, &idle, sizeof(idle), + D3D11_ASYNC_GETDATA_DONOTFLUSH); + if (FAILED(hr) || hr == S_FALSE || !idle) + return false; + + surf->busy11 = false; + return true; +} + +// If the surface is currently in-use by the D3D11 graphics pipeline, wait for +// it to become idle. Should only be called in the queue-underflow case. 
+static bool surf_wait_idle11(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + struct priv *p = mapper->priv; + HRESULT hr; + + ID3D11DeviceContext_CopySubresourceRegion(p->ctx11, + (ID3D11Resource *)surf->stage11, 0, 0, 0, 0, + (ID3D11Resource *)surf->tex11, 0, (&(D3D11_BOX){ + .right = MPMIN(16, mapper->src->w), + .bottom = MPMIN(16, mapper->src->h), + .back = 1, + })); + + // Block until the surface becomes idle (see surf_wait_idle9()) + D3D11_MAPPED_SUBRESOURCE map = {0}; + hr = ID3D11DeviceContext_Map(p->ctx11, (ID3D11Resource *)surf->stage11, 0, + D3D11_MAP_READ, 0, &map); + if (FAILED(hr)) { + MP_ERR(mapper, "Couldn't map D3D11 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + ID3D11DeviceContext_Unmap(p->ctx11, (ID3D11Resource *)surf->stage11, 0); + surf->busy11 = false; + return true; +} + +static bool surf_wait_idle9(struct ra_hwdec_mapper *mapper, + struct queue_surf *surf) +{ + struct priv *p = mapper->priv; + HRESULT hr; + + // Rather than polling for the surface to become idle, copy part of the + // surface to a staging texture and map it. This should block until the + // surface becomes idle. Microsoft's ISurfaceQueue does this as well. 
+ RECT rc = {0, 0, MPMIN(16, mapper->src->w), MPMIN(16, mapper->src->h)}; + hr = IDirect3DDevice9Ex_StretchRect(p->dev9, surf->surf9, &rc, surf->stage9, + &rc, D3DTEXF_NONE); + if (FAILED(hr)) { + MP_ERR(mapper, "Couldn't copy to D3D9 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + D3DLOCKED_RECT lock; + hr = IDirect3DSurface9_LockRect(surf->stage9, &lock, NULL, D3DLOCK_READONLY); + if (FAILED(hr)) { + MP_ERR(mapper, "Couldn't map D3D9 staging texture: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + IDirect3DSurface9_UnlockRect(surf->stage9); + p->queue[p->queue_pos]->busy11 = true; + return true; +} + +static struct queue_surf *surf_acquire(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + if (!p->queue_len || !surf_is_idle11(mapper, p->queue[p->queue_pos])) { + if (p->queue_len < 16) { + struct queue_surf *surf = surf_create(mapper); + if (!surf) + return NULL; + + // The next surface is busy, so grow the queue + MP_TARRAY_INSERT_AT(p, p->queue, p->queue_len, p->queue_pos, surf); + MP_DBG(mapper, "Queue grew to %d surfaces\n", p->queue_len); + } else { + // For sanity, don't let the queue grow beyond 16 surfaces. It + // should never get this big. If it does, wait for the surface to + // become idle rather than polling it. 
+ if (!surf_wait_idle11(mapper, p->queue[p->queue_pos])) + return NULL; + MP_WARN(mapper, "Queue underflow!\n"); + } + } + return p->queue[p->queue_pos]; +} + +static void surf_release(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + ID3D11DeviceContext_End(p->ctx11, + (ID3D11Asynchronous *)p->queue[p->queue_pos]->idle11); + + // The current surface is now in-flight, move to the next surface + p->queue_pos++; + if (p->queue_pos >= p->queue_len) + p->queue_pos = 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + for (int i = 0; i < p->queue_len; i++) + surf_destroy(mapper, p->queue[i]); + + SAFE_RELEASE(p->ctx11); + SAFE_RELEASE(p->dev9); + SAFE_RELEASE(p->dev11); +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + HRESULT hr; + + struct queue_surf *surf = surf_acquire(mapper); + if (!surf) + return -1; + + RECT rc = {0, 0, mapper->src->w, mapper->src->h}; + IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)mapper->src->planes[3]; + + hr = IDirect3DDevice9Ex_StretchRect(p->dev9, hw_surface, &rc, surf->surf9, + &rc, D3DTEXF_NONE); + if (FAILED(hr)) { + MP_ERR(mapper, "StretchRect() failed: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + if (!surf_wait_idle9(mapper, surf)) + return -1; + + mapper->tex[0] = surf->tex; + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + if (p->queue_pos < p->queue_len && + p->queue[p->queue_pos]->tex == mapper->tex[0]) + { + surf_release(mapper); + mapper->tex[0] = NULL; + } +} + +const struct ra_hwdec_driver ra_hwdec_dxva2dxgi = { + .name = "dxva2-dxgi", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_DXVA2, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + 
}, +}; diff --git a/video/out/d3d11/ra_d3d11.c b/video/out/d3d11/ra_d3d11.c new file mode 100644 index 0000000..84fd004 --- /dev/null +++ b/video/out/d3d11/ra_d3d11.c @@ -0,0 +1,2544 @@ +#include <windows.h> +#include <versionhelpers.h> +#include <d3d11_1.h> +#include <d3d11sdklayers.h> +#include <dxgi1_2.h> +#include <d3dcompiler.h> +#include <spirv_cross_c.h> + +#include "common/msg.h" +#include "osdep/io.h" +#include "osdep/subprocess.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "video/out/gpu/spirv.h" +#include "video/out/gpu/utils.h" + +#include "ra_d3d11.h" + +#ifndef D3D11_1_UAV_SLOT_COUNT +#define D3D11_1_UAV_SLOT_COUNT (64) +#endif +#define D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE (0x80) + +// D3D11.3 message IDs, not present in mingw-w64 v9 +#define D3D11_MESSAGE_ID_CREATE_FENCE ((D3D11_MESSAGE_ID)0x300209) +#define D3D11_MESSAGE_ID_DESTROY_FENCE ((D3D11_MESSAGE_ID)0x30020b) + +struct dll_version { + uint16_t major; + uint16_t minor; + uint16_t build; + uint16_t revision; +}; + +struct ra_d3d11 { + struct spirv_compiler *spirv; + + ID3D11Device *dev; + ID3D11Device1 *dev1; + ID3D11DeviceContext *ctx; + ID3D11DeviceContext1 *ctx1; + pD3DCompile D3DCompile; + + struct dll_version d3d_compiler_ver; + + // Debug interfaces (--gpu-debug) + ID3D11Debug *debug; + ID3D11InfoQueue *iqueue; + + // Device capabilities + D3D_FEATURE_LEVEL fl; + bool has_clear_view; + bool has_timestamp_queries; + int max_uavs; + + // Streaming dynamic vertex buffer, which is used for all renderpasses + ID3D11Buffer *vbuf; + size_t vbuf_size; + size_t vbuf_used; + + // clear() renderpass resources (only used when has_clear_view is false) + ID3D11PixelShader *clear_ps; + ID3D11VertexShader *clear_vs; + ID3D11InputLayout *clear_layout; + ID3D11Buffer *clear_vbuf; + ID3D11Buffer *clear_cbuf; + + // blit() renderpass resources + ID3D11PixelShader *blit_float_ps; + ID3D11VertexShader *blit_vs; + ID3D11InputLayout *blit_layout; + ID3D11Buffer *blit_vbuf; + 
ID3D11SamplerState *blit_sampler; +}; + +struct d3d_tex { + // res mirrors one of tex1d, tex2d or tex3d for convenience. It does not + // hold an additional reference to the texture object. + ID3D11Resource *res; + + ID3D11Texture1D *tex1d; + ID3D11Texture2D *tex2d; + ID3D11Texture3D *tex3d; + int array_slice; + + // Staging texture for tex_download(), 2D only + ID3D11Texture2D *staging; + + ID3D11ShaderResourceView *srv; + ID3D11RenderTargetView *rtv; + ID3D11UnorderedAccessView *uav; + ID3D11SamplerState *sampler; +}; + +struct d3d_buf { + ID3D11Buffer *buf; + ID3D11UnorderedAccessView *uav; + void *data; // System-memory mirror of the data in buf + bool dirty; // Is buf out of date? +}; + +struct d3d_rpass { + ID3D11PixelShader *ps; + ID3D11VertexShader *vs; + ID3D11ComputeShader *cs; + ID3D11InputLayout *layout; + ID3D11BlendState *bstate; +}; + +struct d3d_timer { + ID3D11Query *ts_start; + ID3D11Query *ts_end; + ID3D11Query *disjoint; + uint64_t result; // Latches the result from the previous use of the timer +}; + +struct d3d_fmt { + const char *name; + int components; + int bytes; + int bits[4]; + DXGI_FORMAT fmt; + enum ra_ctype ctype; + bool unordered; +}; + +static const char clear_vs[] = "\ +float4 main(float2 pos : POSITION) : SV_Position\n\ +{\n\ + return float4(pos, 0.0, 1.0);\n\ +}\n\ +"; + +static const char clear_ps[] = "\ +cbuffer ps_cbuf : register(b0) {\n\ + float4 color : packoffset(c0);\n\ +}\n\ +\n\ +float4 main(float4 pos : SV_Position) : SV_Target\n\ +{\n\ + return color;\n\ +}\n\ +"; + +struct blit_vert { + float x, y, u, v; +}; + +static const char blit_vs[] = "\ +void main(float2 pos : POSITION, float2 coord : TEXCOORD0,\n\ + out float4 out_pos : SV_Position, out float2 out_coord : TEXCOORD0)\n\ +{\n\ + out_pos = float4(pos, 0.0, 1.0);\n\ + out_coord = coord;\n\ +}\n\ +"; + +static const char blit_float_ps[] = "\ +Texture2D<float4> tex : register(t0);\n\ +SamplerState samp : register(s0);\n\ +\n\ +float4 main(float4 pos : SV_Position, 
float2 coord : TEXCOORD0) : SV_Target\n\ +{\n\ + return tex.Sample(samp, coord);\n\ +}\n\ +"; + +#define DXFMT(f, t) .fmt = DXGI_FORMAT_##f##_##t, .ctype = RA_CTYPE_##t +static struct d3d_fmt formats[] = { + { "r8", 1, 1, { 8}, DXFMT(R8, UNORM) }, + { "rg8", 2, 2, { 8, 8}, DXFMT(R8G8, UNORM) }, + { "rgba8", 4, 4, { 8, 8, 8, 8}, DXFMT(R8G8B8A8, UNORM) }, + { "r16", 1, 2, {16}, DXFMT(R16, UNORM) }, + { "rg16", 2, 4, {16, 16}, DXFMT(R16G16, UNORM) }, + { "rgba16", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, UNORM) }, + + { "r32ui", 1, 4, {32}, DXFMT(R32, UINT) }, + { "rg32ui", 2, 8, {32, 32}, DXFMT(R32G32, UINT) }, + { "rgb32ui", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, UINT) }, + { "rgba32ui", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, UINT) }, + + { "r16hf", 1, 2, {16}, DXFMT(R16, FLOAT) }, + { "rg16hf", 2, 4, {16, 16}, DXFMT(R16G16, FLOAT) }, + { "rgba16hf", 4, 8, {16, 16, 16, 16}, DXFMT(R16G16B16A16, FLOAT) }, + { "r32f", 1, 4, {32}, DXFMT(R32, FLOAT) }, + { "rg32f", 2, 8, {32, 32}, DXFMT(R32G32, FLOAT) }, + { "rgb32f", 3, 12, {32, 32, 32}, DXFMT(R32G32B32, FLOAT) }, + { "rgba32f", 4, 16, {32, 32, 32, 32}, DXFMT(R32G32B32A32, FLOAT) }, + + { "rgb10_a2", 4, 4, {10, 10, 10, 2}, DXFMT(R10G10B10A2, UNORM) }, + { "bgra8", 4, 4, { 8, 8, 8, 8}, DXFMT(B8G8R8A8, UNORM), .unordered = true }, + { "bgrx8", 3, 4, { 8, 8, 8}, DXFMT(B8G8R8X8, UNORM), .unordered = true }, +}; + +static bool dll_version_equal(struct dll_version a, struct dll_version b) +{ + return a.major == b.major && + a.minor == b.minor && + a.build == b.build && + a.revision == b.revision; +} + +DXGI_FORMAT ra_d3d11_get_format(const struct ra_format *fmt) +{ + struct d3d_fmt *d3d = fmt->priv; + return d3d->fmt; +} + +const struct ra_format *ra_d3d11_get_ra_format(struct ra *ra, DXGI_FORMAT fmt) +{ + for (int i = 0; i < ra->num_formats; i++) { + struct ra_format *ra_fmt = ra->formats[i]; + + if (ra_d3d11_get_format(ra_fmt) == fmt) + return ra_fmt; + } + + return NULL; +} + +static void setup_formats(struct ra 
*ra) +{ + // All formats must be usable as a 2D texture + static const UINT sup_basic = D3D11_FORMAT_SUPPORT_TEXTURE2D; + // SHADER_SAMPLE indicates support for linear sampling, point always works + static const UINT sup_filter = D3D11_FORMAT_SUPPORT_SHADER_SAMPLE; + // RA requires renderable surfaces to be blendable as well + static const UINT sup_render = D3D11_FORMAT_SUPPORT_RENDER_TARGET | + D3D11_FORMAT_SUPPORT_BLENDABLE; + // Typed UAVs are equivalent to images. RA only cares if they're storable. + static const UINT sup_store = D3D11_FORMAT_SUPPORT_TYPED_UNORDERED_ACCESS_VIEW; + static const UINT sup2_store = D3D11_FORMAT_SUPPORT2_UAV_TYPED_STORE; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + for (int i = 0; i < MP_ARRAY_SIZE(formats); i++) { + struct d3d_fmt *d3dfmt = &formats[i]; + UINT support = 0; + hr = ID3D11Device_CheckFormatSupport(p->dev, d3dfmt->fmt, &support); + if (FAILED(hr)) + continue; + if ((support & sup_basic) != sup_basic) + continue; + + // The result of the SUPPORT2 query is not checked: if the call does not + // fill in sup2, OutFormatSupport2 stays 0 and the format is reported as + // non-storable below + D3D11_FEATURE_DATA_FORMAT_SUPPORT2 sup2 = { .InFormat = d3dfmt->fmt }; + ID3D11Device_CheckFeatureSupport(p->dev, D3D11_FEATURE_FORMAT_SUPPORT2, + &sup2, sizeof(sup2)); + UINT support2 = sup2.OutFormatSupport2; + + struct ra_format *fmt = talloc_zero(ra, struct ra_format); + *fmt = (struct ra_format) { + .name = d3dfmt->name, + .priv = d3dfmt, + .ctype = d3dfmt->ctype, + .ordered = !d3dfmt->unordered, + .num_components = d3dfmt->components, + .pixel_size = d3dfmt->bytes, + .linear_filter = (support & sup_filter) == sup_filter, + .renderable = (support & sup_render) == sup_render, + .storable = p->fl >= D3D_FEATURE_LEVEL_11_0 && + (support & sup_store) == sup_store && + (support2 & sup2_store) == sup2_store, + }; + + if (support & D3D11_FORMAT_SUPPORT_TEXTURE1D) + ra->caps |= RA_CAP_TEX_1D; + + for (int j = 0; j < d3dfmt->components; j++) + fmt->component_size[j] = fmt->component_depth[j] = d3dfmt->bits[j]; + + fmt->glsl_format = ra_fmt_glsl_format(fmt); + + MP_TARRAY_APPEND(ra, ra->formats, 
ra->num_formats, fmt); + } +} + +static bool tex_init(struct ra *ra, struct ra_tex *tex) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + HRESULT hr; + + // A SRV is required for renderpasses and blitting, since blitting can use + // a renderpass internally + if (params->render_src || params->blit_src) { + // Always specify the SRV format for simplicity. This will match the + // texture format for textures created with tex_create, but it can be + // different for wrapped planar video textures. + D3D11_SHADER_RESOURCE_VIEW_DESC srvdesc = { + .Format = ra_d3d11_get_format(params->format), + }; + switch (params->dimensions) { + case 1: + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1DARRAY; + srvdesc.Texture1DArray.MipLevels = 1; + srvdesc.Texture1DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture1DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE1D; + srvdesc.Texture1D.MipLevels = 1; + } + break; + case 2: + if (tex_p->array_slice >= 0) { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2DARRAY; + srvdesc.Texture2DArray.MipLevels = 1; + srvdesc.Texture2DArray.FirstArraySlice = tex_p->array_slice; + srvdesc.Texture2DArray.ArraySize = 1; + } else { + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE2D; + srvdesc.Texture2D.MipLevels = 1; + } + break; + case 3: + // D3D11 does not have Texture3D arrays + srvdesc.ViewDimension = D3D11_SRV_DIMENSION_TEXTURE3D; + srvdesc.Texture3D.MipLevels = 1; + break; + } + hr = ID3D11Device_CreateShaderResourceView(p->dev, tex_p->res, &srvdesc, + &tex_p->srv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create SRV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Samplers are required for renderpasses, but not blitting, since the blit + // code uses its own point sampler + if (params->render_src) { + D3D11_SAMPLER_DESC sdesc = { + .AddressU = 
D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + .MaxAnisotropy = 1, + }; + if (params->src_linear) + sdesc.Filter = D3D11_FILTER_MIN_MAG_MIP_LINEAR; + if (params->src_repeat) { + sdesc.AddressU = sdesc.AddressV = sdesc.AddressW = + D3D11_TEXTURE_ADDRESS_WRAP; + } + // The runtime pools sampler state objects internally, so we don't have + // to worry about resource usage when creating one for every ra_tex + hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &tex_p->sampler); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create sampler: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + // Like SRVs, an RTV is required for renderpass output and blitting + if (params->render_dst || params->blit_dst) { + hr = ID3D11Device_CreateRenderTargetView(p->dev, tex_p->res, NULL, + &tex_p->rtv); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create RTV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) { + hr = ID3D11Device_CreateUnorderedAccessView(p->dev, tex_p->res, NULL, + &tex_p->uav); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + return true; +error: + return false; +} + +static void tex_destroy(struct ra *ra, struct ra_tex *tex) +{ + if (!tex) + return; + struct d3d_tex *tex_p = tex->priv; + + SAFE_RELEASE(tex_p->srv); + SAFE_RELEASE(tex_p->rtv); + SAFE_RELEASE(tex_p->uav); + SAFE_RELEASE(tex_p->sampler); + SAFE_RELEASE(tex_p->res); + SAFE_RELEASE(tex_p->staging); + talloc_free(tex); +} + +static struct ra_tex *tex_create(struct ra *ra, + const struct ra_tex_params *params) +{ + // Only 2D textures may be downloaded for now + if (params->downloadable && params->dimensions != 2) + return NULL; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + struct ra_tex *tex = 
talloc_zero(NULL, struct ra_tex); + tex->params = *params; + tex->params.initial_data = NULL; + + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + DXGI_FORMAT fmt = ra_d3d11_get_format(params->format); + + D3D11_SUBRESOURCE_DATA data; + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if (params->initial_data) { + data = (D3D11_SUBRESOURCE_DATA) { + .pSysMem = params->initial_data, + .SysMemPitch = params->w * params->format->pixel_size, + }; + if (params->dimensions >= 3) + data.SysMemSlicePitch = data.SysMemPitch * params->h; + pdata = &data; + } + + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + if (params->render_src || params->blit_src) + bind_flags |= D3D11_BIND_SHADER_RESOURCE; + if (params->render_dst || params->blit_dst) + bind_flags |= D3D11_BIND_RENDER_TARGET; + if (p->fl >= D3D_FEATURE_LEVEL_11_0 && params->storage_dst) + bind_flags |= D3D11_BIND_UNORDERED_ACCESS; + + // Apparently IMMUTABLE textures are efficient, so try to infer whether we + // can use one + if (params->initial_data && !params->render_dst && !params->storage_dst && + !params->blit_dst && !params->host_mutable) + usage = D3D11_USAGE_IMMUTABLE; + + switch (params->dimensions) { + case 1:; + D3D11_TEXTURE1D_DESC desc1d = { + .Width = params->w, + .MipLevels = 1, + .ArraySize = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture1D(p->dev, &desc1d, pdata, &tex_p->tex1d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture1D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex1d; + break; + case 2:; + D3D11_TEXTURE2D_DESC desc2d = { + .Width = params->w, + .Height = params->h, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, pdata, &tex_p->tex2d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture2D: %s\n", 
+ mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + + // Create a staging texture with CPU access for tex_download() + if (params->downloadable) { + desc2d.BindFlags = 0; + desc2d.CPUAccessFlags = D3D11_CPU_ACCESS_READ; + desc2d.Usage = D3D11_USAGE_STAGING; + + hr = ID3D11Device_CreateTexture2D(p->dev, &desc2d, NULL, + &tex_p->staging); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to staging texture: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + } + break; + case 3:; + D3D11_TEXTURE3D_DESC desc3d = { + .Width = params->w, + .Height = params->h, + .Depth = params->d, + .MipLevels = 1, + .Format = fmt, + .Usage = usage, + .BindFlags = bind_flags, + }; + hr = ID3D11Device_CreateTexture3D(p->dev, &desc3d, pdata, &tex_p->tex3d); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create Texture3D: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex3d; + break; + default: + MP_ASSERT_UNREACHABLE(); + } + + tex_p->array_slice = -1; + + if (!tex_init(ra, tex)) + goto error; + + return tex; + +error: + tex_destroy(ra, tex); + return NULL; +} + +struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res) +{ + HRESULT hr; + + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + struct ra_tex_params *params = &tex->params; + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + + DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; + D3D11_USAGE usage = D3D11_USAGE_DEFAULT; + D3D11_BIND_FLAG bind_flags = 0; + + D3D11_RESOURCE_DIMENSION type; + ID3D11Resource_GetType(res, &type); + switch (type) { + case D3D11_RESOURCE_DIMENSION_TEXTURE2D: + hr = ID3D11Resource_QueryInterface(res, &IID_ID3D11Texture2D, + (void**)&tex_p->tex2d); + if (FAILED(hr)) { + MP_ERR(ra, "Resource is not a ID3D11Texture2D\n"); + goto error; + } + tex_p->res = (ID3D11Resource *)tex_p->tex2d; + + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); + if (desc2d.MipLevels != 1) { + 
MP_ERR(ra, "Mipmapped textures not supported for wrapping\n"); + goto error; + } + if (desc2d.ArraySize != 1) { + MP_ERR(ra, "Texture arrays not supported for wrapping\n"); + goto error; + } + if (desc2d.SampleDesc.Count != 1) { + MP_ERR(ra, "Multisampled textures not supported for wrapping\n"); + goto error; + } + + params->dimensions = 2; + params->w = desc2d.Width; + params->h = desc2d.Height; + params->d = 1; + usage = desc2d.Usage; + bind_flags = desc2d.BindFlags; + fmt = desc2d.Format; + break; + default: + // We could wrap Texture1D/3D as well, but keep it simple, since this + // function is only used for swapchain backbuffers at the moment + MP_ERR(ra, "Resource is not suitable to wrap\n"); + goto error; + } + + for (int i = 0; i < ra->num_formats; i++) { + DXGI_FORMAT target_fmt = ra_d3d11_get_format(ra->formats[i]); + if (fmt == target_fmt) { + params->format = ra->formats[i]; + break; + } + } + if (!params->format) { + MP_ERR(ra, "Could not find a suitable RA format for wrapped resource\n"); + goto error; + } + + if (bind_flags & D3D11_BIND_SHADER_RESOURCE) { + params->render_src = params->blit_src = true; + params->src_linear = params->format->linear_filter; + } + if (bind_flags & D3D11_BIND_RENDER_TARGET) + params->render_dst = params->blit_dst = true; + if (bind_flags & D3D11_BIND_UNORDERED_ACCESS) + params->storage_dst = true; + + if (usage != D3D11_USAGE_DEFAULT) { + MP_ERR(ra, "Resource is not D3D11_USAGE_DEFAULT\n"); + goto error; + } + + tex_p->array_slice = -1; + + if (!tex_init(ra, tex)) + goto error; + + return tex; +error: + tex_destroy(ra, tex); + return NULL; +} + +struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, + int w, int h, int array_slice, + const struct ra_format *fmt) +{ + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + struct ra_tex_params *params = &tex->params; + struct d3d_tex *tex_p = tex->priv = talloc_zero(tex, struct d3d_tex); + + tex_p->tex2d = res; + tex_p->res = (ID3D11Resource 
*)tex_p->tex2d; + ID3D11Texture2D_AddRef(res); + + D3D11_TEXTURE2D_DESC desc2d; + ID3D11Texture2D_GetDesc(tex_p->tex2d, &desc2d); + if (!(desc2d.BindFlags & D3D11_BIND_SHADER_RESOURCE)) { + MP_ERR(ra, "Video resource is not bindable\n"); + goto error; + } + + params->dimensions = 2; + params->w = w; + params->h = h; + params->d = 1; + params->render_src = true; + params->src_linear = true; + // fmt can be different to the texture format for planar video textures + params->format = fmt; + + if (desc2d.ArraySize > 1) { + tex_p->array_slice = array_slice; + } else { + tex_p->array_slice = -1; + } + + if (!tex_init(ra, tex)) + goto error; + + return tex; +error: + tex_destroy(ra, tex); + return NULL; +} + +ID3D11Resource *ra_d3d11_get_raw_tex(struct ra *ra, struct ra_tex *tex, + int *array_slice) +{ + struct d3d_tex *tex_p = tex->priv; + + ID3D11Resource_AddRef(tex_p->res); + if (array_slice) + *array_slice = tex_p->array_slice; + return tex_p->res; +} + +static bool tex_upload(struct ra *ra, const struct ra_tex_upload_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_tex *tex = params->tex; + struct d3d_tex *tex_p = tex->priv; + + if (!params->src) { + MP_ERR(ra, "Pixel buffers are not supported\n"); + return false; + } + + const char *src = params->src; + ptrdiff_t stride = tex->params.dimensions >= 2 ? tex->params.w : 0; + ptrdiff_t pitch = tex->params.dimensions >= 3 ? stride * tex->params.h : 0; + bool invalidate = true; + D3D11_BOX rc; + D3D11_BOX *prc = NULL; + + if (tex->params.dimensions == 2) { + stride = params->stride; + + if (params->rc && (params->rc->x0 != 0 || params->rc->y0 != 0 || + params->rc->x1 != tex->params.w || params->rc->y1 != tex->params.h)) + { + rc = (D3D11_BOX) { + .left = params->rc->x0, + .top = params->rc->y0, + .front = 0, + .right = params->rc->x1, + .bottom = params->rc->y1, + .back = 1, + }; + prc = &rc; + invalidate = params->invalidate; + } + } + + int subresource = tex_p->array_slice >= 0 ? 
tex_p->array_slice : 0; + if (p->ctx1) { + ID3D11DeviceContext1_UpdateSubresource1(p->ctx1, tex_p->res, + subresource, prc, src, stride, pitch, + invalidate ? D3D11_COPY_DISCARD : 0); + } else { + ID3D11DeviceContext_UpdateSubresource(p->ctx, tex_p->res, subresource, + prc, src, stride, pitch); + } + + return true; +} + +static bool tex_download(struct ra *ra, struct ra_tex_download_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_tex *tex = params->tex; + struct d3d_tex *tex_p = tex->priv; + HRESULT hr; + + if (!tex_p->staging) + return false; + + ID3D11DeviceContext_CopyResource(p->ctx, (ID3D11Resource*)tex_p->staging, + tex_p->res); + + D3D11_MAPPED_SUBRESOURCE lock; + hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource*)tex_p->staging, 0, + D3D11_MAP_READ, 0, &lock); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to map staging texture: %s\n", mp_HRESULT_to_str(hr)); + return false; + } + + char *cdst = params->dst; + char *csrc = lock.pData; + for (int y = 0; y < tex->params.h; y++) { + memcpy(cdst + y * params->stride, csrc + y * lock.RowPitch, + MPMIN(params->stride, lock.RowPitch)); + } + + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource*)tex_p->staging, 0); + + return true; +} + +static void buf_destroy(struct ra *ra, struct ra_buf *buf) +{ + if (!buf) + return; + struct d3d_buf *buf_p = buf->priv; + SAFE_RELEASE(buf_p->buf); + SAFE_RELEASE(buf_p->uav); + talloc_free(buf); +} + +static struct ra_buf *buf_create(struct ra *ra, + const struct ra_buf_params *params) +{ + // D3D11 does not support permanent mapping or pixel buffers + if (params->host_mapped || params->type == RA_BUF_TYPE_TEX_UPLOAD) + return NULL; + + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + struct ra_buf *buf = talloc_zero(NULL, struct ra_buf); + buf->params = *params; + buf->params.initial_data = NULL; + + struct d3d_buf *buf_p = buf->priv = talloc_zero(buf, struct d3d_buf); + + D3D11_SUBRESOURCE_DATA data; + D3D11_SUBRESOURCE_DATA *pdata = NULL; + if 
(params->initial_data) { + data = (D3D11_SUBRESOURCE_DATA) { .pSysMem = params->initial_data }; + pdata = &data; + } + + D3D11_BUFFER_DESC desc = { .ByteWidth = params->size }; + switch (params->type) { + case RA_BUF_TYPE_SHADER_STORAGE: + desc.BindFlags = D3D11_BIND_UNORDERED_ACCESS; + desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float)); + desc.MiscFlags = D3D11_RESOURCE_MISC_BUFFER_ALLOW_RAW_VIEWS; + break; + case RA_BUF_TYPE_UNIFORM: + desc.BindFlags = D3D11_BIND_CONSTANT_BUFFER; + desc.ByteWidth = MP_ALIGN_UP(desc.ByteWidth, sizeof(float[4])); + break; + } + + hr = ID3D11Device_CreateBuffer(p->dev, &desc, pdata, &buf_p->buf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create buffer: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + // D3D11 doesn't allow constant buffer updates that aren't aligned to a + // full constant boundary (vec4,) and some drivers don't allow partial + // constant buffer updates at all. To support partial buffer updates, keep + // a mirror of the buffer data in system memory and upload the whole thing + // before the buffer is used. 
+ if (params->host_mutable) + buf_p->data = talloc_zero_size(buf, desc.ByteWidth); + + if (params->type == RA_BUF_TYPE_SHADER_STORAGE) { + D3D11_UNORDERED_ACCESS_VIEW_DESC udesc = { + .Format = DXGI_FORMAT_R32_TYPELESS, + .ViewDimension = D3D11_UAV_DIMENSION_BUFFER, + .Buffer = { + .NumElements = desc.ByteWidth / sizeof(float), + .Flags = D3D11_BUFFER_UAV_FLAG_RAW, + }, + }; + hr = ID3D11Device_CreateUnorderedAccessView(p->dev, + (ID3D11Resource *)buf_p->buf, &udesc, &buf_p->uav); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create UAV: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + } + + return buf; +error: + buf_destroy(ra, buf); + return NULL; +} + +static void buf_resolve(struct ra *ra, struct ra_buf *buf) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_buf *buf_p = buf->priv; + + if (!buf->params.host_mutable || !buf_p->dirty) + return; + + // Synchronize the GPU buffer with the system-memory copy + ID3D11DeviceContext_UpdateSubresource(p->ctx, (ID3D11Resource *)buf_p->buf, + 0, NULL, buf_p->data, 0, 0); + buf_p->dirty = false; +} + +static void buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size) +{ + struct d3d_buf *buf_p = buf->priv; + + char *cdata = buf_p->data; + memcpy(cdata + offset, data, size); + buf_p->dirty = true; +} + +static const char *get_shader_target(struct ra *ra, enum glsl_shader type) +{ + struct ra_d3d11 *p = ra->priv; + switch (p->fl) { + default: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_5_0"; + case GLSL_SHADER_FRAGMENT: return "ps_5_0"; + case GLSL_SHADER_COMPUTE: return "cs_5_0"; + } + break; + case D3D_FEATURE_LEVEL_10_1: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_1"; + case GLSL_SHADER_FRAGMENT: return "ps_4_1"; + case GLSL_SHADER_COMPUTE: return "cs_4_1"; + } + break; + case D3D_FEATURE_LEVEL_10_0: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0"; + case GLSL_SHADER_COMPUTE: return "cs_4_0"; + } + 
break; + case D3D_FEATURE_LEVEL_9_3: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_3"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_3"; + } + break; + case D3D_FEATURE_LEVEL_9_2: + case D3D_FEATURE_LEVEL_9_1: + switch (type) { + case GLSL_SHADER_VERTEX: return "vs_4_0_level_9_1"; + case GLSL_SHADER_FRAGMENT: return "ps_4_0_level_9_1"; + } + break; + } + return NULL; +} + +static const char *shader_type_name(enum glsl_shader type) +{ + switch (type) { + case GLSL_SHADER_VERTEX: return "vertex"; + case GLSL_SHADER_FRAGMENT: return "fragment"; + case GLSL_SHADER_COMPUTE: return "compute"; + default: return "unknown"; + } +} + +static bool setup_clear_rpass(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *ps_blob = NULL; + HRESULT hr; + + hr = p->D3DCompile(clear_vs, sizeof(clear_vs), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_VERTEX), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile clear() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, + ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), + NULL, &p->clear_vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = p->D3DCompile(clear_ps, sizeof(clear_ps), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_FRAGMENT), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &ps_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile clear() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, + ID3D10Blob_GetBufferPointer(ps_blob), ID3D10Blob_GetBufferSize(ps_blob), + NULL, &p->clear_ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC in_descs[] 
= { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 }, + }; + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), &p->clear_layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() IA layout: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + // clear() always draws to a quad covering the whole viewport + static const float verts[] = { + -1, -1, + 1, -1, + 1, 1, + -1, 1, + -1, -1, + 1, 1, + }; + D3D11_BUFFER_DESC vdesc = { + .ByteWidth = sizeof(verts), + .Usage = D3D11_USAGE_IMMUTABLE, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + }; + D3D11_SUBRESOURCE_DATA vdata = { + .pSysMem = verts, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, &vdata, &p->clear_vbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_BUFFER_DESC cdesc = { + .ByteWidth = sizeof(float[4]), + .BindFlags = D3D11_BIND_CONSTANT_BUFFER, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &cdesc, NULL, &p->clear_cbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create clear() constant buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return true; +error: + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return false; +} + +static void clear_rpass(struct ra *ra, struct ra_tex *tex, float color[4], + struct mp_rect *rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + + ID3D11DeviceContext_UpdateSubresource(p->ctx, + (ID3D11Resource *)p->clear_cbuf, 0, NULL, color, 0, 0); + + ID3D11DeviceContext_IASetInputLayout(p->ctx, p->clear_layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->clear_vbuf, + &(UINT) { sizeof(float[2]) }, &(UINT) { 0 }); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + 
ID3D11DeviceContext_VSSetShader(p->ctx, p->clear_vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .Width = params->w, + .Height = params->h, + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = rc->x0, + .top = rc->y0, + .right = rc->x1, + .bottom = rc->y1, + })); + ID3D11DeviceContext_PSSetShader(p->ctx, p->clear_ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, &p->clear_cbuf); + + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &tex_p->rtv, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, 6, 0); + + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, 1, + &(ID3D11Buffer *){ NULL }); + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL); +} + +static void clear(struct ra *ra, struct ra_tex *tex, float color[4], + struct mp_rect *rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *tex_p = tex->priv; + struct ra_tex_params *params = &tex->params; + + if (!tex_p->rtv) + return; + + if (rc->x0 || rc->y0 || rc->x1 != params->w || rc->y1 != params->h) { + if (p->has_clear_view) { + ID3D11DeviceContext1_ClearView(p->ctx1, (ID3D11View *)tex_p->rtv, + color, (&(D3D11_RECT) { + .left = rc->x0, + .top = rc->y0, + .right = rc->x1, + .bottom = rc->y1, + }), 1); + } else { + clear_rpass(ra, tex, color, rc); + } + } else { + ID3D11DeviceContext_ClearRenderTargetView(p->ctx, tex_p->rtv, color); + } +} + +static bool setup_blit_rpass(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *float_ps_blob = NULL; + HRESULT hr; + + hr = p->D3DCompile(blit_vs, sizeof(blit_vs), NULL, NULL, NULL, "main", + get_shader_target(ra, GLSL_SHADER_VERTEX), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &vs_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile blit() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr 
= ID3D11Device_CreateVertexShader(p->dev, + ID3D10Blob_GetBufferPointer(vs_blob), ID3D10Blob_GetBufferSize(vs_blob), + NULL, &p->blit_vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = p->D3DCompile(blit_float_ps, sizeof(blit_float_ps), NULL, NULL, NULL, + "main", get_shader_target(ra, GLSL_SHADER_FRAGMENT), + D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, &float_ps_blob, NULL); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to compile blit() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, + ID3D10Blob_GetBufferPointer(float_ps_blob), + ID3D10Blob_GetBufferSize(float_ps_blob), + NULL, &p->blit_float_ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC in_descs[] = { + { "POSITION", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 0 }, + { "TEXCOORD", 0, DXGI_FORMAT_R32G32_FLOAT, 0, 8 }, + }; + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + MP_ARRAY_SIZE(in_descs), ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), &p->blit_layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() IA layout: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_BUFFER_DESC vdesc = { + .ByteWidth = sizeof(struct blit_vert[6]), + .Usage = D3D11_USAGE_DEFAULT, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vdesc, NULL, &p->blit_vbuf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + // Blit always uses point sampling, regardless of the source texture + D3D11_SAMPLER_DESC sdesc = { + .AddressU = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressV = D3D11_TEXTURE_ADDRESS_CLAMP, + .AddressW = D3D11_TEXTURE_ADDRESS_CLAMP, + .ComparisonFunc = D3D11_COMPARISON_NEVER, + .MinLOD = 0, + .MaxLOD = D3D11_FLOAT32_MAX, + 
.MaxAnisotropy = 1, + }; + hr = ID3D11Device_CreateSamplerState(p->dev, &sdesc, &p->blit_sampler); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blit() sampler: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(float_ps_blob); + return true; +error: + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(float_ps_blob); + return false; +} + +static void blit_rpass(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *dst_p = dst->priv; + struct d3d_tex *src_p = src->priv; + + float u_min = (double)src_rc->x0 / src->params.w; + float u_max = (double)src_rc->x1 / src->params.w; + float v_min = (double)src_rc->y0 / src->params.h; + float v_max = (double)src_rc->y1 / src->params.h; + + struct blit_vert verts[6] = { + { .x = -1, .y = -1, .u = u_min, .v = v_max }, + { .x = 1, .y = -1, .u = u_max, .v = v_max }, + { .x = 1, .y = 1, .u = u_max, .v = v_min }, + { .x = -1, .y = 1, .u = u_min, .v = v_min }, + }; + verts[4] = verts[0]; + verts[5] = verts[2]; + ID3D11DeviceContext_UpdateSubresource(p->ctx, + (ID3D11Resource *)p->blit_vbuf, 0, NULL, verts, 0, 0); + + ID3D11DeviceContext_IASetInputLayout(p->ctx, p->blit_layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->blit_vbuf, + &(UINT) { sizeof(verts[0]) }, &(UINT) { 0 }); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, p->blit_vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .TopLeftX = dst_rc->x0, + .TopLeftY = dst_rc->y0, + .Width = mp_rect_w(*dst_rc), + .Height = mp_rect_h(*dst_rc), + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = dst_rc->x0, + .top = dst_rc->y0, + .right = dst_rc->x1, + .bottom = dst_rc->y1, + })); + + ID3D11DeviceContext_PSSetShader(p->ctx, 
p->blit_float_ps, NULL, 0); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, &src_p->srv); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, &p->blit_sampler); + + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 1, &dst_p->rtv, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, NULL, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, 6, 0); + + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, 1, + &(ID3D11ShaderResourceView *) { NULL }); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, 1, + &(ID3D11SamplerState *) { NULL }); + ID3D11DeviceContext_OMSetRenderTargets(p->ctx, 0, NULL, NULL); +} + +static void blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc_ptr, struct mp_rect *src_rc_ptr) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_tex *dst_p = dst->priv; + struct d3d_tex *src_p = src->priv; + struct mp_rect dst_rc = *dst_rc_ptr; + struct mp_rect src_rc = *src_rc_ptr; + + assert(dst->params.dimensions == 2); + assert(src->params.dimensions == 2); + + // A zero-sized target rectangle is a no-op + if (!mp_rect_w(dst_rc) || !mp_rect_h(dst_rc)) + return; + + // ra.h seems to imply that both dst_rc and src_rc can be flipped, but it's + // easier for blit_rpass() if only src_rc can be flipped, so unflip dst_rc. + if (dst_rc.x0 > dst_rc.x1) { + MPSWAP(int, dst_rc.x0, dst_rc.x1); + MPSWAP(int, src_rc.x0, src_rc.x1); + } + if (dst_rc.y0 > dst_rc.y1) { + MPSWAP(int, dst_rc.y0, dst_rc.y1); + MPSWAP(int, src_rc.y0, src_rc.y1); + } + + // If format conversion, stretching or flipping is required, a renderpass + // must be used + if (dst->params.format != src->params.format || + mp_rect_w(dst_rc) != mp_rect_w(src_rc) || + mp_rect_h(dst_rc) != mp_rect_h(src_rc)) + { + blit_rpass(ra, dst, src, &dst_rc, &src_rc); + } else { + int dst_sr = dst_p->array_slice >= 0 ? dst_p->array_slice : 0; + int src_sr = src_p->array_slice >= 0 ? 
src_p->array_slice : 0; + ID3D11DeviceContext_CopySubresourceRegion(p->ctx, dst_p->res, dst_sr, + dst_rc.x0, dst_rc.y0, 0, src_p->res, src_sr, (&(D3D11_BOX) { + .left = src_rc.x0, + .top = src_rc.y0, + .front = 0, + .right = src_rc.x1, + .bottom = src_rc.y1, + .back = 1, + })); + } +} + +static int desc_namespace(struct ra *ra, enum ra_vartype type) +{ + // Images and SSBOs both use UAV bindings + if (type == RA_VARTYPE_IMG_W) + type = RA_VARTYPE_BUF_RW; + return type; +} + +static bool compile_glsl(struct ra *ra, enum glsl_shader type, + const char *glsl, ID3DBlob **out) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + void *ta_ctx = talloc_new(NULL); + spvc_result sc_res = SPVC_SUCCESS; + spvc_context sc_ctx = NULL; + spvc_parsed_ir sc_ir = NULL; + spvc_compiler sc_compiler = NULL; + spvc_compiler_options sc_opts = NULL; + const char *hlsl = NULL; + ID3DBlob *errors = NULL; + bool success = false; + HRESULT hr; + + int sc_shader_model; + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + sc_shader_model = 50; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_1) { + sc_shader_model = 41; + } else { + sc_shader_model = 40; + } + + int64_t start_ns = mp_time_ns(); + + bstr spv_module; + if (!spirv->fns->compile_glsl(spirv, ta_ctx, type, glsl, &spv_module)) + goto done; + + int64_t shaderc_ns = mp_time_ns(); + + sc_res = spvc_context_create(&sc_ctx); + if (sc_res != SPVC_SUCCESS) + goto done; + + sc_res = spvc_context_parse_spirv(sc_ctx, (SpvId *)spv_module.start, + spv_module.len / sizeof(SpvId), &sc_ir); + if (sc_res != SPVC_SUCCESS) + goto done; + + sc_res = spvc_context_create_compiler(sc_ctx, SPVC_BACKEND_HLSL, sc_ir, + SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, + &sc_compiler); + if (sc_res != SPVC_SUCCESS) + goto done; + + sc_res = spvc_compiler_create_compiler_options(sc_compiler, &sc_opts); + if (sc_res != SPVC_SUCCESS) + goto done; + sc_res = spvc_compiler_options_set_uint(sc_opts, + SPVC_COMPILER_OPTION_HLSL_SHADER_MODEL, sc_shader_model); + if 
(sc_res != SPVC_SUCCESS) + goto done; + if (type == GLSL_SHADER_VERTEX) { + // FLIP_VERTEX_Y is only valid for vertex shaders + sc_res = spvc_compiler_options_set_bool(sc_opts, + SPVC_COMPILER_OPTION_FLIP_VERTEX_Y, SPVC_TRUE); + if (sc_res != SPVC_SUCCESS) + goto done; + } + sc_res = spvc_compiler_install_compiler_options(sc_compiler, sc_opts); + if (sc_res != SPVC_SUCCESS) + goto done; + + sc_res = spvc_compiler_compile(sc_compiler, &hlsl); + if (sc_res != SPVC_SUCCESS) + goto done; + + int64_t cross_ns = mp_time_ns(); + + hr = p->D3DCompile(hlsl, strlen(hlsl), NULL, NULL, NULL, "main", + get_shader_target(ra, type), D3DCOMPILE_OPTIMIZATION_LEVEL3, 0, out, + &errors); + if (FAILED(hr)) { + MP_ERR(ra, "D3DCompile failed: %s\n%.*s", mp_HRESULT_to_str(hr), + (int)ID3D10Blob_GetBufferSize(errors), + (char*)ID3D10Blob_GetBufferPointer(errors)); + goto done; + } + + int64_t d3dcompile_ns = mp_time_ns(); + + MP_VERBOSE(ra, "Compiled a %s shader in %lldns\n", shader_type_name(type), + d3dcompile_ns - start_ns); + MP_VERBOSE(ra, "shaderc: %lldns, SPIRV-Cross: %lldns, D3DCompile: %lldns\n", + shaderc_ns - start_ns, + cross_ns - shaderc_ns, + d3dcompile_ns - cross_ns); + + success = true; +done: + if (sc_res != SPVC_SUCCESS) { + MP_MSG(ra, MSGL_ERR, "SPIRV-Cross failed: %s\n", + spvc_context_get_last_error_string(sc_ctx)); + } + int level = success ? 
MSGL_DEBUG : MSGL_ERR; + MP_MSG(ra, level, "GLSL source:\n"); + mp_log_source(ra->log, level, glsl); + if (hlsl) { + MP_MSG(ra, level, "HLSL source:\n"); + mp_log_source(ra->log, level, hlsl); + } + SAFE_RELEASE(errors); + if (sc_ctx) + spvc_context_destroy(sc_ctx); + talloc_free(ta_ctx); + return success; +} + +static void renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) +{ + if (!pass) + return; + struct d3d_rpass *pass_p = pass->priv; + + SAFE_RELEASE(pass_p->vs); + SAFE_RELEASE(pass_p->ps); + SAFE_RELEASE(pass_p->cs); + SAFE_RELEASE(pass_p->layout); + SAFE_RELEASE(pass_p->bstate); + talloc_free(pass); +} + +static D3D11_BLEND map_ra_blend(enum ra_blend blend) +{ + switch (blend) { + default: + case RA_BLEND_ZERO: return D3D11_BLEND_ZERO; + case RA_BLEND_ONE: return D3D11_BLEND_ONE; + case RA_BLEND_SRC_ALPHA: return D3D11_BLEND_SRC_ALPHA; + case RA_BLEND_ONE_MINUS_SRC_ALPHA: return D3D11_BLEND_INV_SRC_ALPHA; + }; +} + +static size_t vbuf_upload(struct ra *ra, void *data, size_t size) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + // Arbitrary size limit in case there is an insane number of vertices + if (size > 1e9) { + MP_ERR(ra, "Vertex buffer is too large\n"); + return -1; + } + + // If the vertex data doesn't fit, realloc the vertex buffer + if (size > p->vbuf_size) { + size_t new_size = p->vbuf_size; + // Arbitrary base size + if (!new_size) + new_size = 64 * 1024; + while (new_size < size) + new_size *= 2; + + ID3D11Buffer *new_buf; + D3D11_BUFFER_DESC vbuf_desc = { + .ByteWidth = new_size, + .Usage = D3D11_USAGE_DYNAMIC, + .BindFlags = D3D11_BIND_VERTEX_BUFFER, + .CPUAccessFlags = D3D11_CPU_ACCESS_WRITE, + }; + hr = ID3D11Device_CreateBuffer(p->dev, &vbuf_desc, NULL, &new_buf); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create vertex buffer: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + SAFE_RELEASE(p->vbuf); + p->vbuf = new_buf; + p->vbuf_size = new_size; + p->vbuf_used = 0; + } + + bool discard = false; + size_t offset = 
p->vbuf_used; + if (offset + size > p->vbuf_size) { + // We reached the end of the buffer, so discard and wrap around + discard = true; + offset = 0; + } + + D3D11_MAPPED_SUBRESOURCE map = { 0 }; + hr = ID3D11DeviceContext_Map(p->ctx, (ID3D11Resource *)p->vbuf, 0, + discard ? D3D11_MAP_WRITE_DISCARD : D3D11_MAP_WRITE_NO_OVERWRITE, + 0, &map); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to map vertex buffer: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + char *cdata = map.pData; + memcpy(cdata + offset, data, size); + + ID3D11DeviceContext_Unmap(p->ctx, (ID3D11Resource *)p->vbuf, 0); + + p->vbuf_used = offset + size; + return offset; +} + +static const char cache_magic[4] = "RD11"; +static const int cache_version = 3; + +struct cache_header { + char magic[sizeof(cache_magic)]; + int cache_version; + char compiler[SPIRV_NAME_MAX_LEN]; + int spv_compiler_version; + unsigned spvc_compiler_major; + unsigned spvc_compiler_minor; + unsigned spvc_compiler_patch; + struct dll_version d3d_compiler_version; + int feature_level; + size_t vert_bytecode_len; + size_t frag_bytecode_len; + size_t comp_bytecode_len; +}; + +static void load_cached_program(struct ra *ra, + const struct ra_renderpass_params *params, + bstr *vert_bc, + bstr *frag_bc, + bstr *comp_bc) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + bstr cache = params->cached_program; + + if (cache.len < sizeof(struct cache_header)) + return; + + struct cache_header *header = (struct cache_header *)cache.start; + cache = bstr_cut(cache, sizeof(*header)); + + unsigned spvc_major, spvc_minor, spvc_patch; + spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch); + + if (strncmp(header->magic, cache_magic, sizeof(cache_magic)) != 0) + return; + if (header->cache_version != cache_version) + return; + if (strncmp(header->compiler, spirv->name, sizeof(header->compiler)) != 0) + return; + if (header->spv_compiler_version != spirv->compiler_version) + return; + if (header->spvc_compiler_major 
!= spvc_major) + return; + if (header->spvc_compiler_minor != spvc_minor) + return; + if (header->spvc_compiler_patch != spvc_patch) + return; + if (!dll_version_equal(header->d3d_compiler_version, p->d3d_compiler_ver)) + return; + if (header->feature_level != p->fl) + return; + + if (header->vert_bytecode_len && vert_bc) { + *vert_bc = bstr_splice(cache, 0, header->vert_bytecode_len); + MP_VERBOSE(ra, "Using cached vertex shader\n"); + } + cache = bstr_cut(cache, header->vert_bytecode_len); + + if (header->frag_bytecode_len && frag_bc) { + *frag_bc = bstr_splice(cache, 0, header->frag_bytecode_len); + MP_VERBOSE(ra, "Using cached fragment shader\n"); + } + cache = bstr_cut(cache, header->frag_bytecode_len); + + if (header->comp_bytecode_len && comp_bc) { + *comp_bc = bstr_splice(cache, 0, header->comp_bytecode_len); + MP_VERBOSE(ra, "Using cached compute shader\n"); + } + cache = bstr_cut(cache, header->comp_bytecode_len); +} + +static void save_cached_program(struct ra *ra, struct ra_renderpass *pass, + bstr vert_bc, + bstr frag_bc, + bstr comp_bc) +{ + struct ra_d3d11 *p = ra->priv; + struct spirv_compiler *spirv = p->spirv; + + unsigned spvc_major, spvc_minor, spvc_patch; + spvc_get_version(&spvc_major, &spvc_minor, &spvc_patch); + + struct cache_header header = { + .cache_version = cache_version, + .spv_compiler_version = p->spirv->compiler_version, + .spvc_compiler_major = spvc_major, + .spvc_compiler_minor = spvc_minor, + .spvc_compiler_patch = spvc_patch, + .d3d_compiler_version = p->d3d_compiler_ver, + .feature_level = p->fl, + .vert_bytecode_len = vert_bc.len, + .frag_bytecode_len = frag_bc.len, + .comp_bytecode_len = comp_bc.len, + }; + memcpy(header.magic, cache_magic, sizeof(header.magic)); + strncpy(header.compiler, spirv->name, sizeof(header.compiler)); + + struct bstr *prog = &pass->params.cached_program; + bstr_xappend(pass, prog, (bstr){ (char *) &header, sizeof(header) }); + bstr_xappend(pass, prog, vert_bc); + bstr_xappend(pass, prog, frag_bc); 
+ bstr_xappend(pass, prog, comp_bc); +} + +static struct ra_renderpass *renderpass_create_raster(struct ra *ra, + struct ra_renderpass *pass, const struct ra_renderpass_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_rpass *pass_p = pass->priv; + ID3DBlob *vs_blob = NULL; + ID3DBlob *ps_blob = NULL; + HRESULT hr; + + // load_cached_program will load compiled shader bytecode into vert_bc and + // frag_bc if the cache is valid. If not, vert_bc/frag_bc will remain NULL. + bstr vert_bc = {0}; + bstr frag_bc = {0}; + load_cached_program(ra, params, &vert_bc, &frag_bc, NULL); + + if (!vert_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_VERTEX, params->vertex_shader, + &vs_blob)) + goto error; + vert_bc = (bstr){ + ID3D10Blob_GetBufferPointer(vs_blob), + ID3D10Blob_GetBufferSize(vs_blob), + }; + } + + hr = ID3D11Device_CreateVertexShader(p->dev, vert_bc.start, vert_bc.len, + NULL, &pass_p->vs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create vertex shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + if (!frag_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_FRAGMENT, params->frag_shader, + &ps_blob)) + goto error; + frag_bc = (bstr){ + ID3D10Blob_GetBufferPointer(ps_blob), + ID3D10Blob_GetBufferSize(ps_blob), + }; + } + + hr = ID3D11Device_CreatePixelShader(p->dev, frag_bc.start, frag_bc.len, + NULL, &pass_p->ps); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create pixel shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + D3D11_INPUT_ELEMENT_DESC *in_descs = talloc_array(pass, + D3D11_INPUT_ELEMENT_DESC, params->num_vertex_attribs); + for (int i = 0; i < params->num_vertex_attribs; i++) { + struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i]; + + DXGI_FORMAT fmt = DXGI_FORMAT_UNKNOWN; + switch (inp->type) { + case RA_VARTYPE_FLOAT: + switch (inp->dim_v) { + case 1: fmt = DXGI_FORMAT_R32_FLOAT; break; + case 2: fmt = DXGI_FORMAT_R32G32_FLOAT; break; + case 3: fmt = DXGI_FORMAT_R32G32B32_FLOAT; break; + case 4: fmt = 
DXGI_FORMAT_R32G32B32A32_FLOAT; break; + } + break; + case RA_VARTYPE_BYTE_UNORM: + switch (inp->dim_v) { + case 1: fmt = DXGI_FORMAT_R8_UNORM; break; + case 2: fmt = DXGI_FORMAT_R8G8_UNORM; break; + // There is no 3-component 8-bit DXGI format + case 4: fmt = DXGI_FORMAT_R8G8B8A8_UNORM; break; + } + break; + } + if (fmt == DXGI_FORMAT_UNKNOWN) { + MP_ERR(ra, "Could not find suitable vertex input format\n"); + goto error; + } + + in_descs[i] = (D3D11_INPUT_ELEMENT_DESC) { + // The semantic name doesn't mean much and is just used to verify + // the input description matches the shader. SPIRV-Cross always + // uses TEXCOORD, so we should too. + .SemanticName = "TEXCOORD", + .SemanticIndex = i, + .AlignedByteOffset = inp->offset, + .Format = fmt, + }; + } + + hr = ID3D11Device_CreateInputLayout(p->dev, in_descs, + params->num_vertex_attribs, vert_bc.start, vert_bc.len, + &pass_p->layout); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create IA layout: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + talloc_free(in_descs); + in_descs = NULL; + + D3D11_BLEND_DESC bdesc = { + .RenderTarget[0] = { + .BlendEnable = params->enable_blend, + .SrcBlend = map_ra_blend(params->blend_src_rgb), + .DestBlend = map_ra_blend(params->blend_dst_rgb), + .BlendOp = D3D11_BLEND_OP_ADD, + .SrcBlendAlpha = map_ra_blend(params->blend_src_alpha), + .DestBlendAlpha = map_ra_blend(params->blend_dst_alpha), + .BlendOpAlpha = D3D11_BLEND_OP_ADD, + .RenderTargetWriteMask = D3D11_COLOR_WRITE_ENABLE_ALL, + }, + }; + hr = ID3D11Device_CreateBlendState(p->dev, &bdesc, &pass_p->bstate); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create blend state: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + save_cached_program(ra, pass, vert_bc, frag_bc, (bstr){0}); + + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return pass; + +error: + renderpass_destroy(ra, pass); + SAFE_RELEASE(vs_blob); + SAFE_RELEASE(ps_blob); + return NULL; +} + +static struct ra_renderpass *renderpass_create_compute(struct ra 
*ra, + struct ra_renderpass *pass, const struct ra_renderpass_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct d3d_rpass *pass_p = pass->priv; + ID3DBlob *cs_blob = NULL; + HRESULT hr; + + bstr comp_bc = {0}; + load_cached_program(ra, params, NULL, NULL, &comp_bc); + + if (!comp_bc.start) { + if (!compile_glsl(ra, GLSL_SHADER_COMPUTE, params->compute_shader, + &cs_blob)) + goto error; + comp_bc = (bstr){ + ID3D10Blob_GetBufferPointer(cs_blob), + ID3D10Blob_GetBufferSize(cs_blob), + }; + } + hr = ID3D11Device_CreateComputeShader(p->dev, comp_bc.start, comp_bc.len, + NULL, &pass_p->cs); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create compute shader: %s\n", + mp_HRESULT_to_str(hr)); + goto error; + } + + save_cached_program(ra, pass, (bstr){0}, (bstr){0}, comp_bc); + + SAFE_RELEASE(cs_blob); + return pass; +error: + renderpass_destroy(ra, pass); + SAFE_RELEASE(cs_blob); + return NULL; +} + +static struct ra_renderpass *renderpass_create(struct ra *ra, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); + pass->params = *ra_renderpass_params_copy(pass, params); + pass->params.cached_program = (bstr){0}; + pass->priv = talloc_zero(pass, struct d3d_rpass); + + if (params->type == RA_RENDERPASS_TYPE_COMPUTE) { + return renderpass_create_compute(ra, pass, params); + } else { + return renderpass_create_raster(ra, pass, params); + } +} + +static void renderpass_run_raster(struct ra *ra, + const struct ra_renderpass_run_params *params, + ID3D11Buffer *ubos[], int ubos_len, + ID3D11SamplerState *samplers[], + ID3D11ShaderResourceView *srvs[], + int samplers_len, + ID3D11UnorderedAccessView *uavs[], + int uavs_len) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + struct d3d_rpass *pass_p = pass->priv; + + UINT vbuf_offset = vbuf_upload(ra, params->vertex_data, + pass->params.vertex_stride * params->vertex_count); + if (vbuf_offset == (UINT)-1) + return; + + 
ID3D11DeviceContext_IASetInputLayout(p->ctx, pass_p->layout); + ID3D11DeviceContext_IASetVertexBuffers(p->ctx, 0, 1, &p->vbuf, + &pass->params.vertex_stride, &vbuf_offset); + ID3D11DeviceContext_IASetPrimitiveTopology(p->ctx, + D3D11_PRIMITIVE_TOPOLOGY_TRIANGLELIST); + + ID3D11DeviceContext_VSSetShader(p->ctx, pass_p->vs, NULL, 0); + + ID3D11DeviceContext_RSSetViewports(p->ctx, 1, (&(D3D11_VIEWPORT) { + .TopLeftX = params->viewport.x0, + .TopLeftY = params->viewport.y0, + .Width = mp_rect_w(params->viewport), + .Height = mp_rect_h(params->viewport), + .MinDepth = 0, + .MaxDepth = 1, + })); + ID3D11DeviceContext_RSSetScissorRects(p->ctx, 1, (&(D3D11_RECT) { + .left = params->scissors.x0, + .top = params->scissors.y0, + .right = params->scissors.x1, + .bottom = params->scissors.y1, + })); + ID3D11DeviceContext_PSSetShader(p->ctx, pass_p->ps, NULL, 0); + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers); + + struct ra_tex *target = params->target; + struct d3d_tex *target_p = target->priv; + ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 1, + &target_p->rtv, NULL, 1, uavs_len, uavs, NULL); + ID3D11DeviceContext_OMSetBlendState(p->ctx, pass_p->bstate, NULL, + D3D11_DEFAULT_SAMPLE_MASK); + + ID3D11DeviceContext_Draw(p->ctx, params->vertex_count, 0); + + // Unbind everything. It's easier to do this than to actually track state, + // and if we leave the RTV bound, it could trip up D3D's conflict checker. 
+ for (int i = 0; i < ubos_len; i++) + ubos[i] = NULL; + for (int i = 0; i < samplers_len; i++) { + samplers[i] = NULL; + srvs[i] = NULL; + } + for (int i = 0; i < uavs_len; i++) + uavs[i] = NULL; + ID3D11DeviceContext_PSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_PSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_PSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_OMSetRenderTargetsAndUnorderedAccessViews(p->ctx, 0, + NULL, NULL, 1, uavs_len, uavs, NULL); +} + +static void renderpass_run_compute(struct ra *ra, + const struct ra_renderpass_run_params *params, + ID3D11Buffer *ubos[], int ubos_len, + ID3D11SamplerState *samplers[], + ID3D11ShaderResourceView *srvs[], + int samplers_len, + ID3D11UnorderedAccessView *uavs[], + int uavs_len) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + struct d3d_rpass *pass_p = pass->priv; + + ID3D11DeviceContext_CSSetShader(p->ctx, pass_p->cs, NULL, 0); + ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs, + NULL); + + ID3D11DeviceContext_Dispatch(p->ctx, params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + + for (int i = 0; i < ubos_len; i++) + ubos[i] = NULL; + for (int i = 0; i < samplers_len; i++) { + samplers[i] = NULL; + srvs[i] = NULL; + } + for (int i = 0; i < uavs_len; i++) + uavs[i] = NULL; + ID3D11DeviceContext_CSSetConstantBuffers(p->ctx, 0, ubos_len, ubos); + ID3D11DeviceContext_CSSetShaderResources(p->ctx, 0, samplers_len, srvs); + ID3D11DeviceContext_CSSetSamplers(p->ctx, 0, samplers_len, samplers); + ID3D11DeviceContext_CSSetUnorderedAccessViews(p->ctx, 0, uavs_len, uavs, + NULL); +} + +static void renderpass_run(struct ra *ra, + const struct 
ra_renderpass_run_params *params) +{ + struct ra_d3d11 *p = ra->priv; + struct ra_renderpass *pass = params->pass; + enum ra_renderpass_type type = pass->params.type; + + ID3D11Buffer *ubos[D3D11_COMMONSHADER_CONSTANT_BUFFER_API_SLOT_COUNT] = {0}; + int ubos_len = 0; + + ID3D11SamplerState *samplers[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0}; + ID3D11ShaderResourceView *srvs[D3D11_COMMONSHADER_SAMPLER_SLOT_COUNT] = {0}; + int samplers_len = 0; + + ID3D11UnorderedAccessView *uavs[D3D11_1_UAV_SLOT_COUNT] = {0}; + int uavs_len = 0; + + // In a raster pass, one of the UAV slots is used by the runtime for the RTV + int uavs_max = type == RA_RENDERPASS_TYPE_COMPUTE ? p->max_uavs + : p->max_uavs - 1; + + // Gather the input variables used in this pass. These will be mapped to + // HLSL registers. + for (int i = 0; i < params->num_values; i++) { + struct ra_renderpass_input_val *val = ¶ms->values[i]; + int binding = pass->params.inputs[val->index].binding; + switch (pass->params.inputs[val->index].type) { + case RA_VARTYPE_BUF_RO: + if (binding >= MP_ARRAY_SIZE(ubos)) { + MP_ERR(ra, "Too many constant buffers in pass\n"); + return; + } + struct ra_buf *buf_ro = *(struct ra_buf **)val->data; + buf_resolve(ra, buf_ro); + struct d3d_buf *buf_ro_p = buf_ro->priv; + ubos[binding] = buf_ro_p->buf; + ubos_len = MPMAX(ubos_len, binding + 1); + break; + case RA_VARTYPE_BUF_RW: + if (binding > uavs_max) { + MP_ERR(ra, "Too many UAVs in pass\n"); + return; + } + struct ra_buf *buf_rw = *(struct ra_buf **)val->data; + buf_resolve(ra, buf_rw); + struct d3d_buf *buf_rw_p = buf_rw->priv; + uavs[binding] = buf_rw_p->uav; + uavs_len = MPMAX(uavs_len, binding + 1); + break; + case RA_VARTYPE_TEX: + if (binding >= MP_ARRAY_SIZE(samplers)) { + MP_ERR(ra, "Too many textures in pass\n"); + return; + } + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct d3d_tex *tex_p = tex->priv; + samplers[binding] = tex_p->sampler; + srvs[binding] = tex_p->srv; + samplers_len = MPMAX(samplers_len, 
binding + 1); + break; + case RA_VARTYPE_IMG_W: + if (binding > uavs_max) { + MP_ERR(ra, "Too many UAVs in pass\n"); + return; + } + struct ra_tex *img = *(struct ra_tex **)val->data; + struct d3d_tex *img_p = img->priv; + uavs[binding] = img_p->uav; + uavs_len = MPMAX(uavs_len, binding + 1); + break; + } + } + + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + renderpass_run_compute(ra, params, ubos, ubos_len, samplers, srvs, + samplers_len, uavs, uavs_len); + } else { + renderpass_run_raster(ra, params, ubos, ubos_len, samplers, srvs, + samplers_len, uavs, uavs_len); + } +} + +static void timer_destroy(struct ra *ra, ra_timer *ratimer) +{ + if (!ratimer) + return; + struct d3d_timer *timer = ratimer; + + SAFE_RELEASE(timer->ts_start); + SAFE_RELEASE(timer->ts_end); + SAFE_RELEASE(timer->disjoint); + talloc_free(timer); +} + +static ra_timer *timer_create(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + if (!p->has_timestamp_queries) + return NULL; + + struct d3d_timer *timer = talloc_zero(NULL, struct d3d_timer); + HRESULT hr; + + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_start); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create start query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, &timer->ts_end); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create end query: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + + // Measuring duration in D3D11 requires three queries: start and end + // timestamps, and a disjoint query containing a flag which says whether + // the timestamps are usable or if a discontinuity occurred between them, + // like a change in power state or clock speed. The disjoint query also + // contains the timer frequency, so the timestamps are useless without it. 
// Convert a tick count to nanoseconds, given the tick frequency in Hz.
//
// Whole seconds and the sub-second remainder are converted separately, so the
// result does not overflow for large tick counts the way a plain
// timestamp * 1e9 / freq would. (The remainder term is exact as long as
// freq * 1e9 fits in 64 bits.) A frequency of zero yields 0 instead of
// dividing by zero.
static uint64_t timestamp_to_ns(uint64_t timestamp, uint64_t freq)
{
    static const uint64_t ns_per_s = 1000000000llu;

    if (!freq)
        return 0;

    return timestamp / freq * ns_per_s + timestamp % freq * ns_per_s / freq;
}
ID3D11DeviceContext_End(p->ctx, (ID3D11Asynchronous *)timer->disjoint); + + return timer->result; +} + +static int map_msg_severity(D3D11_MESSAGE_SEVERITY sev) +{ + switch (sev) { + case D3D11_MESSAGE_SEVERITY_CORRUPTION: + return MSGL_FATAL; + case D3D11_MESSAGE_SEVERITY_ERROR: + return MSGL_ERR; + case D3D11_MESSAGE_SEVERITY_WARNING: + return MSGL_WARN; + default: + case D3D11_MESSAGE_SEVERITY_INFO: + case D3D11_MESSAGE_SEVERITY_MESSAGE: + return MSGL_DEBUG; + } +} + +static int map_msg_severity_by_id(D3D11_MESSAGE_ID id, + D3D11_MESSAGE_SEVERITY sev) +{ + switch (id) { + // These are normal. The RA timer queue habitually reuses timer objects + // without retrieving the results. + case D3D11_MESSAGE_ID_QUERY_BEGIN_ABANDONING_PREVIOUS_RESULTS: + case D3D11_MESSAGE_ID_QUERY_END_ABANDONING_PREVIOUS_RESULTS: + return MSGL_TRACE; + + // D3D11 writes log messages every time an object is created or + // destroyed. That results in a lot of log spam, so force MSGL_TRACE. +#define OBJ_LIFETIME_MESSAGES(obj) \ + case D3D11_MESSAGE_ID_CREATE_ ## obj: \ + case D3D11_MESSAGE_ID_DESTROY_ ## obj + + OBJ_LIFETIME_MESSAGES(CONTEXT): + OBJ_LIFETIME_MESSAGES(BUFFER): + OBJ_LIFETIME_MESSAGES(TEXTURE1D): + OBJ_LIFETIME_MESSAGES(TEXTURE2D): + OBJ_LIFETIME_MESSAGES(TEXTURE3D): + OBJ_LIFETIME_MESSAGES(SHADERRESOURCEVIEW): + OBJ_LIFETIME_MESSAGES(RENDERTARGETVIEW): + OBJ_LIFETIME_MESSAGES(DEPTHSTENCILVIEW): + OBJ_LIFETIME_MESSAGES(VERTEXSHADER): + OBJ_LIFETIME_MESSAGES(HULLSHADER): + OBJ_LIFETIME_MESSAGES(DOMAINSHADER): + OBJ_LIFETIME_MESSAGES(GEOMETRYSHADER): + OBJ_LIFETIME_MESSAGES(PIXELSHADER): + OBJ_LIFETIME_MESSAGES(INPUTLAYOUT): + OBJ_LIFETIME_MESSAGES(SAMPLER): + OBJ_LIFETIME_MESSAGES(BLENDSTATE): + OBJ_LIFETIME_MESSAGES(DEPTHSTENCILSTATE): + OBJ_LIFETIME_MESSAGES(RASTERIZERSTATE): + OBJ_LIFETIME_MESSAGES(QUERY): + OBJ_LIFETIME_MESSAGES(PREDICATE): + OBJ_LIFETIME_MESSAGES(COUNTER): + OBJ_LIFETIME_MESSAGES(COMMANDLIST): + OBJ_LIFETIME_MESSAGES(CLASSINSTANCE): + 
OBJ_LIFETIME_MESSAGES(CLASSLINKAGE): + OBJ_LIFETIME_MESSAGES(COMPUTESHADER): + OBJ_LIFETIME_MESSAGES(UNORDEREDACCESSVIEW): + OBJ_LIFETIME_MESSAGES(VIDEODECODER): + OBJ_LIFETIME_MESSAGES(VIDEOPROCESSORENUM): + OBJ_LIFETIME_MESSAGES(VIDEOPROCESSOR): + OBJ_LIFETIME_MESSAGES(DECODEROUTPUTVIEW): + OBJ_LIFETIME_MESSAGES(PROCESSORINPUTVIEW): + OBJ_LIFETIME_MESSAGES(PROCESSOROUTPUTVIEW): + OBJ_LIFETIME_MESSAGES(DEVICECONTEXTSTATE): + OBJ_LIFETIME_MESSAGES(FENCE): + return MSGL_TRACE; + +#undef OBJ_LIFETIME_MESSAGES + + default: + return map_msg_severity(sev); + } +} + +static void debug_marker(struct ra *ra, const char *msg) +{ + struct ra_d3d11 *p = ra->priv; + void *talloc_ctx = talloc_new(NULL); + HRESULT hr; + + if (!p->iqueue) + goto done; + + // Copy debug-layer messages to mpv's log output + bool printed_header = false; + uint64_t messages = ID3D11InfoQueue_GetNumStoredMessages(p->iqueue); + for (uint64_t i = 0; i < messages; i++) { + SIZE_T len; + hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, NULL, &len); + if (FAILED(hr) || !len) + goto done; + + D3D11_MESSAGE *d3dmsg = talloc_size(talloc_ctx, len); + hr = ID3D11InfoQueue_GetMessage(p->iqueue, i, d3dmsg, &len); + if (FAILED(hr)) + goto done; + + int msgl = map_msg_severity_by_id(d3dmsg->ID, d3dmsg->Severity); + if (mp_msg_test(ra->log, msgl)) { + if (!printed_header) + MP_INFO(ra, "%s:\n", msg); + printed_header = true; + + MP_MSG(ra, msgl, "%d: %.*s\n", (int)d3dmsg->ID, + (int)d3dmsg->DescriptionByteLength, d3dmsg->pDescription); + talloc_free(d3dmsg); + } + } + + ID3D11InfoQueue_ClearStoredMessages(p->iqueue); +done: + talloc_free(talloc_ctx); +} + +static void destroy(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + + // Release everything except the interfaces needed to perform leak checking + SAFE_RELEASE(p->clear_ps); + SAFE_RELEASE(p->clear_vs); + SAFE_RELEASE(p->clear_layout); + SAFE_RELEASE(p->clear_vbuf); + SAFE_RELEASE(p->clear_cbuf); + SAFE_RELEASE(p->blit_float_ps); + SAFE_RELEASE(p->blit_vs); + 
SAFE_RELEASE(p->blit_layout); + SAFE_RELEASE(p->blit_vbuf); + SAFE_RELEASE(p->blit_sampler); + SAFE_RELEASE(p->vbuf); + SAFE_RELEASE(p->ctx1); + SAFE_RELEASE(p->dev1); + SAFE_RELEASE(p->dev); + + if (p->ctx) { + // Destroy the device context synchronously so referenced objects don't + // show up in the leak check + ID3D11DeviceContext_ClearState(p->ctx); + ID3D11DeviceContext_Flush(p->ctx); + } + SAFE_RELEASE(p->ctx); + + if (p->debug) { + // Report any leaked objects + debug_marker(ra, "after destroy"); + ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_DETAIL); + debug_marker(ra, "after leak check"); + ID3D11Debug_ReportLiveDeviceObjects(p->debug, D3D11_RLDO_SUMMARY); + debug_marker(ra, "after leak summary"); + } + SAFE_RELEASE(p->debug); + SAFE_RELEASE(p->iqueue); + + talloc_free(ra); +} + +static struct ra_fns ra_fns_d3d11 = { + .destroy = destroy, + .tex_create = tex_create, + .tex_destroy = tex_destroy, + .tex_upload = tex_upload, + .tex_download = tex_download, + .buf_create = buf_create, + .buf_destroy = buf_destroy, + .buf_update = buf_update, + .clear = clear, + .blit = blit, + .uniform_layout = std140_layout, + .desc_namespace = desc_namespace, + .renderpass_create = renderpass_create, + .renderpass_destroy = renderpass_destroy, + .renderpass_run = renderpass_run, + .timer_create = timer_create, + .timer_destroy = timer_destroy, + .timer_start = timer_start, + .timer_stop = timer_stop, + .debug_marker = debug_marker, +}; + +void ra_d3d11_flush(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3D11DeviceContext_Flush(p->ctx); +} + +static void init_debug_layer(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + HRESULT hr; + + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Debug, + (void**)&p->debug); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to get debug device: %s\n", mp_HRESULT_to_str(hr)); + return; + } + + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11InfoQueue, + (void**)&p->iqueue); + if (FAILED(hr)) { + MP_ERR(ra, "Failed 
to get info queue: %s\n", mp_HRESULT_to_str(hr)); + return; + } + + // Store an unlimited amount of messages in the buffer. This is fine + // because we flush stored messages regularly (in debug_marker.) + ID3D11InfoQueue_SetMessageCountLimit(p->iqueue, -1); + + // Push empty filter to get everything + D3D11_INFO_QUEUE_FILTER filter = {0}; + ID3D11InfoQueue_PushStorageFilter(p->iqueue, &filter); +} + +static struct dll_version get_dll_version(HMODULE dll) +{ + void *ctx = talloc_new(NULL); + struct dll_version ret = { 0 }; + + HRSRC rsrc = FindResourceW(dll, MAKEINTRESOURCEW(VS_VERSION_INFO), + VS_FILE_INFO); + if (!rsrc) + goto done; + DWORD size = SizeofResource(dll, rsrc); + HGLOBAL res = LoadResource(dll, rsrc); + if (!res) + goto done; + void *ptr = LockResource(res); + if (!ptr) + goto done; + void *copy = talloc_memdup(ctx, ptr, size); + + VS_FIXEDFILEINFO *ffi; + UINT ffi_len; + if (!VerQueryValueW(copy, L"\\", (void**)&ffi, &ffi_len)) + goto done; + if (ffi_len < sizeof(*ffi)) + goto done; + + ret.major = HIWORD(ffi->dwFileVersionMS); + ret.minor = LOWORD(ffi->dwFileVersionMS); + ret.build = HIWORD(ffi->dwFileVersionLS); + ret.revision = LOWORD(ffi->dwFileVersionLS); + +done: + talloc_free(ctx); + return ret; +} + +static bool load_d3d_compiler(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + HMODULE d3dcompiler = NULL; + + // Try the inbox D3DCompiler first (Windows 8.1 and up) + if (IsWindows8Point1OrGreater()) { + d3dcompiler = LoadLibraryExW(L"d3dcompiler_47.dll", NULL, + LOAD_LIBRARY_SEARCH_SYSTEM32); + } + // Check for a packaged version of d3dcompiler_47.dll + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_47.dll"); + // Try d3dcompiler_46.dll from the Windows 8 SDK + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_46.dll"); + // Try d3dcompiler_43.dll from the June 2010 DirectX SDK + if (!d3dcompiler) + d3dcompiler = LoadLibraryW(L"d3dcompiler_43.dll"); + // Can't find any compiler DLL, so give up + if (!d3dcompiler) 
+ return false; + + p->d3d_compiler_ver = get_dll_version(d3dcompiler); + + p->D3DCompile = (pD3DCompile)GetProcAddress(d3dcompiler, "D3DCompile"); + if (!p->D3DCompile) + return false; + return true; +} + +static void find_max_texture_dimension(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + + D3D11_TEXTURE2D_DESC desc = { + .Width = ra->max_texture_wh, + .Height = ra->max_texture_wh, + .MipLevels = 1, + .ArraySize = 1, + .SampleDesc.Count = 1, + .Format = DXGI_FORMAT_R8_UNORM, + .BindFlags = D3D11_BIND_SHADER_RESOURCE, + }; + while (true) { + desc.Height = desc.Width *= 2; + if (desc.Width >= 0x8000000u) + return; + if (FAILED(ID3D11Device_CreateTexture2D(p->dev, &desc, NULL, NULL))) + return; + ra->max_texture_wh = desc.Width; + } +} + +struct ra *ra_d3d11_create(ID3D11Device *dev, struct mp_log *log, + struct spirv_compiler *spirv) +{ + HRESULT hr; + + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + ra->fns = &ra_fns_d3d11; + + // Even Direct3D 10level9 supports 3D textures + ra->caps = RA_CAP_TEX_3D | RA_CAP_DIRECT_UPLOAD | RA_CAP_BUF_RO | + RA_CAP_BLIT | spirv->ra_caps; + + ra->glsl_version = spirv->glsl_version; + ra->glsl_vulkan = true; + + struct ra_d3d11 *p = ra->priv = talloc_zero(ra, struct ra_d3d11); + p->spirv = spirv; + + int minor = 0; + ID3D11Device_AddRef(dev); + p->dev = dev; + ID3D11Device_GetImmediateContext(p->dev, &p->ctx); + hr = ID3D11Device_QueryInterface(p->dev, &IID_ID3D11Device1, + (void**)&p->dev1); + if (SUCCEEDED(hr)) { + minor = 1; + ID3D11Device1_GetImmediateContext1(p->dev1, &p->ctx1); + + D3D11_FEATURE_DATA_D3D11_OPTIONS fopts = { 0 }; + hr = ID3D11Device_CheckFeatureSupport(p->dev, + D3D11_FEATURE_D3D11_OPTIONS, &fopts, sizeof(fopts)); + if (SUCCEEDED(hr)) { + p->has_clear_view = fopts.ClearView; + } + } + + MP_VERBOSE(ra, "Using Direct3D 11.%d runtime\n", minor); + + p->fl = ID3D11Device_GetFeatureLevel(p->dev); + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + ra->max_texture_wh = 
D3D11_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_10_0) { + ra->max_texture_wh = D3D10_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else if (p->fl >= D3D_FEATURE_LEVEL_9_3) { + ra->max_texture_wh = D3D_FL9_3_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } else { + ra->max_texture_wh = D3D_FL9_1_REQ_TEXTURE2D_U_OR_V_DIMENSION; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_0) + ra->caps |= RA_CAP_GATHER; + if (p->fl >= D3D_FEATURE_LEVEL_10_0) + ra->caps |= RA_CAP_FRAGCOORD; + + // Some 10_0 hardware has compute shaders, but only 11_0 has image load/store + if (p->fl >= D3D_FEATURE_LEVEL_11_0) { + ra->caps |= RA_CAP_COMPUTE | RA_CAP_BUF_RW; + ra->max_shmem = 32 * 1024; + ra->max_compute_group_threads = + D3D11_CS_THREAD_GROUP_MAX_THREADS_PER_GROUP; + } + + if (p->fl >= D3D_FEATURE_LEVEL_11_1) { + p->max_uavs = D3D11_1_UAV_SLOT_COUNT; + } else { + p->max_uavs = D3D11_PS_CS_UAV_REGISTER_COUNT; + } + + if (ID3D11Device_GetCreationFlags(p->dev) & D3D11_CREATE_DEVICE_DEBUG) + init_debug_layer(ra); + + // Some level 9_x devices don't have timestamp queries + hr = ID3D11Device_CreateQuery(p->dev, + &(D3D11_QUERY_DESC) { D3D11_QUERY_TIMESTAMP }, NULL); + p->has_timestamp_queries = SUCCEEDED(hr); + + debug_marker(ra, "before maximum Texture2D size lookup"); + + // According to MSDN, the above texture sizes are just minimums and drivers + // may support larger textures. 
See: + // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476874.aspx + find_max_texture_dimension(ra); + + // Ignore any messages during find_max_texture_dimension + if (p->iqueue) + ID3D11InfoQueue_ClearStoredMessages(p->iqueue); + + MP_VERBOSE(ra, "Maximum Texture2D size: %dx%d\n", ra->max_texture_wh, + ra->max_texture_wh); + + if (!load_d3d_compiler(ra)) { + MP_FATAL(ra, "Could not find D3DCompiler DLL\n"); + goto error; + } + + MP_VERBOSE(ra, "D3DCompiler version: %u.%u.%u.%u\n", + p->d3d_compiler_ver.major, p->d3d_compiler_ver.minor, + p->d3d_compiler_ver.build, p->d3d_compiler_ver.revision); + + setup_formats(ra); + + // The rasterizer state never changes, so set it up here + ID3D11RasterizerState *rstate; + D3D11_RASTERIZER_DESC rdesc = { + .FillMode = D3D11_FILL_SOLID, + .CullMode = D3D11_CULL_NONE, + .FrontCounterClockwise = FALSE, + .DepthClipEnable = TRUE, // Required for 10level9 + .ScissorEnable = TRUE, + }; + hr = ID3D11Device_CreateRasterizerState(p->dev, &rdesc, &rstate); + if (FAILED(hr)) { + MP_ERR(ra, "Failed to create rasterizer state: %s\n", mp_HRESULT_to_str(hr)); + goto error; + } + ID3D11DeviceContext_RSSetState(p->ctx, rstate); + SAFE_RELEASE(rstate); + + // If the device doesn't support ClearView, we have to set up a + // shader-based clear() implementation + if (!p->has_clear_view && !setup_clear_rpass(ra)) + goto error; + + if (!setup_blit_rpass(ra)) + goto error; + + return ra; + +error: + destroy(ra); + return NULL; +} + +ID3D11Device *ra_d3d11_get_device(struct ra *ra) +{ + struct ra_d3d11 *p = ra->priv; + ID3D11Device_AddRef(p->dev); + return p->dev; +} + +bool ra_is_d3d11(struct ra *ra) +{ + return ra->fns == &ra_fns_d3d11; +} diff --git a/video/out/d3d11/ra_d3d11.h b/video/out/d3d11/ra_d3d11.h new file mode 100644 index 0000000..6f62a7f --- /dev/null +++ b/video/out/d3d11/ra_d3d11.h @@ -0,0 +1,47 @@ +#pragma once + +#include <stdbool.h> +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> + +#include 
"video/out/gpu/ra.h" +#include "video/out/gpu/spirv.h" + +// Get the underlying DXGI format from an RA format +DXGI_FORMAT ra_d3d11_get_format(const struct ra_format *fmt); + +// Gets the matching ra_format for a given DXGI format. +// Returns a nullptr in case of no known match. +const struct ra_format *ra_d3d11_get_ra_format(struct ra *ra, DXGI_FORMAT fmt); + +// Create an RA instance from a D3D11 device. This takes a reference to the +// device, which is released when the RA instance is destroyed. +struct ra *ra_d3d11_create(ID3D11Device *device, struct mp_log *log, + struct spirv_compiler *spirv); + +// Flush the immediate context of the wrapped D3D11 device +void ra_d3d11_flush(struct ra *ra); + +// Create an RA texture from a D3D11 resource. This takes a reference to the +// texture, which is released when the RA texture is destroyed. +struct ra_tex *ra_d3d11_wrap_tex(struct ra *ra, ID3D11Resource *res); + +// As above, but for a D3D11VA video resource. The fmt parameter selects which +// plane of a planar format will be mapped when the RA texture is used. +// array_slice should be set for texture arrays and is ignored for non-arrays. +struct ra_tex *ra_d3d11_wrap_tex_video(struct ra *ra, ID3D11Texture2D *res, + int w, int h, int array_slice, + const struct ra_format *fmt); + +// Get the underlying D3D11 resource from an RA texture. The returned resource +// is refcounted and must be released by the caller. +ID3D11Resource *ra_d3d11_get_raw_tex(struct ra *ra, struct ra_tex *tex, + int *array_slice); + +// Get the underlying D3D11 device from an RA instance. The returned device is +// refcounted and must be released by the caller. 
+ID3D11Device *ra_d3d11_get_device(struct ra *ra); + +// True if the RA instance was created with ra_d3d11_create() +bool ra_is_d3d11(struct ra *ra); diff --git a/video/out/dither.c b/video/out/dither.c new file mode 100644 index 0000000..44558ba --- /dev/null +++ b/video/out/dither.c @@ -0,0 +1,175 @@ +/* + * Generate a dithering matrix for downsampling images. + * + * Copyright © 2013 Wessel Dankers <wsl@fruit.je> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> +#include <stdlib.h> +#include <inttypes.h> +#include <string.h> +#include <assert.h> +#include <math.h> + +#include <libavutil/lfg.h> + +#include "mpv_talloc.h" +#include "dither.h" + +#define MAX_SIZEB 8 +#define MAX_SIZE (1 << MAX_SIZEB) +#define MAX_SIZE2 (MAX_SIZE * MAX_SIZE) + +#define WRAP_SIZE2(k, x) ((unsigned int)((unsigned int)(x) & ((k)->size2 - 1))) +#define XY(k, x, y) ((unsigned int)(((x) | ((y) << (k)->sizeb)))) + +struct ctx { + unsigned int sizeb, size, size2; + unsigned int gauss_radius; + unsigned int gauss_middle; + uint64_t gauss[MAX_SIZE2]; + unsigned int randomat[MAX_SIZE2]; + bool calcmat[MAX_SIZE2]; + uint64_t gaussmat[MAX_SIZE2]; + unsigned int unimat[MAX_SIZE2]; + AVLFG avlfg; +}; + +static void makegauss(struct ctx *k, unsigned int sizeb) +{ + assert(sizeb >= 1 && sizeb <= MAX_SIZEB); + + av_lfg_init(&k->avlfg, 123); + + k->sizeb = sizeb; + k->size = 1 << k->sizeb; + k->size2 = k->size * k->size; + + k->gauss_radius = k->size / 2 - 1; + k->gauss_middle = XY(k, k->gauss_radius, k->gauss_radius); + + unsigned int gauss_size = k->gauss_radius * 2 + 1; + unsigned int gauss_size2 = gauss_size * gauss_size; + + for (unsigned int c = 0; c < k->size2; c++) + k->gauss[c] = 0; + + double sigma = -log(1.5 / (double) UINT64_MAX * gauss_size2) / k->gauss_radius; + + for (unsigned int gy = 0; gy <= k->gauss_radius; gy++) { + for (unsigned int gx = 0; gx <= gy; gx++) { + int cx = (int)gx - k->gauss_radius; + int cy = (int)gy - k->gauss_radius; + int sq = cx * cx + cy * cy; + double e = exp(-sqrt(sq) * sigma); + uint64_t v = e / gauss_size2 * (double) UINT64_MAX; + k->gauss[XY(k, gx, gy)] = + k->gauss[XY(k, gy, gx)] = + k->gauss[XY(k, gx, gauss_size - 1 - gy)] = + k->gauss[XY(k, gy, gauss_size - 1 - gx)] = + k->gauss[XY(k, gauss_size - 1 - gx, gy)] = + k->gauss[XY(k, gauss_size - 1 - gy, gx)] = + k->gauss[XY(k, gauss_size - 1 - gx, gauss_size - 1 - gy)] = + k->gauss[XY(k, gauss_size 
- 1 - gy, gauss_size - 1 - gx)] = v; + } + } + uint64_t total = 0; + for (unsigned int c = 0; c < k->size2; c++) { + uint64_t oldtotal = total; + total += k->gauss[c]; + assert(total >= oldtotal); + } +} + +static void setbit(struct ctx *k, unsigned int c) +{ + if (k->calcmat[c]) + return; + k->calcmat[c] = true; + uint64_t *m = k->gaussmat; + uint64_t *me = k->gaussmat + k->size2; + uint64_t *g = k->gauss + WRAP_SIZE2(k, k->gauss_middle + k->size2 - c); + uint64_t *ge = k->gauss + k->size2; + while (g < ge) + *m++ += *g++; + g = k->gauss; + while (m < me) + *m++ += *g++; +} + +static unsigned int getmin(struct ctx *k) +{ + uint64_t min = UINT64_MAX; + unsigned int resnum = 0; + unsigned int size2 = k->size2; + for (unsigned int c = 0; c < size2; c++) { + if (k->calcmat[c]) + continue; + uint64_t total = k->gaussmat[c]; + if (total <= min) { + if (total != min) { + min = total; + resnum = 0; + } + k->randomat[resnum++] = c; + } + } + if (resnum == 1) + return k->randomat[0]; + if (resnum == size2) + return size2 / 2; + return k->randomat[av_lfg_get(&k->avlfg) % resnum]; +} + +static void makeuniform(struct ctx *k) +{ + unsigned int size2 = k->size2; + for (unsigned int c = 0; c < size2; c++) { + unsigned int r = getmin(k); + setbit(k, r); + k->unimat[r] = c; + } +} + +// out_matrix is a rectangular tsize * tsize array, where tsize = (1 << size). 
+void mp_make_fruit_dither_matrix(float *out_matrix, int size) +{ + struct ctx *k = talloc_zero(NULL, struct ctx); + makegauss(k, size); + makeuniform(k); + float invscale = k->size2; + for(unsigned int y = 0; y < k->size; y++) { + for(unsigned int x = 0; x < k->size; x++) + out_matrix[x + y * k->size] = k->unimat[XY(k, x, y)] / invscale; + } + talloc_free(k); +} + +void mp_make_ordered_dither_matrix(unsigned char *m, int size) +{ + m[0] = 0; + for (int sz = 1; sz < size; sz *= 2) { + int offset[] = {sz*size, sz, sz * (size+1), 0}; + for (int i = 0; i < 4; i++) + for (int y = 0; y < sz * size; y += size) + for (int x = 0; x < sz; x++) + m[x+y+offset[i]] = m[x+y] * 4 + (3-i) * 256/size/size; + } +} diff --git a/video/out/dither.h b/video/out/dither.h new file mode 100644 index 0000000..ca804e3 --- /dev/null +++ b/video/out/dither.h @@ -0,0 +1,2 @@ +void mp_make_fruit_dither_matrix(float *out_matrix, int size); +void mp_make_ordered_dither_matrix(unsigned char *m, int size); diff --git a/video/out/dr_helper.c b/video/out/dr_helper.c new file mode 100644 index 0000000..ac440a7 --- /dev/null +++ b/video/out/dr_helper.c @@ -0,0 +1,162 @@ +#include <assert.h> +#include <stdatomic.h> +#include <stdlib.h> + +#include <libavutil/buffer.h> + +#include "misc/dispatch.h" +#include "mpv_talloc.h" +#include "osdep/threads.h" +#include "video/mp_image.h" + +#include "dr_helper.h" + +struct dr_helper { + mp_mutex thread_lock; + mp_thread_id thread_id; + bool thread_valid; // (POSIX defines no "unset" mp_thread value yet) + + struct mp_dispatch_queue *dispatch; + atomic_ullong dr_in_flight; + + struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h, + int stride_align, int flags); + void *get_image_ctx; +}; + +static void dr_helper_destroy(void *ptr) +{ + struct dr_helper *dr = ptr; + + // All references must have been freed on destruction, or we'll have + // dangling pointers. 
+ assert(atomic_load(&dr->dr_in_flight) == 0); + + mp_mutex_destroy(&dr->thread_lock); +} + +struct dr_helper *dr_helper_create(struct mp_dispatch_queue *dispatch, + struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h, + int stride_align, int flags), + void *get_image_ctx) +{ + struct dr_helper *dr = talloc_ptrtype(NULL, dr); + talloc_set_destructor(dr, dr_helper_destroy); + *dr = (struct dr_helper){ + .dispatch = dispatch, + .dr_in_flight = 0, + .get_image = get_image, + .get_image_ctx = get_image_ctx, + }; + mp_mutex_init(&dr->thread_lock); + return dr; +} + +void dr_helper_acquire_thread(struct dr_helper *dr) +{ + mp_mutex_lock(&dr->thread_lock); + assert(!dr->thread_valid); // fails on API user errors + dr->thread_valid = true; + dr->thread_id = mp_thread_current_id(); + mp_mutex_unlock(&dr->thread_lock); +} + +void dr_helper_release_thread(struct dr_helper *dr) +{ + mp_mutex_lock(&dr->thread_lock); + // Fails on API user errors. + assert(dr->thread_valid); + assert(mp_thread_id_equal(dr->thread_id, mp_thread_current_id())); + dr->thread_valid = false; + mp_mutex_unlock(&dr->thread_lock); +} + +struct free_dr_context { + struct dr_helper *dr; + AVBufferRef *ref; +}; + +static void dr_thread_free(void *ptr) +{ + struct free_dr_context *ctx = ptr; + + unsigned long long v = atomic_fetch_add(&ctx->dr->dr_in_flight, -1); + assert(v); // value before sub is 0 - unexpected underflow. + + av_buffer_unref(&ctx->ref); + talloc_free(ctx); +} + +static void free_dr_buffer_on_dr_thread(void *opaque, uint8_t *data) +{ + struct free_dr_context *ctx = opaque; + struct dr_helper *dr = ctx->dr; + + mp_mutex_lock(&dr->thread_lock); + bool on_this_thread = + dr->thread_valid && mp_thread_id_equal(ctx->dr->thread_id, mp_thread_current_id()); + mp_mutex_unlock(&dr->thread_lock); + + // The image could be unreffed even on the DR thread. In practice, this + // matters most on DR destruction. 
+ if (on_this_thread) { + dr_thread_free(ctx); + } else { + mp_dispatch_enqueue(dr->dispatch, dr_thread_free, ctx); + } +} + +struct get_image_cmd { + struct dr_helper *dr; + int imgfmt, w, h, stride_align, flags; + struct mp_image *res; +}; + +static void sync_get_image(void *ptr) +{ + struct get_image_cmd *cmd = ptr; + struct dr_helper *dr = cmd->dr; + + cmd->res = dr->get_image(dr->get_image_ctx, cmd->imgfmt, cmd->w, cmd->h, + cmd->stride_align, cmd->flags); + if (!cmd->res) + return; + + // We require exactly 1 AVBufferRef. + assert(cmd->res->bufs[0]); + assert(!cmd->res->bufs[1]); + + // Apply some magic to get it free'd on the DR thread as well. For this to + // work, we create a dummy-ref that aliases the original ref, which is why + // the original ref must be writable in the first place. (A newly allocated + // image should be always writable of course.) + assert(mp_image_is_writeable(cmd->res)); + + struct free_dr_context *ctx = talloc_zero(NULL, struct free_dr_context); + *ctx = (struct free_dr_context){ + .dr = dr, + .ref = cmd->res->bufs[0], + }; + + AVBufferRef *new_ref = av_buffer_create(ctx->ref->data, ctx->ref->size, + free_dr_buffer_on_dr_thread, ctx, 0); + MP_HANDLE_OOM(new_ref); + + cmd->res->bufs[0] = new_ref; + + atomic_fetch_add(&dr->dr_in_flight, 1); +} + +struct mp_image *dr_helper_get_image(struct dr_helper *dr, int imgfmt, + int w, int h, int stride_align, int flags) +{ + struct get_image_cmd cmd = { + .dr = dr, + .imgfmt = imgfmt, + .w = w, .h = h, + .stride_align = stride_align, + .flags = flags, + }; + mp_dispatch_run(dr->dispatch, sync_get_image, &cmd); + return cmd.res; +} diff --git a/video/out/dr_helper.h b/video/out/dr_helper.h new file mode 100644 index 0000000..cf2ed14 --- /dev/null +++ b/video/out/dr_helper.h @@ -0,0 +1,37 @@ +#pragma once + +// This is a helper for implementing thread-safety for DR callbacks. These need +// to allocate GPU buffers on the GPU thread (e.g. 
OpenGL with its forced TLS), +// and the buffers also need to be freed on the GPU thread. +// This is not a helpful "Dr.", rather it represents Satan in form of C code. +struct dr_helper; + +struct mp_image; +struct mp_dispatch_queue; + +// dr_helper_get_image() calls will use the dispatch queue to run get_image on +// a target thread, which processes the dispatch queue. +// Note: the dispatch queue must process outstanding async. work before the +// dr_helper instance can be destroyed. +struct dr_helper *dr_helper_create(struct mp_dispatch_queue *dispatch, + struct mp_image *(*get_image)(void *ctx, int imgfmt, int w, int h, + int stride_align, int flags), + void *get_image_ctx); + +// Make DR release calls (freeing images) reentrant if they are called on current +// thread. That means any free call will directly release the image as allocated +// with get_image(). +// Only 1 thread can use this at a time. Note that it would make no sense to +// call this on more than 1 thread, as get_image is assumed not thread-safe. +void dr_helper_acquire_thread(struct dr_helper *dr); + +// This _must_ be called on the same thread as dr_helper_acquire_thread() was +// called. Every release call must be paired with an acquire call. +void dr_helper_release_thread(struct dr_helper *dr); + +// Allocate an image by running the get_image callback on the target thread. +// Always blocks on dispatch queue processing. This implies there is no way to +// allocate a DR'ed image on the render thread (at least not in a way which +// actually works if you want foreign threads to be able to free them). +struct mp_image *dr_helper_get_image(struct dr_helper *dr, int imgfmt, + int w, int h, int stride_align, int flags); diff --git a/video/out/drm_atomic.c b/video/out/drm_atomic.c new file mode 100644 index 0000000..5754504 --- /dev/null +++ b/video/out/drm_atomic.c @@ -0,0 +1,458 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <errno.h> +#include <inttypes.h> + +#include "common/common.h" +#include "common/msg.h" +#include "drm_atomic.h" + +int drm_object_create_properties(struct mp_log *log, int fd, + struct drm_object *object) +{ + object->props = drmModeObjectGetProperties(fd, object->id, object->type); + if (object->props) { + object->props_info = talloc_zero_size(NULL, object->props->count_props + * sizeof(object->props_info)); + if (object->props_info) { + for (int i = 0; i < object->props->count_props; i++) + object->props_info[i] = drmModeGetProperty(fd, object->props->props[i]); + } else { + mp_err(log, "Out of memory\n"); + goto fail; + } + } else { + mp_err(log, "Failed to retrieve properties for object id %d\n", object->id); + goto fail; + } + + return 0; + + fail: + drm_object_free_properties(object); + return -1; +} + +void drm_object_free_properties(struct drm_object *object) +{ + if (object->props) { + for (int i = 0; i < object->props->count_props; i++) { + if (object->props_info[i]) { + drmModeFreeProperty(object->props_info[i]); + object->props_info[i] = NULL; + } + } + + talloc_free(object->props_info); + object->props_info = NULL; + + drmModeFreeObjectProperties(object->props); + object->props = NULL; + } +} + +int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value) +{ + for (int 
i = 0; i < object->props->count_props; i++) { + if (strcasecmp(name, object->props_info[i]->name) == 0) { + *value = object->props->prop_values[i]; + return 0; + } + } + + return -EINVAL; +} + +drmModePropertyBlobPtr drm_object_get_property_blob(struct drm_object *object, char *name) +{ + uint64_t blob_id; + + if (!drm_object_get_property(object, name, &blob_id)) { + return drmModeGetPropertyBlob(object->fd, blob_id); + } + + return NULL; +} + +int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object, + char *name, uint64_t value) +{ + for (int i = 0; i < object->props->count_props; i++) { + if (strcasecmp(name, object->props_info[i]->name) == 0) { + if (object->props_info[i]->flags & DRM_MODE_PROP_IMMUTABLE) { + /* Do not try to set immutable values, as this might cause the + * atomic commit operation to fail. */ + return -EINVAL; + } + return drmModeAtomicAddProperty(request, object->id, + object->props_info[i]->prop_id, value); + } + } + + return -EINVAL; +} + +struct drm_object *drm_object_create(struct mp_log *log, int fd, + uint32_t object_id, uint32_t type) +{ + struct drm_object *obj = NULL; + obj = talloc_zero(NULL, struct drm_object); + obj->fd = fd; + obj->id = object_id; + obj->type = type; + + if (drm_object_create_properties(log, fd, obj)) { + talloc_free(obj); + return NULL; + } + + return obj; +} + +void drm_object_free(struct drm_object *object) +{ + if (object) { + drm_object_free_properties(object); + talloc_free(object); + } +} + +void drm_object_print_info(struct mp_log *log, struct drm_object *object) +{ + mp_err(log, "Object ID = %d (type = %x) has %d properties\n", + object->id, object->type, object->props->count_props); + + for (int i = 0; i < object->props->count_props; i++) + mp_err(log, " Property '%s' = %lld\n", object->props_info[i]->name, + (long long)object->props->prop_values[i]); +} + +struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id, + int connector_id, + int 
draw_plane_idx, int drmprime_video_plane_idx) +{ + drmModePlaneRes *plane_res = NULL; + drmModeRes *res = NULL; + struct drm_object *plane = NULL; + struct drm_atomic_context *ctx; + int crtc_index = -1; + int layercount = -1; + int primary_id = 0; + int overlay_id = 0; + + uint64_t value; + + res = drmModeGetResources(fd); + if (!res) { + mp_err(log, "Cannot retrieve DRM resources: %s\n", mp_strerror(errno)); + goto fail; + } + + plane_res = drmModeGetPlaneResources(fd); + if (!plane_res) { + mp_err(log, "Cannot retrieve plane resources: %s\n", mp_strerror(errno)); + goto fail; + } + + ctx = talloc_zero(NULL, struct drm_atomic_context); + if (!ctx) { + mp_err(log, "Out of memory\n"); + goto fail; + } + + ctx->fd = fd; + ctx->crtc = drm_object_create(log, ctx->fd, crtc_id, DRM_MODE_OBJECT_CRTC); + if (!ctx->crtc) { + mp_err(log, "Failed to create CRTC object\n"); + goto fail; + } + + for (int i = 0; i < res->count_crtcs; i++) { + if (res->crtcs[i] == crtc_id) { + crtc_index = i; + break; + } + } + + for (int i = 0; i < res->count_connectors; i++) { + drmModeConnector *connector = drmModeGetConnector(fd, res->connectors[i]); + if (connector) { + if (connector->connector_id == connector_id) + ctx->connector = drm_object_create(log, ctx->fd, connector->connector_id, + DRM_MODE_OBJECT_CONNECTOR); + drmModeFreeConnector(connector); + if (ctx->connector) + break; + } + } + + for (unsigned int j = 0; j < plane_res->count_planes; j++) { + + drmModePlane *drmplane = drmModeGetPlane(ctx->fd, plane_res->planes[j]); + const uint32_t possible_crtcs = drmplane->possible_crtcs; + const uint32_t plane_id = drmplane->plane_id; + drmModeFreePlane(drmplane); + drmplane = NULL; + + if (possible_crtcs & (1 << crtc_index)) { + plane = drm_object_create(log, ctx->fd, plane_id, DRM_MODE_OBJECT_PLANE); + + if (!plane) { + mp_err(log, "Failed to create Plane object from plane ID %d\n", + plane_id); + goto fail; + } + + if (drm_object_get_property(plane, "TYPE", &value) == -EINVAL) { + 
mp_err(log, "Unable to retrieve type property from plane %d\n", j); + goto fail; + } + + if (value != DRM_PLANE_TYPE_CURSOR) { // Skip cursor planes + layercount++; + + if ((!primary_id) && (value == DRM_PLANE_TYPE_PRIMARY)) + primary_id = plane_id; + + if ((!overlay_id) && (value == DRM_PLANE_TYPE_OVERLAY)) + overlay_id = plane_id; + + if (layercount == draw_plane_idx) { + ctx->draw_plane = plane; + continue; + } + + if (layercount == drmprime_video_plane_idx) { + ctx->drmprime_video_plane = plane; + continue; + } + } + + drm_object_free(plane); + plane = NULL; + } + } + + // draw plane was specified as either of the special options: any primary plane or any overlay plane + if (!ctx->draw_plane) { + const int draw_plane_id = (draw_plane_idx == DRM_OPTS_OVERLAY_PLANE) ? overlay_id : primary_id; + const char *plane_type = (draw_plane_idx == DRM_OPTS_OVERLAY_PLANE) ? "overlay" : "primary"; + if (draw_plane_id) { + mp_verbose(log, "Using %s plane %d as draw plane\n", plane_type, draw_plane_id); + ctx->draw_plane = drm_object_create(log, ctx->fd, draw_plane_id, DRM_MODE_OBJECT_PLANE); + } else { + mp_err(log, "Failed to find draw plane with idx=%d\n", draw_plane_idx); + goto fail; + } + } else { + mp_verbose(log, "Found draw plane with ID %d\n", ctx->draw_plane->id); + } + + // drmprime plane was specified as either of the special options: any primary plane or any overlay plane + if (!ctx->drmprime_video_plane) { + const int drmprime_video_plane_id = (drmprime_video_plane_idx == DRM_OPTS_PRIMARY_PLANE) ? primary_id : overlay_id; + const char *plane_type = (drmprime_video_plane_idx == DRM_OPTS_PRIMARY_PLANE) ? "primary" : "overlay"; + + if (drmprime_video_plane_id) { + mp_verbose(log, "Using %s plane %d as drmprime plane\n", plane_type, drmprime_video_plane_id); + ctx->drmprime_video_plane = drm_object_create(log, ctx->fd, drmprime_video_plane_id, DRM_MODE_OBJECT_PLANE); + } else { + mp_verbose(log, "Failed to find drmprime plane with idx=%d. 
drmprime-overlay hwdec interop will not work\n", drmprime_video_plane_idx); + } + } else { + mp_verbose(log, "Found drmprime plane with ID %d\n", ctx->drmprime_video_plane->id); + } + + drmModeFreePlaneResources(plane_res); + drmModeFreeResources(res); + return ctx; + +fail: + if (res) + drmModeFreeResources(res); + if (plane_res) + drmModeFreePlaneResources(plane_res); + if (plane) + drm_object_free(plane); + return NULL; +} + +void drm_atomic_destroy_context(struct drm_atomic_context *ctx) +{ + drm_mode_destroy_blob(ctx->fd, &ctx->old_state.crtc.mode); + drm_object_free(ctx->crtc); + drm_object_free(ctx->connector); + drm_object_free(ctx->draw_plane); + drm_object_free(ctx->drmprime_video_plane); + talloc_free(ctx); +} + +static bool drm_atomic_save_plane_state(struct drm_object *plane, + struct drm_atomic_plane_state *plane_state) +{ + if (!plane) + return true; + + bool ret = true; + + if (0 > drm_object_get_property(plane, "FB_ID", &plane_state->fb_id)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_ID", &plane_state->crtc_id)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_X", &plane_state->src_x)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_Y", &plane_state->src_y)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_W", &plane_state->src_w)) + ret = false; + if (0 > drm_object_get_property(plane, "SRC_H", &plane_state->src_h)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_X", &plane_state->crtc_x)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_Y", &plane_state->crtc_y)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_W", &plane_state->crtc_w)) + ret = false; + if (0 > drm_object_get_property(plane, "CRTC_H", &plane_state->crtc_h)) + ret = false; + // ZPOS might not exist, so ignore whether or not this succeeds + drm_object_get_property(plane, "ZPOS", &plane_state->zpos); + + return ret; +} + +static bool drm_atomic_restore_plane_state(drmModeAtomicReq 
*request, + struct drm_object *plane, + const struct drm_atomic_plane_state *plane_state) +{ + if (!plane) + return true; + + bool ret = true; + + if (0 > drm_object_set_property(request, plane, "FB_ID", plane_state->fb_id)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_ID", plane_state->crtc_id)) + ret = false; + if (0 > drm_object_set_property(request, plane, "SRC_X", plane_state->src_x)) + ret = false; + if (0 > drm_object_set_property(request, plane, "SRC_Y", plane_state->src_y)) + ret = false; + if (0 > drm_object_set_property(request, plane, "SRC_W", plane_state->src_w)) + ret = false; + if (0 > drm_object_set_property(request, plane, "SRC_H", plane_state->src_h)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_X", plane_state->crtc_x)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_Y", plane_state->crtc_y)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_W", plane_state->crtc_w)) + ret = false; + if (0 > drm_object_set_property(request, plane, "CRTC_H", plane_state->crtc_h)) + ret = false; + // ZPOS might not exist, or be immutable, so ignore whether or not this succeeds + drm_object_set_property(request, plane, "ZPOS", plane_state->zpos); + + return ret; +} + +bool drm_atomic_save_old_state(struct drm_atomic_context *ctx) +{ + if (ctx->old_state.saved) + return false; + + bool ret = true; + + drmModeCrtc *crtc = drmModeGetCrtc(ctx->fd, ctx->crtc->id); + if (crtc == NULL) + return false; + ctx->old_state.crtc.mode.mode = crtc->mode; + drmModeFreeCrtc(crtc); + + if (0 > drm_object_get_property(ctx->crtc, "ACTIVE", &ctx->old_state.crtc.active)) + ret = false; + + // This property was added in kernel 5.0. We will just ignore any errors. 
+ drm_object_get_property(ctx->crtc, "VRR_ENABLED", &ctx->old_state.crtc.vrr_enabled); + + if (0 > drm_object_get_property(ctx->connector, "CRTC_ID", &ctx->old_state.connector.crtc_id)) + ret = false; + + if (!drm_atomic_save_plane_state(ctx->draw_plane, &ctx->old_state.draw_plane)) + ret = false; + if (!drm_atomic_save_plane_state(ctx->drmprime_video_plane, &ctx->old_state.drmprime_video_plane)) + ret = false; + + ctx->old_state.saved = true; + + return ret; +} + +bool drm_atomic_restore_old_state(drmModeAtomicReqPtr request, struct drm_atomic_context *ctx) +{ + if (!ctx->old_state.saved) + return false; + + bool ret = true; + + if (0 > drm_object_set_property(request, ctx->connector, "CRTC_ID", ctx->old_state.connector.crtc_id)) + ret = false; + + // This property was added in kernel 5.0. We will just ignore any errors. + drm_object_set_property(request, ctx->crtc, "VRR_ENABLED", ctx->old_state.crtc.vrr_enabled); + + if (!drm_mode_ensure_blob(ctx->fd, &ctx->old_state.crtc.mode)) + ret = false; + if (0 > drm_object_set_property(request, ctx->crtc, "MODE_ID", ctx->old_state.crtc.mode.blob_id)) + ret = false; + if (0 > drm_object_set_property(request, ctx->crtc, "ACTIVE", ctx->old_state.crtc.active)) + ret = false; + + if (!drm_atomic_restore_plane_state(request, ctx->draw_plane, &ctx->old_state.draw_plane)) + ret = false; + if (!drm_atomic_restore_plane_state(request, ctx->drmprime_video_plane, &ctx->old_state.drmprime_video_plane)) + ret = false; + + ctx->old_state.saved = false; + + return ret; +} + +bool drm_mode_ensure_blob(int fd, struct drm_mode *mode) +{ + int ret = 0; + + if (!mode->blob_id) { + ret = drmModeCreatePropertyBlob(fd, &mode->mode, sizeof(drmModeModeInfo), + &mode->blob_id); + } + + return (ret == 0); +} + +bool drm_mode_destroy_blob(int fd, struct drm_mode *mode) +{ + int ret = 0; + + if (mode->blob_id) { + ret = drmModeDestroyPropertyBlob(fd, mode->blob_id); + mode->blob_id = 0; + } + + return (ret == 0); +} diff --git a/video/out/drm_atomic.h 
b/video/out/drm_atomic.h new file mode 100644 index 0000000..499aa33 --- /dev/null +++ b/video/out/drm_atomic.h @@ -0,0 +1,100 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_DRMATOMIC_H +#define MP_DRMATOMIC_H + +#include <stdlib.h> +#include <stdbool.h> +#include <xf86drm.h> +#include <xf86drmMode.h> + +#include "common/msg.h" +#include "drm_common.h" + +#define DRM_OPTS_PRIMARY_PLANE -1 +#define DRM_OPTS_OVERLAY_PLANE -2 + +struct drm_atomic_plane_state { + uint64_t fb_id; + uint64_t crtc_id; + uint64_t src_x; + uint64_t src_y; + uint64_t src_w; + uint64_t src_h; + uint64_t crtc_x; + uint64_t crtc_y; + uint64_t crtc_w; + uint64_t crtc_h; + uint64_t zpos; +}; + +// Used to store the restore state for VT switching and uninit +struct drm_atomic_state { + bool saved; + struct { + uint64_t crtc_id; + } connector; + struct { + struct drm_mode mode; + uint64_t active; + uint64_t vrr_enabled; + } crtc; + struct drm_atomic_plane_state draw_plane; + struct drm_atomic_plane_state drmprime_video_plane; +}; + +struct drm_object { + int fd; + uint32_t id; + uint32_t type; + drmModeObjectProperties *props; + drmModePropertyRes **props_info; +}; + +struct drm_atomic_context { + int fd; + + struct drm_object *crtc; + struct drm_object *connector; + struct drm_object *draw_plane; + struct drm_object *drmprime_video_plane; + + 
drmModeAtomicReq *request; + + struct drm_atomic_state old_state; +}; + +int drm_object_create_properties(struct mp_log *log, int fd, struct drm_object *object); +void drm_object_free_properties(struct drm_object *object); +int drm_object_get_property(struct drm_object *object, char *name, uint64_t *value); +int drm_object_set_property(drmModeAtomicReq *request, struct drm_object *object, char *name, uint64_t value); +drmModePropertyBlobPtr drm_object_get_property_blob(struct drm_object *object, char *name); +struct drm_object *drm_object_create(struct mp_log *log, int fd, uint32_t object_id, uint32_t type); +void drm_object_free(struct drm_object *object); +void drm_object_print_info(struct mp_log *log, struct drm_object *object); +struct drm_atomic_context *drm_atomic_create_context(struct mp_log *log, int fd, int crtc_id, int connector_id, + int draw_plane_idx, int drmprime_video_plane_idx); +void drm_atomic_destroy_context(struct drm_atomic_context *ctx); + +bool drm_atomic_save_old_state(struct drm_atomic_context *ctx); +bool drm_atomic_restore_old_state(drmModeAtomicReq *request, struct drm_atomic_context *ctx); + +bool drm_mode_ensure_blob(int fd, struct drm_mode *mode); +bool drm_mode_destroy_blob(int fd, struct drm_mode *mode); + +#endif // MP_DRMATOMIC_H diff --git a/video/out/drm_common.c b/video/out/drm_common.c new file mode 100644 index 0000000..da45ca2 --- /dev/null +++ b/video/out/drm_common.c @@ -0,0 +1,1289 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <errno.h> +#include <string.h> +#include <signal.h> +#include <sys/ioctl.h> +#include <poll.h> +#include <sys/stat.h> +#include <unistd.h> +#include <limits.h> +#include <math.h> +#include <time.h> +#include <drm_fourcc.h> + +#include "config.h" + +#if HAVE_CONSIO_H +#include <sys/consio.h> +#else +#include <sys/vt.h> +#endif + +#include "drm_atomic.h" +#include "drm_common.h" + +#include "common/common.h" +#include "common/msg.h" +#include "misc/ctype.h" +#include "options/m_config.h" +#include "osdep/io.h" +#include "osdep/poll_wrapper.h" +#include "osdep/timer.h" +#include "present_sync.h" +#include "video/out/vo.h" + +#define EVT_RELEASE 1 +#define EVT_ACQUIRE 2 +#define EVT_INTERRUPT 255 +#define HANDLER_ACQUIRE 0 +#define HANDLER_RELEASE 1 +#define RELEASE_SIGNAL SIGUSR1 +#define ACQUIRE_SIGNAL SIGUSR2 +#define MAX_CONNECTOR_NAME_LEN 20 + +static int vt_switcher_pipe[2]; + +static int drm_connector_opt_help(struct mp_log *log, const struct m_option *opt, + struct bstr name); + +static int drm_mode_opt_help(struct mp_log *log, const struct m_option *opt, + struct bstr name); + +static int drm_validate_mode_opt(struct mp_log *log, const struct m_option *opt, + struct bstr name, const char **value); + +static void drm_show_available_modes(struct mp_log *log, const drmModeConnector *connector); + +static void drm_show_available_connectors(struct mp_log *log, int card_no, + const char *card_path); +static double mode_get_Hz(const drmModeModeInfo *mode); + +#define OPT_BASE_STRUCT struct drm_opts +const struct m_sub_options drm_conf = { + .opts = (const struct m_option[]) { + {"drm-device", OPT_STRING(device_path), .flags = M_OPT_FILE}, + {"drm-connector", OPT_STRING(connector_spec), + .help = drm_connector_opt_help}, + {"drm-mode", OPT_STRING_VALIDATE(mode_spec, drm_validate_mode_opt), + .help = 
drm_mode_opt_help}, + {"drm-atomic", OPT_CHOICE(drm_atomic, {"no", 0}, {"auto", 1}), + .deprecation_message = "this option is deprecated: DRM Atomic is required"}, + {"drm-draw-plane", OPT_CHOICE(draw_plane, + {"primary", DRM_OPTS_PRIMARY_PLANE}, + {"overlay", DRM_OPTS_OVERLAY_PLANE}), + M_RANGE(0, INT_MAX)}, + {"drm-drmprime-video-plane", OPT_CHOICE(drmprime_video_plane, + {"primary", DRM_OPTS_PRIMARY_PLANE}, + {"overlay", DRM_OPTS_OVERLAY_PLANE}), + M_RANGE(0, INT_MAX)}, + {"drm-format", OPT_CHOICE(drm_format, + {"xrgb8888", DRM_OPTS_FORMAT_XRGB8888}, + {"xrgb2101010", DRM_OPTS_FORMAT_XRGB2101010}, + {"xbgr8888", DRM_OPTS_FORMAT_XBGR8888}, + {"xbgr2101010", DRM_OPTS_FORMAT_XBGR2101010})}, + {"drm-draw-surface-size", OPT_SIZE_BOX(draw_surface_size)}, + {"drm-vrr-enabled", OPT_CHOICE(vrr_enabled, + {"no", 0}, {"yes", 1}, {"auto", -1})}, + {0}, + }, + .defaults = &(const struct drm_opts) { + .mode_spec = "preferred", + .drm_atomic = 1, + .draw_plane = DRM_OPTS_PRIMARY_PLANE, + .drmprime_video_plane = DRM_OPTS_OVERLAY_PLANE, + }, + .size = sizeof(struct drm_opts), +}; + +static const char *connector_names[] = { + "Unknown", // DRM_MODE_CONNECTOR_Unknown + "VGA", // DRM_MODE_CONNECTOR_VGA + "DVI-I", // DRM_MODE_CONNECTOR_DVII + "DVI-D", // DRM_MODE_CONNECTOR_DVID + "DVI-A", // DRM_MODE_CONNECTOR_DVIA + "Composite", // DRM_MODE_CONNECTOR_Composite + "SVIDEO", // DRM_MODE_CONNECTOR_SVIDEO + "LVDS", // DRM_MODE_CONNECTOR_LVDS + "Component", // DRM_MODE_CONNECTOR_Component + "DIN", // DRM_MODE_CONNECTOR_9PinDIN + "DP", // DRM_MODE_CONNECTOR_DisplayPort + "HDMI-A", // DRM_MODE_CONNECTOR_HDMIA + "HDMI-B", // DRM_MODE_CONNECTOR_HDMIB + "TV", // DRM_MODE_CONNECTOR_TV + "eDP", // DRM_MODE_CONNECTOR_eDP + "Virtual", // DRM_MODE_CONNECTOR_VIRTUAL + "DSI", // DRM_MODE_CONNECTOR_DSI + "DPI", // DRM_MODE_CONNECTOR_DPI + "Writeback", // DRM_MODE_CONNECTOR_WRITEBACK + "SPI", // DRM_MODE_CONNECTOR_SPI + "USB", // DRM_MODE_CONNECTOR_USB +}; + +struct drm_mode_spec { + enum { + 
DRM_MODE_SPEC_BY_IDX, // Specified by idx + DRM_MODE_SPEC_BY_NUMBERS, // Specified by width, height and opt. refresh + DRM_MODE_SPEC_PREFERRED, // Select the preferred mode of the display + DRM_MODE_SPEC_HIGHEST, // Select the mode with the highest resolution + } type; + unsigned int idx; + unsigned int width; + unsigned int height; + double refresh; +}; + +/* VT Switcher */ +static void vt_switcher_sighandler(int sig) +{ + int saved_errno = errno; + unsigned char event = sig == RELEASE_SIGNAL ? EVT_RELEASE : EVT_ACQUIRE; + (void)write(vt_switcher_pipe[1], &event, sizeof(event)); + errno = saved_errno; +} + +static bool has_signal_installed(int signo) +{ + struct sigaction act = { 0 }; + sigaction(signo, 0, &act); + return act.sa_handler != 0; +} + +static int install_signal(int signo, void (*handler)(int)) +{ + struct sigaction act = { 0 }; + act.sa_handler = handler; + sigemptyset(&act.sa_mask); + act.sa_flags = SA_RESTART; + return sigaction(signo, &act, NULL); +} + +static void release_vt(void *data) +{ + struct vo_drm_state *drm = data; + MP_VERBOSE(drm, "Releasing VT\n"); + vo_drm_release_crtc(drm); +} + +static void acquire_vt(void *data) +{ + struct vo_drm_state *drm = data; + MP_VERBOSE(drm, "Acquiring VT\n"); + vo_drm_acquire_crtc(drm); +} + +static void vt_switcher_acquire(struct vt_switcher *s, + void (*handler)(void*), void *user_data) +{ + s->handlers[HANDLER_ACQUIRE] = handler; + s->handler_data[HANDLER_ACQUIRE] = user_data; +} + +static void vt_switcher_release(struct vt_switcher *s, + void (*handler)(void*), void *user_data) +{ + s->handlers[HANDLER_RELEASE] = handler; + s->handler_data[HANDLER_RELEASE] = user_data; +} + +static bool vt_switcher_init(struct vt_switcher *s, struct mp_log *log) +{ + s->tty_fd = -1; + s->log = log; + vt_switcher_pipe[0] = -1; + vt_switcher_pipe[1] = -1; + + if (mp_make_cloexec_pipe(vt_switcher_pipe)) { + mp_err(log, "Creating pipe failed: %s\n", mp_strerror(errno)); + return false; + } + + s->tty_fd = open("/dev/tty", 
O_RDWR | O_CLOEXEC); + if (s->tty_fd < 0) { + mp_err(log, "Can't open TTY for VT control: %s\n", mp_strerror(errno)); + return false; + } + + if (has_signal_installed(RELEASE_SIGNAL)) { + mp_err(log, "Can't handle VT release - signal already used\n"); + return false; + } + if (has_signal_installed(ACQUIRE_SIGNAL)) { + mp_err(log, "Can't handle VT acquire - signal already used\n"); + return false; + } + + if (install_signal(RELEASE_SIGNAL, vt_switcher_sighandler)) { + mp_err(log, "Failed to install release signal: %s\n", mp_strerror(errno)); + return false; + } + if (install_signal(ACQUIRE_SIGNAL, vt_switcher_sighandler)) { + mp_err(log, "Failed to install acquire signal: %s\n", mp_strerror(errno)); + return false; + } + + struct vt_mode vt_mode = { 0 }; + if (ioctl(s->tty_fd, VT_GETMODE, &vt_mode) < 0) { + mp_err(log, "VT_GETMODE failed: %s\n", mp_strerror(errno)); + return false; + } + + vt_mode.mode = VT_PROCESS; + vt_mode.relsig = RELEASE_SIGNAL; + vt_mode.acqsig = ACQUIRE_SIGNAL; + // frsig is a signal for forced release. Not implemented on Linux, + // Solaris, BSDs but must be set to a valid signal on some of those. 
+ vt_mode.frsig = SIGIO; // unused + if (ioctl(s->tty_fd, VT_SETMODE, &vt_mode) < 0) { + mp_err(log, "VT_SETMODE failed: %s\n", mp_strerror(errno)); + return false; + } + + // Block the VT switching signals from interrupting the VO thread (they will + // still be picked up by other threads, which will fill vt_switcher_pipe for us) + sigset_t set; + sigemptyset(&set); + sigaddset(&set, RELEASE_SIGNAL); + sigaddset(&set, ACQUIRE_SIGNAL); + pthread_sigmask(SIG_BLOCK, &set, NULL); + + return true; +} + +static void vt_switcher_interrupt_poll(struct vt_switcher *s) +{ + unsigned char event = EVT_INTERRUPT; + (void)write(vt_switcher_pipe[1], &event, sizeof(event)); +} + +static void vt_switcher_destroy(struct vt_switcher *s) +{ + struct vt_mode vt_mode = {0}; + vt_mode.mode = VT_AUTO; + if (ioctl(s->tty_fd, VT_SETMODE, &vt_mode) < 0) { + MP_ERR(s, "VT_SETMODE failed: %s\n", mp_strerror(errno)); + return; + } + + install_signal(RELEASE_SIGNAL, SIG_DFL); + install_signal(ACQUIRE_SIGNAL, SIG_DFL); + close(s->tty_fd); + close(vt_switcher_pipe[0]); + close(vt_switcher_pipe[1]); +} + +static void vt_switcher_poll(struct vt_switcher *s, int timeout_ns) +{ + struct pollfd fds[1] = { + { .events = POLLIN, .fd = vt_switcher_pipe[0] }, + }; + mp_poll(fds, 1, timeout_ns); + if (!fds[0].revents) + return; + + unsigned char event; + if (read(fds[0].fd, &event, sizeof(event)) != sizeof(event)) + return; + + switch (event) { + case EVT_RELEASE: + s->handlers[HANDLER_RELEASE](s->handler_data[HANDLER_RELEASE]); + if (ioctl(s->tty_fd, VT_RELDISP, 1) < 0) { + MP_ERR(s, "Failed to release virtual terminal\n"); + } + break; + case EVT_ACQUIRE: + s->handlers[HANDLER_ACQUIRE](s->handler_data[HANDLER_ACQUIRE]); + if (ioctl(s->tty_fd, VT_RELDISP, VT_ACKACQ) < 0) { + MP_ERR(s, "Failed to acquire virtual terminal\n"); + } + break; + case EVT_INTERRUPT: + break; + } +} + +bool vo_drm_acquire_crtc(struct vo_drm_state *drm) +{ + if (drm->active) + return true; + drm->active = true; + + if 
(drmSetMaster(drm->fd)) { + MP_WARN(drm, "Failed to acquire DRM master: %s\n", + mp_strerror(errno)); + } + + struct drm_atomic_context *atomic_ctx = drm->atomic_context; + + if (!drm_atomic_save_old_state(atomic_ctx)) + MP_WARN(drm, "Failed to save old DRM atomic state\n"); + + drmModeAtomicReqPtr request = drmModeAtomicAlloc(); + if (!request) { + MP_ERR(drm, "Failed to allocate drm atomic request\n"); + goto err; + } + + if (drm_object_set_property(request, atomic_ctx->connector, "CRTC_ID", drm->crtc_id) < 0) { + MP_ERR(drm, "Could not set CRTC_ID on connector\n"); + goto err; + } + + if (!drm_mode_ensure_blob(drm->fd, &drm->mode)) { + MP_ERR(drm, "Failed to create DRM mode blob\n"); + goto err; + } + if (drm_object_set_property(request, atomic_ctx->crtc, "MODE_ID", drm->mode.blob_id) < 0) { + MP_ERR(drm, "Could not set MODE_ID on crtc\n"); + goto err; + } + if (drm_object_set_property(request, atomic_ctx->crtc, "ACTIVE", 1) < 0) { + MP_ERR(drm, "Could not set ACTIVE on crtc\n"); + goto err; + } + + /* + * VRR related properties were added in kernel 5.0. We will not fail if we + * cannot query or set the value, but we will log as appropriate. + */ + uint64_t vrr_capable = 0; + drm_object_get_property(atomic_ctx->connector, "VRR_CAPABLE", &vrr_capable); + MP_VERBOSE(drm, "crtc is%s VRR capable\n", vrr_capable ? 
"" : " not"); + + uint64_t vrr_requested = drm->opts->vrr_enabled; + if (vrr_requested == 1 || (vrr_capable && vrr_requested == -1)) { + if (drm_object_set_property(request, atomic_ctx->crtc, "VRR_ENABLED", 1) < 0) { + MP_WARN(drm, "Could not enable VRR on crtc\n"); + } else { + MP_VERBOSE(drm, "Enabled VRR on crtc\n"); + } + } + + drm_object_set_property(request, atomic_ctx->draw_plane, "FB_ID", drm->fb->id); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_ID", drm->crtc_id); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_X", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_Y", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_W", drm->width << 16); + drm_object_set_property(request, atomic_ctx->draw_plane, "SRC_H", drm->height << 16); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_X", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_Y", 0); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_W", drm->mode.mode.hdisplay); + drm_object_set_property(request, atomic_ctx->draw_plane, "CRTC_H", drm->mode.mode.vdisplay); + + if (drmModeAtomicCommit(drm->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL)) { + MP_ERR(drm, "Failed to commit ModeSetting atomic request: %s\n", strerror(errno)); + goto err; + } + + drmModeAtomicFree(request); + return true; + +err: + drmModeAtomicFree(request); + return false; +} + + +void vo_drm_release_crtc(struct vo_drm_state *drm) +{ + if (!drm->active) + return; + drm->active = false; + + if (!drm->atomic_context->old_state.saved) + return; + + bool success = true; + struct drm_atomic_context *atomic_ctx = drm->atomic_context; + drmModeAtomicReqPtr request = drmModeAtomicAlloc(); + if (!request) { + MP_ERR(drm, "Failed to allocate drm atomic request\n"); + success = false; + } + + if (request && !drm_atomic_restore_old_state(request, atomic_ctx)) { + MP_WARN(drm, "Got error while restoring old state\n"); + success = 
false; + } + + if (request) { + if (drmModeAtomicCommit(drm->fd, request, DRM_MODE_ATOMIC_ALLOW_MODESET, NULL)) { + MP_WARN(drm, "Failed to commit ModeSetting atomic request: %s\n", + mp_strerror(errno)); + success = false; + } + } + + if (request) + drmModeAtomicFree(request); + + if (!success) + MP_ERR(drm, "Failed to restore previous mode\n"); + + if (drmDropMaster(drm->fd)) { + MP_WARN(drm, "Failed to drop DRM master: %s\n", + mp_strerror(errno)); + } +} + +/* libdrm */ +static void get_connector_name(const drmModeConnector *connector, + char ret[MAX_CONNECTOR_NAME_LEN]) +{ + const char *type_name; + + if (connector->connector_type < MP_ARRAY_SIZE(connector_names)) { + type_name = connector_names[connector->connector_type]; + } else { + type_name = "UNKNOWN"; + } + + snprintf(ret, MAX_CONNECTOR_NAME_LEN, "%s-%d", type_name, + connector->connector_type_id); +} + +// Gets the first connector whose name matches the input parameter. +// The returned connector may be disconnected. +// Result must be freed with drmModeFreeConnector. +static drmModeConnector *get_connector_by_name(const drmModeRes *res, + const char *connector_name, + int fd) +{ + for (int i = 0; i < res->count_connectors; i++) { + drmModeConnector *connector + = drmModeGetConnector(fd, res->connectors[i]); + if (!connector) + continue; + char other_connector_name[MAX_CONNECTOR_NAME_LEN]; + get_connector_name(connector, other_connector_name); + if (!strcmp(connector_name, other_connector_name)) + return connector; + drmModeFreeConnector(connector); + } + return NULL; +} + +// Gets the first connected connector. +// Result must be freed with drmModeFreeConnector. 
+static drmModeConnector *get_first_connected_connector(const drmModeRes *res, + int fd) +{ + for (int i = 0; i < res->count_connectors; i++) { + drmModeConnector *connector = drmModeGetConnector(fd, res->connectors[i]); + if (!connector) + continue; + if (connector->connection == DRM_MODE_CONNECTED && connector->count_modes > 0) { + return connector; + } + drmModeFreeConnector(connector); + } + return NULL; +} + +static bool setup_connector(struct vo_drm_state *drm, const drmModeRes *res, + const char *connector_name) +{ + drmModeConnector *connector; + + if (connector_name && strcmp(connector_name, "") && strcmp(connector_name, "auto")) { + connector = get_connector_by_name(res, connector_name, drm->fd); + if (!connector) { + MP_ERR(drm, "No connector with name %s found\n", connector_name); + drm_show_available_connectors(drm->log, drm->card_no, drm->card_path); + return false; + } + } else { + connector = get_first_connected_connector(res, drm->fd); + if (!connector) { + MP_ERR(drm, "No connected connectors found\n"); + return false; + } + } + + if (connector->connection != DRM_MODE_CONNECTED) { + drmModeFreeConnector(connector); + MP_ERR(drm, "Chosen connector is disconnected\n"); + return false; + } + + if (connector->count_modes == 0) { + drmModeFreeConnector(connector); + MP_ERR(drm, "Chosen connector has no valid modes\n"); + return false; + } + + drm->connector = connector; + return true; +} + +static bool setup_crtc(struct vo_drm_state *drm, const drmModeRes *res) +{ + // First try to find currently connected encoder and its current CRTC + for (unsigned int i = 0; i < res->count_encoders; i++) { + drmModeEncoder *encoder = drmModeGetEncoder(drm->fd, res->encoders[i]); + if (!encoder) { + MP_WARN(drm, "Cannot retrieve encoder %u:%u: %s\n", + i, res->encoders[i], mp_strerror(errno)); + continue; + } + + if (encoder->encoder_id == drm->connector->encoder_id && encoder->crtc_id != 0) { + MP_VERBOSE(drm, "Connector %u currently connected to encoder %u\n", + 
drm->connector->connector_id, drm->connector->encoder_id); + drm->encoder = encoder; + drm->crtc_id = encoder->crtc_id; + goto success; + } + + drmModeFreeEncoder(encoder); + } + + // Otherwise pick first legal encoder and CRTC combo for the connector + for (unsigned int i = 0; i < drm->connector->count_encoders; ++i) { + drmModeEncoder *encoder + = drmModeGetEncoder(drm->fd, drm->connector->encoders[i]); + if (!encoder) { + MP_WARN(drm, "Cannot retrieve encoder %u:%u: %s\n", + i, drm->connector->encoders[i], mp_strerror(errno)); + continue; + } + + // iterate all global CRTCs + for (unsigned int j = 0; j < res->count_crtcs; ++j) { + // check whether this CRTC works with the encoder + if (!(encoder->possible_crtcs & (1 << j))) + continue; + + drm->encoder = encoder; + drm->crtc_id = res->crtcs[j]; + goto success; + } + + drmModeFreeEncoder(encoder); + } + + MP_ERR(drm, "Connector %u has no suitable CRTC\n", + drm->connector->connector_id); + return false; + + success: + MP_VERBOSE(drm, "Selected Encoder %u with CRTC %u\n", + drm->encoder->encoder_id, drm->crtc_id); + return true; +} + +static bool all_digits(const char *str) +{ + if (str == NULL || str[0] == '\0') { + return false; + } + + for (const char *c = str; *c != '\0'; ++c) { + if (!mp_isdigit(*c)) + return false; + } + return true; +} + +static bool parse_mode_spec(const char *spec, struct drm_mode_spec *parse_result) +{ + if (spec == NULL || spec[0] == '\0' || strcmp(spec, "preferred") == 0) { + if (parse_result) { + *parse_result = + (struct drm_mode_spec) { .type = DRM_MODE_SPEC_PREFERRED }; + } + return true; + } + + if (strcmp(spec, "highest") == 0) { + if (parse_result) { + *parse_result = + (struct drm_mode_spec) { .type = DRM_MODE_SPEC_HIGHEST }; + } + return true; + } + + // If the string is made up of only digits, it means that it is an index number + if (all_digits(spec)) { + if (parse_result) { + *parse_result = (struct drm_mode_spec) { + .type = DRM_MODE_SPEC_BY_IDX, + .idx = strtoul(spec, 
NULL, 10), + }; + } + return true; + } + + if (!mp_isdigit(spec[0])) + return false; + char *height_part, *refresh_part; + const unsigned int width = strtoul(spec, &height_part, 10); + if (spec == height_part || height_part[0] == '\0' || height_part[0] != 'x') + return false; + + height_part += 1; + if (!mp_isdigit(height_part[0])) + return false; + const unsigned int height = strtoul(height_part, &refresh_part, 10); + if (height_part == refresh_part) + return false; + + char *rest = NULL; + double refresh; + switch (refresh_part[0]) { + case '\0': + refresh = nan(""); + break; + case '@': + refresh_part += 1; + if (!(mp_isdigit(refresh_part[0]) || refresh_part[0] == '.')) + return false; + refresh = strtod(refresh_part, &rest); + if (refresh_part == rest || rest[0] != '\0' || refresh < 0.0) + return false; + break; + default: + return false; + } + + if (parse_result) { + *parse_result = (struct drm_mode_spec) { + .type = DRM_MODE_SPEC_BY_NUMBERS, + .width = width, + .height = height, + .refresh = refresh, + }; + } + return true; +} + +static bool setup_mode_by_idx(struct vo_drm_state *drm, unsigned int mode_idx) +{ + if (mode_idx >= drm->connector->count_modes) { + MP_ERR(drm, "Bad mode index (max = %d).\n", + drm->connector->count_modes - 1); + return false; + } + + drm->mode.mode = drm->connector->modes[mode_idx]; + return true; +} + +static bool mode_match(const drmModeModeInfo *mode, + unsigned int width, + unsigned int height, + double refresh) +{ + if (isnan(refresh)) { + return + (mode->hdisplay == width) && + (mode->vdisplay == height); + } else { + const double mode_refresh = mode_get_Hz(mode); + return + (mode->hdisplay == width) && + (mode->vdisplay == height) && + ((int)round(refresh*100) == (int)round(mode_refresh*100)); + } +} + +static bool setup_mode_by_numbers(struct vo_drm_state *drm, + unsigned int width, + unsigned int height, + double refresh) +{ + for (unsigned int i = 0; i < drm->connector->count_modes; ++i) { + drmModeModeInfo *current_mode 
= &drm->connector->modes[i]; + if (mode_match(current_mode, width, height, refresh)) { + drm->mode.mode = *current_mode; + return true; + } + } + + MP_ERR(drm, "Could not find mode matching %s\n", drm->opts->mode_spec); + return false; +} + +static bool setup_mode_preferred(struct vo_drm_state *drm) +{ + for (unsigned int i = 0; i < drm->connector->count_modes; ++i) { + drmModeModeInfo *current_mode = &drm->connector->modes[i]; + if (current_mode->type & DRM_MODE_TYPE_PREFERRED) { + drm->mode.mode = *current_mode; + return true; + } + } + + // Fall back to first mode + MP_WARN(drm, "Could not find any preferred mode. Picking the first mode.\n"); + drm->mode.mode = drm->connector->modes[0]; + return true; +} + +static bool setup_mode_highest(struct vo_drm_state *drm) +{ + unsigned int area = 0; + drmModeModeInfo *highest_resolution_mode = &drm->connector->modes[0]; + for (unsigned int i = 0; i < drm->connector->count_modes; ++i) { + drmModeModeInfo *current_mode = &drm->connector->modes[i]; + + const unsigned int current_area = + current_mode->hdisplay * current_mode->vdisplay; + if (current_area > area) { + highest_resolution_mode = current_mode; + area = current_area; + } + } + + drm->mode.mode = *highest_resolution_mode; + return true; +} + +static bool setup_mode(struct vo_drm_state *drm) +{ + if (drm->connector->count_modes <= 0) { + MP_ERR(drm, "No available modes\n"); + return false; + } + + struct drm_mode_spec parsed; + if (!parse_mode_spec(drm->opts->mode_spec, &parsed)) { + MP_ERR(drm, "Parse error\n"); + goto err; + } + + switch (parsed.type) { + case DRM_MODE_SPEC_BY_IDX: + if (!setup_mode_by_idx(drm, parsed.idx)) + goto err; + break; + case DRM_MODE_SPEC_BY_NUMBERS: + if (!setup_mode_by_numbers(drm, parsed.width, parsed.height, parsed.refresh)) + goto err; + break; + case DRM_MODE_SPEC_PREFERRED: + if (!setup_mode_preferred(drm)) + goto err; + break; + case DRM_MODE_SPEC_HIGHEST: + if (!setup_mode_highest(drm)) + goto err; + break; + default: + 
MP_ERR(drm, "setup_mode: Internal error\n"); + goto err; + } + + drmModeModeInfo *mode = &drm->mode.mode; + MP_VERBOSE(drm, "Selected mode: %s (%dx%d@%.2fHz)\n", + mode->name, mode->hdisplay, mode->vdisplay, mode_get_Hz(mode)); + + return true; + +err: + MP_INFO(drm, "Available modes:\n"); + drm_show_available_modes(drm->log, drm->connector); + return false; +} + +static int open_card_path(const char *path) +{ + return open(path, O_RDWR | O_CLOEXEC); +} + +static bool card_supports_kms(const char *path) +{ + int fd = open_card_path(path); + bool ret = fd != -1 && drmIsKMS(fd); + if (fd != -1) + close(fd); + return ret; +} + +static bool card_has_connection(const char *path) +{ + int fd = open_card_path(path); + bool ret = false; + if (fd != -1) { + drmModeRes *res = drmModeGetResources(fd); + if (res) { + drmModeConnector *connector = get_first_connected_connector(res, fd); + if (connector) + ret = true; + drmModeFreeConnector(connector); + drmModeFreeResources(res); + } + close(fd); + } + return ret; +} + +static void get_primary_device_path(struct vo_drm_state *drm) +{ + if (drm->opts->device_path) { + drm->card_path = talloc_strdup(drm, drm->opts->device_path); + return; + } + + drmDevice *devices[DRM_MAX_MINOR] = { 0 }; + int card_count = drmGetDevices2(0, devices, MP_ARRAY_SIZE(devices)); + bool card_no_given = drm->card_no >= 0; + + if (card_count < 0) { + MP_ERR(drm, "Listing DRM devices with drmGetDevices failed! (%s)\n", + mp_strerror(errno)); + goto err; + } + + if (card_no_given && drm->card_no > (card_count - 1)) { + MP_ERR(drm, "Card number %d given too high! %d devices located.\n", + drm->card_no, card_count); + goto err; + } + + for (int i = card_no_given ? 
drm->card_no : 0; i < card_count; i++) { + drmDevice *dev = devices[i]; + + if (!(dev->available_nodes & (1 << DRM_NODE_PRIMARY))) { + if (card_no_given) { + MP_ERR(drm, "DRM card number %d given, but it does not have " + "a primary node!\n", i); + break; + } + + continue; + } + + const char *card_path = dev->nodes[DRM_NODE_PRIMARY]; + + if (!card_supports_kms(card_path)) { + if (card_no_given) { + MP_ERR(drm, + "DRM card number %d given, but it does not support " + "KMS!\n", i); + break; + } + + continue; + } + + if (!card_has_connection(card_path)) { + if (card_no_given) { + MP_ERR(drm, + "DRM card number %d given, but it does not have any " + "connected outputs.\n", i); + break; + } + + continue; + } + + MP_VERBOSE(drm, "Picked DRM card %d, primary node %s%s.\n", + i, card_path, + card_no_given ? "" : " as the default"); + + drm->card_path = talloc_strdup(drm, card_path); + drm->card_no = i; + break; + } + + if (!drm->card_path) + MP_ERR(drm, "No primary DRM device could be picked!\n"); + +err: + drmFreeDevices(devices, card_count); +} + +static void drm_pflip_cb(int fd, unsigned int msc, unsigned int sec, + unsigned int usec, void *data) +{ + struct vo_drm_state *drm = data; + + int64_t ust = MP_TIME_S_TO_NS(sec) + MP_TIME_US_TO_NS(usec); + present_sync_update_values(drm->present, ust, msc); + present_sync_swap(drm->present); + drm->waiting_for_flip = false; +} + +int vo_drm_control(struct vo *vo, int *events, int request, void *arg) +{ + struct vo_drm_state *drm = vo->drm; + switch (request) { + case VOCTRL_GET_DISPLAY_FPS: { + double fps = vo_drm_get_display_fps(drm); + if (fps <= 0) + break; + *(double*)arg = fps; + return VO_TRUE; + } + case VOCTRL_GET_DISPLAY_RES: { + ((int *)arg)[0] = drm->mode.mode.hdisplay; + ((int *)arg)[1] = drm->mode.mode.vdisplay; + return VO_TRUE; + } + case VOCTRL_PAUSE: + vo->want_redraw = true; + drm->paused = true; + return VO_TRUE; + case VOCTRL_RESUME: + drm->paused = false; + return VO_TRUE; + } + return VO_NOTIMPL; +} + 
+bool vo_drm_init(struct vo *vo) +{ + vo->drm = talloc_zero(NULL, struct vo_drm_state); + struct vo_drm_state *drm = vo->drm; + + *drm = (struct vo_drm_state) { + .vo = vo, + .log = mp_log_new(drm, vo->log, "drm"), + .mode = {{0}}, + .crtc_id = -1, + .card_no = -1, + }; + + drm->vt_switcher_active = vt_switcher_init(&drm->vt_switcher, drm->log); + if (drm->vt_switcher_active) { + vt_switcher_acquire(&drm->vt_switcher, acquire_vt, drm); + vt_switcher_release(&drm->vt_switcher, release_vt, drm); + } else { + MP_WARN(drm, "Failed to set up VT switcher. Terminal switching will be unavailable.\n"); + } + + drm->opts = mp_get_config_group(drm, drm->vo->global, &drm_conf); + + drmModeRes *res = NULL; + get_primary_device_path(drm); + + if (!drm->card_path) { + MP_ERR(drm, "Failed to find a usable DRM primary node!\n"); + goto err; + } + + drm->fd = open_card_path(drm->card_path); + if (drm->fd < 0) { + MP_ERR(drm, "Cannot open card \"%d\": %s.\n", drm->card_no, mp_strerror(errno)); + goto err; + } + + drmVersionPtr ver = drmGetVersion(drm->fd); + if (ver) { + MP_VERBOSE(drm, "Driver: %s %d.%d.%d (%s)\n", ver->name, ver->version_major, + ver->version_minor, ver->version_patchlevel, ver->date); + drmFreeVersion(ver); + } + + res = drmModeGetResources(drm->fd); + if (!res) { + MP_ERR(drm, "Cannot retrieve DRM resources: %s\n", mp_strerror(errno)); + goto err; + } + + if (!setup_connector(drm, res, drm->opts->connector_spec)) + goto err; + if (!setup_crtc(drm, res)) + goto err; + if (!setup_mode(drm)) + goto err; + + // Universal planes allows accessing all the planes (including primary) + if (drmSetClientCap(drm->fd, DRM_CLIENT_CAP_UNIVERSAL_PLANES, 1)) { + MP_ERR(drm, "Failed to set Universal planes capability\n"); + } + + if (drmSetClientCap(drm->fd, DRM_CLIENT_CAP_ATOMIC, 1)) { + MP_ERR(drm, "Failed to create DRM atomic context, no DRM Atomic support\n"); + goto err; + } else { + MP_VERBOSE(drm, "DRM Atomic support found\n"); + drm->atomic_context = 
drm_atomic_create_context(drm->log, drm->fd, drm->crtc_id, + drm->connector->connector_id, + drm->opts->draw_plane, + drm->opts->drmprime_video_plane); + if (!drm->atomic_context) { + MP_ERR(drm, "Failed to create DRM atomic context\n"); + goto err; + } + } + + drmModeFreeResources(res); + + drm->ev.version = DRM_EVENT_CONTEXT_VERSION; + drm->ev.page_flip_handler = &drm_pflip_cb; + drm->present = mp_present_initialize(drm, drm->vo->opts, VO_MAX_SWAPCHAIN_DEPTH); + + return true; + +err: + if (res) + drmModeFreeResources(res); + + vo_drm_uninit(vo); + return false; +} + +void vo_drm_uninit(struct vo *vo) +{ + struct vo_drm_state *drm = vo->drm; + if (!drm) + return; + + vo_drm_release_crtc(drm); + if (drm->vt_switcher_active) + vt_switcher_destroy(&drm->vt_switcher); + + drm_mode_destroy_blob(drm->fd, &drm->mode); + + if (drm->connector) { + drmModeFreeConnector(drm->connector); + drm->connector = NULL; + } + if (drm->encoder) { + drmModeFreeEncoder(drm->encoder); + drm->encoder = NULL; + } + if (drm->atomic_context) { + drm_atomic_destroy_context(drm->atomic_context); + } + + close(drm->fd); + talloc_free(drm); + vo->drm = NULL; +} + +static double mode_get_Hz(const drmModeModeInfo *mode) +{ + double rate = mode->clock * 1000.0 / mode->htotal / mode->vtotal; + if (mode->flags & DRM_MODE_FLAG_INTERLACE) + rate *= 2.0; + return rate; +} + +static void drm_show_available_modes(struct mp_log *log, + const drmModeConnector *connector) +{ + for (unsigned int i = 0; i < connector->count_modes; i++) { + mp_info(log, " Mode %d: %s (%dx%d@%.2fHz)\n", i, + connector->modes[i].name, + connector->modes[i].hdisplay, + connector->modes[i].vdisplay, + mode_get_Hz(&connector->modes[i])); + } +} + +static void drm_show_foreach_connector(struct mp_log *log, int card_no, + const char *card_path, + void (*show_fn)(struct mp_log*, int, + const drmModeConnector*)) +{ + int fd = open_card_path(card_path); + if (fd < 0) { + mp_err(log, "Failed to open card %d (%s)\n", card_no, card_path); 
+ return; + } + + drmModeRes *res = drmModeGetResources(fd); + if (!res) { + mp_err(log, "Cannot retrieve DRM resources: %s\n", mp_strerror(errno)); + goto err; + } + + for (int i = 0; i < res->count_connectors; i++) { + drmModeConnector *connector = drmModeGetConnector(fd, res->connectors[i]); + if (!connector) + continue; + show_fn(log, card_no, connector); + drmModeFreeConnector(connector); + } + +err: + if (fd >= 0) + close(fd); + if (res) + drmModeFreeResources(res); +} + +static void drm_show_connector_name_and_state_callback(struct mp_log *log, int card_no, + const drmModeConnector *connector) +{ + char other_connector_name[MAX_CONNECTOR_NAME_LEN]; + get_connector_name(connector, other_connector_name); + const char *connection_str = (connector->connection == DRM_MODE_CONNECTED) ? + "connected" : "disconnected"; + mp_info(log, " %s (%s)\n", other_connector_name, connection_str); +} + +static void drm_show_available_connectors(struct mp_log *log, int card_no, + const char *card_path) +{ + mp_info(log, "Available connectors for card %d (%s):\n", card_no, + card_path); + drm_show_foreach_connector(log, card_no, card_path, + drm_show_connector_name_and_state_callback); + mp_info(log, "\n"); +} + +static void drm_show_connector_modes_callback(struct mp_log *log, int card_no, + const drmModeConnector *connector) +{ + if (connector->connection != DRM_MODE_CONNECTED) + return; + + char other_connector_name[MAX_CONNECTOR_NAME_LEN]; + get_connector_name(connector, other_connector_name); + mp_info(log, "Available modes for drm-connector=%d.%s\n", + card_no, other_connector_name); + drm_show_available_modes(log, connector); + mp_info(log, "\n"); +} + +static void drm_show_available_connectors_and_modes(struct mp_log *log, + int card_no, + const char *card_path) +{ + drm_show_foreach_connector(log, card_no, card_path, + drm_show_connector_modes_callback); +} + +static void drm_show_foreach_card(struct mp_log *log, + void (*show_fn)(struct mp_log *, int, + const char *)) 
+{ + drmDevice *devices[DRM_MAX_MINOR] = { 0 }; + int card_count = drmGetDevices2(0, devices, MP_ARRAY_SIZE(devices)); + if (card_count < 0) { + mp_err(log, "Listing DRM devices with drmGetDevices failed! (%s)\n", + mp_strerror(errno)); + return; + } + + for (int i = 0; i < card_count; i++) { + drmDevice *dev = devices[i]; + + if (!(dev->available_nodes & (1 << DRM_NODE_PRIMARY))) + continue; + + const char *card_path = dev->nodes[DRM_NODE_PRIMARY]; + + int fd = open_card_path(card_path); + if (fd < 0) { + mp_err(log, "Failed to open primary DRM node path %s!\n", + card_path); + continue; + } + + close(fd); + show_fn(log, i, card_path); + } + + drmFreeDevices(devices, card_count); +} + +static void drm_show_available_cards_and_connectors(struct mp_log *log) +{ + drm_show_foreach_card(log, drm_show_available_connectors); +} + +static void drm_show_available_cards_connectors_and_modes(struct mp_log *log) +{ + drm_show_foreach_card(log, drm_show_available_connectors_and_modes); +} + +static int drm_connector_opt_help(struct mp_log *log, const struct m_option *opt, + struct bstr name) +{ + drm_show_available_cards_and_connectors(log); + return M_OPT_EXIT; +} + +static int drm_mode_opt_help(struct mp_log *log, const struct m_option *opt, + struct bstr name) +{ + drm_show_available_cards_connectors_and_modes(log); + return M_OPT_EXIT; +} + +static int drm_validate_mode_opt(struct mp_log *log, const struct m_option *opt, + struct bstr name, const char **value) +{ + const char *param = *value; + if (!parse_mode_spec(param, NULL)) { + mp_fatal(log, "Invalid value for option drm-mode. 
Must be a positive number, a string of the format WxH[@R] or 'help'\n"); + return M_OPT_INVALID; + } + + return 1; +} + +/* Helpers */ +double vo_drm_get_display_fps(struct vo_drm_state *drm) +{ + return mode_get_Hz(&drm->mode.mode); +} + +void vo_drm_set_monitor_par(struct vo *vo) +{ + struct vo_drm_state *drm = vo->drm; + if (vo->opts->force_monitor_aspect != 0.0) { + vo->monitor_par = drm->fb->width / (double) drm->fb->height / + vo->opts->force_monitor_aspect; + } else { + vo->monitor_par = 1 / vo->opts->monitor_pixel_aspect; + } + MP_VERBOSE(drm, "Monitor pixel aspect: %g\n", vo->monitor_par); +} + +void vo_drm_wait_events(struct vo *vo, int64_t until_time_ns) +{ + struct vo_drm_state *drm = vo->drm; + if (drm->vt_switcher_active) { + int64_t wait_ns = until_time_ns - mp_time_ns(); + int64_t timeout_ns = MPCLAMP(wait_ns, 0, MP_TIME_S_TO_NS(10)); + vt_switcher_poll(&drm->vt_switcher, timeout_ns); + } else { + vo_wait_default(vo, until_time_ns); + } +} + +void vo_drm_wait_on_flip(struct vo_drm_state *drm) +{ + // poll page flip finish event + while (drm->waiting_for_flip) { + const int timeout_ms = 3000; + struct pollfd fds[1] = { { .events = POLLIN, .fd = drm->fd } }; + poll(fds, 1, timeout_ms); + if (fds[0].revents & POLLIN) { + const int ret = drmHandleEvent(drm->fd, &drm->ev); + if (ret != 0) { + MP_ERR(drm, "drmHandleEvent failed: %i\n", ret); + return; + } + } + } +} + +void vo_drm_wakeup(struct vo *vo) +{ + struct vo_drm_state *drm = vo->drm; + if (drm->vt_switcher_active) + vt_switcher_interrupt_poll(&drm->vt_switcher); +} diff --git a/video/out/drm_common.h b/video/out/drm_common.h new file mode 100644 index 0000000..581151f --- /dev/null +++ b/video/out/drm_common.h @@ -0,0 +1,108 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_VT_SWITCHER_H +#define MP_VT_SWITCHER_H + +#include <stdbool.h> +#include <xf86drm.h> +#include <xf86drmMode.h> +#include "vo.h" + +#define DRM_OPTS_FORMAT_XRGB8888 0 +#define DRM_OPTS_FORMAT_XRGB2101010 1 +#define DRM_OPTS_FORMAT_XBGR8888 2 +#define DRM_OPTS_FORMAT_XBGR2101010 3 + +struct framebuffer { + int fd; + uint32_t width; + uint32_t height; + uint32_t stride; + uint32_t size; + uint32_t handle; + uint8_t *map; + uint32_t id; +}; + +struct drm_mode { + drmModeModeInfo mode; + uint32_t blob_id; +}; + +struct drm_opts { + char *device_path; + char *connector_spec; + char *mode_spec; + int drm_atomic; + int draw_plane; + int drmprime_video_plane; + int drm_format; + struct m_geometry draw_surface_size; + int vrr_enabled; +}; + +struct vt_switcher { + int tty_fd; + struct mp_log *log; + void (*handlers[2])(void*); + void *handler_data[2]; +}; + +struct vo_drm_state { + drmModeConnector *connector; + drmModeEncoder *encoder; + drmEventContext ev; + + struct drm_atomic_context *atomic_context; + struct drm_mode mode; + struct drm_opts *opts; + struct framebuffer *fb; + struct mp_log *log; + struct mp_present *present; + struct vo *vo; + struct vt_switcher vt_switcher; + + bool active; + bool paused; + bool still; + bool vt_switcher_active; + bool waiting_for_flip; + + char *card_path; + int card_no; + 
int fd; + + uint32_t crtc_id; + uint32_t height; + uint32_t width; +}; + +bool vo_drm_init(struct vo *vo); +int vo_drm_control(struct vo *vo, int *events, int request, void *arg); + +double vo_drm_get_display_fps(struct vo_drm_state *drm); +void vo_drm_set_monitor_par(struct vo *vo); +void vo_drm_uninit(struct vo *vo); +void vo_drm_wait_events(struct vo *vo, int64_t until_time_ns); +void vo_drm_wait_on_flip(struct vo_drm_state *drm); +void vo_drm_wakeup(struct vo *vo); + +bool vo_drm_acquire_crtc(struct vo_drm_state *drm); +void vo_drm_release_crtc(struct vo_drm_state *drm); + +#endif diff --git a/video/out/drm_prime.c b/video/out/drm_prime.c new file mode 100644 index 0000000..9335fa8 --- /dev/null +++ b/video/out/drm_prime.c @@ -0,0 +1,160 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <errno.h> +#include <unistd.h> +#include <xf86drm.h> +#include <xf86drmMode.h> +#include <drm_mode.h> + +#include "common/common.h" +#include "common/msg.h" +#include "drm_common.h" +#include "drm_prime.h" + +int drm_prime_create_framebuffer(struct mp_log *log, int fd, + AVDRMFrameDescriptor *descriptor, int width, + int height, struct drm_prime_framebuffer *framebuffer, + struct drm_prime_handle_refs *handle_refs) +{ + AVDRMLayerDescriptor *layer = NULL; + uint32_t pitches[4] = { 0 }; + uint32_t offsets[4] = { 0 }; + uint32_t handles[4] = { 0 }; + uint64_t modifiers[4] = { 0 }; + int ret, layer_fd; + + if (descriptor && descriptor->nb_layers) { + *framebuffer = (struct drm_prime_framebuffer){0}; + + for (int object = 0; object < descriptor->nb_objects; object++) { + ret = drmPrimeFDToHandle(fd, descriptor->objects[object].fd, + &framebuffer->gem_handles[object]); + if (ret < 0) { + mp_err(log, "Failed to retrieve the Prime Handle from handle %d (%d).\n", + object, descriptor->objects[object].fd); + goto fail; + } + modifiers[object] = descriptor->objects[object].format_modifier; + } + + layer = &descriptor->layers[0]; + + for (int plane = 0; plane < AV_DRM_MAX_PLANES; plane++) { + layer_fd = framebuffer->gem_handles[layer->planes[plane].object_index]; + if (layer_fd && layer->planes[plane].pitch) { + pitches[plane] = layer->planes[plane].pitch; + offsets[plane] = layer->planes[plane].offset; + handles[plane] = layer_fd; + } else { + pitches[plane] = 0; + offsets[plane] = 0; + handles[plane] = 0; + modifiers[plane] = 0; + } + } + + ret = drmModeAddFB2WithModifiers(fd, width, height, layer->format, + handles, pitches, offsets, + modifiers, &framebuffer->fb_id, + DRM_MODE_FB_MODIFIERS); + if (ret < 0) { + ret = drmModeAddFB2(fd, width, height, layer->format, + handles, pitches, offsets, + &framebuffer->fb_id, 0); + if (ret < 0) { + mp_err(log, "Failed to create framebuffer with drmModeAddFB2 on layer %d: %s\n", + 0, mp_strerror(errno)); + goto fail; 
+ } + } + + for (int plane = 0; plane < AV_DRM_MAX_PLANES; plane++) { + drm_prime_add_handle_ref(handle_refs, framebuffer->gem_handles[plane]); + } + } + + return 0; + +fail: + memset(framebuffer, 0, sizeof(*framebuffer)); + return -1; +} + +void drm_prime_destroy_framebuffer(struct mp_log *log, int fd, + struct drm_prime_framebuffer *framebuffer, + struct drm_prime_handle_refs *handle_refs) +{ + if (framebuffer->fb_id) + drmModeRmFB(fd, framebuffer->fb_id); + + for (int i = 0; i < AV_DRM_MAX_PLANES; i++) { + if (framebuffer->gem_handles[i]) { + drm_prime_remove_handle_ref(handle_refs, + framebuffer->gem_handles[i]); + if (!drm_prime_get_handle_ref_count(handle_refs, + framebuffer->gem_handles[i])) { + drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &framebuffer->gem_handles[i]); + } + } + } + + memset(framebuffer, 0, sizeof(*framebuffer)); +} + +void drm_prime_init_handle_ref_count(void *talloc_parent, + struct drm_prime_handle_refs *handle_refs) +{ + handle_refs->handle_ref_count = talloc_zero(talloc_parent, uint32_t); + handle_refs->size = 1; + handle_refs->ctx = talloc_parent; +} + +void drm_prime_add_handle_ref(struct drm_prime_handle_refs *handle_refs, + uint32_t handle) +{ + if (handle) { + if (handle > handle_refs->size) { + handle_refs->size = handle; + MP_TARRAY_GROW(handle_refs->ctx, handle_refs->handle_ref_count, + handle_refs->size); + } + handle_refs->handle_ref_count[handle - 1]++; + } +} + +void drm_prime_remove_handle_ref(struct drm_prime_handle_refs *handle_refs, + uint32_t handle) +{ + if (handle) { + if (handle <= handle_refs->size && + handle_refs->handle_ref_count[handle - 1]) + { + handle_refs->handle_ref_count[handle - 1]--; + } + } +} + +uint32_t drm_prime_get_handle_ref_count(struct drm_prime_handle_refs *handle_refs, + uint32_t handle) +{ + if (handle) { + if (handle <= handle_refs->size) + return handle_refs->handle_ref_count[handle - 1]; + } + return 0; +} diff --git a/video/out/drm_prime.h b/video/out/drm_prime.h new file mode 100644 index 
0000000..69acba6 --- /dev/null +++ b/video/out/drm_prime.h @@ -0,0 +1,45 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef DRM_PRIME_H +#define DRM_PRIME_H + +#include <libavutil/hwcontext_drm.h> + +#include "common/msg.h" + +struct drm_prime_framebuffer { + uint32_t fb_id; + uint32_t gem_handles[AV_DRM_MAX_PLANES]; +}; + +struct drm_prime_handle_refs { + uint32_t *handle_ref_count; + size_t size; + void *ctx; +}; + +int drm_prime_create_framebuffer(struct mp_log *log, int fd, AVDRMFrameDescriptor *descriptor, int width, int height, + struct drm_prime_framebuffer *framebuffers, + struct drm_prime_handle_refs *handle_refs); +void drm_prime_destroy_framebuffer(struct mp_log *log, int fd, struct drm_prime_framebuffer *framebuffers, + struct drm_prime_handle_refs *handle_refs); +void drm_prime_init_handle_ref_count(void *talloc_parent, struct drm_prime_handle_refs *handle_refs); +void drm_prime_add_handle_ref(struct drm_prime_handle_refs *handle_refs, uint32_t handle); +void drm_prime_remove_handle_ref(struct drm_prime_handle_refs *handle_refs, uint32_t handle); +uint32_t drm_prime_get_handle_ref_count(struct drm_prime_handle_refs *handle_refs, uint32_t handle); +#endif // DRM_PRIME_H diff --git a/video/out/filter_kernels.c b/video/out/filter_kernels.c new file mode 100644 index 0000000..95d99ff --- /dev/null +++ 
b/video/out/filter_kernels.c @@ -0,0 +1,411 @@ +/* + * Some of the filter code was taken from Glumpy: + * # Copyright (c) 2009-2016 Nicolas P. Rougier. All rights reserved. + * # Distributed under the (new) BSD License. + * (https://github.com/glumpy/glumpy/blob/master/glumpy/library/build-spatial-filters.py) + * + * Also see: + * - http://vector-agg.cvs.sourceforge.net/viewvc/vector-agg/agg-2.5/include/agg_image_filters.h + * - Vapoursynth plugin fmtconv (WTFPL Licensed), which is based on + * dither plugin for avisynth from the same author: + * https://github.com/vapoursynth/fmtconv/tree/master/src/fmtc + * - Paul Heckbert's "zoom" + * - XBMC: ConvolutionKernels.cpp etc. + * + * This file is part of mpv. + * + * This file can be distributed under the 3-clause license ("New BSD License"). + * + * You can alternatively redistribute the non-Glumpy parts of this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ */ + +#include <stddef.h> +#include <string.h> +#include <math.h> +#include <assert.h> + +#include "filter_kernels.h" +#include "common/common.h" + +// NOTE: all filters are designed for discrete convolution + +const struct filter_window *mp_find_filter_window(const char *name) +{ + if (!name) + return NULL; + for (const struct filter_window *w = mp_filter_windows; w->name; w++) { + if (strcmp(w->name, name) == 0) + return w; + } + return NULL; +} + +const struct filter_kernel *mp_find_filter_kernel(const char *name) +{ + if (!name) + return NULL; + for (const struct filter_kernel *k = mp_filter_kernels; k->f.name; k++) { + if (strcmp(k->f.name, name) == 0) + return k; + } + return NULL; +} + +// sizes = sorted list of available filter sizes, terminated with size 0 +// inv_scale = source_size / dest_size +bool mp_init_filter(struct filter_kernel *filter, const int *sizes, + double inv_scale) +{ + assert(filter->f.radius > 0); + double blur = filter->f.blur > 0.0 ? filter->f.blur : 1.0; + filter->radius = blur * filter->f.radius; + + // Only downscaling requires widening the filter + filter->filter_scale = MPMAX(1.0, inv_scale); + double src_radius = filter->radius * filter->filter_scale; + // Polar filters are dependent solely on the radius + if (filter->polar) { + filter->size = 1; // Not meaningful for EWA/polar scalers. + // Safety precaution to avoid generating a gigantic shader + if (src_radius > 16.0) { + src_radius = 16.0; + filter->filter_scale = src_radius / filter->radius; + return false; + } + return true; + } + int size = ceil(2.0 * src_radius); + // round up to smallest available size that's still large enough + if (size < sizes[0]) + size = sizes[0]; + const int *cursize = sizes; + while (size > *cursize && *cursize) + cursize++; + if (*cursize) { + filter->size = *cursize; + return true; + } else { + // The filter doesn't fit - instead of failing completely, use the + // largest filter available. 
This is incorrect, but better than refusing + // to do anything. + filter->size = cursize[-1]; + filter->filter_scale = (filter->size/2.0) / filter->radius; + return false; + } +} + +// Sample from a blurred and tapered window +static double sample_window(struct filter_window *kernel, double x) +{ + if (!kernel->weight) + return 1.0; + + // All windows are symmetric, this makes life easier + x = fabs(x); + + // Stretch and taper the window size as needed + x = kernel->blur > 0.0 ? x / kernel->blur : x; + x = x <= kernel->taper ? 0.0 : (x - kernel->taper) / (1 - kernel->taper); + + if (x < kernel->radius) + return kernel->weight(kernel, x); + return 0.0; +} + +// Evaluate a filter's kernel and window at a given absolute position +static double sample_filter(struct filter_kernel *filter, double x) +{ + // The window is always stretched to the entire kernel + double w = sample_window(&filter->w, x / filter->radius * filter->w.radius); + double k = w * sample_window(&filter->f, x); + return k < 0 ? (1 - filter->clamp) * k : k; +} + +// Calculate the 1D filtering kernel for N sample points. +// N = number of samples, which is filter->size +// The weights will be stored in out_w[0] to out_w[N - 1] +// f = x0 - abs(x0), subpixel position in the range [0,1) or [0,1]. +static void mp_compute_weights(struct filter_kernel *filter, double f, + float *out_w) +{ + assert(filter->size > 0); + double sum = 0; + for (int n = 0; n < filter->size; n++) { + double x = f - (n - filter->size / 2 + 1); + double w = sample_filter(filter, x / filter->filter_scale); + out_w[n] = w; + sum += w; + } + // Normalize to preserve energy + for (int n = 0; n < filter->size; n++) + out_w[n] /= sum; +} + +// Fill the given array with weights for the range [0.0, 1.0]. The array is +// interpreted as rectangular array of count * filter->size items, with a +// stride of `stride` floats in between each array element. 
(For polar filters, +// the `count` indicates the row size and filter->size/stride are ignored) +// +// There will be slight sampling error if these weights are used in a OpenGL +// texture as LUT directly. The sampling point of a texel is located at its +// center, so out_array[0] will end up at 0.5 / count instead of 0.0. +// Correct lookup requires a linear coordinate mapping from [0.0, 1.0] to +// [0.5 / count, 1.0 - 0.5 / count]. +void mp_compute_lut(struct filter_kernel *filter, int count, int stride, + float *out_array) +{ + if (filter->polar) { + filter->radius_cutoff = 0.0; + // Compute a 1D array indexed by radius + for (int x = 0; x < count; x++) { + double r = x * filter->radius / (count - 1); + out_array[x] = sample_filter(filter, r); + + if (fabs(out_array[x]) > 1e-3f) + filter->radius_cutoff = r; + } + } else { + // Compute a 2D array indexed by subpixel position + for (int n = 0; n < count; n++) { + mp_compute_weights(filter, n / (double)(count - 1), + out_array + stride * n); + } + } +} + +typedef struct filter_window params; + +static double box(params *p, double x) +{ + // This is mathematically 1.0 everywhere, the clipping is done implicitly + // based on the radius. 
// Modified Bessel function of the first kind, order zero, evaluated with the
// power series  I0(x) = sum_{k>=0} ((x*x/4)^k) / (k!)^2.
//
// Terms are added until the next one is no larger than 1e-12. The result
// depends only on x*x, so it is symmetric in x. Returns +infinity when the
// series overflows a double.
static double bessel_i0(double x)
{
    double s = 1.0;
    double y = x * x / 4.0;
    double t = y;
    // The term index is kept as a double: squaring an int index overflows for
    // large arguments, which need many terms before they start to shrink.
    for (double i = 2.0; t > 1e-12; i += 1.0) {
        s += t;
        // Once the sum has overflowed the result is +inf, and the terms
        // (also infinite by then) would never drop below the threshold.
        if (!isfinite(s))
            break;
        t *= y / (i * i);
    }
    return s;
}
((13.0/11.0 * x - 453.0/209.0) * x - 3.0/209.0) * x + 1.0; + } else if (x < 2.0) { + return ((-6.0/11.0 * (x-1) + 270.0/209.0) * (x-1) - 156.0/ 209.0) * (x-1); + } else { + return ((1.0/11.0 * (x-2) - 45.0/209.0) * (x-2) + 26.0/209.0) * (x-2); + } +} + +static double spline64(params *p, double x) +{ + if (x < 1.0) { + return ((49.0/41.0 * x - 6387.0/2911.0) * x - 3.0/2911.0) * x + 1.0; + } else if (x < 2.0) { + return ((-24.0/41.0 * (x-1) + 4032.0/2911.0) * (x-1) - 2328.0/2911.0) * (x-1); + } else if (x < 3.0) { + return ((6.0/41.0 * (x-2) - 1008.0/2911.0) * (x-2) + 582.0/2911.0) * (x-2); + } else { + return ((-1.0/41.0 * (x-3) + 168.0/2911.0) * (x-3) - 97.0/2911.0) * (x-3); + } +} + +static double gaussian(params *p, double x) +{ + return exp(-2.0 * x * x / p->params[0]); +} + +static double sinc(params *p, double x) +{ + if (fabs(x) < 1e-8) + return 1.0; + x *= M_PI; + return sin(x) / x; +} + +static double jinc(params *p, double x) +{ + if (fabs(x) < 1e-8) + return 1.0; + x *= M_PI; + return 2.0 * j1(x) / x; +} + +static double sphinx(params *p, double x) +{ + if (fabs(x) < 1e-8) + return 1.0; + x *= M_PI; + return 3.0 * (sin(x) - x * cos(x)) / (x * x * x); +} + +const struct filter_window mp_filter_windows[] = { + {"box", 1, box}, + {"triangle", 1, triangle}, + {"bartlett", 1, triangle}, + {"cosine", M_PI_2, cosine}, + {"hanning", 1, hanning}, + {"tukey", 1, hanning, .taper = 0.5}, + {"hamming", 1, hamming}, + {"quadric", 1.5, quadric}, + {"welch", 1, welch}, + {"kaiser", 1, kaiser, .params = {6.33, NAN} }, + {"blackman", 1, blackman, .params = {0.16, NAN} }, + {"gaussian", 2, gaussian, .params = {1.00, NAN} }, + {"sinc", 1, sinc}, + {"jinc", 1.2196698912665045, jinc}, + {"sphinx", 1.4302966531242027, sphinx}, + {0} +}; + +#define JINC_R3 3.2383154841662362 +#define JINC_R4 4.2410628637960699 + +const struct filter_kernel mp_filter_kernels[] = { + // Spline filters + {{"spline16", 2, spline16}}, + {{"spline36", 3, spline36}}, + {{"spline64", 4, spline64}}, + // 
Sinc filters + {{"sinc", 2, sinc, .resizable = true}}, + {{"lanczos", 3, sinc, .resizable = true}, .window = "sinc"}, + {{"ginseng", 3, sinc, .resizable = true}, .window = "jinc"}, + // Jinc filters + {{"jinc", JINC_R3, jinc, .resizable = true}, .polar = true}, + {{"ewa_lanczos", JINC_R3, jinc, .resizable = true}, .polar = true, .window = "jinc"}, + {{"ewa_hanning", JINC_R3, jinc, .resizable = true}, .polar = true, .window = "hanning" }, + {{"ewa_ginseng", JINC_R3, jinc, .resizable = true}, .polar = true, .window = "sinc"}, + // Slightly sharpened to minimize the 1D step response error (to better + // preserve horizontal/vertical lines) + {{"ewa_lanczossharp", JINC_R3, jinc, .blur = 0.9812505837223707, .resizable = true}, + .polar = true, .window = "jinc"}, + // Similar to the above, but sharpened substantially to the point of + // minimizing the total impulse response error on an integer grid. Tends + // to preserve hash patterns well. Very sharp but rings a lot. + {{"ewa_lanczos4sharpest", JINC_R4, jinc, .blur = 0.8845120932605005, .resizable = true}, + .polar = true, .window = "jinc"}, + // Similar to the above, but softened instead, to make even/odd integer + // contributions exactly symmetrical. Designed to smooth out hash patterns. + {{"ewa_lanczossoft", JINC_R3, jinc, .blur = 1.0164667662867047, .resizable = true}, + .polar = true, .window = "jinc"}, + // Very soft (blurred) hanning-windowed jinc; removes almost all aliasing. 
+ // Blur parameter picked to match orthogonal and diagonal contributions + {{"haasnsoft", JINC_R3, jinc, .blur = 1.11, .resizable = true}, + .polar = true, .window = "hanning"}, + // Cubic filters + {{"bicubic", 2, cubic_bc, .params = {1.0, 0.0} }}, + {{"hermite", 1, cubic_bc, .params = {0.0, 0.0} }}, + {{"catmull_rom", 2, cubic_bc, .params = {0.0, 0.5} }}, + {{"mitchell", 2, cubic_bc, .params = {1.0/3.0, 1.0/3.0} }}, + {{"robidoux", 2, cubic_bc, .params = {12 / (19 + 9 * M_SQRT2), + 113 / (58 + 216 * M_SQRT2)} }}, + {{"robidouxsharp", 2, cubic_bc, .params = {6 / (13 + 7 * M_SQRT2), + 7 / (2 + 12 * M_SQRT2)} }}, + {{"ewa_robidoux", 2, cubic_bc, .params = {12 / (19 + 9 * M_SQRT2), + 113 / (58 + 216 * M_SQRT2)}}, + .polar = true}, + {{"ewa_robidouxsharp", 2,cubic_bc, .params = {6 / (13 + 7 * M_SQRT2), + 7 / (2 + 12 * M_SQRT2)}}, + .polar = true}, + // Miscellaneous filters + {{"box", 1, box, .resizable = true}}, + {{"nearest", 0.5, box}}, + {{"triangle", 1, triangle, .resizable = true}}, + {{"gaussian", 2, gaussian, .params = {1.0, NAN}, .resizable = true}}, + {{0}} +}; diff --git a/video/out/filter_kernels.h b/video/out/filter_kernels.h new file mode 100644 index 0000000..b8b2f67 --- /dev/null +++ b/video/out/filter_kernels.h @@ -0,0 +1,56 @@ +/* + * This file is part of mpv. + * + * This file can be distributed under the 3-clause license ("New BSD License"). + * + * You can alternatively redistribute the non-Glumpy parts of this file and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ */ + +#ifndef MPLAYER_FILTER_KERNELS_H +#define MPLAYER_FILTER_KERNELS_H + +#include <stdbool.h> + +struct filter_window { + const char *name; + double radius; // Preferred radius, should only be changed if resizable + double (*weight)(struct filter_window *k, double x); + bool resizable; // Filter supports any given radius + double params[2]; // User-defined custom filter parameters. Not used by + // all filters + double blur; // Blur coefficient (sharpens or widens the filter) + double taper; // Taper coefficient (flattens the filter's center) +}; + +struct filter_kernel { + struct filter_window f; // the kernel itself + struct filter_window w; // window storage + double clamp; // clamping factor, affects negative weights + // Constant values + const char *window; // default window + bool polar; // whether or not the filter uses polar coordinates + // The following values are set by mp_init_filter() at runtime. + int size; // number of coefficients (may depend on radius) + double radius; // true filter radius, derived from f.radius and f.blur + double filter_scale; // Factor to convert the mathematical filter + // function radius to the possibly wider + // (in the case of downsampling) filter sample + // radius. + double radius_cutoff; // the radius at which we can cut off the filter +}; + +extern const struct filter_window mp_filter_windows[]; +extern const struct filter_kernel mp_filter_kernels[]; + +const struct filter_window *mp_find_filter_window(const char *name); +const struct filter_kernel *mp_find_filter_kernel(const char *name); + +bool mp_init_filter(struct filter_kernel *filter, const int *sizes, + double scale); +void mp_compute_lut(struct filter_kernel *filter, int count, int stride, + float *out_array); + +#endif /* MPLAYER_FILTER_KERNELS_H */ diff --git a/video/out/gpu/context.c b/video/out/gpu/context.c new file mode 100644 index 0000000..5ce18af --- /dev/null +++ b/video/out/gpu/context.c @@ -0,0 +1,277 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <stdbool.h> +#include <math.h> +#include <assert.h> + +#include "config.h" +#include "common/common.h" +#include "common/msg.h" +#include "options/options.h" +#include "options/m_option.h" +#include "video/out/vo.h" + +#include "context.h" +#include "spirv.h" + +/* OpenGL */ +extern const struct ra_ctx_fns ra_ctx_glx; +extern const struct ra_ctx_fns ra_ctx_x11_egl; +extern const struct ra_ctx_fns ra_ctx_drm_egl; +extern const struct ra_ctx_fns ra_ctx_wayland_egl; +extern const struct ra_ctx_fns ra_ctx_wgl; +extern const struct ra_ctx_fns ra_ctx_angle; +extern const struct ra_ctx_fns ra_ctx_dxgl; +extern const struct ra_ctx_fns ra_ctx_rpi; +extern const struct ra_ctx_fns ra_ctx_android; + +/* Vulkan */ +extern const struct ra_ctx_fns ra_ctx_vulkan_wayland; +extern const struct ra_ctx_fns ra_ctx_vulkan_win; +extern const struct ra_ctx_fns ra_ctx_vulkan_xlib; +extern const struct ra_ctx_fns ra_ctx_vulkan_android; +extern const struct ra_ctx_fns ra_ctx_vulkan_display; +extern const struct ra_ctx_fns ra_ctx_vulkan_mac; + +/* Direct3D 11 */ +extern const struct ra_ctx_fns ra_ctx_d3d11; + +/* No API */ +extern const struct ra_ctx_fns ra_ctx_wldmabuf; + +static const struct ra_ctx_fns *contexts[] = { +#if HAVE_D3D11 + 
&ra_ctx_d3d11, +#endif + +// OpenGL contexts: +#if HAVE_EGL_ANDROID + &ra_ctx_android, +#endif +#if HAVE_RPI + &ra_ctx_rpi, +#endif +#if HAVE_EGL_ANGLE_WIN32 + &ra_ctx_angle, +#endif +#if HAVE_GL_WIN32 + &ra_ctx_wgl, +#endif +#if HAVE_GL_DXINTEROP + &ra_ctx_dxgl, +#endif +#if HAVE_EGL_WAYLAND + &ra_ctx_wayland_egl, +#endif +#if HAVE_EGL_X11 + &ra_ctx_x11_egl, +#endif +#if HAVE_GL_X11 + &ra_ctx_glx, +#endif +#if HAVE_EGL_DRM + &ra_ctx_drm_egl, +#endif + +// Vulkan contexts: +#if HAVE_VULKAN + +#if HAVE_ANDROID + &ra_ctx_vulkan_android, +#endif +#if HAVE_WIN32_DESKTOP + &ra_ctx_vulkan_win, +#endif +#if HAVE_WAYLAND + &ra_ctx_vulkan_wayland, +#endif +#if HAVE_X11 + &ra_ctx_vulkan_xlib, +#endif +#if HAVE_VK_KHR_DISPLAY + &ra_ctx_vulkan_display, +#endif +#if HAVE_COCOA && HAVE_SWIFT + &ra_ctx_vulkan_mac, +#endif +#endif + +/* No API contexts: */ +#if HAVE_DMABUF_WAYLAND + &ra_ctx_wldmabuf, +#endif +}; + +static int ra_ctx_api_help(struct mp_log *log, const struct m_option *opt, + struct bstr name) +{ + mp_info(log, "GPU APIs (contexts):\n"); + mp_info(log, " auto (autodetect)\n"); + for (int n = 0; n < MP_ARRAY_SIZE(contexts); n++) { + if (!contexts[n]->hidden) + mp_info(log, " %s (%s)\n", contexts[n]->type, contexts[n]->name); + } + return M_OPT_EXIT; +} + +static int ra_ctx_validate_api(struct mp_log *log, const struct m_option *opt, + struct bstr name, const char **value) +{ + struct bstr param = bstr0(*value); + if (bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->type) && !contexts[i]->hidden) + return 1; + } + return M_OPT_INVALID; +} + +static int ra_ctx_context_help(struct mp_log *log, const struct m_option *opt, + struct bstr name) +{ + mp_info(log, "GPU contexts (APIs):\n"); + mp_info(log, " auto (autodetect)\n"); + for (int n = 0; n < MP_ARRAY_SIZE(contexts); n++) { + if (!contexts[n]->hidden) + mp_info(log, " %s (%s)\n", contexts[n]->name, contexts[n]->type); + } + return 
M_OPT_EXIT; +} + +static int ra_ctx_validate_context(struct mp_log *log, const struct m_option *opt, + struct bstr name, const char **value) +{ + struct bstr param = bstr0(*value); + if (bstr_equals0(param, "auto")) + return 1; + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (bstr_equals0(param, contexts[i]->name) && !contexts[i]->hidden) + return 1; + } + return M_OPT_INVALID; +} + +// Create a VO window and create a RA context on it. +// vo_flags: passed to the backend's create window function +struct ra_ctx *ra_ctx_create(struct vo *vo, struct ra_ctx_opts opts) +{ + bool api_auto = !opts.context_type || strcmp(opts.context_type, "auto") == 0; + bool ctx_auto = !opts.context_name || strcmp(opts.context_name, "auto") == 0; + + if (ctx_auto) { + MP_VERBOSE(vo, "Probing for best GPU context.\n"); + opts.probing = true; + } + + // Hack to silence backend (X11/Wayland/etc.) errors. Kill it once backends + // are separate from `struct vo` + bool old_probing = vo->probing; + vo->probing = opts.probing; + + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (contexts[i]->hidden) + continue; + if (!opts.probing && strcmp(contexts[i]->name, opts.context_name) != 0) + continue; + if (!api_auto && strcmp(contexts[i]->type, opts.context_type) != 0) + continue; + + struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct ra_ctx) { + .vo = vo, + .global = vo->global, + .log = mp_log_new(ctx, vo->log, contexts[i]->type), + .opts = opts, + .fns = contexts[i], + }; + + MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name); + if (contexts[i]->init(ctx)) { + vo->probing = old_probing; + return ctx; + } + + talloc_free(ctx); + } + + vo->probing = old_probing; + + // If we've reached this point, then none of the contexts matched the name + // requested, or the backend creation failed for all of them. 
+ if (!vo->probing) + MP_ERR(vo, "Failed initializing any suitable GPU context!\n"); + return NULL; +} + +struct ra_ctx *ra_ctx_create_by_name(struct vo *vo, const char *name) +{ + for (int i = 0; i < MP_ARRAY_SIZE(contexts); i++) { + if (strcmp(name, contexts[i]->name) != 0) + continue; + + struct ra_ctx *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct ra_ctx) { + .vo = vo, + .global = vo->global, + .log = mp_log_new(ctx, vo->log, contexts[i]->type), + .fns = contexts[i], + }; + + MP_VERBOSE(ctx, "Initializing GPU context '%s'\n", ctx->fns->name); + if (contexts[i]->init(ctx)) + return ctx; + talloc_free(ctx); + } + return NULL; +} + +void ra_ctx_destroy(struct ra_ctx **ctx_ptr) +{ + struct ra_ctx *ctx = *ctx_ptr; + if (!ctx) + return; + + if (ctx->spirv && ctx->spirv->fns->uninit) + ctx->spirv->fns->uninit(ctx); + + ctx->fns->uninit(ctx); + talloc_free(ctx); + + *ctx_ptr = NULL; +} + +#define OPT_BASE_STRUCT struct ra_ctx_opts +const struct m_sub_options ra_ctx_conf = { + .opts = (const m_option_t[]) { + {"gpu-context", + OPT_STRING_VALIDATE(context_name, ra_ctx_validate_context), + .help = ra_ctx_context_help}, + {"gpu-api", + OPT_STRING_VALIDATE(context_type, ra_ctx_validate_api), + .help = ra_ctx_api_help}, + {"gpu-debug", OPT_BOOL(debug)}, + {"gpu-sw", OPT_BOOL(allow_sw)}, + {0} + }, + .size = sizeof(struct ra_ctx_opts), +}; diff --git a/video/out/gpu/context.h b/video/out/gpu/context.h new file mode 100644 index 0000000..6788e6f --- /dev/null +++ b/video/out/gpu/context.h @@ -0,0 +1,107 @@ +#pragma once + +#include "video/out/vo.h" +#include "video/csputils.h" + +#include "ra.h" + +struct ra_ctx_opts { + bool allow_sw; // allow software renderers + bool want_alpha; // create an alpha framebuffer if possible + bool debug; // enable debugging layers/callbacks etc. 
+ bool probing; // the backend was auto-probed + char *context_name; // filter by `ra_ctx_fns.name` + char *context_type; // filter by `ra_ctx_fns.type` +}; + +extern const struct m_sub_options ra_ctx_conf; + +struct ra_ctx { + struct vo *vo; + struct ra *ra; + struct mpv_global *global; + struct mp_log *log; + + struct ra_ctx_opts opts; + const struct ra_ctx_fns *fns; + struct ra_swapchain *swapchain; + struct spirv_compiler *spirv; + + void *priv; +}; + +// The functions that make up a ra_ctx. +struct ra_ctx_fns { + const char *type; // API type (for --gpu-api) + const char *name; // name (for --gpu-context) + + bool hidden; // hide the ra_ctx from users + + // Resize the window, or create a new window if there isn't one yet. + // Currently, there is an unfortunate interaction with ctx->vo, and + // display size etc. are determined by it. + bool (*reconfig)(struct ra_ctx *ctx); + + // This behaves exactly like vo_driver.control(). + int (*control)(struct ra_ctx *ctx, int *events, int request, void *arg); + + // These behave exactly like vo_driver.wakeup/wait_events. They are + // optional. + void (*wakeup)(struct ra_ctx *ctx); + void (*wait_events)(struct ra_ctx *ctx, int64_t until_time_ns); + void (*update_render_opts)(struct ra_ctx *ctx); + + // Initialize/destroy the 'struct ra' and possibly the underlying VO backend. + // Not normally called by the user of the ra_ctx. + bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); +}; + +// Extra struct for the swapchain-related functions so they can be easily +// inherited from helpers. +struct ra_swapchain { + struct ra_ctx *ctx; + struct priv *priv; + const struct ra_swapchain_fns *fns; +}; + +// Represents a framebuffer / render target +struct ra_fbo { + struct ra_tex *tex; + bool flip; // rendering needs to be inverted + + // Host system's colorspace that it will be interpreting + // the frame buffer as. 
+ struct mp_colorspace color_space; +}; + +struct ra_swapchain_fns { + // Gets the current framebuffer depth in bits (0 if unknown). Optional. + int (*color_depth)(struct ra_swapchain *sw); + + // Called when rendering starts. Returns false on failure. This must be + // followed by submit_frame, to submit the rendered frame. This function + // can also fail sporadically, and such errors should be ignored unless + // they persist. + bool (*start_frame)(struct ra_swapchain *sw, struct ra_fbo *out_fbo); + + // Present the frame. Issued in lockstep with start_frame, with rendering + // commands in between. The `frame` is just there for timing data, for + // swapchains smart enough to do something with it. + bool (*submit_frame)(struct ra_swapchain *sw, const struct vo_frame *frame); + + // Performs a buffer swap. This blocks for as long as necessary to meet + // params.swapchain_depth, or until the next vblank (for vsynced contexts) + void (*swap_buffers)(struct ra_swapchain *sw); + + // See vo. Usually called after swap_buffers(). + void (*get_vsync)(struct ra_swapchain *sw, struct vo_vsync_info *info); +}; + +// Create and destroy a ra_ctx. This also takes care of creating and destroying +// the underlying `struct ra`, and perhaps the underlying VO backend. +struct ra_ctx *ra_ctx_create(struct vo *vo, struct ra_ctx_opts opts); +void ra_ctx_destroy(struct ra_ctx **ctx); + +// Special case of creating a ra_ctx while specifying a specific context by name. +struct ra_ctx *ra_ctx_create_by_name(struct vo *vo, const char *name); diff --git a/video/out/gpu/d3d11_helpers.c b/video/out/gpu/d3d11_helpers.c new file mode 100644 index 0000000..30d9eae --- /dev/null +++ b/video/out/gpu/d3d11_helpers.c @@ -0,0 +1,966 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_6.h> +#include <versionhelpers.h> + +#include "common/common.h" +#include "common/msg.h" +#include "misc/bstr.h" +#include "osdep/io.h" +#include "osdep/threads.h" +#include "osdep/windows_utils.h" + +#include "d3d11_helpers.h" + +// Windows 8 enum value, not present in mingw-w64 headers +#define DXGI_ADAPTER_FLAG_SOFTWARE (2) +typedef HRESULT(WINAPI *PFN_CREATE_DXGI_FACTORY)(REFIID riid, void **ppFactory); + +static mp_once d3d11_once = MP_STATIC_ONCE_INITIALIZER; +static PFN_D3D11_CREATE_DEVICE pD3D11CreateDevice = NULL; +static PFN_CREATE_DXGI_FACTORY pCreateDXGIFactory1 = NULL; +static void d3d11_load(void) +{ + HMODULE d3d11 = LoadLibraryW(L"d3d11.dll"); + HMODULE dxgilib = LoadLibraryW(L"dxgi.dll"); + if (!d3d11 || !dxgilib) + return; + + pD3D11CreateDevice = (PFN_D3D11_CREATE_DEVICE) + GetProcAddress(d3d11, "D3D11CreateDevice"); + pCreateDXGIFactory1 = (PFN_CREATE_DXGI_FACTORY) + GetProcAddress(dxgilib, "CreateDXGIFactory1"); +} + +static bool load_d3d11_functions(struct mp_log *log) +{ + mp_exec_once(&d3d11_once, d3d11_load); + if (!pD3D11CreateDevice || !pCreateDXGIFactory1) { + mp_fatal(log, "Failed to load base d3d11 functionality: " + "CreateDevice: %s, CreateDXGIFactory1: %s\n", + pD3D11CreateDevice ? 
"success" : "failure", + pCreateDXGIFactory1 ? "success": "failure"); + return false; + } + + return true; +} + +#define D3D11_DXGI_ENUM(prefix, define) { case prefix ## define: return #define; } + +static const char *d3d11_get_format_name(DXGI_FORMAT fmt) +{ + switch (fmt) { + D3D11_DXGI_ENUM(DXGI_FORMAT_, UNKNOWN); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32A32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32B32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16B16A16_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32G8X24_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT_S8X24_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT_X8X24_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, X32_TYPELESS_G8X24_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10A2_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R11G11B10_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8B8A8_SINT); + 
D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16G16_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R32_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R24G8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D24_UNORM_S8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R24_UNORM_X8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, X24_TYPELESS_G8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_FLOAT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, D16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R16_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_UINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8_SINT); + D3D11_DXGI_ENUM(DXGI_FORMAT_, A8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R1_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R9G9B9E5_SHAREDEXP); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R8G8_B8G8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, G8R8_G8B8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC1_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC2_UNORM_SRGB); + 
D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC3_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC4_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC5_SNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G6R5_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B5G5R5A1_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, R10G10B10_XR_BIAS_A2_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8A8_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B8G8R8X8_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_UF16); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC6H_SF16); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_TYPELESS); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, BC7_UNORM_SRGB); + D3D11_DXGI_ENUM(DXGI_FORMAT_, AYUV); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y410); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y416); + D3D11_DXGI_ENUM(DXGI_FORMAT_, NV12); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P010); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P016); + D3D11_DXGI_ENUM(DXGI_FORMAT_, 420_OPAQUE); + D3D11_DXGI_ENUM(DXGI_FORMAT_, YUY2); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y210); + D3D11_DXGI_ENUM(DXGI_FORMAT_, Y216); + D3D11_DXGI_ENUM(DXGI_FORMAT_, NV11); + D3D11_DXGI_ENUM(DXGI_FORMAT_, AI44); + D3D11_DXGI_ENUM(DXGI_FORMAT_, IA44); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P8); + D3D11_DXGI_ENUM(DXGI_FORMAT_, A8P8); + D3D11_DXGI_ENUM(DXGI_FORMAT_, B4G4R4A4_UNORM); + D3D11_DXGI_ENUM(DXGI_FORMAT_, P208); + D3D11_DXGI_ENUM(DXGI_FORMAT_, V208); + D3D11_DXGI_ENUM(DXGI_FORMAT_, V408); + D3D11_DXGI_ENUM(DXGI_FORMAT_, FORCE_UINT); + default: + return "<Unknown>"; + } +} + 
+static const char *d3d11_get_csp_name(DXGI_COLOR_SPACE_TYPE csp) +{ + switch (csp) { + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G10_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G22_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RESERVED); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_NONE_P709_X601); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P601); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P601); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_G22_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G2084_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G2084_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G22_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G2084_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_FULL_G22_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_GHLG_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_FULL_GHLG_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, RGB_STUDIO_G24_NONE_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P709); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_LEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, YCBCR_STUDIO_G24_TOPLEFT_P2020); + D3D11_DXGI_ENUM(DXGI_COLOR_SPACE_, CUSTOM); + default: + return "<Unknown>"; + } +} + +static bool d3d11_get_mp_csp(DXGI_COLOR_SPACE_TYPE csp, + struct mp_colorspace *mp_csp) +{ + if (!mp_csp) + return false; + + // Colorspaces utilizing gamma 2.2 (G22) are set to + // 
AUTO as that keeps the current default flow regarding + // SDR transfer function handling. + // (no adjustment is done unless the user has a CMS LUT). + // + // Additionally, only set primary information with colorspaces + // utilizing non-709 primaries to keep the current behavior + // regarding not doing conversion from BT.601 to BT.709. + switch (csp) { + case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709: + *mp_csp = (struct mp_colorspace){ + .gamma = MP_CSP_TRC_AUTO, + .primaries = MP_CSP_PRIM_AUTO, + }; + break; + case DXGI_COLOR_SPACE_RGB_FULL_G10_NONE_P709: + *mp_csp = (struct mp_colorspace) { + .gamma = MP_CSP_TRC_LINEAR, + .primaries = MP_CSP_PRIM_AUTO, + }; + break; + case DXGI_COLOR_SPACE_RGB_FULL_G2084_NONE_P2020: + *mp_csp = (struct mp_colorspace) { + .gamma = MP_CSP_TRC_PQ, + .primaries = MP_CSP_PRIM_BT_2020, + }; + break; + case DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P2020: + *mp_csp = (struct mp_colorspace) { + .gamma = MP_CSP_TRC_AUTO, + .primaries = MP_CSP_PRIM_BT_2020, + }; + break; + default: + return false; + } + + return true; +} + +static bool query_output_format_and_colorspace(struct mp_log *log, + IDXGISwapChain *swapchain, + DXGI_FORMAT *out_fmt, + DXGI_COLOR_SPACE_TYPE *out_cspace) +{ + IDXGIOutput *output = NULL; + IDXGIOutput6 *output6 = NULL; + DXGI_OUTPUT_DESC1 desc = { 0 }; + char *monitor_name = NULL; + bool success = false; + + if (!out_fmt || !out_cspace) + return false; + + HRESULT hr = IDXGISwapChain_GetContainingOutput(swapchain, &output); + if (FAILED(hr)) { + mp_err(log, "Failed to get swap chain's containing output: %s!\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = IDXGIOutput_QueryInterface(output, &IID_IDXGIOutput6, + (void**)&output6); + if (FAILED(hr)) { + // point where systems older than Windows 10 would fail, + // thus utilizing error log level only with windows 10+ + mp_msg(log, IsWindows10OrGreater() ? 
MSGL_ERR : MSGL_V, + "Failed to create a DXGI 1.6 output interface: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = IDXGIOutput6_GetDesc1(output6, &desc); + if (FAILED(hr)) { + mp_err(log, "Failed to query swap chain's output information: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + monitor_name = mp_to_utf8(NULL, desc.DeviceName); + + mp_verbose(log, "Queried output: %s, %ldx%ld @ %d bits, colorspace: %s (%d)\n", + monitor_name, + desc.DesktopCoordinates.right - desc.DesktopCoordinates.left, + desc.DesktopCoordinates.bottom - desc.DesktopCoordinates.top, + desc.BitsPerColor, + d3d11_get_csp_name(desc.ColorSpace), + desc.ColorSpace); + + *out_cspace = desc.ColorSpace; + + // limit ourselves to the 8bit and 10bit formats for now. + // while the 16bit float format would be preferable as something + // to default to, it seems to be hard-coded to linear transfer + // in windowed mode, and follows configured colorspace in full screen. + *out_fmt = desc.BitsPerColor > 8 ? + DXGI_FORMAT_R10G10B10A2_UNORM : DXGI_FORMAT_R8G8B8A8_UNORM; + + success = true; + +done: + talloc_free(monitor_name); + SAFE_RELEASE(output6); + SAFE_RELEASE(output); + return success; +} + +// Get a const array of D3D_FEATURE_LEVELs from max_fl to min_fl (inclusive) +static int get_feature_levels(int max_fl, int min_fl, + const D3D_FEATURE_LEVEL **out) +{ + static const D3D_FEATURE_LEVEL levels[] = { + D3D_FEATURE_LEVEL_12_1, + D3D_FEATURE_LEVEL_12_0, + D3D_FEATURE_LEVEL_11_1, + D3D_FEATURE_LEVEL_11_0, + D3D_FEATURE_LEVEL_10_1, + D3D_FEATURE_LEVEL_10_0, + D3D_FEATURE_LEVEL_9_3, + D3D_FEATURE_LEVEL_9_2, + D3D_FEATURE_LEVEL_9_1, + }; + static const int levels_len = MP_ARRAY_SIZE(levels); + + int start = 0; + for (; start < levels_len; start++) { + if (levels[start] <= max_fl) + break; + } + int len = 0; + for (; start + len < levels_len; len++) { + if (levels[start + len] < min_fl) + break; + } + *out = &levels[start]; + return len; +} + +static IDXGIAdapter1 
*get_d3d11_adapter(struct mp_log *log, + struct bstr requested_adapter_name, + struct bstr *listing) +{ + /* Walks the DXGI adapters in enumeration order. Each adapter's + * description is appended to *listing when listing is non-NULL. + * Returns the first adapter that bstr_case_startswith() matches against + * a non-empty requested_adapter_name, with its reference still held (the + * caller must release it), or NULL if nothing matched or the DXGI + * factory could not be created. */ + HRESULT hr = S_OK; + IDXGIFactory1 *factory; + IDXGIAdapter1 *picked_adapter = NULL; + + hr = pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **)&factory); + if (FAILED(hr)) { + mp_fatal(log, "Failed to create a DXGI factory: %s\n", + mp_HRESULT_to_str(hr)); + return NULL; + } + + for (unsigned int adapter_num = 0; hr != DXGI_ERROR_NOT_FOUND; adapter_num++) + { + IDXGIAdapter1 *adapter = NULL; + DXGI_ADAPTER_DESC1 desc = { 0 }; + char *adapter_description = NULL; + + hr = IDXGIFactory1_EnumAdapters1(factory, adapter_num, &adapter); + if (FAILED(hr)) { + if (hr != DXGI_ERROR_NOT_FOUND) { + mp_fatal(log, "Failed to enumerate at adapter %u\n", + adapter_num); + } + continue; + } + + if (FAILED(IDXGIAdapter1_GetDesc1(adapter, &desc))) { + mp_fatal(log, "Failed to get adapter description when listing at adapter %u\n", + adapter_num); + /* The enumeration gave us a reference; drop it before skipping + * this adapter so it is not leaked. */ + SAFE_RELEASE(adapter); + continue; + } + + adapter_description = mp_to_utf8(NULL, desc.Description); + + if (listing) { + bstr_xappend_asprintf(NULL, listing, + "Adapter %u: vendor: %u, description: %s\n", + adapter_num, desc.VendorId, + adapter_description); + } + + if (requested_adapter_name.len && + bstr_case_startswith(bstr0(adapter_description), + requested_adapter_name)) + { + picked_adapter = adapter; + } + + talloc_free(adapter_description); + + if (picked_adapter) { + break; + } + + SAFE_RELEASE(adapter); + } + + SAFE_RELEASE(factory); + + return picked_adapter; +} + +static HRESULT create_device(struct mp_log *log, IDXGIAdapter1 *adapter, + bool warp, bool debug, int max_fl, int min_fl, + ID3D11Device **dev) +{ + const D3D_FEATURE_LEVEL *levels; + int levels_len = get_feature_levels(max_fl, min_fl, &levels); + if (!levels_len) { + mp_fatal(log, "No suitable Direct3D feature level found\n"); + return E_FAIL; + } + + D3D_DRIVER_TYPE type = warp ? D3D_DRIVER_TYPE_WARP + : D3D_DRIVER_TYPE_HARDWARE; + UINT flags = debug ? 
D3D11_CREATE_DEVICE_DEBUG : 0; + return pD3D11CreateDevice((IDXGIAdapter *)adapter, adapter ? D3D_DRIVER_TYPE_UNKNOWN : type, + NULL, flags, levels, levels_len, D3D11_SDK_VERSION, dev, NULL, NULL); +} + +bool mp_d3d11_list_or_verify_adapters(struct mp_log *log, + bstr adapter_name, + bstr *listing) +{ + IDXGIAdapter1 *picked_adapter = NULL; + + if (!load_d3d11_functions(log)) { + return false; + } + + if ((picked_adapter = get_d3d11_adapter(log, adapter_name, listing))) { + SAFE_RELEASE(picked_adapter); + return true; + } + + return false; +} + +// Create a Direct3D 11 device for rendering and presentation. This is meant to +// reduce boilerplate in backends that use D3D11, while also making sure they +// share the same device creation logic and log the same information. +bool mp_d3d11_create_present_device(struct mp_log *log, + struct d3d11_device_opts *opts, + ID3D11Device **dev_out) +{ + bool debug = opts->debug; + bool warp = opts->force_warp; + int max_fl = opts->max_feature_level; + int min_fl = opts->min_feature_level; + // Normalize NULL and an empty string to NULL to simplify handling. + char *adapter_name = (opts->adapter_name && *(opts->adapter_name)) ? + opts->adapter_name : NULL; + ID3D11Device *dev = NULL; + IDXGIDevice1 *dxgi_dev = NULL; + IDXGIAdapter1 *adapter = NULL; + bool success = false; + HRESULT hr; + + if (!load_d3d11_functions(log)) { + goto done; + } + + adapter = get_d3d11_adapter(log, bstr0(adapter_name), NULL); + + if (adapter_name && !adapter) { + mp_warn(log, "Adapter matching '%s' was not found in the system! " + "Will fall back to the default adapter.\n", + adapter_name); + } + + // Return here to retry creating the device + do { + // Use these default feature levels if they are not set + max_fl = max_fl ? max_fl : D3D_FEATURE_LEVEL_11_0; + min_fl = min_fl ? 
min_fl : D3D_FEATURE_LEVEL_9_1; + + hr = create_device(log, adapter, warp, debug, max_fl, min_fl, &dev); + + // Retry without debug, if SDK is not available + if (debug && hr == DXGI_ERROR_SDK_COMPONENT_MISSING) { + mp_warn(log, "gpu-debug disabled due to error: %s\n", mp_HRESULT_to_str(hr)); + debug = false; + continue; + } + + if (SUCCEEDED(hr)) + break; + + // Trying to create a D3D_FEATURE_LEVEL_12_0 device on Windows 8.1 or + // below will not succeed. Try an 11_1 device. + if (max_fl >= D3D_FEATURE_LEVEL_12_0 && + min_fl <= D3D_FEATURE_LEVEL_11_1) + { + mp_dbg(log, "Failed to create 12_0+ device, trying 11_1\n"); + max_fl = D3D_FEATURE_LEVEL_11_1; + continue; + } + + // Trying to create a D3D_FEATURE_LEVEL_11_1 device on Windows 7 + // without the platform update will not succeed. Try an 11_0 device. + if (max_fl >= D3D_FEATURE_LEVEL_11_1 && + min_fl <= D3D_FEATURE_LEVEL_11_0) + { + mp_dbg(log, "Failed to create 11_1+ device, trying 11_0\n"); + max_fl = D3D_FEATURE_LEVEL_11_0; + continue; + } + + // Retry with WARP if allowed + if (!warp && opts->allow_warp) { + mp_dbg(log, "Failed to create hardware device, trying WARP\n"); + warp = true; + max_fl = opts->max_feature_level; + min_fl = opts->min_feature_level; + continue; + } + + mp_fatal(log, "Failed to create Direct3D 11 device: %s\n", + mp_HRESULT_to_str(hr)); + goto done; + } while (true); + + // if we picked an adapter, release it here - we're taking another + // from the device. 
+ SAFE_RELEASE(adapter); + + hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI device\n"); + goto done; + } + hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI adapter\n"); + goto done; + } + + IDXGIDevice1_SetMaximumFrameLatency(dxgi_dev, opts->max_frame_latency); + + DXGI_ADAPTER_DESC1 desc; + hr = IDXGIAdapter1_GetDesc1(adapter, &desc); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get adapter description\n"); + goto done; + } + + D3D_FEATURE_LEVEL selected_level = ID3D11Device_GetFeatureLevel(dev); + mp_verbose(log, "Using Direct3D 11 feature level %u_%u\n", + ((unsigned)selected_level) >> 12, + (((unsigned)selected_level) >> 8) & 0xf); + + char *dev_name = mp_to_utf8(NULL, desc.Description); + mp_verbose(log, "Device Name: %s\n" + "Device ID: %04x:%04x (rev %02x)\n" + "Subsystem ID: %04x:%04x\n" + "LUID: %08lx%08lx\n", + dev_name, + desc.VendorId, desc.DeviceId, desc.Revision, + LOWORD(desc.SubSysId), HIWORD(desc.SubSysId), + desc.AdapterLuid.HighPart, desc.AdapterLuid.LowPart); + talloc_free(dev_name); + + if (desc.Flags & DXGI_ADAPTER_FLAG_SOFTWARE) + warp = true; + // If the primary display adapter is a software adapter, the + // DXGI_ADAPTER_FLAG_SOFTWARE flag won't be set, but the device IDs should + // still match the Microsoft Basic Render Driver + if (desc.VendorId == 0x1414 && desc.DeviceId == 0x8c) + warp = true; + if (warp) { + mp_msg(log, opts->force_warp ? 
MSGL_V : MSGL_WARN, + "Using a software adapter\n"); + } + + *dev_out = dev; + dev = NULL; + success = true; + +done: + SAFE_RELEASE(adapter); + SAFE_RELEASE(dxgi_dev); + SAFE_RELEASE(dev); + return success; +} + +static HRESULT create_swapchain_1_2(ID3D11Device *dev, IDXGIFactory2 *factory, + struct mp_log *log, + struct d3d11_swapchain_opts *opts, + bool flip, DXGI_FORMAT format, + IDXGISwapChain **swapchain_out) +{ + IDXGISwapChain *swapchain = NULL; + IDXGISwapChain1 *swapchain1 = NULL; + HRESULT hr; + + DXGI_SWAP_CHAIN_DESC1 desc = { + .Width = opts->width ? opts->width : 1, + .Height = opts->height ? opts->height : 1, + .Format = format, + .SampleDesc = { .Count = 1 }, + .BufferUsage = opts->usage, + }; + + if (flip) { + // UNORDERED_ACCESS with FLIP_SEQUENTIAL seems to be buggy with + // Windows 7 drivers + if ((desc.BufferUsage & DXGI_USAGE_UNORDERED_ACCESS) && + !IsWindows8OrGreater()) + { + mp_verbose(log, "Disabling UNORDERED_ACCESS for flip-model " + "swapchain backbuffers in Windows 7\n"); + desc.BufferUsage &= ~DXGI_USAGE_UNORDERED_ACCESS; + } + + if (IsWindows10OrGreater()) { + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_DISCARD; + } else { + desc.SwapEffect = DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL; + } + desc.BufferCount = opts->length; + } else { + desc.SwapEffect = DXGI_SWAP_EFFECT_DISCARD; + desc.BufferCount = 1; + } + + hr = IDXGIFactory2_CreateSwapChainForHwnd(factory, (IUnknown*)dev, + opts->window, &desc, NULL, NULL, &swapchain1); + if (FAILED(hr)) + goto done; + hr = IDXGISwapChain1_QueryInterface(swapchain1, &IID_IDXGISwapChain, + (void**)&swapchain); + if (FAILED(hr)) + goto done; + + *swapchain_out = swapchain; + swapchain = NULL; + +done: + SAFE_RELEASE(swapchain1); + SAFE_RELEASE(swapchain); + return hr; +} + +static HRESULT create_swapchain_1_1(ID3D11Device *dev, IDXGIFactory1 *factory, + struct mp_log *log, + struct d3d11_swapchain_opts *opts, + DXGI_FORMAT format, + IDXGISwapChain **swapchain_out) +{ + DXGI_SWAP_CHAIN_DESC desc = { + 
.BufferDesc = { + .Width = opts->width ? opts->width : 1, + .Height = opts->height ? opts->height : 1, + .Format = format, + }, + .SampleDesc = { .Count = 1 }, + .BufferUsage = opts->usage, + .BufferCount = 1, + .OutputWindow = opts->window, + .Windowed = TRUE, + .SwapEffect = DXGI_SWAP_EFFECT_DISCARD, + }; + + return IDXGIFactory1_CreateSwapChain(factory, (IUnknown*)dev, &desc, + swapchain_out); +} + +static bool update_swapchain_format(struct mp_log *log, + IDXGISwapChain *swapchain, + DXGI_FORMAT format) +{ + DXGI_SWAP_CHAIN_DESC desc; + + HRESULT hr = IDXGISwapChain_GetDesc(swapchain, &desc); + if (FAILED(hr)) { + mp_fatal(log, "Failed to query swap chain's current state: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + hr = IDXGISwapChain_ResizeBuffers(swapchain, 0, desc.BufferDesc.Width, + desc.BufferDesc.Height, + format, 0); + if (FAILED(hr)) { + mp_fatal(log, "Couldn't update swapchain format: %s\n", + mp_HRESULT_to_str(hr)); + return false; + } + + return true; +} + +static bool update_swapchain_color_space(struct mp_log *log, + IDXGISwapChain *swapchain, + DXGI_COLOR_SPACE_TYPE color_space) +{ + IDXGISwapChain4 *swapchain4 = NULL; + const char *csp_name = d3d11_get_csp_name(color_space); + bool success = false; + HRESULT hr = E_FAIL; + unsigned int csp_support_flags; + + hr = IDXGISwapChain_QueryInterface(swapchain, &IID_IDXGISwapChain4, + (void *)&(swapchain4)); + if (FAILED(hr)) { + mp_err(log, "Failed to create v4 swapchain for color space " + "configuration (%s)!\n", + mp_HRESULT_to_str(hr)); + goto done; + } + + hr = IDXGISwapChain4_CheckColorSpaceSupport(swapchain4, + color_space, + &csp_support_flags); + if (FAILED(hr)) { + mp_err(log, "Failed to check color space support for color space " + "%s (%d): %s!\n", + csp_name, color_space, mp_HRESULT_to_str(hr)); + goto done; + } + + mp_verbose(log, + "Swapchain capabilities for color space %s (%d): " + "normal: %s, overlay: %s\n", + csp_name, color_space, + (csp_support_flags & 
DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT) ? + "yes" : "no", + (csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_OVERLAY_PRESENT) ? + "yes" : "no"); + + if (!(csp_support_flags & DXGI_SWAP_CHAIN_COLOR_SPACE_SUPPORT_FLAG_PRESENT)) { + mp_err(log, "Color space %s (%d) is not supported by this swapchain!\n", + csp_name, color_space); + goto done; + } + + hr = IDXGISwapChain4_SetColorSpace1(swapchain4, color_space); + if (FAILED(hr)) { + mp_err(log, "Failed to set color space %s (%d) for this swapchain " + "(%s)!\n", + csp_name, color_space, mp_HRESULT_to_str(hr)); + goto done; + } + + mp_verbose(log, "Swapchain successfully configured to color space %s (%d)!\n", + csp_name, color_space); + + success = true; + +done: + SAFE_RELEASE(swapchain4); + return success; +} + +static bool configure_created_swapchain(struct mp_log *log, + IDXGISwapChain *swapchain, + DXGI_FORMAT requested_format, + DXGI_COLOR_SPACE_TYPE requested_csp, + struct mp_colorspace *configured_csp) +{ + DXGI_FORMAT probed_format = DXGI_FORMAT_UNKNOWN; + DXGI_FORMAT selected_format = DXGI_FORMAT_UNKNOWN; + DXGI_COLOR_SPACE_TYPE probed_colorspace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + DXGI_COLOR_SPACE_TYPE selected_colorspace; + const char *format_name = NULL; + const char *csp_name = NULL; + struct mp_colorspace mp_csp = { 0 }; + bool mp_csp_mapped = false; + + query_output_format_and_colorspace(log, swapchain, + &probed_format, + &probed_colorspace); + + + selected_format = requested_format != DXGI_FORMAT_UNKNOWN ? + requested_format : + (probed_format != DXGI_FORMAT_UNKNOWN ? + probed_format : DXGI_FORMAT_R8G8B8A8_UNORM); + selected_colorspace = requested_csp != -1 ? 
+ requested_csp : probed_colorspace; + format_name = d3d11_get_format_name(selected_format); + csp_name = d3d11_get_csp_name(selected_colorspace); + mp_csp_mapped = d3d11_get_mp_csp(selected_colorspace, &mp_csp); + + mp_verbose(log, "Selected swapchain format %s (%d), attempting " + "to utilize it.\n", + format_name, selected_format); + + if (!update_swapchain_format(log, swapchain, selected_format)) { + return false; + } + + if (!IsWindows10OrGreater()) { + // On older than Windows 10, query_output_format_and_colorspace + // will not change probed_colorspace, and even if a user sets + // a colorspace it will not get applied. Thus warn user in case a + // value was specifically set and finish. + if (requested_csp != -1) { + // Note the trailing space after "supported": adjacent literals + // are concatenated without any separator. + mp_warn(log, "User selected a D3D11 color space %s (%d), " + "but configuration of color spaces is only supported " + "from Windows 10! The default configuration has been " + "left as-is.\n", + csp_name, selected_colorspace); + } + + return true; + } + + if (!mp_csp_mapped) { + mp_warn(log, "Color space %s (%d) does not have an mpv color space " + "mapping! 
Overriding to standard sRGB!\n", + csp_name, selected_colorspace); + selected_colorspace = DXGI_COLOR_SPACE_RGB_FULL_G22_NONE_P709; + // Refresh the name so the log lines below describe the color space + // that is actually applied rather than the rejected one. + csp_name = d3d11_get_csp_name(selected_colorspace); + d3d11_get_mp_csp(selected_colorspace, &mp_csp); + } + + mp_verbose(log, "Selected swapchain color space %s (%d), attempting to " + "utilize it.\n", + csp_name, selected_colorspace); + + if (!update_swapchain_color_space(log, swapchain, selected_colorspace)) { + return false; + } + + // Report the mpv-side mapping of the configured color space, if requested. + if (configured_csp) { + *configured_csp = mp_csp; + } + + return true; +} + +// Create a Direct3D 11 swapchain +bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, + struct d3d11_swapchain_opts *opts, + IDXGISwapChain **swapchain_out) +{ + IDXGIDevice1 *dxgi_dev = NULL; + IDXGIAdapter1 *adapter = NULL; + IDXGIFactory1 *factory = NULL; + IDXGIFactory2 *factory2 = NULL; + IDXGISwapChain *swapchain = NULL; + bool success = false; + HRESULT hr; + + hr = ID3D11Device_QueryInterface(dev, &IID_IDXGIDevice1, (void**)&dxgi_dev); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI device\n"); + goto done; + } + hr = IDXGIDevice1_GetParent(dxgi_dev, &IID_IDXGIAdapter1, (void**)&adapter); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI adapter\n"); + goto done; + } + hr = IDXGIAdapter1_GetParent(adapter, &IID_IDXGIFactory1, (void**)&factory); + if (FAILED(hr)) { + mp_fatal(log, "Failed to get DXGI factory\n"); + goto done; + } + hr = IDXGIFactory1_QueryInterface(factory, &IID_IDXGIFactory2, + (void**)&factory2); + if (FAILED(hr)) + factory2 = NULL; + + bool flip = factory2 && opts->flip; + + // Return here to retry creating the swapchain + do { + if (factory2) { + // Create a DXGI 1.2+ (Windows 8+) swap chain if possible + hr = create_swapchain_1_2(dev, factory2, log, opts, flip, + DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain); + } else { + // Fall back to DXGI 1.1 (Windows 7) + hr = create_swapchain_1_1(dev, factory, log, opts, + DXGI_FORMAT_R8G8B8A8_UNORM, &swapchain); + } + if (SUCCEEDED(hr)) + break; + + if (flip) { + mp_dbg(log, "Failed 
to create flip-model swapchain, trying bitblt\n"); + flip = false; + continue; + } + + mp_fatal(log, "Failed to create swapchain: %s\n", mp_HRESULT_to_str(hr)); + goto done; + } while (true); + + // Prevent DXGI from making changes to the VO window, otherwise it will + // hook the Alt+Enter keystroke and make it trigger an ugly transition to + // exclusive fullscreen mode instead of running the user-set command. + IDXGIFactory_MakeWindowAssociation(factory, opts->window, + DXGI_MWA_NO_WINDOW_CHANGES | DXGI_MWA_NO_ALT_ENTER | + DXGI_MWA_NO_PRINT_SCREEN); + + if (factory2) { + mp_verbose(log, "Using DXGI 1.2+\n"); + } else { + mp_verbose(log, "Using DXGI 1.1\n"); + } + + configure_created_swapchain(log, swapchain, opts->format, + opts->color_space, + opts->configured_csp); + + DXGI_SWAP_CHAIN_DESC scd = {0}; + IDXGISwapChain_GetDesc(swapchain, &scd); + if (scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL || + scd.SwapEffect == DXGI_SWAP_EFFECT_FLIP_DISCARD) + { + mp_verbose(log, "Using flip-model presentation\n"); + } else { + mp_verbose(log, "Using bitblt-model presentation\n"); + } + + *swapchain_out = swapchain; + swapchain = NULL; + success = true; + +done: + SAFE_RELEASE(swapchain); + SAFE_RELEASE(factory2); + SAFE_RELEASE(factory); + SAFE_RELEASE(adapter); + SAFE_RELEASE(dxgi_dev); + return success; +} diff --git a/video/out/gpu/d3d11_helpers.h b/video/out/gpu/d3d11_helpers.h new file mode 100644 index 0000000..c115d33 --- /dev/null +++ b/video/out/gpu/d3d11_helpers.h @@ -0,0 +1,103 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_D3D11_HELPERS_H_ +#define MP_D3D11_HELPERS_H_ + +#include <stdbool.h> +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> + +#include "video/mp_image.h" + +#define D3D_FEATURE_LEVEL_12_0 (0xc000) +#define D3D_FEATURE_LEVEL_12_1 (0xc100) + +#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P709 ((DXGI_COLOR_SPACE_TYPE)20) +#define DXGI_COLOR_SPACE_RGB_STUDIO_G24_NONE_P2020 ((DXGI_COLOR_SPACE_TYPE)21) +#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P709 ((DXGI_COLOR_SPACE_TYPE)22) +#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_LEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)23) +#define DXGI_COLOR_SPACE_YCBCR_STUDIO_G24_TOPLEFT_P2020 ((DXGI_COLOR_SPACE_TYPE)24) + +struct d3d11_device_opts { + // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG) + bool debug; + + // Allow a software (WARP) adapter. Note, sometimes a software adapter will + // be used even when allow_warp is false. This is because, on Windows 8 and + // up, if there are no hardware adapters, Windows will pretend the WARP + // adapter is the primary hardware adapter. + bool allow_warp; + + // Always use a WARP adapter. This is mainly for testing purposes. + bool force_warp; + + // The maximum number of pending frames allowed to be queued to a swapchain + int max_frame_latency; + + // The maximum Direct3D 11 feature level to attempt to create + // If unset, defaults to D3D_FEATURE_LEVEL_11_0 + int max_feature_level; + + // The minimum Direct3D 11 feature level to attempt to create. If this is + // not supported, device creation will fail. + // If unset, defaults to D3D_FEATURE_LEVEL_9_1 + int min_feature_level; + + // The adapter name to utilize if a specific adapter is required + // If unset, the default adapter will be utilized when creating + // a device. 
+ char *adapter_name; +}; + +bool mp_d3d11_list_or_verify_adapters(struct mp_log *log, + bstr adapter_name, + bstr *listing); + +bool mp_d3d11_create_present_device(struct mp_log *log, + struct d3d11_device_opts *opts, + ID3D11Device **dev_out); + +struct d3d11_swapchain_opts { + HWND window; + int width; + int height; + DXGI_FORMAT format; + DXGI_COLOR_SPACE_TYPE color_space; + + // mp_colorspace mapping of the configured swapchain colorspace + // shall be written into this memory location if configuration + // succeeds. Will be ignored if NULL. + struct mp_colorspace *configured_csp; + + // Use DXGI_SWAP_EFFECT_FLIP_SEQUENTIAL if possible + bool flip; + + // Number of surfaces in the swapchain + int length; + + // The BufferUsage value for swapchain surfaces. This should probably + // contain DXGI_USAGE_RENDER_TARGET_OUTPUT. + DXGI_USAGE usage; +}; + +bool mp_d3d11_create_swapchain(ID3D11Device *dev, struct mp_log *log, + struct d3d11_swapchain_opts *opts, + IDXGISwapChain **swapchain_out); + +#endif diff --git a/video/out/gpu/error_diffusion.c b/video/out/gpu/error_diffusion.c new file mode 100644 index 0000000..c1ea542 --- /dev/null +++ b/video/out/gpu/error_diffusion.c @@ -0,0 +1,316 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdlib.h> + +#include "error_diffusion.h" + +#include "common/common.h" + +#define GLSL(...) gl_sc_addf(sc, __VA_ARGS__) +#define GLSLH(...) gl_sc_haddf(sc, __VA_ARGS__) + +// After a (y, x) -> (y, x + y * shift) mapping, find the right most column that +// will be affected by the current column. +static int compute_rightmost_shifted_column(const struct error_diffusion_kernel *k) +{ + int ret = 0; + for (int y = 0; y <= EF_MAX_DELTA_Y; y++) { + for (int x = EF_MIN_DELTA_X; x <= EF_MAX_DELTA_X; x++) { + if (k->pattern[y][x - EF_MIN_DELTA_X] != 0) { + int shifted_x = x + y * k->shift; + + // The shift mapping guarantees current column (or left of it) + // won't be affected by error diffusion. + assert(shifted_x > 0); + + ret = MPMAX(ret, shifted_x); + } + } + } + return ret; +} + +const struct error_diffusion_kernel *mp_find_error_diffusion_kernel(const char *name) +{ + if (!name) + return NULL; + for (const struct error_diffusion_kernel *k = mp_error_diffusion_kernels; + k->name; + k++) { + if (strcmp(k->name, name) == 0) + return k; + } + return NULL; +} + +int mp_ef_compute_shared_memory_size(const struct error_diffusion_kernel *k, + int height) +{ + // We add EF_MAX_DELTA_Y empty lines on the bottom to handle errors + // propagated out from bottom side. + int rows = height + EF_MAX_DELTA_Y; + int shifted_columns = compute_rightmost_shifted_column(k) + 1; + + // The shared memory is an array of size rows*shifted_columns. Each element + // is a single uint for three RGB component. + return rows * shifted_columns * 4; +} + +void pass_error_diffusion(struct gl_shader_cache *sc, + const struct error_diffusion_kernel *k, + int tex, int width, int height, int depth, int block_size) +{ + assert(block_size <= height); + + // The parallel error diffusion works by applying the shift mapping first. + // Taking the Floyd and Steinberg algorithm for example. 
After applying + // the (y, x) -> (y, x + y * shift) mapping (with shift=2), all errors are + // propagated into the next few columns, which makes parallel processing on + // the same column possible. + // + // X 7/16 X 7/16 + // 3/16 5/16 1/16 ==> 0 0 3/16 5/16 1/16 + + // Figuring out the size of rectangle containing all shifted pixels. + // The rectangle height is not changed. + int shifted_width = width + (height - 1) * k->shift; + + // We process all pixels from the shifted rectangles column by column, with + // a single global work group of size |block_size|. + // Figuring out how many block are required to process all pixels. We need + // this explicitly to make the number of barrier() calls match. + int blocks = (height * shifted_width + block_size - 1) / block_size; + + // If we figure out how many of the next columns will be affected while the + // current columns is being processed. We can store errors of only a few + // columns in the shared memory. Using a ring buffer will further save the + // cost while iterating to next column. + int ring_buffer_rows = height + EF_MAX_DELTA_Y; + int ring_buffer_columns = compute_rightmost_shifted_column(k) + 1; + int ring_buffer_size = ring_buffer_rows * ring_buffer_columns; + + // Defines the ring buffer in shared memory. + GLSLH("shared uint err_rgb8[%d];\n", ring_buffer_size); + + // Initialize the ring buffer. + GLSL("for (int i = int(gl_LocalInvocationIndex); i < %d; i += %d) ", + ring_buffer_size, block_size); + GLSL("err_rgb8[i] = 0u;\n"); + + GLSL("for (int block_id = 0; block_id < %d; ++block_id) {\n", blocks); + + // Add barrier here to have previous block all processed before starting + // the processing of the next. + GLSL("groupMemoryBarrier();\n"); + GLSL("barrier();\n"); + + // Compute the coordinate of the pixel we are currently processing, both + // before and after the shift mapping. 
+ GLSL("int id = int(gl_LocalInvocationIndex) + block_id * %d;\n", block_size); + GLSL("int y = id %% %d, x_shifted = id / %d;\n", height, height); + GLSL("int x = x_shifted - y * %d;\n", k->shift); + + // Proceed only if we are processing a valid pixel. + GLSL("if (0 <= x && x < %d) {\n", width); + + // The index that the current pixel have on the ring buffer. + GLSL("int idx = (x_shifted * %d + y) %% %d;\n", ring_buffer_rows, ring_buffer_size); + + // Fetch the current pixel. + GLSL("vec3 pix = texelFetch(texture%d, ivec2(x, y), 0).rgb;\n", tex); + + // The dithering will quantize pixel value into multiples of 1/dither_quant. + int dither_quant = (1 << depth) - 1; + + // We encode errors in RGB components into a single 32-bit unsigned integer. + // The error we propagate from the current pixel is in range of + // [-0.5 / dither_quant, 0.5 / dither_quant]. While not quite obvious, the + // sum of all errors been propagated into a pixel is also in the same range. + // It's possible to map errors in this range into [-127, 127], and use an + // unsigned 8-bit integer to store it (using standard two's complement). + // The three 8-bit unsigned integers can then be encoded into a single + // 32-bit unsigned integer, with two 4-bit padding to prevent addition + // operation overflows affecting other component. There are at most 12 + // addition operations on each pixel, so 4-bit padding should be enough. + // The overflow from R component will be discarded. + // + // The following figure is how the encoding looks like. + // + // +------------------------------------+ + // |RRRRRRRR|0000|GGGGGGGG|0000|BBBBBBBB| + // +------------------------------------+ + // + + // The bitshift position for R and G component. + int bitshift_r = 24, bitshift_g = 12; + // The multiplier we use to map [-0.5, 0.5] to [-127, 127]. + int uint8_mul = 127 * 2; + + // Adding the error previously propagated into current pixel, and clear it + // in the buffer. 
+ GLSL("uint err_u32 = err_rgb8[idx] + %uu;\n", + (128u << bitshift_r) | (128u << bitshift_g) | 128u); + GLSL("pix = pix * %d.0 + vec3(" + "int((err_u32 >> %d) & 255u) - 128," + "int((err_u32 >> %d) & 255u) - 128," + "int( err_u32 & 255u) - 128" + ") / %d.0;\n", dither_quant, bitshift_r, bitshift_g, uint8_mul); + GLSL("err_rgb8[idx] = 0u;\n"); + + // Write the dithered pixel. + GLSL("vec3 dithered = round(pix);\n"); + GLSL("imageStore(out_image, ivec2(x, y), vec4(dithered / %d.0, 0.0));\n", + dither_quant); + + GLSL("vec3 err_divided = (pix - dithered) * %d.0 / %d.0;\n", + uint8_mul, k->divisor); + GLSL("ivec3 tmp;\n"); + + // Group error propagation with same weight factor together, in order to + // reduce the number of annoying error encoding. + for (int dividend = 1; dividend <= k->divisor; dividend++) { + bool err_assigned = false; + + for (int y = 0; y <= EF_MAX_DELTA_Y; y++) { + for (int x = EF_MIN_DELTA_X; x <= EF_MAX_DELTA_X; x++) { + if (k->pattern[y][x - EF_MIN_DELTA_X] != dividend) + continue; + + if (!err_assigned) { + err_assigned = true; + + GLSL("tmp = ivec3(round(err_divided * %d.0));\n", dividend); + + GLSL("err_u32 = " + "(uint(tmp.r & 255) << %d)|" + "(uint(tmp.g & 255) << %d)|" + " uint(tmp.b & 255);\n", + bitshift_r, bitshift_g); + } + + int shifted_x = x + y * k->shift; + + // Unlike the right border, errors propagated out from left + // border will remain in the ring buffer. This will produce + // visible artifacts near the left border, especially for + // shift=3 kernels. + if (x < 0) + GLSL("if (x >= %d) ", -x); + + // Calculate the new position in the ring buffer to propagate + // the error into. + int ring_buffer_delta = shifted_x * ring_buffer_rows + y; + GLSL("atomicAdd(err_rgb8[(idx + %d) %% %d], err_u32);\n", + ring_buffer_delta, ring_buffer_size); + } + } + } + + GLSL("}\n"); // if (0 <= x && x < width) + + GLSL("}\n"); // block_id +} + +// Different kernels for error diffusion. 
+// Patterns are from http://www.efg2.com/Lab/Library/ImageProcessing/DHALF.TXT +const struct error_diffusion_kernel mp_error_diffusion_kernels[] = { + { + .name = "simple", + .shift = 1, + .pattern = {{0, 0, 0, 1, 0}, + {0, 0, 1, 0, 0}, + {0, 0, 0, 0, 0}}, + .divisor = 2 + }, + { + // The "false" Floyd-Steinberg kernel + .name = "false-fs", + .shift = 1, + .pattern = {{0, 0, 0, 3, 0}, + {0, 0, 3, 2, 0}, + {0, 0, 0, 0, 0}}, + .divisor = 8 + }, + { + .name = "sierra-lite", + .shift = 2, + .pattern = {{0, 0, 0, 2, 0}, + {0, 1, 1, 0, 0}, + {0, 0, 0, 0, 0}}, + .divisor = 4 + }, + { + .name = "floyd-steinberg", + .shift = 2, + .pattern = {{0, 0, 0, 7, 0}, + {0, 3, 5, 1, 0}, + {0, 0, 0, 0, 0}}, + .divisor = 16 + }, + { + .name = "atkinson", + .shift = 2, + .pattern = {{0, 0, 0, 1, 1}, + {0, 1, 1, 1, 0}, + {0, 0, 1, 0, 0}}, + .divisor = 8 + }, + // All kernels below have shift value of 3, and probably are too heavy for + // low end GPU. + { + .name = "jarvis-judice-ninke", + .shift = 3, + .pattern = {{0, 0, 0, 7, 5}, + {3, 5, 7, 5, 3}, + {1, 3, 5, 3, 1}}, + .divisor = 48 + }, + { + .name = "stucki", + .shift = 3, + .pattern = {{0, 0, 0, 8, 4}, + {2, 4, 8, 4, 2}, + {1, 2, 4, 2, 1}}, + .divisor = 42 + }, + { + .name = "burkes", + .shift = 3, + .pattern = {{0, 0, 0, 8, 4}, + {2, 4, 8, 4, 2}, + {0, 0, 0, 0, 0}}, + .divisor = 32 + }, + { + .name = "sierra-3", + .shift = 3, + .pattern = {{0, 0, 0, 5, 3}, + {2, 4, 5, 4, 2}, + {0, 2, 3, 2, 0}}, + .divisor = 32 + }, + { + .name = "sierra-2", + .shift = 3, + .pattern = {{0, 0, 0, 4, 3}, + {1, 2, 3, 2, 1}, + {0, 0, 0, 0, 0}}, + .divisor = 16 + }, + {0} +}; diff --git a/video/out/gpu/error_diffusion.h b/video/out/gpu/error_diffusion.h new file mode 100644 index 0000000..6bdcea1 --- /dev/null +++ b/video/out/gpu/error_diffusion.h @@ -0,0 +1,48 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_GL_ERROR_DIFFUSION +#define MP_GL_ERROR_DIFFUSION + +#include "shader_cache.h" + +// defines the border of all error diffusion kernels +#define EF_MIN_DELTA_X (-2) +#define EF_MAX_DELTA_X (2) +#define EF_MAX_DELTA_Y (2) + +struct error_diffusion_kernel { + const char *name; + + // The minimum value such that a (y, x) -> (y, x + y * shift) mapping will + // make all error pushing operations affect next column (and after it) only. + int shift; + + // The diffusion factor for (y, x) is pattern[y][x - EF_MIN_DELTA_X] / divisor. + int pattern[EF_MAX_DELTA_Y + 1][EF_MAX_DELTA_X - EF_MIN_DELTA_X + 1]; + int divisor; +}; + +extern const struct error_diffusion_kernel mp_error_diffusion_kernels[]; + +const struct error_diffusion_kernel *mp_find_error_diffusion_kernel(const char *name); +int mp_ef_compute_shared_memory_size(const struct error_diffusion_kernel *k, int height); +void pass_error_diffusion(struct gl_shader_cache *sc, + const struct error_diffusion_kernel *k, + int tex, int width, int height, int depth, int block_size); + +#endif /* MP_GL_ERROR_DIFFUSION */ diff --git a/video/out/gpu/hwdec.c b/video/out/gpu/hwdec.c new file mode 100644 index 0000000..c8098f3 --- /dev/null +++ b/video/out/gpu/hwdec.c @@ -0,0 +1,358 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <string.h> + +#include "config.h" + +#include "common/common.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "hwdec.h" + +extern const struct ra_hwdec_driver ra_hwdec_vaapi; +extern const struct ra_hwdec_driver ra_hwdec_videotoolbox; +extern const struct ra_hwdec_driver ra_hwdec_vdpau; +extern const struct ra_hwdec_driver ra_hwdec_dxva2egl; +extern const struct ra_hwdec_driver ra_hwdec_d3d11egl; +extern const struct ra_hwdec_driver ra_hwdec_dxva2gldx; +extern const struct ra_hwdec_driver ra_hwdec_d3d11va; +extern const struct ra_hwdec_driver ra_hwdec_dxva2dxgi; +extern const struct ra_hwdec_driver ra_hwdec_cuda; +extern const struct ra_hwdec_driver ra_hwdec_rpi_overlay; +extern const struct ra_hwdec_driver ra_hwdec_drmprime; +extern const struct ra_hwdec_driver ra_hwdec_drmprime_overlay; +extern const struct ra_hwdec_driver ra_hwdec_aimagereader; +extern const struct ra_hwdec_driver ra_hwdec_vulkan; + +const struct ra_hwdec_driver *const ra_hwdec_drivers[] = { +#if HAVE_VAAPI + &ra_hwdec_vaapi, +#endif +#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL || HAVE_VIDEOTOOLBOX_PL + &ra_hwdec_videotoolbox, +#endif +#if HAVE_D3D_HWACCEL + #if HAVE_EGL_ANGLE + &ra_hwdec_d3d11egl, + #if HAVE_D3D9_HWACCEL + &ra_hwdec_dxva2egl, + #endif + #endif + #if HAVE_D3D11 + &ra_hwdec_d3d11va, 
+ #if HAVE_D3D9_HWACCEL + &ra_hwdec_dxva2dxgi, + #endif + #endif +#endif +#if HAVE_GL_DXINTEROP_D3D9 + &ra_hwdec_dxva2gldx, +#endif +#if HAVE_CUDA_INTEROP + &ra_hwdec_cuda, +#endif +#if HAVE_VDPAU_GL_X11 + &ra_hwdec_vdpau, +#endif +#if HAVE_RPI_MMAL + &ra_hwdec_rpi_overlay, +#endif +#if HAVE_DRM + &ra_hwdec_drmprime, + &ra_hwdec_drmprime_overlay, +#endif +#if HAVE_ANDROID_MEDIA_NDK + &ra_hwdec_aimagereader, +#endif +#if HAVE_VULKAN_INTEROP + &ra_hwdec_vulkan, +#endif + + NULL +}; + +struct ra_hwdec *ra_hwdec_load_driver(struct ra_ctx *ra_ctx, + struct mp_log *log, + struct mpv_global *global, + struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, + bool is_auto) +{ + struct ra_hwdec *hwdec = talloc(NULL, struct ra_hwdec); + *hwdec = (struct ra_hwdec) { + .driver = drv, + .log = mp_log_new(hwdec, log, drv->name), + .global = global, + .ra_ctx = ra_ctx, + .devs = devs, + .probing = is_auto, + .priv = talloc_zero_size(hwdec, drv->priv_size), + }; + mp_verbose(log, "Loading hwdec driver '%s'\n", drv->name); + if (hwdec->driver->init(hwdec) < 0) { + ra_hwdec_uninit(hwdec); + mp_verbose(log, "Loading failed.\n"); + return NULL; + } + return hwdec; +} + +void ra_hwdec_uninit(struct ra_hwdec *hwdec) +{ + if (hwdec) + hwdec->driver->uninit(hwdec); + talloc_free(hwdec); +} + +bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt) +{ + for (int n = 0; hwdec->driver->imgfmts[n]; n++) { + if (hwdec->driver->imgfmts[n] == imgfmt) + return true; + } + return false; +} + +struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, + const struct mp_image_params *params) +{ + assert(ra_hwdec_test_format(hwdec, params->imgfmt)); + + struct ra_hwdec_mapper *mapper = talloc_ptrtype(NULL, mapper); + *mapper = (struct ra_hwdec_mapper){ + .owner = hwdec, + .driver = hwdec->driver->mapper, + .log = hwdec->log, + .ra = hwdec->ra_ctx->ra, + .priv = talloc_zero_size(mapper, hwdec->driver->mapper->priv_size), + .src_params = *params, + .dst_params = *params, + 
}; + if (mapper->driver->init(mapper) < 0) + ra_hwdec_mapper_free(&mapper); + return mapper; +} + +void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper) +{ + struct ra_hwdec_mapper *p = *mapper; + if (p) { + ra_hwdec_mapper_unmap(p); + p->driver->uninit(p); + talloc_free(p); + } + *mapper = NULL; +} + +void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + if (mapper->driver->unmap) + mapper->driver->unmap(mapper); + + // Clean up after the image if the mapper didn't already + mp_image_unrefp(&mapper->src); +} + +int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img) +{ + ra_hwdec_mapper_unmap(mapper); + mp_image_setrefp(&mapper->src, img); + if (mapper->driver->map(mapper) < 0) { + ra_hwdec_mapper_unmap(mapper); + return -1; + } + return 0; +} + +static int ra_hwdec_validate_opt_full(struct mp_log *log, bool include_modes, + const m_option_t *opt, + struct bstr name, const char **value) +{ + struct bstr param = bstr0(*value); + bool help = bstr_equals0(param, "help"); + if (help) + mp_info(log, "Available hwdecs:\n"); + for (int n = 0; ra_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n]; + if (help) { + mp_info(log, " %s\n", drv->name); + } else if (bstr_equals0(param, drv->name)) { + return 1; + } + } + if (help) { + if (include_modes) { + mp_info(log, " auto (behavior depends on context)\n" + " all (load all hwdecs)\n" + " no (do not load any and block loading on demand)\n"); + } + return M_OPT_EXIT; + } + if (!param.len) + return 1; // "" is treated specially + if (include_modes && + (bstr_equals0(param, "all") || bstr_equals0(param, "auto") || + bstr_equals0(param, "no"))) + return 1; + mp_fatal(log, "No hwdec backend named '%.*s' found!\n", BSTR_P(param)); + return M_OPT_INVALID; +} + +int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value) +{ + return ra_hwdec_validate_opt_full(log, true, opt, name, value); +} + +int 
ra_hwdec_validate_drivers_only_opt(struct mp_log *log, + const m_option_t *opt, + struct bstr name, const char **value) +{ + return ra_hwdec_validate_opt_full(log, false, opt, name, value); +} + +static void load_add_hwdec(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, bool is_auto) +{ + // Don't load duplicate hwdecs + for (int j = 0; j < ctx->num_hwdecs; j++) { + if (ctx->hwdecs[j]->driver == drv) + return; + } + + struct ra_hwdec *hwdec = + ra_hwdec_load_driver(ctx->ra_ctx, ctx->log, ctx->global, devs, drv, is_auto); + if (hwdec) + MP_TARRAY_APPEND(NULL, ctx->hwdecs, ctx->num_hwdecs, hwdec); +} + +static void load_hwdecs_all(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs) +{ + if (!ctx->loading_done) { + for (int n = 0; ra_hwdec_drivers[n]; n++) + load_add_hwdec(ctx, devs, ra_hwdec_drivers[n], true); + ctx->loading_done = true; + } +} + +void ra_hwdec_ctx_init(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs, + const char *type, bool load_all_by_default) +{ + assert(ctx->ra_ctx); + + /* + * By default, or if the option value is "auto", we will not pre-emptively + * load any interops, and instead allow them to be loaded on-demand. + * + * If the option value is "no", then no interops will be loaded now, and + * no interops will be loaded, even if requested later. + * + * If the option value is "all", then all interops will be loaded now, and + * obviously no interops will need to be loaded later. + * + * Finally, if a specific interop is requested, it will be loaded now, and + * other interops can be loaded, if requested later. 
+ */ + if (!type || !type[0] || strcmp(type, "auto") == 0) { + if (!load_all_by_default) + return; + type = "all"; + } + if (strcmp(type, "no") == 0) { + // do nothing, just block further loading + } else if (strcmp(type, "all") == 0) { + load_hwdecs_all(ctx, devs); + } else { + for (int n = 0; ra_hwdec_drivers[n]; n++) { + const struct ra_hwdec_driver *drv = ra_hwdec_drivers[n]; + if (strcmp(type, drv->name) == 0) { + load_add_hwdec(ctx, devs, drv, false); + break; + } + } + } + ctx->loading_done = true; +} + +void ra_hwdec_ctx_uninit(struct ra_hwdec_ctx *ctx) +{ + for (int n = 0; n < ctx->num_hwdecs; n++) + ra_hwdec_uninit(ctx->hwdecs[n]); + + talloc_free(ctx->hwdecs); + memset(ctx, 0, sizeof(*ctx)); +} + +void ra_hwdec_ctx_load_fmt(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs, + struct hwdec_imgfmt_request *params) +{ + int imgfmt = params->imgfmt; + if (ctx->loading_done) { + /* + * If we previously marked interop loading as done (for reasons + * discussed above), then do not load any other interops regardless + * of imgfmt. 
+ */ + return; + } + + if (imgfmt == IMGFMT_NONE) { + MP_VERBOSE(ctx, "Loading hwdec drivers for all formats\n"); + load_hwdecs_all(ctx, devs); + return; + } + + MP_VERBOSE(ctx, "Loading hwdec drivers for format: '%s'\n", + mp_imgfmt_to_name(imgfmt)); + for (int i = 0; ra_hwdec_drivers[i]; i++) { + bool matched_fmt = false; + const struct ra_hwdec_driver *drv = ra_hwdec_drivers[i]; + for (int j = 0; drv->imgfmts[j]; j++) { + if (imgfmt == drv->imgfmts[j]) { + matched_fmt = true; + break; + } + } + if (!matched_fmt) { + continue; + } + + load_add_hwdec(ctx, devs, drv, params->probing); + } +} + +struct ra_hwdec *ra_hwdec_get(struct ra_hwdec_ctx *ctx, int imgfmt) +{ + for (int n = 0; n < ctx->num_hwdecs; n++) { + if (ra_hwdec_test_format(ctx->hwdecs[n], imgfmt)) + return ctx->hwdecs[n]; + } + + return NULL; +} + +int ra_hwdec_driver_get_imgfmt_for_name(const char *name) +{ + for (int i = 0; ra_hwdec_drivers[i]; i++) { + if (!strcmp(ra_hwdec_drivers[i]->name, name)) { + return ra_hwdec_drivers[i]->imgfmts[0]; + } + } + return IMGFMT_NONE; +} diff --git a/video/out/gpu/hwdec.h b/video/out/gpu/hwdec.h new file mode 100644 index 0000000..7766073 --- /dev/null +++ b/video/out/gpu/hwdec.h @@ -0,0 +1,156 @@ +#ifndef MPGL_HWDEC_H_ +#define MPGL_HWDEC_H_ + +#include "video/mp_image.h" +#include "context.h" +#include "ra.h" +#include "video/hwdec.h" + +// Helper to organize/load hwdecs dynamically +struct ra_hwdec_ctx { + // Set these before calling `ra_hwdec_ctx_init` + struct mp_log *log; + struct mpv_global *global; + struct ra_ctx *ra_ctx; + + bool loading_done; + struct ra_hwdec **hwdecs; + int num_hwdecs; +}; + +int ra_hwdec_validate_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value); + +int ra_hwdec_validate_drivers_only_opt(struct mp_log *log, + const m_option_t *opt, + struct bstr name, const char **value); + +void ra_hwdec_ctx_init(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs, + const char *opt, bool 
load_all_by_default); +void ra_hwdec_ctx_uninit(struct ra_hwdec_ctx *ctx); + +void ra_hwdec_ctx_load_fmt(struct ra_hwdec_ctx *ctx, struct mp_hwdec_devices *devs, + struct hwdec_imgfmt_request *params); + +// Gets the right `ra_hwdec` for a format, if any +struct ra_hwdec *ra_hwdec_get(struct ra_hwdec_ctx *ctx, int imgfmt); + +struct ra_hwdec { + const struct ra_hwdec_driver *driver; + struct mp_log *log; + struct mpv_global *global; + struct ra_ctx *ra_ctx; + struct mp_hwdec_devices *devs; + // GLSL extensions required to sample textures from this. + const char **glsl_extensions; + // For free use by hwdec driver + void *priv; + // For working around the vdpau vs. vaapi mess. + bool probing; + // Used in overlay mode only. + float overlay_colorkey[4]; +}; + +struct ra_hwdec_mapper { + const struct ra_hwdec_mapper_driver *driver; + struct mp_log *log; + struct ra *ra; + void *priv; + struct ra_hwdec *owner; + // Input frame parameters. (Set before init(), immutable.) + struct mp_image_params src_params; + // Output frame parameters (represents the format the textures return). Must + // be set by init(), immutable afterwards, + struct mp_image_params dst_params; + + // The currently mapped source image (or the image about to be mapped in + // ->map()). NULL if unmapped. The mapper can also clear this reference if + // the mapped textures contain a full copy. + struct mp_image *src; + + // The mapped textures and metadata about them. These fields change if a + // new frame is mapped (or unmapped), but otherwise remain constant. + // The common code won't mess with these, so you can e.g. set them in the + // .init() callback. + struct ra_tex *tex[4]; +}; + +// This can be used to map frames of a specific hw format as GL textures. +struct ra_hwdec_mapper_driver { + // Used to create ra_hwdec_mapper.priv. + size_t priv_size; + + // Init the mapper implementation. At this point, the field src_params, + // fns, devs, priv are initialized. 
+ int (*init)(struct ra_hwdec_mapper *mapper); + // Destroy the mapper. unmap is called before this. + void (*uninit)(struct ra_hwdec_mapper *mapper); + + // Map mapper->src as texture, and set mapper->frame to textures using it. + // It is expected that the textures remain valid until the next unmap + // or uninit call. + // The function is allowed to unref mapper->src if it's not needed (i.e. + // this function creates a copy). + // The underlying format can change, so you might need to do some form + // of change detection. You also must reject unsupported formats with an + // error. + // On error, returns negative value on error and remains unmapped. + int (*map)(struct ra_hwdec_mapper *mapper); + // Unmap the frame. Does nothing if already unmapped. Optional. + void (*unmap)(struct ra_hwdec_mapper *mapper); +}; + +struct ra_hwdec_driver { + // Name of the interop backend. This is used for informational purposes and + // for use with debugging options. + const char *name; + // Used to create ra_hwdec.priv. + size_t priv_size; + // One of the hardware surface IMGFMT_ that must be passed to map_image later. + // Terminated with a 0 entry. (Extend the array size as needed.) + const int imgfmts[3]; + + // Create the hwdec device. It must add it to hw->devs, if applicable. + int (*init)(struct ra_hwdec *hw); + void (*uninit)(struct ra_hwdec *hw); + + // This will be used to create a ra_hwdec_mapper from ra_hwdec. + const struct ra_hwdec_mapper_driver *mapper; + + // The following function provides an alternative API. Each ra_hwdec_driver + // must have either provide a mapper or overlay_frame (not both or none), and + // if overlay_frame is set, it operates in overlay mode. In this mode, + // OSD etc. is rendered via OpenGL, but the video is rendered as a separate + // layer below it. + // Non-overlay mode is strictly preferred, so try not to use overlay mode. + // Set the given frame as overlay, replacing the previous one. 
This can also + // just change the position of the overlay. + // hw_image==src==dst==NULL is passed to clear the overlay. + int (*overlay_frame)(struct ra_hwdec *hw, struct mp_image *hw_image, + struct mp_rect *src, struct mp_rect *dst, bool newframe); +}; + +extern const struct ra_hwdec_driver *const ra_hwdec_drivers[]; + +struct ra_hwdec *ra_hwdec_load_driver(struct ra_ctx *ra_ctx, + struct mp_log *log, + struct mpv_global *global, + struct mp_hwdec_devices *devs, + const struct ra_hwdec_driver *drv, + bool is_auto); + +void ra_hwdec_uninit(struct ra_hwdec *hwdec); + +bool ra_hwdec_test_format(struct ra_hwdec *hwdec, int imgfmt); + +struct ra_hwdec_mapper *ra_hwdec_mapper_create(struct ra_hwdec *hwdec, + const struct mp_image_params *params); +void ra_hwdec_mapper_free(struct ra_hwdec_mapper **mapper); +void ra_hwdec_mapper_unmap(struct ra_hwdec_mapper *mapper); +int ra_hwdec_mapper_map(struct ra_hwdec_mapper *mapper, struct mp_image *img); + +// Get the primary image format for the given driver name. +// Returns IMGFMT_NONE if the name doesn't get matched. +int ra_hwdec_driver_get_imgfmt_for_name(const char *name); + +#endif diff --git a/video/out/gpu/lcms.c b/video/out/gpu/lcms.c new file mode 100644 index 0000000..7006a96 --- /dev/null +++ b/video/out/gpu/lcms.c @@ -0,0 +1,526 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <string.h> +#include <math.h> + +#include "mpv_talloc.h" + +#include "config.h" + +#include "stream/stream.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "common/msg.h" +#include "options/m_option.h" +#include "options/path.h" +#include "video/csputils.h" +#include "lcms.h" + +#include "osdep/io.h" + +#if HAVE_LCMS2 + +#include <lcms2.h> +#include <libavutil/sha.h> +#include <libavutil/mem.h> + +struct gl_lcms { + void *icc_data; + size_t icc_size; + struct AVBufferRef *vid_profile; + char *current_profile; + bool using_memory_profile; + bool changed; + enum mp_csp_prim current_prim; + enum mp_csp_trc current_trc; + + struct mp_log *log; + struct mpv_global *global; + struct mp_icc_opts *opts; +}; + +static void lcms2_error_handler(cmsContext ctx, cmsUInt32Number code, + const char *msg) +{ + struct gl_lcms *p = cmsGetContextUserData(ctx); + MP_ERR(p, "lcms2: %s\n", msg); +} + +static void load_profile(struct gl_lcms *p) +{ + talloc_free(p->icc_data); + p->icc_data = NULL; + p->icc_size = 0; + p->using_memory_profile = false; + talloc_free(p->current_profile); + p->current_profile = NULL; + + if (!p->opts->profile || !p->opts->profile[0]) + return; + + char *fname = mp_get_user_path(NULL, p->global, p->opts->profile); + MP_VERBOSE(p, "Opening ICC profile '%s'\n", fname); + struct bstr iccdata = stream_read_file(fname, p, p->global, + 100000000); // 100 MB + talloc_free(fname); + if (!iccdata.len) + return; + + talloc_free(p->icc_data); + + p->icc_data = iccdata.start; + p->icc_size = iccdata.len; + p->current_profile = talloc_strdup(p, p->opts->profile); +} + +static void gl_lcms_destructor(void *ptr) +{ + struct gl_lcms *p = ptr; + av_buffer_unref(&p->vid_profile); +} + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts) +{ + struct gl_lcms *p = talloc_ptrtype(talloc_ctx, p); + talloc_set_destructor(p, gl_lcms_destructor); + *p = (struct gl_lcms) { + .global 
= global, + .log = log, + .opts = opts, + }; + gl_lcms_update_options(p); + return p; +} + +void gl_lcms_update_options(struct gl_lcms *p) +{ + if ((p->using_memory_profile && !p->opts->profile_auto) || + !bstr_equals(bstr0(p->opts->profile), bstr0(p->current_profile))) + { + load_profile(p); + } + + p->changed = true; // probably +} + +// Warning: profile.start must point to a ta allocation, and the function +// takes over ownership. +// Returns whether the internal profile was changed. +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) +{ + if (!p->opts->profile_auto || (p->opts->profile && p->opts->profile[0])) { + talloc_free(profile.start); + return false; + } + + if (p->using_memory_profile && + p->icc_data && profile.start && + profile.len == p->icc_size && + memcmp(profile.start, p->icc_data, p->icc_size) == 0) + { + talloc_free(profile.start); + return false; + } + + p->changed = true; + p->using_memory_profile = true; + + talloc_free(p->icc_data); + + p->icc_data = talloc_steal(p, profile.start); + p->icc_size = profile.len; + + return true; +} + +// Guards against NULL and uses bstr_equals to short-circuit some special cases +static bool vid_profile_eq(struct AVBufferRef *a, struct AVBufferRef *b) +{ + if (!a || !b) + return a == b; + + return bstr_equals((struct bstr){ a->data, a->size }, + (struct bstr){ b->data, b->size }); +} + +// Return whether the profile or config has changed since the last time it was +// retrieved. If it has changed, gl_lcms_get_lut3d() should be called. +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile) +{ + if (p->changed || p->current_prim != prim || p->current_trc != trc) + return true; + + return !vid_profile_eq(p->vid_profile, vid_profile); +} + +// Whether a profile is set. (gl_lcms_get_lut3d() is expected to return a lut, +// but it could still fail due to runtime errors, such as invalid icc data.) 
+bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return p->icc_size > 0; +} + +static cmsHPROFILE get_vid_profile(struct gl_lcms *p, cmsContext cms, + cmsHPROFILE disp_profile, + enum mp_csp_prim prim, enum mp_csp_trc trc) +{ + if (p->opts->use_embedded && p->vid_profile) { + // Try using the embedded ICC profile + cmsHPROFILE prof = cmsOpenProfileFromMemTHR(cms, p->vid_profile->data, + p->vid_profile->size); + if (prof) { + MP_VERBOSE(p, "Successfully opened embedded ICC profile\n"); + return prof; + } + + // Otherwise, warn the user and generate the profile as usual + MP_WARN(p, "Video contained an invalid ICC profile! Ignoring...\n"); + } + + // The input profile for the transformation is dependent on the video + // primaries and transfer characteristics + struct mp_csp_primaries csp = mp_get_csp_primaries(prim); + cmsCIExyY wp_xyY = {csp.white.x, csp.white.y, 1.0}; + cmsCIExyYTRIPLE prim_xyY = { + .Red = {csp.red.x, csp.red.y, 1.0}, + .Green = {csp.green.x, csp.green.y, 1.0}, + .Blue = {csp.blue.x, csp.blue.y, 1.0}, + }; + + cmsToneCurve *tonecurve[3] = {0}; + switch (trc) { + case MP_CSP_TRC_LINEAR: tonecurve[0] = cmsBuildGamma(cms, 1.0); break; + case MP_CSP_TRC_GAMMA18: tonecurve[0] = cmsBuildGamma(cms, 1.8); break; + case MP_CSP_TRC_GAMMA20: tonecurve[0] = cmsBuildGamma(cms, 2.0); break; + case MP_CSP_TRC_GAMMA22: tonecurve[0] = cmsBuildGamma(cms, 2.2); break; + case MP_CSP_TRC_GAMMA24: tonecurve[0] = cmsBuildGamma(cms, 2.4); break; + case MP_CSP_TRC_GAMMA26: tonecurve[0] = cmsBuildGamma(cms, 2.6); break; + case MP_CSP_TRC_GAMMA28: tonecurve[0] = cmsBuildGamma(cms, 2.8); break; + + case MP_CSP_TRC_SRGB: + // Values copied from Little-CMS + tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, + (double[5]){2.40, 1/1.055, 0.055/1.055, 1/12.92, 0.04045}); + break; + + case MP_CSP_TRC_PRO_PHOTO: + tonecurve[0] = cmsBuildParametricToneCurve(cms, 4, + (double[5]){1.8, 1.0, 0.0, 1/16.0, 0.03125}); + break; + + case MP_CSP_TRC_BT_1886: { + double src_black[3]; + if 
(p->opts->contrast < 0) { + // User requested infinite contrast, return 2.4 profile + tonecurve[0] = cmsBuildGamma(cms, 2.4); + break; + } else if (p->opts->contrast > 0) { + MP_VERBOSE(p, "Using specified contrast: %d\n", p->opts->contrast); + for (int i = 0; i < 3; i++) + src_black[i] = 1.0 / p->opts->contrast; + } else { + // To build an appropriate BT.1886 transformation we need access to + // the display's black point, so we use LittleCMS' detection + // function. Relative colorimetric is used since we want to + // approximate the BT.1886 to the target device's actual black + // point even in e.g. perceptual mode + const int intent = MP_INTENT_RELATIVE_COLORIMETRIC; + cmsCIEXYZ bp_XYZ; + if (!cmsDetectBlackPoint(&bp_XYZ, disp_profile, intent, 0)) + return false; + + // Map this XYZ value back into the (linear) source space + cmsHPROFILE rev_profile; + cmsToneCurve *linear = cmsBuildGamma(cms, 1.0); + rev_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, + (cmsToneCurve*[3]){linear, linear, linear}); + cmsHPROFILE xyz_profile = cmsCreateXYZProfile(); + cmsHTRANSFORM xyz2src = cmsCreateTransformTHR(cms, + xyz_profile, TYPE_XYZ_DBL, rev_profile, TYPE_RGB_DBL, + intent, cmsFLAGS_NOCACHE | cmsFLAGS_NOOPTIMIZE); + cmsFreeToneCurve(linear); + cmsCloseProfile(rev_profile); + cmsCloseProfile(xyz_profile); + if (!xyz2src) + return false; + + cmsDoTransform(xyz2src, &bp_XYZ, src_black, 1); + cmsDeleteTransform(xyz2src); + + double contrast = 3.0 / (src_black[0] + src_black[1] + src_black[2]); + MP_VERBOSE(p, "Detected ICC profile contrast: %f\n", contrast); + } + + // Build the parametric BT.1886 transfer curve, one per channel + for (int i = 0; i < 3; i++) { + const double gamma = 2.40; + double binv = pow(src_black[i], 1.0/gamma); + tonecurve[i] = cmsBuildParametricToneCurve(cms, 6, + (double[4]){gamma, 1.0 - binv, binv, 0.0}); + } + break; + } + + default: + abort(); + } + + if (!tonecurve[0]) + return false; + + if (!tonecurve[1]) tonecurve[1] = tonecurve[0]; 
+ if (!tonecurve[2]) tonecurve[2] = tonecurve[0]; + + cmsHPROFILE *vid_profile = cmsCreateRGBProfileTHR(cms, &wp_xyY, &prim_xyY, + tonecurve); + + if (tonecurve[2] != tonecurve[0]) cmsFreeToneCurve(tonecurve[2]); + if (tonecurve[1] != tonecurve[0]) cmsFreeToneCurve(tonecurve[1]); + cmsFreeToneCurve(tonecurve[0]); + + return vid_profile; +} + +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile) +{ + int s_r, s_g, s_b; + bool result = false; + + p->changed = false; + p->current_prim = prim; + p->current_trc = trc; + + // We need to hold on to a reference to the video's ICC profile for as long + // as we still need to perform equality checking, so generate a new + // reference here + av_buffer_unref(&p->vid_profile); + if (vid_profile) { + MP_VERBOSE(p, "Got an embedded ICC profile.\n"); + p->vid_profile = av_buffer_ref(vid_profile); + MP_HANDLE_OOM(p->vid_profile); + } + + if (!gl_parse_3dlut_size(p->opts->size_str, &s_r, &s_g, &s_b)) + return false; + + if (!gl_lcms_has_profile(p)) + return false; + + // For simplicity, default to 65x65x65, which is large enough to cover + // typical profiles with good accuracy while not being too wasteful + s_r = s_r ? s_r : 65; + s_g = s_g ? s_g : 65; + s_b = s_b ? s_b : 65; + + void *tmp = talloc_new(NULL); + uint16_t *output = talloc_array(tmp, uint16_t, s_r * s_g * s_b * 4); + struct lut3d *lut = NULL; + cmsContext cms = NULL; + + char *cache_file = NULL; + if (p->opts->cache) { + // Gamma is included in the header to help uniquely identify it, + // because we may change the parameter in the future or make it + // customizable, same for the primaries. 
+ char *cache_info = talloc_asprintf(tmp, + "ver=1.4, intent=%d, size=%dx%dx%d, prim=%d, trc=%d, " + "contrast=%d\n", + p->opts->intent, s_r, s_g, s_b, prim, trc, p->opts->contrast); + + uint8_t hash[32]; + struct AVSHA *sha = av_sha_alloc(); + MP_HANDLE_OOM(sha); + av_sha_init(sha, 256); + av_sha_update(sha, cache_info, strlen(cache_info)); + if (vid_profile) + av_sha_update(sha, vid_profile->data, vid_profile->size); + av_sha_update(sha, p->icc_data, p->icc_size); + av_sha_final(sha, hash); + av_free(sha); + + char *cache_dir = p->opts->cache_dir; + if (cache_dir && cache_dir[0]) { + cache_dir = mp_get_user_path(tmp, p->global, cache_dir); + } else { + cache_dir = mp_find_user_file(tmp, p->global, "cache", ""); + } + + if (cache_dir && cache_dir[0]) { + cache_file = talloc_strdup(tmp, ""); + for (int i = 0; i < sizeof(hash); i++) + cache_file = talloc_asprintf_append(cache_file, "%02X", hash[i]); + cache_file = mp_path_join(tmp, cache_dir, cache_file); + mp_mkdirp(cache_dir); + } + } + + // check cache + if (cache_file && stat(cache_file, &(struct stat){0}) == 0) { + MP_VERBOSE(p, "Opening 3D LUT cache in file '%s'.\n", cache_file); + struct bstr cachedata = stream_read_file(cache_file, tmp, p->global, + 1000000000); // 1 GB + if (cachedata.len == talloc_get_size(output)) { + memcpy(output, cachedata.start, cachedata.len); + goto done; + } else { + MP_WARN(p, "3D LUT cache invalid!\n"); + } + } + + cms = cmsCreateContext(NULL, p); + if (!cms) + goto error_exit; + cmsSetLogErrorHandlerTHR(cms, lcms2_error_handler); + + cmsHPROFILE profile = + cmsOpenProfileFromMemTHR(cms, p->icc_data, p->icc_size); + if (!profile) + goto error_exit; + + cmsHPROFILE vid_hprofile = get_vid_profile(p, cms, profile, prim, trc); + if (!vid_hprofile) { + cmsCloseProfile(profile); + goto error_exit; + } + + cmsHTRANSFORM trafo = cmsCreateTransformTHR(cms, vid_hprofile, TYPE_RGB_16, + profile, TYPE_RGBA_16, + p->opts->intent, + cmsFLAGS_NOCACHE | + cmsFLAGS_NOOPTIMIZE | + 
cmsFLAGS_BLACKPOINTCOMPENSATION); + cmsCloseProfile(profile); + cmsCloseProfile(vid_hprofile); + + if (!trafo) + goto error_exit; + + // transform a (s_r)x(s_g)x(s_b) cube, with 3 components per channel + uint16_t *input = talloc_array(tmp, uint16_t, s_r * 3); + for (int b = 0; b < s_b; b++) { + for (int g = 0; g < s_g; g++) { + for (int r = 0; r < s_r; r++) { + input[r * 3 + 0] = r * 65535 / (s_r - 1); + input[r * 3 + 1] = g * 65535 / (s_g - 1); + input[r * 3 + 2] = b * 65535 / (s_b - 1); + } + size_t base = (b * s_r * s_g + g * s_r) * 4; + cmsDoTransform(trafo, input, output + base, s_r); + } + } + + cmsDeleteTransform(trafo); + + if (cache_file) { + FILE *out = fopen(cache_file, "wb"); + if (out) { + fwrite(output, talloc_get_size(output), 1, out); + fclose(out); + } + } + +done: ; + + lut = talloc_ptrtype(NULL, lut); + *lut = (struct lut3d) { + .data = talloc_steal(lut, output), + .size = {s_r, s_g, s_b}, + }; + + *result_lut3d = lut; + result = true; + +error_exit: + + if (cms) + cmsDeleteContext(cms); + + if (!lut) + MP_FATAL(p, "Error loading ICC profile.\n"); + + talloc_free(tmp); + return result; +} + +#else /* HAVE_LCMS2 */ + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts) +{ + return (struct gl_lcms *) talloc_new(talloc_ctx); +} + +void gl_lcms_update_options(struct gl_lcms *p) { } +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile) {return false;} + +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile) +{ + return false; +} + +bool gl_lcms_has_profile(struct gl_lcms *p) +{ + return false; +} + +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **result_lut3d, + enum mp_csp_prim prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile) +{ + return false; +} + +#endif + +static int validate_3dlut_size_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value) 
+{ + int p1, p2, p3; + return gl_parse_3dlut_size(*value, &p1, &p2, &p3) ? 0 : M_OPT_INVALID; +} + +#define OPT_BASE_STRUCT struct mp_icc_opts +const struct m_sub_options mp_icc_conf = { + .opts = (const m_option_t[]) { + {"use-embedded-icc-profile", OPT_BOOL(use_embedded)}, + {"icc-profile", OPT_STRING(profile), .flags = M_OPT_FILE}, + {"icc-profile-auto", OPT_BOOL(profile_auto)}, + {"icc-cache", OPT_BOOL(cache)}, + {"icc-cache-dir", OPT_STRING(cache_dir), .flags = M_OPT_FILE}, + {"icc-intent", OPT_INT(intent)}, + {"icc-force-contrast", OPT_CHOICE(contrast, {"no", 0}, {"inf", -1}), + M_RANGE(0, 1000000)}, + {"icc-3dlut-size", OPT_STRING_VALIDATE(size_str, validate_3dlut_size_opt)}, + {"icc-use-luma", OPT_BOOL(icc_use_luma)}, + {0} + }, + .size = sizeof(struct mp_icc_opts), + .defaults = &(const struct mp_icc_opts) { + .size_str = "auto", + .intent = MP_INTENT_RELATIVE_COLORIMETRIC, + .use_embedded = true, + .cache = true, + }, +}; diff --git a/video/out/gpu/lcms.h b/video/out/gpu/lcms.h new file mode 100644 index 0000000..607353a --- /dev/null +++ b/video/out/gpu/lcms.h @@ -0,0 +1,61 @@ +#ifndef MP_GL_LCMS_H +#define MP_GL_LCMS_H + +#include <stddef.h> +#include <stdbool.h> +#include "misc/bstr.h" +#include "video/csputils.h" +#include <libavutil/buffer.h> + +extern const struct m_sub_options mp_icc_conf; + +struct mp_icc_opts { + bool use_embedded; + char *profile; + bool profile_auto; + bool cache; + char *cache_dir; + char *size_str; + int intent; + int contrast; + bool icc_use_luma; +}; + +struct lut3d { + uint16_t *data; + int size[3]; +}; + +struct mp_log; +struct mpv_global; +struct gl_lcms; + +struct gl_lcms *gl_lcms_init(void *talloc_ctx, struct mp_log *log, + struct mpv_global *global, + struct mp_icc_opts *opts); +void gl_lcms_update_options(struct gl_lcms *p); +bool gl_lcms_set_memory_profile(struct gl_lcms *p, bstr profile); +bool gl_lcms_has_profile(struct gl_lcms *p); +bool gl_lcms_get_lut3d(struct gl_lcms *p, struct lut3d **, + enum mp_csp_prim 
prim, enum mp_csp_trc trc, + struct AVBufferRef *vid_profile); +bool gl_lcms_has_changed(struct gl_lcms *p, enum mp_csp_prim prim, + enum mp_csp_trc trc, struct AVBufferRef *vid_profile); + +static inline bool gl_parse_3dlut_size(const char *arg, int *p1, int *p2, int *p3) +{ + if (!strcmp(arg, "auto")) { + *p1 = *p2 = *p3 = 0; + return true; + } + if (sscanf(arg, "%dx%dx%d", p1, p2, p3) != 3) + return false; + for (int n = 0; n < 3; n++) { + int s = ((int[]) { *p1, *p2, *p3 })[n]; + if (s < 2 || s > 512) + return false; + } + return true; +} + +#endif diff --git a/video/out/gpu/libmpv_gpu.c b/video/out/gpu/libmpv_gpu.c new file mode 100644 index 0000000..aae1d18 --- /dev/null +++ b/video/out/gpu/libmpv_gpu.c @@ -0,0 +1,248 @@ +#include "config.h" +#include "hwdec.h" +#include "libmpv_gpu.h" +#include "libmpv/render_gl.h" +#include "video.h" +#include "video/out/libmpv.h" + +static const struct libmpv_gpu_context_fns *context_backends[] = { +#if HAVE_GL + &libmpv_gpu_context_gl, +#endif + NULL +}; + +struct priv { + struct libmpv_gpu_context *context; + + struct gl_video *renderer; +}; + +struct native_resource_entry { + const char *name; // ra_add_native_resource() internal name argument + size_t size; // size of struct pointed to (0 for no copy) +}; + +static const struct native_resource_entry native_resource_map[] = { + [MPV_RENDER_PARAM_X11_DISPLAY] = { + .name = "x11", + .size = 0, + }, + [MPV_RENDER_PARAM_WL_DISPLAY] = { + .name = "wl", + .size = 0, + }, + [MPV_RENDER_PARAM_DRM_DRAW_SURFACE_SIZE] = { + .name = "drm_draw_surface_size", + .size = sizeof (mpv_opengl_drm_draw_surface_size), + }, + [MPV_RENDER_PARAM_DRM_DISPLAY_V2] = { + .name = "drm_params_v2", + .size = sizeof (mpv_opengl_drm_params_v2), + }, +}; + +static int init(struct render_backend *ctx, mpv_render_param *params) +{ + ctx->priv = talloc_zero(NULL, struct priv); + struct priv *p = ctx->priv; + + char *api = get_mpv_render_param(params, MPV_RENDER_PARAM_API_TYPE, NULL); + if (!api) + return 
MPV_ERROR_INVALID_PARAMETER; + + for (int n = 0; context_backends[n]; n++) { + const struct libmpv_gpu_context_fns *backend = context_backends[n]; + if (strcmp(backend->api_name, api) == 0) { + p->context = talloc_zero(NULL, struct libmpv_gpu_context); + *p->context = (struct libmpv_gpu_context){ + .global = ctx->global, + .log = ctx->log, + .fns = backend, + }; + break; + } + } + + if (!p->context) + return MPV_ERROR_NOT_IMPLEMENTED; + + int err = p->context->fns->init(p->context, params); + if (err < 0) + return err; + + for (int n = 0; params && params[n].type; n++) { + if (params[n].type > 0 && + params[n].type < MP_ARRAY_SIZE(native_resource_map) && + native_resource_map[params[n].type].name) + { + const struct native_resource_entry *entry = + &native_resource_map[params[n].type]; + void *data = params[n].data; + if (entry->size) + data = talloc_memdup(p, data, entry->size); + ra_add_native_resource(p->context->ra_ctx->ra, entry->name, data); + } + } + + p->renderer = gl_video_init(p->context->ra_ctx->ra, ctx->log, ctx->global); + + ctx->hwdec_devs = hwdec_devices_create(); + gl_video_init_hwdecs(p->renderer, p->context->ra_ctx, ctx->hwdec_devs, true); + ctx->driver_caps = VO_CAP_ROTATE90; + return 0; +} + +static bool check_format(struct render_backend *ctx, int imgfmt) +{ + struct priv *p = ctx->priv; + + return gl_video_check_format(p->renderer, imgfmt); +} + +static int set_parameter(struct render_backend *ctx, mpv_render_param param) +{ + struct priv *p = ctx->priv; + + switch (param.type) { + case MPV_RENDER_PARAM_ICC_PROFILE: { + mpv_byte_array *data = param.data; + gl_video_set_icc_profile(p->renderer, (bstr){data->data, data->size}); + return 0; + } + case MPV_RENDER_PARAM_AMBIENT_LIGHT: { + int lux = *(int *)param.data; + gl_video_set_ambient_lux(p->renderer, lux); + return 0; + } + default: + return MPV_ERROR_NOT_IMPLEMENTED; + } +} + +static void reconfig(struct render_backend *ctx, struct mp_image_params *params) +{ + struct priv *p = ctx->priv; + 
+ gl_video_config(p->renderer, params); +} + +static void reset(struct render_backend *ctx) +{ + struct priv *p = ctx->priv; + + gl_video_reset(p->renderer); +} + +static void update_external(struct render_backend *ctx, struct vo *vo) +{ + struct priv *p = ctx->priv; + + gl_video_set_osd_source(p->renderer, vo ? vo->osd : NULL); + if (vo) + gl_video_configure_queue(p->renderer, vo); +} + +static void resize(struct render_backend *ctx, struct mp_rect *src, + struct mp_rect *dst, struct mp_osd_res *osd) +{ + struct priv *p = ctx->priv; + + gl_video_resize(p->renderer, src, dst, osd); +} + +static int get_target_size(struct render_backend *ctx, mpv_render_param *params, + int *out_w, int *out_h) +{ + struct priv *p = ctx->priv; + + // Mapping the surface is cheap, better than adding new backend entrypoints. + struct ra_tex *tex; + int err = p->context->fns->wrap_fbo(p->context, params, &tex); + if (err < 0) + return err; + *out_w = tex->params.w; + *out_h = tex->params.h; + return 0; +} + +static int render(struct render_backend *ctx, mpv_render_param *params, + struct vo_frame *frame) +{ + struct priv *p = ctx->priv; + + // Mapping the surface is cheap, better than adding new backend entrypoints. 
+ struct ra_tex *tex; + int err = p->context->fns->wrap_fbo(p->context, params, &tex); + if (err < 0) + return err; + + int depth = *(int *)get_mpv_render_param(params, MPV_RENDER_PARAM_DEPTH, + &(int){0}); + gl_video_set_fb_depth(p->renderer, depth); + + bool flip = *(int *)get_mpv_render_param(params, MPV_RENDER_PARAM_FLIP_Y, + &(int){0}); + + struct ra_fbo target = {.tex = tex, .flip = flip}; + gl_video_render_frame(p->renderer, frame, target, RENDER_FRAME_DEF); + p->context->fns->done_frame(p->context, frame->display_synced); + + return 0; +} + +static struct mp_image *get_image(struct render_backend *ctx, int imgfmt, + int w, int h, int stride_align, int flags) +{ + struct priv *p = ctx->priv; + + return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align, flags); +} + +static void screenshot(struct render_backend *ctx, struct vo_frame *frame, + struct voctrl_screenshot *args) +{ + struct priv *p = ctx->priv; + + gl_video_screenshot(p->renderer, frame, args); +} + +static void perfdata(struct render_backend *ctx, + struct voctrl_performance_data *out) +{ + struct priv *p = ctx->priv; + + gl_video_perfdata(p->renderer, out); +} + +static void destroy(struct render_backend *ctx) +{ + struct priv *p = ctx->priv; + + if (p->renderer) + gl_video_uninit(p->renderer); + + hwdec_devices_destroy(ctx->hwdec_devs); + + if (p->context) { + p->context->fns->destroy(p->context); + talloc_free(p->context->priv); + talloc_free(p->context); + } +} + +const struct render_backend_fns render_backend_gpu = { + .init = init, + .check_format = check_format, + .set_parameter = set_parameter, + .reconfig = reconfig, + .reset = reset, + .update_external = update_external, + .resize = resize, + .get_target_size = get_target_size, + .render = render, + .get_image = get_image, + .screenshot = screenshot, + .perfdata = perfdata, + .destroy = destroy, +}; diff --git a/video/out/gpu/libmpv_gpu.h b/video/out/gpu/libmpv_gpu.h new file mode 100644 index 0000000..497dcc3 --- /dev/null +++ 
// Manage backend specific interaction between libmpv and ra backend, that can't
// be managed by ra itself (initialization and passing FBOs).
struct libmpv_gpu_context_fns {
    // The libmpv API type name, see MPV_RENDER_PARAM_API_TYPE.
    const char *api_name;
    // Pretty much works like render_backend_fns.init, except that the
    // API type is already checked by the caller.
    // Successful init must set ctx->ra_ctx (the generic code dereferences
    // ctx->ra_ctx->ra right after init returns success). A negative return
    // value is treated as a libmpv error code and passed on to the caller.
    int (*init)(struct libmpv_gpu_context *ctx, mpv_render_param *params);
    // Wrap the surface passed to mpv_render_context_render() (via the params
    // array) into a ra_tex and return it. Returns a libmpv error code, and sets
    // *out to a temporary object on success. The returned object is valid until
    // another wrap_fbo() or done_frame() is called.
    // This does not need to care about generic attributes, like flipping.
    int (*wrap_fbo)(struct libmpv_gpu_context *ctx, mpv_render_param *params,
                    struct ra_tex **out);
    // Signal that the ra_tex object obtained with wrap_fbo is no longer used.
    // For certain backends, this might also be used to signal the end of
    // rendering (like OpenGL doing weird crap).
    // The generic render() code passes frame->display_synced as ds.
    void (*done_frame)(struct libmpv_gpu_context *ctx, bool ds);
    // Free all data in ctx->priv.
    void (*destroy)(struct libmpv_gpu_context *ctx);
};
// State kept per OSD part (one entry per index in mpgl_osd.parts[]).
struct mpgl_osd_part {
    // Format of the bitmaps the texture was allocated for. upload_osd()
    // reallocates the texture when this differs from the incoming format.
    enum sub_bitmap_format format;
    // change_id of the sub_bitmaps that were uploaded last; the upload is
    // skipped while the incoming change_id is the same.
    int change_id;
    // Texture holding the packed bitmaps; NULL until the first upload.
    struct ra_tex *texture;
    // Allocated texture size (rounded up to a power of 2, at least 32).
    int w, h;
    // Number of valid entries in subparts[] for the current frame. Reset to 0
    // at the start of mpgl_osd_generate().
    int num_subparts;
    // num_subparts as of the previous mpgl_osd_generate() call; used to detect
    // parts appearing or going away.
    int prev_num_subparts;
    // Copy of the sub_bitmap entries passed to gen_osd_cb().
    struct sub_bitmap *subparts;
    // Vertex data built by generate_verts(): 6 vertices (two triangles) per
    // subpart. num_vertices is reset in mpgl_osd_draw_finish().
    int num_vertices;
    struct vertex *vertices;
};
// Return the smallest power of two that is >= v.
//
// Values <= 1 (including 0 and negative numbers) yield 1. If no power of two
// >= v is representable as int, INT_MAX is returned as an "impossibly large"
// marker, which callers are expected to reject via their size limit checks.
//
// Uses doubling with an explicit overflow guard rather than a fixed shift
// range, so the largest representable power of two (e.g. 2^30 for 32-bit
// int) is still returned correctly and no assumption about the width of int
// is made.
static int next_pow2(int v)
{
    int p = 1;
    while (p < v) {
        if (p > INT_MAX / 2)
            return INT_MAX; // doubling would overflow
        p *= 2;
    }
    return p;
}
%dx%d.\n", ra->max_texture_wh, + ra->max_texture_wh); + goto done; + } + + struct ra_tex_params params = { + .dimensions = 2, + .w = osd->w, + .h = osd->h, + .d = 1, + .format = fmt, + .render_src = true, + .src_linear = true, + .host_mutable = true, + }; + osd->texture = ra_tex_create(ra, &params); + if (!osd->texture) + goto done; + } + + struct ra_tex_upload_params params = { + .tex = osd->texture, + .src = imgs->packed->planes[0], + .invalidate = true, + .rc = &(struct mp_rect){0, 0, imgs->packed_w, imgs->packed_h}, + .stride = imgs->packed->stride[0], + }; + + ok = ra->fns->tex_upload(ra, &params); + +done: + return ok; +} + +static void gen_osd_cb(void *pctx, struct sub_bitmaps *imgs) +{ + struct mpgl_osd *ctx = pctx; + + if (imgs->num_parts == 0 || !ctx->formats[imgs->format]) + return; + + struct mpgl_osd_part *osd = ctx->parts[imgs->render_index]; + + bool ok = true; + if (imgs->change_id != osd->change_id) { + if (!upload_osd(ctx, osd, imgs)) + ok = false; + + osd->change_id = imgs->change_id; + ctx->change_flag = true; + } + osd->num_subparts = ok ? 
imgs->num_parts : 0; + + MP_TARRAY_GROW(osd, osd->subparts, osd->num_subparts); + memcpy(osd->subparts, imgs->parts, + osd->num_subparts * sizeof(osd->subparts[0])); +} + +bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc) +{ + assert(index >= 0 && index < MAX_OSD_PARTS); + struct mpgl_osd_part *part = ctx->parts[index]; + + enum sub_bitmap_format fmt = part->format; + if (!fmt || !part->num_subparts || !part->texture) + return false; + + gl_sc_uniform_texture(sc, "osdtex", part->texture); + switch (fmt) { + case SUBBITMAP_BGRA: { + GLSL(color = texture(osdtex, texcoord).bgra;) + break; + } + case SUBBITMAP_LIBASS: { + GLSL(color = + vec4(ass_color.rgb, ass_color.a * texture(osdtex, texcoord).r);) + break; + } + default: + MP_ASSERT_UNREACHABLE(); + } + + return true; +} + +static void write_quad(struct vertex *va, struct gl_transform t, + float x0, float y0, float x1, float y1, + float tx0, float ty0, float tx1, float ty1, + float tex_w, float tex_h, const uint8_t color[4]) +{ + gl_transform_vec(t, &x0, &y0); + gl_transform_vec(t, &x1, &y1); + +#define COLOR_INIT {color[0], color[1], color[2], color[3]} + va[0] = (struct vertex){ {x0, y0}, {tx0 / tex_w, ty0 / tex_h}, COLOR_INIT }; + va[1] = (struct vertex){ {x0, y1}, {tx0 / tex_w, ty1 / tex_h}, COLOR_INIT }; + va[2] = (struct vertex){ {x1, y0}, {tx1 / tex_w, ty0 / tex_h}, COLOR_INIT }; + va[3] = (struct vertex){ {x1, y1}, {tx1 / tex_w, ty1 / tex_h}, COLOR_INIT }; + va[4] = va[2]; + va[5] = va[1]; +#undef COLOR_INIT +} + +static void generate_verts(struct mpgl_osd_part *part, struct gl_transform t) +{ + MP_TARRAY_GROW(part, part->vertices, + part->num_vertices + part->num_subparts * 6); + + for (int n = 0; n < part->num_subparts; n++) { + struct sub_bitmap *b = &part->subparts[n]; + struct vertex *va = &part->vertices[part->num_vertices]; + + // NOTE: the blend color is used with SUBBITMAP_LIBASS only, so it + // doesn't matter that we upload garbage for the other formats + 
uint32_t c = b->libass.color; + uint8_t color[4] = { c >> 24, (c >> 16) & 0xff, + (c >> 8) & 0xff, 255 - (c & 0xff) }; + + write_quad(va, t, + b->x, b->y, b->x + b->dw, b->y + b->dh, + b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h, + part->w, part->h, color); + + part->num_vertices += 6; + } +} + +// number of screen divisions per axis (x=0, y=1) for the current 3D mode +static void get_3d_side_by_side(int stereo_mode, int div[2]) +{ + div[0] = div[1] = 1; + switch (stereo_mode) { + case MP_STEREO3D_SBS2L: + case MP_STEREO3D_SBS2R: div[0] = 2; break; + case MP_STEREO3D_AB2R: + case MP_STEREO3D_AB2L: div[1] = 2; break; + } +} + +void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc, struct ra_fbo fbo) +{ + struct mpgl_osd_part *part = ctx->parts[index]; + + int div[2]; + get_3d_side_by_side(ctx->stereo_mode, div); + + part->num_vertices = 0; + + for (int x = 0; x < div[0]; x++) { + for (int y = 0; y < div[1]; y++) { + struct gl_transform t; + gl_transform_ortho_fbo(&t, fbo); + + float a_x = ctx->osd_res.w * x; + float a_y = ctx->osd_res.h * y; + t.t[0] += a_x * t.m[0][0] + a_y * t.m[1][0]; + t.t[1] += a_x * t.m[0][1] + a_y * t.m[1][1]; + + generate_verts(part, t); + } + } + + const int *factors = &blend_factors[part->format][0]; + gl_sc_blend(sc, factors[0], factors[1], factors[2], factors[3]); + + gl_sc_dispatch_draw(sc, fbo.tex, false, vertex_vao, MP_ARRAY_SIZE(vertex_vao), + sizeof(struct vertex), part->vertices, part->num_vertices); +} + +static void set_res(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) +{ + int div[2]; + get_3d_side_by_side(stereo_mode, div); + + res.w /= div[0]; + res.h /= div[1]; + ctx->osd_res = res; +} + +void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, + int stereo_mode, int draw_flags) +{ + for (int n = 0; n < MAX_OSD_PARTS; n++) + ctx->parts[n]->num_subparts = 0; + + set_res(ctx, res, stereo_mode); + + osd_draw(ctx->osd, ctx->osd_res, pts, draw_flags, 
ctx->formats, gen_osd_cb, ctx); + ctx->stereo_mode = stereo_mode; + + // Parts going away does not necessarily result in gen_osd_cb() being called + // (not even with num_parts==0), so check this separately. + for (int n = 0; n < MAX_OSD_PARTS; n++) { + struct mpgl_osd_part *part = ctx->parts[n]; + if (part->num_subparts != part->prev_num_subparts) + ctx->change_flag = true; + part->prev_num_subparts = part->num_subparts; + } +} + +// See osd_resize() for remarks. This function is an optional optimization too. +void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode) +{ + set_res(ctx, res, stereo_mode); + osd_resize(ctx->osd, ctx->osd_res); +} + +bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, + double pts) +{ + ctx->change_flag = false; + mpgl_osd_generate(ctx, *res, pts, 0, 0); + return ctx->change_flag; +} diff --git a/video/out/gpu/osd.h b/video/out/gpu/osd.h new file mode 100644 index 0000000..00fbc49 --- /dev/null +++ b/video/out/gpu/osd.h @@ -0,0 +1,25 @@ +#ifndef MPLAYER_GL_OSD_H +#define MPLAYER_GL_OSD_H + +#include <stdbool.h> +#include <inttypes.h> + +#include "utils.h" +#include "shader_cache.h" +#include "sub/osd.h" + +struct mpgl_osd *mpgl_osd_init(struct ra *ra, struct mp_log *log, + struct osd_state *osd); +void mpgl_osd_destroy(struct mpgl_osd *ctx); + +void mpgl_osd_generate(struct mpgl_osd *ctx, struct mp_osd_res res, double pts, + int stereo_mode, int draw_flags); +void mpgl_osd_resize(struct mpgl_osd *ctx, struct mp_osd_res res, int stereo_mode); +bool mpgl_osd_draw_prepare(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc); +void mpgl_osd_draw_finish(struct mpgl_osd *ctx, int index, + struct gl_shader_cache *sc, struct ra_fbo fbo); +bool mpgl_osd_check_change(struct mpgl_osd *ctx, struct mp_osd_res *res, + double pts); + +#endif diff --git a/video/out/gpu/ra.c b/video/out/gpu/ra.c new file mode 100644 index 0000000..855f9b6 --- /dev/null +++ b/video/out/gpu/ra.c @@ -0,0 +1,424 @@ 
+#include "common/common.h" +#include "common/msg.h" +#include "video/img_format.h" + +#include "ra.h" + +void ra_add_native_resource(struct ra *ra, const char *name, void *data) +{ + struct ra_native_resource r = { + .name = name, + .data = data, + }; + MP_TARRAY_APPEND(ra, ra->native_resources, ra->num_native_resources, r); +} + +void *ra_get_native_resource(struct ra *ra, const char *name) +{ + for (int n = 0; n < ra->num_native_resources; n++) { + struct ra_native_resource *r = &ra->native_resources[n]; + if (strcmp(r->name, name) == 0) + return r->data; + } + + return NULL; +} + +struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params) +{ + switch (params->dimensions) { + case 1: + assert(params->h == 1 && params->d == 1); + break; + case 2: + assert(params->d == 1); + break; + default: + assert(params->dimensions >= 1 && params->dimensions <= 3); + } + return ra->fns->tex_create(ra, params); +} + +void ra_tex_free(struct ra *ra, struct ra_tex **tex) +{ + if (*tex) + ra->fns->tex_destroy(ra, *tex); + *tex = NULL; +} + +struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params) +{ + return ra->fns->buf_create(ra, params); +} + +void ra_buf_free(struct ra *ra, struct ra_buf **buf) +{ + if (*buf) + ra->fns->buf_destroy(ra, *buf); + *buf = NULL; +} + +void ra_free(struct ra **ra) +{ + if (*ra) + (*ra)->fns->destroy(*ra); + talloc_free(*ra); + *ra = NULL; +} + +size_t ra_vartype_size(enum ra_vartype type) +{ + switch (type) { + case RA_VARTYPE_INT: return sizeof(int); + case RA_VARTYPE_FLOAT: return sizeof(float); + case RA_VARTYPE_BYTE_UNORM: return 1; + default: return 0; + } +} + +struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input) +{ + size_t el_size = ra_vartype_size(input->type); + if (!el_size) + return (struct ra_layout){0}; + + // host data is always tightly packed + return (struct ra_layout) { + .align = 1, + .stride = el_size * input->dim_v, + .size = el_size * input->dim_v * 
input->dim_m, + }; +} + +static struct ra_renderpass_input *dup_inputs(void *ta_parent, + const struct ra_renderpass_input *inputs, int num_inputs) +{ + struct ra_renderpass_input *res = + talloc_memdup(ta_parent, (void *)inputs, num_inputs * sizeof(inputs[0])); + for (int n = 0; n < num_inputs; n++) + res[n].name = talloc_strdup(res, res[n].name); + return res; +} + +// Return a newly allocated deep-copy of params. +struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params) +{ + struct ra_renderpass_params *res = talloc_ptrtype(ta_parent, res); + *res = *params; + res->inputs = dup_inputs(res, res->inputs, res->num_inputs); + res->vertex_attribs = + dup_inputs(res, res->vertex_attribs, res->num_vertex_attribs); + res->cached_program = bstrdup(res, res->cached_program); + res->vertex_shader = talloc_strdup(res, res->vertex_shader); + res->frag_shader = talloc_strdup(res, res->frag_shader); + res->compute_shader = talloc_strdup(res, res->compute_shader); + return res; +} + +struct glsl_fmt { + enum ra_ctype ctype; + int num_components; + int component_depth[4]; + const char *glsl_format; +}; + +// List taken from the GLSL specification, sans snorm and sint formats +static const struct glsl_fmt ra_glsl_fmts[] = { + {RA_CTYPE_FLOAT, 1, {16}, "r16f"}, + {RA_CTYPE_FLOAT, 1, {32}, "r32f"}, + {RA_CTYPE_FLOAT, 2, {16, 16}, "rg16f"}, + {RA_CTYPE_FLOAT, 2, {32, 32}, "rg32f"}, + {RA_CTYPE_FLOAT, 4, {16, 16, 16, 16}, "rgba16f"}, + {RA_CTYPE_FLOAT, 4, {32, 32, 32, 32}, "rgba32f"}, + {RA_CTYPE_FLOAT, 3, {11, 11, 10}, "r11f_g11f_b10f"}, + + {RA_CTYPE_UNORM, 1, {8}, "r8"}, + {RA_CTYPE_UNORM, 1, {16}, "r16"}, + {RA_CTYPE_UNORM, 2, {8, 8}, "rg8"}, + {RA_CTYPE_UNORM, 2, {16, 16}, "rg16"}, + {RA_CTYPE_UNORM, 4, {8, 8, 8, 8}, "rgba8"}, + {RA_CTYPE_UNORM, 4, {16, 16, 16, 16}, "rgba16"}, + {RA_CTYPE_UNORM, 4, {10, 10, 10, 2}, "rgb10_a2"}, + + {RA_CTYPE_UINT, 1, {8}, "r8ui"}, + {RA_CTYPE_UINT, 1, {16}, "r16ui"}, + {RA_CTYPE_UINT, 
1, {32}, "r32ui"}, + {RA_CTYPE_UINT, 2, {8, 8}, "rg8ui"}, + {RA_CTYPE_UINT, 2, {16, 16}, "rg16ui"}, + {RA_CTYPE_UINT, 2, {32, 32}, "rg32ui"}, + {RA_CTYPE_UINT, 4, {8, 8, 8, 8}, "rgba8ui"}, + {RA_CTYPE_UINT, 4, {16, 16, 16, 16}, "rgba16ui"}, + {RA_CTYPE_UINT, 4, {32, 32, 32, 32}, "rgba32ui"}, + {RA_CTYPE_UINT, 4, {10, 10, 10, 2}, "rgb10_a2ui"}, +}; + +const char *ra_fmt_glsl_format(const struct ra_format *fmt) +{ + for (int n = 0; n < MP_ARRAY_SIZE(ra_glsl_fmts); n++) { + const struct glsl_fmt *gfmt = &ra_glsl_fmts[n]; + + if (fmt->ctype != gfmt->ctype) + continue; + if (fmt->num_components != gfmt->num_components) + continue; + + for (int i = 0; i < fmt->num_components; i++) { + if (fmt->component_depth[i] != gfmt->component_depth[i]) + goto next_fmt; + } + + return gfmt->glsl_format; + +next_fmt: ; // equivalent to `continue` + } + + return NULL; +} + +// Return whether this is a tightly packed format with no external padding and +// with the same bit size/depth in all components, and the shader returns +// components in the same order as in memory. +static bool ra_format_is_regular(const struct ra_format *fmt) +{ + if (!fmt->pixel_size || !fmt->num_components || !fmt->ordered) + return false; + for (int n = 1; n < fmt->num_components; n++) { + if (fmt->component_size[n] != fmt->component_size[0] || + fmt->component_depth[n] != fmt->component_depth[0]) + return false; + } + if (fmt->component_size[0] * fmt->num_components != fmt->pixel_size * 8) + return false; + return true; +} + +// Return a regular filterable format using RA_CTYPE_UNORM. 
+const struct ra_format *ra_find_unorm_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_UNORM && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] == bytes_per_component * 8 && + fmt->linear_filter && ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Return a regular format using RA_CTYPE_UINT. +const struct ra_format *ra_find_uint_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_UINT && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] == bytes_per_component * 8 && + ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Find a float format of any precision that matches the C type of the same +// size for upload. +// May drop bits from the mantissa (such as selecting float16 even if +// bytes_per_component == 32); prefers possibly faster formats first. +static const struct ra_format *ra_find_float_format(struct ra *ra, + int bytes_per_component, + int n_components) +{ + // Assumes ra_format are ordered by performance. + // The >=16 check is to avoid catching fringe formats. + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (fmt->ctype == RA_CTYPE_FLOAT && fmt->num_components == n_components && + fmt->pixel_size == bytes_per_component * n_components && + fmt->component_depth[0] >= 16 && + fmt->linear_filter && ra_format_is_regular(fmt)) + return fmt; + } + return NULL; +} + +// Return a filterable regular format that uses at least float16 internally, and +// uses a normal C float for transfer on the CPU side. 
(This is just so we don't +// need 32->16 bit conversion on CPU, which would be messy.) +const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components) +{ + return ra_find_float_format(ra, sizeof(float), n_components); +} + +const struct ra_format *ra_find_named_format(struct ra *ra, const char *name) +{ + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (strcmp(fmt->name, name) == 0) + return fmt; + } + return NULL; +} + +// Like ra_find_unorm_format(), but if no fixed point format is available, +// return an unsigned integer format. +static const struct ra_format *find_plane_format(struct ra *ra, int bytes, + int n_channels, + enum mp_component_type ctype) +{ + switch (ctype) { + case MP_COMPONENT_TYPE_UINT: { + const struct ra_format *f = ra_find_unorm_format(ra, bytes, n_channels); + if (f) + return f; + return ra_find_uint_format(ra, bytes, n_channels); + } + case MP_COMPONENT_TYPE_FLOAT: + return ra_find_float_format(ra, bytes, n_channels); + default: return NULL; + } +} + +// Put a mapping of imgfmt to texture formats into *out. Basically it selects +// the correct texture formats needed to represent an imgfmt in a shader, with +// textures using the same memory organization as on the CPU. +// Each plane is represented by a texture, and each texture has a RGBA +// component order. out->components describes the meaning of them. +// May return integer formats for >8 bit formats, if the driver has no +// normalized 16 bit formats. +// Returns false (and *out is not touched) if no format found. 
+bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out) +{ + struct ra_imgfmt_desc res = {.component_type = RA_CTYPE_UNKNOWN}; + + struct mp_regular_imgfmt regfmt; + if (mp_get_regular_imgfmt(®fmt, imgfmt)) { + res.num_planes = regfmt.num_planes; + res.component_bits = regfmt.component_size * 8; + res.component_pad = regfmt.component_pad; + for (int n = 0; n < regfmt.num_planes; n++) { + struct mp_regular_imgfmt_plane *plane = ®fmt.planes[n]; + res.planes[n] = find_plane_format(ra, regfmt.component_size, + plane->num_components, + regfmt.component_type); + if (!res.planes[n]) + return false; + for (int i = 0; i < plane->num_components; i++) + res.components[n][i] = plane->components[i]; + // Dropping LSBs when shifting will lead to dropped MSBs. + if (res.component_bits > res.planes[n]->component_depth[0] && + res.component_pad < 0) + return false; + // Renderer restriction, but actually an unwanted corner case. + if (res.component_type != RA_CTYPE_UNKNOWN && + res.component_type != res.planes[n]->ctype) + return false; + res.component_type = res.planes[n]->ctype; + } + res.chroma_w = 1 << regfmt.chroma_xs; + res.chroma_h = 1 << regfmt.chroma_ys; + goto supported; + } + + for (int n = 0; n < ra->num_formats; n++) { + if (imgfmt && ra->formats[n]->special_imgfmt == imgfmt) { + res = *ra->formats[n]->special_imgfmt_desc; + goto supported; + } + } + + // Unsupported format + return false; + +supported: + + *out = res; + return true; +} + +static const char *ctype_to_str(enum ra_ctype ctype) +{ + switch (ctype) { + case RA_CTYPE_UNORM: return "unorm"; + case RA_CTYPE_UINT: return "uint "; + case RA_CTYPE_FLOAT: return "float"; + default: return "unknown"; + } +} + +void ra_dump_tex_formats(struct ra *ra, int msgl) +{ + if (!mp_msg_test(ra->log, msgl)) + return; + MP_MSG(ra, msgl, "Texture formats:\n"); + MP_MSG(ra, msgl, " NAME COMP*TYPE SIZE DEPTH PER COMP.\n"); + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = 
ra->formats[n]; + const char *ctype = ctype_to_str(fmt->ctype); + char cl[40] = ""; + for (int i = 0; i < fmt->num_components; i++) { + mp_snprintf_cat(cl, sizeof(cl), "%s%d", i ? " " : "", + fmt->component_size[i]); + if (fmt->component_size[i] != fmt->component_depth[i]) + mp_snprintf_cat(cl, sizeof(cl), "/%d", fmt->component_depth[i]); + } + MP_MSG(ra, msgl, " %-10s %d*%s %3dB %s %s %s %s {%s}\n", fmt->name, + fmt->num_components, ctype, fmt->pixel_size, + fmt->luminance_alpha ? "LA" : " ", + fmt->linear_filter ? "LF" : " ", + fmt->renderable ? "CR" : " ", + fmt->storable ? "ST" : " ", cl); + } + MP_MSG(ra, msgl, " LA = LUMINANCE_ALPHA hack format\n"); + MP_MSG(ra, msgl, " LF = linear filterable\n"); + MP_MSG(ra, msgl, " CR = can be used for render targets\n"); + MP_MSG(ra, msgl, " ST = can be used for storable images\n"); +} + +void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, + int msgl) +{ + char pl[80] = ""; + char pf[80] = ""; + for (int n = 0; n < desc->num_planes; n++) { + if (n > 0) { + mp_snprintf_cat(pl, sizeof(pl), "/"); + mp_snprintf_cat(pf, sizeof(pf), "/"); + } + char t[5] = {0}; + for (int i = 0; i < 4; i++) + t[i] = "_rgba"[desc->components[n][i]]; + for (int i = 3; i > 0 && t[i] == '_'; i--) + t[i] = '\0'; + mp_snprintf_cat(pl, sizeof(pl), "%s", t); + mp_snprintf_cat(pf, sizeof(pf), "%s", desc->planes[n]->name); + } + MP_MSG(ra, msgl, "%d planes %dx%d %d/%d [%s] (%s) [%s]\n", + desc->num_planes, desc->chroma_w, desc->chroma_h, + desc->component_bits, desc->component_pad, pf, pl, + ctype_to_str(desc->component_type)); +} + +void ra_dump_img_formats(struct ra *ra, int msgl) +{ + if (!mp_msg_test(ra->log, msgl)) + return; + MP_MSG(ra, msgl, "Image formats:\n"); + for (int imgfmt = IMGFMT_START; imgfmt < IMGFMT_END; imgfmt++) { + const char *name = mp_imgfmt_to_name(imgfmt); + if (strcmp(name, "unknown") == 0) + continue; + MP_MSG(ra, msgl, " %s", name); + struct ra_imgfmt_desc desc; + if (ra_get_imgfmt_desc(ra, imgfmt, 
// Handle for a rendering API backend.
struct ra {
    // Backend function table. The generic ra_*() helpers dispatch through it
    // (e.g. tex_create, tex_destroy, buf_create, buf_destroy, destroy).
    struct ra_fns *fns;
    // NOTE(review): presumably backend-private state owned by the backend;
    // confirm against the backend implementations.
    void *priv;

    int glsl_version;       // GLSL version (e.g. 300 => 3.0)
    bool glsl_es;           // use ES dialect
    bool glsl_vulkan;       // use vulkan dialect

    // Log context; used for all messages emitted on behalf of this ra.
    struct mp_log *log;

    // RA_CAP_* bit field. The RA backend must set supported features at init
    // time.
    uint64_t caps;

    // Maximum supported width and height of a 2D texture. Set by the RA backend
    // at init time.
    int max_texture_wh;

    // Maximum shared memory for compute shaders. Set by the RA backend at init
    // time.
    size_t max_shmem;

    // Maximum number of threads in a compute work group. Set by the RA backend
    // at init time.
    size_t max_compute_group_threads;

    // Maximum push constant size. Set by the RA backend at init time.
    size_t max_pushc_size;

    // Set of supported texture formats. Must be added by RA backend at init time.
    // If there are equivalent formats with different caveats, the preferred
    // formats should have a lower index. (E.g. GLES3 should put rg8 before la.)
    struct ra_format **formats;
    int num_formats;

    // Accelerate texture uploads via an extra PBO even when
    // RA_CAP_DIRECT_UPLOAD is supported. This is basically only relevant for
    // OpenGL. Set by the RA user.
    bool use_pbo;

    // Array of native resources. For the most part an "escape" mechanism, and
    // usually does not contain parameters required for basic functionality.
    // Entries are appended with ra_add_native_resource() and looked up by name
    // with ra_get_native_resource().
    struct ra_native_resource *native_resources;
    int num_native_resources;
};
The +// names are always internal (the libmpv render API uses mpv_render_param_type +// and maps them to names internally). +// For example, a name="x11" entry has an X11 display as (Display*)data. +struct ra_native_resource { + const char *name; + void *data; +}; + +// Add a ra_native_resource entry. Both name and data pointers must stay valid +// until ra termination. +void ra_add_native_resource(struct ra *ra, const char *name, void *data); + +// Search ra->native_resources, returns NULL on failure. +void *ra_get_native_resource(struct ra *ra, const char *name); + +enum { + RA_CAP_TEX_1D = 1 << 0, // supports 1D textures (as shader inputs) + RA_CAP_TEX_3D = 1 << 1, // supports 3D textures (as shader inputs) + RA_CAP_BLIT = 1 << 2, // supports ra_fns.blit + RA_CAP_COMPUTE = 1 << 3, // supports compute shaders + RA_CAP_DIRECT_UPLOAD = 1 << 4, // supports tex_upload without ra_buf + RA_CAP_BUF_RO = 1 << 5, // supports RA_VARTYPE_BUF_RO + RA_CAP_BUF_RW = 1 << 6, // supports RA_VARTYPE_BUF_RW + RA_CAP_NESTED_ARRAY = 1 << 7, // supports nested arrays + RA_CAP_GLOBAL_UNIFORM = 1 << 8, // supports using "naked" uniforms (not UBO) + RA_CAP_GATHER = 1 << 9, // supports textureGather in GLSL + RA_CAP_FRAGCOORD = 1 << 10, // supports reading from gl_FragCoord + RA_CAP_PARALLEL_COMPUTE = 1 << 11, // supports parallel compute shaders + RA_CAP_NUM_GROUPS = 1 << 12, // supports gl_NumWorkGroups + RA_CAP_SLOW_DR = 1 << 13, // direct rendering is assumed to be slow +}; + +enum ra_ctype { + RA_CTYPE_UNKNOWN = 0, // also used for inconsistent multi-component formats + RA_CTYPE_UNORM, // unsigned normalized integer (fixed point) formats + RA_CTYPE_UINT, // full integer formats + RA_CTYPE_FLOAT, // float formats (signed, any bit size) +}; + +// All formats must be useable as texture formats. All formats must be byte +// aligned (all pixels start and end on a byte boundary), at least as far as CPU +// transfers are concerned. 
+struct ra_format { + // All fields are read-only after creation. + const char *name; // symbolic name for user interaction/debugging + void *priv; + enum ra_ctype ctype; // data type of each component + bool ordered; // components are sequential in memory, and returned + // by the shader in memory order (the shader can + // return arbitrary values for unused components) + int num_components; // component count, 0 if not applicable, max. 4 + int component_size[4]; // in bits, all entries 0 if not applicable + int component_depth[4]; // bits in use for each component, 0 if not applicable + // (_must_ be set if component_size[] includes padding, + // and the real precision as seen by shader is lower) + int pixel_size; // in bytes, total pixel size (0 if opaque) + bool luminance_alpha; // pre-GL_ARB_texture_rg hack for 2 component textures + // if this is set, shader must use .ra instead of .rg + // only applies to 2-component textures + bool linear_filter; // linear filtering available from shader + bool renderable; // can be used for render targets + bool storable; // can be used for storage images + bool dummy_format; // is not a real ra_format but a fake one (e.g. FBO). + // dummy formats cannot be used to create textures + + // If not 0, the format represents some sort of packed fringe format, whose + // shader representation is given by the special_imgfmt_desc pointer. + int special_imgfmt; + const struct ra_imgfmt_desc *special_imgfmt_desc; + + // This gives the GLSL image format corresponding to the format, if any. + // (e.g. rgba16ui) + const char *glsl_format; +}; + +struct ra_tex_params { + int dimensions; // 1-3 for 1D-3D textures + // Size of the texture. 1D textures require h=d=1, 2D textures require d=1. 
+ int w, h, d; + const struct ra_format *format; + bool render_src; // must be useable as source texture in a shader + bool render_dst; // must be useable as target texture in a shader + bool storage_dst; // must be usable as a storage image (RA_VARTYPE_IMG_W) + bool blit_src; // must be usable as a blit source + bool blit_dst; // must be usable as a blit destination + bool host_mutable; // texture may be updated with tex_upload + bool downloadable; // texture can be read with tex_download + // When used as render source texture. + bool src_linear; // if false, use nearest sampling (whether this can + // be true depends on ra_format.linear_filter) + bool src_repeat; // if false, clamp texture coordinates to edge + // if true, repeat texture coordinates + bool non_normalized; // hack for GL_TEXTURE_RECTANGLE OSX idiocy + // always set to false, except in OSX code + bool external_oes; // hack for GL_TEXTURE_EXTERNAL_OES idiocy + // If non-NULL, the texture will be created with these contents. Using + // this does *not* require setting host_mutable. Otherwise, the initial + // data is undefined. + void *initial_data; +}; + +// Conflates the following typical GPU API concepts: +// - texture itself +// - sampler state +// - staging buffers for texture upload +// - framebuffer objects +// - wrappers for swapchain framebuffers +// - synchronization needed for upload/rendering/etc. +struct ra_tex { + // All fields are read-only after creation. 
+ struct ra_tex_params params; + void *priv; +}; + +struct ra_tex_upload_params { + struct ra_tex *tex; // Texture to upload to + bool invalidate; // Discard pre-existing data not in the region uploaded + // Uploading from buffer: + struct ra_buf *buf; // Buffer to upload from (mutually exclusive with `src`) + size_t buf_offset; // Start of data within buffer (bytes) + // Uploading directly: (Note: If RA_CAP_DIRECT_UPLOAD is not set, then this + // will be internally translated to a tex_upload buffer by the RA) + const void *src; // Address of data + // For 2D textures only: + struct mp_rect *rc; // Region to upload. NULL means entire image + ptrdiff_t stride; // The size of a horizontal line in bytes (*not* texels!) +}; + +struct ra_tex_download_params { + struct ra_tex *tex; // Texture to download from + // Downloading directly (set by caller, data written to by callee): + void *dst; // Address of data (packed with no alignment) + ptrdiff_t stride; // The size of a horizontal line in bytes (*not* texels!) +}; + +// Buffer usage type. This restricts what types of operations may be performed +// on a buffer. +enum ra_buf_type { + RA_BUF_TYPE_INVALID, + RA_BUF_TYPE_TEX_UPLOAD, // texture upload buffer (pixel buffer object) + RA_BUF_TYPE_SHADER_STORAGE, // shader buffer (SSBO), for RA_VARTYPE_BUF_RW + RA_BUF_TYPE_UNIFORM, // uniform buffer (UBO), for RA_VARTYPE_BUF_RO + RA_BUF_TYPE_VERTEX, // not publicly usable (RA-internal usage) + RA_BUF_TYPE_SHARED_MEMORY, // device memory for sharing with external API +}; + +struct ra_buf_params { + enum ra_buf_type type; + size_t size; + bool host_mapped; // create a read-writable persistent mapping (ra_buf.data) + bool host_mutable; // contents may be updated via buf_update() + // If non-NULL, the buffer will be created with these contents. Otherwise, + // the initial data is undefined. 
+ void *initial_data; +}; + +// A generic buffer, which can be used for many purposes (texture upload, +// storage buffer, uniform buffer, etc.) +struct ra_buf { + // All fields are read-only after creation. + struct ra_buf_params params; + void *data; // for persistently mapped buffers, points to the first byte + void *priv; +}; + +// Type of a shader uniform variable, or a vertex attribute. In all cases, +// vectors and matrices are done by having more than 1 value. +enum ra_vartype { + RA_VARTYPE_INVALID, + RA_VARTYPE_INT, // C: int, GLSL: int, ivec* + RA_VARTYPE_FLOAT, // C: float, GLSL: float, vec*, mat* + RA_VARTYPE_TEX, // C: ra_tex*, GLSL: various sampler types + // ra_tex.params.render_src must be true + RA_VARTYPE_IMG_W, // C: ra_tex*, GLSL: various image types + // write-only (W) image for compute shaders + // ra_tex.params.storage_dst must be true + RA_VARTYPE_BYTE_UNORM, // C: uint8_t, GLSL: int, vec* (vertex data only) + RA_VARTYPE_BUF_RO, // C: ra_buf*, GLSL: uniform buffer block + // buf type must be RA_BUF_TYPE_UNIFORM + RA_VARTYPE_BUF_RW, // C: ra_buf*, GLSL: shader storage buffer block + // buf type must be RA_BUF_TYPE_SHADER_STORAGE + RA_VARTYPE_COUNT +}; + +// Returns the host size of a ra_vartype, or 0 for abstract vartypes (e.g. tex) +size_t ra_vartype_size(enum ra_vartype type); + +// Represents a uniform, texture input parameter, and similar things. +struct ra_renderpass_input { + const char *name; // name as used in the shader + enum ra_vartype type; + // The total number of values is given by dim_v * dim_m. 
+ int dim_v; // vector dimension (1 for non-vector and non-matrix) + int dim_m; // additional matrix dimension (dim_v x dim_m) + // Vertex data: byte offset of the attribute into the vertex struct + size_t offset; + // RA_VARTYPE_TEX: texture unit + // RA_VARTYPE_IMG_W: image unit + // RA_VARTYPE_BUF_* buffer binding point + // Other uniforms: unused + // Bindings must be unique within each namespace, as specified by + // desc_namespace() + int binding; +}; + +// Represents the layout requirements of an input value +struct ra_layout { + size_t align; // the alignment requirements (always a power of two) + size_t stride; // the delta between two rows of an array/matrix + size_t size; // the total size of the input +}; + +// Returns the host layout of a render pass input. Returns {0} for renderpass +// inputs without a corresponding host representation (e.g. textures/buffers) +struct ra_layout ra_renderpass_input_layout(struct ra_renderpass_input *input); + +enum ra_blend { + RA_BLEND_ZERO, + RA_BLEND_ONE, + RA_BLEND_SRC_ALPHA, + RA_BLEND_ONE_MINUS_SRC_ALPHA, +}; + +enum ra_renderpass_type { + RA_RENDERPASS_TYPE_INVALID, + RA_RENDERPASS_TYPE_RASTER, // vertex+fragment shader + RA_RENDERPASS_TYPE_COMPUTE, // compute shader +}; + +// Static part of a rendering pass. It conflates the following: +// - compiled shader and its list of uniforms +// - vertex attributes and its shader mappings +// - blending parameters +// (For Vulkan, this would be shader module + pipeline state.) +// Upon creation, the values of dynamic values such as uniform contents (whose +// initial values are not provided here) are required to be 0. +struct ra_renderpass_params { + enum ra_renderpass_type type; + + // Uniforms, including texture/sampler inputs. + struct ra_renderpass_input *inputs; + int num_inputs; + size_t push_constants_size; // must be <= ra.max_pushc_size and a multiple of 4 + + // Highly implementation-specific byte array storing a compiled version + // of the program. 
Can be used to speed up shader compilation. A backend + // can read this in renderpass_create, or set this on the newly created + // ra_renderpass params field. + bstr cached_program; + + // --- type==RA_RENDERPASS_TYPE_RASTER only + + // Describes the format of the vertex data. When using ra.glsl_vulkan, + // the order of this array must match the vertex attribute locations. + struct ra_renderpass_input *vertex_attribs; + int num_vertex_attribs; + int vertex_stride; + + // Format of the target texture + const struct ra_format *target_format; + + // Shader text, in GLSL. (Yes, you need a GLSL compiler.) + // These are complete shaders, including prelude and declarations. + const char *vertex_shader; + const char *frag_shader; + + // Target blending mode. If enable_blend is false, the blend_ fields can + // be ignored. + bool enable_blend; + enum ra_blend blend_src_rgb; + enum ra_blend blend_dst_rgb; + enum ra_blend blend_src_alpha; + enum ra_blend blend_dst_alpha; + + // If true, the contents of `target` not written to will become undefined + bool invalidate_target; + + // --- type==RA_RENDERPASS_TYPE_COMPUTE only + + // Shader text, like vertex_shader/frag_shader. + const char *compute_shader; +}; + +struct ra_renderpass_params *ra_renderpass_params_copy(void *ta_parent, + const struct ra_renderpass_params *params); + +// Conflates the following typical GPU API concepts: +// - various kinds of shaders +// - rendering pipelines +// - descriptor sets, uniforms, other bindings +// - all synchronization necessary +// - the current values of all uniforms (this one makes it relatively stateful +// from an API perspective) +struct ra_renderpass { + // All fields are read-only after creation. + struct ra_renderpass_params params; + void *priv; +}; + +// An input value (see ra_renderpass_input). +struct ra_renderpass_input_val { + int index; // index into ra_renderpass_params.inputs[] + void *data; // pointer to data according to ra_renderpass_input + // (e.g. 
type==RA_VARTYPE_FLOAT+dim_v=3,dim_m=3 => float[9]) +}; + +// Parameters for performing a rendering pass (basically the dynamic params). +// These change potentially every time. +struct ra_renderpass_run_params { + struct ra_renderpass *pass; + + // Generally this lists parameters only which changed since the last + // invocation and need to be updated. The ra_renderpass instance is + // supposed to keep unchanged values from the previous run. + // For non-primitive types like textures, these entries are always added, + // even if they do not change. + struct ra_renderpass_input_val *values; + int num_values; + void *push_constants; // must be set if params.push_constants_size > 0 + + // --- pass->params.type==RA_RENDERPASS_TYPE_RASTER only + + // target->params.render_dst must be true, and target->params.format must + // match pass->params.target_format. + struct ra_tex *target; + struct mp_rect viewport; + struct mp_rect scissors; + + // (The primitive type is always a triangle list.) + void *vertex_data; + int vertex_count; // number of vertex elements, not bytes + + // --- pass->params.type==RA_RENDERPASS_TYPE_COMPUTE only + + // Number of work groups to be run in X/Y/Z dimensions. + int compute_groups[3]; +}; + +// This is an opaque type provided by the implementation, but we want to at +// least give it a saner name than void* for code readability purposes. +typedef void ra_timer; + +// Rendering API entrypoints. (Note: there are some additional hidden features +// you need to take care of. For example, hwdec mapping will be provided +// separately from ra, but might need to call into ra private code.) +struct ra_fns { + void (*destroy)(struct ra *ra); + + // Create a texture (with undefined contents). Return NULL on failure. + // This is a rare operation, and normally textures and even FBOs for + // temporary rendering intermediate data are cached. 
+ struct ra_tex *(*tex_create)(struct ra *ra, + const struct ra_tex_params *params); + + void (*tex_destroy)(struct ra *ra, struct ra_tex *tex); + + // Upload data to a texture. This is an extremely common operation. When + // using a buffer, the contents of the buffer must exactly match the image + // - conversions between bit depth etc. are not supported. The buffer *may* + // be marked as "in use" while this operation is going on, and the contents + // must not be touched again by the API user until buf_poll returns true. + // Returns whether successful. + bool (*tex_upload)(struct ra *ra, const struct ra_tex_upload_params *params); + + // Copy data from the texture to memory. ra_tex_params.downloadable must + // have been set to true on texture creation. + bool (*tex_download)(struct ra *ra, struct ra_tex_download_params *params); + + // Create a buffer. This can be used as a persistently mapped buffer, + // a uniform buffer, a shader storage buffer or possibly others. + // Not all usage types must be supported; may return NULL if unavailable. + struct ra_buf *(*buf_create)(struct ra *ra, + const struct ra_buf_params *params); + + void (*buf_destroy)(struct ra *ra, struct ra_buf *buf); + + // Update the contents of a buffer, starting at a given offset (*must* be a + // multiple of 4) and up to a given size, with the contents of *data. This + // is an extremely common operation. Calling this while the buffer is + // considered "in use" is an error. (See: buf_poll) + void (*buf_update)(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size); + + // Returns if a buffer is currently "in use" or not. Updating the contents + // of a buffer (via buf_update or writing to buf->data) while it is still + // in use is an error and may result in graphical corruption. Optional, if + // NULL then all buffers are always usable. 
+ bool (*buf_poll)(struct ra *ra, struct ra_buf *buf); + + // Returns the layout requirements of a uniform buffer element. Optional, + // but must be implemented if RA_CAP_BUF_RO is supported. + struct ra_layout (*uniform_layout)(struct ra_renderpass_input *inp); + + // Returns the layout requirements of a push constant element. Optional, + // but must be implemented if ra.max_pushc_size > 0. + struct ra_layout (*push_constant_layout)(struct ra_renderpass_input *inp); + + // Returns an abstract namespace index for a given renderpass input type. + // This will always be a value >= 0 and < RA_VARTYPE_COUNT. This is used to + // figure out which inputs may share the same value of `binding`. + int (*desc_namespace)(struct ra *ra, enum ra_vartype type); + + // Clear the dst with the given color (rgba) and within the given scissor. + // dst must have dst->params.render_dst==true. Content outside of the + // scissor is preserved. + void (*clear)(struct ra *ra, struct ra_tex *dst, float color[4], + struct mp_rect *scissor); + + // Copy a sub-rectangle from one texture to another. The source/dest region + // is always within the texture bounds. Areas outside the dest region are + // preserved. The formats of the textures must be loosely compatible. The + // dst texture can be a swapchain framebuffer, but src can not. Only 2D + // textures are supported. + // The textures must have blit_src and blit_dst set, respectively. + // Rectangles with negative width/height lead to flipping, different src/dst + // sizes lead to point scaling. Coordinates are always in pixels. + // Optional. Only available if RA_CAP_BLIT is set (if it's not set, it must + // not be called, even if it's non-NULL). + void (*blit)(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc); + + // Compile a shader and create a pipeline. This is a rare operation. + // The params pointer and anything it points to must stay valid until + // renderpass_destroy. 
+ struct ra_renderpass *(*renderpass_create)(struct ra *ra, + const struct ra_renderpass_params *params); + + void (*renderpass_destroy)(struct ra *ra, struct ra_renderpass *pass); + + // Perform a render pass, basically drawing a list of triangles to a FBO. + // This is an extremely common operation. + void (*renderpass_run)(struct ra *ra, + const struct ra_renderpass_run_params *params); + + // Create a timer object. Returns NULL on failure, or if timers are + // unavailable for some reason. Optional. + ra_timer *(*timer_create)(struct ra *ra); + + void (*timer_destroy)(struct ra *ra, ra_timer *timer); + + // Start recording a timer. Note that valid usage requires you to pair + // every start with a stop. Trying to start a timer twice, or trying to + // stop a timer before having started it, constitutes invalid usage. + void (*timer_start)(struct ra *ra, ra_timer *timer); + + // Stop recording a timer. This also returns any results that have been + // measured since the last usage of this ra_timer. It's important to note + // that GPU timer measurements are asynchronous, so this function does not + // always produce a value - and the values it does produce are typically + // delayed by a few frames. When no value is available, this returns 0. + uint64_t (*timer_stop)(struct ra *ra, ra_timer *timer); + + // Associates a marker with any past error messages, for debugging + // purposes. Optional. 
+ void (*debug_marker)(struct ra *ra, const char *msg); +}; + +struct ra_tex *ra_tex_create(struct ra *ra, const struct ra_tex_params *params); +void ra_tex_free(struct ra *ra, struct ra_tex **tex); + +struct ra_buf *ra_buf_create(struct ra *ra, const struct ra_buf_params *params); +void ra_buf_free(struct ra *ra, struct ra_buf **buf); + +void ra_free(struct ra **ra); + +const struct ra_format *ra_find_unorm_format(struct ra *ra, + int bytes_per_component, + int n_components); +const struct ra_format *ra_find_uint_format(struct ra *ra, + int bytes_per_component, + int n_components); +const struct ra_format *ra_find_float16_format(struct ra *ra, int n_components); +const struct ra_format *ra_find_named_format(struct ra *ra, const char *name); + +struct ra_imgfmt_desc { + int num_planes; + const struct ra_format *planes[4]; + // Chroma pixel size (1x1 is 4:4:4) + uint8_t chroma_w, chroma_h; + // Component storage size in bits (possibly padded). For formats with + // different sizes per component, this is arbitrary. For padded formats + // like P010 or YUV420P10, padding is included. + int component_bits; + // Like mp_regular_imgfmt.component_pad. + int component_pad; + // == planes[n].ctype (RA_CTYPE_UNKNOWN if not applicable) + enum ra_ctype component_type; + // For each texture and each texture output (rgba order) describe what + // component it returns. + // The values are like the values in mp_regular_imgfmt_plane.components[]. + // Access as components[plane_nr][component_index]. Set unused items to 0. + // For ra_format.luminance_alpha, this returns 1/2 ("rg") instead of 1/4 + // ("ra"). the logic is that the texture format has 2 channels, thus the + // data must be returned in the first two components. The renderer fixes + // this later. 
+ uint8_t components[4][4]; +}; + +const char *ra_fmt_glsl_format(const struct ra_format *fmt); + +bool ra_get_imgfmt_desc(struct ra *ra, int imgfmt, struct ra_imgfmt_desc *out); + +void ra_dump_tex_formats(struct ra *ra, int msgl); +void ra_dump_imgfmt_desc(struct ra *ra, const struct ra_imgfmt_desc *desc, + int msgl); +void ra_dump_img_formats(struct ra *ra, int msgl); diff --git a/video/out/gpu/shader_cache.c b/video/out/gpu/shader_cache.c new file mode 100644 index 0000000..3e05173 --- /dev/null +++ b/video/out/gpu/shader_cache.c @@ -0,0 +1,1056 @@ +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> + +#include <libavutil/sha.h> +#include <libavutil/mem.h> + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "shader_cache.h" +#include "utils.h" + +// Force cache flush if more than this number of shaders is created. +#define SC_MAX_ENTRIES 256 + +union uniform_val { + float f[9]; // RA_VARTYPE_FLOAT + int i[4]; // RA_VARTYPE_INT + struct ra_tex *tex; // RA_VARTYPE_TEX, RA_VARTYPE_IMG_* + struct ra_buf *buf; // RA_VARTYPE_BUF_* +}; + +enum sc_uniform_type { + SC_UNIFORM_TYPE_GLOBAL = 0, // global uniform (RA_CAP_GLOBAL_UNIFORM) + SC_UNIFORM_TYPE_UBO = 1, // uniform buffer (RA_CAP_BUF_RO) + SC_UNIFORM_TYPE_PUSHC = 2, // push constant (ra.max_pushc_size) +}; + +struct sc_uniform { + enum sc_uniform_type type; + struct ra_renderpass_input input; + const char *glsl_type; + union uniform_val v; + char *buffer_format; + // for SC_UNIFORM_TYPE_UBO/PUSHC: + struct ra_layout layout; + size_t offset; // byte offset within the buffer +}; + +struct sc_cached_uniform { + union uniform_val v; + int index; // for ra_renderpass_input_val + bool set; // whether the uniform has ever been set +}; + +struct sc_entry { + struct ra_renderpass *pass; + struct sc_cached_uniform *cached_uniforms; + int num_cached_uniforms; + bstr total; + struct 
timer_pool *timer; + struct ra_buf *ubo; + int ubo_index; // for ra_renderpass_input_val.index + void *pushc; +}; + +struct gl_shader_cache { + struct ra *ra; + struct mp_log *log; + + // permanent + char **exts; + int num_exts; + + // this is modified during use (gl_sc_add() etc.) and reset for each shader + bstr prelude_text; + bstr header_text; + bstr text; + + // Next binding point (texture unit, image unit, buffer binding, etc.) + // In OpenGL these are separate for each input type + int next_binding[RA_VARTYPE_COUNT]; + bool next_uniform_dynamic; + + struct ra_renderpass_params params; + + struct sc_entry **entries; + int num_entries; + + struct sc_entry *current_shader; // set by gl_sc_generate() + + struct sc_uniform *uniforms; + int num_uniforms; + + int ubo_binding; + size_t ubo_size; + size_t pushc_size; + + struct ra_renderpass_input_val *values; + int num_values; + + // For checking that the user is calling gl_sc_reset() properly. + bool needs_reset; + + bool error_state; // true if an error occurred + + // temporary buffers (avoids frequent reallocations) + bstr tmp[6]; + + // For the disk-cache. + char *cache_dir; + struct mpv_global *global; // can be NULL +}; + +struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, + struct mp_log *log) +{ + struct gl_shader_cache *sc = talloc_ptrtype(NULL, sc); + *sc = (struct gl_shader_cache){ + .ra = ra, + .global = global, + .log = log, + }; + gl_sc_reset(sc); + return sc; +} + +// Reset the previous pass. This must be called after gl_sc_generate and before +// starting a new shader. It may also be called on errors. 
+void gl_sc_reset(struct gl_shader_cache *sc) +{ + sc->prelude_text.len = 0; + sc->header_text.len = 0; + sc->text.len = 0; + for (int n = 0; n < sc->num_uniforms; n++) + talloc_free((void *)sc->uniforms[n].input.name); + sc->num_uniforms = 0; + sc->ubo_binding = 0; + sc->ubo_size = 0; + sc->pushc_size = 0; + for (int i = 0; i < RA_VARTYPE_COUNT; i++) + sc->next_binding[i] = 0; + sc->next_uniform_dynamic = false; + sc->current_shader = NULL; + sc->params = (struct ra_renderpass_params){0}; + sc->needs_reset = false; +} + +static void sc_flush_cache(struct gl_shader_cache *sc) +{ + MP_DBG(sc, "flushing shader cache\n"); + + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *e = sc->entries[n]; + ra_buf_free(sc->ra, &e->ubo); + if (e->pass) + sc->ra->fns->renderpass_destroy(sc->ra, e->pass); + timer_pool_destroy(e->timer); + talloc_free(e); + } + sc->num_entries = 0; +} + +void gl_sc_destroy(struct gl_shader_cache *sc) +{ + if (!sc) + return; + gl_sc_reset(sc); + sc_flush_cache(sc); + talloc_free(sc); +} + +bool gl_sc_error_state(struct gl_shader_cache *sc) +{ + return sc->error_state; +} + +void gl_sc_reset_error(struct gl_shader_cache *sc) +{ + sc->error_state = false; +} + +void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name) +{ + for (int n = 0; n < sc->num_exts; n++) { + if (strcmp(sc->exts[n], name) == 0) + return; + } + MP_TARRAY_APPEND(sc, sc->exts, sc->num_exts, talloc_strdup(sc, name)); +} + +#define bstr_xappend0(sc, b, s) bstr_xappend(sc, b, bstr0(s)) + +void gl_sc_add(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->text, text); +} + +void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd(struct gl_shader_cache *sc, const char *text) +{ + bstr_xappend0(sc, &sc->header_text, text); +} + +void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) 
+{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->header_text, textf, ap); + va_end(ap); +} + +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text) +{ + bstr_xappend(sc, &sc->header_text, text); +} + +void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) +{ + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(sc, &sc->prelude_text, textf, ap); + va_end(ap); +} + +static struct sc_uniform *find_uniform(struct gl_shader_cache *sc, + const char *name) +{ + struct sc_uniform new = { + .input = { + .dim_v = 1, + .dim_m = 1, + }, + }; + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (strcmp(u->input.name, name) == 0) { + const char *allocname = u->input.name; + *u = new; + u->input.name = allocname; + return u; + } + } + + // not found -> add it + new.input.name = talloc_strdup(NULL, name); + MP_TARRAY_APPEND(sc, sc->uniforms, sc->num_uniforms, new); + return &sc->uniforms[sc->num_uniforms - 1]; +} + +static int gl_sc_next_binding(struct gl_shader_cache *sc, enum ra_vartype type) +{ + return sc->next_binding[sc->ra->fns->desc_namespace(sc->ra, type)]++; +} + +void gl_sc_uniform_dynamic(struct gl_shader_cache *sc) +{ + sc->next_uniform_dynamic = true; +} + +// Updates the metadata for the given sc_uniform. Assumes sc_uniform->input +// and glsl_type/buffer_format are already set. 
+static void update_uniform_params(struct gl_shader_cache *sc, struct sc_uniform *u) +{ + bool dynamic = sc->next_uniform_dynamic; + sc->next_uniform_dynamic = false; + + // Try not using push constants for "large" values like matrices, since + // this is likely to both exceed the VGPR budget as well as the pushc size + // budget + bool try_pushc = u->input.dim_m == 1 || dynamic; + + // Attempt using push constants first + if (try_pushc && sc->ra->glsl_vulkan && sc->ra->max_pushc_size) { + struct ra_layout layout = sc->ra->fns->push_constant_layout(&u->input); + size_t offset = MP_ALIGN_UP(sc->pushc_size, layout.align); + // Push constants have limited size, so make sure we don't exceed this + size_t new_size = offset + layout.size; + if (new_size <= sc->ra->max_pushc_size) { + u->type = SC_UNIFORM_TYPE_PUSHC; + u->layout = layout; + u->offset = offset; + sc->pushc_size = new_size; + return; + } + } + + // Attempt using uniform buffer next. The GLSL version 440 check is due + // to explicit offsets on UBO entries. 
In theory we could leave away + // the offsets and support UBOs for older GL as well, but this is a nice + // safety net for driver bugs (and also rules out potentially buggy drivers) + // Also avoid UBOs for highly dynamic stuff since that requires synchronizing + // the UBO writes every frame + bool try_ubo = !(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM) || !dynamic; + if (try_ubo && sc->ra->glsl_version >= 440 && (sc->ra->caps & RA_CAP_BUF_RO)) { + u->type = SC_UNIFORM_TYPE_UBO; + u->layout = sc->ra->fns->uniform_layout(&u->input); + u->offset = MP_ALIGN_UP(sc->ubo_size, u->layout.align); + sc->ubo_size = u->offset + u->layout.size; + return; + } + + // If all else fails, use global uniforms + assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); + u->type = SC_UNIFORM_TYPE_GLOBAL; +} + +void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, + struct ra_tex *tex) +{ + const char *glsl_type = "sampler2D"; + if (tex->params.dimensions == 1) { + glsl_type = "sampler1D"; + } else if (tex->params.dimensions == 3) { + glsl_type = "sampler3D"; + } else if (tex->params.non_normalized) { + glsl_type = "sampler2DRect"; + } else if (tex->params.external_oes) { + glsl_type = "samplerExternalOES"; + } else if (tex->params.format->ctype == RA_CTYPE_UINT) { + glsl_type = sc->ra->glsl_es ? "highp usampler2D" : "usampler2D"; + } + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_TEX; + u->glsl_type = glsl_type; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.tex = tex; +} + +void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, + struct ra_tex *tex) +{ + gl_sc_enable_extension(sc, "GL_ARB_shader_image_load_store"); + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_IMG_W; + u->glsl_type = sc->ra->glsl_es ? 
"writeonly highp image2D" : "writeonly image2D"; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.tex = tex; +} + +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, + char *format, ...) +{ + assert(sc->ra->caps & RA_CAP_BUF_RW); + gl_sc_enable_extension(sc, "GL_ARB_shader_storage_buffer_object"); + + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_BUF_RW; + u->glsl_type = ""; + u->input.binding = gl_sc_next_binding(sc, u->input.type); + u->v.buf = buf; + + va_list ap; + va_start(ap, format); + u->buffer_format = ta_vasprintf(sc, format, ap); + va_end(ap); +} + +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->glsl_type = "float"; + update_uniform_params(sc, u); + u->v.f[0] = f; +} + +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int i) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_INT; + u->glsl_type = "int"; + update_uniform_params(sc, u); + u->v.i[0] = i; +} + +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; + u->glsl_type = "vec2"; + update_uniform_params(sc, u); + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; +} + +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; + u->glsl_type = "vec3"; + update_uniform_params(sc, u); + u->v.f[0] = f[0]; + u->v.f[1] = f[1]; + u->v.f[2] = f[2]; +} + +static void transpose2x2(float r[2 * 2]) +{ + MPSWAP(float, r[0+2*1], r[1+2*0]); +} + +void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, + bool transpose, float *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 2; 
+ u->input.dim_m = 2; + u->glsl_type = "mat2"; + update_uniform_params(sc, u); + for (int n = 0; n < 4; n++) + u->v.f[n] = v[n]; + if (transpose) + transpose2x2(&u->v.f[0]); +} + +/* Transposes a 3x3 matrix stored as 9 consecutive floats, in place, by swapping the three pairs of off-diagonal elements. */ +static void transpose3x3(float r[3 * 3]) +{ + MPSWAP(float, r[0+3*1], r[1+3*0]); + MPSWAP(float, r[0+3*2], r[2+3*0]); + MPSWAP(float, r[1+3*2], r[2+3*1]); +} + +/* Sets the mat3 uniform 'name' from the 9 floats at 'v'; if 'transpose' is set, the copy stored in the uniform is transposed ('v' itself is not modified). */ +void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, + bool transpose, float *v) +{ + struct sc_uniform *u = find_uniform(sc, name); + u->input.type = RA_VARTYPE_FLOAT; + u->input.dim_v = 3; + u->input.dim_m = 3; + u->glsl_type = "mat3"; + update_uniform_params(sc, u); + for (int n = 0; n < 9; n++) + u->v.f[n] = v[n]; + if (transpose) + transpose3x3(&u->v.f[0]); +} + +/* Enables blending for the pass being built and records the four blend factors in sc->params. */ +void gl_sc_blend(struct gl_shader_cache *sc, + enum ra_blend blend_src_rgb, + enum ra_blend blend_dst_rgb, + enum ra_blend blend_src_alpha, + enum ra_blend blend_dst_alpha) +{ + sc->params.enable_blend = true; + sc->params.blend_src_rgb = blend_src_rgb; + sc->params.blend_dst_rgb = blend_dst_rgb; + sc->params.blend_src_alpha = blend_src_alpha; + sc->params.blend_dst_alpha = blend_dst_alpha; +} + +/* Returns the bool/bvecN type name for 'dims' components when the GLSL version is >= 130, and the float/vecN name otherwise. 'dims' must be in 1..4 (asserted); index 0 of both tables is intentionally unused. */ +const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims) +{ + static const char *bvecs[] = { + [1] = "bool", + [2] = "bvec2", + [3] = "bvec3", + [4] = "bvec4", + }; + + static const char *vecs[] = { + [1] = "float", + [2] = "vec2", + [3] = "vec3", + [4] = "vec4", + }; + + assert(dims > 0 && dims < MP_ARRAY_SIZE(bvecs)); + return sc->ra->glsl_version >= 130 ? bvecs[dims] : vecs[dims]; +} + +static const char *vao_glsl_type(const struct ra_renderpass_input *e) +{ + // pretty dumb... 
too dumb, but works for us + switch (e->dim_v) { + case 1: return "float"; + case 2: return "vec2"; + case 3: return "vec3"; + case 4: return "vec4"; + default: MP_ASSERT_UNREACHABLE(); + } +} + +/* Uploads the value of 'u' into 'ubo' starting at u->offset. One buf_update() call is issued per u->input.dim_m iteration: each copies src_layout.stride bytes, while the destination advances by the stride of the uniform's own layout (u->layout), which may differ from the source stride. */ +static void update_ubo(struct ra *ra, struct ra_buf *ubo, struct sc_uniform *u) +{ + uintptr_t src = (uintptr_t) &u->v; + size_t dst = u->offset; + struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); + struct ra_layout dst_layout = u->layout; + + for (int i = 0; i < u->input.dim_m; i++) { + ra->fns->buf_update(ra, ubo, dst, (void *)src, src_layout.stride); + src += src_layout.stride; + dst += dst_layout.stride; + } +} + +/* Same copy scheme as update_ubo(), but the destination is the host-side push constant block 'pushc' and the copy is a plain memcpy(). */ +static void update_pushc(struct ra *ra, void *pushc, struct sc_uniform *u) +{ + uintptr_t src = (uintptr_t) &u->v; + uintptr_t dst = (uintptr_t) pushc + (ptrdiff_t) u->offset; + struct ra_layout src_layout = ra_renderpass_input_layout(&u->input); + struct ra_layout dst_layout = u->layout; + + for (int i = 0; i < u->input.dim_m; i++) { + memcpy((void *)dst, (void *)src, src_layout.stride); + src += src_layout.stride; + dst += dst_layout.stride; + } +} + +/* Pushes uniform 'u' (slot 'n' of the entry's cached_uniforms) to its backing store. Nothing is done if the slot was already set and its first layout.size bytes equal the new value; a layout size of 0 always counts as changed. */ +static void update_uniform(struct gl_shader_cache *sc, struct sc_entry *e, + struct sc_uniform *u, int n) +{ + struct sc_cached_uniform *un = &e->cached_uniforms[n]; + struct ra_layout layout = ra_renderpass_input_layout(&u->input); + if (layout.size > 0 && un->set && memcmp(&un->v, &u->v, layout.size) == 0) + return; + + un->v = u->v; + un->set = true; + + static const char *desc[] = { + [SC_UNIFORM_TYPE_UBO] = "UBO", + [SC_UNIFORM_TYPE_PUSHC] = "PC", + [SC_UNIFORM_TYPE_GLOBAL] = "global", + }; + MP_TRACE(sc, "Updating %s uniform '%s'\n", desc[u->type], u->input.name); + + switch (u->type) { + case SC_UNIFORM_TYPE_GLOBAL: { + /* Global uniforms are not written anywhere here; they are queued in sc->values, pointing at the cached copy. */ + struct ra_renderpass_input_val value = { + .index = un->index, + .data = &un->v, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, value); + break; + } + case SC_UNIFORM_TYPE_UBO: + assert(e->ubo); + update_ubo(sc->ra, e->ubo, u); + break; + case 
SC_UNIFORM_TYPE_PUSHC: + assert(e->pushc); + update_pushc(sc->ra, e->pushc, u); + break; + default: MP_ASSERT_UNREACHABLE(); + } +} + +/* Replaces sc->cache_dir. A non-empty 'dir' is passed through mp_get_user_path(); otherwise mp_find_user_file(..., "cache", "") supplies the path. The result is duplicated onto 'sc' and the temporary is freed. */ +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, char *dir) +{ + talloc_free(sc->cache_dir); + if (dir && dir[0]) { + dir = mp_get_user_path(NULL, sc->global, dir); + } else { + dir = mp_find_user_file(NULL, sc->global, "cache", ""); + } + sc->cache_dir = talloc_strdup(sc, dir); + talloc_free(dir); +} + +/* Creates the renderpass for 'entry' from a copy of sc->params, plus the UBO and push constant storage if the shader uses them. When a cache directory is set, a previously stored program is looked up under the SHA-256 of entry->total and handed to the ra as params.cached_program. Returns false on failure. */ +static bool create_pass(struct gl_shader_cache *sc, struct sc_entry *entry) +{ + bool ret = false; + + void *tmp = talloc_new(NULL); + struct ra_renderpass_params params = sc->params; + + const char *cache_header = "mpv shader cache v1\n"; + char *cache_filename = NULL; + char *cache_dir = NULL; + + if (sc->cache_dir && sc->cache_dir[0]) { + // Try to load it from a disk cache. + cache_dir = mp_get_user_path(tmp, sc->global, sc->cache_dir); + + struct AVSHA *sha = av_sha_alloc(); + MP_HANDLE_OOM(sha); + av_sha_init(sha, 256); + av_sha_update(sha, entry->total.start, entry->total.len); + + uint8_t hash[256 / 8]; + av_sha_final(sha, hash); + av_free(sha); + + /* Two hex digits per hash byte plus the terminating NUL. */ + char hashstr[256 / 8 * 2 + 1]; + for (int n = 0; n < 256 / 8; n++) + snprintf(hashstr + n * 2, sizeof(hashstr) - n * 2, "%02X", hash[n]); + + cache_filename = mp_path_join(tmp, cache_dir, hashstr); + if (stat(cache_filename, &(struct stat){0}) == 0) { + MP_DBG(sc, "Trying to load shader from disk...\n"); + struct bstr cachedata = + stream_read_file(cache_filename, tmp, sc->global, 1000000000); + /* Only accept files that start with the expected header; the header itself is stripped. */ + if (bstr_eatstart0(&cachedata, cache_header)) + params.cached_program = cachedata; + } + } + + // If using a UBO, also make sure to add it as an input value so the RA + // can see it + if (sc->ubo_size) { + entry->ubo_index = sc->params.num_inputs; + struct ra_renderpass_input ubo_input = { + .name = "UBO", + .type = RA_VARTYPE_BUF_RO, + .dim_v = 1, + .dim_m = 1, + .binding = sc->ubo_binding, + }; + MP_TARRAY_APPEND(sc, params.inputs, params.num_inputs, ubo_input); + } + + if 
(sc->pushc_size) { + params.push_constants_size = MP_ALIGN_UP(sc->pushc_size, 4); + entry->pushc = talloc_zero_size(entry, params.push_constants_size); + } + + if (sc->ubo_size) { + struct ra_buf_params ubo_params = { + .type = RA_BUF_TYPE_UNIFORM, + .size = sc->ubo_size, + .host_mutable = true, + }; + + entry->ubo = ra_buf_create(sc->ra, &ubo_params); + if (!entry->ubo) { + MP_ERR(sc, "Failed creating uniform buffer!\n"); + goto error; + } + } + + entry->pass = sc->ra->fns->renderpass_create(sc->ra, ¶ms); + if (!entry->pass) + goto error; + + if (entry->pass && cache_filename) { + bstr nc = entry->pass->params.cached_program; + if (nc.len && !bstr_equals(params.cached_program, nc)) { + mp_mkdirp(cache_dir); + + MP_DBG(sc, "Writing shader cache file: %s\n", cache_filename); + FILE *out = fopen(cache_filename, "wb"); + if (out) { + fwrite(cache_header, strlen(cache_header), 1, out); + fwrite(nc.start, nc.len, 1, out); + fclose(out); + } + } + } + + ret = true; + +error: + talloc_free(tmp); + return ret; +} + +#define ADD(x, ...) 
bstr_xappend_asprintf(sc, (x), __VA_ARGS__) +#define ADD_BSTR(x, s) bstr_xappend(sc, (x), (s)) + +/* Appends the GLSL declarations for all uniforms of 'sc' to 'dst': first the UBO block (if sc->ubo_size > 0), then the push constant block (if sc->pushc_size > 0), then one declaration per SC_UNIFORM_TYPE_GLOBAL uniform. */ +static void add_uniforms(struct gl_shader_cache *sc, bstr *dst) +{ + // Add all of the UBO entries separately as members of their own buffer + if (sc->ubo_size > 0) { + ADD(dst, "layout(std140, binding=%d) uniform UBO {\n", sc->ubo_binding); + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_UBO) + continue; + ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, + u->input.name); + } + ADD(dst, "};\n"); + } + + // Ditto for push constants + if (sc->pushc_size > 0) { + ADD(dst, "layout(std430, push_constant) uniform PushC {\n"); + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_PUSHC) + continue; + ADD(dst, "layout(offset=%zu) %s %s;\n", u->offset, u->glsl_type, + u->input.name); + } + ADD(dst, "};\n"); + } + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_uniform *u = &sc->uniforms[n]; + if (u->type != SC_UNIFORM_TYPE_GLOBAL) + continue; + switch (u->input.type) { + case RA_VARTYPE_INT: + case RA_VARTYPE_FLOAT: + /* Plain int/float globals are only valid if the ra supports global uniforms; after the check they are declared like textures. */ + assert(sc->ra->caps & RA_CAP_GLOBAL_UNIFORM); + MP_FALLTHROUGH; + case RA_VARTYPE_TEX: + // Vulkan requires explicitly assigning the bindings in the shader + // source. For OpenGL it's optional, but requires higher GL version + // so we don't do it (and instead have ra_gl update the bindings + // after program creation). 
+ if (sc->ra->glsl_vulkan) + ADD(dst, "layout(binding=%d) ", u->input.binding); + ADD(dst, "uniform %s %s;\n", u->glsl_type, u->input.name); + break; + case RA_VARTYPE_BUF_RO: + ADD(dst, "layout(std140, binding=%d) uniform %s { %s };\n", + u->input.binding, u->input.name, u->buffer_format); + break; + case RA_VARTYPE_BUF_RW: + ADD(dst, "layout(std430, binding=%d) restrict coherent buffer %s { %s };\n", + u->input.binding, u->input.name, u->buffer_format); + break; + case RA_VARTYPE_IMG_W: { + // For better compatibility, we have to explicitly label the + // type of data we will be reading/writing to this image. + const char *fmt = u->v.tex->params.format->glsl_format; + + if (sc->ra->glsl_vulkan) { + if (fmt) { + ADD(dst, "layout(binding=%d, %s) ", u->input.binding, fmt); + } else { + ADD(dst, "layout(binding=%d) ", u->input.binding); + } + } else if (fmt) { + ADD(dst, "layout(%s) ", fmt); + } + ADD(dst, "uniform restrict %s %s;\n", u->glsl_type, u->input.name); + } + } + } +} + +// 1. Generate vertex and fragment shaders from the fragment shader text added +// with gl_sc_add(). The generated shader program is cached (based on the +// text), so actual compilation happens only the first time. +// 2. Update the uniforms and textures set with gl_sc_uniform_*. +// 3. Make the new shader program current (glUseProgram()). +// After that, you render, and then you call gl_sc_reset(), which does: +// 1. Unbind the program and all textures. +// 2. Reset the sc state and prepare for a new shader program. (All uniforms +// and fragment operations needed for the next program have to be re-added.) +static void gl_sc_generate(struct gl_shader_cache *sc, + enum ra_renderpass_type type, + const struct ra_format *target_format, + const struct ra_renderpass_input *vao, + int vao_len, size_t vertex_stride) +{ + int glsl_version = sc->ra->glsl_version; + int glsl_es = sc->ra->glsl_es ? 
glsl_version : 0; + + sc->params.type = type; + + // gl_sc_reset() must be called after ending the previous render process, + // and before starting a new one. + assert(!sc->needs_reset); + sc->needs_reset = true; + + // If using a UBO, pick a binding (needed for shader generation) + if (sc->ubo_size) + sc->ubo_binding = gl_sc_next_binding(sc, RA_VARTYPE_BUF_RO); + + for (int n = 0; n < MP_ARRAY_SIZE(sc->tmp); n++) + sc->tmp[n].len = 0; + + // set up shader text (header + uniforms + body) + bstr *header = &sc->tmp[0]; + /* glsl_es is 0 on non-ES targets, so the " es" suffix is only emitted for GLSL ES with version >= 300. */ + ADD(header, "#version %d%s\n", glsl_version, glsl_es >= 300 ? " es" : ""); + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + // This extension cannot be enabled in fragment shader. Enable it as + // an exception for compute shader. + ADD(header, "#extension GL_ARB_compute_shader : enable\n"); + } + for (int n = 0; n < sc->num_exts; n++) + ADD(header, "#extension %s : enable\n", sc->exts[n]); + if (glsl_es) { + ADD(header, "#ifdef GL_FRAGMENT_PRECISION_HIGH\n"); + ADD(header, "precision highp float;\n"); + ADD(header, "#else\n"); + ADD(header, "precision mediump float;\n"); + ADD(header, "#endif\n"); + + ADD(header, "precision mediump sampler2D;\n"); + if (sc->ra->caps & RA_CAP_TEX_3D) + ADD(header, "precision mediump sampler3D;\n"); + } + + if (glsl_version >= 130) { + ADD(header, "#define tex1D texture\n"); + ADD(header, "#define tex3D texture\n"); + } else { + ADD(header, "#define tex1D texture1D\n"); + ADD(header, "#define tex3D texture3D\n"); + ADD(header, "#define texture texture2D\n"); + } + + // Additional helpers. + ADD(header, "#define LUT_POS(x, lut_size)" + " mix(0.5 / (lut_size), 1.0 - 0.5 / (lut_size), (x))\n"); + + char *vert_in = glsl_version >= 130 ? "in" : "attribute"; + char *vert_out = glsl_version >= 130 ? "out" : "varying"; + char *frag_in = glsl_version >= 130 ? 
"in" : "varying"; + + struct bstr *vert = NULL, *frag = NULL, *comp = NULL; + + if (type == RA_RENDERPASS_TYPE_RASTER) { + // vertex shader: we don't use the vertex shader, so just setup a + // dummy, which passes through the vertex array attributes. + bstr *vert_head = &sc->tmp[1]; + ADD_BSTR(vert_head, *header); + bstr *vert_body = &sc->tmp[2]; + ADD(vert_body, "void main() {\n"); + bstr *frag_vaos = &sc->tmp[3]; + for (int n = 0; n < vao_len; n++) { + const struct ra_renderpass_input *e = &vao[n]; + const char *glsl_type = vao_glsl_type(e); + char loc[32] = {0}; + if (sc->ra->glsl_vulkan) + snprintf(loc, sizeof(loc), "layout(location=%d) ", n); + if (strcmp(e->name, "position") == 0) { + // setting raster pos. requires setting gl_Position magic variable + assert(e->dim_v == 2 && e->type == RA_VARTYPE_FLOAT); + ADD(vert_head, "%s%s vec2 vertex_position;\n", loc, vert_in); + ADD(vert_body, "gl_Position = vec4(vertex_position, 1.0, 1.0);\n"); + } else { + ADD(vert_head, "%s%s %s vertex_%s;\n", loc, vert_in, glsl_type, e->name); + ADD(vert_head, "%s%s %s %s;\n", loc, vert_out, glsl_type, e->name); + ADD(vert_body, "%s = vertex_%s;\n", e->name, e->name); + ADD(frag_vaos, "%s%s %s %s;\n", loc, frag_in, glsl_type, e->name); + } + } + ADD(vert_body, "}\n"); + vert = vert_head; + ADD_BSTR(vert, *vert_body); + + // fragment shader; still requires adding used uniforms and VAO elements + frag = &sc->tmp[4]; + ADD_BSTR(frag, *header); + if (glsl_version >= 130) { + ADD(frag, "%sout vec4 out_color;\n", + sc->ra->glsl_vulkan ? 
"layout(location=0) " : ""); + } + ADD_BSTR(frag, *frag_vaos); + add_uniforms(sc, frag); + + ADD_BSTR(frag, sc->prelude_text); + ADD_BSTR(frag, sc->header_text); + + ADD(frag, "void main() {\n"); + // we require _all_ frag shaders to write to a "vec4 color" + ADD(frag, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); + ADD_BSTR(frag, sc->text); + if (glsl_version >= 130) { + ADD(frag, "out_color = color;\n"); + } else { + ADD(frag, "gl_FragColor = color;\n"); + } + ADD(frag, "}\n"); + + // We need to fix the format of the render dst at renderpass creation + // time + assert(target_format); + sc->params.target_format = target_format; + } + + if (type == RA_RENDERPASS_TYPE_COMPUTE) { + /* Reuses tmp[4]; the raster and compute branches are mutually exclusive, so this does not clash with 'frag'. */ + comp = &sc->tmp[4]; + ADD_BSTR(comp, *header); + + add_uniforms(sc, comp); + + ADD_BSTR(comp, sc->prelude_text); + ADD_BSTR(comp, sc->header_text); + + ADD(comp, "void main() {\n"); + ADD(comp, "vec4 color = vec4(0.0, 0.0, 0.0, 1.0);\n"); // convenience + ADD_BSTR(comp, sc->text); + ADD(comp, "}\n"); + } + + /* Build the in-memory cache key: pass type, every generated shader stage, blend state and target format name. It is compared byte-wise against the 'total' of the existing entries below. */ + bstr *hash_total = &sc->tmp[5]; + + ADD(hash_total, "type %d\n", sc->params.type); + + if (frag) { + ADD_BSTR(hash_total, *frag); + sc->params.frag_shader = frag->start; + } + ADD(hash_total, "\n"); + if (vert) { + ADD_BSTR(hash_total, *vert); + sc->params.vertex_shader = vert->start; + } + ADD(hash_total, "\n"); + if (comp) { + ADD_BSTR(hash_total, *comp); + sc->params.compute_shader = comp->start; + } + ADD(hash_total, "\n"); + + if (sc->params.enable_blend) { + ADD(hash_total, "blend %d %d %d %d\n", + sc->params.blend_src_rgb, sc->params.blend_dst_rgb, + sc->params.blend_src_alpha, sc->params.blend_dst_alpha); + } + + if (sc->params.target_format) + ADD(hash_total, "format %s\n", sc->params.target_format->name); + + struct sc_entry *entry = NULL; + for (int n = 0; n < sc->num_entries; n++) { + struct sc_entry *cur = sc->entries[n]; + if (bstr_equals(cur->total, *hash_total)) { + entry = cur; + break; + } + } + if (!entry) { + if (sc->num_entries == SC_MAX_ENTRIES) + 
sc_flush_cache(sc); + entry = talloc_ptrtype(NULL, entry); + *entry = (struct sc_entry){ + .total = bstrdup(entry, *hash_total), + .timer = timer_pool_create(sc->ra), + }; + + // The vertex shader uses mangled names for the vertex attributes, so + // that the fragment shader can use the "real" names. But the shader is + // expecting the vertex attribute names (at least with older GLSL + // targets for GL). + sc->params.vertex_stride = vertex_stride; + for (int n = 0; n < vao_len; n++) { + struct ra_renderpass_input attrib = vao[n]; + attrib.name = talloc_asprintf(entry, "vertex_%s", attrib.name); + MP_TARRAY_APPEND(sc, sc->params.vertex_attribs, + sc->params.num_vertex_attribs, attrib); + } + + for (int n = 0; n < sc->num_uniforms; n++) { + struct sc_cached_uniform u = {0}; + if (sc->uniforms[n].type == SC_UNIFORM_TYPE_GLOBAL) { + // global uniforms need to be made visible to the ra_renderpass + u.index = sc->params.num_inputs; + MP_TARRAY_APPEND(sc, sc->params.inputs, sc->params.num_inputs, + sc->uniforms[n].input); + } + MP_TARRAY_APPEND(entry, entry->cached_uniforms, + entry->num_cached_uniforms, u); + } + /* The entry is added to the cache even if pass creation failed; such an entry has no pass and is rejected just below on every later lookup. */ + if (!create_pass(sc, entry)) + sc->error_state = true; + MP_TARRAY_APPEND(sc, sc->entries, sc->num_entries, entry); + } + + if (!entry->pass) { + sc->current_shader = NULL; + return; + } + + assert(sc->num_uniforms == entry->num_cached_uniforms); + + sc->num_values = 0; + for (int n = 0; n < sc->num_uniforms; n++) + update_uniform(sc, entry, &sc->uniforms[n], n); + + // If we're using a UBO, make sure to bind it as well + if (sc->ubo_size) { + struct ra_renderpass_input_val ubo_val = { + .index = entry->ubo_index, + .data = &entry->ubo, + }; + MP_TARRAY_APPEND(sc, sc->values, sc->num_values, ubo_val); + } + + sc->current_shader = entry; +} + +/* Generates (or fetches from the cache) the raster pass for the accumulated shader text, runs it on 'target' with viewport and scissors covering the whole target, then resets 'sc'. Returns the measurement of the pass's timer pool. */ +struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, + struct ra_tex *target, bool discard, + const struct ra_renderpass_input *vao, + int vao_len, size_t vertex_stride, + void *vertices, size_t 
num_vertices) +{ + struct timer_pool *timer = NULL; + + sc->params.invalidate_target = discard; + gl_sc_generate(sc, RA_RENDERPASS_TYPE_RASTER, target->params.format, + vao, vao_len, vertex_stride); + /* No usable shader: skip the draw, but still reset 'sc' below. 'timer' stays NULL on this path. */ + if (!sc->current_shader) + goto error; + + timer = sc->current_shader->timer; + + struct mp_rect full_rc = {0, 0, target->params.w, target->params.h}; + + struct ra_renderpass_run_params run = { + .pass = sc->current_shader->pass, + .values = sc->values, + .num_values = sc->num_values, + .push_constants = sc->current_shader->pushc, + .target = target, + .vertex_data = vertices, + .vertex_count = num_vertices, + .viewport = full_rc, + .scissors = full_rc, + }; + + timer_pool_start(timer); + sc->ra->fns->renderpass_run(sc->ra, &run); + timer_pool_stop(timer); + +error: + gl_sc_reset(sc); + return timer_pool_measure(timer); +} + +/* Compute counterpart of gl_sc_dispatch_draw(): generates the compute pass, runs it with {w, h, d} as compute_groups, then resets 'sc'. Returns the measurement of the pass's timer pool. */ +struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, + int w, int h, int d) +{ + struct timer_pool *timer = NULL; + + gl_sc_generate(sc, RA_RENDERPASS_TYPE_COMPUTE, NULL, NULL, 0, 0); + /* Same early-out as in gl_sc_dispatch_draw(); 'timer' stays NULL on this path. */ + if (!sc->current_shader) + goto error; + + timer = sc->current_shader->timer; + + struct ra_renderpass_run_params run = { + .pass = sc->current_shader->pass, + .values = sc->values, + .num_values = sc->num_values, + .push_constants = sc->current_shader->pushc, + .compute_groups = {w, h, d}, + }; + + timer_pool_start(timer); + sc->ra->fns->renderpass_run(sc->ra, &run); + timer_pool_stop(timer); + +error: + gl_sc_reset(sc); + return timer_pool_measure(timer); +} diff --git a/video/out/gpu/shader_cache.h b/video/out/gpu/shader_cache.h new file mode 100644 index 0000000..7c51c7a --- /dev/null +++ b/video/out/gpu/shader_cache.h @@ -0,0 +1,66 @@ +#pragma once + +#include "common/common.h" +#include "misc/bstr.h" +#include "ra.h" + +// For mp_pass_perf +#include "video/out/vo.h" + +struct mp_log; +struct mpv_global; +struct gl_shader_cache; + +struct gl_shader_cache *gl_sc_create(struct ra *ra, struct mpv_global *global, + struct mp_log *log); +void 
gl_sc_destroy(struct gl_shader_cache *sc); +bool gl_sc_error_state(struct gl_shader_cache *sc); +void gl_sc_reset_error(struct gl_shader_cache *sc); +/* Shader text builders. NOTE(review): the add/hadd/padd variants presumably feed the body, header and prelude text respectively; their definitions are not part of this header, so confirm in shader_cache.c. */ +void gl_sc_add(struct gl_shader_cache *sc, const char *text); +void gl_sc_addf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_hadd(struct gl_shader_cache *sc, const char *text); +void gl_sc_haddf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); +void gl_sc_hadd_bstr(struct gl_shader_cache *sc, struct bstr text); +void gl_sc_paddf(struct gl_shader_cache *sc, const char *textf, ...) + PRINTF_ATTRIBUTE(2, 3); + +// A hint that the next data-type (i.e. non-binding) uniform is expected to +// change frequently. This refers to the _f, _i, _vecN etc. uniform types. +void gl_sc_uniform_dynamic(struct gl_shader_cache *sc); +void gl_sc_uniform_texture(struct gl_shader_cache *sc, char *name, + struct ra_tex *tex); +void gl_sc_uniform_image2D_wo(struct gl_shader_cache *sc, const char *name, + struct ra_tex *tex); +/* 'format' and the following arguments are printf-style; the formatted text becomes the member list of the buffer block declared for 'name'. */ +void gl_sc_ssbo(struct gl_shader_cache *sc, char *name, struct ra_buf *buf, + char *format, ...) 
PRINTF_ATTRIBUTE(4, 5); +void gl_sc_uniform_f(struct gl_shader_cache *sc, char *name, float f); +void gl_sc_uniform_i(struct gl_shader_cache *sc, char *name, int f); +void gl_sc_uniform_vec2(struct gl_shader_cache *sc, char *name, float f[2]); +void gl_sc_uniform_vec3(struct gl_shader_cache *sc, char *name, float f[3]); +/* 'v' must point to 4 (mat2) or 9 (mat3) floats. With 'transpose' set, the copy stored in the uniform is transposed; 'v' itself is not modified. */ +void gl_sc_uniform_mat2(struct gl_shader_cache *sc, char *name, + bool transpose, float *v); +void gl_sc_uniform_mat3(struct gl_shader_cache *sc, char *name, + bool transpose, float *v); + +// Return the correct bvecN() variant for using mix() in this GLSL version +const char *gl_sc_bvec(struct gl_shader_cache *sc, int dims); + +void gl_sc_blend(struct gl_shader_cache *sc, + enum ra_blend blend_src_rgb, + enum ra_blend blend_dst_rgb, + enum ra_blend blend_src_alpha, + enum ra_blend blend_dst_alpha); +void gl_sc_enable_extension(struct gl_shader_cache *sc, char *name); +/* Both dispatch functions call gl_sc_reset() before returning, including when no shader could be generated. */ +struct mp_pass_perf gl_sc_dispatch_draw(struct gl_shader_cache *sc, + struct ra_tex *target, bool discard, + const struct ra_renderpass_input *vao, + int vao_len, size_t vertex_stride, + void *ptr, size_t num); +struct mp_pass_perf gl_sc_dispatch_compute(struct gl_shader_cache *sc, + int w, int h, int d); +// The application can call this on errors, to reset the current shader. 
This +// is normally done implicitly by gl_sc_dispatch_* +void gl_sc_reset(struct gl_shader_cache *sc); +void gl_sc_set_cache_dir(struct gl_shader_cache *sc, char *dir); diff --git a/video/out/gpu/spirv.c b/video/out/gpu/spirv.c new file mode 100644 index 0000000..67088bc --- /dev/null +++ b/video/out/gpu/spirv.c @@ -0,0 +1,70 @@ +#include "common/msg.h" +#include "options/m_config.h" + +#include "spirv.h" +#include "config.h" + +extern const struct spirv_compiler_fns spirv_shaderc; + +// in probe-order +enum { + SPIRV_AUTO = 0, + SPIRV_SHADERC, // generally preferred, but not packaged everywhere +}; + +/* Indexed by the enum above; slots of compilers that were not built in stay NULL. */ +static const struct spirv_compiler_fns *compilers[] = { +#if HAVE_SHADERC + [SPIRV_SHADERC] = &spirv_shaderc, +#endif +}; + +static const struct m_opt_choice_alternatives compiler_choices[] = { + {"auto", SPIRV_AUTO}, +#if HAVE_SHADERC + {"shaderc", SPIRV_SHADERC}, +#endif + {0} +}; + +struct spirv_opts { + int compiler; +}; + +#define OPT_BASE_STRUCT struct spirv_opts +const struct m_sub_options spirv_conf = { + .opts = (const struct m_option[]) { + {"spirv-compiler", OPT_CHOICE_C(compiler, compiler_choices)}, + {0} + }, + .size = sizeof(struct spirv_opts), +}; + +/* Tries the compiled-in compilers in probe order, or only the one selected with the "spirv-compiler" option, and keeps the first whose init() succeeds. On success ctx->spirv is set and true is returned; on failure ctx->spirv is left NULL and false is returned. */ +bool spirv_compiler_init(struct ra_ctx *ctx) +{ + void *tmp = talloc_new(NULL); + struct spirv_opts *opts = mp_get_config_group(tmp, ctx->global, &spirv_conf); + int compiler = opts->compiler; + talloc_free(tmp); + + for (int i = SPIRV_AUTO+1; i < MP_ARRAY_SIZE(compilers); i++) { + if (compiler != SPIRV_AUTO && i != compiler) + continue; + if (!compilers[i]) + continue; + + ctx->spirv = talloc_zero(ctx, struct spirv_compiler); + ctx->spirv->log = ctx->log; + ctx->spirv->fns = compilers[i]; + + const char *name = m_opt_choice_str(compiler_choices, i); + /* The struct was zero-allocated and at most size-1 bytes are copied, so 'name' stays NUL-terminated. */ + strncpy(ctx->spirv->name, name, sizeof(ctx->spirv->name) - 1); + MP_VERBOSE(ctx, "Initializing SPIR-V compiler '%s'\n", name); + if (ctx->spirv->fns->init(ctx)) + return true; + talloc_free(ctx->spirv); + ctx->spirv = NULL; + } + + MP_ERR(ctx, "Failed 
initializing SPIR-V compiler!\n"); + return false; +} diff --git a/video/out/gpu/spirv.h b/video/out/gpu/spirv.h new file mode 100644 index 0000000..e3dbd4f --- /dev/null +++ b/video/out/gpu/spirv.h @@ -0,0 +1,41 @@ +#pragma once + +#include "common/msg.h" +#include "common/common.h" +#include "context.h" + +/* Shader stage passed to compile_glsl(). */ +enum glsl_shader { + GLSL_SHADER_VERTEX, + GLSL_SHADER_FRAGMENT, + GLSL_SHADER_COMPUTE, +}; + +/* Size of spirv_compiler.name in bytes, including the terminating NUL. */ +#define SPIRV_NAME_MAX_LEN 32 + +/* State of the active SPIR-V compiler. The struct itself is allocated and freed by the caller of the init/uninit callbacks; 'priv' belongs to the implementation. */ +struct spirv_compiler { + char name[SPIRV_NAME_MAX_LEN]; + const struct spirv_compiler_fns *fns; + struct mp_log *log; + void *priv; + + const char *required_ext; // or NULL + int glsl_version; // GLSL version supported + int compiler_version; // for cache invalidation, may be left as 0 + int ra_caps; // RA_CAP_* provided by this implementation, if any +}; + +struct spirv_compiler_fns { + // Compile GLSL to SPIR-V, under GL_KHR_vulkan_glsl semantics. + bool (*compile_glsl)(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv); + + // Called by spirv_compiler_init / ra_ctx_destroy. These don't need to + // allocate/free ctx->spirv, that is done by the caller + bool (*init)(struct ra_ctx *ctx); + void (*uninit)(struct ra_ctx *ctx); // optional +}; + +// Initializes ctx->spirv to a valid SPIR-V compiler, or returns false on +// failure. Cleanup will be handled by ra_ctx_destroy. 
+bool spirv_compiler_init(struct ra_ctx *ctx); diff --git a/video/out/gpu/spirv_shaderc.c b/video/out/gpu/spirv_shaderc.c new file mode 100644 index 0000000..f285631 --- /dev/null +++ b/video/out/gpu/spirv_shaderc.c @@ -0,0 +1,125 @@ +#include "common/msg.h" + +#include "context.h" +#include "spirv.h" + +#include <shaderc/shaderc.h> + +/* Per-compiler state stored in spirv_compiler.priv. */ +struct priv { + shaderc_compiler_t compiler; + shaderc_compile_options_t opts; +}; + +/* Releases the shaderc options and compiler handles; does nothing if priv was never set. Also used as the error path of shaderc_init(), where either handle may still be NULL. */ +static void shaderc_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv; + if (!p) + return; + + shaderc_compile_options_release(p->opts); + shaderc_compiler_release(p->compiler); +} + +/* Creates the shaderc compiler and options objects, selects performance optimization (plus debug info if ctx->opts.debug is set) and fills in the version fields of ctx->spirv. Returns false if either shaderc object could not be created. */ +static bool shaderc_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->spirv->priv = talloc_zero(ctx->spirv, struct priv); + + p->compiler = shaderc_compiler_initialize(); + if (!p->compiler) + goto error; + p->opts = shaderc_compile_options_initialize(); + if (!p->opts) + goto error; + + shaderc_compile_options_set_optimization_level(p->opts, + shaderc_optimization_level_performance); + if (ctx->opts.debug) + shaderc_compile_options_set_generate_debug_info(p->opts); + + int ver, rev; + shaderc_get_spv_version(&ver, &rev); + ctx->spirv->compiler_version = ver * 100 + rev; // forwards compatibility + ctx->spirv->glsl_version = 450; // impossible to query? 
+ return true; + +error: + shaderc_uninit(ctx); + return false; +} + +/* Runs shaderc on 'glsl' for the given stage, with entry point "main" and "input" as the file name reported to shaderc. 'debug' selects SPIR-V assembly text output instead of the binary module. The caller releases the returned result. */ +static shaderc_compilation_result_t compile(struct priv *p, + enum glsl_shader type, + const char *glsl, bool debug) +{ + static const shaderc_shader_kind kinds[] = { + [GLSL_SHADER_VERTEX] = shaderc_glsl_vertex_shader, + [GLSL_SHADER_FRAGMENT] = shaderc_glsl_fragment_shader, + [GLSL_SHADER_COMPUTE] = shaderc_glsl_compute_shader, + }; + + if (debug) { + return shaderc_compile_into_spv_assembly(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } else { + return shaderc_compile_into_spv(p->compiler, glsl, strlen(glsl), + kinds[type], "input", "main", p->opts); + } +} + +/* compile_glsl implementation: compiles 'glsl', logs shaderc's messages and status, and on success copies the SPIR-V bytes into *out_spirv (allocated under 'tactx'). Returns whether compilation succeeded; *out_spirv is left untouched on failure. */ +static bool shaderc_compile(struct spirv_compiler *spirv, void *tactx, + enum glsl_shader type, const char *glsl, + struct bstr *out_spirv) +{ + struct priv *p = spirv->priv; + + shaderc_compilation_result_t res = compile(p, type, glsl, false); + /* Log level follows the worst thing shaderc reported: errors, then warnings, else verbose. */ + int errs = shaderc_result_get_num_errors(res), + warn = shaderc_result_get_num_warnings(res), + msgl = errs ? MSGL_ERR : warn ? MSGL_WARN : MSGL_V; + + const char *msg = shaderc_result_get_error_message(res); + if (msg[0]) + MP_MSG(spirv, msgl, "shaderc output:\n%s", msg); + + int s = shaderc_result_get_compilation_status(res); + bool success = s == shaderc_compilation_status_success; + + static const char *results[] = { + [shaderc_compilation_status_success] = "success", + [shaderc_compilation_status_invalid_stage] = "invalid stage", + [shaderc_compilation_status_compilation_error] = "error", + [shaderc_compilation_status_internal_error] = "internal error", + [shaderc_compilation_status_null_result_object] = "no result", + [shaderc_compilation_status_invalid_assembly] = "invalid assembly", + }; + + /* Status values beyond the table above are reported as "unknown". */ + const char *status = s < MP_ARRAY_SIZE(results) ? 
results[s] : "unknown"; + MP_MSG(spirv, msgl, "shaderc compile status '%s' (%d errors, %d warnings)\n", + status, errs, warn); + + if (success) { + /* The result object owns its bytes and is released below, so the module is duplicated under 'tactx'. */ + void *bytes = (void *) shaderc_result_get_bytes(res); + out_spirv->len = shaderc_result_get_length(res); + out_spirv->start = talloc_memdup(tactx, bytes, out_spirv->len); + } + + // Also print SPIR-V disassembly for debugging purposes. Unfortunately + // there doesn't seem to be a way to get this except compiling the shader + // a second time.. + if (mp_msg_test(spirv->log, MSGL_TRACE)) { + shaderc_compilation_result_t dis = compile(p, type, glsl, true); + MP_TRACE(spirv, "Generated SPIR-V:\n%.*s", + (int)shaderc_result_get_length(dis), + shaderc_result_get_bytes(dis)); + shaderc_result_release(dis); + } + + shaderc_result_release(res); + return success; +} + +/* Function table for the shaderc backend; referenced from spirv.c through an extern declaration. */ +const struct spirv_compiler_fns spirv_shaderc = { + .compile_glsl = shaderc_compile, + .init = shaderc_init, + .uninit = shaderc_uninit, +}; diff --git a/video/out/gpu/user_shaders.c b/video/out/gpu/user_shaders.c new file mode 100644 index 0000000..708de87 --- /dev/null +++ b/video/out/gpu/user_shaders.c @@ -0,0 +1,463 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <math.h> + +#include "common/msg.h" +#include "misc/ctype.h" +#include "user_shaders.h" + +/* Parses a space-separated RPN size expression from 'line' into 'out', one element per word. Returns false on an unrecognized word, an unparsable number, or more than MAX_SZEXP_SIZE words. No end marker is written after the last parsed element. NOTE(review): termination therefore seems to rely on the remaining elements of 'out' already carrying the end tag; confirm against the callers' initialization. */ +static bool parse_rpn_szexpr(struct bstr line, struct szexp out[MAX_SZEXP_SIZE]) +{ + int pos = 0; + + while (line.len > 0) { + struct bstr word = bstr_strip(bstr_splitchar(line, &line, ' ')); + if (word.len == 0) + continue; + + if (pos >= MAX_SZEXP_SIZE) + return false; + + struct szexp *exp = &out[pos++]; + + /* "NAME.w" / "NAME.width": the suffix is stripped and the remainder is kept as the variable name. */ + if (bstr_eatend0(&word, ".w") || bstr_eatend0(&word, ".width")) { + exp->tag = SZEXP_VAR_W; + exp->val.varname = word; + continue; + } + + if (bstr_eatend0(&word, ".h") || bstr_eatend0(&word, ".height")) { + exp->tag = SZEXP_VAR_H; + exp->val.varname = word; + continue; + } + + /* Operators are recognized by their first character only. */ + switch (word.start[0]) { + case '+': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_ADD; continue; + case '-': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_SUB; continue; + case '*': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MUL; continue; + case '/': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_DIV; continue; + case '%': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_MOD; continue; + case '!': exp->tag = SZEXP_OP1; exp->val.op = SZEXP_OP_NOT; continue; + case '>': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_GT; continue; + case '<': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_LT; continue; + case '=': exp->tag = SZEXP_OP2; exp->val.op = SZEXP_OP_EQ; continue; + } + + if (mp_isdigit(word.start[0])) { + exp->tag = SZEXP_CONST; + if (bstr_sscanf(word, "%f", &exp->val.cval) != 1) + return false; + continue; + } + + // Some sort of illegal expression + return false; + } + + return true; +} + +// Returns whether successful. 
'result' is left untouched on failure +bool eval_szexpr(struct mp_log *log, void *priv, + bool (*lookup)(void *priv, struct bstr var, float size[2]), + struct szexp expr[MAX_SZEXP_SIZE], float *result) +{ + /* Operand stack: each expression element pushes at most one value, so MAX_SZEXP_SIZE slots suffice. */ + float stack[MAX_SZEXP_SIZE] = {0}; + int idx = 0; // points to next element to push + + for (int i = 0; i < MAX_SZEXP_SIZE; i++) { + switch (expr[i].tag) { + case SZEXP_END: + goto done; + + case SZEXP_CONST: + // Since our SZEXPs are bound by MAX_SZEXP_SIZE, it should be + // impossible to overflow the stack + assert(idx < MAX_SZEXP_SIZE); + stack[idx++] = expr[i].val.cval; + continue; + + case SZEXP_OP1: + if (idx < 1) { + mp_warn(log, "Stack underflow in RPN expression!\n"); + return false; + } + + /* Unary operators rewrite the top of the stack in place. */ + switch (expr[i].val.op) { + case SZEXP_OP_NOT: stack[idx-1] = !stack[idx-1]; break; + default: MP_ASSERT_UNREACHABLE(); + } + continue; + + case SZEXP_OP2: + if (idx < 2) { + mp_warn(log, "Stack underflow in RPN expression!\n"); + return false; + } + + // Pop the operands in reverse order + float op2 = stack[--idx]; + float op1 = stack[--idx]; + float res = 0.0; + switch (expr[i].val.op) { + case SZEXP_OP_ADD: res = op1 + op2; break; + case SZEXP_OP_SUB: res = op1 - op2; break; + case SZEXP_OP_MUL: res = op1 * op2; break; + case SZEXP_OP_DIV: res = op1 / op2; break; + case SZEXP_OP_MOD: res = fmodf(op1, op2); break; + case SZEXP_OP_GT: res = op1 > op2; break; + case SZEXP_OP_LT: res = op1 < op2; break; + case SZEXP_OP_EQ: res = op1 == op2; break; + default: MP_ASSERT_UNREACHABLE(); + } + + /* Rejects any non-finite result, e.g. from a division or modulo by zero. */ + if (!isfinite(res)) { + mp_warn(log, "Illegal operation in RPN expression!\n"); + return false; + } + + stack[idx++] = res; + continue; + + case SZEXP_VAR_W: + case SZEXP_VAR_H: { + struct bstr name = expr[i].val.varname; + float size[2]; + + if (!lookup(priv, name, size)) { + mp_warn(log, "Variable %.*s not found in RPN expression!\n", + BSTR_P(name)); + return false; + } + + stack[idx++] = (expr[i].tag == SZEXP_VAR_W) ? 
size[0] : size[1]; + continue; + } + } + } + +done: + // Return the single stack element + if (idx != 1) { + mp_warn(log, "Malformed stack after RPN expression!\n"); + return false; + } + + *result = stack[0]; + return true; +} + +static bool parse_hook(struct mp_log *log, struct bstr *body, + struct gl_user_shader_hook *out) +{ + *out = (struct gl_user_shader_hook){ + .pass_desc = bstr0("(unknown)"), + .offset = identity_trans, + .align_offset = false, + .width = {{ SZEXP_VAR_W, { .varname = bstr0("HOOKED") }}}, + .height = {{ SZEXP_VAR_H, { .varname = bstr0("HOOKED") }}}, + .cond = {{ SZEXP_CONST, { .cval = 1.0 }}}, + }; + + int hook_idx = 0; + int bind_idx = 0; + + // Parse all headers + while (true) { + struct bstr rest; + struct bstr line = bstr_strip(bstr_getline(*body, &rest)); + + // Check for the presence of the magic line beginning + if (!bstr_eatstart0(&line, "//!")) + break; + + *body = rest; + + // Parse the supported commands + if (bstr_eatstart0(&line, "HOOK")) { + if (hook_idx == SHADER_MAX_HOOKS) { + mp_err(log, "Passes may only hook up to %d textures!\n", + SHADER_MAX_HOOKS); + return false; + } + out->hook_tex[hook_idx++] = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "BIND")) { + if (bind_idx == SHADER_MAX_BINDS) { + mp_err(log, "Passes may only bind up to %d textures!\n", + SHADER_MAX_BINDS); + return false; + } + out->bind_tex[bind_idx++] = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "SAVE")) { + out->save_tex = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "DESC")) { + out->pass_desc = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "OFFSET")) { + line = bstr_strip(line); + if (bstr_equals0(line, "ALIGN")) { + out->align_offset = true; + } else { + float ox, oy; + if (bstr_sscanf(line, "%f %f", &ox, &oy) != 2) { + mp_err(log, "Error while parsing OFFSET!\n"); + return false; + } + out->offset.t[0] = ox; + out->offset.t[1] = oy; + } + continue; + } + + if 
(bstr_eatstart0(&line, "WIDTH")) { + if (!parse_rpn_szexpr(line, out->width)) { + mp_err(log, "Error while parsing WIDTH!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "HEIGHT")) { + if (!parse_rpn_szexpr(line, out->height)) { + mp_err(log, "Error while parsing HEIGHT!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "WHEN")) { + if (!parse_rpn_szexpr(line, out->cond)) { + mp_err(log, "Error while parsing WHEN!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "COMPONENTS")) { + if (bstr_sscanf(line, "%d", &out->components) != 1) { + mp_err(log, "Error while parsing COMPONENTS!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "COMPUTE")) { + struct compute_info *ci = &out->compute; + int num = bstr_sscanf(line, "%d %d %d %d", &ci->block_w, &ci->block_h, + &ci->threads_w, &ci->threads_h); + + if (num == 2 || num == 4) { + ci->active = true; + ci->directly_writes = true; + } else { + mp_err(log, "Error while parsing COMPUTE!\n"); + return false; + } + continue; + } + + // Unknown command type + mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line)); + return false; + } + + // The rest of the file up until the next magic line beginning (if any) + // shall be the shader body + if (bstr_split_tok(*body, "//!", &out->pass_body, body)) { + // Make sure the magic line is part of the rest + body->start -= 3; + body->len += 3; + } + + // Sanity checking + if (hook_idx == 0) + mp_warn(log, "Pass has no hooked textures (will be ignored)!\n"); + + return true; +} + +static bool parse_tex(struct mp_log *log, struct ra *ra, struct bstr *body, + struct gl_user_shader_tex *out) +{ + *out = (struct gl_user_shader_tex){ + .name = bstr0("USER_TEX"), + .params = { + .dimensions = 2, + .w = 1, .h = 1, .d = 1, + .render_src = true, + .src_linear = true, + }, + }; + struct ra_tex_params *p = &out->params; + + while (true) { + struct bstr rest; + struct bstr line = bstr_strip(bstr_getline(*body, 
&rest)); + + if (!bstr_eatstart0(&line, "//!")) + break; + + *body = rest; + + if (bstr_eatstart0(&line, "TEXTURE")) { + out->name = bstr_strip(line); + continue; + } + + if (bstr_eatstart0(&line, "SIZE")) { + p->dimensions = bstr_sscanf(line, "%d %d %d", &p->w, &p->h, &p->d); + if (p->dimensions < 1 || p->dimensions > 3 || + p->w < 1 || p->h < 1 || p->d < 1) + { + mp_err(log, "Error while parsing SIZE!\n"); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "FORMAT ")) { + p->format = NULL; + for (int n = 0; n < ra->num_formats; n++) { + const struct ra_format *fmt = ra->formats[n]; + if (bstr_equals0(line, fmt->name)) { + p->format = fmt; + break; + } + } + // (pixel_size==0 is for opaque formats) + if (!p->format || !p->format->pixel_size) { + mp_err(log, "Unrecognized/unavailable FORMAT name: '%.*s'!\n", + BSTR_P(line)); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "FILTER")) { + line = bstr_strip(line); + if (bstr_equals0(line, "LINEAR")) { + p->src_linear = true; + } else if (bstr_equals0(line, "NEAREST")) { + p->src_linear = false; + } else { + mp_err(log, "Unrecognized FILTER: '%.*s'!\n", BSTR_P(line)); + return false; + } + continue; + } + + if (bstr_eatstart0(&line, "BORDER")) { + line = bstr_strip(line); + if (bstr_equals0(line, "CLAMP")) { + p->src_repeat = false; + } else if (bstr_equals0(line, "REPEAT")) { + p->src_repeat = true; + } else { + mp_err(log, "Unrecognized BORDER: '%.*s'!\n", BSTR_P(line)); + return false; + } + continue; + } + + mp_err(log, "Unrecognized command '%.*s'!\n", BSTR_P(line)); + return false; + } + + if (!p->format) { + mp_err(log, "No FORMAT specified.\n"); + return false; + } + + if (p->src_linear && !p->format->linear_filter) { + mp_err(log, "The specified texture format cannot be filtered!\n"); + return false; + } + + // Decode the rest of the section (up to the next //! 
marker) as raw hex + // data for the texture + struct bstr hexdata; + if (bstr_split_tok(*body, "//!", &hexdata, body)) { + // Make sure the magic line is part of the rest + body->start -= 3; + body->len += 3; + } + + struct bstr tex; + if (!bstr_decode_hex(NULL, bstr_strip(hexdata), &tex)) { + mp_err(log, "Error while parsing TEXTURE body: must be a valid " + "hexadecimal sequence, on a single line!\n"); + return false; + } + + int expected_len = p->w * p->h * p->d * p->format->pixel_size; + if (tex.len != expected_len) { + mp_err(log, "Shader TEXTURE size mismatch: got %zd bytes, expected %d!\n", + tex.len, expected_len); + talloc_free(tex.start); + return false; + } + + p->initial_data = tex.start; + return true; +} + +void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader, + void *priv, + bool (*dohook)(void *p, struct gl_user_shader_hook hook), + bool (*dotex)(void *p, struct gl_user_shader_tex tex)) +{ + if (!dohook || !dotex || !shader.len) + return; + + // Skip all garbage (e.g. comments) before the first header + int pos = bstr_find(shader, bstr0("//!")); + if (pos < 0) { + mp_warn(log, "Shader appears to contain no headers!\n"); + return; + } + shader = bstr_cut(shader, pos); + + // Loop over the file + while (shader.len > 0) + { + // Peek at the first header to dispatch the right type + if (bstr_startswith0(shader, "//!TEXTURE")) { + struct gl_user_shader_tex t; + if (!parse_tex(log, ra, &shader, &t) || !dotex(priv, t)) + return; + continue; + } + + struct gl_user_shader_hook h; + if (!parse_hook(log, &shader, &h) || !dohook(priv, h)) + return; + } +} diff --git a/video/out/gpu/user_shaders.h b/video/out/gpu/user_shaders.h new file mode 100644 index 0000000..4bb7c22 --- /dev/null +++ b/video/out/gpu/user_shaders.h @@ -0,0 +1,99 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_GL_USER_SHADERS_H +#define MP_GL_USER_SHADERS_H + +#include "utils.h" +#include "ra.h" + +#define SHADER_MAX_HOOKS 16 +#define SHADER_MAX_BINDS 16 +#define MAX_SZEXP_SIZE 32 + +enum szexp_op { + SZEXP_OP_ADD, + SZEXP_OP_SUB, + SZEXP_OP_MUL, + SZEXP_OP_DIV, + SZEXP_OP_MOD, + SZEXP_OP_NOT, + SZEXP_OP_GT, + SZEXP_OP_LT, + SZEXP_OP_EQ, +}; + +enum szexp_tag { + SZEXP_END = 0, // End of an RPN expression + SZEXP_CONST, // Push a constant value onto the stack + SZEXP_VAR_W, // Get the width/height of a named texture (variable) + SZEXP_VAR_H, + SZEXP_OP2, // Pop two elements and push the result of a dyadic operation + SZEXP_OP1, // Pop one element and push the result of a monadic operation +}; + +struct szexp { + enum szexp_tag tag; + union { + float cval; + struct bstr varname; + enum szexp_op op; + } val; +}; + +struct compute_info { + bool active; + int block_w, block_h; // Block size (each block corresponds to one WG) + int threads_w, threads_h; // How many threads form a working group + bool directly_writes; // If true, shader is assumed to imageStore(out_image) +}; + +struct gl_user_shader_hook { + struct bstr pass_desc; + struct bstr hook_tex[SHADER_MAX_HOOKS]; + struct bstr bind_tex[SHADER_MAX_BINDS]; + struct bstr save_tex; + struct bstr pass_body; + struct gl_transform offset; + bool align_offset; + 
struct szexp width[MAX_SZEXP_SIZE]; + struct szexp height[MAX_SZEXP_SIZE]; + struct szexp cond[MAX_SZEXP_SIZE]; + int components; + struct compute_info compute; +}; + +struct gl_user_shader_tex { + struct bstr name; + struct ra_tex_params params; + // for video.c + struct ra_tex *tex; +}; + +// Parse the next shader block from `body`. The callbacks are invoked on every +// valid shader block parsed. +void parse_user_shader(struct mp_log *log, struct ra *ra, struct bstr shader, + void *priv, + bool (*dohook)(void *p, struct gl_user_shader_hook hook), + bool (*dotex)(void *p, struct gl_user_shader_tex tex)); + +// Evaluate a szexp, given a lookup function for named textures +bool eval_szexpr(struct mp_log *log, void *priv, + bool (*lookup)(void *priv, struct bstr var, float size[2]), + struct szexp expr[MAX_SZEXP_SIZE], float *result); + +#endif diff --git a/video/out/gpu/utils.c b/video/out/gpu/utils.c new file mode 100644 index 0000000..8a1aacf --- /dev/null +++ b/video/out/gpu/utils.c @@ -0,0 +1,349 @@ +#include "common/msg.h" +#include "video/out/vo.h" +#include "utils.h" + +// Standard parallel 2D projection, except y1 < y0 means that the coordinate +// system is flipped, not the projection. +void gl_transform_ortho(struct gl_transform *t, float x0, float x1, + float y0, float y1) +{ + if (y1 < y0) { + float tmp = y0; + y0 = tmp - y1; + y1 = tmp; + } + + t->m[0][0] = 2.0f / (x1 - x0); + t->m[0][1] = 0.0f; + t->m[1][0] = 0.0f; + t->m[1][1] = 2.0f / (y1 - y0); + t->t[0] = -(x1 + x0) / (x1 - x0); + t->t[1] = -(y1 + y0) / (y1 - y0); +} + +// Apply the effects of one transformation to another, transforming it in the +// process. 
In other words: post-composes t onto x +void gl_transform_trans(struct gl_transform t, struct gl_transform *x) +{ + struct gl_transform xt = *x; + x->m[0][0] = t.m[0][0] * xt.m[0][0] + t.m[0][1] * xt.m[1][0]; + x->m[1][0] = t.m[1][0] * xt.m[0][0] + t.m[1][1] * xt.m[1][0]; + x->m[0][1] = t.m[0][0] * xt.m[0][1] + t.m[0][1] * xt.m[1][1]; + x->m[1][1] = t.m[1][0] * xt.m[0][1] + t.m[1][1] * xt.m[1][1]; + gl_transform_vec(t, &x->t[0], &x->t[1]); +} + +void gl_transform_ortho_fbo(struct gl_transform *t, struct ra_fbo fbo) +{ + int y_dir = fbo.flip ? -1 : 1; + gl_transform_ortho(t, 0, fbo.tex->params.w, 0, fbo.tex->params.h * y_dir); +} + +float gl_video_scale_ambient_lux(float lmin, float lmax, + float rmin, float rmax, float lux) +{ + assert(lmax > lmin); + + float num = (rmax - rmin) * (log10(lux) - log10(lmin)); + float den = log10(lmax) - log10(lmin); + float result = num / den + rmin; + + // clamp the result + float max = MPMAX(rmax, rmin); + float min = MPMIN(rmax, rmin); + return MPMAX(MPMIN(result, max), min); +} + +void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool) +{ + for (int i = 0; i < pool->num_buffers; i++) + ra_buf_free(ra, &pool->buffers[i]); + + talloc_free(pool->buffers); + *pool = (struct ra_buf_pool){0}; +} + +static bool ra_buf_params_compatible(const struct ra_buf_params *new, + const struct ra_buf_params *old) +{ + return new->type == old->type && + new->size <= old->size && + new->host_mapped == old->host_mapped && + new->host_mutable == old->host_mutable; +} + +static bool ra_buf_pool_grow(struct ra *ra, struct ra_buf_pool *pool) +{ + struct ra_buf *buf = ra_buf_create(ra, &pool->current_params); + if (!buf) + return false; + + MP_TARRAY_INSERT_AT(NULL, pool->buffers, pool->num_buffers, pool->index, buf); + MP_VERBOSE(ra, "Resized buffer pool of type %u to size %d\n", + pool->current_params.type, pool->num_buffers); + return true; +} + +struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, + const struct 
ra_buf_params *params) +{ + assert(!params->initial_data); + + if (!ra_buf_params_compatible(params, &pool->current_params)) { + ra_buf_pool_uninit(ra, pool); + pool->current_params = *params; + } + + // Make sure we have at least one buffer available + if (!pool->buffers && !ra_buf_pool_grow(ra, pool)) + return NULL; + + // Make sure the next buffer is available for use + if (!ra->fns->buf_poll(ra, pool->buffers[pool->index]) && + !ra_buf_pool_grow(ra, pool)) + { + return NULL; + } + + struct ra_buf *buf = pool->buffers[pool->index++]; + pool->index %= pool->num_buffers; + + return buf; +} + +bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, + const struct ra_tex_upload_params *params) +{ + if (params->buf) + return ra->fns->tex_upload(ra, params); + + struct ra_tex *tex = params->tex; + size_t row_size = tex->params.dimensions == 2 ? params->stride : + tex->params.w * tex->params.format->pixel_size; + + int height = tex->params.h; + if (tex->params.dimensions == 2 && params->rc) + height = mp_rect_h(*params->rc); + + struct ra_buf_params bufparams = { + .type = RA_BUF_TYPE_TEX_UPLOAD, + .size = row_size * height * tex->params.d, + .host_mutable = true, + }; + + struct ra_buf *buf = ra_buf_pool_get(ra, pbo, &bufparams); + if (!buf) + return false; + + ra->fns->buf_update(ra, buf, 0, params->src, bufparams.size); + + struct ra_tex_upload_params newparams = *params; + newparams.buf = buf; + newparams.src = NULL; + + return ra->fns->tex_upload(ra, &newparams); +} + +struct ra_layout std140_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std140 packing rules: + // 1. The alignment of generic values is their size in bytes + // 2. The alignment of vectors is the vector length * the base count, with + // the exception of vec3 which is always aligned like vec4 + // 3. The alignment of arrays is that of the element size rounded up to + // the nearest multiple of vec4 + // 4. 
Matrices are treated like arrays of vectors + // 5. Arrays/matrices are laid out with a stride equal to the alignment + size_t stride = el_size * inp->dim_v; + size_t align = stride; + if (inp->dim_v == 3) + align += el_size; + if (inp->dim_m > 1) + stride = align = MP_ALIGN_UP(stride, sizeof(float[4])); + + return (struct ra_layout) { + .align = align, + .stride = stride, + .size = stride * inp->dim_m, + }; +} + +struct ra_layout std430_layout(struct ra_renderpass_input *inp) +{ + size_t el_size = ra_vartype_size(inp->type); + + // std430 packing rules: like std140, except arrays/matrices are always + // "tightly" packed, even arrays/matrices of vec3s + size_t stride = el_size * inp->dim_v; + size_t align = stride; + if (inp->dim_v == 3 && inp->dim_m == 1) + align += el_size; + + return (struct ra_layout) { + .align = align, + .stride = stride, + .size = stride * inp->dim_m, + }; +} + +// Resize a texture to a new desired size and format if necessary +bool ra_tex_resize(struct ra *ra, struct mp_log *log, struct ra_tex **tex, + int w, int h, const struct ra_format *fmt) +{ + if (*tex) { + struct ra_tex_params cur_params = (*tex)->params; + if (cur_params.w == w && cur_params.h == h && cur_params.format == fmt) + return true; + } + + mp_dbg(log, "Resizing texture: %dx%d\n", w, h); + + if (!fmt || !fmt->renderable || !fmt->linear_filter) { + mp_err(log, "Format %s not supported.\n", fmt ? 
fmt->name : "(unset)"); + return false; + } + + ra_tex_free(ra, tex); + struct ra_tex_params params = { + .dimensions = 2, + .w = w, + .h = h, + .d = 1, + .format = fmt, + .src_linear = true, + .render_src = true, + .render_dst = true, + .storage_dst = fmt->storable, + .blit_src = true, + }; + + *tex = ra_tex_create(ra, ¶ms); + if (!*tex) + mp_err(log, "Error: texture could not be created.\n"); + + return *tex; +} + +struct timer_pool { + struct ra *ra; + ra_timer *timer; + bool running; // detect invalid usage + + uint64_t samples[VO_PERF_SAMPLE_COUNT]; + int sample_idx; + int sample_count; + + uint64_t sum; + uint64_t peak; +}; + +struct timer_pool *timer_pool_create(struct ra *ra) +{ + if (!ra->fns->timer_create) + return NULL; + + ra_timer *timer = ra->fns->timer_create(ra); + if (!timer) + return NULL; + + struct timer_pool *pool = talloc(NULL, struct timer_pool); + if (!pool) { + ra->fns->timer_destroy(ra, timer); + return NULL; + } + + *pool = (struct timer_pool){ .ra = ra, .timer = timer }; + return pool; +} + +void timer_pool_destroy(struct timer_pool *pool) +{ + if (!pool) + return; + + pool->ra->fns->timer_destroy(pool->ra, pool->timer); + talloc_free(pool); +} + +void timer_pool_start(struct timer_pool *pool) +{ + if (!pool) + return; + + assert(!pool->running); + pool->ra->fns->timer_start(pool->ra, pool->timer); + pool->running = true; +} + +void timer_pool_stop(struct timer_pool *pool) +{ + if (!pool) + return; + + assert(pool->running); + uint64_t res = pool->ra->fns->timer_stop(pool->ra, pool->timer); + pool->running = false; + + if (res) { + // Input res into the buffer and grab the previous value + uint64_t old = pool->samples[pool->sample_idx]; + pool->sample_count = MPMIN(pool->sample_count + 1, VO_PERF_SAMPLE_COUNT); + pool->samples[pool->sample_idx++] = res; + pool->sample_idx %= VO_PERF_SAMPLE_COUNT; + pool->sum = pool->sum + res - old; + + // Update peak if necessary + if (res >= pool->peak) { + pool->peak = res; + } else if (pool->peak == 
old) { + // It's possible that the last peak was the value we just removed, + // if so we need to scan for the new peak + uint64_t peak = res; + for (int i = 0; i < VO_PERF_SAMPLE_COUNT; i++) + peak = MPMAX(peak, pool->samples[i]); + pool->peak = peak; + } + } +} + +struct mp_pass_perf timer_pool_measure(struct timer_pool *pool) +{ + if (!pool) + return (struct mp_pass_perf){0}; + + struct mp_pass_perf res = { + .peak = pool->peak, + .count = pool->sample_count, + }; + + int idx = pool->sample_idx - pool->sample_count + VO_PERF_SAMPLE_COUNT; + for (int i = 0; i < res.count; i++) { + idx %= VO_PERF_SAMPLE_COUNT; + res.samples[i] = pool->samples[idx++]; + } + + if (res.count > 0) { + res.last = res.samples[res.count - 1]; + res.avg = pool->sum / res.count; + } + + return res; +} + +void mp_log_source(struct mp_log *log, int lev, const char *src) +{ + int line = 1; + if (!src) + return; + while (*src) { + const char *end = strchr(src, '\n'); + const char *next = end + 1; + if (!end) + next = end = src + strlen(src); + mp_msg(log, lev, "[%3d] %.*s\n", line, (int)(end - src), src); + line++; + src = next; + } +} diff --git a/video/out/gpu/utils.h b/video/out/gpu/utils.h new file mode 100644 index 0000000..215873e --- /dev/null +++ b/video/out/gpu/utils.h @@ -0,0 +1,108 @@ +#pragma once + +#include <stdbool.h> +#include <math.h> + +#include "ra.h" +#include "context.h" + +// A 3x2 matrix, with the translation part separate. +struct gl_transform { + // row-major, e.g. in mathematical notation: + // | m[0][0] m[0][1] | + // | m[1][0] m[1][1] | + float m[2][2]; + float t[2]; +}; + +static const struct gl_transform identity_trans = { + .m = {{1.0, 0.0}, {0.0, 1.0}}, + .t = {0.0, 0.0}, +}; + +void gl_transform_ortho(struct gl_transform *t, float x0, float x1, + float y0, float y1); + +// This treats m as an affine transformation, in other words m[2][n] gets +// added to the output. 
+static inline void gl_transform_vec(struct gl_transform t, float *x, float *y) +{ + float vx = *x, vy = *y; + *x = vx * t.m[0][0] + vy * t.m[0][1] + t.t[0]; + *y = vx * t.m[1][0] + vy * t.m[1][1] + t.t[1]; +} + +struct mp_rect_f { + float x0, y0, x1, y1; +}; + +// Semantic equality (fuzzy comparison) +static inline bool mp_rect_f_seq(struct mp_rect_f a, struct mp_rect_f b) +{ + return fabs(a.x0 - b.x0) < 1e-6 && fabs(a.x1 - b.x1) < 1e-6 && + fabs(a.y0 - b.y0) < 1e-6 && fabs(a.y1 - b.y1) < 1e-6; +} + +static inline void gl_transform_rect(struct gl_transform t, struct mp_rect_f *r) +{ + gl_transform_vec(t, &r->x0, &r->y0); + gl_transform_vec(t, &r->x1, &r->y1); +} + +static inline bool gl_transform_eq(struct gl_transform a, struct gl_transform b) +{ + for (int x = 0; x < 2; x++) { + for (int y = 0; y < 2; y++) { + if (a.m[x][y] != b.m[x][y]) + return false; + } + } + + return a.t[0] == b.t[0] && a.t[1] == b.t[1]; +} + +void gl_transform_trans(struct gl_transform t, struct gl_transform *x); + +void gl_transform_ortho_fbo(struct gl_transform *t, struct ra_fbo fbo); + +float gl_video_scale_ambient_lux(float lmin, float lmax, + float rmin, float rmax, float lux); + +// A pool of buffers, which can grow as needed +struct ra_buf_pool { + struct ra_buf_params current_params; + struct ra_buf **buffers; + int num_buffers; + int index; +}; + +void ra_buf_pool_uninit(struct ra *ra, struct ra_buf_pool *pool); + +// Note: params->initial_data is *not* supported +struct ra_buf *ra_buf_pool_get(struct ra *ra, struct ra_buf_pool *pool, + const struct ra_buf_params *params); + +// Helper that wraps ra_tex_upload using texture upload buffers to ensure that +// params->buf is always set. This is intended for RA-internal usage. 
+bool ra_tex_upload_pbo(struct ra *ra, struct ra_buf_pool *pbo, + const struct ra_tex_upload_params *params); + +// Layout rules for GLSL's packing modes +struct ra_layout std140_layout(struct ra_renderpass_input *inp); +struct ra_layout std430_layout(struct ra_renderpass_input *inp); + +bool ra_tex_resize(struct ra *ra, struct mp_log *log, struct ra_tex **tex, + int w, int h, const struct ra_format *fmt); + +// A wrapper around ra_timer that does result pooling, averaging etc. +struct timer_pool; + +struct timer_pool *timer_pool_create(struct ra *ra); +void timer_pool_destroy(struct timer_pool *pool); +void timer_pool_start(struct timer_pool *pool); +void timer_pool_stop(struct timer_pool *pool); +struct mp_pass_perf timer_pool_measure(struct timer_pool *pool); + +// print a multi line string with line numbers (e.g. for shader sources) +// log, lev: module and log level, as in mp_msg() +void mp_log_source(struct mp_log *log, int lev, const char *src); diff --git a/video/out/gpu/video.c b/video/out/gpu/video.c new file mode 100644 index 0000000..852ee78 --- /dev/null +++ b/video/out/gpu/video.c @@ -0,0 +1,4364 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <float.h> +#include <math.h> +#include <stdarg.h> +#include <stdbool.h> +#include <string.h> + +#include <libavutil/common.h> +#include <libavutil/lfg.h> + +#include "video.h" + +#include "misc/bstr.h" +#include "options/m_config.h" +#include "options/path.h" +#include "common/global.h" +#include "options/options.h" +#include "utils.h" +#include "hwdec.h" +#include "osd.h" +#include "ra.h" +#include "stream/stream.h" +#include "video_shaders.h" +#include "user_shaders.h" +#include "error_diffusion.h" +#include "video/out/filter_kernels.h" +#include "video/out/aspect.h" +#include "video/out/dither.h" +#include "video/out/vo.h" + +// scale/cscale arguments that map directly to shader filter routines. +// Note that the convolution filters are not included in this list. +static const char *const fixed_scale_filters[] = { + "bilinear", + "bicubic_fast", + "oversample", + NULL +}; +static const char *const fixed_tscale_filters[] = { + "oversample", + "linear", + NULL +}; + +// must be sorted, and terminated with 0 +int filter_sizes[] = + {2, 4, 6, 8, 12, 16, 20, 24, 28, 32, 36, 40, 44, 48, 52, 56, 60, 64, 0}; +int tscale_sizes[] = {2, 4, 6, 8, 0}; + +struct vertex_pt { + float x, y; +}; + +struct texplane { + struct ra_tex *tex; + int w, h; + bool flipped; +}; + +struct video_image { + struct texplane planes[4]; + struct mp_image *mpi; // original input image + uint64_t id; // unique ID identifying mpi contents + bool hwdec_mapped; +}; + +enum plane_type { + PLANE_NONE = 0, + PLANE_RGB, + PLANE_LUMA, + PLANE_CHROMA, + PLANE_ALPHA, + PLANE_XYZ, +}; + +static const char *plane_names[] = { + [PLANE_NONE] = "unknown", + [PLANE_RGB] = "rgb", + [PLANE_LUMA] = "luma", + [PLANE_CHROMA] = "chroma", + [PLANE_ALPHA] = "alpha", + [PLANE_XYZ] = "xyz", +}; + +// A self-contained description of a source image which can be bound to a +// texture unit and sampled from. 
Contains metadata about how it's to be used +struct image { + enum plane_type type; // must be set to something non-zero + int components; // number of relevant coordinates + float multiplier; // multiplier to be used when sampling + struct ra_tex *tex; + int w, h; // logical size (after transformation) + struct gl_transform transform; // rendering transformation + int padding; // number of leading padding components (e.g. 2 = rg is padding) +}; + +// A named image, for user scripting purposes +struct saved_img { + const char *name; + struct image img; +}; + +// A texture hook. This is some operation that transforms a named texture as +// soon as it's generated +struct tex_hook { + const char *save_tex; + const char *hook_tex[SHADER_MAX_HOOKS]; + const char *bind_tex[SHADER_MAX_BINDS]; + int components; // how many components are relevant (0 = same as input) + bool align_offset; // whether to align hooked tex with reference. + void *priv; // this gets talloc_freed when the tex_hook is removed + void (*hook)(struct gl_video *p, struct image img, // generates GLSL + struct gl_transform *trans, void *priv); + bool (*cond)(struct gl_video *p, struct image img, void *priv); +}; + +struct surface { + struct ra_tex *tex; + uint64_t id; + double pts; +}; + +#define SURFACES_MAX 10 + +struct cached_file { + char *path; + struct bstr body; +}; + +struct pass_info { + struct bstr desc; + struct mp_pass_perf perf; +}; + +struct dr_buffer { + struct ra_buf *buf; + // The mpi reference will keep the data from being recycled (or from other + // references gaining write access) while the GPU is accessing the buffer. 
+ struct mp_image *mpi; +}; + +struct gl_video { + struct ra *ra; + + struct mpv_global *global; + struct mp_log *log; + struct gl_video_opts opts; + struct m_config_cache *opts_cache; + struct gl_lcms *cms; + + int fb_depth; // actual bits available in GL main framebuffer + struct m_color clear_color; + bool force_clear_color; + + struct gl_shader_cache *sc; + + struct osd_state *osd_state; + struct mpgl_osd *osd; + double osd_pts; + + struct ra_tex *lut_3d_texture; + bool use_lut_3d; + int lut_3d_size[3]; + + struct ra_tex *dither_texture; + + struct mp_image_params real_image_params; // configured format + struct mp_image_params image_params; // texture format (mind hwdec case) + struct ra_imgfmt_desc ra_format; // texture format + int plane_count; + + bool is_gray; + bool has_alpha; + char color_swizzle[5]; + bool use_integer_conversion; + + struct video_image image; + + struct dr_buffer *dr_buffers; + int num_dr_buffers; + + bool using_dr_path; + + bool dumb_mode; + bool forced_dumb_mode; + + // Cached vertex array, to avoid re-allocation per frame. For simplicity, + // our vertex format is simply a list of `vertex_pt`s, since this greatly + // simplifies offset calculation at the cost of (unneeded) flexibility. 
+ struct vertex_pt *tmp_vertex; + struct ra_renderpass_input *vao; + int vao_len; + + const struct ra_format *fbo_format; + struct ra_tex *merge_tex[4]; + struct ra_tex *scale_tex[4]; + struct ra_tex *integer_tex[4]; + struct ra_tex *indirect_tex; + struct ra_tex *blend_subs_tex; + struct ra_tex *error_diffusion_tex[2]; + struct ra_tex *screen_tex; + struct ra_tex *output_tex; + struct ra_tex **hook_textures; + int num_hook_textures; + int idx_hook_textures; + + struct ra_buf *hdr_peak_ssbo; + struct surface surfaces[SURFACES_MAX]; + + // user pass descriptions and textures + struct tex_hook *tex_hooks; + int num_tex_hooks; + struct gl_user_shader_tex *user_textures; + int num_user_textures; + + int surface_idx; + int surface_now; + int frames_drawn; + bool is_interpolated; + bool output_tex_valid; + + // state for configured scalers + struct scaler scaler[SCALER_COUNT]; + + struct mp_csp_equalizer_state *video_eq; + + struct mp_rect src_rect; // displayed part of the source video + struct mp_rect dst_rect; // video rectangle on output window + struct mp_osd_res osd_rect; // OSD size/margins + + // temporary during rendering + struct compute_info pass_compute; // compute shader metadata for this pass + struct image *pass_imgs; // bound images for this pass + int num_pass_imgs; + struct saved_img *saved_imgs; // saved (named) images for this frame + int num_saved_imgs; + + // effective current texture metadata - this will essentially affect the + // next render pass target, as well as implicitly tracking what needs to + // be done with the image + int texture_w, texture_h; + struct gl_transform texture_offset; // texture transform without rotation + int components; + bool use_linear; + float user_gamma; + + // pass info / metrics + struct pass_info pass_fresh[VO_PASS_PERF_MAX]; + struct pass_info pass_redraw[VO_PASS_PERF_MAX]; + struct pass_info *pass; + int pass_idx; + struct timer_pool *upload_timer; + struct timer_pool *blit_timer; + struct timer_pool *osd_timer; 
+ + int frames_uploaded; + int frames_rendered; + AVLFG lfg; + + // Cached because computing it can take relatively long + int last_dither_matrix_size; + float *last_dither_matrix; + + struct cached_file *files; + int num_files; + + struct ra_hwdec_ctx hwdec_ctx; + struct ra_hwdec_mapper *hwdec_mapper; + struct ra_hwdec *hwdec_overlay; + bool hwdec_active; + + bool dsi_warned; + bool broken_frame; // temporary error state + + bool colorspace_override_warned; + bool correct_downscaling_warned; +}; + +static const struct gl_video_opts gl_video_opts_def = { + .dither_algo = DITHER_FRUIT, + .dither_size = 6, + .temporal_dither_period = 1, + .error_diffusion = "sierra-lite", + .fbo_format = "auto", + .sigmoid_center = 0.75, + .sigmoid_slope = 6.5, + .scaler = { + {{"lanczos", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // scale + {{"hermite", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // dscale + {{NULL, .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // cscale + {{"oversample", .params={NAN, NAN}}, {.params = {NAN, NAN}}}, // tscale + }, + .scaler_resizes_only = true, + .correct_downscaling = true, + .linear_downscaling = true, + .sigmoid_upscaling = true, + .interpolation_threshold = 0.01, + .alpha_mode = ALPHA_BLEND_TILES, + .background = {0, 0, 0, 255}, + .gamma = 1.0f, + .tone_map = { + .curve = TONE_MAPPING_AUTO, + .curve_param = NAN, + .max_boost = 1.0, + .decay_rate = 20.0, + .scene_threshold_low = 1.0, + .scene_threshold_high = 3.0, + .contrast_smoothness = 3.5, + }, + .early_flush = -1, + .shader_cache = true, + .hwdec_interop = "auto", +}; + +static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value); + +static int validate_window_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value); + +static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value); + +#define OPT_BASE_STRUCT struct gl_video_opts + +// Use for 
// options which use NAN for defaults.
// Expands to OPT_FLOAT(field) followed by a .flags designator, so it can only
// be used as the tail of an option initializer.
#define OPT_FLOATDEF(field) \
    OPT_FLOAT(field), \
    .flags = M_OPT_DEFAULT_NAN

// Emits the option entries for one scaler. n is the option name prefix (a
// string literal, concatenated with the suffixes below) and i is the index
// into the scaler[] array of the options struct.
#define SCALER_OPTS(n, i) \
    {n, OPT_STRING_VALIDATE(scaler[i].kernel.name, validate_scaler_opt)}, \
    {n"-param1", OPT_FLOATDEF(scaler[i].kernel.params[0])}, \
    {n"-param2", OPT_FLOATDEF(scaler[i].kernel.params[1])}, \
    {n"-blur", OPT_FLOAT(scaler[i].kernel.blur)}, \
    {n"-cutoff", OPT_REMOVED("Hard-coded as 0.001")}, \
    {n"-taper", OPT_FLOAT(scaler[i].kernel.taper), M_RANGE(0.0, 1.0)}, \
    {n"-wparam", OPT_FLOATDEF(scaler[i].window.params[0])}, \
    {n"-wblur", OPT_REMOVED("Just adjust filter radius directly")}, \
    {n"-wtaper", OPT_FLOAT(scaler[i].window.taper), M_RANGE(0.0, 1.0)}, \
    {n"-clamp", OPT_FLOAT(scaler[i].clamp), M_RANGE(0.0, 1.0)}, \
    {n"-radius", OPT_FLOAT(scaler[i].radius), M_RANGE(0.5, 16.0)}, \
    {n"-antiring", OPT_FLOAT(scaler[i].antiring), M_RANGE(0.0, 1.0)}, \
    {n"-window", OPT_STRING_VALIDATE(scaler[i].window.name, validate_window_opt)}

// Option table for struct gl_video_opts. Field names are relative to
// OPT_BASE_STRUCT; defaults come from gl_video_opts_def. Entries declared with
// OPT_REMOVED carry only a message string and have no backing field.
const struct m_sub_options gl_video_conf = {
    .opts = (const m_option_t[]) {
        {"gpu-dumb-mode", OPT_CHOICE(dumb_mode,
            {"auto", 0}, {"yes", 1}, {"no", -1})},
        {"gamma-factor", OPT_FLOAT(gamma), M_RANGE(0.1, 2.0),
            .deprecation_message = "no replacement"},
        {"gamma-auto", OPT_BOOL(gamma_auto),
            .deprecation_message = "no replacement"},
        {"target-prim", OPT_CHOICE_C(target_prim, mp_csp_prim_names)},
        {"target-trc", OPT_CHOICE_C(target_trc, mp_csp_trc_names)},
        {"target-peak", OPT_CHOICE(target_peak, {"auto", 0}),
            M_RANGE(10, 10000)},
        {"target-contrast", OPT_CHOICE(target_contrast, {"auto", 0}, {"inf", -1}),
            M_RANGE(10, 1000000)},
        {"target-gamut", OPT_CHOICE_C(target_gamut, mp_csp_prim_names)},
        {"tone-mapping", OPT_CHOICE(tone_map.curve,
            {"auto", TONE_MAPPING_AUTO},
            {"clip", TONE_MAPPING_CLIP},
            {"mobius", TONE_MAPPING_MOBIUS},
            {"reinhard", TONE_MAPPING_REINHARD},
            {"hable", TONE_MAPPING_HABLE},
            {"gamma", TONE_MAPPING_GAMMA},
            {"linear", TONE_MAPPING_LINEAR},
            {"spline", TONE_MAPPING_SPLINE},
            {"bt.2390", TONE_MAPPING_BT_2390},
            {"bt.2446a", TONE_MAPPING_BT_2446A},
            {"st2094-40", TONE_MAPPING_ST2094_40},
            {"st2094-10", TONE_MAPPING_ST2094_10})},
        {"tone-mapping-param", OPT_FLOATDEF(tone_map.curve_param)},
        {"inverse-tone-mapping", OPT_BOOL(tone_map.inverse)},
        {"tone-mapping-max-boost", OPT_FLOAT(tone_map.max_boost),
            M_RANGE(1.0, 10.0)},
        {"tone-mapping-visualize", OPT_BOOL(tone_map.visualize)},
        {"gamut-mapping-mode", OPT_CHOICE(tone_map.gamut_mode,
            {"auto", GAMUT_AUTO},
            {"clip", GAMUT_CLIP},
            {"perceptual", GAMUT_PERCEPTUAL},
            {"relative", GAMUT_RELATIVE},
            {"saturation", GAMUT_SATURATION},
            {"absolute", GAMUT_ABSOLUTE},
            {"desaturate", GAMUT_DESATURATE},
            {"darken", GAMUT_DARKEN},
            {"warn", GAMUT_WARN},
            {"linear", GAMUT_LINEAR})},
        {"hdr-compute-peak", OPT_CHOICE(tone_map.compute_peak,
            {"auto", 0},
            {"yes", 1},
            {"no", -1})},
        {"hdr-peak-percentile", OPT_FLOAT(tone_map.peak_percentile),
            M_RANGE(0.0, 100.0)},
        {"hdr-peak-decay-rate", OPT_FLOAT(tone_map.decay_rate),
            M_RANGE(0.0, 1000.0)},
        {"hdr-scene-threshold-low", OPT_FLOAT(tone_map.scene_threshold_low),
            M_RANGE(0, 20.0)},
        {"hdr-scene-threshold-high", OPT_FLOAT(tone_map.scene_threshold_high),
            M_RANGE(0, 20.0)},
        {"hdr-contrast-recovery", OPT_FLOAT(tone_map.contrast_recovery),
            M_RANGE(0, 2.0)},
        {"hdr-contrast-smoothness", OPT_FLOAT(tone_map.contrast_smoothness),
            M_RANGE(1.0, 100.0)},
        {"opengl-pbo", OPT_BOOL(pbo)},
        SCALER_OPTS("scale", SCALER_SCALE),
        SCALER_OPTS("dscale", SCALER_DSCALE),
        SCALER_OPTS("cscale", SCALER_CSCALE),
        SCALER_OPTS("tscale", SCALER_TSCALE),
        {"scaler-lut-size", OPT_REMOVED("hard-coded as 8")},
        {"scaler-resizes-only", OPT_BOOL(scaler_resizes_only)},
        {"correct-downscaling", OPT_BOOL(correct_downscaling)},
        {"linear-downscaling", OPT_BOOL(linear_downscaling)},
        {"linear-upscaling", OPT_BOOL(linear_upscaling)},
        {"sigmoid-upscaling", OPT_BOOL(sigmoid_upscaling)},
        {"sigmoid-center", OPT_FLOAT(sigmoid_center), M_RANGE(0.0, 1.0)},
        {"sigmoid-slope", OPT_FLOAT(sigmoid_slope), M_RANGE(1.0, 20.0)},
        {"fbo-format", OPT_STRING(fbo_format)},
        {"dither-depth", OPT_CHOICE(dither_depth, {"no", -1}, {"auto", 0}),
            M_RANGE(-1, 16)},
        {"dither", OPT_CHOICE(dither_algo,
            {"fruit", DITHER_FRUIT},
            {"ordered", DITHER_ORDERED},
            {"error-diffusion", DITHER_ERROR_DIFFUSION},
            {"no", DITHER_NONE})},
        {"dither-size-fruit", OPT_INT(dither_size), M_RANGE(2, 8)},
        {"temporal-dither", OPT_BOOL(temporal_dither)},
        {"temporal-dither-period", OPT_INT(temporal_dither_period),
            M_RANGE(1, 128)},
        {"error-diffusion",
            OPT_STRING_VALIDATE(error_diffusion, validate_error_diffusion_opt)},
        {"alpha", OPT_CHOICE(alpha_mode,
            {"no", ALPHA_NO},
            {"yes", ALPHA_YES},
            {"blend", ALPHA_BLEND},
            {"blend-tiles", ALPHA_BLEND_TILES})},
        {"opengl-rectangle-textures", OPT_BOOL(use_rectangle)},
        {"background", OPT_COLOR(background)},
        {"interpolation", OPT_BOOL(interpolation)},
        {"interpolation-threshold", OPT_FLOAT(interpolation_threshold)},
        {"blend-subtitles", OPT_CHOICE(blend_subs,
            {"no", BLEND_SUBS_NO},
            {"yes", BLEND_SUBS_YES},
            {"video", BLEND_SUBS_VIDEO})},
        {"glsl-shaders", OPT_PATHLIST(user_shaders), .flags = M_OPT_FILE},
        {"glsl-shader", OPT_CLI_ALIAS("glsl-shaders-append")},
        {"glsl-shader-opts", OPT_KEYVALUELIST(user_shader_opts)},
        // "deband" is listed twice on purpose: once as the on/off switch and
        // once as the prefix for the deband_conf sub-options.
        {"deband", OPT_BOOL(deband)},
        {"deband", OPT_SUBSTRUCT(deband_opts, deband_conf)},
        {"sharpen", OPT_FLOAT(unsharp)},
        {"gpu-tex-pad-x", OPT_INT(tex_pad_x), M_RANGE(0, 4096)},
        {"gpu-tex-pad-y", OPT_INT(tex_pad_y), M_RANGE(0, 4096)},
        {"", OPT_SUBSTRUCT(icc_opts, mp_icc_conf)},
        {"gpu-shader-cache", OPT_BOOL(shader_cache)},
        {"gpu-shader-cache-dir", OPT_STRING(shader_cache_dir), .flags = M_OPT_FILE},
        {"gpu-hwdec-interop",
            OPT_STRING_VALIDATE(hwdec_interop, ra_hwdec_validate_opt)},
        {"gamut-warning", OPT_REMOVED("Replaced by --gamut-mapping-mode=warn")},
        {"gamut-clipping", OPT_REMOVED("Replaced by --gamut-mapping-mode=desaturate")},
        {"tone-mapping-desaturate", OPT_REMOVED("Replaced by --tone-mapping-mode")},
        {"tone-mapping-desaturate-exponent", OPT_REMOVED("Replaced by --tone-mapping-mode")},
        {"tone-mapping-crosstalk", OPT_REMOVED("Hard-coded as 0.04")},
        {"tone-mapping-mode", OPT_REMOVED("no replacement")},
        {0}
    },
    .size = sizeof(struct gl_video_opts),
    .defaults = &gl_video_opts_def,
};

// Forward declarations for functions used before their definition.
static void uninit_rendering(struct gl_video *p);
static void uninit_scaler(struct gl_video *p, struct scaler *scaler);
static void check_gl_features(struct gl_video *p);
static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id);
static const char *handle_scaler_opt(const char *name, bool tscale);
static void reinit_from_options(struct gl_video *p);
static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]);
static void gl_video_setup_hooks(struct gl_video *p);
static void gl_video_update_options(struct gl_video *p);

// Shader-building shorthands. All of them expect a `struct gl_video *p` in
// scope and forward to the shader cache p->sc. GLSL() stringifies its argument
// and appends a newline; the others take printf-style arguments.
#define GLSL(x) gl_sc_add(p->sc, #x "\n");
#define GLSLF(...) gl_sc_addf(p->sc, __VA_ARGS__)
#define GLSLHF(...) gl_sc_haddf(p->sc, __VA_ARGS__)
#define PRELUDE(...) gl_sc_paddf(p->sc, __VA_ARGS__)

// Return the contents of the file at path, reading it on first use and
// remembering it in p->files for later calls. Returns an empty bstr if path
// is NULL/empty or the read produced no data (empty results are not cached).
static struct bstr load_cached_file(struct gl_video *p, const char *path)
{
    if (!path || !path[0])
        return (struct bstr){0};
    for (int n = 0; n < p->num_files; n++) {
        if (strcmp(p->files[n].path, path) == 0)
            return p->files[n].body;
    }
    // not found -> load it
    char *fname = mp_get_user_path(NULL, p->global, path);
    struct bstr s = stream_read_file(fname, p, p->global, 1000000000); // 1GB
    talloc_free(fname);
    if (s.len) {
        // The cache key is the path as passed in, not the expanded fname.
        struct cached_file new = {
            .path = talloc_strdup(p, path),
            .body = s,
        };
        MP_TARRAY_APPEND(p, p->files, p->num_files, new);
        return new.body;
    }
    return (struct bstr){0};
}

// Forward msg to the backend's debug_marker callback, if it provides one.
static void debug_check_gl(struct gl_video *p, const char *msg)
{
    if (p->ra->fns->debug_marker)
        p->ra->fns->debug_marker(p->ra, msg);
}

// Mark every surface slot as unused and reset the surface/frame counters.
// The surface textures themselves are not freed here.
static void gl_video_reset_surfaces(struct gl_video *p)
{
    for (int i = 0; i < SURFACES_MAX; i++) {
        p->surfaces[i].id = 0;
        p->surfaces[i].pts = MP_NOPTS_VALUE;
    }
    p->surface_idx = 0;
    p->surface_now = 0;
    p->frames_drawn = 0;
    p->output_tex_valid = false;
}

// Free the private data of all texture hooks and all user textures, then
// empty both lists (the arrays themselves are kept).
static void gl_video_reset_hooks(struct gl_video *p)
{
    for (int i = 0; i < p->num_tex_hooks; i++)
        talloc_free(p->tex_hooks[i].priv);

    for (int i = 0; i < p->num_user_textures; i++)
        ra_tex_free(p->ra, &p->user_textures[i].tex);

    p->num_tex_hooks = 0;
    p->num_user_textures = 0;
}

// Map any integer (including negative ones) into [0, SURFACES_MAX).
static inline int surface_wrap(int id)
{
    id = id % SURFACES_MAX;
    // C's % keeps the sign of the dividend, so shift negative results up.
    return id < 0 ? id + SURFACES_MAX : id;
}

// Destroy the current OSD renderer and create a new one if an OSD state is
// set; otherwise p->osd stays NULL.
static void reinit_osd(struct gl_video *p)
{
    mpgl_osd_destroy(p->osd);
    p->osd = NULL;
    if (p->osd_state)
        p->osd = mpgl_osd_init(p->ra, p->log, p->osd_state);
}

// Release the scalers and all intermediate/render textures, then reset the
// surface and hook state and clear the shader cache's error flag.
static void uninit_rendering(struct gl_video *p)
{
    for (int n = 0; n < SCALER_COUNT; n++)
        uninit_scaler(p, &p->scaler[n]);

    ra_tex_free(p->ra, &p->dither_texture);

    for (int n = 0; n < 4; n++) {
        ra_tex_free(p->ra, &p->merge_tex[n]);
        ra_tex_free(p->ra, &p->scale_tex[n]);
        ra_tex_free(p->ra, &p->integer_tex[n]);
    }

    ra_tex_free(p->ra, &p->indirect_tex);
    ra_tex_free(p->ra, &p->blend_subs_tex);
    ra_tex_free(p->ra, &p->screen_tex);
    ra_tex_free(p->ra, &p->output_tex);

    for (int n = 0; n < 2; n++)
        ra_tex_free(p->ra, &p->error_diffusion_tex[n]);

    for (int n = 0; n < SURFACES_MAX; n++)
        ra_tex_free(p->ra, &p->surfaces[n].tex);

    for (int n = 0; n < p->num_hook_textures; n++)
        ra_tex_free(p->ra, &p->hook_textures[n]);

    gl_video_reset_surfaces(p);
    gl_video_reset_hooks(p);

    gl_sc_reset_error(p->sc);
}

// Return the current value of the gamma-auto option.
bool gl_video_gamma_auto_enabled(struct gl_video *p)
{
    return p->opts.gamma_auto;
}

// Build a colorspace description from the target-prim, target-trc and
// target-peak options; all other fields are zero-initialized.
struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p)
{
    return (struct mp_colorspace) {
        .primaries = p->opts.target_prim,
        .gamma = p->opts.target_trc,
        .hdr.max_luma = p->opts.target_peak,
    };
}

// Warning: icc_data.start must point to a ta allocation, and the function
// takes over ownership.
// Re-applies the options if gl_lcms_set_memory_profile() returns true.
void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data)
{
    if (gl_lcms_set_memory_profile(p->cms, icc_data))
        reinit_from_options(p);
}

// Return the profile_auto flag of the ICC options, or false if no ICC
// options are set.
bool gl_video_icc_auto_enabled(struct gl_video *p)
{
    return p->opts.icc_opts ? p->opts.icc_opts->profile_auto : false;
}

// Make sure p->lut_3d_texture matches the given primaries/transfer function
// (and the current image's ICC profile, if any). Returns true if a usable 3D
// LUT texture is available. On any failure, p->use_lut_3d is cleared so later
// calls return false immediately.
static bool gl_video_get_lut3d(struct gl_video *p, enum mp_csp_prim prim,
                               enum mp_csp_trc trc)
{
    if (!p->use_lut_3d)
        return false;

    struct AVBufferRef *icc = NULL;
    if (p->image.mpi)
        icc = p->image.mpi->icc_profile;

    // Existing texture is still valid: nothing to do.
    if (p->lut_3d_texture && !gl_lcms_has_changed(p->cms, prim, trc, icc))
        return true;

    // GLES3 doesn't provide filtered 16 bit integer textures
    // GLES2 doesn't even provide 3D textures
    const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4);
    if (!fmt || !(p->ra->caps & RA_CAP_TEX_3D)) {
        p->use_lut_3d = false;
        MP_WARN(p, "Disabling color management (no RGBA16 3D textures).\n");
        return false;
    }

    struct lut3d *lut3d = NULL;
    // NOTE(review): the !fmt test is redundant here; fmt was already checked
    // above.
    if (!fmt || !gl_lcms_get_lut3d(p->cms, &lut3d, prim, trc, icc) || !lut3d) {
        p->use_lut_3d = false;
        return false;
    }

    ra_tex_free(p->ra, &p->lut_3d_texture);

    struct ra_tex_params params = {
        .dimensions = 3,
        .w = lut3d->size[0],
        .h = lut3d->size[1],
        .d = lut3d->size[2],
        .format = fmt,
        .render_src = true,
        .src_linear = true,
        .initial_data = lut3d->data,
    };
    p->lut_3d_texture = ra_tex_create(p->ra, &params);

    debug_check_gl(p, "after 3d lut creation");

    for (int i = 0; i < 3; i++)
        p->lut_3d_size[i] = lut3d->size[i];

    // The LUT data is freed whether or not texture creation succeeded.
    talloc_free(lut3d);

    if (!p->lut_3d_texture) {
        p->use_lut_3d = false;
        return false;
    }

    return true;
}

// Fill an image struct from a ra_tex + some metadata
// tex may be NULL, in which case the image size is set to 1x1. The transform
// is the identity and the multiplier is 1.
static struct image image_wrap(struct ra_tex *tex, enum plane_type type,
                               int components)
{
    assert(type != PLANE_NONE);
    return (struct image){
        .type = type,
        .tex = tex,
        .multiplier = 1.0,
        .w = tex ? tex->params.w : 1,
        .h = tex ? tex->params.h : 1,
        .transform = identity_trans,
        .components = components,
    };
}

// Bind an image to a free texture unit and return its ID.
+static int pass_bind(struct gl_video *p, struct image img) +{ + int idx = p->num_pass_imgs; + MP_TARRAY_APPEND(p, p->pass_imgs, p->num_pass_imgs, img); + return idx; +} + +// Rotation by 90° and flipping. +// w/h is used for recentering. +static void get_transform(float w, float h, int rotate, bool flip, + struct gl_transform *out_tr) +{ + int a = rotate % 90 ? 0 : rotate / 90; + int sin90[4] = {0, 1, 0, -1}; // just to avoid rounding issues etc. + int cos90[4] = {1, 0, -1, 0}; + struct gl_transform tr = {{{ cos90[a], sin90[a]}, + {-sin90[a], cos90[a]}}}; + + // basically, recenter to keep the whole image in view + float b[2] = {1, 1}; + gl_transform_vec(tr, &b[0], &b[1]); + tr.t[0] += b[0] < 0 ? w : 0; + tr.t[1] += b[1] < 0 ? h : 0; + + if (flip) { + struct gl_transform fliptr = {{{1, 0}, {0, -1}}, {0, h}}; + gl_transform_trans(fliptr, &tr); + } + + *out_tr = tr; +} + +// Return the chroma plane upscaled to luma size, but with additional padding +// for image sizes not aligned to subsampling. +static int chroma_upsize(int size, int pixel) +{ + return (size + pixel - 1) / pixel * pixel; +} + +// If a and b are on the same plane, return what plane type should be used. +// If a or b are none, the other type always wins. +// Usually: LUMA/RGB/XYZ > CHROMA > ALPHA +static enum plane_type merge_plane_types(enum plane_type a, enum plane_type b) +{ + if (a == PLANE_NONE) + return b; + if (b == PLANE_LUMA || b == PLANE_RGB || b == PLANE_XYZ) + return b; + if (b != PLANE_NONE && a == PLANE_ALPHA) + return b; + return a; +} + +// Places a video_image's image textures + associated metadata into img[]. The +// number of textures is equal to p->plane_count. Any necessary plane offsets +// are stored in off. (e.g. 
// chroma position)
// img[] is zeroed first, so entries at index >= p->plane_count stay empty.
// Requires vimg->mpi to be set.
static void pass_get_images(struct gl_video *p, struct video_image *vimg,
                            struct image img[4], struct gl_transform off[4])
{
    assert(vimg->mpi);

    int w = p->image_params.w;
    int h = p->image_params.h;

    // Determine the chroma offset
    float ls_w = 1.0 / p->ra_format.chroma_w;
    float ls_h = 1.0 / p->ra_format.chroma_h;

    // Base chroma transform: scale by the inverse subsampling factors.
    struct gl_transform chroma = {{{ls_w, 0.0}, {0.0, ls_h}}};

    if (p->image_params.chroma_location != MP_CHROMA_CENTER) {
        int cx, cy;
        mp_get_chroma_location(p->image_params.chroma_location, &cx, &cy);
        // By default texture coordinates are such that chroma is centered with
        // any chroma subsampling. If a specific direction is given, make it
        // so that the luma and chroma sample line up exactly.
        // For 4:4:4, setting chroma location should have no effect at all.
        // luma sample size (in chroma coord. space)
        chroma.t[0] = ls_w < 1 ? ls_w * -cx / 2 : 0;
        chroma.t[1] = ls_h < 1 ? ls_h * -cy / 2 : 0;
    }

    memset(img, 0, 4 * sizeof(img[0]));
    for (int n = 0; n < p->plane_count; n++) {
        struct texplane *t = &vimg->planes[n];

        // Derive the plane type from its components, and count how many
        // leading component slots are unused (padding).
        enum plane_type type = PLANE_NONE;
        int padding = 0;
        for (int i = 0; i < 4; i++) {
            int c = p->ra_format.components[n][i];
            enum plane_type ctype;
            if (c == 0) {
                ctype = PLANE_NONE;
            } else if (c == 4) {
                ctype = PLANE_ALPHA;
            } else if (p->image_params.color.space == MP_CSP_RGB) {
                ctype = PLANE_RGB;
            } else if (p->image_params.color.space == MP_CSP_XYZ) {
                ctype = PLANE_XYZ;
            } else {
                ctype = c == 1 ? PLANE_LUMA : PLANE_CHROMA;
            }
            type = merge_plane_types(type, ctype);
            // Only an unbroken run of empty slots from index 0 counts.
            if (!c && padding == i)
                padding = i + 1;
        }

        // Only a negative component_pad reduces the valid bit count.
        int msb_valid_bits =
            p->ra_format.component_bits + MPMIN(p->ra_format.component_pad, 0);
        // Alpha-only planes use the RGB multiplier regardless of colorspace.
        int csp = type == PLANE_ALPHA ? MP_CSP_RGB : p->image_params.color.space;
        float tex_mul =
            1.0 / mp_get_csp_mul(csp, msb_valid_bits, p->ra_format.component_bits);
        // Float textures are used as-is, without range scaling.
        if (p->ra_format.component_type == RA_CTYPE_FLOAT)
            tex_mul = 1.0;

        img[n] = (struct image){
            .type = type,
            .tex = t->tex,
            .multiplier = tex_mul,
            .w = t->w,
            .h = t->h,
            .padding = padding,
        };

        // Number of non-empty component slots in this plane.
        for (int i = 0; i < 4; i++)
            img[n].components += !!p->ra_format.components[n][i];

        get_transform(t->w, t->h, p->image_params.rotate, t->flipped,
                      &img[n].transform);
        // A quarter turn swaps the visible width and height.
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(int, img[n].w, img[n].h);

        off[n] = identity_trans;

        if (type == PLANE_CHROMA) {
            // Rotate the chroma offset along with the image (always computed
            // with flip enabled, independent of t->flipped).
            struct gl_transform rot;
            get_transform(0, 0, p->image_params.rotate, true, &rot);

            struct gl_transform tr = chroma;
            gl_transform_vec(rot, &tr.t[0], &tr.t[1]);

            // Size of the padding introduced by rounding up to the chroma
            // subsampling grid, expressed in chroma samples.
            float dx = (chroma_upsize(w, p->ra_format.chroma_w) - w) * ls_w;
            float dy = (chroma_upsize(h, p->ra_format.chroma_h) - h) * ls_h;

            // Adjust the chroma offset if the real chroma size is fractional
            // due to image sizes not aligned to chroma subsampling.
            struct gl_transform rot2;
            get_transform(0, 0, p->image_params.rotate, t->flipped, &rot2);
            if (rot2.m[0][0] < 0)
                tr.t[0] += dx;
            if (rot2.m[1][0] < 0)
                tr.t[0] += dy;
            if (rot2.m[0][1] < 0)
                tr.t[1] += dx;
            if (rot2.m[1][1] < 0)
                tr.t[1] += dy;

            off[n] = tr;
        }
    }
}

// Return the index of the given component (assuming all non-padding components
// of all planes are concatenated into a linear list).
+static int find_comp(struct ra_imgfmt_desc *desc, int component) +{ + int cur = 0; + for (int n = 0; n < desc->num_planes; n++) { + for (int i = 0; i < 4; i++) { + if (desc->components[n][i]) { + if (desc->components[n][i] == component) + return cur; + cur++; + } + } + } + return -1; +} + +static void init_video(struct gl_video *p) +{ + p->use_integer_conversion = false; + + struct ra_hwdec *hwdec = ra_hwdec_get(&p->hwdec_ctx, p->image_params.imgfmt); + if (hwdec) { + if (hwdec->driver->overlay_frame) { + MP_WARN(p, "Using HW-overlay mode. No GL filtering is performed " + "on the video!\n"); + p->hwdec_overlay = hwdec; + } else { + p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, &p->image_params); + if (!p->hwdec_mapper) + MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); + } + if (p->hwdec_mapper) + p->image_params = p->hwdec_mapper->dst_params; + const char **exts = hwdec->glsl_extensions; + for (int n = 0; exts && exts[n]; n++) + gl_sc_enable_extension(p->sc, (char *)exts[n]); + p->hwdec_active = true; + } + + p->ra_format = (struct ra_imgfmt_desc){0}; + ra_get_imgfmt_desc(p->ra, p->image_params.imgfmt, &p->ra_format); + + p->plane_count = p->ra_format.num_planes; + + p->has_alpha = false; + p->is_gray = true; + + for (int n = 0; n < p->ra_format.num_planes; n++) { + for (int i = 0; i < 4; i++) { + if (p->ra_format.components[n][i]) { + p->has_alpha |= p->ra_format.components[n][i] == 4; + p->is_gray &= p->ra_format.components[n][i] == 1 || + p->ra_format.components[n][i] == 4; + } + } + } + + for (int c = 0; c < 4; c++) { + int loc = find_comp(&p->ra_format, c + 1); + p->color_swizzle[c] = "rgba"[loc >= 0 && loc < 4 ? 
loc : 0]; + } + p->color_swizzle[4] = '\0'; + + mp_image_params_guess_csp(&p->image_params); + + av_lfg_init(&p->lfg, 1); + + debug_check_gl(p, "before video texture creation"); + + if (!p->hwdec_active) { + struct video_image *vimg = &p->image; + + struct mp_image layout = {0}; + mp_image_set_params(&layout, &p->image_params); + + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + const struct ra_format *format = p->ra_format.planes[n]; + + plane->w = mp_image_plane_w(&layout, n); + plane->h = mp_image_plane_h(&layout, n); + + struct ra_tex_params params = { + .dimensions = 2, + .w = plane->w + p->opts.tex_pad_x, + .h = plane->h + p->opts.tex_pad_y, + .d = 1, + .format = format, + .render_src = true, + .src_linear = format->linear_filter, + .non_normalized = p->opts.use_rectangle, + .host_mutable = true, + }; + + MP_VERBOSE(p, "Texture for plane %d: %dx%d\n", n, + params.w, params.h); + + plane->tex = ra_tex_create(p->ra, ¶ms); + p->use_integer_conversion |= format->ctype == RA_CTYPE_UINT; + } + } + + debug_check_gl(p, "after video texture creation"); + + // Format-dependent checks. + check_gl_features(p); + + gl_video_setup_hooks(p); +} + +static struct dr_buffer *gl_find_dr_buffer(struct gl_video *p, uint8_t *ptr) +{ + for (int i = 0; i < p->num_dr_buffers; i++) { + struct dr_buffer *buffer = &p->dr_buffers[i]; + uint8_t *bufptr = buffer->buf->data; + size_t size = buffer->buf->params.size; + if (ptr >= bufptr && ptr < bufptr + size) + return buffer; + } + + return NULL; +} + +static void gc_pending_dr_fences(struct gl_video *p, bool force) +{ +again:; + for (int n = 0; n < p->num_dr_buffers; n++) { + struct dr_buffer *buffer = &p->dr_buffers[n]; + if (!buffer->mpi) + continue; + + bool res = p->ra->fns->buf_poll(p->ra, buffer->buf); + if (res || force) { + // Unreferencing the image could cause gl_video_dr_free_buffer() + // to be called by the talloc destructor (if it was the last + // reference). 
This will implicitly invalidate the buffer pointer + // and change the p->dr_buffers array. To make it worse, it could + // free multiple dr_buffers due to weird theoretical corner cases. + // This is also why we use the goto to iterate again from the + // start, because everything gets fucked up. Hail satan! + struct mp_image *ref = buffer->mpi; + buffer->mpi = NULL; + talloc_free(ref); + goto again; + } + } +} + +static void unref_current_image(struct gl_video *p) +{ + struct video_image *vimg = &p->image; + + if (vimg->hwdec_mapped) { + assert(p->hwdec_active && p->hwdec_mapper); + ra_hwdec_mapper_unmap(p->hwdec_mapper); + memset(vimg->planes, 0, sizeof(vimg->planes)); + vimg->hwdec_mapped = false; + } + + vimg->id = 0; + + mp_image_unrefp(&vimg->mpi); + + // While we're at it, also garbage collect pending fences in here to + // get it out of the way. + gc_pending_dr_fences(p, false); +} + +// If overlay mode is used, make sure to remove the overlay. +// Be careful with this. Removing the overlay and adding another one will +// lead to flickering artifacts. +static void unmap_overlay(struct gl_video *p) +{ + if (p->hwdec_overlay) + p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, NULL, NULL, NULL, true); +} + +static void uninit_video(struct gl_video *p) +{ + uninit_rendering(p); + + struct video_image *vimg = &p->image; + + unmap_overlay(p); + unref_current_image(p); + + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + ra_tex_free(p->ra, &plane->tex); + } + *vimg = (struct video_image){0}; + + // Invalidate image_params to ensure that gl_video_config() will call + // init_video() on uninitialized gl_video. 
+ p->real_image_params = (struct mp_image_params){0}; + p->image_params = p->real_image_params; + p->hwdec_active = false; + p->hwdec_overlay = NULL; + ra_hwdec_mapper_free(&p->hwdec_mapper); +} + +static void pass_record(struct gl_video *p, struct mp_pass_perf perf) +{ + if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) + return; + + struct pass_info *pass = &p->pass[p->pass_idx]; + pass->perf = perf; + + if (pass->desc.len == 0) + bstr_xappend(p, &pass->desc, bstr0("(unknown)")); + + p->pass_idx++; +} + +PRINTF_ATTRIBUTE(2, 3) +static void pass_describe(struct gl_video *p, const char *textf, ...) +{ + if (!p->pass || p->pass_idx == VO_PASS_PERF_MAX) + return; + + struct pass_info *pass = &p->pass[p->pass_idx]; + + if (pass->desc.len > 0) + bstr_xappend(p, &pass->desc, bstr0(" + ")); + + va_list ap; + va_start(ap, textf); + bstr_xappend_vasprintf(p, &pass->desc, textf, ap); + va_end(ap); +} + +static void pass_info_reset(struct gl_video *p, bool is_redraw) +{ + p->pass = is_redraw ? p->pass_redraw : p->pass_fresh; + p->pass_idx = 0; + + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { + p->pass[i].desc.len = 0; + p->pass[i].perf = (struct mp_pass_perf){0}; + } +} + +static void pass_report_performance(struct gl_video *p) +{ + if (!p->pass) + return; + + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { + struct pass_info *pass = &p->pass[i]; + if (pass->desc.len) { + MP_TRACE(p, "pass '%.*s': last %dus avg %dus peak %dus\n", + BSTR_P(pass->desc), + (int)pass->perf.last/1000, + (int)pass->perf.avg/1000, + (int)pass->perf.peak/1000); + } + } +} + +static void pass_prepare_src_tex(struct gl_video *p) +{ + struct gl_shader_cache *sc = p->sc; + + for (int n = 0; n < p->num_pass_imgs; n++) { + struct image *s = &p->pass_imgs[n]; + if (!s->tex) + continue; + + char *texture_name = mp_tprintf(32, "texture%d", n); + char *texture_size = mp_tprintf(32, "texture_size%d", n); + char *texture_rot = mp_tprintf(32, "texture_rot%d", n); + char *texture_off = mp_tprintf(32, "texture_off%d", 
n); + char *pixel_size = mp_tprintf(32, "pixel_size%d", n); + + gl_sc_uniform_texture(sc, texture_name, s->tex); + float f[2] = {1, 1}; + if (!s->tex->params.non_normalized) { + f[0] = s->tex->params.w; + f[1] = s->tex->params.h; + } + gl_sc_uniform_vec2(sc, texture_size, f); + gl_sc_uniform_mat2(sc, texture_rot, true, (float *)s->transform.m); + gl_sc_uniform_vec2(sc, texture_off, (float *)s->transform.t); + gl_sc_uniform_vec2(sc, pixel_size, (float[]){1.0f / f[0], + 1.0f / f[1]}); + } +} + +static void cleanup_binds(struct gl_video *p) +{ + p->num_pass_imgs = 0; +} + +// Sets the appropriate compute shader metadata for an implicit compute pass +// bw/bh: block size +static void pass_is_compute(struct gl_video *p, int bw, int bh, bool flexible) +{ + if (p->pass_compute.active && flexible) { + // Avoid overwriting existing block sizes when using a flexible pass + bw = p->pass_compute.block_w; + bh = p->pass_compute.block_h; + } + + p->pass_compute = (struct compute_info){ + .active = true, + .block_w = bw, + .block_h = bh, + }; +} + +// w/h: the width/height of the compute shader's operating domain (e.g. the +// target target that needs to be written, or the source texture that needs to +// be reduced) +static void dispatch_compute(struct gl_video *p, int w, int h, + struct compute_info info) +{ + PRELUDE("layout (local_size_x = %d, local_size_y = %d) in;\n", + info.threads_w > 0 ? info.threads_w : info.block_w, + info.threads_h > 0 ? 
info.threads_h : info.block_h); + + pass_prepare_src_tex(p); + + // Since we don't actually have vertices, we pretend for convenience + // reasons that we do and calculate the right texture coordinates based on + // the output sample ID + gl_sc_uniform_vec2(p->sc, "out_scale", (float[2]){ 1.0 / w, 1.0 / h }); + PRELUDE("#define outcoord(id) (out_scale * (vec2(id) + vec2(0.5)))\n"); + + for (int n = 0; n < p->num_pass_imgs; n++) { + struct image *s = &p->pass_imgs[n]; + if (!s->tex) + continue; + + PRELUDE("#define texmap%d(id) (texture_rot%d * outcoord(id) + " + "pixel_size%d * texture_off%d)\n", n, n, n, n); + PRELUDE("#define texcoord%d texmap%d(gl_GlobalInvocationID)\n", n, n); + } + + // always round up when dividing to make sure we don't leave off a part of + // the image + int num_x = info.block_w > 0 ? (w + info.block_w - 1) / info.block_w : 1, + num_y = info.block_h > 0 ? (h + info.block_h - 1) / info.block_h : 1; + + if (!(p->ra->caps & RA_CAP_NUM_GROUPS)) + PRELUDE("#define gl_NumWorkGroups uvec3(%d, %d, 1)\n", num_x, num_y); + + pass_record(p, gl_sc_dispatch_compute(p->sc, num_x, num_y, 1)); + cleanup_binds(p); +} + +static struct mp_pass_perf render_pass_quad(struct gl_video *p, + struct ra_fbo fbo, bool discard, + const struct mp_rect *dst) +{ + // The first element is reserved for `vec2 position` + int num_vertex_attribs = 1 + p->num_pass_imgs; + size_t vertex_stride = num_vertex_attribs * sizeof(struct vertex_pt); + + // Expand the VAO if necessary + while (p->vao_len < num_vertex_attribs) { + MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) { + .name = talloc_asprintf(p, "texcoord%d", p->vao_len - 1), + .type = RA_VARTYPE_FLOAT, + .dim_v = 2, + .dim_m = 1, + .offset = p->vao_len * sizeof(struct vertex_pt), + }); + } + + int num_vertices = 6; // quad as triangle list + int num_attribs_total = num_vertices * num_vertex_attribs; + MP_TARRAY_GROW(p, p->tmp_vertex, num_attribs_total); + + struct gl_transform t; + 
gl_transform_ortho_fbo(&t, fbo); + + float x[2] = {dst->x0, dst->x1}; + float y[2] = {dst->y0, dst->y1}; + gl_transform_vec(t, &x[0], &y[0]); + gl_transform_vec(t, &x[1], &y[1]); + + for (int n = 0; n < 4; n++) { + struct vertex_pt *vs = &p->tmp_vertex[num_vertex_attribs * n]; + // vec2 position in idx 0 + vs[0].x = x[n / 2]; + vs[0].y = y[n % 2]; + for (int i = 0; i < p->num_pass_imgs; i++) { + struct image *s = &p->pass_imgs[i]; + if (!s->tex) + continue; + struct gl_transform tr = s->transform; + float tx = (n / 2) * s->w; + float ty = (n % 2) * s->h; + gl_transform_vec(tr, &tx, &ty); + bool rect = s->tex->params.non_normalized; + // vec2 texcoordN in idx N+1 + vs[i + 1].x = tx / (rect ? 1 : s->tex->params.w); + vs[i + 1].y = ty / (rect ? 1 : s->tex->params.h); + } + } + + memmove(&p->tmp_vertex[num_vertex_attribs * 4], + &p->tmp_vertex[num_vertex_attribs * 2], + vertex_stride); + + memmove(&p->tmp_vertex[num_vertex_attribs * 5], + &p->tmp_vertex[num_vertex_attribs * 1], + vertex_stride); + + return gl_sc_dispatch_draw(p->sc, fbo.tex, discard, p->vao, num_vertex_attribs, + vertex_stride, p->tmp_vertex, num_vertices); +} + +static void finish_pass_fbo(struct gl_video *p, struct ra_fbo fbo, + bool discard, const struct mp_rect *dst) +{ + pass_prepare_src_tex(p); + pass_record(p, render_pass_quad(p, fbo, discard, dst)); + debug_check_gl(p, "after rendering"); + cleanup_binds(p); +} + +// dst_fbo: this will be used for rendering; possibly reallocating the whole +// FBO, if the required parameters have changed +// w, h: required FBO target dimension, and also defines the target rectangle +// used for rasterization +static void finish_pass_tex(struct gl_video *p, struct ra_tex **dst_tex, + int w, int h) +{ + if (!ra_tex_resize(p->ra, p->log, dst_tex, w, h, p->fbo_format)) { + cleanup_binds(p); + gl_sc_reset(p->sc); + return; + } + + // If RA_CAP_PARALLEL_COMPUTE is set, try to prefer compute shaders + // over fragment shaders wherever possible. 
+ if (!p->pass_compute.active && (p->ra->caps & RA_CAP_PARALLEL_COMPUTE) && + (*dst_tex)->params.storage_dst) + { + pass_is_compute(p, 16, 16, true); + } + + if (p->pass_compute.active) { + gl_sc_uniform_image2D_wo(p->sc, "out_image", *dst_tex); + if (!p->pass_compute.directly_writes) + GLSL(imageStore(out_image, ivec2(gl_GlobalInvocationID), color);) + + dispatch_compute(p, w, h, p->pass_compute); + p->pass_compute = (struct compute_info){0}; + + debug_check_gl(p, "after dispatching compute shader"); + } else { + struct ra_fbo fbo = { .tex = *dst_tex, }; + finish_pass_fbo(p, fbo, true, &(struct mp_rect){0, 0, w, h}); + } +} + +static const char *get_tex_swizzle(struct image *img) +{ + if (!img->tex) + return "rgba"; + return img->tex->params.format->luminance_alpha ? "raaa" : "rgba"; +} + +// Copy a texture to the vec4 color, while increasing offset. Also applies +// the texture multiplier to the sampled color +static void copy_image(struct gl_video *p, unsigned int *offset, struct image img) +{ + const unsigned int count = img.components; + char src[5] = {0}; + char dst[5] = {0}; + + assert(*offset + count < sizeof(dst)); + assert(img.padding + count < sizeof(src)); + + int id = pass_bind(p, img); + + const char *tex_fmt = get_tex_swizzle(&img); + const char *dst_fmt = "rgba"; + for (unsigned int i = 0; i < count; i++) { + src[i] = tex_fmt[img.padding + i]; + dst[i] = dst_fmt[*offset + i]; + } + + if (img.tex && img.tex->params.format->ctype == RA_CTYPE_UINT) { + uint64_t tex_max = 1ull << p->ra_format.component_bits; + img.multiplier *= 1.0 / (tex_max - 1); + } + + GLSLF("color.%s = %f * vec4(texture(texture%d, texcoord%d)).%s;\n", + dst, img.multiplier, id, id, src); + + *offset += count; +} + +static void skip_unused(struct gl_video *p, int num_components) +{ + for (int i = num_components; i < 4; i++) + GLSLF("color.%c = %f;\n", "rgba"[i], i < 3 ? 
0.0 : 1.0); +} + +static void uninit_scaler(struct gl_video *p, struct scaler *scaler) +{ + ra_tex_free(p->ra, &scaler->sep_fbo); + ra_tex_free(p->ra, &scaler->lut); + scaler->kernel = NULL; + scaler->initialized = false; +} + +static void hook_prelude(struct gl_video *p, const char *name, int id, + struct image img) +{ + GLSLHF("#define %s_raw texture%d\n", name, id); + GLSLHF("#define %s_pos texcoord%d\n", name, id); + GLSLHF("#define %s_size texture_size%d\n", name, id); + GLSLHF("#define %s_rot texture_rot%d\n", name, id); + GLSLHF("#define %s_off texture_off%d\n", name, id); + GLSLHF("#define %s_pt pixel_size%d\n", name, id); + GLSLHF("#define %s_map texmap%d\n", name, id); + GLSLHF("#define %s_mul %f\n", name, img.multiplier); + + char crap[5] = ""; + snprintf(crap, sizeof(crap), "%s", get_tex_swizzle(&img)); + + // Remove leading padding by rotating the swizzle mask. + int len = strlen(crap); + for (int n = 0; n < img.padding; n++) { + if (len) { + char f = crap[0]; + memmove(crap, crap + 1, len - 1); + crap[len - 1] = f; + } + } + + // Set up the sampling functions + GLSLHF("#define %s_tex(pos) (%s_mul * vec4(texture(%s_raw, pos)).%s)\n", + name, name, name, crap); + + if (p->ra->caps & RA_CAP_GATHER) { + GLSLHF("#define %s_gather(pos, c) (%s_mul * vec4(" + "textureGather(%s_raw, pos, c)))\n", name, name, name); + } + + // Since the extra matrix multiplication impacts performance, + // skip it unless the texture was actually rotated + if (gl_transform_eq(img.transform, identity_trans)) { + GLSLHF("#define %s_texOff(off) %s_tex(%s_pos + %s_pt * vec2(off))\n", + name, name, name, name); + } else { + GLSLHF("#define %s_texOff(off) " + "%s_tex(%s_pos + %s_rot * vec2(off)/%s_size)\n", + name, name, name, name, name); + } +} + +static bool saved_img_find(struct gl_video *p, const char *name, + struct image *out) +{ + if (!name || !out) + return false; + + for (int i = 0; i < p->num_saved_imgs; i++) { + if (strcmp(p->saved_imgs[i].name, name) == 0) { + *out = 
p->saved_imgs[i].img; + return true; + } + } + + return false; +} + +static void saved_img_store(struct gl_video *p, const char *name, + struct image img) +{ + assert(name); + + for (int i = 0; i < p->num_saved_imgs; i++) { + if (strcmp(p->saved_imgs[i].name, name) == 0) { + p->saved_imgs[i].img = img; + return; + } + } + + MP_TARRAY_APPEND(p, p->saved_imgs, p->num_saved_imgs, (struct saved_img) { + .name = name, + .img = img + }); +} + +static bool pass_hook_setup_binds(struct gl_video *p, const char *name, + struct image img, struct tex_hook *hook) +{ + for (int t = 0; t < SHADER_MAX_BINDS; t++) { + char *bind_name = (char *)hook->bind_tex[t]; + + if (!bind_name) + continue; + + // This is a special name that means "currently hooked texture" + if (strcmp(bind_name, "HOOKED") == 0) { + int id = pass_bind(p, img); + hook_prelude(p, "HOOKED", id, img); + hook_prelude(p, name, id, img); + continue; + } + + // BIND can also be used to load user-defined textures, in which + // case we will directly load them as a uniform instead of + // generating the hook_prelude boilerplate + for (int u = 0; u < p->num_user_textures; u++) { + struct gl_user_shader_tex *utex = &p->user_textures[u]; + if (bstr_equals0(utex->name, bind_name)) { + gl_sc_uniform_texture(p->sc, bind_name, utex->tex); + goto next_bind; + } + } + + struct image bind_img; + if (!saved_img_find(p, bind_name, &bind_img)) { + // Clean up texture bindings and move on to the next hook + MP_TRACE(p, "Skipping hook on %s due to no texture named %s.\n", + name, bind_name); + p->num_pass_imgs -= t; + return false; + } + + hook_prelude(p, bind_name, pass_bind(p, bind_img), bind_img); + +next_bind: ; + } + + return true; +} + +static struct ra_tex **next_hook_tex(struct gl_video *p) +{ + if (p->idx_hook_textures == p->num_hook_textures) + MP_TARRAY_APPEND(p, p->hook_textures, p->num_hook_textures, NULL); + + return &p->hook_textures[p->idx_hook_textures++]; +} + +// Process hooks for a plane, saving the result and 
returning a new image +// If 'trans' is NULL, the shader is forbidden from transforming img +static struct image pass_hook(struct gl_video *p, const char *name, + struct image img, struct gl_transform *trans) +{ + if (!name) + return img; + + saved_img_store(p, name, img); + + MP_TRACE(p, "Running hooks for %s\n", name); + for (int i = 0; i < p->num_tex_hooks; i++) { + struct tex_hook *hook = &p->tex_hooks[i]; + + // Figure out if this pass hooks this texture + for (int h = 0; h < SHADER_MAX_HOOKS; h++) { + if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0) + goto found; + } + + continue; + +found: + // Check the hook's condition + if (hook->cond && !hook->cond(p, img, hook->priv)) { + MP_TRACE(p, "Skipping hook on %s due to condition.\n", name); + continue; + } + + const char *store_name = hook->save_tex ? hook->save_tex : name; + bool is_overwrite = strcmp(store_name, name) == 0; + + // If user shader is set to align HOOKED with reference and fix its + // offset, it requires HOOKED to be resizable and overwrited. + if (is_overwrite && hook->align_offset) { + if (!trans) { + MP_ERR(p, "Hook tried to align unresizable texture %s!\n", + name); + return img; + } + + struct gl_transform align_off = identity_trans; + align_off.t[0] = trans->t[0]; + align_off.t[1] = trans->t[1]; + + gl_transform_trans(align_off, &img.transform); + } + + if (!pass_hook_setup_binds(p, name, img, hook)) + continue; + + // Run the actual hook. This generates a series of GLSL shader + // instructions sufficient for drawing the hook's output + struct gl_transform hook_off = identity_trans; + hook->hook(p, img, &hook_off, hook->priv); + + int comps = hook->components ? 
hook->components : img.components; + skip_unused(p, comps); + + // Compute the updated FBO dimensions and store the result + struct mp_rect_f sz = {0, 0, img.w, img.h}; + gl_transform_rect(hook_off, &sz); + int w = lroundf(fabs(sz.x1 - sz.x0)); + int h = lroundf(fabs(sz.y1 - sz.y0)); + + struct ra_tex **tex = next_hook_tex(p); + finish_pass_tex(p, tex, w, h); + struct image saved_img = image_wrap(*tex, img.type, comps); + + // If the texture we're saving overwrites the "current" texture, also + // update the tex parameter so that the future loop cycles will use the + // updated values, and export the offset + if (is_overwrite) { + if (!trans && !gl_transform_eq(hook_off, identity_trans)) { + MP_ERR(p, "Hook tried changing size of unscalable texture %s!\n", + name); + return img; + } + + img = saved_img; + if (trans) { + gl_transform_trans(hook_off, trans); + + // If user shader is set to align HOOKED, the offset it produces + // is dynamic (with static resizing factor though). + // Align it with reference manually to get offset fixed. + if (hook->align_offset) { + trans->t[0] = 0.0; + trans->t[1] = 0.0; + } + } + } + + saved_img_store(p, store_name, saved_img); + } + + return img; +} + +// This can be used at any time in the middle of rendering to specify an +// optional hook point, which if triggered will render out to a new FBO and +// load the result back into vec4 color. 
Offsets applied by the hooks are +// accumulated in tex_trans, and the FBO is dimensioned according +// to p->texture_w/h +static void pass_opt_hook_point(struct gl_video *p, const char *name, + struct gl_transform *tex_trans) +{ + if (!name) + return; + + for (int i = 0; i < p->num_tex_hooks; i++) { + struct tex_hook *hook = &p->tex_hooks[i]; + + for (int h = 0; h < SHADER_MAX_HOOKS; h++) { + if (hook->hook_tex[h] && strcmp(hook->hook_tex[h], name) == 0) + goto found; + } + + for (int b = 0; b < SHADER_MAX_BINDS; b++) { + if (hook->bind_tex[b] && strcmp(hook->bind_tex[b], name) == 0) + goto found; + } + } + + // Nothing uses this texture, don't bother storing it + return; + +found: ; + struct ra_tex **tex = next_hook_tex(p); + finish_pass_tex(p, tex, p->texture_w, p->texture_h); + struct image img = image_wrap(*tex, PLANE_RGB, p->components); + img = pass_hook(p, name, img, tex_trans); + copy_image(p, &(int){0}, img); + p->texture_w = img.w; + p->texture_h = img.h; + p->components = img.components; + pass_describe(p, "(remainder pass)"); +} + +static void load_shader(struct gl_video *p, struct bstr body) +{ + gl_sc_hadd_bstr(p->sc, body); + gl_sc_uniform_dynamic(p->sc); + gl_sc_uniform_f(p->sc, "random", (double)av_lfg_get(&p->lfg) / UINT32_MAX); + gl_sc_uniform_dynamic(p->sc); + gl_sc_uniform_i(p->sc, "frame", p->frames_uploaded); + gl_sc_uniform_vec2(p->sc, "input_size", + (float[]){(p->src_rect.x1 - p->src_rect.x0) * + p->texture_offset.m[0][0], + (p->src_rect.y1 - p->src_rect.y0) * + p->texture_offset.m[1][1]}); + gl_sc_uniform_vec2(p->sc, "target_size", + (float[]){p->dst_rect.x1 - p->dst_rect.x0, + p->dst_rect.y1 - p->dst_rect.y0}); + gl_sc_uniform_vec2(p->sc, "tex_offset", + (float[]){p->src_rect.x0 * p->texture_offset.m[0][0] + + p->texture_offset.t[0], + p->src_rect.y0 * p->texture_offset.m[1][1] + + p->texture_offset.t[1]}); +} + +// Semantic equality +static bool double_seq(double a, double b) +{ + return (isnan(a) && isnan(b)) || a == b; +} + +static 
bool scaler_fun_eq(struct scaler_fun a, struct scaler_fun b) +{ + if ((a.name && !b.name) || (b.name && !a.name)) + return false; + + return ((!a.name && !b.name) || strcmp(a.name, b.name) == 0) && + double_seq(a.params[0], b.params[0]) && + double_seq(a.params[1], b.params[1]) && + a.blur == b.blur && + a.taper == b.taper; +} + +static bool scaler_conf_eq(struct scaler_config a, struct scaler_config b) +{ + // Note: antiring isn't compared because it doesn't affect LUT + // generation + return scaler_fun_eq(a.kernel, b.kernel) && + scaler_fun_eq(a.window, b.window) && + a.radius == b.radius && + a.clamp == b.clamp; +} + +static void reinit_scaler(struct gl_video *p, struct scaler *scaler, + const struct scaler_config *conf, + double scale_factor, + int sizes[]) +{ + assert(conf); + if (scaler_conf_eq(scaler->conf, *conf) && + scaler->scale_factor == scale_factor && + scaler->initialized) + return; + + uninit_scaler(p, scaler); + + if (scaler->index == SCALER_DSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + + if (scaler->index == SCALER_CSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + + struct filter_kernel bare_window; + const struct filter_kernel *t_kernel = mp_find_filter_kernel(conf->kernel.name); + const struct filter_window *t_window = mp_find_filter_window(conf->window.name); + bool is_tscale = scaler->index == SCALER_TSCALE; + if (!t_kernel) { + const struct filter_window *window = mp_find_filter_window(conf->kernel.name); + if (window) { + bare_window = (struct filter_kernel) { .f = *window }; + t_kernel = &bare_window; + } + } + + scaler->conf = *conf; + scaler->conf.kernel.name = (char *)handle_scaler_opt(conf->kernel.name, is_tscale); + scaler->conf.window.name = t_window ? 
(char *)t_window->name : NULL; + scaler->scale_factor = scale_factor; + scaler->insufficient = false; + scaler->initialized = true; + if (!t_kernel) + return; + + scaler->kernel_storage = *t_kernel; + scaler->kernel = &scaler->kernel_storage; + + if (!t_window) { + // fall back to the scaler's default window if available + t_window = mp_find_filter_window(t_kernel->window); + } + if (t_window) + scaler->kernel->w = *t_window; + + for (int n = 0; n < 2; n++) { + if (!isnan(conf->kernel.params[n])) + scaler->kernel->f.params[n] = conf->kernel.params[n]; + if (!isnan(conf->window.params[n])) + scaler->kernel->w.params[n] = conf->window.params[n]; + } + + if (conf->kernel.blur > 0.0) + scaler->kernel->f.blur = conf->kernel.blur; + if (conf->window.blur > 0.0) + scaler->kernel->w.blur = conf->window.blur; + + if (conf->kernel.taper > 0.0) + scaler->kernel->f.taper = conf->kernel.taper; + if (conf->window.taper > 0.0) + scaler->kernel->w.taper = conf->window.taper; + + if (scaler->kernel->f.resizable && conf->radius > 0.0) + scaler->kernel->f.radius = conf->radius; + + scaler->kernel->clamp = conf->clamp; + scaler->insufficient = !mp_init_filter(scaler->kernel, sizes, scale_factor); + + int size = scaler->kernel->size; + int num_components = size > 2 ? 4 : size; + const struct ra_format *fmt = ra_find_float16_format(p->ra, num_components); + assert(fmt); + + int width = (size + num_components - 1) / num_components; // round up + int stride = width * num_components; + assert(size <= stride); + + static const int lut_size = 256; + float *weights = talloc_array(NULL, float, lut_size * stride); + mp_compute_lut(scaler->kernel, lut_size, stride, weights); + + bool use_1d = scaler->kernel->polar && (p->ra->caps & RA_CAP_TEX_1D); + + struct ra_tex_params lut_params = { + .dimensions = use_1d ? 1 : 2, + .w = use_1d ? lut_size : width, + .h = use_1d ? 
1 : lut_size, + .d = 1, + .format = fmt, + .render_src = true, + .src_linear = true, + .initial_data = weights, + }; + scaler->lut = ra_tex_create(p->ra, &lut_params); + + talloc_free(weights); + + debug_check_gl(p, "after initializing scaler"); +} + +// Special helper for sampling from two separated stages +static void pass_sample_separated(struct gl_video *p, struct image src, + struct scaler *scaler, int w, int h) +{ + // Separate the transformation into x and y components, per pass + struct gl_transform t_x = { + .m = {{src.transform.m[0][0], 0.0}, {src.transform.m[1][0], 1.0}}, + .t = {src.transform.t[0], 0.0}, + }; + struct gl_transform t_y = { + .m = {{1.0, src.transform.m[0][1]}, {0.0, src.transform.m[1][1]}}, + .t = {0.0, src.transform.t[1]}, + }; + + // First pass (scale only in the y dir) + src.transform = t_y; + sampler_prelude(p->sc, pass_bind(p, src)); + GLSLF("// first pass\n"); + pass_sample_separated_gen(p->sc, scaler, 0, 1); + GLSLF("color *= %f;\n", src.multiplier); + finish_pass_tex(p, &scaler->sep_fbo, src.w, h); + + // Second pass (scale only in the x dir) + src = image_wrap(scaler->sep_fbo, src.type, src.components); + src.transform = t_x; + pass_describe(p, "%s second pass", scaler->conf.kernel.name); + sampler_prelude(p->sc, pass_bind(p, src)); + pass_sample_separated_gen(p->sc, scaler, 1, 0); +} + +// Picks either the compute shader version or the regular sampler version +// depending on hardware support +static void pass_dispatch_sample_polar(struct gl_video *p, struct scaler *scaler, + struct image img, int w, int h) +{ + uint64_t reqs = RA_CAP_COMPUTE; + if ((p->ra->caps & reqs) != reqs) + goto fallback; + + int bound = ceil(scaler->kernel->radius_cutoff); + int offset = bound - 1; // padding top/left + int padding = offset + bound; // total padding + + float ratiox = (float)w / img.w, + ratioy = (float)h / img.h; + + // For performance we want to load at least as many pixels + // horizontally as there are threads in a warp (32 for 
nvidia), as + // well as enough to take advantage of shmem parallelism + const int warp_size = 32, threads = 256; + int bw = warp_size; + int bh = threads / bw; + + // We need to sample everything from base_min to base_max, so make sure + // we have enough room in shmem + int iw = (int)ceil(bw / ratiox) + padding + 1, + ih = (int)ceil(bh / ratioy) + padding + 1; + + int shmem_req = iw * ih * img.components * sizeof(float); + if (shmem_req > p->ra->max_shmem) + goto fallback; + + pass_is_compute(p, bw, bh, false); + pass_compute_polar(p->sc, scaler, img.components, bw, bh, iw, ih); + return; + +fallback: + // Fall back to regular polar shader when compute shaders are unsupported + // or the kernel is too big for shmem + pass_sample_polar(p->sc, scaler, img.components, + p->ra->caps & RA_CAP_GATHER); +} + +// Sample from image, with the src rectangle given by it. +// The dst rectangle is implicit by what the caller will do next, but w and h +// must still be what is going to be used (to dimension FBOs correctly). +// This will write the scaled contents to the vec4 "color". +// The scaler unit is initialized by this function; in order to avoid cache +// thrashing, the scaler unit should usually use the same parameters. 
+static void pass_sample(struct gl_video *p, struct image img, + struct scaler *scaler, const struct scaler_config *conf, + double scale_factor, int w, int h) +{ + reinit_scaler(p, scaler, conf, scale_factor, filter_sizes); + + // Describe scaler + const char *scaler_opt[] = { + [SCALER_SCALE] = "scale", + [SCALER_DSCALE] = "dscale", + [SCALER_CSCALE] = "cscale", + [SCALER_TSCALE] = "tscale", + }; + + pass_describe(p, "%s=%s (%s)", scaler_opt[scaler->index], + scaler->conf.kernel.name, plane_names[img.type]); + + bool is_separated = scaler->kernel && !scaler->kernel->polar; + + // Set up the transformation+prelude and bind the texture, for everything + // other than separated scaling (which does this in the subfunction) + if (!is_separated) + sampler_prelude(p->sc, pass_bind(p, img)); + + // Dispatch the scaler. They're all wildly different. + const char *name = scaler->conf.kernel.name; + if (strcmp(name, "bilinear") == 0) { + GLSL(color = texture(tex, pos);) + } else if (strcmp(name, "bicubic_fast") == 0) { + pass_sample_bicubic_fast(p->sc); + } else if (strcmp(name, "oversample") == 0) { + pass_sample_oversample(p->sc, scaler, w, h); + } else if (scaler->kernel && scaler->kernel->polar) { + pass_dispatch_sample_polar(p, scaler, img, w, h); + } else if (scaler->kernel) { + pass_sample_separated(p, img, scaler, w, h); + } else { + MP_ASSERT_UNREACHABLE(); // should never happen + } + + // Apply any required multipliers. 
Separated scaling already does this in + // its first stage + if (!is_separated) + GLSLF("color *= %f;\n", img.multiplier); + + // Micro-optimization: Avoid scaling unneeded channels + skip_unused(p, img.components); +} + +// Returns true if two images are semantically equivalent (same metadata) +static bool image_equiv(struct image a, struct image b) +{ + return a.type == b.type && + a.components == b.components && + a.multiplier == b.multiplier && + a.tex->params.format == b.tex->params.format && + a.tex->params.w == b.tex->params.w && + a.tex->params.h == b.tex->params.h && + a.w == b.w && + a.h == b.h && + gl_transform_eq(a.transform, b.transform); +} + +static void deband_hook(struct gl_video *p, struct image img, + struct gl_transform *trans, void *priv) +{ + pass_describe(p, "debanding (%s)", plane_names[img.type]); + pass_sample_deband(p->sc, p->opts.deband_opts, &p->lfg, + p->image_params.color.gamma); +} + +static void unsharp_hook(struct gl_video *p, struct image img, + struct gl_transform *trans, void *priv) +{ + pass_describe(p, "unsharp masking"); + pass_sample_unsharp(p->sc, p->opts.unsharp); +} + +struct szexp_ctx { + struct gl_video *p; + struct image img; +}; + +static bool szexp_lookup(void *priv, struct bstr var, float size[2]) +{ + struct szexp_ctx *ctx = priv; + struct gl_video *p = ctx->p; + + if (bstr_equals0(var, "NATIVE_CROPPED")) { + size[0] = (p->src_rect.x1 - p->src_rect.x0) * p->texture_offset.m[0][0]; + size[1] = (p->src_rect.y1 - p->src_rect.y0) * p->texture_offset.m[1][1]; + return true; + } + + // The size of OUTPUT is determined. It could be useful for certain + // user shaders to skip passes. 
+ if (bstr_equals0(var, "OUTPUT")) { + size[0] = p->dst_rect.x1 - p->dst_rect.x0; + size[1] = p->dst_rect.y1 - p->dst_rect.y0; + return true; + } + + // HOOKED is a special case + if (bstr_equals0(var, "HOOKED")) { + size[0] = ctx->img.w; + size[1] = ctx->img.h; + return true; + } + + for (int o = 0; o < p->num_saved_imgs; o++) { + if (bstr_equals0(var, p->saved_imgs[o].name)) { + size[0] = p->saved_imgs[o].img.w; + size[1] = p->saved_imgs[o].img.h; + return true; + } + } + + return false; +} + +static bool user_hook_cond(struct gl_video *p, struct image img, void *priv) +{ + struct gl_user_shader_hook *shader = priv; + assert(shader); + + float res = false; + struct szexp_ctx ctx = {p, img}; + eval_szexpr(p->log, &ctx, szexp_lookup, shader->cond, &res); + return res; +} + +static void user_hook(struct gl_video *p, struct image img, + struct gl_transform *trans, void *priv) +{ + struct gl_user_shader_hook *shader = priv; + assert(shader); + load_shader(p, shader->pass_body); + + pass_describe(p, "user shader: %.*s (%s)", BSTR_P(shader->pass_desc), + plane_names[img.type]); + + if (shader->compute.active) { + p->pass_compute = shader->compute; + GLSLF("hook();\n"); + } else { + GLSLF("color = hook();\n"); + } + + // Make sure we at least create a legal FBO on failure, since it's better + // to do this and display an error message than just crash OpenGL + float w = 1.0, h = 1.0; + + eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->width, &w); + eval_szexpr(p->log, &(struct szexp_ctx){p, img}, szexp_lookup, shader->height, &h); + + *trans = (struct gl_transform){{{w / img.w, 0}, {0, h / img.h}}}; + gl_transform_trans(shader->offset, trans); +} + +static bool add_user_hook(void *priv, struct gl_user_shader_hook hook) +{ + struct gl_video *p = priv; + struct gl_user_shader_hook *copy = talloc_ptrtype(p, copy); + *copy = hook; + + struct tex_hook texhook = { + .save_tex = bstrdup0(copy, hook.save_tex), + .components = hook.components, + 
.align_offset = hook.align_offset, + .hook = user_hook, + .cond = user_hook_cond, + .priv = copy, + }; + + for (int h = 0; h < SHADER_MAX_HOOKS; h++) + texhook.hook_tex[h] = bstrdup0(copy, hook.hook_tex[h]); + for (int h = 0; h < SHADER_MAX_BINDS; h++) + texhook.bind_tex[h] = bstrdup0(copy, hook.bind_tex[h]); + + MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, texhook); + return true; +} + +static bool add_user_tex(void *priv, struct gl_user_shader_tex tex) +{ + struct gl_video *p = priv; + + tex.tex = ra_tex_create(p->ra, &tex.params); + TA_FREEP(&tex.params.initial_data); + + if (!tex.tex) + return false; + + MP_TARRAY_APPEND(p, p->user_textures, p->num_user_textures, tex); + return true; +} + +static void load_user_shaders(struct gl_video *p, char **shaders) +{ + if (!shaders) + return; + + for (int n = 0; shaders[n] != NULL; n++) { + struct bstr file = load_cached_file(p, shaders[n]); + parse_user_shader(p->log, p->ra, file, p, add_user_hook, add_user_tex); + } +} + +static void gl_video_setup_hooks(struct gl_video *p) +{ + gl_video_reset_hooks(p); + + if (p->opts.deband) { + MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) { + .hook_tex = {"LUMA", "CHROMA", "RGB", "XYZ"}, + .bind_tex = {"HOOKED"}, + .hook = deband_hook, + }); + } + + if (p->opts.unsharp != 0.0) { + MP_TARRAY_APPEND(p, p->tex_hooks, p->num_tex_hooks, (struct tex_hook) { + .hook_tex = {"MAIN"}, + .bind_tex = {"HOOKED"}, + .hook = unsharp_hook, + }); + } + + load_user_shaders(p, p->opts.user_shaders); +} + +// sample from video textures, set "color" variable to yuv value +static void pass_read_video(struct gl_video *p) +{ + struct image img[4]; + struct gl_transform offsets[4]; + pass_get_images(p, &p->image, img, offsets); + + // To keep the code as simple as possibly, we currently run all shader + // stages even if they would be unnecessary (e.g. no hooks for a texture). + // In the future, deferred image should optimize this away. 
+ + // Merge semantically identical textures. This loop is done from back + // to front so that merged textures end up in the right order while + // simultaneously allowing us to skip unnecessary merges + for (int n = 3; n >= 0; n--) { + if (img[n].type == PLANE_NONE) + continue; + + int first = n; + int num = 0; + + for (int i = 0; i < n; i++) { + if (image_equiv(img[n], img[i]) && + gl_transform_eq(offsets[n], offsets[i])) + { + GLSLF("// merging plane %d ...\n", i); + copy_image(p, &num, img[i]); + first = MPMIN(first, i); + img[i] = (struct image){0}; + } + } + + if (num > 0) { + GLSLF("// merging plane %d ... into %d\n", n, first); + copy_image(p, &num, img[n]); + pass_describe(p, "merging planes"); + finish_pass_tex(p, &p->merge_tex[n], img[n].w, img[n].h); + img[first] = image_wrap(p->merge_tex[n], img[n].type, num); + img[n] = (struct image){0}; + } + } + + // If any textures are still in integer format by this point, we need + // to introduce an explicit conversion pass to avoid breaking hooks/scaling + for (int n = 0; n < 4; n++) { + if (img[n].tex && img[n].tex->params.format->ctype == RA_CTYPE_UINT) { + GLSLF("// use_integer fix for plane %d\n", n); + copy_image(p, &(int){0}, img[n]); + pass_describe(p, "use_integer fix"); + finish_pass_tex(p, &p->integer_tex[n], img[n].w, img[n].h); + img[n] = image_wrap(p->integer_tex[n], img[n].type, + img[n].components); + } + } + + // The basic idea is we assume the rgb/luma texture is the "reference" and + // scale everything else to match, after all planes are finalized. + // We find the reference texture first, in order to maintain texture offset + // between hooks on different type of planes. 
+ int reference_tex_num = 0; + for (int n = 0; n < 4; n++) { + switch (img[n].type) { + case PLANE_RGB: + case PLANE_XYZ: + case PLANE_LUMA: break; + default: continue; + } + + reference_tex_num = n; + break; + } + + // Dispatch the hooks for all of these textures, saving and perhaps + // modifying them in the process + for (int n = 0; n < 4; n++) { + const char *name; + switch (img[n].type) { + case PLANE_RGB: name = "RGB"; break; + case PLANE_LUMA: name = "LUMA"; break; + case PLANE_CHROMA: name = "CHROMA"; break; + case PLANE_ALPHA: name = "ALPHA"; break; + case PLANE_XYZ: name = "XYZ"; break; + default: continue; + } + + img[n] = pass_hook(p, name, img[n], &offsets[n]); + + if (reference_tex_num == n) { + // The reference texture is finalized now. + p->texture_w = img[n].w; + p->texture_h = img[n].h; + p->texture_offset = offsets[n]; + } + } + + // At this point all planes are finalized but they may not be at the + // required size yet. Furthermore, they may have texture offsets that + // require realignment. + + // Compute the reference rect + struct mp_rect_f src = {0.0, 0.0, p->image_params.w, p->image_params.h}; + struct mp_rect_f ref = src; + gl_transform_rect(p->texture_offset, &ref); + + // Explicitly scale all of the textures that don't match + for (int n = 0; n < 4; n++) { + if (img[n].type == PLANE_NONE) + continue; + + // If the planes are aligned identically, we will end up with the + // exact same source rectangle. + struct mp_rect_f rect = src; + gl_transform_rect(offsets[n], &rect); + if (mp_rect_f_seq(ref, rect)) + continue; + + // If the rectangles differ, then our planes have a different + // alignment and/or size. 
First of all, we have to compute the + // corrections required to meet the target rectangle + struct gl_transform fix = { + .m = {{(ref.x1 - ref.x0) / (rect.x1 - rect.x0), 0.0}, + {0.0, (ref.y1 - ref.y0) / (rect.y1 - rect.y0)}}, + .t = {ref.x0, ref.y0}, + }; + + // Since the scale in texture space is different from the scale in + // absolute terms, we have to scale the coefficients down to be + // relative to the texture's physical dimensions and local offset + struct gl_transform scale = { + .m = {{(float)img[n].w / p->texture_w, 0.0}, + {0.0, (float)img[n].h / p->texture_h}}, + .t = {-rect.x0, -rect.y0}, + }; + if (p->image_params.rotate % 180 == 90) + MPSWAP(double, scale.m[0][0], scale.m[1][1]); + + gl_transform_trans(scale, &fix); + + // Since the texture transform is a function of the texture coordinates + // to texture space, rather than the other way around, we have to + // actually apply the *inverse* of this. Fortunately, calculating + // the inverse is relatively easy here. + fix.m[0][0] = 1.0 / fix.m[0][0]; + fix.m[1][1] = 1.0 / fix.m[1][1]; + fix.t[0] = fix.m[0][0] * -fix.t[0]; + fix.t[1] = fix.m[1][1] * -fix.t[1]; + gl_transform_trans(fix, &img[n].transform); + + int scaler_id = -1; + const char *name = NULL; + switch (img[n].type) { + case PLANE_RGB: + case PLANE_LUMA: + case PLANE_XYZ: + scaler_id = SCALER_SCALE; + // these aren't worth hooking, fringe hypothetical cases only + break; + case PLANE_CHROMA: + scaler_id = SCALER_CSCALE; + name = "CHROMA_SCALED"; + break; + case PLANE_ALPHA: + // alpha always uses bilinear + name = "ALPHA_SCALED"; + } + + if (scaler_id < 0) + continue; + + const struct scaler_config *conf = &p->opts.scaler[scaler_id]; + + if (scaler_id == SCALER_CSCALE && (!conf->kernel.name || + !conf->kernel.name[0])) + { + conf = &p->opts.scaler[SCALER_SCALE]; + } + + struct scaler *scaler = &p->scaler[scaler_id]; + + // bilinear scaling is a free no-op thanks to GPU sampling + if (strcmp(conf->kernel.name, "bilinear") != 0) { + 
GLSLF("// upscaling plane %d\n", n); + pass_sample(p, img[n], scaler, conf, 1.0, p->texture_w, p->texture_h); + finish_pass_tex(p, &p->scale_tex[n], p->texture_w, p->texture_h); + img[n] = image_wrap(p->scale_tex[n], img[n].type, img[n].components); + } + + // Run any post-scaling hooks + img[n] = pass_hook(p, name, img[n], NULL); + } + + // All planes are of the same size and properly aligned at this point + pass_describe(p, "combining planes"); + int coord = 0; + for (int i = 0; i < 4; i++) { + if (img[i].type != PLANE_NONE) + copy_image(p, &coord, img[i]); + } + p->components = coord; +} + +// Utility function that simply binds a texture and reads from it, without any +// transformations. +static void pass_read_tex(struct gl_video *p, struct ra_tex *tex) +{ + struct image img = image_wrap(tex, PLANE_RGB, p->components); + copy_image(p, &(int){0}, img); +} + +// yuv conversion, and any other conversions before main up/down-scaling +static void pass_convert_yuv(struct gl_video *p) +{ + struct gl_shader_cache *sc = p->sc; + + struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; + cparams.gray = p->is_gray; + cparams.is_float = p->ra_format.component_type == RA_CTYPE_FLOAT; + mp_csp_set_image_params(&cparams, &p->image_params); + mp_csp_equalizer_state_get(p->video_eq, &cparams); + p->user_gamma = 1.0 / (cparams.gamma * p->opts.gamma); + + pass_describe(p, "color conversion"); + + if (p->color_swizzle[0]) + GLSLF("color = color.%s;\n", p->color_swizzle); + + // Pre-colormatrix input gamma correction + if (cparams.color.space == MP_CSP_XYZ) + pass_linearize(p->sc, p->image_params.color.gamma); + + // We always explicitly normalize the range in pass_read_video + cparams.input_bits = cparams.texture_bits = 0; + + // Conversion to RGB. For RGB itself, this still applies e.g. brightness + // and contrast controls, or expansion of e.g. LSB-packed 10 bit data. 
+ struct mp_cmat m = {{{0}}}; + mp_get_csp_matrix(&cparams, &m); + gl_sc_uniform_mat3(sc, "colormatrix", true, &m.m[0][0]); + gl_sc_uniform_vec3(sc, "colormatrix_c", m.c); + + GLSL(color.rgb = mat3(colormatrix) * color.rgb + colormatrix_c;) + + if (cparams.color.space == MP_CSP_XYZ) { + pass_delinearize(p->sc, p->image_params.color.gamma); + // mp_get_csp_matrix implicitly converts XYZ to DCI-P3 + p->image_params.color.space = MP_CSP_RGB; + p->image_params.color.primaries = MP_CSP_PRIM_DCI_P3; + } + + if (p->image_params.color.space == MP_CSP_BT_2020_C) { + // Conversion for C'rcY'cC'bc via the BT.2020 CL system: + // C'bc = (B'-Y'c) / 1.9404 | C'bc <= 0 + // = (B'-Y'c) / 1.5816 | C'bc > 0 + // + // C'rc = (R'-Y'c) / 1.7184 | C'rc <= 0 + // = (R'-Y'c) / 0.9936 | C'rc > 0 + // + // as per the BT.2020 specification, table 4. This is a non-linear + // transformation because (constant) luminance receives non-equal + // contributions from the three different channels. + GLSLF("// constant luminance conversion \n" + "color.br = color.br * mix(vec2(1.5816, 0.9936), \n" + " vec2(1.9404, 1.7184), \n" + " %s(lessThanEqual(color.br, vec2(0))))\n" + " + color.gg; \n", + gl_sc_bvec(p->sc, 2)); + // Expand channels to camera-linear light. This shader currently just + // assumes everything uses the BT.2020 12-bit gamma function, since the + // difference between 10 and 12-bit is negligible for anything other + // than 12-bit content. 
+ GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5), \n" + " pow((color.rgb + vec3(0.0993))*vec3(1.0/1.0993), \n" + " vec3(1.0/0.45)), \n" + " %s(lessThanEqual(vec3(0.08145), color.rgb))); \n", + gl_sc_bvec(p->sc, 3)); + // Calculate the green channel from the expanded RYcB + // The BT.2020 specification says Yc = 0.2627*R + 0.6780*G + 0.0593*B + GLSL(color.g = (color.g - 0.2627*color.r - 0.0593*color.b)*1.0/0.6780;) + // Recompress to receive the R'G'B' result, same as other systems + GLSLF("color.rgb = mix(color.rgb * vec3(4.5), \n" + " vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n" + " %s(lessThanEqual(vec3(0.0181), color.rgb))); \n", + gl_sc_bvec(p->sc, 3)); + } + + p->components = 3; + if (!p->has_alpha || p->opts.alpha_mode == ALPHA_NO) { + GLSL(color.a = 1.0;) + } else if (p->image_params.alpha == MP_ALPHA_PREMUL) { + p->components = 4; + } else { + p->components = 4; + GLSL(color = vec4(color.rgb * color.a, color.a);) // straight -> premul + } +} + +static void get_scale_factors(struct gl_video *p, bool transpose_rot, double xy[2]) +{ + double target_w = p->src_rect.x1 - p->src_rect.x0; + double target_h = p->src_rect.y1 - p->src_rect.y0; + if (transpose_rot && p->image_params.rotate % 180 == 90) + MPSWAP(double, target_w, target_h); + xy[0] = (p->dst_rect.x1 - p->dst_rect.x0) / target_w; + xy[1] = (p->dst_rect.y1 - p->dst_rect.y0) / target_h; +} + +// Cropping. +static void compute_src_transform(struct gl_video *p, struct gl_transform *tr) +{ + float sx = (p->src_rect.x1 - p->src_rect.x0) / (float)p->texture_w, + sy = (p->src_rect.y1 - p->src_rect.y0) / (float)p->texture_h, + ox = p->src_rect.x0, + oy = p->src_rect.y0; + struct gl_transform transform = {{{sx, 0}, {0, sy}}, {ox, oy}}; + + gl_transform_trans(p->texture_offset, &transform); + + *tr = transform; +} + +// Takes care of the main scaling and pre/post-conversions +static void pass_scale_main(struct gl_video *p) +{ + // Figure out the main scaler. 
+ double xy[2]; + get_scale_factors(p, true, xy); + + // actual scale factor should be divided by the scale factor of prescaling. + xy[0] /= p->texture_offset.m[0][0]; + xy[1] /= p->texture_offset.m[1][1]; + + // The calculation of scale factor involves 32-bit float(from gl_transform), + // use non-strict equality test to tolerate precision loss. + bool downscaling = xy[0] < 1.0 - FLT_EPSILON || xy[1] < 1.0 - FLT_EPSILON; + bool upscaling = !downscaling && (xy[0] > 1.0 + FLT_EPSILON || + xy[1] > 1.0 + FLT_EPSILON); + double scale_factor = 1.0; + + struct scaler *scaler = &p->scaler[SCALER_SCALE]; + struct scaler_config scaler_conf = p->opts.scaler[SCALER_SCALE]; + if (p->opts.scaler_resizes_only && !downscaling && !upscaling) { + scaler_conf.kernel.name = "bilinear"; + // For scaler-resizes-only, we round the texture offset to + // the nearest round value in order to prevent ugly blurriness + // (in exchange for slightly shifting the image by up to half a + // subpixel) + p->texture_offset.t[0] = roundf(p->texture_offset.t[0]); + p->texture_offset.t[1] = roundf(p->texture_offset.t[1]); + } + if (downscaling && p->opts.scaler[SCALER_DSCALE].kernel.name) { + scaler_conf = p->opts.scaler[SCALER_DSCALE]; + scaler = &p->scaler[SCALER_DSCALE]; + } + + // When requesting correct-downscaling and the clip is anamorphic, and + // because only a single scale factor is used for both axes, enable it only + // when both axes are downscaled, and use the milder of the factors to not + // end up with too much blur on one axis (even if we end up with sub-optimal + // scale factor on the other axis). This is better than not respecting + // correct scaling at all for anamorphic clips. 
+ double f = MPMAX(xy[0], xy[1]); + if (p->opts.correct_downscaling && f < 1.0) + scale_factor = 1.0 / f; + + // Pre-conversion, like linear light/sigmoidization + GLSLF("// scaler pre-conversion\n"); + bool use_linear = false; + if (downscaling) { + use_linear = p->opts.linear_downscaling; + + // Linear light downscaling results in nasty artifacts for HDR curves + // due to the potentially extreme brightness differences severely + // compounding any ringing. So just scale in gamma light instead. + if (mp_trc_is_hdr(p->image_params.color.gamma)) + use_linear = false; + } else if (upscaling) { + use_linear = p->opts.linear_upscaling || p->opts.sigmoid_upscaling; + } + + if (use_linear) { + p->use_linear = true; + pass_linearize(p->sc, p->image_params.color.gamma); + pass_opt_hook_point(p, "LINEAR", NULL); + } + + bool use_sigmoid = use_linear && p->opts.sigmoid_upscaling && upscaling; + float sig_center, sig_slope, sig_offset, sig_scale; + if (use_sigmoid) { + // Coefficients for the sigmoidal transform are taken from the + // formula here: http://www.imagemagick.org/Usage/color_mods/#sigmoidal + sig_center = p->opts.sigmoid_center; + sig_slope = p->opts.sigmoid_slope; + // This function needs to go through (0,0) and (1,1) so we compute the + // values at 1 and 0, and then scale/shift them, respectively. 
+ sig_offset = 1.0/(1+expf(sig_slope * sig_center)); + sig_scale = 1.0/(1+expf(sig_slope * (sig_center-1))) - sig_offset; + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + GLSLF("color.rgb = %f - log(1.0/(color.rgb * %f + %f) - 1.0) * 1.0/%f;\n", + sig_center, sig_scale, sig_offset, sig_slope); + pass_opt_hook_point(p, "SIGMOID", NULL); + } + + pass_opt_hook_point(p, "PREKERNEL", NULL); + + int vp_w = p->dst_rect.x1 - p->dst_rect.x0; + int vp_h = p->dst_rect.y1 - p->dst_rect.y0; + struct gl_transform transform; + compute_src_transform(p, &transform); + + GLSLF("// main scaling\n"); + finish_pass_tex(p, &p->indirect_tex, p->texture_w, p->texture_h); + struct image src = image_wrap(p->indirect_tex, PLANE_RGB, p->components); + gl_transform_trans(transform, &src.transform); + pass_sample(p, src, scaler, &scaler_conf, scale_factor, vp_w, vp_h); + + // Changes the texture size to display size after main scaler. + p->texture_w = vp_w; + p->texture_h = vp_h; + + pass_opt_hook_point(p, "POSTKERNEL", NULL); + + GLSLF("// scaler post-conversion\n"); + if (use_sigmoid) { + // Inverse of the transformation above + GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);) + GLSLF("color.rgb = (1.0/(1.0 + exp(%f * (%f - color.rgb))) - %f) * 1.0/%f;\n", + sig_slope, sig_center, sig_offset, sig_scale); + } +} + +// Adapts the colors to the right output color space. (Final pass during +// rendering) +// If OSD is true, ignore any changes that may have been made to the video +// by previous passes (i.e. linear scaling) +static void pass_colormanage(struct gl_video *p, struct mp_colorspace src, + struct mp_colorspace fbo_csp, int flags, bool osd) +{ + struct ra *ra = p->ra; + + // Configure the destination according to the FBO color space, + // unless specific transfer function, primaries or target peak + // is set. If values are set to _AUTO, the most likely intended + // values are guesstimated later in this function. 
+ struct mp_colorspace dst = { + .gamma = p->opts.target_trc == MP_CSP_TRC_AUTO ? + fbo_csp.gamma : p->opts.target_trc, + .primaries = p->opts.target_prim == MP_CSP_PRIM_AUTO ? + fbo_csp.primaries : p->opts.target_prim, + .light = MP_CSP_LIGHT_DISPLAY, + .hdr.max_luma = !p->opts.target_peak ? + fbo_csp.hdr.max_luma : p->opts.target_peak, + }; + + if (!p->colorspace_override_warned && + ((fbo_csp.gamma && dst.gamma != fbo_csp.gamma) || + (fbo_csp.primaries && dst.primaries != fbo_csp.primaries))) + { + MP_WARN(p, "One or more colorspace value is being overridden " + "by user while the FBO provides colorspace information: " + "transfer function: (dst: %s, fbo: %s), " + "primaries: (dst: %s, fbo: %s). " + "Rendering can lead to incorrect results!\n", + m_opt_choice_str(mp_csp_trc_names, dst.gamma), + m_opt_choice_str(mp_csp_trc_names, fbo_csp.gamma), + m_opt_choice_str(mp_csp_prim_names, dst.primaries), + m_opt_choice_str(mp_csp_prim_names, fbo_csp.primaries)); + p->colorspace_override_warned = true; + } + + if (dst.gamma == MP_CSP_TRC_HLG) + dst.light = MP_CSP_LIGHT_SCENE_HLG; + + if (p->use_lut_3d && (flags & RENDER_SCREEN_COLOR)) { + // The 3DLUT is always generated against the video's original source + // space, *not* the reference space. (To avoid having to regenerate + // the 3DLUT for the OSD on every frame) + enum mp_csp_prim prim_orig = p->image_params.color.primaries; + enum mp_csp_trc trc_orig = p->image_params.color.gamma; + + // One exception: HDR is not implemented by LittleCMS for technical + // limitation reasons, so we use a gamma 2.2 input curve here instead. + // We could pick any value we want here, the difference is just coding + // efficiency. 
+ if (mp_trc_is_hdr(trc_orig)) + trc_orig = MP_CSP_TRC_GAMMA22; + + if (gl_video_get_lut3d(p, prim_orig, trc_orig)) { + dst.primaries = prim_orig; + dst.gamma = trc_orig; + assert(dst.primaries && dst.gamma); + } + } + + if (dst.primaries == MP_CSP_PRIM_AUTO) { + // The vast majority of people are on sRGB or BT.709 displays, so pick + // this as the default output color space. + dst.primaries = MP_CSP_PRIM_BT_709; + + if (src.primaries == MP_CSP_PRIM_BT_601_525 || + src.primaries == MP_CSP_PRIM_BT_601_625) + { + // Since we auto-pick BT.601 and BT.709 based on the dimensions, + // combined with the fact that they're very similar to begin with, + // and to avoid confusing the average user, just don't adapt BT.601 + // content automatically at all. + dst.primaries = src.primaries; + } + } + + if (dst.gamma == MP_CSP_TRC_AUTO) { + // Most people seem to complain when the image is darker or brighter + // than what they're "used to", so just avoid changing the gamma + // altogether by default. The only exceptions to this rule apply to + // very unusual TRCs, which even hardcode technoluddites would probably + // not enjoy viewing unaltered. + dst.gamma = src.gamma; + + // Avoid outputting linear light or HDR content "by default". For these + // just pick gamma 2.2 as a default, since it's a good estimate for + // the response of typical displays + if (dst.gamma == MP_CSP_TRC_LINEAR || mp_trc_is_hdr(dst.gamma)) + dst.gamma = MP_CSP_TRC_GAMMA22; + } + + // If there's no specific signal peak known for the output display, infer + // it from the chosen transfer function. 
Also normalize the src peak, in + // case it was unknown + if (!dst.hdr.max_luma) + dst.hdr.max_luma = mp_trc_nom_peak(dst.gamma) * MP_REF_WHITE; + if (!src.hdr.max_luma) + src.hdr.max_luma = mp_trc_nom_peak(src.gamma) * MP_REF_WHITE; + + // Whitelist supported modes + switch (p->opts.tone_map.curve) { + case TONE_MAPPING_AUTO: + case TONE_MAPPING_CLIP: + case TONE_MAPPING_MOBIUS: + case TONE_MAPPING_REINHARD: + case TONE_MAPPING_HABLE: + case TONE_MAPPING_GAMMA: + case TONE_MAPPING_LINEAR: + case TONE_MAPPING_BT_2390: + break; + default: + MP_WARN(p, "Tone mapping curve unsupported by vo_gpu, falling back.\n"); + p->opts.tone_map.curve = TONE_MAPPING_AUTO; + break; + } + + switch (p->opts.tone_map.gamut_mode) { + case GAMUT_AUTO: + case GAMUT_WARN: + case GAMUT_CLIP: + case GAMUT_DESATURATE: + break; + default: + MP_WARN(p, "Gamut mapping mode unsupported by vo_gpu, falling back.\n"); + p->opts.tone_map.gamut_mode = GAMUT_AUTO; + break; + } + + struct gl_tone_map_opts tone_map = p->opts.tone_map; + bool detect_peak = tone_map.compute_peak >= 0 && mp_trc_is_hdr(src.gamma) + && src.hdr.max_luma > dst.hdr.max_luma; + + if (detect_peak && !p->hdr_peak_ssbo) { + struct { + float average[2]; + int32_t frame_sum; + uint32_t frame_max; + uint32_t counter; + } peak_ssbo = {0}; + + struct ra_buf_params params = { + .type = RA_BUF_TYPE_SHADER_STORAGE, + .size = sizeof(peak_ssbo), + .initial_data = &peak_ssbo, + }; + + p->hdr_peak_ssbo = ra_buf_create(ra, ¶ms); + if (!p->hdr_peak_ssbo) { + MP_WARN(p, "Failed to create HDR peak detection SSBO, disabling.\n"); + tone_map.compute_peak = p->opts.tone_map.compute_peak = -1; + detect_peak = false; + } + } + + if (detect_peak) { + pass_describe(p, "detect HDR peak"); + pass_is_compute(p, 8, 8, true); // 8x8 is good for performance + gl_sc_ssbo(p->sc, "PeakDetect", p->hdr_peak_ssbo, + "vec2 average;" + "int frame_sum;" + "uint frame_max;" + "uint counter;" + ); + } else { + tone_map.compute_peak = -1; + } + + // Adapt from src to dst 
as necessary + pass_color_map(p->sc, p->use_linear && !osd, src, dst, &tone_map); + + if (p->use_lut_3d && (flags & RENDER_SCREEN_COLOR)) { + gl_sc_uniform_texture(p->sc, "lut_3d", p->lut_3d_texture); + GLSL(vec3 cpos;) + for (int i = 0; i < 3; i++) + GLSLF("cpos[%d] = LUT_POS(color[%d], %d.0);\n", i, i, p->lut_3d_size[i]); + GLSL(color.rgb = tex3D(lut_3d, cpos).rgb;) + } +} + +void gl_video_set_fb_depth(struct gl_video *p, int fb_depth) +{ + p->fb_depth = fb_depth; +} + +static void pass_dither(struct gl_video *p) +{ + // Assume 8 bits per component if unknown. + int dst_depth = p->fb_depth > 0 ? p->fb_depth : 8; + if (p->opts.dither_depth > 0) + dst_depth = p->opts.dither_depth; + + if (p->opts.dither_depth < 0 || p->opts.dither_algo == DITHER_NONE) + return; + + if (p->opts.dither_algo == DITHER_ERROR_DIFFUSION) { + const struct error_diffusion_kernel *kernel = + mp_find_error_diffusion_kernel(p->opts.error_diffusion); + int o_w = p->dst_rect.x1 - p->dst_rect.x0, + o_h = p->dst_rect.y1 - p->dst_rect.y0; + + int shmem_req = mp_ef_compute_shared_memory_size(kernel, o_h); + if (shmem_req > p->ra->max_shmem) { + MP_WARN(p, "Fallback to dither=fruit because there is no enough " + "shared memory (%d/%d).\n", + shmem_req, (int)p->ra->max_shmem); + p->opts.dither_algo = DITHER_FRUIT; + } else { + finish_pass_tex(p, &p->error_diffusion_tex[0], o_w, o_h); + + struct image img = image_wrap(p->error_diffusion_tex[0], PLANE_RGB, p->components); + + // Ensure the block size doesn't exceed the maximum of the + // implementation. 
+ int block_size = MPMIN(p->ra->max_compute_group_threads, o_h); + + pass_describe(p, "dither=error-diffusion (kernel=%s, depth=%d)", + kernel->name, dst_depth); + + p->pass_compute = (struct compute_info) { + .active = true, + .threads_w = block_size, + .threads_h = 1, + .directly_writes = true + }; + + int tex_id = pass_bind(p, img); + + pass_error_diffusion(p->sc, kernel, tex_id, o_w, o_h, + dst_depth, block_size); + + finish_pass_tex(p, &p->error_diffusion_tex[1], o_w, o_h); + + img = image_wrap(p->error_diffusion_tex[1], PLANE_RGB, p->components); + copy_image(p, &(int){0}, img); + + return; + } + } + + if (!p->dither_texture) { + MP_VERBOSE(p, "Dither to %d.\n", dst_depth); + + int tex_size = 0; + void *tex_data = NULL; + const struct ra_format *fmt = NULL; + void *temp = NULL; + + if (p->opts.dither_algo == DITHER_FRUIT) { + int sizeb = p->opts.dither_size; + int size = 1 << sizeb; + + if (p->last_dither_matrix_size != size) { + p->last_dither_matrix = talloc_realloc(p, p->last_dither_matrix, + float, size * size); + mp_make_fruit_dither_matrix(p->last_dither_matrix, sizeb); + p->last_dither_matrix_size = size; + } + + // Prefer R16 texture since they provide higher precision. + fmt = ra_find_unorm_format(p->ra, 2, 1); + if (!fmt) + fmt = ra_find_float16_format(p->ra, 1); + if (fmt) { + tex_size = size; + tex_data = p->last_dither_matrix; + if (fmt->ctype == RA_CTYPE_UNORM) { + uint16_t *t = temp = talloc_array(NULL, uint16_t, size * size); + for (int n = 0; n < size * size; n++) + t[n] = p->last_dither_matrix[n] * UINT16_MAX; + tex_data = t; + } + } else { + MP_VERBOSE(p, "GL too old. 
Falling back to ordered dither.\n"); + p->opts.dither_algo = DITHER_ORDERED; + } + } + + if (p->opts.dither_algo == DITHER_ORDERED) { + temp = talloc_array(NULL, char, 8 * 8); + mp_make_ordered_dither_matrix(temp, 8); + + fmt = ra_find_unorm_format(p->ra, 1, 1); + tex_size = 8; + tex_data = temp; + } + + struct ra_tex_params params = { + .dimensions = 2, + .w = tex_size, + .h = tex_size, + .d = 1, + .format = fmt, + .render_src = true, + .src_repeat = true, + .initial_data = tex_data, + }; + p->dither_texture = ra_tex_create(p->ra, ¶ms); + + debug_check_gl(p, "dither setup"); + + talloc_free(temp); + + if (!p->dither_texture) + return; + } + + GLSLF("// dithering\n"); + + // This defines how many bits are considered significant for output on + // screen. The superfluous bits will be used for rounding according to the + // dither matrix. The precision of the source implicitly decides how many + // dither patterns can be visible. + int dither_quantization = (1 << dst_depth) - 1; + int dither_size = p->dither_texture->params.w; + + gl_sc_uniform_texture(p->sc, "dither", p->dither_texture); + + GLSLF("vec2 dither_pos = gl_FragCoord.xy * 1.0/%d.0;\n", dither_size); + + if (p->opts.temporal_dither) { + int phase = (p->frames_rendered / p->opts.temporal_dither_period) % 8u; + float r = phase * (M_PI / 2); // rotate + float m = phase < 4 ? 1 : -1; // mirror + + float matrix[2][2] = {{cos(r), -sin(r) }, + {sin(r) * m, cos(r) * m}}; + gl_sc_uniform_dynamic(p->sc); + gl_sc_uniform_mat2(p->sc, "dither_trafo", true, &matrix[0][0]); + + GLSL(dither_pos = dither_trafo * dither_pos;) + } + + GLSL(float dither_value = texture(dither, dither_pos).r;) + GLSLF("color = floor(color * %d.0 + dither_value + 0.5 / %d.0) * 1.0/%d.0;\n", + dither_quantization, dither_size * dither_size, dither_quantization); +} + +// Draws the OSD, in scene-referred colors.. If cms is true, subtitles are +// instead adapted to the display's gamut. 
static void pass_draw_osd(struct gl_video *p, int osd_flags, int frame_flags,
                          double pts, struct mp_osd_res rect, struct ra_fbo fbo,
                          bool cms)
{
    if (frame_flags & RENDER_FRAME_VF_SUBS)
        osd_flags |= OSD_DRAW_SUB_FILTER;

    // "Subtitles only" combined with "OSD only" leaves nothing to draw.
    if ((osd_flags & OSD_DRAW_SUB_ONLY) && (osd_flags & OSD_DRAW_OSD_ONLY))
        return;

    mpgl_osd_generate(p->osd, rect, pts, p->image_params.stereo3d, osd_flags);

    // A single timer measurement covers all OSD parts drawn below.
    timer_pool_start(p->osd_timer);
    for (int n = 0; n < MAX_OSD_PARTS; n++) {
        // (This returns false if this part is empty with nothing to draw.)
        if (!mpgl_osd_draw_prepare(p->osd, n, p->sc))
            continue;
        // When subtitles need to be color managed, assume they're in sRGB
        // (for lack of anything saner to do)
        if (cms) {
            static const struct mp_colorspace csp_srgb = {
                .primaries = MP_CSP_PRIM_BT_709,
                .gamma = MP_CSP_TRC_SRGB,
                .light = MP_CSP_LIGHT_DISPLAY,
            };

            pass_colormanage(p, csp_srgb, fbo.color_space, frame_flags, true);
        }
        mpgl_osd_draw_finish(p->osd, n, p->sc, fbo);
    }

    timer_pool_stop(p->osd_timer);
    pass_describe(p, "drawing osd");
    pass_record(p, timer_pool_measure(p->osd_timer));
}

// Returns the ratio between size and chroma_upsize(size, pixel).
// NOTE(review): presumably chroma_upsize() rounds size up to a multiple of
// the chroma subsampling factor - confirm against its definition.
static float chroma_realign(int size, int pixel)
{
    return size / (float)chroma_upsize(size, pixel);
}

// Minimal rendering code path, for GLES or OpenGL 2.1 without proper FBOs.
// Copies every plane directly with the crop transform applied (adjusted for
// chroma subsampling) and then converts the result with pass_convert_yuv().
static void pass_render_frame_dumb(struct gl_video *p)
{
    struct image img[4];
    struct gl_transform off[4];
    pass_get_images(p, &p->image, img, off);

    struct gl_transform transform;
    compute_src_transform(p, &transform);

    int index = 0;
    for (int i = 0; i < p->plane_count; i++) {
        // Subsampling factors of this plane (1 for non-chroma planes),
        // swapped along with the axes for 90/270 degree rotation.
        int cw = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_w : 1;
        int ch = img[i].type == PLANE_CHROMA ? p->ra_format.chroma_h : 1;
        if (p->image_params.rotate % 180 == 90)
            MPSWAP(int, cw, ch);

        struct gl_transform t = transform;
        t.m[0][0] *= chroma_realign(p->texture_w, cw);
        t.m[1][1] *= chroma_realign(p->texture_h, ch);

        // The crop offset is given in luma pixels; scale it to this plane.
        t.t[0] /= cw;
        t.t[1] /= ch;

        t.t[0] += off[i].t[0];
        t.t[1] += off[i].t[1];

        gl_transform_trans(img[i].transform, &t);
        img[i].transform = t;

        copy_image(p, &index, img[i]);
    }

    pass_convert_yuv(p);
}

// The main rendering function, takes care of everything up to and including
// upscaling. p->image is rendered.
// flags: bit set of RENDER_FRAME_* flags
// Returns false if uploading the frame failed. In dumb mode only the upload
// is performed here.
static bool pass_render_frame(struct gl_video *p, struct mp_image *mpi,
                              uint64_t id, int flags)
{
    // initialize the texture parameters and temporary variables
    p->texture_w = p->image_params.w;
    p->texture_h = p->image_params.h;
    p->texture_offset = identity_trans;
    p->components = 0;
    p->num_saved_imgs = 0;
    p->idx_hook_textures = 0;
    p->use_linear = false;

    // try uploading the frame
    if (!pass_upload_image(p, mpi, id))
        return false;

    if (p->image_params.rotate % 180 == 90)
        MPSWAP(int, p->texture_w, p->texture_h);

    // Dumb mode renders in pass_draw_to_screen() via pass_render_frame_dumb().
    if (p->dumb_mode)
        return true;

    pass_read_video(p);
    pass_opt_hook_point(p, "NATIVE", &p->texture_offset);
    pass_convert_yuv(p);
    pass_opt_hook_point(p, "MAINPRESUB", &p->texture_offset);

    // For subtitles
    double vpts = p->image.mpi->pts;
    if (vpts == MP_NOPTS_VALUE)
        vpts = p->osd_pts;

    // blend-subs=video: draw subtitles at video resolution, before scaling.
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_VIDEO &&
        (flags & RENDER_FRAME_SUBS))
    {
        double scale[2];
        get_scale_factors(p, false, scale);
        struct mp_osd_res rect = {
            .w = p->texture_w, .h = p->texture_h,
            .display_par = scale[1] / scale[0], // counter compensate scaling
        };
        finish_pass_tex(p, &p->blend_subs_tex, rect.w, rect.h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, vpts, rect, fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs video");
    }
    pass_opt_hook_point(p, "MAIN", &p->texture_offset);

    pass_scale_main(p);

    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
        vp_h = p->dst_rect.y1 - p->dst_rect.y0;
    // blend-subs=yes: draw subtitles at display resolution, after scaling.
    if (p->osd && p->opts.blend_subs == BLEND_SUBS_YES &&
        (flags & RENDER_FRAME_SUBS))
    {
        // Recreate the real video size from the src/dst rects
        struct mp_osd_res rect = {
            .w = vp_w, .h = vp_h,
            .ml = -p->src_rect.x0, .mr = p->src_rect.x1 - p->image_params.w,
            .mt = -p->src_rect.y0, .mb = p->src_rect.y1 - p->image_params.h,
            .display_par = 1.0,
        };
        // Adjust margins for scale
        double scale[2];
        get_scale_factors(p, true, scale);
        rect.ml *= scale[0]; rect.mr *= scale[0];
        rect.mt *= scale[1]; rect.mb *= scale[1];
        // We should always blend subtitles in non-linear light
        if (p->use_linear) {
            pass_delinearize(p->sc, p->image_params.color.gamma);
            p->use_linear = false;
        }
        finish_pass_tex(p, &p->blend_subs_tex, p->texture_w, p->texture_h);
        struct ra_fbo fbo = { p->blend_subs_tex };
        pass_draw_osd(p, OSD_DRAW_SUB_ONLY, flags, vpts, rect, fbo, false);
        pass_read_tex(p, p->blend_subs_tex);
        pass_describe(p, "blend subs");
    }

    pass_opt_hook_point(p, "SCALED", NULL);

    return true;
}

// Final output stage: applies the user gamma, color management, alpha
// handling and (with RENDER_SCREEN_COLOR) dithering, then renders the
// pending pass into fbo restricted to dst_rect.
static void pass_draw_to_screen(struct gl_video *p, struct ra_fbo fbo, int flags)
{
    if (p->dumb_mode)
        pass_render_frame_dumb(p);

    // Adjust the overall gamma before drawing to screen
    if (p->user_gamma != 1) {
        gl_sc_uniform_f(p->sc, "user_gamma", p->user_gamma);
        GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
        GLSL(color.rgb = pow(color.rgb, vec3(user_gamma));)
    }

    pass_colormanage(p, p->image_params.color, fbo.color_space, flags, false);

    // Since finish_pass_fbo doesn't work with compute shaders, and neither
    // does the checkerboard/dither code, we may need an indirection via
    // p->screen_tex here.
    if (p->pass_compute.active) {
        int o_w = p->dst_rect.x1 - p->dst_rect.x0,
            o_h = p->dst_rect.y1 - p->dst_rect.y0;
        finish_pass_tex(p, &p->screen_tex, o_w, o_h);
        struct image tmp = image_wrap(p->screen_tex, PLANE_RGB, p->components);
        copy_image(p, &(int){0}, tmp);
    }

    if (p->has_alpha){
        if (p->opts.alpha_mode == ALPHA_BLEND_TILES) {
            // Draw checkerboard pattern to indicate transparency
            GLSLF("// transparency checkerboard\n");
            GLSL(bvec2 tile = lessThan(fract(gl_FragCoord.xy * 1.0/32.0), vec2(0.5));)
            GLSL(vec3 background = vec3(tile.x == tile.y ? 0.93 : 0.87);)
            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
            GLSL(color.a = 1.0;)
        } else if (p->opts.alpha_mode == ALPHA_BLEND) {
            // Blend into background color (usually black)
            struct m_color c = p->opts.background;
            GLSLF("vec4 background = vec4(%f, %f, %f, %f);\n",
                  c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0);
            GLSL(color.rgb += background.rgb * (1.0 - color.a);)
            GLSL(color.a = background.a;)
        }
    }

    pass_opt_hook_point(p, "OUTPUT", NULL);

    if (flags & RENDER_SCREEN_COLOR)
        pass_dither(p);
    pass_describe(p, "output to screen");
    finish_pass_fbo(p, fbo, false, &p->dst_rect);
}

// Renders mpi (frame id "id") into the interpolation surface surf, in linear
// light and at dst_rect size, and records its id and pts.
// flags: bit set of RENDER_FRAME_* flags
// Returns false if rendering the frame failed.
static bool update_surface(struct gl_video *p, struct mp_image *mpi,
                           uint64_t id, struct surface *surf, int flags)
{
    int vp_w = p->dst_rect.x1 - p->dst_rect.x0,
        vp_h = p->dst_rect.y1 - p->dst_rect.y0;

    pass_info_reset(p, false);
    if (!pass_render_frame(p, mpi, id, flags))
        return false;

    // Frame blending should always be done in linear light to preserve the
    // overall brightness, otherwise this will result in flashing dark frames
    // because mixing in compressed light artificially darkens the results
    if (!p->use_linear) {
        p->use_linear = true;
        pass_linearize(p->sc, p->image_params.color.gamma);
    }

    finish_pass_tex(p, &surf->tex, vp_w, vp_h);
    surf->id = id;
    surf->pts = mpi->pts;
    return true;
}

// Draws an
interpolate frame to fbo, based on the frame timing in t +// flags: bit set of RENDER_FRAME_* flags +static void gl_video_interpolate_frame(struct gl_video *p, struct vo_frame *t, + struct ra_fbo fbo, int flags) +{ + bool is_new = false; + + // Reset the queue completely if this is a still image, to avoid any + // interpolation artifacts from surrounding frames when unpausing or + // framestepping + if (t->still) + gl_video_reset_surfaces(p); + + // First of all, figure out if we have a frame available at all, and draw + // it manually + reset the queue if not + if (p->surfaces[p->surface_now].id == 0) { + struct surface *now = &p->surfaces[p->surface_now]; + if (!update_surface(p, t->current, t->frame_id, now, flags)) + return; + p->surface_idx = p->surface_now; + is_new = true; + } + + // Find the right frame for this instant + if (t->current) { + int next = surface_wrap(p->surface_now + 1); + while (p->surfaces[next].id && + p->surfaces[next].id > p->surfaces[p->surface_now].id && + p->surfaces[p->surface_now].id < t->frame_id) + { + p->surface_now = next; + next = surface_wrap(next + 1); + } + } + + // Figure out the queue size. For illustration, a filter radius of 2 would + // look like this: _ A [B] C D _ + // A is surface_bse, B is surface_now, C is surface_now+1 and D is + // surface_end. 
+ struct scaler *tscale = &p->scaler[SCALER_TSCALE]; + reinit_scaler(p, tscale, &p->opts.scaler[SCALER_TSCALE], 1, tscale_sizes); + bool oversample = strcmp(tscale->conf.kernel.name, "oversample") == 0; + bool linear = strcmp(tscale->conf.kernel.name, "linear") == 0; + int size; + + if (oversample || linear) { + size = 2; + } else { + assert(tscale->kernel && !tscale->kernel->polar); + size = ceil(tscale->kernel->size); + } + + int radius = size/2; + int surface_now = p->surface_now; + int surface_bse = surface_wrap(surface_now - (radius-1)); + int surface_end = surface_wrap(surface_now + radius); + assert(surface_wrap(surface_bse + size-1) == surface_end); + + // Render new frames while there's room in the queue. Note that technically, + // this should be done before the step where we find the right frame, but + // it only barely matters at the very beginning of playback, and this way + // makes the code much more linear. + int surface_dst = surface_wrap(p->surface_idx + 1); + for (int i = 0; i < t->num_frames; i++) { + // Avoid overwriting data we might still need + if (surface_dst == surface_bse - 1) + break; + + struct mp_image *f = t->frames[i]; + uint64_t f_id = t->frame_id + i; + if (!mp_image_params_equal(&f->params, &p->real_image_params)) + continue; + + if (f_id > p->surfaces[p->surface_idx].id) { + struct surface *dst = &p->surfaces[surface_dst]; + if (!update_surface(p, f, f_id, dst, flags)) + return; + p->surface_idx = surface_dst; + surface_dst = surface_wrap(surface_dst + 1); + is_new = true; + } + } + + // Figure out whether the queue is "valid". A queue is invalid if the + // frames' PTS is not monotonically increasing. Anything else is invalid, + // so avoid blending incorrect data and just draw the latest frame as-is. + // Possible causes for failure of this condition include seeks, pausing, + // end of playback or start of playback. 
+ bool valid = true; + for (int i = surface_bse, ii; valid && i != surface_end; i = ii) { + ii = surface_wrap(i + 1); + if (p->surfaces[i].id == 0 || p->surfaces[ii].id == 0) { + valid = false; + } else if (p->surfaces[ii].id < p->surfaces[i].id) { + valid = false; + MP_DBG(p, "interpolation queue underrun\n"); + } + } + + // Update OSD PTS to synchronize subtitles with the displayed frame + p->osd_pts = p->surfaces[surface_now].pts; + + // Finally, draw the right mix of frames to the screen. + if (!is_new) + pass_info_reset(p, true); + pass_describe(p, "interpolation"); + if (!valid || t->still) { + // surface_now is guaranteed to be valid, so we can safely use it. + pass_read_tex(p, p->surfaces[surface_now].tex); + p->is_interpolated = false; + } else { + double mix = t->vsync_offset / t->ideal_frame_duration; + // The scaler code always wants the fcoord to be between 0 and 1, + // so we try to adjust by using the previous set of N frames instead + // (which requires some extra checking to make sure it's valid) + if (mix < 0.0) { + int prev = surface_wrap(surface_bse - 1); + if (p->surfaces[prev].id != 0 && + p->surfaces[prev].id < p->surfaces[surface_bse].id) + { + mix += 1.0; + surface_bse = prev; + } else { + mix = 0.0; // at least don't blow up, this should only + // ever happen at the start of playback + } + } + + if (oversample) { + // Oversample uses the frame area as mix ratio, not the vsync + // position itself + double vsync_dist = t->vsync_interval / t->ideal_frame_duration, + threshold = tscale->conf.kernel.params[0]; + threshold = isnan(threshold) ? 0.0 : threshold; + mix = (1 - mix) / vsync_dist; + mix = mix <= 0 + threshold ? 0 : mix; + mix = mix >= 1 - threshold ? 
1 : mix; + mix = 1 - mix; + } + + // Blend the frames together + if (oversample || linear) { + gl_sc_uniform_dynamic(p->sc); + gl_sc_uniform_f(p->sc, "inter_coeff", mix); + GLSL(color = mix(texture(texture0, texcoord0), + texture(texture1, texcoord1), + inter_coeff);) + } else { + gl_sc_uniform_dynamic(p->sc); + gl_sc_uniform_f(p->sc, "fcoord", mix); + pass_sample_separated_gen(p->sc, tscale, 0, 0); + } + + // Load all the required frames + for (int i = 0; i < size; i++) { + struct image img = + image_wrap(p->surfaces[surface_wrap(surface_bse+i)].tex, + PLANE_RGB, p->components); + // Since the code in pass_sample_separated currently assumes + // the textures are bound in-order and starting at 0, we just + // assert to make sure this is the case (which it should always be) + int id = pass_bind(p, img); + assert(id == i); + } + + MP_TRACE(p, "inter frame dur: %f vsync: %f, mix: %f\n", + t->ideal_frame_duration, t->vsync_interval, mix); + p->is_interpolated = true; + } + pass_draw_to_screen(p, fbo, flags); + + p->frames_drawn += 1; +} + +void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, + struct ra_fbo fbo, int flags) +{ + gl_video_update_options(p); + + struct mp_rect target_rc = {0, 0, fbo.tex->params.w, fbo.tex->params.h}; + + p->broken_frame = false; + + bool has_frame = !!frame->current; + + struct m_color c = p->clear_color; + float clear_color[4] = {c.r / 255.0, c.g / 255.0, c.b / 255.0, c.a / 255.0}; + p->ra->fns->clear(p->ra, fbo.tex, clear_color, &target_rc); + + if (p->hwdec_overlay) { + if (has_frame) { + float *color = p->hwdec_overlay->overlay_colorkey; + p->ra->fns->clear(p->ra, fbo.tex, color, &p->dst_rect); + } + + p->hwdec_overlay->driver->overlay_frame(p->hwdec_overlay, frame->current, + &p->src_rect, &p->dst_rect, + frame->frame_id != p->image.id); + + if (frame->current) + p->osd_pts = frame->current->pts; + + // Disable GL rendering + has_frame = false; + } + + if (has_frame) { + bool interpolate = p->opts.interpolation && 
frame->display_synced && + (p->frames_drawn || !frame->still); + if (interpolate) { + double ratio = frame->ideal_frame_duration / frame->vsync_interval; + if (fabs(ratio - 1.0) < p->opts.interpolation_threshold) + interpolate = false; + } + + if (interpolate) { + gl_video_interpolate_frame(p, frame, fbo, flags); + } else { + bool is_new = frame->frame_id != p->image.id; + + // Redrawing a frame might update subtitles. + if (frame->still && p->opts.blend_subs) + is_new = true; + + if (is_new || !p->output_tex_valid) { + p->output_tex_valid = false; + + pass_info_reset(p, !is_new); + if (!pass_render_frame(p, frame->current, frame->frame_id, flags)) + goto done; + + // For the non-interpolation case, we draw to a single "cache" + // texture to speed up subsequent re-draws (if any exist) + struct ra_fbo dest_fbo = fbo; + bool repeats = frame->num_vsyncs > 1 && frame->display_synced; + if ((repeats || frame->still) && !p->dumb_mode && + (p->ra->caps & RA_CAP_BLIT) && fbo.tex->params.blit_dst) + { + // Attempt to use the same format as the destination FBO + // if possible. Some RAs use a wrapped dummy format here, + // so fall back to the fbo_format in that case. 
+ const struct ra_format *fmt = fbo.tex->params.format; + if (fmt->dummy_format) + fmt = p->fbo_format; + + bool r = ra_tex_resize(p->ra, p->log, &p->output_tex, + fbo.tex->params.w, fbo.tex->params.h, + fmt); + if (r) { + dest_fbo = (struct ra_fbo) { p->output_tex }; + p->output_tex_valid = true; + } + } + pass_draw_to_screen(p, dest_fbo, flags); + } + + // "output tex valid" and "output tex needed" are equivalent + if (p->output_tex_valid && fbo.tex->params.blit_dst) { + pass_info_reset(p, true); + pass_describe(p, "redraw cached frame"); + struct mp_rect src = p->dst_rect; + struct mp_rect dst = src; + if (fbo.flip) { + dst.y0 = fbo.tex->params.h - src.y0; + dst.y1 = fbo.tex->params.h - src.y1; + } + timer_pool_start(p->blit_timer); + p->ra->fns->blit(p->ra, fbo.tex, p->output_tex, &dst, &src); + timer_pool_stop(p->blit_timer); + pass_record(p, timer_pool_measure(p->blit_timer)); + } + } + } + +done: + + debug_check_gl(p, "after video rendering"); + + if (p->osd && (flags & (RENDER_FRAME_SUBS | RENDER_FRAME_OSD))) { + // If we haven't actually drawn anything so far, then we technically + // need to consider this the start of a new pass. Let's call it a + // redraw just because, since it's basically a blank frame anyway + if (!has_frame) + pass_info_reset(p, true); + + int osd_flags = p->opts.blend_subs ? 
OSD_DRAW_OSD_ONLY : 0; + if (!(flags & RENDER_FRAME_SUBS)) + osd_flags |= OSD_DRAW_OSD_ONLY; + if (!(flags & RENDER_FRAME_OSD)) + osd_flags |= OSD_DRAW_SUB_ONLY; + + pass_draw_osd(p, osd_flags, flags, p->osd_pts, p->osd_rect, fbo, true); + debug_check_gl(p, "after OSD rendering"); + } + + p->broken_frame |= gl_sc_error_state(p->sc); + if (p->broken_frame) { + // Make the screen solid blue to make it visually clear that an + // error has occurred + float color[4] = {0.0, 0.05, 0.5, 1.0}; + p->ra->fns->clear(p->ra, fbo.tex, color, &target_rc); + } + + p->frames_rendered++; + pass_report_performance(p); +} + +void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame, + struct voctrl_screenshot *args) +{ + if (!p->ra->fns->tex_download) + return; + + bool ok = false; + struct mp_image *res = NULL; + struct ra_tex *target = NULL; + struct mp_rect old_src = p->src_rect; + struct mp_rect old_dst = p->dst_rect; + struct mp_osd_res old_osd = p->osd_rect; + struct vo_frame *nframe = vo_frame_ref(frame); + + // Disable interpolation and such. 
+ nframe->redraw = true; + nframe->repeat = false; + nframe->still = true; + nframe->pts = 0; + nframe->duration = -1; + + if (!args->scaled) { + int w, h; + mp_image_params_get_dsize(&p->image_params, &w, &h); + if (w < 1 || h < 1) + return; + + int src_w = p->image_params.w; + int src_h = p->image_params.h; + struct mp_rect src = {0, 0, src_w, src_h}; + struct mp_rect dst = {0, 0, w, h}; + + if (mp_image_crop_valid(&p->image_params)) + src = p->image_params.crop; + + if (p->image_params.rotate % 180 == 90) { + MPSWAP(int, w, h); + MPSWAP(int, src_w, src_h); + } + mp_rect_rotate(&src, src_w, src_h, p->image_params.rotate); + mp_rect_rotate(&dst, w, h, p->image_params.rotate); + + struct mp_osd_res osd = { + .display_par = 1.0, + .w = mp_rect_w(dst), + .h = mp_rect_h(dst), + }; + gl_video_resize(p, &src, &dst, &osd); + } + + gl_video_reset_surfaces(p); + + struct ra_tex_params params = { + .dimensions = 2, + .downloadable = true, + .w = p->osd_rect.w, + .h = p->osd_rect.h, + .d = 1, + .render_dst = true, + }; + + params.format = ra_find_unorm_format(p->ra, 1, 4); + int mpfmt = IMGFMT_RGB0; + if (args->high_bit_depth && p->ra_format.component_bits > 8) { + const struct ra_format *fmt = ra_find_unorm_format(p->ra, 2, 4); + if (fmt && fmt->renderable) { + params.format = fmt; + mpfmt = IMGFMT_RGBA64; + } + } + + if (!params.format || !params.format->renderable) + goto done; + target = ra_tex_create(p->ra, ¶ms); + if (!target) + goto done; + + int flags = 0; + if (args->subs) + flags |= RENDER_FRAME_SUBS; + if (args->osd) + flags |= RENDER_FRAME_OSD; + if (args->scaled) + flags |= RENDER_SCREEN_COLOR; + gl_video_render_frame(p, nframe, (struct ra_fbo){target}, flags); + + res = mp_image_alloc(mpfmt, params.w, params.h); + if (!res) + goto done; + + struct ra_tex_download_params download_params = { + .tex = target, + .dst = res->planes[0], + .stride = res->stride[0], + }; + if (!p->ra->fns->tex_download(p->ra, &download_params)) + goto done; + + if (p->broken_frame) + 
goto done; + + ok = true; +done: + talloc_free(nframe); + ra_tex_free(p->ra, &target); + gl_video_resize(p, &old_src, &old_dst, &old_osd); + gl_video_reset_surfaces(p); + if (!ok) + TA_FREEP(&res); + args->res = res; +} + +// Use this color instead of the global option. +void gl_video_set_clear_color(struct gl_video *p, struct m_color c) +{ + p->force_clear_color = true; + p->clear_color = c; +} + +void gl_video_set_osd_pts(struct gl_video *p, double pts) +{ + p->osd_pts = pts; +} + +bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *res, + double pts) +{ + return p->osd ? mpgl_osd_check_change(p->osd, res, pts) : false; +} + +void gl_video_resize(struct gl_video *p, + struct mp_rect *src, struct mp_rect *dst, + struct mp_osd_res *osd) +{ + if (mp_rect_equals(&p->src_rect, src) && + mp_rect_equals(&p->dst_rect, dst) && + osd_res_equals(p->osd_rect, *osd)) + return; + + p->src_rect = *src; + p->dst_rect = *dst; + p->osd_rect = *osd; + + gl_video_reset_surfaces(p); + + if (p->osd) + mpgl_osd_resize(p->osd, p->osd_rect, p->image_params.stereo3d); +} + +static void frame_perf_data(struct pass_info pass[], struct mp_frame_perf *out) +{ + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { + if (!pass[i].desc.len) + break; + out->perf[out->count] = pass[i].perf; + strncpy(out->desc[out->count], pass[i].desc.start, + sizeof(out->desc[out->count]) - 1); + out->desc[out->count][sizeof(out->desc[out->count]) - 1] = '\0'; + out->count++; + } +} + +void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out) +{ + *out = (struct voctrl_performance_data){0}; + frame_perf_data(p->pass_fresh, &out->fresh); + frame_perf_data(p->pass_redraw, &out->redraw); +} + +// Returns false on failure. 
+static bool pass_upload_image(struct gl_video *p, struct mp_image *mpi, uint64_t id) +{ + struct video_image *vimg = &p->image; + + if (vimg->id == id) + return true; + + unref_current_image(p); + + mpi = mp_image_new_ref(mpi); + if (!mpi) + goto error; + + vimg->mpi = mpi; + vimg->id = id; + p->osd_pts = mpi->pts; + p->frames_uploaded++; + + if (p->hwdec_active) { + // Hardware decoding + + if (!p->hwdec_mapper) + goto error; + + pass_describe(p, "map frame (hwdec)"); + timer_pool_start(p->upload_timer); + bool ok = ra_hwdec_mapper_map(p->hwdec_mapper, vimg->mpi) >= 0; + timer_pool_stop(p->upload_timer); + pass_record(p, timer_pool_measure(p->upload_timer)); + + vimg->hwdec_mapped = true; + if (ok) { + struct mp_image layout = {0}; + mp_image_set_params(&layout, &p->image_params); + struct ra_tex **tex = p->hwdec_mapper->tex; + for (int n = 0; n < p->plane_count; n++) { + vimg->planes[n] = (struct texplane){ + .w = mp_image_plane_w(&layout, n), + .h = mp_image_plane_h(&layout, n), + .tex = tex[n], + }; + } + } else { + MP_FATAL(p, "Mapping hardware decoded surface failed.\n"); + goto error; + } + return true; + } + + // Software decoding + assert(mpi->num_planes == p->plane_count); + + timer_pool_start(p->upload_timer); + for (int n = 0; n < p->plane_count; n++) { + struct texplane *plane = &vimg->planes[n]; + if (!plane->tex) { + timer_pool_stop(p->upload_timer); + goto error; + } + + struct ra_tex_upload_params params = { + .tex = plane->tex, + .src = mpi->planes[n], + .invalidate = true, + .stride = mpi->stride[n], + }; + + plane->flipped = params.stride < 0; + if (plane->flipped) { + int h = mp_image_plane_h(mpi, n); + params.src = (char *)params.src + (h - 1) * params.stride; + params.stride = -params.stride; + } + + struct dr_buffer *mapped = gl_find_dr_buffer(p, mpi->planes[n]); + if (mapped) { + params.buf = mapped->buf; + params.buf_offset = (uintptr_t)params.src - + (uintptr_t)mapped->buf->data; + params.src = NULL; + } + + if (p->using_dr_path != 
!!mapped) { + p->using_dr_path = !!mapped; + MP_VERBOSE(p, "DR enabled: %s\n", p->using_dr_path ? "yes" : "no"); + } + + if (!p->ra->fns->tex_upload(p->ra, ¶ms)) { + timer_pool_stop(p->upload_timer); + goto error; + } + + if (mapped && !mapped->mpi) + mapped->mpi = mp_image_new_ref(mpi); + } + timer_pool_stop(p->upload_timer); + + bool using_pbo = p->ra->use_pbo || !(p->ra->caps & RA_CAP_DIRECT_UPLOAD); + const char *mode = p->using_dr_path ? "DR" : using_pbo ? "PBO" : "naive"; + pass_describe(p, "upload frame (%s)", mode); + pass_record(p, timer_pool_measure(p->upload_timer)); + + return true; + +error: + unref_current_image(p); + p->broken_frame = true; + return false; +} + +static bool test_fbo(struct gl_video *p, const struct ra_format *fmt) +{ + MP_VERBOSE(p, "Testing FBO format %s\n", fmt->name); + struct ra_tex *tex = NULL; + bool success = ra_tex_resize(p->ra, p->log, &tex, 16, 16, fmt); + ra_tex_free(p->ra, &tex); + return success; +} + +// Return whether dumb-mode can be used without disabling any features. +// Essentially, vo_gpu with mostly default settings will return true. 
+static bool check_dumb_mode(struct gl_video *p) +{ + struct gl_video_opts *o = &p->opts; + if (p->use_integer_conversion) + return false; + if (o->dumb_mode > 0) // requested by user + return true; + if (o->dumb_mode < 0) // disabled by user + return false; + + // otherwise, use auto-detection + if (o->correct_downscaling || o->linear_downscaling || + o->linear_upscaling || o->sigmoid_upscaling || o->interpolation || + o->blend_subs || o->deband || o->unsharp) + return false; + // check remaining scalers (tscale is already implicitly excluded above) + for (int i = 0; i < SCALER_COUNT; i++) { + if (i != SCALER_TSCALE) { + const char *name = o->scaler[i].kernel.name; + if (name && strcmp(name, "bilinear") != 0) + return false; + } + } + if (o->user_shaders && o->user_shaders[0]) + return false; + return true; +} + +// Disable features that are not supported with the current OpenGL version. +static void check_gl_features(struct gl_video *p) +{ + struct ra *ra = p->ra; + bool have_float_tex = !!ra_find_float16_format(ra, 1); + bool have_mglsl = ra->glsl_version >= 130; // modern GLSL + const struct ra_format *rg_tex = ra_find_unorm_format(p->ra, 1, 2); + bool have_texrg = rg_tex && !rg_tex->luminance_alpha; + bool have_compute = ra->caps & RA_CAP_COMPUTE; + bool have_ssbo = ra->caps & RA_CAP_BUF_RW; + bool have_fragcoord = ra->caps & RA_CAP_FRAGCOORD; + + const char *auto_fbo_fmts[] = {"rgba16f", "rgba16hf", "rgba16", + "rgb10_a2", "rgba8", 0}; + const char *user_fbo_fmts[] = {p->opts.fbo_format, 0}; + const char **fbo_fmts = user_fbo_fmts[0] && strcmp(user_fbo_fmts[0], "auto") + ? 
user_fbo_fmts : auto_fbo_fmts; + bool user_specified_fbo_fmt = fbo_fmts == user_fbo_fmts; + bool fbo_test_result = false; + bool have_fbo = false; + p->fbo_format = NULL; + for (int n = 0; fbo_fmts[n]; n++) { + const char *fmt = fbo_fmts[n]; + const struct ra_format *f = ra_find_named_format(p->ra, fmt); + if (!f && user_specified_fbo_fmt) + MP_WARN(p, "FBO format '%s' not found!\n", fmt); + if (f && f->renderable && f->linear_filter && + (fbo_test_result = test_fbo(p, f))) { + MP_VERBOSE(p, "Using FBO format %s.\n", f->name); + have_fbo = true; + p->fbo_format = f; + break; + } + + if (user_specified_fbo_fmt) { + MP_WARN(p, "User-specified FBO format '%s' failed to initialize! " + "(exists=%d, renderable=%d, linear_filter=%d, " + "fbo_test_result=%d)\n", + fmt, !!f, f ? f->renderable : 0, f ? f->linear_filter : 0, + fbo_test_result); + } + } + + if (!have_fragcoord && p->opts.dither_depth >= 0 && + p->opts.dither_algo != DITHER_NONE) + { + p->opts.dither_algo = DITHER_NONE; + MP_WARN(p, "Disabling dithering (no gl_FragCoord).\n"); + } + if (!have_fragcoord && p->opts.alpha_mode == ALPHA_BLEND_TILES) { + p->opts.alpha_mode = ALPHA_BLEND; + // Verbose, since this is the default setting + MP_VERBOSE(p, "Disabling alpha checkerboard (no gl_FragCoord).\n"); + } + if (!have_fbo && have_compute) { + have_compute = false; + MP_WARN(p, "Force-disabling compute shaders as an FBO format was not " + "available! See your FBO format configuration!\n"); + } + + if (have_compute && have_fbo && !p->fbo_format->storable) { + have_compute = false; + MP_WARN(p, "Force-disabling compute shaders as the chosen FBO format " + "is not storable! See your FBO format configuration!\n"); + } + + if (!have_compute && p->opts.dither_algo == DITHER_ERROR_DIFFUSION) { + MP_WARN(p, "Disabling error diffusion dithering because compute shader " + "was not supported. 
Fallback to dither=fruit instead.\n"); + p->opts.dither_algo = DITHER_FRUIT; + } + + bool have_compute_peak = have_compute && have_ssbo; + if (!have_compute_peak && p->opts.tone_map.compute_peak >= 0) { + int msgl = p->opts.tone_map.compute_peak == 1 ? MSGL_WARN : MSGL_V; + MP_MSG(p, msgl, "Disabling HDR peak computation (one or more of the " + "following is not supported: compute shaders=%d, " + "SSBO=%d).\n", have_compute, have_ssbo); + p->opts.tone_map.compute_peak = -1; + } + + p->forced_dumb_mode = p->opts.dumb_mode > 0 || !have_fbo || !have_texrg; + bool voluntarily_dumb = check_dumb_mode(p); + if (p->forced_dumb_mode || voluntarily_dumb) { + if (voluntarily_dumb) { + MP_VERBOSE(p, "No advanced processing required. Enabling dumb mode.\n"); + } else if (p->opts.dumb_mode <= 0) { + MP_WARN(p, "High bit depth FBOs unsupported. Enabling dumb mode.\n" + "Most extended features will be disabled.\n"); + } + p->dumb_mode = true; + static const struct scaler_config dumb_scaler_config = { + {"bilinear", .params = {NAN, NAN}}, + {.params = {NAN, NAN}}, + }; + // Most things don't work, so whitelist all options that still work. 
+ p->opts = (struct gl_video_opts){ + .scaler = { + [SCALER_SCALE] = dumb_scaler_config, + [SCALER_DSCALE] = dumb_scaler_config, + [SCALER_CSCALE] = dumb_scaler_config, + [SCALER_TSCALE] = dumb_scaler_config, + }, + .gamma = p->opts.gamma, + .gamma_auto = p->opts.gamma_auto, + .pbo = p->opts.pbo, + .fbo_format = p->opts.fbo_format, + .alpha_mode = p->opts.alpha_mode, + .use_rectangle = p->opts.use_rectangle, + .background = p->opts.background, + .dither_algo = p->opts.dither_algo, + .dither_depth = p->opts.dither_depth, + .dither_size = p->opts.dither_size, + .error_diffusion = p->opts.error_diffusion, + .temporal_dither = p->opts.temporal_dither, + .temporal_dither_period = p->opts.temporal_dither_period, + .tex_pad_x = p->opts.tex_pad_x, + .tex_pad_y = p->opts.tex_pad_y, + .tone_map = p->opts.tone_map, + .early_flush = p->opts.early_flush, + .icc_opts = p->opts.icc_opts, + .hwdec_interop = p->opts.hwdec_interop, + .target_trc = p->opts.target_trc, + .target_prim = p->opts.target_prim, + .target_peak = p->opts.target_peak, + }; + if (!have_fbo) + p->use_lut_3d = false; + return; + } + p->dumb_mode = false; + + // Normally, we want to disable them by default if FBOs are unavailable, + // because they will be slow (not critically slow, but still slower). + // Without FP textures, we must always disable them. + // I don't know if luminance alpha float textures exist, so disregard them. + for (int n = 0; n < SCALER_COUNT; n++) { + const struct filter_kernel *kernel = + mp_find_filter_kernel(p->opts.scaler[n].kernel.name); + if (kernel) { + char *reason = NULL; + if (!have_float_tex) + reason = "(float tex. missing)"; + if (!have_mglsl) + reason = "(GLSL version too old)"; + if (reason) { + MP_WARN(p, "Disabling scaler #%d %s %s.\n", n, + p->opts.scaler[n].kernel.name, reason); + // p->opts is a copy => we can just mess with it. 
+ p->opts.scaler[n].kernel.name = "bilinear"; + if (n == SCALER_TSCALE) + p->opts.interpolation = false; + } + } + } + + int use_cms = p->opts.target_prim != MP_CSP_PRIM_AUTO || + p->opts.target_trc != MP_CSP_TRC_AUTO || p->use_lut_3d; + + // mix() is needed for some gamma functions + if (!have_mglsl && (p->opts.linear_downscaling || + p->opts.linear_upscaling || p->opts.sigmoid_upscaling)) + { + p->opts.linear_downscaling = false; + p->opts.linear_upscaling = false; + p->opts.sigmoid_upscaling = false; + MP_WARN(p, "Disabling linear/sigmoid scaling (GLSL version too old).\n"); + } + if (!have_mglsl && use_cms) { + p->opts.target_prim = MP_CSP_PRIM_AUTO; + p->opts.target_trc = MP_CSP_TRC_AUTO; + p->use_lut_3d = false; + MP_WARN(p, "Disabling color management (GLSL version too old).\n"); + } + if (!have_mglsl && p->opts.deband) { + p->opts.deband = false; + MP_WARN(p, "Disabling debanding (GLSL version too old).\n"); + } +} + +static void init_gl(struct gl_video *p) +{ + debug_check_gl(p, "before init_gl"); + + p->upload_timer = timer_pool_create(p->ra); + p->blit_timer = timer_pool_create(p->ra); + p->osd_timer = timer_pool_create(p->ra); + + debug_check_gl(p, "after init_gl"); + + ra_dump_tex_formats(p->ra, MSGL_DEBUG); + ra_dump_img_formats(p->ra, MSGL_DEBUG); +} + +void gl_video_uninit(struct gl_video *p) +{ + if (!p) + return; + + uninit_video(p); + ra_hwdec_ctx_uninit(&p->hwdec_ctx); + gl_sc_destroy(p->sc); + + ra_tex_free(p->ra, &p->lut_3d_texture); + ra_buf_free(p->ra, &p->hdr_peak_ssbo); + + timer_pool_destroy(p->upload_timer); + timer_pool_destroy(p->blit_timer); + timer_pool_destroy(p->osd_timer); + + for (int i = 0; i < VO_PASS_PERF_MAX; i++) { + talloc_free(p->pass_fresh[i].desc.start); + talloc_free(p->pass_redraw[i].desc.start); + } + + mpgl_osd_destroy(p->osd); + + // Forcibly destroy possibly remaining image references. This should also + // cause gl_video_dr_free_buffer() to be called for the remaining buffers. 
+ gc_pending_dr_fences(p, true); + + // Should all have been unreffed already. + assert(!p->num_dr_buffers); + + talloc_free(p); +} + +void gl_video_reset(struct gl_video *p) +{ + gl_video_reset_surfaces(p); +} + +bool gl_video_showing_interpolated_frame(struct gl_video *p) +{ + return p->is_interpolated; +} + +static bool is_imgfmt_desc_supported(struct gl_video *p, + const struct ra_imgfmt_desc *desc) +{ + if (!desc->num_planes) + return false; + + if (desc->planes[0]->ctype == RA_CTYPE_UINT && p->forced_dumb_mode) + return false; + + return true; +} + +bool gl_video_check_format(struct gl_video *p, int mp_format) +{ + struct ra_imgfmt_desc desc; + if (ra_get_imgfmt_desc(p->ra, mp_format, &desc) && + is_imgfmt_desc_supported(p, &desc)) + return true; + if (ra_hwdec_get(&p->hwdec_ctx, mp_format)) + return true; + return false; +} + +void gl_video_config(struct gl_video *p, struct mp_image_params *params) +{ + unmap_overlay(p); + unref_current_image(p); + + if (!mp_image_params_equal(&p->real_image_params, params)) { + uninit_video(p); + p->real_image_params = *params; + p->image_params = *params; + if (params->imgfmt) + init_video(p); + } + + gl_video_reset_surfaces(p); +} + +void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd) +{ + mpgl_osd_destroy(p->osd); + p->osd = NULL; + p->osd_state = osd; + reinit_osd(p); +} + +struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, + struct mpv_global *g) +{ + struct gl_video *p = talloc_ptrtype(NULL, p); + *p = (struct gl_video) { + .ra = ra, + .global = g, + .log = log, + .sc = gl_sc_create(ra, g, log), + .video_eq = mp_csp_equalizer_create(p, g), + .opts_cache = m_config_cache_alloc(p, g, &gl_video_conf), + }; + // make sure this variable is initialized to *something* + p->pass = p->pass_fresh; + struct gl_video_opts *opts = p->opts_cache->opts; + p->cms = gl_lcms_init(p, log, g, opts->icc_opts), + p->opts = *opts; + for (int n = 0; n < SCALER_COUNT; n++) + p->scaler[n] = (struct 
scaler){.index = n}; + // our VAO always has the vec2 position as the first element + MP_TARRAY_APPEND(p, p->vao, p->vao_len, (struct ra_renderpass_input) { + .name = "position", + .type = RA_VARTYPE_FLOAT, + .dim_v = 2, + .dim_m = 1, + .offset = 0, + }); + init_gl(p); + reinit_from_options(p); + return p; +} + +// Get static string for scaler shader. If "tscale" is set to true, the +// scaler must be a separable convolution filter. +static const char *handle_scaler_opt(const char *name, bool tscale) +{ + if (name && name[0]) { + const struct filter_kernel *kernel = mp_find_filter_kernel(name); + if (kernel && (!tscale || !kernel->polar)) + return kernel->f.name; + + const struct filter_window *window = mp_find_filter_window(name); + if (window) + return window->name; + + for (const char *const *filter = tscale ? fixed_tscale_filters + : fixed_scale_filters; + *filter; filter++) { + if (strcmp(*filter, name) == 0) + return *filter; + } + } + return NULL; +} + +static void gl_video_update_options(struct gl_video *p) +{ + if (m_config_cache_update(p->opts_cache)) { + gl_lcms_update_options(p->cms); + reinit_from_options(p); + } + + if (mp_csp_equalizer_state_changed(p->video_eq)) + p->output_tex_valid = false; +} + +static void reinit_from_options(struct gl_video *p) +{ + p->use_lut_3d = gl_lcms_has_profile(p->cms); + + // Copy the option fields, so that check_gl_features() can mutate them. + // This works only for the fields themselves of course, not for any memory + // referenced by them. 
+ p->opts = *(struct gl_video_opts *)p->opts_cache->opts; + + if (!p->force_clear_color) + p->clear_color = p->opts.background; + + check_gl_features(p); + uninit_rendering(p); + if (p->opts.shader_cache) + gl_sc_set_cache_dir(p->sc, p->opts.shader_cache_dir); + p->ra->use_pbo = p->opts.pbo; + gl_video_setup_hooks(p); + reinit_osd(p); + + struct mp_vo_opts *vo_opts = mp_get_config_group(p, p->global, &vo_sub_opts); + if (p->opts.interpolation && !vo_opts->video_sync && !p->dsi_warned) { + MP_WARN(p, "Interpolation now requires enabling display-sync mode.\n" + "E.g.: --video-sync=display-resample\n"); + p->dsi_warned = true; + } + talloc_free(vo_opts); + + if (p->opts.correct_downscaling && !p->correct_downscaling_warned) { + const char *name = p->opts.scaler[SCALER_DSCALE].kernel.name; + if (!name) + name = p->opts.scaler[SCALER_SCALE].kernel.name; + if (!name || !strcmp(name, "bilinear")) { + MP_WARN(p, "correct-downscaling requires non-bilinear scaler.\n"); + p->correct_downscaling_warned = true; + } + } +} + +void gl_video_configure_queue(struct gl_video *p, struct vo *vo) +{ + gl_video_update_options(p); + + int queue_size = 1; + + // Figure out an adequate size for the interpolation queue. The larger + // the radius, the earlier we need to queue frames. + if (p->opts.interpolation) { + const struct filter_kernel *kernel = + mp_find_filter_kernel(p->opts.scaler[SCALER_TSCALE].kernel.name); + if (kernel) { + // filter_scale wouldn't be correctly initialized were we to use it here. + // This is fine since we're always upsampling, but beware if downsampling + // is added! + double radius = kernel->f.radius; + radius = radius > 0 ? 
radius : p->opts.scaler[SCALER_TSCALE].radius; + queue_size += 1 + ceil(radius); + } else { + // Oversample/linear case + queue_size += 2; + } + } + + vo_set_queue_params(vo, 0, queue_size); +} + +static int validate_scaler_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value) +{ + struct bstr param = bstr0(*value); + char s[32] = {0}; + int r = 1; + bool tscale = bstr_equals0(name, "tscale"); + if (bstr_equals0(param, "help")) { + r = M_OPT_EXIT; + } else if (bstr_equals0(name, "dscale") && !param.len) { + return r; // empty dscale means "use same as upscaler" + } else if (bstr_equals0(name, "cscale") && !param.len) { + return r; // empty cscale means "use same as upscaler" + } else { + snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); + if (!handle_scaler_opt(s, tscale)) + r = M_OPT_INVALID; + } + if (r < 1) { + mp_info(log, "Available scalers:\n"); + for (const char *const *filter = tscale ? fixed_tscale_filters + : fixed_scale_filters; + *filter; filter++) { + mp_info(log, " %s\n", *filter); + } + for (int n = 0; mp_filter_kernels[n].f.name; n++) { + if (!tscale || !mp_filter_kernels[n].polar) + mp_info(log, " %s\n", mp_filter_kernels[n].f.name); + } + for (int n = 0; mp_filter_windows[n].name; n++) { + for (int m = 0; mp_filter_kernels[m].f.name; m++) { + if (!strcmp(mp_filter_windows[n].name, mp_filter_kernels[m].f.name)) + goto next_window; // don't log duplicates + } + mp_info(log, " %s\n", mp_filter_windows[n].name); +next_window: ; + } + if (s[0]) + mp_fatal(log, "No scaler named '%s' found!\n", s); + } + return r; +} + +static int validate_window_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value) +{ + struct bstr param = bstr0(*value); + char s[32] = {0}; + int r = 1; + if (bstr_equals0(param, "help")) { + r = M_OPT_EXIT; + } else if (!param.len) { + return r; // empty string means "use preferred window" + } else { + snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); + const struct 
filter_window *window = mp_find_filter_window(s); + if (!window) + r = M_OPT_INVALID; + } + if (r < 1) { + mp_info(log, "Available windows:\n"); + for (int n = 0; mp_filter_windows[n].name; n++) + mp_info(log, " %s\n", mp_filter_windows[n].name); + if (s[0]) + mp_fatal(log, "No window named '%s' found!\n", s); + } + return r; +} + +static int validate_error_diffusion_opt(struct mp_log *log, const m_option_t *opt, + struct bstr name, const char **value) +{ + struct bstr param = bstr0(*value); + char s[32] = {0}; + int r = 1; + if (bstr_equals0(param, "help")) { + r = M_OPT_EXIT; + } else { + snprintf(s, sizeof(s), "%.*s", BSTR_P(param)); + const struct error_diffusion_kernel *k = mp_find_error_diffusion_kernel(s); + if (!k) + r = M_OPT_INVALID; + } + if (r < 1) { + mp_info(log, "Available error diffusion kernels:\n"); + for (int n = 0; mp_error_diffusion_kernels[n].name; n++) + mp_info(log, " %s\n", mp_error_diffusion_kernels[n].name); + if (s[0]) + mp_fatal(log, "No error diffusion kernel named '%s' found!\n", s); + } + return r; +} + +void gl_video_set_ambient_lux(struct gl_video *p, int lux) +{ + if (p->opts.gamma_auto) { + p->opts.gamma = gl_video_scale_ambient_lux(16.0, 256.0, 1.0, 1.2, lux); + MP_TRACE(p, "ambient light changed: %d lux (gamma: %f)\n", lux, + p->opts.gamma); + } +} + +static void *gl_video_dr_alloc_buffer(struct gl_video *p, size_t size) +{ + struct ra_buf_params params = { + .type = RA_BUF_TYPE_TEX_UPLOAD, + .host_mapped = true, + .size = size, + }; + + struct ra_buf *buf = ra_buf_create(p->ra, ¶ms); + if (!buf) + return NULL; + + MP_TARRAY_GROW(p, p->dr_buffers, p->num_dr_buffers); + p->dr_buffers[p->num_dr_buffers++] = (struct dr_buffer){ .buf = buf }; + + return buf->data; +} + +static void gl_video_dr_free_buffer(void *opaque, uint8_t *data) +{ + struct gl_video *p = opaque; + + for (int n = 0; n < p->num_dr_buffers; n++) { + struct dr_buffer *buffer = &p->dr_buffers[n]; + if (buffer->buf->data == data) { + assert(!buffer->mpi); // can't 
be freed while it has a ref + ra_buf_free(p->ra, &buffer->buf); + MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, n); + return; + } + } + // not found - must not happen + MP_ASSERT_UNREACHABLE(); +} + +struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, + int stride_align, int flags) +{ + if (flags & VO_DR_FLAG_HOST_CACHED) { + if (p->ra->caps & RA_CAP_SLOW_DR) { + MP_VERBOSE(p, "DR path suspected slow/uncached, disabling.\n"); + return NULL; + } + } + + if (!gl_video_check_format(p, imgfmt)) + return NULL; + + int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align); + if (size < 0) + return NULL; + + int alloc_size = size + stride_align; + void *ptr = gl_video_dr_alloc_buffer(p, alloc_size); + if (!ptr) + return NULL; + + // (we expect vo.c to proxy the free callback, so it happens in the same + // thread it was allocated in, removing the need for synchronization) + struct mp_image *res = mp_image_from_buffer(imgfmt, w, h, stride_align, + ptr, alloc_size, p, + gl_video_dr_free_buffer); + if (!res) + gl_video_dr_free_buffer(p, ptr); + return res; +} + +void gl_video_init_hwdecs(struct gl_video *p, struct ra_ctx *ra_ctx, + struct mp_hwdec_devices *devs, + bool load_all_by_default) +{ + assert(!p->hwdec_ctx.ra_ctx); + p->hwdec_ctx = (struct ra_hwdec_ctx) { + .log = p->log, + .global = p->global, + .ra_ctx = ra_ctx, + }; + + ra_hwdec_ctx_init(&p->hwdec_ctx, devs, p->opts.hwdec_interop, load_all_by_default); +} + +void gl_video_load_hwdecs_for_img_fmt(struct gl_video *p, struct mp_hwdec_devices *devs, + struct hwdec_imgfmt_request *params) +{ + assert(p->hwdec_ctx.ra_ctx); + ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, devs, params); +} diff --git a/video/out/gpu/video.h b/video/out/gpu/video.h new file mode 100644 index 0000000..411d336 --- /dev/null +++ b/video/out/gpu/video.h @@ -0,0 +1,238 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_GL_VIDEO_H +#define MP_GL_VIDEO_H + +#include <stdbool.h> + +#include "options/m_option.h" +#include "sub/osd.h" +#include "utils.h" +#include "lcms.h" +#include "shader_cache.h" +#include "video/csputils.h" +#include "video/out/filter_kernels.h" + +struct scaler_fun { + char *name; + float params[2]; + float blur; + float taper; +}; + +struct scaler_config { + struct scaler_fun kernel; + struct scaler_fun window; + float radius; + float antiring; + float clamp; +}; + +struct scaler { + int index; + struct scaler_config conf; + double scale_factor; + bool initialized; + struct filter_kernel *kernel; + struct ra_tex *lut; + struct ra_tex *sep_fbo; + bool insufficient; + + // kernel points here + struct filter_kernel kernel_storage; +}; + +enum scaler_unit { + SCALER_SCALE, // luma/video + SCALER_DSCALE, // luma-video downscaling + SCALER_CSCALE, // chroma upscaling + SCALER_TSCALE, // temporal scaling (interpolation) + SCALER_COUNT +}; + +enum dither_algo { + DITHER_NONE = 0, + DITHER_FRUIT, + DITHER_ORDERED, + DITHER_ERROR_DIFFUSION, +}; + +enum alpha_mode { + ALPHA_NO = 0, + ALPHA_YES, + ALPHA_BLEND, + ALPHA_BLEND_TILES, +}; + +enum blend_subs_mode { + BLEND_SUBS_NO = 0, + BLEND_SUBS_YES, + BLEND_SUBS_VIDEO, +}; + +enum tone_mapping { + TONE_MAPPING_AUTO, + TONE_MAPPING_CLIP, + TONE_MAPPING_MOBIUS, + 
TONE_MAPPING_REINHARD, + TONE_MAPPING_HABLE, + TONE_MAPPING_GAMMA, + TONE_MAPPING_LINEAR, + TONE_MAPPING_SPLINE, + TONE_MAPPING_BT_2390, + TONE_MAPPING_BT_2446A, + TONE_MAPPING_ST2094_40, + TONE_MAPPING_ST2094_10, +}; + +enum gamut_mode { + GAMUT_AUTO, + GAMUT_CLIP, + GAMUT_PERCEPTUAL, + GAMUT_RELATIVE, + GAMUT_SATURATION, + GAMUT_ABSOLUTE, + GAMUT_DESATURATE, + GAMUT_DARKEN, + GAMUT_WARN, + GAMUT_LINEAR, +}; + +struct gl_tone_map_opts { + int curve; + float curve_param; + float max_boost; + bool inverse; + int compute_peak; + float decay_rate; + float scene_threshold_low; + float scene_threshold_high; + float peak_percentile; + float contrast_recovery; + float contrast_smoothness; + int gamut_mode; + bool visualize; +}; + +struct gl_video_opts { + int dumb_mode; + struct scaler_config scaler[4]; + float gamma; + bool gamma_auto; + int target_prim; + int target_trc; + int target_peak; + int target_contrast; + int target_gamut; + struct gl_tone_map_opts tone_map; + bool correct_downscaling; + bool linear_downscaling; + bool linear_upscaling; + bool sigmoid_upscaling; + float sigmoid_center; + float sigmoid_slope; + bool scaler_resizes_only; + bool pbo; + int dither_depth; + int dither_algo; + int dither_size; + bool temporal_dither; + int temporal_dither_period; + char *error_diffusion; + char *fbo_format; + int alpha_mode; + bool use_rectangle; + struct m_color background; + bool interpolation; + float interpolation_threshold; + int blend_subs; + char **user_shaders; + char **user_shader_opts; + bool deband; + struct deband_opts *deband_opts; + float unsharp; + int tex_pad_x, tex_pad_y; + struct mp_icc_opts *icc_opts; + bool shader_cache; + int early_flush; + char *shader_cache_dir; + char *hwdec_interop; +}; + +extern const struct m_sub_options gl_video_conf; + +struct gl_video; +struct vo_frame; +struct voctrl_screenshot; + +enum { + RENDER_FRAME_SUBS = 1 << 0, + RENDER_FRAME_OSD = 1 << 1, + RENDER_FRAME_VF_SUBS = 1 << 2, + RENDER_SCREEN_COLOR = 1 << 3, // 3D LUT 
and dithering + RENDER_FRAME_DEF = RENDER_FRAME_SUBS | RENDER_FRAME_OSD | RENDER_SCREEN_COLOR, +}; + +struct gl_video *gl_video_init(struct ra *ra, struct mp_log *log, + struct mpv_global *g); +void gl_video_uninit(struct gl_video *p); +void gl_video_set_osd_source(struct gl_video *p, struct osd_state *osd); +bool gl_video_check_format(struct gl_video *p, int mp_format); +void gl_video_config(struct gl_video *p, struct mp_image_params *params); +void gl_video_render_frame(struct gl_video *p, struct vo_frame *frame, + struct ra_fbo fbo, int flags); +void gl_video_resize(struct gl_video *p, + struct mp_rect *src, struct mp_rect *dst, + struct mp_osd_res *osd); +void gl_video_set_fb_depth(struct gl_video *p, int fb_depth); +void gl_video_perfdata(struct gl_video *p, struct voctrl_performance_data *out); +void gl_video_set_clear_color(struct gl_video *p, struct m_color color); +void gl_video_set_osd_pts(struct gl_video *p, double pts); +bool gl_video_check_osd_change(struct gl_video *p, struct mp_osd_res *osd, + double pts); + +void gl_video_screenshot(struct gl_video *p, struct vo_frame *frame, + struct voctrl_screenshot *args); + +float gl_video_scale_ambient_lux(float lmin, float lmax, + float rmin, float rmax, float lux); +void gl_video_set_ambient_lux(struct gl_video *p, int lux); +void gl_video_set_icc_profile(struct gl_video *p, bstr icc_data); +bool gl_video_icc_auto_enabled(struct gl_video *p); +bool gl_video_gamma_auto_enabled(struct gl_video *p); +struct mp_colorspace gl_video_get_output_colorspace(struct gl_video *p); + +void gl_video_reset(struct gl_video *p); +bool gl_video_showing_interpolated_frame(struct gl_video *p); + +struct mp_hwdec_devices; +void gl_video_init_hwdecs(struct gl_video *p, struct ra_ctx *ra_ctx, + struct mp_hwdec_devices *devs, + bool load_all_by_default); +struct hwdec_imgfmt_request; +void gl_video_load_hwdecs_for_img_fmt(struct gl_video *p, struct mp_hwdec_devices *devs, + struct hwdec_imgfmt_request *params); + +struct vo; +void 
gl_video_configure_queue(struct gl_video *p, struct vo *vo); + +struct mp_image *gl_video_get_image(struct gl_video *p, int imgfmt, int w, int h, + int stride_align, int flags); + + +#endif diff --git a/video/out/gpu/video_shaders.c b/video/out/gpu/video_shaders.c new file mode 100644 index 0000000..6c0e8a8 --- /dev/null +++ b/video/out/gpu/video_shaders.c @@ -0,0 +1,1033 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <math.h> + +#include "video_shaders.h" +#include "video.h" + +#define GLSL(x) gl_sc_add(sc, #x "\n"); +#define GLSLF(...) gl_sc_addf(sc, __VA_ARGS__) +#define GLSLH(x) gl_sc_hadd(sc, #x "\n"); +#define GLSLHF(...) 
gl_sc_haddf(sc, __VA_ARGS__)

// Set up shared/commonly used variables and macros
//
// Emits, for texture number `tex_num`, the `tex`/`texmap` macros and the
// `pos`, `size` and `pt` variables that the samplers below refer to.
void sampler_prelude(struct gl_shader_cache *sc, int tex_num)
{
    GLSLF("#undef tex\n");
    GLSLF("#undef texmap\n");
    GLSLF("#define tex texture%d\n", tex_num);
    GLSLF("#define texmap texmap%d\n", tex_num);
    GLSLF("vec2 pos = texcoord%d;\n", tex_num);
    GLSLF("vec2 size = texture_size%d;\n", tex_num);
    GLSLF("vec2 pt = pixel_size%d;\n", tex_num);
}

// Emit code that loads the kernel's N weights from the "lut" texture into a
// local `weights[N]` array. Each texture fetch yields four weights (one per
// vec4 component), so a new fetch is emitted for every fourth weight.
// The emitted code uses `fcoord` and `c` without declaring them; they are
// expected to come from shader code emitted earlier.
static void pass_sample_separated_get_weights(struct gl_shader_cache *sc,
                                              struct scaler *scaler)
{
    gl_sc_uniform_texture(sc, "lut", scaler->lut);
    GLSLF("float ypos = LUT_POS(fcoord, %d.0);\n", scaler->lut->params.h);

    int N = scaler->kernel->size;
    int width = (N + 3) / 4; // round up

    GLSLF("float weights[%d];\n", N);
    for (int i = 0; i < N; i++) {
        if (i % 4 == 0)
            GLSLF("c = texture(lut, vec2(%f, ypos));\n", (i / 4 + 0.5) / width);
        GLSLF("weights[%d] = c[%d];\n", i, i % 4);
    }
}

// Handle a single pass (either vertical or horizontal). The direction is given
// by the vector (d_x, d_y). If the vector is 0, then planar interpolation is
// used instead (samples from texture0 through textureN)
void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler,
                               int d_x, int d_y)
{
    int N = scaler->kernel->size;
    bool use_ar = scaler->conf.antiring > 0;
    bool planar = d_x == 0 && d_y == 0;
    GLSL(color = vec4(0.0);)
    GLSLF("{\n");
    if (!planar) {
        GLSLF("vec2 dir = vec2(%d.0, %d.0);\n", d_x, d_y);
        GLSL(pt *= dir;)
        GLSL(float fcoord = dot(fract(pos * size - vec2(0.5)), dir);)
        GLSLF("vec2 base = pos - fcoord * pt - pt * vec2(%d.0);\n", N / 2 - 1);
    }
    GLSL(vec4 c;)
    if (use_ar) {
        // Running min/max of the two central taps, used for the clamp below
        GLSL(vec4 hi = vec4(0.0);)
        GLSL(vec4 lo = vec4(1.0);)
    }
    pass_sample_separated_get_weights(sc, scaler);
    GLSLF("// scaler samples\n");
    for (int n = 0; n < N; n++) {
        if (planar) {
            GLSLF("c = texture(texture%d, texcoord%d);\n", n, n);
        } else {
            GLSLF("c = texture(tex, base + pt * vec2(%d.0));\n", n);
        }
        GLSLF("color += vec4(weights[%d]) * c;\n", n);
        if (use_ar && (n == N/2-1 || n == N/2)) {
            GLSL(lo = min(lo, c);)
            GLSL(hi = max(hi, c);)
        }
    }
    // Anti-ringing: blend towards the result clamped to [lo, hi], with the
    // configured antiring value as the mix factor
    if (use_ar)
        GLSLF("color = mix(color, clamp(color, lo, hi), %f);\n",
              scaler->conf.antiring);
    GLSLF("}\n");
}

// Subroutine for computing and adding an individual texel contribution
// If planar is false, samples directly
// If planar is true, takes the pixel from inX[idx] where X is the component and
// `idx` must be defined by the caller
static void polar_sample(struct gl_shader_cache *sc, struct scaler *scaler,
                         int x, int y, int components, bool planar)
{
    double radius = scaler->kernel->radius * scaler->kernel->filter_scale;
    double radius_cutoff = scaler->kernel->radius_cutoff;

    // Since we can't know the subpixel position in advance, assume a
    // worst case scenario
    int yy = y > 0 ? y-1 : y;
    int xx = x > 0 ? x-1 : x;
    double dmax = sqrt(xx*xx + yy*yy);
    // Skip samples definitely outside the radius
    if (dmax >= radius_cutoff)
        return;
    GLSLF("d = length(vec2(%d.0, %d.0) - fcoord);\n", x, y);
    // Check for samples that might be skippable
    bool maybe_skippable = dmax >= radius_cutoff - M_SQRT2;
    if (maybe_skippable)
        GLSLF("if (d < %f) {\n", radius_cutoff);

    // get the weight for this pixel
    if (scaler->lut->params.dimensions == 1) {
        GLSLF("w = tex1D(lut, LUT_POS(d * 1.0/%f, %d.0)).r;\n",
              radius, scaler->lut->params.w);
    } else {
        GLSLF("w = texture(lut, vec2(0.5, LUT_POS(d * 1.0/%f, %d.0))).r;\n",
              radius, scaler->lut->params.h);
    }
    GLSL(wsum += w;)

    if (planar) {
        for (int n = 0; n < components; n++)
            GLSLF("color[%d] += w * in%d[idx];\n", n, n);
    } else {
        GLSLF("in0 = texture(tex, base + pt * vec2(%d.0, %d.0));\n", x, y);
        GLSL(color += vec4(w) * in0;)
    }

    // Close the conditional opened above
    if (maybe_skippable)
        GLSLF("}\n");
}

// Emit a polar (2D, radius-based) scaler. Texels are visited in 2x2 groups;
// each group is fetched either with textureGatherOffset (only if `sup_gather`
// is set) or with direct texture lookups. The result is normalized by the sum
// of the applied weights.
void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler,
                       int components, bool sup_gather)
{
    GLSL(color = vec4(0.0);)
    GLSLF("{\n");
    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
    GLSL(vec2 base = pos - fcoord * pt;)
    GLSLF("float w, d, wsum = 0.0;\n");
    for (int n = 0; n < components; n++)
        GLSLF("vec4 in%d;\n", n);
    GLSL(int idx;)

    gl_sc_uniform_texture(sc, "lut", scaler->lut);

    GLSLF("// scaler samples\n");
    int bound = ceil(scaler->kernel->radius_cutoff);
    for (int y = 1-bound; y <= bound; y += 2) {
        for (int x = 1-bound; x <= bound; x += 2) {
            // First we figure out whether it's more efficient to use direct
            // sampling or gathering. The problem is that gathering 4 texels
            // only to discard some of them is very wasteful, so only do it if
            // we suspect it will be a win rather than a loss. This is the case
            // exactly when all four texels are within bounds
            bool use_gather = sqrt(x*x + y*y) < scaler->kernel->radius_cutoff;

            if (!sup_gather)
                use_gather = false;

            if (use_gather) {
                // Gather the four surrounding texels simultaneously
                for (int n = 0; n < components; n++) {
                    GLSLF("in%d = textureGatherOffset(tex, base, "
                          "ivec2(%d, %d), %d);\n", n, x, y, n);
                }

                // Mix in all of the points with their weights
                for (int p = 0; p < 4; p++) {
                    // The four texels are gathered counterclockwise starting
                    // from the bottom left
                    static const int xo[4] = {0, 1, 1, 0};
                    static const int yo[4] = {1, 1, 0, 0};
                    if (x+xo[p] > bound || y+yo[p] > bound)
                        continue;
                    GLSLF("idx = %d;\n", p);
                    polar_sample(sc, scaler, x+xo[p], y+yo[p], components, true);
                }
            } else {
                // switch to direct sampling instead, for efficiency/compatibility
                for (int yy = y; yy <= bound && yy <= y+1; yy++) {
                    for (int xx = x; xx <= bound && xx <= x+1; xx++)
                        polar_sample(sc, scaler, xx, yy, components, false);
                }
            }
        }
    }

    GLSL(color = color / vec4(wsum);)
    GLSLF("}\n");
}

// Compute-shader variant of the polar scaler: the work group first loads all
// required texels into shared arrays (one per component), then every
// invocation samples from those arrays via polar_sample(..., planar=true).
// bw/bh: block size
// iw/ih: input size (pre-calculated to fit all required texels)
void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler,
                        int components, int bw, int bh, int iw, int ih)
{
    int bound = ceil(scaler->kernel->radius_cutoff);
    int offset = bound - 1; // padding top/left

    GLSL(color = vec4(0.0);)
    GLSLF("{\n");
    GLSL(vec2 wpos = texmap(gl_WorkGroupID * gl_WorkGroupSize);)
    GLSL(vec2 wbase = wpos - pt * fract(wpos * size - vec2(0.5));)
    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
    GLSL(vec2 base = pos - pt * fcoord;)
    GLSL(ivec2 rel = ivec2(round((base - wbase) * size));)
    GLSL(int idx;)
    GLSLF("float w, d, wsum = 0.0;\n");
    gl_sc_uniform_texture(sc, "lut", scaler->lut);

    // Load all relevant texels into shmem
    for (int c = 0; c < components; c++)
        GLSLHF("shared float in%d[%d];\n", c, ih * iw);

    GLSL(vec4 c;)
    GLSLF("for (int y = int(gl_LocalInvocationID.y); y < %d; y += %d) {\n", ih, bh);
    GLSLF("for (int x = int(gl_LocalInvocationID.x); x < %d; x += %d) {\n", iw, bw);
    GLSLF("c = texture(tex, wbase + pt * vec2(x - %d, y - %d));\n", offset, offset);
    for (int c = 0; c < components; c++)
        GLSLF("in%d[%d * y + x] = c[%d];\n", c, iw, c);
    GLSLF("}}\n");
    GLSL(groupMemoryBarrier();)
    GLSL(barrier();)

    // Dispatch the actual samples
    GLSLF("// scaler samples\n");
    for (int y = 1-bound; y <= bound; y++) {
        for (int x = 1-bound; x <= bound; x++) {
            // Index into the shared arrays: row stride is iw, shifted by the
            // per-invocation offset `rel` and the top/left padding
            GLSLF("idx = %d * rel.y + rel.x + %d;\n", iw,
                  iw * (y + offset) + x + offset);
            polar_sample(sc, scaler, x, y, components, true);
        }
    }

    GLSL(color = color / vec4(wsum);)
    GLSLF("}\n");
}

// Emit the declaration and computation of the vec4 named `t` from the
// fractional coordinate expression `s` (both are GLSL identifiers/expressions
// spliced into the generated code).
static void bicubic_calcweights(struct gl_shader_cache *sc, const char *t, const char *s)
{
    // Explanation of how bicubic scaling with only 4 texel fetches is done:
    //   http://www.mate.tue.nl/mate/pdfs/10318.pdf
    //   'Efficient GPU-Based Texture Interpolation using Uniform B-Splines'
    // Explanation why this algorithm normally always blurs, even with unit
    // scaling:
    //   http://bigwww.epfl.ch/preprints/ruijters1001p.pdf
    //   'GPU Prefilter for Accurate Cubic B-spline Interpolation'
    GLSLF("vec4 %s = vec4(-0.5, 0.1666, 0.3333, -0.3333) * %s"
                " + vec4(1, 0, -0.5, 0.5);\n", t, s);
    GLSLF("%s = %s * %s + vec4(0, 0, -0.5, 0.5);\n", t, t, s);
    GLSLF("%s = %s * %s + vec4(-0.6666, 0, 0.8333, 0.1666);\n", t, t, s);
    GLSLF("%s.xy *= vec2(1, 1) / vec2(%s.z, %s.w);\n", t, t, t);
    GLSLF("%s.xy += vec2(1.0 + %s, 1.0 - %s);\n", t, s, s);
}

// Bicubic sampling using four texture fetches, combined with three mix()
// calls (two along y, one along x).
void pass_sample_bicubic_fast(struct gl_shader_cache *sc)
{
    GLSLF("{\n");
    GLSL(vec2 fcoord = fract(pos * size + vec2(0.5, 0.5));)
    bicubic_calcweights(sc, "parmx", "fcoord.x");
    bicubic_calcweights(sc, "parmy", "fcoord.y");
    GLSL(vec4 cdelta;)
    GLSL(cdelta.xz = parmx.rg * vec2(-pt.x, pt.x);)
    GLSL(cdelta.yw = parmy.rg * vec2(-pt.y, pt.y);)
    // first y-interpolation
    GLSL(vec4 ar = texture(tex, pos + cdelta.xy);)
    GLSL(vec4 ag = texture(tex, pos + cdelta.xw);)
    GLSL(vec4 ab = mix(ag, ar, parmy.b);)
    // second y-interpolation
    GLSL(vec4 br = texture(tex, pos + cdelta.zy);)
    GLSL(vec4 bg = texture(tex, pos + cdelta.zw);)
    GLSL(vec4 aa = mix(bg, br, parmy.b);)
    // x-interpolation
    GLSL(color = mix(aa, ab, parmx.b);)
    GLSLF("}\n");
}

// "Oversample" scaler. w/h are uploaded as the `output_size` uniform; the
// first kernel parameter is used as a threshold (0.0 when it is NaN).
void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler,
                            int w, int h)
{
    GLSLF("{\n");
    GLSL(vec2 pos = pos - vec2(0.5) * pt;) // round to nearest
    GLSL(vec2 fcoord = fract(pos * size - vec2(0.5));)
    // Determine the mixing coefficient vector
    gl_sc_uniform_vec2(sc, "output_size", (float[2]){w, h});
    GLSL(vec2 coeff = fcoord * output_size/size;)
    float threshold = scaler->conf.kernel.params[0];
    threshold = isnan(threshold) ? 0.0 : threshold;
    GLSLF("coeff = (coeff - %f) * 1.0/%f;\n", threshold, 1.0 - 2 * threshold);
    GLSL(coeff = clamp(coeff, 0.0, 1.0);)
    // Compute the right blend of colors
    GLSL(color = texture(tex, pos + pt * (coeff - fcoord));)
    GLSLF("}\n");
}

// Common constants for SMPTE ST.2084 (HDR)
static const float PQ_M1 = 2610./4096 * 1./4,
                   PQ_M2 = 2523./4096 * 128,
                   PQ_C1 = 3424./4096,
                   PQ_C2 = 2413./4096 * 32,
                   PQ_C3 = 2392./4096 * 32;

// Common constants for ARIB STD-B67 (HLG)
static const float HLG_A = 0.17883277,
                   HLG_B = 0.28466892,
                   HLG_C = 0.55991073;

// Common constants for Panasonic V-Log
static const float VLOG_B = 0.00873,
                   VLOG_C = 0.241514,
                   VLOG_D = 0.598206;

// Common constants for Sony S-Log
static const float SLOG_A = 0.432699,
                   SLOG_B = 0.037584,
                   SLOG_C = 0.616596 + 0.03,
                   SLOG_P = 3.538813,
                   SLOG_Q = 0.030001,
                   SLOG_K2 = 155.0 / 219.0;

// Linearize (expand), given a TRC as input. In essence, this is the ITU-R
// EOTF, calculated on an idealized (reference) monitor with a white point of
// MP_REF_WHITE and infinite contrast.
//
// These functions always output to a normalized scale of [0,1], for
// convenience of the video.c code that calls it. To get the values in an
// absolute scale, multiply the result by `mp_trc_nom_peak(trc)`
//
// Emits nothing for MP_CSP_TRC_LINEAR; aborts on a transfer characteristic
// that has no case below.
void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
{
    if (trc == MP_CSP_TRC_LINEAR)
        return;

    GLSLF("// linearize\n");

    // Note that this clamp may technically violate the definition of
    // ITU-R BT.2100, which allows for sub-blacks and super-whites to be
    // displayed on the display where such would be possible. That said, the
    // problem is that not all gamma curves are well-defined on the values
    // outside this range, so we ignore it and just clip anyway for sanity.
    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)

    switch (trc) {
    case MP_CSP_TRC_SRGB:
        // Piecewise curve: linear segment up to 0.04045, power segment above
        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/12.92), \n"
              " pow((color.rgb + vec3(0.055))/vec3(1.055), vec3(2.4)), \n"
              " %s(lessThan(vec3(0.04045), color.rgb))); \n",
              gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_BT_1886:
        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
        break;
    case MP_CSP_TRC_GAMMA18:
        GLSL(color.rgb = pow(color.rgb, vec3(1.8));)
        break;
    case MP_CSP_TRC_GAMMA20:
        GLSL(color.rgb = pow(color.rgb, vec3(2.0));)
        break;
    case MP_CSP_TRC_GAMMA22:
        GLSL(color.rgb = pow(color.rgb, vec3(2.2));)
        break;
    case MP_CSP_TRC_GAMMA24:
        GLSL(color.rgb = pow(color.rgb, vec3(2.4));)
        break;
    case MP_CSP_TRC_GAMMA26:
        GLSL(color.rgb = pow(color.rgb, vec3(2.6));)
        break;
    case MP_CSP_TRC_GAMMA28:
        GLSL(color.rgb = pow(color.rgb, vec3(2.8));)
        break;
    case MP_CSP_TRC_PRO_PHOTO:
        GLSLF("color.rgb = mix(color.rgb * vec3(1.0/16.0), \n"
              " pow(color.rgb, vec3(1.8)), \n"
              " %s(lessThan(vec3(0.03125), color.rgb))); \n",
              gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_PQ:
        GLSLF("color.rgb = pow(color.rgb, vec3(1.0/%f));\n", PQ_M2);
        GLSLF("color.rgb = max(color.rgb - vec3(%f), vec3(0.0)) \n"
              " / (vec3(%f) - vec3(%f) * color.rgb);\n",
              PQ_C1, PQ_C2, PQ_C3);
        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", 1.0 / PQ_M1);
        // PQ's output range is 0-10000, but we need it to be relative to
        // MP_REF_WHITE instead, so rescale
        GLSLF("color.rgb *= vec3(%f);\n", 10000 / MP_REF_WHITE);
        break;
    case MP_CSP_TRC_HLG:
        GLSLF("color.rgb = mix(vec3(4.0) * color.rgb * color.rgb,\n"
              " exp((color.rgb - vec3(%f)) * vec3(1.0/%f)) + vec3(%f),\n"
              " %s(lessThan(vec3(0.5), color.rgb)));\n",
              HLG_C, HLG_A, HLG_B, gl_sc_bvec(sc, 3));
        GLSLF("color.rgb *= vec3(1.0/%f);\n", MP_REF_WHITE_HLG);
        break;
    case MP_CSP_TRC_V_LOG:
        GLSLF("color.rgb = mix((color.rgb - vec3(0.125)) * vec3(1.0/5.6), \n"
              " pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
              " - vec3(%f), \n"
              " %s(lessThanEqual(vec3(0.181), color.rgb))); \n",
              VLOG_D, VLOG_C, VLOG_B, gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_S_LOG1:
        GLSLF("color.rgb = pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f))\n"
              " - vec3(%f);\n",
              SLOG_C, SLOG_A, SLOG_B);
        break;
    case MP_CSP_TRC_S_LOG2:
        GLSLF("color.rgb = mix((color.rgb - vec3(%f)) * vec3(1.0/%f), \n"
              " (pow(vec3(10.0), (color.rgb - vec3(%f)) * vec3(1.0/%f)) \n"
              " - vec3(%f)) * vec3(1.0/%f), \n"
              " %s(lessThanEqual(vec3(%f), color.rgb))); \n",
              SLOG_Q, SLOG_P, SLOG_C, SLOG_A, SLOG_B, SLOG_K2, gl_sc_bvec(sc, 3), SLOG_Q);
        break;
    case MP_CSP_TRC_ST428:
        GLSL(color.rgb = vec3(52.37/48.0) * pow(color.rgb, vec3(2.6)););
        break;
    default:
        abort();
    }

    // Rescale to prevent clipping on non-float textures
    GLSLF("color.rgb *= vec3(1.0/%f);\n", mp_trc_nom_peak(trc));
}

// Delinearize (compress), given a TRC as output. This corresponds to the
// inverse EOTF (not the OETF) in ITU-R terminology, again assuming a
// reference monitor.
//
// Like pass_linearize, this function ingests values on a normalized scale.
// The input is clamped to [0,1] and multiplied by `mp_trc_nom_peak(trc)`
// before the curve is applied. Emits nothing for MP_CSP_TRC_LINEAR; aborts on
// a transfer characteristic that has no case below.
void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc)
{
    if (trc == MP_CSP_TRC_LINEAR)
        return;

    GLSLF("// delinearize\n");
    GLSL(color.rgb = clamp(color.rgb, 0.0, 1.0);)
    GLSLF("color.rgb *= vec3(%f);\n", mp_trc_nom_peak(trc));

    switch (trc) {
    case MP_CSP_TRC_SRGB:
        // Piecewise curve: linear segment below 0.0031308, power segment above
        GLSLF("color.rgb = mix(color.rgb * vec3(12.92), \n"
              " vec3(1.055) * pow(color.rgb, vec3(1.0/2.4)) \n"
              " - vec3(0.055), \n"
              " %s(lessThanEqual(vec3(0.0031308), color.rgb))); \n",
              gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_BT_1886:
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
        break;
    case MP_CSP_TRC_GAMMA18:
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.8));)
        break;
    case MP_CSP_TRC_GAMMA20:
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.0));)
        break;
    case MP_CSP_TRC_GAMMA22:
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.2));)
        break;
    case MP_CSP_TRC_GAMMA24:
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));)
        break;
    case MP_CSP_TRC_GAMMA26:
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.6));)
        break;
    case MP_CSP_TRC_GAMMA28:
        GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.8));)
        break;
    case MP_CSP_TRC_PRO_PHOTO:
        GLSLF("color.rgb = mix(color.rgb * vec3(16.0), \n"
              " pow(color.rgb, vec3(1.0/1.8)), \n"
              " %s(lessThanEqual(vec3(0.001953), color.rgb))); \n",
              gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_PQ:
        // Undo the MP_REF_WHITE-relative scaling applied by pass_linearize
        GLSLF("color.rgb *= vec3(1.0/%f);\n", 10000 / MP_REF_WHITE);
        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M1);
        GLSLF("color.rgb = (vec3(%f) + vec3(%f) * color.rgb) \n"
              " / (vec3(1.0) + vec3(%f) * color.rgb);\n",
              PQ_C1, PQ_C2, PQ_C3);
        GLSLF("color.rgb = pow(color.rgb, vec3(%f));\n", PQ_M2);
        break;
    case MP_CSP_TRC_HLG:
        GLSLF("color.rgb *= vec3(%f);\n", MP_REF_WHITE_HLG);
        GLSLF("color.rgb = mix(vec3(0.5) * sqrt(color.rgb),\n"
              " vec3(%f) * log(color.rgb - vec3(%f)) + vec3(%f),\n"
              " %s(lessThan(vec3(1.0), color.rgb)));\n",
              HLG_A, HLG_B, HLG_C, gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_V_LOG:
        // GLSL log() is the natural logarithm; dividing the coefficient by
        // M_LN10 turns it into the base-10 logarithm of the curve
        GLSLF("color.rgb = mix(vec3(5.6) * color.rgb + vec3(0.125), \n"
              " vec3(%f) * log(color.rgb + vec3(%f)) \n"
              " + vec3(%f), \n"
              " %s(lessThanEqual(vec3(0.01), color.rgb))); \n",
              VLOG_C / M_LN10, VLOG_B, VLOG_D, gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_S_LOG1:
        GLSLF("color.rgb = vec3(%f) * log(color.rgb + vec3(%f)) + vec3(%f);\n",
              SLOG_A / M_LN10, SLOG_B, SLOG_C);
        break;
    case MP_CSP_TRC_S_LOG2:
        GLSLF("color.rgb = mix(vec3(%f) * color.rgb + vec3(%f), \n"
              " vec3(%f) * log(vec3(%f) * color.rgb + vec3(%f)) \n"
              " + vec3(%f), \n"
              " %s(lessThanEqual(vec3(0.0), color.rgb))); \n",
              SLOG_P, SLOG_Q, SLOG_A / M_LN10, SLOG_K2, SLOG_B, SLOG_C, gl_sc_bvec(sc, 3));
        break;
    case MP_CSP_TRC_ST428:
        GLSL(color.rgb = pow(color.rgb * vec3(48.0/52.37), vec3(1.0/2.6)););
        break;
    default:
        abort();
    }
}

// Apply the OOTF mapping from a given light type to display-referred light.
// Assumes absolute scale values. `peak` is used to tune the OOTF where
// applicable (currently only HLG).
+static void pass_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, + float peak) +{ + if (light == MP_CSP_LIGHT_DISPLAY) + return; + + GLSLF("// apply ootf\n"); + + switch (light) + { + case MP_CSP_LIGHT_SCENE_HLG: { + // HLG OOTF from BT.2100, scaled to the chosen display peak + float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); + GLSLF("color.rgb *= vec3(%f * pow(dot(src_luma, color.rgb), %f));\n", + peak / pow(12.0 / MP_REF_WHITE_HLG, gamma), gamma - 1.0); + break; + } + case MP_CSP_LIGHT_SCENE_709_1886: + // This OOTF is defined by encoding the result as 709 and then decoding + // it as 1886; although this is called 709_1886 we actually use the + // more precise (by one decimal) values from BT.2020 instead + GLSLF("color.rgb = mix(color.rgb * vec3(4.5), \n" + " vec3(1.0993) * pow(color.rgb, vec3(0.45)) - vec3(0.0993), \n" + " %s(lessThan(vec3(0.0181), color.rgb))); \n", + gl_sc_bvec(sc, 3)); + GLSL(color.rgb = pow(color.rgb, vec3(2.4));) + break; + case MP_CSP_LIGHT_SCENE_1_2: + GLSL(color.rgb = pow(color.rgb, vec3(1.2));) + break; + default: + abort(); + } +} + +// Inverse of the function pass_ootf, for completeness' sake. 
+static void pass_inverse_ootf(struct gl_shader_cache *sc, enum mp_csp_light light, + float peak) +{ + if (light == MP_CSP_LIGHT_DISPLAY) + return; + + GLSLF("// apply inverse ootf\n"); + + switch (light) + { + case MP_CSP_LIGHT_SCENE_HLG: { + float gamma = MPMAX(1.0, 1.2 + 0.42 * log10(peak * MP_REF_WHITE / 1000.0)); + GLSLF("color.rgb *= vec3(1.0/%f);\n", peak / pow(12.0 / MP_REF_WHITE_HLG, gamma)); + GLSLF("color.rgb /= vec3(max(1e-6, pow(dot(src_luma, color.rgb), %f)));\n", + (gamma - 1.0) / gamma); + break; + } + case MP_CSP_LIGHT_SCENE_709_1886: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/2.4));) + GLSLF("color.rgb = mix(color.rgb * vec3(1.0/4.5), \n" + " pow((color.rgb + vec3(0.0993)) * vec3(1.0/1.0993), \n" + " vec3(1/0.45)), \n" + " %s(lessThan(vec3(0.08145), color.rgb))); \n", + gl_sc_bvec(sc, 3)); + break; + case MP_CSP_LIGHT_SCENE_1_2: + GLSL(color.rgb = pow(color.rgb, vec3(1.0/1.2));) + break; + default: + abort(); + } +} + +// Average light level for SDR signals. This is equal to a signal level of 0.5 +// under a typical presentation gamma of about 2.0. 
+static const float sdr_avg = 0.25; + +static void hdr_update_peak(struct gl_shader_cache *sc, + const struct gl_tone_map_opts *opts) +{ + // Update the sig_peak/sig_avg from the old SSBO state + GLSL(if (average.y > 0.0) {) + GLSL( sig_avg = max(1e-3, average.x);) + GLSL( sig_peak = max(1.00, average.y);) + GLSL(}) + + // Chosen to avoid overflowing on an 8K buffer + const float log_min = 1e-3, log_scale = 400.0, sig_scale = 10000.0; + + // For performance, and to avoid overflows, we tally up the sub-results per + // pixel using shared memory first + GLSLH(shared int wg_sum;) + GLSLH(shared uint wg_max;) + GLSL(wg_sum = 0; wg_max = 0u;) + GLSL(barrier();) + GLSLF("float sig_log = log(max(sig_max, %f));\n", log_min); + GLSLF("atomicAdd(wg_sum, int(sig_log * %f));\n", log_scale); + GLSLF("atomicMax(wg_max, uint(sig_max * %f));\n", sig_scale); + + // Have one thread per work group update the global atomics + GLSL(memoryBarrierShared();) + GLSL(barrier();) + GLSL(if (gl_LocalInvocationIndex == 0u) {) + GLSL( int wg_avg = wg_sum / int(gl_WorkGroupSize.x * gl_WorkGroupSize.y);) + GLSL( atomicAdd(frame_sum, wg_avg);) + GLSL( atomicMax(frame_max, wg_max);) + GLSL( memoryBarrierBuffer();) + GLSL(}) + GLSL(barrier();) + + // Finally, to update the global state, we increment a counter per dispatch + GLSL(uint num_wg = gl_NumWorkGroups.x * gl_NumWorkGroups.y;) + GLSL(if (gl_LocalInvocationIndex == 0u && atomicAdd(counter, 1u) == num_wg - 1u) {) + GLSL( counter = 0u;) + GLSL( vec2 cur = vec2(float(frame_sum) / float(num_wg), frame_max);) + GLSLF(" cur *= vec2(1.0/%f, 1.0/%f);\n", log_scale, sig_scale); + GLSL( cur.x = exp(cur.x);) + GLSL( if (average.y == 0.0)) + GLSL( average = cur;) + + // Use an IIR low-pass filter to smooth out the detected values, with a + // configurable decay rate based on the desired time constant (tau) + if (opts->decay_rate) { + float decay = 1.0f - expf(-1.0f / opts->decay_rate); + GLSLF(" average += %f * (cur - average);\n", decay); + } else { + 
GLSLF(" average = cur;\n"); + } + + // Scene change hysteresis + float log_db = 10.0 / log(10.0); + GLSLF(" float weight = smoothstep(%f, %f, abs(log(cur.x / average.x)));\n", + opts->scene_threshold_low / log_db, + opts->scene_threshold_high / log_db); + GLSL( average = mix(average, cur, weight);) + + // Reset SSBO state for the next frame + GLSL( frame_sum = 0; frame_max = 0u;) + GLSL( memoryBarrierBuffer();) + GLSL(}) +} + +static inline float pq_delinearize(float x) +{ + x *= MP_REF_WHITE / 10000.0; + x = powf(x, PQ_M1); + x = (PQ_C1 + PQ_C2 * x) / (1.0 + PQ_C3 * x); + x = pow(x, PQ_M2); + return x; +} + +// Tone map from a known peak brightness to the range [0,1]. If ref_peak +// is 0, we will use peak detection instead +static void pass_tone_map(struct gl_shader_cache *sc, + float src_peak, float dst_peak, + const struct gl_tone_map_opts *opts) +{ + GLSLF("// HDR tone mapping\n"); + + // To prevent discoloration due to out-of-bounds clipping, we need to make + // sure to reduce the value range as far as necessary to keep the entire + // signal in range, so tone map based on the brightest component. + GLSL(int sig_idx = 0;) + GLSL(if (color[1] > color[sig_idx]) sig_idx = 1;) + GLSL(if (color[2] > color[sig_idx]) sig_idx = 2;) + GLSL(float sig_max = color[sig_idx];) + GLSLF("float sig_peak = %f;\n", src_peak); + GLSLF("float sig_avg = %f;\n", sdr_avg); + + if (opts->compute_peak >= 0) + hdr_update_peak(sc, opts); + + // Always hard-clip the upper bound of the signal range to avoid functions + // exploding on inputs greater than 1.0 + GLSLF("vec3 sig = min(color.rgb, sig_peak);\n"); + + // This function always operates on an absolute scale, so ignore the + // dst_peak normalization for it + float dst_scale = dst_peak; + enum tone_mapping curve = opts->curve ? opts->curve : TONE_MAPPING_BT_2390; + if (curve == TONE_MAPPING_BT_2390) + dst_scale = 1.0; + + // Rescale the variables in order to bring it into a representation where + // 1.0 represents the dst_peak. 
This is because all of the tone mapping + // algorithms are defined in such a way that they map to the range [0.0, 1.0]. + if (dst_scale > 1.0) { + GLSLF("sig *= 1.0/%f;\n", dst_scale); + GLSLF("sig_peak *= 1.0/%f;\n", dst_scale); + } + + GLSL(float sig_orig = sig[sig_idx];) + GLSLF("float slope = min(%f, %f / sig_avg);\n", opts->max_boost, sdr_avg); + GLSL(sig *= slope;) + GLSL(sig_peak *= slope;) + + float param = opts->curve_param; + switch (curve) { + case TONE_MAPPING_CLIP: + GLSLF("sig = min(%f * sig, 1.0);\n", isnan(param) ? 1.0 : param); + break; + + case TONE_MAPPING_MOBIUS: + GLSLF("if (sig_peak > (1.0 + 1e-6)) {\n"); + GLSLF("const float j = %f;\n", isnan(param) ? 0.3 : param); + // solve for M(j) = j; M(sig_peak) = 1.0; M'(j) = 1.0 + // where M(x) = scale * (x+a)/(x+b) + GLSLF("float a = -j*j * (sig_peak - 1.0) / (j*j - 2.0*j + sig_peak);\n"); + GLSLF("float b = (j*j - 2.0*j*sig_peak + sig_peak) / " + "max(1e-6, sig_peak - 1.0);\n"); + GLSLF("float scale = (b*b + 2.0*b*j + j*j) / (b-a);\n"); + GLSLF("sig = mix(sig, scale * (sig + vec3(a)) / (sig + vec3(b))," + " %s(greaterThan(sig, vec3(j))));\n", + gl_sc_bvec(sc, 3)); + GLSLF("}\n"); + break; + + case TONE_MAPPING_REINHARD: { + float contrast = isnan(param) ? 0.5 : param, + offset = (1.0 - contrast) / contrast; + GLSLF("sig = sig / (sig + vec3(%f));\n", offset); + GLSLF("float scale = (sig_peak + %f) / sig_peak;\n", offset); + GLSL(sig *= scale;) + break; + } + + case TONE_MAPPING_HABLE: { + float A = 0.15, B = 0.50, C = 0.10, D = 0.20, E = 0.02, F = 0.30; + GLSLHF("vec3 hable(vec3 x) {\n"); + GLSLHF("return (x * (%f*x + vec3(%f)) + vec3(%f)) / " + " (x * (%f*x + vec3(%f)) + vec3(%f)) " + " - vec3(%f);\n", + A, C*B, D*E, + A, B, D*F, + E/F); + GLSLHF("}\n"); + GLSLF("sig = hable(max(vec3(0.0), sig)) / hable(vec3(sig_peak)).x;\n"); + break; + } + + case TONE_MAPPING_GAMMA: { + float gamma = isnan(param) ? 
1.8 : param; + GLSLF("const float cutoff = 0.05, gamma = 1.0/%f;\n", gamma); + GLSL(float scale = pow(cutoff / sig_peak, gamma.x) / cutoff;) + GLSLF("sig = mix(scale * sig," + " pow(sig / sig_peak, vec3(gamma))," + " %s(greaterThan(sig, vec3(cutoff))));\n", + gl_sc_bvec(sc, 3)); + break; + } + + case TONE_MAPPING_LINEAR: { + float coeff = isnan(param) ? 1.0 : param; + GLSLF("sig = min(%f / sig_peak, 1.0) * sig;\n", coeff); + break; + } + + case TONE_MAPPING_BT_2390: + // We first need to encode both sig and sig_peak into PQ space + GLSLF("vec4 sig_pq = vec4(sig.rgb, sig_peak); \n" + "sig_pq *= vec4(1.0/%f); \n" + "sig_pq = pow(sig_pq, vec4(%f)); \n" + "sig_pq = (vec4(%f) + vec4(%f) * sig_pq) \n" + " / (vec4(1.0) + vec4(%f) * sig_pq); \n" + "sig_pq = pow(sig_pq, vec4(%f)); \n", + 10000.0 / MP_REF_WHITE, PQ_M1, PQ_C1, PQ_C2, PQ_C3, PQ_M2); + // Encode both the signal and the target brightness to be relative to + // the source peak brightness, and figure out the target peak in this space + GLSLF("float scale = 1.0 / sig_pq.a; \n" + "sig_pq.rgb *= vec3(scale); \n" + "float maxLum = %f * scale; \n", + pq_delinearize(dst_peak)); + // Apply piece-wise hermite spline + GLSLF("float ks = 1.5 * maxLum - 0.5; \n" + "vec3 tb = (sig_pq.rgb - vec3(ks)) / vec3(1.0 - ks); \n" + "vec3 tb2 = tb * tb; \n" + "vec3 tb3 = tb2 * tb; \n" + "vec3 pb = (2.0 * tb3 - 3.0 * tb2 + vec3(1.0)) * vec3(ks) + \n" + " (tb3 - 2.0 * tb2 + tb) * vec3(1.0 - ks) + \n" + " (-2.0 * tb3 + 3.0 * tb2) * vec3(maxLum); \n" + "sig = mix(pb, sig_pq.rgb, %s(lessThan(sig_pq.rgb, vec3(ks)))); \n", + gl_sc_bvec(sc, 3)); + // Convert back from PQ space to linear light + GLSLF("sig *= vec3(sig_pq.a); \n" + "sig = pow(sig, vec3(1.0/%f)); \n" + "sig = max(sig - vec3(%f), 0.0) / \n" + " (vec3(%f) - vec3(%f) * sig); \n" + "sig = pow(sig, vec3(1.0/%f)); \n" + "sig *= vec3(%f); \n", + PQ_M2, PQ_C1, PQ_C2, PQ_C3, PQ_M1, 10000.0 / MP_REF_WHITE); + break; + + default: + abort(); + } + + GLSLF("float coeff = max(sig[sig_idx] - 
%f, 1e-6) / \n" + " max(sig[sig_idx], 1.0); \n" + "coeff = %f * pow(coeff / %f, %f); \n" + "color.rgb *= sig[sig_idx] / sig_orig; \n" + "color.rgb = mix(color.rgb, %f * sig, coeff); \n", + 0.18 / dst_scale, 0.90, dst_scale, 0.20, dst_scale); +} + +// Map colors from one source space to another. These source spaces must be +// known (i.e. not MP_CSP_*_AUTO), as this function won't perform any +// auto-guessing. If is_linear is true, we assume the input has already been +// linearized (e.g. for linear-scaling). If `opts->compute_peak` is true, we +// will detect the peak instead of relying on metadata. Note that this requires +// the caller to have already bound the appropriate SSBO and set up the compute +// shader metadata +void pass_color_map(struct gl_shader_cache *sc, bool is_linear, + struct mp_colorspace src, struct mp_colorspace dst, + const struct gl_tone_map_opts *opts) +{ + GLSLF("// color mapping\n"); + + // Some operations need access to the video's luma coefficients, so make + // them available + float rgb2xyz[3][3]; + mp_get_rgb2xyz_matrix(mp_get_csp_primaries(src.primaries), rgb2xyz); + gl_sc_uniform_vec3(sc, "src_luma", rgb2xyz[1]); + mp_get_rgb2xyz_matrix(mp_get_csp_primaries(dst.primaries), rgb2xyz); + gl_sc_uniform_vec3(sc, "dst_luma", rgb2xyz[1]); + + bool need_ootf = src.light != dst.light; + if (src.light == MP_CSP_LIGHT_SCENE_HLG && src.hdr.max_luma != dst.hdr.max_luma) + need_ootf = true; + + // All operations from here on require linear light as a starting point, + // so we linearize even if src.gamma == dst.gamma when one of the other + // operations needs it + bool need_linear = src.gamma != dst.gamma || + src.primaries != dst.primaries || + src.hdr.max_luma != dst.hdr.max_luma || + need_ootf; + + if (need_linear && !is_linear) { + // We also pull it up so that 1.0 is the reference white + pass_linearize(sc, src.gamma); + is_linear = true; + } + + // Pre-scale the incoming values into an absolute scale + GLSLF("color.rgb *= vec3(%f);\n", 
mp_trc_nom_peak(src.gamma)); + + if (need_ootf) + pass_ootf(sc, src.light, src.hdr.max_luma / MP_REF_WHITE); + + // Tone map to prevent clipping due to excessive brightness + if (src.hdr.max_luma > dst.hdr.max_luma) { + pass_tone_map(sc, src.hdr.max_luma / MP_REF_WHITE, + dst.hdr.max_luma / MP_REF_WHITE, opts); + } + + // Adapt to the right colorspace if necessary + if (src.primaries != dst.primaries) { + struct mp_csp_primaries csp_src = mp_get_csp_primaries(src.primaries), + csp_dst = mp_get_csp_primaries(dst.primaries); + float m[3][3] = {{0}}; + mp_get_cms_matrix(csp_src, csp_dst, MP_INTENT_RELATIVE_COLORIMETRIC, m); + gl_sc_uniform_mat3(sc, "cms_matrix", true, &m[0][0]); + GLSL(color.rgb = cms_matrix * color.rgb;) + + if (!opts->gamut_mode || opts->gamut_mode == GAMUT_DESATURATE) { + GLSL(float cmin = min(min(color.r, color.g), color.b);) + GLSL(if (cmin < 0.0) { + float luma = dot(dst_luma, color.rgb); + float coeff = cmin / (cmin - luma); + color.rgb = mix(color.rgb, vec3(luma), coeff); + }) + GLSLF("float cmax = 1.0/%f * max(max(color.r, color.g), color.b);\n", + dst.hdr.max_luma / MP_REF_WHITE); + GLSL(if (cmax > 1.0) color.rgb /= cmax;) + } + } + + if (need_ootf) + pass_inverse_ootf(sc, dst.light, dst.hdr.max_luma / MP_REF_WHITE); + + // Post-scale the outgoing values from absolute scale to normalized. + // For SDR, we normalize to the chosen signal peak. For HDR, we normalize + // to the encoding range of the transfer function. 
+ float dst_range = dst.hdr.max_luma / MP_REF_WHITE; + if (mp_trc_is_hdr(dst.gamma)) + dst_range = mp_trc_nom_peak(dst.gamma); + + GLSLF("color.rgb *= vec3(%f);\n", 1.0 / dst_range); + + // Warn for remaining out-of-gamut colors if enabled + if (opts->gamut_mode == GAMUT_WARN) { + GLSL(if (any(greaterThan(color.rgb, vec3(1.005))) || + any(lessThan(color.rgb, vec3(-0.005))))) + GLSL(color.rgb = vec3(1.0) - color.rgb;) // invert + } + + if (is_linear) + pass_delinearize(sc, dst.gamma); +} + +// Wide usage friendly PRNG, shamelessly stolen from a GLSL tricks forum post. +// Obtain random numbers by calling rand(h), followed by h = permute(h) to +// update the state. Assumes the texture was hooked. +// permute() was modified from the original to avoid "large" numbers in +// calculations, since low-end mobile GPUs choke on them (overflow). +static void prng_init(struct gl_shader_cache *sc, AVLFG *lfg) +{ + GLSLH(float mod289(float x) { return x - floor(x * 1.0/289.0) * 289.0; }) + GLSLHF("float permute(float x) {\n"); + GLSLH(return mod289( mod289(34.0*x + 1.0) * (fract(x) + 1.0) );) + GLSLHF("}\n"); + GLSLH(float rand(float x) { return fract(x * 1.0/41.0); }) + + // Initialize the PRNG by hashing the position + a random uniform + GLSL(vec3 _m = vec3(HOOKED_pos, random) + vec3(1.0);) + GLSL(float h = permute(permute(permute(_m.x)+_m.y)+_m.z);) + gl_sc_uniform_dynamic(sc); + gl_sc_uniform_f(sc, "random", (double)av_lfg_get(lfg) / UINT32_MAX); +} + +const struct deband_opts deband_opts_def = { + .iterations = 1, + .threshold = 48.0, + .range = 16.0, + .grain = 32.0, +}; + +#define OPT_BASE_STRUCT struct deband_opts +const struct m_sub_options deband_conf = { + .opts = (const m_option_t[]) { + {"iterations", OPT_INT(iterations), M_RANGE(0, 16)}, + {"threshold", OPT_FLOAT(threshold), M_RANGE(0.0, 4096.0)}, + {"range", OPT_FLOAT(range), M_RANGE(1.0, 64.0)}, + {"grain", OPT_FLOAT(grain), M_RANGE(0.0, 4096.0)}, + {0} + }, + .size = sizeof(struct deband_opts), + .defaults = 
&deband_opts_def, +}; + +// Stochastically sample a debanded result from a hooked texture. +// Emits GLSL into sc: opts supplies iterations/threshold/range/grain, lfg is +// handed to prng_init(), and the grain is divided by mp_trc_nom_peak(trc). +void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, + AVLFG *lfg, enum mp_csp_trc trc) +{ + // Initialize the PRNG + GLSLF("{\n"); + prng_init(sc, lfg); + + // Helper: Compute a stochastic approximation of the avg color around a + // pixel + GLSLHF("vec4 average(float range, inout float h) {\n"); + // Compute a random angle and distance + GLSLH(float dist = rand(h) * range; h = permute(h);) + GLSLH(float dir = rand(h) * 6.2831853; h = permute(h);) + GLSLH(vec2 o = dist * vec2(cos(dir), sin(dir));) + + // Sample at quarter-turn intervals around the source pixel + GLSLH(vec4 ref[4];) + GLSLH(ref[0] = HOOKED_texOff(vec2( o.x, o.y));) + GLSLH(ref[1] = HOOKED_texOff(vec2(-o.y, o.x));) + GLSLH(ref[2] = HOOKED_texOff(vec2(-o.x, -o.y));) + GLSLH(ref[3] = HOOKED_texOff(vec2( o.y, -o.x));) + + // Return the (normalized) average + GLSLH(return (ref[0] + ref[1] + ref[2] + ref[3])*0.25;) + GLSLHF("}\n"); + + // Sample the source pixel + GLSL(color = HOOKED_tex(HOOKED_pos);) + GLSLF("vec4 avg, diff;\n"); + for (int i = 1; i <= opts->iterations; i++) { + // Sample the average pixel and use it instead of the original if + // the difference is below the given threshold + GLSLF("avg = average(%f, h);\n", i * opts->range); + GLSL(diff = abs(color - avg);) + GLSLF("color = mix(avg, color, %s(greaterThan(diff, vec4(%f))));\n", + gl_sc_bvec(sc, 4), opts->threshold / (i * 16384.0)); + } + + // Add some random noise to smooth out residual differences + GLSL(vec3 noise;) + GLSL(noise.x = rand(h); h = permute(h);) + GLSL(noise.y = rand(h); h = permute(h);) + GLSL(noise.z = rand(h); h = permute(h);) + + // Noise is scaled to the signal level to prevent extreme noise for HDR + float gain = opts->grain/8192.0 / mp_trc_nom_peak(trc); + GLSLF("color.xyz += %f * (noise - vec3(0.5));\n", gain); + GLSLF("}\n"); +} + +// Assumes the texture was hooked +void
pass_sample_unsharp(struct gl_shader_cache *sc, float param) { + GLSLF("{\n"); + GLSL(float st1 = 1.2;) + GLSL(vec4 p = HOOKED_tex(HOOKED_pos);) + GLSL(vec4 sum1 = HOOKED_texOff(st1 * vec2(+1, +1)) + + HOOKED_texOff(st1 * vec2(+1, -1)) + + HOOKED_texOff(st1 * vec2(-1, +1)) + + HOOKED_texOff(st1 * vec2(-1, -1));) + GLSL(float st2 = 1.5;) + GLSL(vec4 sum2 = HOOKED_texOff(st2 * vec2(+1, 0)) + + HOOKED_texOff(st2 * vec2( 0, +1)) + + HOOKED_texOff(st2 * vec2(-1, 0)) + + HOOKED_texOff(st2 * vec2( 0, -1));) + GLSL(vec4 t = p * 0.859375 + sum2 * -0.1171875 + sum1 * -0.09765625;) + GLSLF("color = p + t * %f;\n", param); + GLSLF("}\n"); +} diff --git a/video/out/gpu/video_shaders.h b/video/out/gpu/video_shaders.h new file mode 100644 index 0000000..27e7874 --- /dev/null +++ b/video/out/gpu/video_shaders.h @@ -0,0 +1,59 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MP_GL_VIDEO_SHADERS_H +#define MP_GL_VIDEO_SHADERS_H + +#include <libavutil/lfg.h> + +#include "utils.h" +#include "video.h" + +struct deband_opts { + int iterations; + float threshold; + float range; + float grain; +}; + +extern const struct deband_opts deband_opts_def; +extern const struct m_sub_options deband_conf; + +void sampler_prelude(struct gl_shader_cache *sc, int tex_num); +void pass_sample_separated_gen(struct gl_shader_cache *sc, struct scaler *scaler, + int d_x, int d_y); +void pass_sample_polar(struct gl_shader_cache *sc, struct scaler *scaler, + int components, bool sup_gather); +void pass_compute_polar(struct gl_shader_cache *sc, struct scaler *scaler, + int components, int bw, int bh, int iw, int ih); +void pass_sample_bicubic_fast(struct gl_shader_cache *sc); +void pass_sample_oversample(struct gl_shader_cache *sc, struct scaler *scaler, + int w, int h); + +void pass_linearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); +void pass_delinearize(struct gl_shader_cache *sc, enum mp_csp_trc trc); + +void pass_color_map(struct gl_shader_cache *sc, bool is_linear, + struct mp_colorspace src, struct mp_colorspace dst, + const struct gl_tone_map_opts *opts); + +void pass_sample_deband(struct gl_shader_cache *sc, struct deband_opts *opts, + AVLFG *lfg, enum mp_csp_trc trc); + +void pass_sample_unsharp(struct gl_shader_cache *sc, float param); + +#endif diff --git a/video/out/gpu_next/context.c b/video/out/gpu_next/context.c new file mode 100644 index 0000000..2887cff --- /dev/null +++ b/video/out/gpu_next/context.c @@ -0,0 +1,240 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <https://www.gnu.org/licenses/>. + */ + +#include <libplacebo/config.h> + +#ifdef PL_HAVE_D3D11 +#include <libplacebo/d3d11.h> +#endif + +#ifdef PL_HAVE_OPENGL +#include <libplacebo/opengl.h> +#endif + +#include "context.h" +#include "config.h" +#include "common/common.h" +#include "options/m_config.h" +#include "video/out/placebo/utils.h" +#include "video/out/gpu/video.h" + +#if HAVE_D3D11 +#include "osdep/windows_utils.h" +#include "video/out/d3d11/ra_d3d11.h" +#include "video/out/d3d11/context.h" +#endif + +#if HAVE_GL +#include "video/out/opengl/context.h" +#include "video/out/opengl/ra_gl.h" +# if HAVE_EGL +#include <EGL/egl.h> +# endif +#endif + +#if HAVE_VULKAN +#include "video/out/vulkan/context.h" +#endif + +#if HAVE_D3D11 +static bool d3d11_pl_init(struct vo *vo, struct gpu_ctx *ctx, + struct ra_ctx_opts *ctx_opts) +{ +#if !defined(PL_HAVE_D3D11) + MP_MSG(ctx, vo->probing ? MSGL_V : MSGL_ERR, + "libplacebo was built without D3D11 support.\n"); + return false; +#else // defined(PL_HAVE_D3D11) + bool success = false; + + ID3D11Device *device = ra_d3d11_get_device(ctx->ra_ctx->ra); + IDXGISwapChain *swapchain = ra_d3d11_ctx_get_swapchain(ctx->ra_ctx); + if (!device || !swapchain) { + mp_err(ctx->log, + "Failed to receive required components from the mpv d3d11 " + "context! (device: %s, swap chain: %s)\n", + device ? "OK" : "failed", + swapchain ? 
"OK" : "failed"); + goto err_out; + } + + pl_d3d11 d3d11 = pl_d3d11_create(ctx->pllog, + pl_d3d11_params( + .device = device, + ) + ); + if (!d3d11) { + mp_err(ctx->log, "Failed to acquire a d3d11 libplacebo context!\n"); + goto err_out; + } + ctx->gpu = d3d11->gpu; + + mppl_log_set_probing(ctx->pllog, false); + + ctx->swapchain = pl_d3d11_create_swapchain(d3d11, + pl_d3d11_swapchain_params( + .swapchain = swapchain, + ) + ); + if (!ctx->swapchain) { + mp_err(ctx->log, "Failed to acquire a d3d11 libplacebo swap chain!\n"); + goto err_out; + } + + success = true; + +err_out: + SAFE_RELEASE(swapchain); + SAFE_RELEASE(device); + + return success; +#endif // defined(PL_HAVE_D3D11) +} +#endif // HAVE_D3D11 + +struct gpu_ctx *gpu_ctx_create(struct vo *vo, struct gl_video_opts *gl_opts) +{ + struct gpu_ctx *ctx = talloc_zero(NULL, struct gpu_ctx); + ctx->log = vo->log; + + struct ra_ctx_opts *ctx_opts = mp_get_config_group(ctx, vo->global, &ra_ctx_conf); + ctx_opts->want_alpha = gl_opts->alpha_mode == ALPHA_YES; + ctx->ra_ctx = ra_ctx_create(vo, *ctx_opts); + if (!ctx->ra_ctx) + goto err_out; + +#if HAVE_VULKAN + struct mpvk_ctx *vkctx = ra_vk_ctx_get(ctx->ra_ctx); + if (vkctx) { + ctx->pllog = vkctx->pllog; + ctx->gpu = vkctx->gpu; + ctx->swapchain = vkctx->swapchain; + return ctx; + } +#endif + + ctx->pllog = mppl_log_create(ctx, ctx->log); + if (!ctx->pllog) + goto err_out; + + mppl_log_set_probing(ctx->pllog, vo->probing); + +#if HAVE_D3D11 + if (ra_is_d3d11(ctx->ra_ctx->ra)) { + if (!d3d11_pl_init(vo, ctx, ctx_opts)) + goto err_out; + + return ctx; + } +#endif + +#if HAVE_GL && defined(PL_HAVE_OPENGL) + if (ra_is_gl(ctx->ra_ctx->ra)) { + struct GL *gl = ra_gl_get(ctx->ra_ctx->ra); + pl_opengl opengl = pl_opengl_create(ctx->pllog, + pl_opengl_params( + .debug = ctx_opts->debug, + .allow_software = ctx_opts->allow_sw, + .get_proc_addr_ex = (void *) gl->get_fn, + .proc_ctx = gl->fn_ctx, +# if HAVE_EGL + .egl_display = eglGetCurrentDisplay(), + .egl_context = 
eglGetCurrentContext(), +# endif + ) + ); + if (!opengl) + goto err_out; + ctx->gpu = opengl->gpu; + + mppl_log_set_probing(ctx->pllog, false); + + ctx->swapchain = pl_opengl_create_swapchain(opengl, pl_opengl_swapchain_params( + .max_swapchain_depth = vo->opts->swapchain_depth, + .framebuffer.flipped = gl->flipped, + )); + if (!ctx->swapchain) + goto err_out; + + return ctx; + } +#elif HAVE_GL + if (ra_is_gl(ctx->ra_ctx->ra)) { + MP_MSG(ctx, vo->probing ? MSGL_V : MSGL_ERR, + "libplacebo was built without OpenGL support.\n"); + } +#endif + +err_out: + gpu_ctx_destroy(&ctx); + return NULL; +} + +bool gpu_ctx_resize(struct gpu_ctx *ctx, int w, int h) +{ +#if HAVE_VULKAN + if (ra_vk_ctx_get(ctx->ra_ctx)) + // vulkan RA handles this by itself + return true; +#endif + + return pl_swapchain_resize(ctx->swapchain, &w, &h); +} + +void gpu_ctx_destroy(struct gpu_ctx **ctxp) +{ + struct gpu_ctx *ctx = *ctxp; + if (!ctx) + return; + if (!ctx->ra_ctx) + goto skip_common_pl_cleanup; + +#if HAVE_VULKAN + if (ra_vk_ctx_get(ctx->ra_ctx)) + // vulkan RA context handles pl cleanup by itself, + // skip common local clean-up. + goto skip_common_pl_cleanup; +#endif + + if (ctx->swapchain) + pl_swapchain_destroy(&ctx->swapchain); + + if (ctx->gpu) { +#if HAVE_GL && defined(PL_HAVE_OPENGL) + if (ra_is_gl(ctx->ra_ctx->ra)) { + pl_opengl opengl = pl_opengl_get(ctx->gpu); + pl_opengl_destroy(&opengl); + } +#endif + +#if HAVE_D3D11 && defined(PL_HAVE_D3D11) + if (ra_is_d3d11(ctx->ra_ctx->ra)) { + pl_d3d11 d3d11 = pl_d3d11_get(ctx->gpu); + pl_d3d11_destroy(&d3d11); + } +#endif + } + + if (ctx->pllog) + pl_log_destroy(&ctx->pllog); + +skip_common_pl_cleanup: + ra_ctx_destroy(&ctx->ra_ctx); + + talloc_free(ctx); + *ctxp = NULL; +} diff --git a/video/out/gpu_next/context.h b/video/out/gpu_next/context.h new file mode 100644 index 0000000..b98b9e7 --- /dev/null +++ b/video/out/gpu_next/context.h @@ -0,0 +1,40 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <https://www.gnu.org/licenses/>. + */ + +#pragma once + +#include <libplacebo/renderer.h> + +struct mp_log; +struct ra_ctx; +struct vo; +struct gl_video_opts; + +struct gpu_ctx { + struct mp_log *log; + struct ra_ctx *ra_ctx; + + pl_log pllog; + pl_gpu gpu; + pl_swapchain swapchain; + + void *priv; +}; + +struct gpu_ctx *gpu_ctx_create(struct vo *vo, struct gl_video_opts *gl_opts); +bool gpu_ctx_resize(struct gpu_ctx *ctx, int w, int h); +void gpu_ctx_destroy(struct gpu_ctx **ctxp); diff --git a/video/out/hwdec/dmabuf_interop.h b/video/out/hwdec/dmabuf_interop.h new file mode 100644 index 0000000..e9b3e8e --- /dev/null +++ b/video/out/hwdec/dmabuf_interop.h @@ -0,0 +1,57 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include <libavutil/hwcontext_drm.h> + +#include "video/out/gpu/hwdec.h" + +struct dmabuf_interop { + bool use_modifiers; + bool composed_layers; + + bool (*interop_init)(struct ra_hwdec_mapper *mapper, + const struct ra_imgfmt_desc *desc); + void (*interop_uninit)(const struct ra_hwdec_mapper *mapper); + + bool (*interop_map)(struct ra_hwdec_mapper *mapper, + struct dmabuf_interop *dmabuf_interop, + bool probing); + void (*interop_unmap)(struct ra_hwdec_mapper *mapper); +}; + +struct dmabuf_interop_priv { + int num_planes; + struct mp_image layout; + struct ra_tex *tex[4]; + + AVDRMFrameDescriptor desc; + bool surface_acquired; + + void *interop_mapper_priv; +}; + +typedef bool (*dmabuf_interop_init)(const struct ra_hwdec *hw, + struct dmabuf_interop *dmabuf_interop); + +bool dmabuf_interop_gl_init(const struct ra_hwdec *hw, + struct dmabuf_interop *dmabuf_interop); +bool dmabuf_interop_pl_init(const struct ra_hwdec *hw, + struct dmabuf_interop *dmabuf_interop); +bool dmabuf_interop_wl_init(const struct ra_hwdec *hw, + struct dmabuf_interop *dmabuf_interop); diff --git a/video/out/hwdec/dmabuf_interop_gl.c b/video/out/hwdec/dmabuf_interop_gl.c new file mode 100644 index 0000000..e7fb103 --- /dev/null +++ b/video/out/hwdec/dmabuf_interop_gl.c @@ -0,0 +1,311 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "dmabuf_interop.h" + +#include <drm_fourcc.h> +#include <EGL/egl.h> +#include "video/out/opengl/ra_gl.h" + +typedef void* GLeglImageOES; +typedef void *EGLImageKHR; + +// Any EGL_EXT_image_dma_buf_import definitions used in this source file. +#define EGL_LINUX_DMA_BUF_EXT 0x3270 +#define EGL_LINUX_DRM_FOURCC_EXT 0x3271 +#define EGL_DMA_BUF_PLANE0_FD_EXT 0x3272 +#define EGL_DMA_BUF_PLANE0_OFFSET_EXT 0x3273 +#define EGL_DMA_BUF_PLANE0_PITCH_EXT 0x3274 +#define EGL_DMA_BUF_PLANE1_FD_EXT 0x3275 +#define EGL_DMA_BUF_PLANE1_OFFSET_EXT 0x3276 +#define EGL_DMA_BUF_PLANE1_PITCH_EXT 0x3277 +#define EGL_DMA_BUF_PLANE2_FD_EXT 0x3278 +#define EGL_DMA_BUF_PLANE2_OFFSET_EXT 0x3279 +#define EGL_DMA_BUF_PLANE2_PITCH_EXT 0x327A + + +// Any EGL_EXT_image_dma_buf_import_modifiers definitions used in this source +// file (fourth plane and per-plane DRM format modifier attributes). +#define EGL_DMA_BUF_PLANE3_FD_EXT 0x3440 +#define EGL_DMA_BUF_PLANE3_OFFSET_EXT 0x3441 +#define EGL_DMA_BUF_PLANE3_PITCH_EXT 0x3442 +#define EGL_DMA_BUF_PLANE0_MODIFIER_LO_EXT 0x3443 +#define EGL_DMA_BUF_PLANE0_MODIFIER_HI_EXT 0x3444 +#define EGL_DMA_BUF_PLANE1_MODIFIER_LO_EXT 0x3445 +#define EGL_DMA_BUF_PLANE1_MODIFIER_HI_EXT 0x3446 +#define EGL_DMA_BUF_PLANE2_MODIFIER_LO_EXT 0x3447 +#define EGL_DMA_BUF_PLANE2_MODIFIER_HI_EXT 0x3448 +#define EGL_DMA_BUF_PLANE3_MODIFIER_LO_EXT 0x3449 +#define EGL_DMA_BUF_PLANE3_MODIFIER_HI_EXT 0x344A + +struct vaapi_gl_mapper_priv { + GLuint gl_textures[4]; + EGLImageKHR images[4]; + + EGLImageKHR (EGLAPIENTRY *CreateImageKHR)(EGLDisplay, EGLContext, + EGLenum, EGLClientBuffer, + const EGLint *); + EGLBoolean (EGLAPIENTRY *DestroyImageKHR)(EGLDisplay, EGLImageKHR); + void (EGLAPIENTRY *EGLImageTargetTexture2DOES)(GLenum, GLeglImageOES); +}; + +static bool vaapi_gl_mapper_init(struct ra_hwdec_mapper *mapper, + const struct ra_imgfmt_desc *desc) +{ + struct dmabuf_interop_priv *p_mapper = mapper->priv; + struct vaapi_gl_mapper_priv *p = talloc_ptrtype(NULL, p); + p_mapper->interop_mapper_priv = p; + + *p = (struct
vaapi_gl_mapper_priv) { + // EGL_KHR_image_base + .CreateImageKHR = (void *)eglGetProcAddress("eglCreateImageKHR"), + .DestroyImageKHR = (void *)eglGetProcAddress("eglDestroyImageKHR"), + // GL_OES_EGL_image + .EGLImageTargetTexture2DOES = + (void *)eglGetProcAddress("glEGLImageTargetTexture2DOES"), + }; + + if (!p->CreateImageKHR || !p->DestroyImageKHR || + !p->EGLImageTargetTexture2DOES) + return false; + + GL *gl = ra_gl_get(mapper->ra); + gl->GenTextures(4, p->gl_textures); + for (int n = 0; n < desc->num_planes; n++) { + gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->BindTexture(GL_TEXTURE_2D, 0); + + struct ra_tex_params params = { + .dimensions = 2, + .w = mp_image_plane_w(&p_mapper->layout, n), + .h = mp_image_plane_h(&p_mapper->layout, n), + .d = 1, + .format = desc->planes[n], + .render_src = true, + .src_linear = true, + }; + + if (params.format->ctype != RA_CTYPE_UNORM) + return false; + + p_mapper->tex[n] = ra_create_wrapped_tex(mapper->ra, ¶ms, + p->gl_textures[n]); + if (!p_mapper->tex[n]) + return false; + } + + return true; +} + +static void vaapi_gl_mapper_uninit(const struct ra_hwdec_mapper *mapper) +{ + struct dmabuf_interop_priv *p_mapper = mapper->priv; + struct vaapi_gl_mapper_priv *p = p_mapper->interop_mapper_priv; + + if (p) { + GL *gl = ra_gl_get(mapper->ra); + gl->DeleteTextures(4, p->gl_textures); + for (int n = 0; n < 4; n++) { + p->gl_textures[n] = 0; + ra_tex_free(mapper->ra, &p_mapper->tex[n]); + } + talloc_free(p); + p_mapper->interop_mapper_priv = NULL; + } +} + +#define ADD_ATTRIB(name, value) \ + do { \ + assert(num_attribs + 3 < MP_ARRAY_SIZE(attribs)); \ + attribs[num_attribs++] = (name); \ + attribs[num_attribs++] = (value); \ + 
attribs[num_attribs] = EGL_NONE; \ + } while(0) + +#define ADD_PLANE_ATTRIBS(plane) do { \ + uint64_t drm_format_modifier = p_mapper->desc.objects[p_mapper->desc.layers[i].planes[j].object_index].format_modifier; \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _FD_EXT, \ + p_mapper->desc.objects[p_mapper->desc.layers[i].planes[j].object_index].fd); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _OFFSET_EXT, \ + p_mapper->desc.layers[i].planes[j].offset); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _PITCH_EXT, \ + p_mapper->desc.layers[i].planes[j].pitch); \ + if (dmabuf_interop->use_modifiers && drm_format_modifier != DRM_FORMAT_MOD_INVALID) { \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_LO_EXT, drm_format_modifier & 0xfffffffful); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_HI_EXT, drm_format_modifier >> 32); \ + } \ + } while (0) + +static bool vaapi_gl_map(struct ra_hwdec_mapper *mapper, + struct dmabuf_interop *dmabuf_interop, + bool probing) +{ + struct dmabuf_interop_priv *p_mapper = mapper->priv; + struct vaapi_gl_mapper_priv *p = p_mapper->interop_mapper_priv; + + GL *gl = ra_gl_get(mapper->ra); + + for (int i = 0, n = 0; i < p_mapper->desc.nb_layers; i++) { + /* + * As we must map surfaces as one texture per plane, we can only support + * a subset of possible multi-plane layer formats. This is due to having + * to manually establish what DRM format each synthetic layer should + * have. 
+ */ + uint32_t format[AV_DRM_MAX_PLANES] = { + p_mapper->desc.layers[i].format, + }; + + if (p_mapper->desc.layers[i].nb_planes > 1) { + switch (p_mapper->desc.layers[i].format) { + case DRM_FORMAT_NV12: + case DRM_FORMAT_NV16: + format[0] = DRM_FORMAT_R8; + format[1] = DRM_FORMAT_GR88; + break; + case DRM_FORMAT_YUV420: + format[0] = DRM_FORMAT_R8; + format[1] = DRM_FORMAT_R8; + format[2] = DRM_FORMAT_R8; + break; + case DRM_FORMAT_P010: +#ifdef DRM_FORMAT_P030 /* Format added in a newer libdrm version than minimum */ + case DRM_FORMAT_P030: +#endif + format[0] = DRM_FORMAT_R16; + format[1] = DRM_FORMAT_GR1616; + break; + default: + mp_msg(mapper->log, probing ? MSGL_DEBUG : MSGL_ERR, + "Cannot map unknown multi-plane format: 0x%08X\n", + p_mapper->desc.layers[i].format); + return false; + } + } else { + /* + * As OpenGL only has one guaranteed rgba format (rgba8), drivers + * that support importing dmabuf formats with different channel + * orders do implicit swizzling to get to rgba. However, we look at + * the original imgfmt to decide channel order, and we then swizzle + * based on that. So, we can get into a situation where we swizzle + * twice and end up with a mess. + * + * The simplest way to avoid that is to lie to OpenGL and say that + * the surface we are importing is in the natural channel order, so + * that our swizzling does the right thing. + * + * DRM ABGR corresponds to OpenGL RGBA due to different naming + * conventions. + */ + switch (format[0]) { + case DRM_FORMAT_ARGB8888: + case DRM_FORMAT_RGBA8888: + case DRM_FORMAT_BGRA8888: + format[0] = DRM_FORMAT_ABGR8888; + break; + case DRM_FORMAT_XRGB8888: + format[0] = DRM_FORMAT_XBGR8888; + break; + case DRM_FORMAT_RGBX8888: + case DRM_FORMAT_BGRX8888: + // Logically, these two formats should be handled as above, + // but there appear to be additional problems that make the + // format change here insufficient or incorrect, so we're + // doing nothing for now. 
+ break; + } + } + + for (int j = 0; j < p_mapper->desc.layers[i].nb_planes; j++, n++) { + int attribs[48] = {EGL_NONE}; + int num_attribs = 0; + + ADD_ATTRIB(EGL_LINUX_DRM_FOURCC_EXT, format[j]); + ADD_ATTRIB(EGL_WIDTH, p_mapper->tex[n]->params.w); + ADD_ATTRIB(EGL_HEIGHT, p_mapper->tex[n]->params.h); + ADD_PLANE_ATTRIBS(0); + + p->images[n] = p->CreateImageKHR(eglGetCurrentDisplay(), + EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT, NULL, attribs); + if (!p->images[n]) { + mp_msg(mapper->log, probing ? MSGL_DEBUG : MSGL_ERR, + "Failed to import surface in EGL: %u\n", eglGetError()); + return false; + } + + gl->BindTexture(GL_TEXTURE_2D, p->gl_textures[n]); + p->EGLImageTargetTexture2DOES(GL_TEXTURE_2D, p->images[n]); + + mapper->tex[n] = p_mapper->tex[n]; + } + } + + gl->BindTexture(GL_TEXTURE_2D, 0); + return true; +} + +static void vaapi_gl_unmap(struct ra_hwdec_mapper *mapper) +{ + struct dmabuf_interop_priv *p_mapper = mapper->priv; + struct vaapi_gl_mapper_priv *p = p_mapper->interop_mapper_priv; + + if (p) { + for (int n = 0; n < 4; n++) { + if (p->images[n]) + p->DestroyImageKHR(eglGetCurrentDisplay(), p->images[n]); + p->images[n] = 0; + } + } +} + +bool dmabuf_interop_gl_init(const struct ra_hwdec *hw, + struct dmabuf_interop *dmabuf_interop) +{ + if (!ra_is_gl(hw->ra_ctx->ra)) { + // This is not an OpenGL RA. 
+ return false; + } + + if (!eglGetCurrentContext()) + return false; + + const char *exts = eglQueryString(eglGetCurrentDisplay(), EGL_EXTENSIONS); + if (!exts) + return false; + + GL *gl = ra_gl_get(hw->ra_ctx->ra); + if (!gl_check_extension(exts, "EGL_EXT_image_dma_buf_import") || + !gl_check_extension(exts, "EGL_KHR_image_base") || + !gl_check_extension(gl->extensions, "GL_OES_EGL_image") || + !(gl->mpgl_caps & MPGL_CAP_TEX_RG)) + return false; + + dmabuf_interop->use_modifiers = + gl_check_extension(exts, "EGL_EXT_image_dma_buf_import_modifiers"); + + MP_VERBOSE(hw, "using EGL dmabuf interop\n"); + + dmabuf_interop->interop_init = vaapi_gl_mapper_init; + dmabuf_interop->interop_uninit = vaapi_gl_mapper_uninit; + dmabuf_interop->interop_map = vaapi_gl_map; + dmabuf_interop->interop_unmap = vaapi_gl_unmap; + + return true; +} diff --git a/video/out/hwdec/dmabuf_interop_pl.c b/video/out/hwdec/dmabuf_interop_pl.c new file mode 100644 index 0000000..0a8ec5b --- /dev/null +++ b/video/out/hwdec/dmabuf_interop_pl.c @@ -0,0 +1,138 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <errno.h> +#include <unistd.h> + +#include "dmabuf_interop.h" +#include "video/out/placebo/ra_pl.h" +#include "video/out/placebo/utils.h" + +static bool vaapi_pl_map(struct ra_hwdec_mapper *mapper, + struct dmabuf_interop *dmabuf_interop, + bool probing) +{ + struct dmabuf_interop_priv *p = mapper->priv; + pl_gpu gpu = ra_pl_get(mapper->ra); + + struct ra_imgfmt_desc desc = {0}; + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return false; + + // The calling code validates that the total number of exported planes + // equals the number we expected in p->num_planes. + int layer = 0; + int layer_plane = 0; + for (int n = 0; n < p->num_planes; n++) { + + const struct ra_format *format = desc.planes[n]; + int id = p->desc.layers[layer].planes[layer_plane].object_index; + int fd = p->desc.objects[id].fd; + uint32_t size = p->desc.objects[id].size; + uint32_t offset = p->desc.layers[layer].planes[layer_plane].offset; + uint32_t pitch = p->desc.layers[layer].planes[layer_plane].pitch; + + // AMD drivers do not return the size in the surface description, so we + // need to query it manually. 
+ if (size == 0) { + size = lseek(fd, 0, SEEK_END); + if (size == -1) { + MP_ERR(mapper, "Cannot obtain size of object with fd %d: %s\n", + fd, mp_strerror(errno)); + return false; + } + off_t err = lseek(fd, 0, SEEK_SET); + if (err == -1) { + MP_ERR(mapper, "Failed to reset offset for fd %d: %s\n", + fd, mp_strerror(errno)); + return false; + } + } + + struct pl_tex_params tex_params = { + .w = mp_image_plane_w(&p->layout, n), + .h = mp_image_plane_h(&p->layout, n), + .d = 0, + .format = format->priv, + .sampleable = true, + .import_handle = PL_HANDLE_DMA_BUF, + .shared_mem = (struct pl_shared_mem) { + .handle = { + .fd = fd, + }, + .size = size, + .offset = offset, + .drm_format_mod = p->desc.objects[id].format_modifier, + .stride_w = pitch, + }, + }; + + mppl_log_set_probing(gpu->log, probing); + pl_tex pltex = pl_tex_create(gpu, &tex_params); + mppl_log_set_probing(gpu->log, false); + if (!pltex) + return false; + + struct ra_tex *ratex = talloc_ptrtype(NULL, ratex); + int ret = mppl_wrap_tex(mapper->ra, pltex, ratex); + if (!ret) { + pl_tex_destroy(gpu, &pltex); + talloc_free(ratex); + return false; + } + mapper->tex[n] = ratex; + + MP_TRACE(mapper, "Object %d with fd %d imported as %p\n", + id, fd, ratex); + + layer_plane++; + if (layer_plane == p->desc.layers[layer].nb_planes) { + layer_plane = 0; + layer++; + } + } + return true; +} + +static void vaapi_pl_unmap(struct ra_hwdec_mapper *mapper) +{ + for (int n = 0; n < 4; n++) + ra_tex_free(mapper->ra, &mapper->tex[n]); +} + +bool dmabuf_interop_pl_init(const struct ra_hwdec *hw, + struct dmabuf_interop *dmabuf_interop) +{ + pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra); + if (!gpu) { + // This is not a libplacebo RA; + return false; + } + + if (!(gpu->import_caps.tex & PL_HANDLE_DMA_BUF)) { + MP_VERBOSE(hw, "libplacebo dmabuf interop requires support for " + "PL_HANDLE_DMA_BUF import.\n"); + return false; + } + + MP_VERBOSE(hw, "using libplacebo dmabuf interop\n"); + + dmabuf_interop->interop_map = vaapi_pl_map; + 
dmabuf_interop->interop_unmap = vaapi_pl_unmap; + + return true; +} diff --git a/video/out/hwdec/dmabuf_interop_wl.c b/video/out/hwdec/dmabuf_interop_wl.c new file mode 100644 index 0000000..606a0aa --- /dev/null +++ b/video/out/hwdec/dmabuf_interop_wl.c @@ -0,0 +1,83 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ +#include "video/out/wldmabuf/ra_wldmabuf.h" +#include "dmabuf_interop.h" + +static bool mapper_init(struct ra_hwdec_mapper *mapper, + const struct ra_imgfmt_desc *desc) +{ + return true; +} + +static void mapper_uninit(const struct ra_hwdec_mapper *mapper) +{ +} + +static bool map(struct ra_hwdec_mapper *mapper, + struct dmabuf_interop *dmabuf_interop, + bool probing) +{ + // 1. only validate format when composed layers is enabled (i.e. vaapi) + // 2. for drmprime, just return true for now, as this use case + // has not been tested. 
+ if (!dmabuf_interop->composed_layers) + return true; + + int layer_no = 0; + struct dmabuf_interop_priv *mapper_p = mapper->priv; + uint32_t drm_format = mapper_p->desc.layers[layer_no].format; + + if (mapper_p->desc.nb_layers != 1) { + MP_VERBOSE(mapper, "Mapped surface has separate layers - expected composed layers.\n"); + return false; + } else if (!ra_compatible_format(mapper->ra, drm_format, + mapper_p->desc.objects[0].format_modifier)) { + MP_VERBOSE(mapper, "Mapped surface with format %s; drm format '%s(%016lx)' " + "is not supported by compositor.\n", + mp_imgfmt_to_name(mapper->src->params.hw_subfmt), + mp_tag_str(drm_format), + mapper_p->desc.objects[0].format_modifier); + return false; + } + + MP_VERBOSE(mapper, "Supported Wayland display format %s: '%s(%016lx)'\n", + mp_imgfmt_to_name(mapper->src->params.hw_subfmt), + mp_tag_str(drm_format), mapper_p->desc.objects[0].format_modifier); + + return true; +} + +static void unmap(struct ra_hwdec_mapper *mapper) +{ +} + +bool dmabuf_interop_wl_init(const struct ra_hwdec *hw, + struct dmabuf_interop *dmabuf_interop) +{ + if (!ra_is_wldmabuf(hw->ra_ctx->ra)) + return false; + + if (strstr(hw->driver->name, "vaapi") != NULL) + dmabuf_interop->composed_layers = true; + + dmabuf_interop->interop_init = mapper_init; + dmabuf_interop->interop_uninit = mapper_uninit; + dmabuf_interop->interop_map = map; + dmabuf_interop->interop_unmap = unmap; + + return true; +} diff --git a/video/out/hwdec/hwdec_aimagereader.c b/video/out/hwdec/hwdec_aimagereader.c new file mode 100644 index 0000000..0dd5497 --- /dev/null +++ b/video/out/hwdec/hwdec_aimagereader.c @@ -0,0 +1,402 @@ +/* + * Copyright (c) 2021 sfan5 <sfan5@live.de> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <dlfcn.h> +#include <EGL/egl.h> +#include <media/NdkImageReader.h> +#include <android/native_window_jni.h> +#include <libavcodec/mediacodec.h> +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_mediacodec.h> + +#include "misc/jni.h" +#include "osdep/threads.h" +#include "osdep/timer.h" +#include "video/out/gpu/hwdec.h" +#include "video/out/opengl/ra_gl.h" + +typedef void *GLeglImageOES; +typedef void *EGLImageKHR; +#define EGL_NATIVE_BUFFER_ANDROID 0x3140 + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + AImageReader *reader; + jobject surface; + void *lib_handle; + + media_status_t (*AImageReader_newWithUsage)( + int32_t, int32_t, int32_t, uint64_t, int32_t, AImageReader **); + media_status_t (*AImageReader_getWindow)( + AImageReader *, ANativeWindow **); + media_status_t (*AImageReader_setImageListener)( + AImageReader *, AImageReader_ImageListener *); + media_status_t (*AImageReader_acquireLatestImage)(AImageReader *, AImage **); + void (*AImageReader_delete)(AImageReader *); + media_status_t (*AImage_getHardwareBuffer)(const AImage *, AHardwareBuffer **); + void (*AImage_delete)(AImage *); + void (*AHardwareBuffer_describe)(const AHardwareBuffer *, AHardwareBuffer_Desc *); + jobject (*ANativeWindow_toSurface)(JNIEnv *, ANativeWindow *); +}; + +struct priv { + struct mp_log *log; + + GLuint gl_texture; + AImage *image; + EGLImageKHR egl_image; + + mp_mutex lock; + mp_cond cond; + bool image_available; + + EGLImageKHR (EGLAPIENTRY *CreateImageKHR)( + EGLDisplay, EGLContext, EGLenum, EGLClientBuffer, const EGLint *); + 
EGLBoolean (EGLAPIENTRY *DestroyImageKHR)(EGLDisplay, EGLImageKHR); + EGLClientBuffer (EGLAPIENTRY *GetNativeClientBufferANDROID)( + const struct AHardwareBuffer *); + void (EGLAPIENTRY *EGLImageTargetTexture2DOES)(GLenum, GLeglImageOES); +}; + +const static struct { const char *symbol; int offset; } lib_functions[] = { + { "AImageReader_newWithUsage", offsetof(struct priv_owner, AImageReader_newWithUsage) }, + { "AImageReader_getWindow", offsetof(struct priv_owner, AImageReader_getWindow) }, + { "AImageReader_setImageListener", offsetof(struct priv_owner, AImageReader_setImageListener) }, + { "AImageReader_acquireLatestImage", offsetof(struct priv_owner, AImageReader_acquireLatestImage) }, + { "AImageReader_delete", offsetof(struct priv_owner, AImageReader_delete) }, + { "AImage_getHardwareBuffer", offsetof(struct priv_owner, AImage_getHardwareBuffer) }, + { "AImage_delete", offsetof(struct priv_owner, AImage_delete) }, + { "AHardwareBuffer_describe", offsetof(struct priv_owner, AHardwareBuffer_describe) }, + { "ANativeWindow_toSurface", offsetof(struct priv_owner, ANativeWindow_toSurface) }, + { NULL, 0 }, +}; + + +static AVBufferRef *create_mediacodec_device_ref(jobject surface) +{ + AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_MEDIACODEC); + if (!device_ref) + return NULL; + + AVHWDeviceContext *ctx = (void *)device_ref->data; + AVMediaCodecDeviceContext *hwctx = ctx->hwctx; + hwctx->surface = surface; + + if (av_hwdevice_ctx_init(device_ref) < 0) + av_buffer_unref(&device_ref); + + return device_ref; +} + +static bool load_lib_functions(struct priv_owner *p, struct mp_log *log) +{ + p->lib_handle = dlopen("libmediandk.so", RTLD_NOW | RTLD_GLOBAL); + if (!p->lib_handle) + return false; + for (int i = 0; lib_functions[i].symbol; i++) { + const char *sym = lib_functions[i].symbol; + void *fun = dlsym(p->lib_handle, sym); + if (!fun) + fun = dlsym(RTLD_DEFAULT, sym); + if (!fun) { + mp_warn(log, "Could not resolve symbol %s\n", sym); + return 
false; + } + + *(void **) ((uint8_t*)p + lib_functions[i].offset) = fun; + } + return true; +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + if (!ra_is_gl(hw->ra_ctx->ra)) + return -1; + if (!eglGetCurrentContext()) + return -1; + + const char *exts = eglQueryString(eglGetCurrentDisplay(), EGL_EXTENSIONS); + if (!gl_check_extension(exts, "EGL_ANDROID_image_native_buffer")) + return -1; + + if (!load_lib_functions(p, hw->log)) + return -1; + + static const char *es2_exts[] = {"GL_OES_EGL_image_external", 0}; + static const char *es3_exts[] = {"GL_OES_EGL_image_external_essl3", 0}; + GL *gl = ra_gl_get(hw->ra_ctx->ra); + if (gl_check_extension(gl->extensions, es3_exts[0])) + hw->glsl_extensions = es3_exts; + else + hw->glsl_extensions = es2_exts; + + // dummy dimensions, AImageReader only transports hardware buffers + media_status_t ret = p->AImageReader_newWithUsage(16, 16, + AIMAGE_FORMAT_PRIVATE, AHARDWAREBUFFER_USAGE_GPU_SAMPLED_IMAGE, + 5, &p->reader); + if (ret != AMEDIA_OK) { + MP_ERR(hw, "newWithUsage failed: %d\n", ret); + return -1; + } + assert(p->reader); + + ANativeWindow *window; + ret = p->AImageReader_getWindow(p->reader, &window); + if (ret != AMEDIA_OK) { + MP_ERR(hw, "getWindow failed: %d\n", ret); + return -1; + } + assert(window); + + JNIEnv *env = MP_JNI_GET_ENV(hw); + assert(env); + jobject surface = p->ANativeWindow_toSurface(env, window); + p->surface = (*env)->NewGlobalRef(env, surface); + (*env)->DeleteLocalRef(env, surface); + + p->hwctx = (struct mp_hwdec_ctx) { + .driver_name = hw->driver->name, + .av_device_ref = create_mediacodec_device_ref(p->surface), + .hw_imgfmt = IMGFMT_MEDIACODEC, + }; + + if (!p->hwctx.av_device_ref) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n"); + return -1; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; +} + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + JNIEnv *env = MP_JNI_GET_ENV(hw); + assert(env); + + if 
(p->surface) { + (*env)->DeleteGlobalRef(env, p->surface); + p->surface = NULL; + } + + if (p->reader) { + p->AImageReader_delete(p->reader); + p->reader = NULL; + } + + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); + + if (p->lib_handle) { + dlclose(p->lib_handle); + p->lib_handle = NULL; + } +} + +static void image_callback(void *context, AImageReader *reader) +{ + struct priv *p = context; + + mp_mutex_lock(&p->lock); + p->image_available = true; + mp_cond_signal(&p->cond); + mp_mutex_unlock(&p->lock); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + struct priv_owner *o = mapper->owner->priv; + GL *gl = ra_gl_get(mapper->ra); + + p->log = mapper->log; + mp_mutex_init(&p->lock); + mp_cond_init(&p->cond); + + p->CreateImageKHR = (void *)eglGetProcAddress("eglCreateImageKHR"); + p->DestroyImageKHR = (void *)eglGetProcAddress("eglDestroyImageKHR"); + p->GetNativeClientBufferANDROID = + (void *)eglGetProcAddress("eglGetNativeClientBufferANDROID"); + p->EGLImageTargetTexture2DOES = + (void *)eglGetProcAddress("glEGLImageTargetTexture2DOES"); + + if (!p->CreateImageKHR || !p->DestroyImageKHR || + !p->GetNativeClientBufferANDROID || !p->EGLImageTargetTexture2DOES) + return -1; + + AImageReader_ImageListener listener = { + .context = p, + .onImageAvailable = image_callback, + }; + o->AImageReader_setImageListener(o->reader, &listener); + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = IMGFMT_RGB0; + mapper->dst_params.hw_subfmt = 0; + + // texture creation + gl->GenTextures(1, &p->gl_texture); + gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, p->gl_texture); + gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_EXTERNAL_OES, GL_TEXTURE_WRAP_T, 
GL_CLAMP_TO_EDGE); + gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, 0); + + struct ra_tex_params params = { + .dimensions = 2, + .w = mapper->src_params.w, + .h = mapper->src_params.h, + .d = 1, + .format = ra_find_unorm_format(mapper->ra, 1, 4), + .render_src = true, + .src_linear = true, + .external_oes = true, + }; + + if (params.format->ctype != RA_CTYPE_UNORM) + return -1; + + mapper->tex[0] = ra_create_wrapped_tex(mapper->ra, ¶ms, p->gl_texture); + if (!mapper->tex[0]) + return -1; + + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + struct priv_owner *o = mapper->owner->priv; + GL *gl = ra_gl_get(mapper->ra); + + o->AImageReader_setImageListener(o->reader, NULL); + + gl->DeleteTextures(1, &p->gl_texture); + p->gl_texture = 0; + + ra_tex_free(mapper->ra, &mapper->tex[0]); + + mp_mutex_destroy(&p->lock); + mp_cond_destroy(&p->cond); +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + struct priv_owner *o = mapper->owner->priv; + + if (p->egl_image) { + p->DestroyImageKHR(eglGetCurrentDisplay(), p->egl_image); + p->egl_image = 0; + } + + if (p->image) { + o->AImage_delete(p->image); + p->image = NULL; + } +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + struct priv_owner *o = mapper->owner->priv; + GL *gl = ra_gl_get(mapper->ra); + + { + if (mapper->src->imgfmt != IMGFMT_MEDIACODEC) + return -1; + AVMediaCodecBuffer *buffer = (AVMediaCodecBuffer *)mapper->src->planes[3]; + av_mediacodec_release_buffer(buffer, 1); + } + + bool image_available = false; + mp_mutex_lock(&p->lock); + if (!p->image_available) { + mp_cond_timedwait(&p->cond, &p->lock, MP_TIME_MS_TO_NS(100)); + if (!p->image_available) + MP_WARN(mapper, "Waiting for frame timed out!\n"); + } + image_available = p->image_available; + p->image_available = false; + mp_mutex_unlock(&p->lock); + + media_status_t ret = 
o->AImageReader_acquireLatestImage(o->reader, &p->image); + if (ret != AMEDIA_OK) { + MP_ERR(mapper, "acquireLatestImage failed: %d\n", ret); + // If we merely timed out waiting return success anyway to avoid + // flashing frames of render errors. + return image_available ? -1 : 0; + } + assert(p->image); + + AHardwareBuffer *hwbuf = NULL; + ret = o->AImage_getHardwareBuffer(p->image, &hwbuf); + if (ret != AMEDIA_OK) { + MP_ERR(mapper, "getHardwareBuffer failed: %d\n", ret); + return -1; + } + assert(hwbuf); + + // Update texture size since it may differ + AHardwareBuffer_Desc d; + o->AHardwareBuffer_describe(hwbuf, &d); + if (mapper->tex[0]->params.w != d.width || mapper->tex[0]->params.h != d.height) { + MP_VERBOSE(p, "Texture dimensions changed to %dx%d\n", d.width, d.height); + mapper->tex[0]->params.w = d.width; + mapper->tex[0]->params.h = d.height; + } + + EGLClientBuffer buf = p->GetNativeClientBufferANDROID(hwbuf); + if (!buf) + return -1; + + const int attribs[] = {EGL_NONE}; + p->egl_image = p->CreateImageKHR(eglGetCurrentDisplay(), + EGL_NO_CONTEXT, EGL_NATIVE_BUFFER_ANDROID, buf, attribs); + if (!p->egl_image) + return -1; + + gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, p->gl_texture); + p->EGLImageTargetTexture2DOES(GL_TEXTURE_EXTERNAL_OES, p->egl_image); + gl->BindTexture(GL_TEXTURE_EXTERNAL_OES, 0); + + return 0; +} + + +const struct ra_hwdec_driver ra_hwdec_aimagereader = { + .name = "aimagereader", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_MEDIACODEC, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/hwdec/hwdec_cuda.c b/video/out/hwdec/hwdec_cuda.c new file mode 100644 index 0000000..68ad60d --- /dev/null +++ b/video/out/hwdec/hwdec_cuda.c @@ -0,0 +1,286 @@ +/* + * Copyright (c) 2016 Philip Langdale <philipl@overt.org> + * + * 
This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +/* + * This hwdec implements an optimized output path using CUDA->OpenGL + * or CUDA->Vulkan interop for frame data that is stored in CUDA + * device memory. Although it is not explicit in the code here, the + * only practical way to get data in this form is from the + * nvdec/cuvid decoder. + */ + +#include "config.h" +#include "hwdec_cuda.h" + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_cuda.h> + +int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func) +{ + const char *err_name; + const char *err_string; + + struct cuda_hw_priv *p = hw->priv; + int level = hw->probing ? 
MSGL_V : MSGL_ERR; + + MP_TRACE(hw, "Calling %s\n", func); + + if (err == CUDA_SUCCESS) + return 0; + + p->cu->cuGetErrorName(err, &err_name); + p->cu->cuGetErrorString(err, &err_string); + + MP_MSG(hw, level, "%s failed", func); + if (err_name && err_string) + MP_MSG(hw, level, " -> %s: %s", err_name, err_string); + MP_MSG(hw, level, "\n"); + + return -1; +} + +#define CHECK_CU(x) check_cu(hw, (x), #x) + +const static cuda_interop_init interop_inits[] = { +#if HAVE_GL + cuda_gl_init, +#endif +#if HAVE_VULKAN + cuda_vk_init, +#endif + NULL +}; + +static int cuda_init(struct ra_hwdec *hw) +{ + AVBufferRef *hw_device_ctx = NULL; + CUcontext dummy; + int ret = 0; + struct cuda_hw_priv *p = hw->priv; + CudaFunctions *cu; + int level = hw->probing ? MSGL_V : MSGL_ERR; + + ret = cuda_load_functions(&p->cu, NULL); + if (ret != 0) { + MP_MSG(hw, level, "Failed to load CUDA symbols\n"); + return -1; + } + cu = p->cu; + + ret = CHECK_CU(cu->cuInit(0)); + if (ret < 0) + return -1; + + // Initialise CUDA context from backend. 
+ for (int i = 0; interop_inits[i]; i++) { + if (interop_inits[i](hw)) { + break; + } + } + + if (!p->ext_init || !p->ext_uninit) { + MP_MSG(hw, level, + "CUDA hwdec only works with OpenGL or Vulkan backends.\n"); + return -1; + } + + hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_CUDA); + if (!hw_device_ctx) + goto error; + + AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data; + + AVCUDADeviceContext *device_hwctx = device_ctx->hwctx; + device_hwctx->cuda_ctx = p->decode_ctx; + + ret = av_hwdevice_ctx_init(hw_device_ctx); + if (ret < 0) { + MP_MSG(hw, level, "av_hwdevice_ctx_init failed\n"); + goto error; + } + + ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (ret < 0) + goto error; + + p->hwctx = (struct mp_hwdec_ctx) { + .driver_name = hw->driver->name, + .av_device_ref = hw_device_ctx, + .hw_imgfmt = IMGFMT_CUDA, + }; + hwdec_devices_add(hw->devs, &p->hwctx); + return 0; + + error: + av_buffer_unref(&hw_device_ctx); + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + + return -1; +} + +static void cuda_uninit(struct ra_hwdec *hw) +{ + struct cuda_hw_priv *p = hw->priv; + CudaFunctions *cu = p->cu; + + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); + + if (p->decode_ctx && p->decode_ctx != p->display_ctx) + CHECK_CU(cu->cuCtxDestroy(p->decode_ctx)); + + if (p->display_ctx) + CHECK_CU(cu->cuCtxDestroy(p->display_ctx)); + + cuda_free_functions(&p->cu); +} + +#undef CHECK_CU +#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x) + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct cuda_hw_priv *p_owner = mapper->owner->priv; + struct cuda_mapper_priv *p = mapper->priv; + CUcontext dummy; + CudaFunctions *cu = p_owner->cu; + int ret = 0, eret = 0; + + p->display_ctx = p_owner->display_ctx; + + int imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = imgfmt; + mapper->dst_params.hw_subfmt = 0; + + mp_image_set_params(&p->layout, 
&mapper->dst_params); + + struct ra_imgfmt_desc desc; + if (!ra_get_imgfmt_desc(mapper->ra, imgfmt, &desc)) { + MP_ERR(mapper, "Unsupported format: %s\n", mp_imgfmt_to_name(imgfmt)); + return -1; + } + + ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); + if (ret < 0) + return ret; + + for (int n = 0; n < desc.num_planes; n++) { + if (!p_owner->ext_init(mapper, desc.planes[n], n)) + goto error; + } + + error: + eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (eret < 0) + return eret; + + return ret; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct cuda_mapper_priv *p = mapper->priv; + struct cuda_hw_priv *p_owner = mapper->owner->priv; + CudaFunctions *cu = p_owner->cu; + CUcontext dummy; + + // Don't bail if any CUDA calls fail. This is all best effort. + CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); + for (int n = 0; n < 4; n++) { + p_owner->ext_uninit(mapper, n); + ra_tex_free(mapper->ra, &mapper->tex[n]); + } + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct cuda_mapper_priv *p = mapper->priv; + struct cuda_hw_priv *p_owner = mapper->owner->priv; + CudaFunctions *cu = p_owner->cu; + CUcontext dummy; + int ret = 0, eret = 0; + + ret = CHECK_CU(cu->cuCtxPushCurrent(p->display_ctx)); + if (ret < 0) + return ret; + + for (int n = 0; n < p->layout.num_planes; n++) { + if (p_owner->ext_wait) { + if (!p_owner->ext_wait(mapper, n)) + goto error; + } + + CUDA_MEMCPY2D cpy = { + .srcMemoryType = CU_MEMORYTYPE_DEVICE, + .srcDevice = (CUdeviceptr)mapper->src->planes[n], + .srcPitch = mapper->src->stride[n], + .srcY = 0, + .dstMemoryType = CU_MEMORYTYPE_ARRAY, + .dstArray = p->cu_array[n], + .WidthInBytes = mp_image_plane_w(&p->layout, n) * + mapper->tex[n]->params.format->pixel_size, + .Height = mp_image_plane_h(&p->layout, n), + }; + + ret = CHECK_CU(cu->cuMemcpy2DAsync(&cpy, 0)); + if (ret < 0) + goto error; 
+ + if (p_owner->ext_signal) { + if (!p_owner->ext_signal(mapper, n)) + goto error; + } + } + if (p_owner->do_full_sync) + CHECK_CU(cu->cuStreamSynchronize(0)); + + // fall through + error: + + // Regardless of success or failure, we no longer need the source image, + // because this hwdec makes an explicit memcpy into the mapper textures + mp_image_unrefp(&mapper->src); + + eret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (eret < 0) + return eret; + + return ret; +} + +const struct ra_hwdec_driver ra_hwdec_cuda = { + .name = "cuda", + .imgfmts = {IMGFMT_CUDA, 0}, + .priv_size = sizeof(struct cuda_hw_priv), + .init = cuda_init, + .uninit = cuda_uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct cuda_mapper_priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/hwdec/hwdec_cuda.h b/video/out/hwdec/hwdec_cuda.h new file mode 100644 index 0000000..9c55053 --- /dev/null +++ b/video/out/hwdec/hwdec_cuda.h @@ -0,0 +1,59 @@ +/* + * Copyright (c) 2019 Philip Langdale <philipl@overt.org> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#pragma once + +#include <ffnvcodec/dynlink_loader.h> + +#include "video/out/gpu/hwdec.h" + +struct cuda_hw_priv { + struct mp_hwdec_ctx hwctx; + CudaFunctions *cu; + CUcontext display_ctx; + CUcontext decode_ctx; + + // Do we need to do a full CPU sync after copying + bool do_full_sync; + + bool (*ext_init)(struct ra_hwdec_mapper *mapper, + const struct ra_format *format, int n); + void (*ext_uninit)(const struct ra_hwdec_mapper *mapper, int n); + + // These are only necessary if the gpu api requires synchronisation + bool (*ext_wait)(const struct ra_hwdec_mapper *mapper, int n); + bool (*ext_signal)(const struct ra_hwdec_mapper *mapper, int n); +}; + +struct cuda_mapper_priv { + struct mp_image layout; + CUarray cu_array[4]; + + CUcontext display_ctx; + + void *ext[4]; +}; + +typedef bool (*cuda_interop_init)(const struct ra_hwdec *hw); + +bool cuda_gl_init(const struct ra_hwdec *hw); + +bool cuda_vk_init(const struct ra_hwdec *hw); + +int check_cu(const struct ra_hwdec *hw, CUresult err, const char *func); diff --git a/video/out/hwdec/hwdec_cuda_gl.c b/video/out/hwdec/hwdec_cuda_gl.c new file mode 100644 index 0000000..f20540e --- /dev/null +++ b/video/out/hwdec/hwdec_cuda_gl.c @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2019 Philip Langdale <philipl@overt.org> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "hwdec_cuda.h" +#include "options/m_config.h" +#include "options/options.h" +#include "video/out/opengl/formats.h" +#include "video/out/opengl/ra_gl.h" + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_cuda.h> +#include <unistd.h> + +#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x) + +struct ext_gl { + CUgraphicsResource cu_res; +}; + +static bool cuda_ext_gl_init(struct ra_hwdec_mapper *mapper, + const struct ra_format *format, int n) +{ + struct cuda_hw_priv *p_owner = mapper->owner->priv; + struct cuda_mapper_priv *p = mapper->priv; + CudaFunctions *cu = p_owner->cu; + int ret = 0; + CUcontext dummy; + + struct ext_gl *egl = talloc_ptrtype(NULL, egl); + p->ext[n] = egl; + + struct ra_tex_params params = { + .dimensions = 2, + .w = mp_image_plane_w(&p->layout, n), + .h = mp_image_plane_h(&p->layout, n), + .d = 1, + .format = format, + .render_src = true, + .src_linear = format->linear_filter, + }; + + mapper->tex[n] = ra_tex_create(mapper->ra, ¶ms); + if (!mapper->tex[n]) { + goto error; + } + + GLuint texture; + GLenum target; + ra_gl_get_raw_tex(mapper->ra, mapper->tex[n], &texture, &target); + + ret = CHECK_CU(cu->cuGraphicsGLRegisterImage(&egl->cu_res, texture, target, + CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuGraphicsMapResources(1, &egl->cu_res, 0)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuGraphicsSubResourceGetMappedArray(&p->cu_array[n], egl->cu_res, + 0, 0)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuGraphicsUnmapResources(1, &egl->cu_res, 0)); + if (ret < 0) + goto error; + + return true; + +error: + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return false; +} + +static void cuda_ext_gl_uninit(const struct ra_hwdec_mapper *mapper, int n) +{ + struct cuda_hw_priv *p_owner = mapper->owner->priv; + struct cuda_mapper_priv *p = mapper->priv; + CudaFunctions *cu = p_owner->cu; + + struct ext_gl *egl = p->ext[n]; + if (egl && egl->cu_res) { 
+ CHECK_CU(cu->cuGraphicsUnregisterResource(egl->cu_res)); + egl->cu_res = 0; + } + talloc_free(egl); +} + +#undef CHECK_CU +#define CHECK_CU(x) check_cu(hw, (x), #x) + +bool cuda_gl_init(const struct ra_hwdec *hw) { + int ret = 0; + struct cuda_hw_priv *p = hw->priv; + CudaFunctions *cu = p->cu; + + if (ra_is_gl(hw->ra_ctx->ra)) { + GL *gl = ra_gl_get(hw->ra_ctx->ra); + if (gl->version < 210 && gl->es < 300) { + MP_VERBOSE(hw, "need OpenGL >= 2.1 or OpenGL-ES >= 3.0\n"); + return false; + } + } else { + // This is not an OpenGL RA. + return false; + } + + CUdevice display_dev; + unsigned int device_count; + ret = CHECK_CU(cu->cuGLGetDevices(&device_count, &display_dev, 1, + CU_GL_DEVICE_LIST_ALL)); + if (ret < 0) + return false; + + ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + display_dev)); + if (ret < 0) + return false; + + p->decode_ctx = p->display_ctx; + + struct cuda_opts *opts = mp_get_config_group(NULL, hw->global, &cuda_conf); + int decode_dev_idx = opts->cuda_device; + talloc_free(opts); + + if (decode_dev_idx > -1) { + CUcontext dummy; + CUdevice decode_dev; + ret = CHECK_CU(cu->cuDeviceGet(&decode_dev, decode_dev_idx)); + if (ret < 0) { + CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + return false; + } + + if (decode_dev != display_dev) { + MP_INFO(hw, "Using separate decoder and display devices\n"); + + // Pop the display context. 
We won't use it again during init() + ret = CHECK_CU(cu->cuCtxPopCurrent(&dummy)); + if (ret < 0) + return false; + + ret = CHECK_CU(cu->cuCtxCreate(&p->decode_ctx, CU_CTX_SCHED_BLOCKING_SYNC, + decode_dev)); + if (ret < 0) + return false; + } + } + + // We don't have a way to do a GPU sync after copying + p->do_full_sync = true; + + p->ext_init = cuda_ext_gl_init; + p->ext_uninit = cuda_ext_gl_uninit; + + return true; +} diff --git a/video/out/hwdec/hwdec_cuda_vk.c b/video/out/hwdec/hwdec_cuda_vk.c new file mode 100644 index 0000000..b9f8caa --- /dev/null +++ b/video/out/hwdec/hwdec_cuda_vk.c @@ -0,0 +1,344 @@ +/* + * Copyright (c) 2019 Philip Langdale <philipl@overt.org> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "config.h" +#include "hwdec_cuda.h" +#include "video/out/placebo/ra_pl.h" + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_cuda.h> +#include <libplacebo/vulkan.h> +#include <unistd.h> + +#if HAVE_WIN32_DESKTOP +#include <versionhelpers.h> +#define HANDLE_TYPE PL_HANDLE_WIN32 +#else +#define HANDLE_TYPE PL_HANDLE_FD +#endif + +#define CHECK_CU(x) check_cu((mapper)->owner, (x), #x) + +struct ext_vk { + CUexternalMemory mem; + CUmipmappedArray mma; + + pl_tex pltex; + pl_vulkan_sem vk_sem; + union pl_handle sem_handle; + CUexternalSemaphore cuda_sem; +}; + +static bool cuda_ext_vk_init(struct ra_hwdec_mapper *mapper, + const struct ra_format *format, int n) +{ + struct cuda_hw_priv *p_owner = mapper->owner->priv; + struct cuda_mapper_priv *p = mapper->priv; + CudaFunctions *cu = p_owner->cu; + int mem_fd = -1; + int ret = 0; + + struct ext_vk *evk = talloc_ptrtype(NULL, evk); + p->ext[n] = evk; + + pl_gpu gpu = ra_pl_get(mapper->ra); + + struct pl_tex_params tex_params = { + .w = mp_image_plane_w(&p->layout, n), + .h = mp_image_plane_h(&p->layout, n), + .d = 0, + .format = ra_pl_fmt_get(format), + .sampleable = true, + .export_handle = HANDLE_TYPE, + }; + + evk->pltex = pl_tex_create(gpu, &tex_params); + if (!evk->pltex) { + goto error; + } + + struct ra_tex *ratex = talloc_ptrtype(NULL, ratex); + ret = mppl_wrap_tex(mapper->ra, evk->pltex, ratex); + if (!ret) { + pl_tex_destroy(gpu, &evk->pltex); + talloc_free(ratex); + goto error; + } + mapper->tex[n] = ratex; + +#if !HAVE_WIN32_DESKTOP + mem_fd = dup(evk->pltex->shared_mem.handle.fd); + if (mem_fd < 0) + goto error; +#endif + + CUDA_EXTERNAL_MEMORY_HANDLE_DESC ext_desc = { +#if HAVE_WIN32_DESKTOP + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_WIN32, + .handle.win32.handle = evk->pltex->shared_mem.handle.handle, +#else + .type = CU_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD, + .handle.fd = mem_fd, +#endif + .size = evk->pltex->shared_mem.size, + .flags = 0, + }; + ret = 
CHECK_CU(cu->cuImportExternalMemory(&evk->mem, &ext_desc)); + if (ret < 0) + goto error; + // CUDA takes ownership of imported memory + mem_fd = -1; + + CUarray_format cufmt; + switch (format->pixel_size / format->num_components) { + case 1: + cufmt = CU_AD_FORMAT_UNSIGNED_INT8; + break; + case 2: + cufmt = CU_AD_FORMAT_UNSIGNED_INT16; + break; + default: + ret = -1; + goto error; + } + + CUDA_EXTERNAL_MEMORY_MIPMAPPED_ARRAY_DESC tex_desc = { + .offset = evk->pltex->shared_mem.offset, + .arrayDesc = { + .Width = mp_image_plane_w(&p->layout, n), + .Height = mp_image_plane_h(&p->layout, n), + .Depth = 0, + .Format = cufmt, + .NumChannels = format->num_components, + .Flags = 0, + }, + .numLevels = 1, + }; + + ret = CHECK_CU(cu->cuExternalMemoryGetMappedMipmappedArray(&evk->mma, evk->mem, &tex_desc)); + if (ret < 0) + goto error; + + ret = CHECK_CU(cu->cuMipmappedArrayGetLevel(&p->cu_array[n], evk->mma, 0)); + if (ret < 0) + goto error; + + evk->vk_sem.sem = pl_vulkan_sem_create(gpu, pl_vulkan_sem_params( + .type = VK_SEMAPHORE_TYPE_TIMELINE, + .export_handle = HANDLE_TYPE, + .out_handle = &(evk->sem_handle), + )); + if (evk->vk_sem.sem == VK_NULL_HANDLE) { + ret = -1; + goto error; + } + // The returned FD or Handle is owned by the caller (us). + + CUDA_EXTERNAL_SEMAPHORE_HANDLE_DESC w_desc = { +#if HAVE_WIN32_DESKTOP + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_WIN32, + .handle.win32.handle = evk->sem_handle.handle, +#else + .type = CU_EXTERNAL_SEMAPHORE_HANDLE_TYPE_TIMELINE_SEMAPHORE_FD, + .handle.fd = evk->sem_handle.fd, +#endif + }; + ret = CHECK_CU(cu->cuImportExternalSemaphore(&evk->cuda_sem, &w_desc)); + if (ret < 0) + goto error; + // CUDA takes ownership of an imported FD *but not* an imported Handle. 
+    evk->sem_handle.fd = -1;
+
+    return true;
+
+error:
+    MP_ERR(mapper, "cuda_ext_vk_init failed\n");
+    if (mem_fd > -1)
+        close(mem_fd);
+#if HAVE_WIN32_DESKTOP
+    if (evk->sem_handle.handle != NULL)
+        CloseHandle(evk->sem_handle.handle);
+#else
+    if (evk->sem_handle.fd > -1)
+        close(evk->sem_handle.fd);
+#endif
+    return false;
+}
+
+/*
+ * Tear down the per-plane CUDA/Vulkan interop state stored in p->ext[n].
+ *
+ * Destroys the mipmapped array, the imported external memory and the imported
+ * external semaphore (each only if set), then destroys the Vulkan semaphore.
+ * On Win32 builds the semaphore handle is closed as well. The ext_vk struct
+ * itself is freed last; a NULL p->ext[n] is tolerated.
+ */
+static void cuda_ext_vk_uninit(const struct ra_hwdec_mapper *mapper, int n)
+{
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+
+    struct ext_vk *evk = p->ext[n];
+    if (evk) {
+        if (evk->mma) {
+            CHECK_CU(cu->cuMipmappedArrayDestroy(evk->mma));
+            evk->mma = 0;
+        }
+        if (evk->mem) {
+            CHECK_CU(cu->cuDestroyExternalMemory(evk->mem));
+            evk->mem = 0;
+        }
+        if (evk->cuda_sem) {
+            CHECK_CU(cu->cuDestroyExternalSemaphore(evk->cuda_sem));
+            evk->cuda_sem = 0;
+        }
+        pl_vulkan_sem_destroy(ra_pl_get(mapper->ra), &evk->vk_sem.sem);
+#if HAVE_WIN32_DESKTOP
+        CloseHandle(evk->sem_handle.handle);
+#endif
+    }
+    talloc_free(evk);
+}
+
+/*
+ * Hand plane n's texture over from Vulkan to CUDA.
+ *
+ * Bumps the timeline value, asks libplacebo to hold the texture with the
+ * semaphore set to that value, and then queues a CUDA wait on the same value.
+ * Returns false if either the hold or the CUDA call fails.
+ */
+static bool cuda_ext_vk_wait(const struct ra_hwdec_mapper *mapper, int n)
+{
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+    int ret;
+    struct ext_vk *evk = p->ext[n];
+
+    evk->vk_sem.value += 1;
+    ret = pl_vulkan_hold_ex(ra_pl_get(mapper->ra), pl_vulkan_hold_params(
+        .tex = evk->pltex,
+        .layout = VK_IMAGE_LAYOUT_GENERAL,
+        .qf = VK_QUEUE_FAMILY_EXTERNAL,
+        .semaphore = evk->vk_sem,
+    ));
+    if (!ret)
+        return false;
+
+    CUDA_EXTERNAL_SEMAPHORE_WAIT_PARAMS wp = {
+        .params = {
+            .fence = {
+                .value = evk->vk_sem.value
+            }
+        }
+    };
+    ret = CHECK_CU(cu->cuWaitExternalSemaphoresAsync(&evk->cuda_sem,
+                                                     &wp, 1, 0));
+    return ret == 0;
+}
+
+/*
+ * Hand plane n's texture back from CUDA to Vulkan.
+ *
+ * Bumps the timeline value, queues a CUDA signal of that value, and then
+ * releases the texture to libplacebo with the semaphore set to the same value.
+ * Returns false if the CUDA signal call fails (the texture is then not
+ * released).
+ */
+static bool cuda_ext_vk_signal(const struct ra_hwdec_mapper *mapper, int n)
+{
+    struct cuda_hw_priv *p_owner = mapper->owner->priv;
+    struct cuda_mapper_priv *p = mapper->priv;
+    CudaFunctions *cu = p_owner->cu;
+    int ret;
+    struct ext_vk *evk = p->ext[n];
+
+    evk->vk_sem.value += 1;
+    CUDA_EXTERNAL_SEMAPHORE_SIGNAL_PARAMS sp = {
+        .params = {
+            .fence = {
+                .value = evk->vk_sem.value
+            }
+        }
+    };
+    ret = CHECK_CU(cu->cuSignalExternalSemaphoresAsync(&evk->cuda_sem,
+                                                       &sp, 1, 0));
+    if (ret != 0)
+        return false;
+
+    pl_vulkan_release_ex(ra_pl_get(mapper->ra), pl_vulkan_release_params(
+        .tex = evk->pltex,
+        .layout = VK_IMAGE_LAYOUT_GENERAL,
+        .qf = VK_QUEUE_FAMILY_EXTERNAL,
+        .semaphore = evk->vk_sem,
+    ));
+    return ret == 0;
+}
+
+// From here on CHECK_CU logs against the hwdec (hw) rather than a mapper.
+#undef CHECK_CU
+#define CHECK_CU(x) check_cu(hw, (x), #x)
+
+/*
+ * Set up the Vulkan flavour of the CUDA hwdec interop.
+ *
+ * Fails (returns false) when the RA is not backed by libplacebo/Vulkan, when
+ * the GPU cannot export texture memory or semaphores of HANDLE_TYPE, when the
+ * CUDA driver lacks cuImportExternalMemory, or when no CUDA device has the
+ * same UUID as the Vulkan device. On success a CUDA context is created on the
+ * matching device, used for both display and decode, and the ext_* callbacks
+ * are pointed at the cuda_ext_vk_* functions.
+ */
+bool cuda_vk_init(const struct ra_hwdec *hw) {
+    int ret = 0;
+    // While probing, failures are only reported at verbose level.
+    int level = hw->probing ? MSGL_V : MSGL_ERR;
+    struct cuda_hw_priv *p = hw->priv;
+    CudaFunctions *cu = p->cu;
+
+    pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra);
+    if (gpu != NULL) {
+        if (!(gpu->export_caps.tex & HANDLE_TYPE)) {
+            MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable texture memory of type 0x%X.\n",
+                       HANDLE_TYPE);
+            return false;
+        } else if (!(gpu->export_caps.sync & HANDLE_TYPE)) {
+            MP_VERBOSE(hw, "CUDA hwdec with Vulkan requires exportable semaphores of type 0x%X.\n",
+                       HANDLE_TYPE);
+            return false;
+        }
+    } else {
+        // This is not a Vulkan RA.
+        return false;
+    }
+
+    if (!cu->cuImportExternalMemory) {
+        MP_MSG(hw, level, "CUDA hwdec with Vulkan requires driver version 410.48 or newer.\n");
+        return false;
+    }
+
+    int device_count;
+    ret = CHECK_CU(cu->cuDeviceGetCount(&device_count));
+    if (ret < 0)
+        return false;
+
+    // Look for the CUDA device whose UUID equals the Vulkan device's UUID;
+    // devices that cannot be queried are skipped.
+    CUdevice display_dev = -1;
+    for (int i = 0; i < device_count; i++) {
+        CUdevice dev;
+        ret = CHECK_CU(cu->cuDeviceGet(&dev, i));
+        if (ret < 0)
+            continue;
+
+        CUuuid uuid;
+        ret = CHECK_CU(cu->cuDeviceGetUuid(&uuid, dev));
+        if (ret < 0)
+            continue;
+
+        if (memcmp(gpu->uuid, uuid.bytes, sizeof (gpu->uuid)) == 0) {
+            display_dev = dev;
+            break;
+        }
+    }
+
+    if (display_dev == -1) {
+        MP_MSG(hw, level, "Could not match Vulkan display device in CUDA.\n");
+        return false;
+    }
+
+    ret = CHECK_CU(cu->cuCtxCreate(&p->display_ctx, CU_CTX_SCHED_BLOCKING_SYNC,
+                                   display_dev));
+    if (ret < 0)
+        return false;
+
+    // The same context is used for decoding and for display.
+    p->decode_ctx = p->display_ctx;
+
+    p->ext_init = cuda_ext_vk_init;
+    p->ext_uninit = cuda_ext_vk_uninit;
+    p->ext_wait = cuda_ext_vk_wait;
+    p->ext_signal = cuda_ext_vk_signal;
+
+    return true;
+}
+
diff --git a/video/out/hwdec/hwdec_drmprime.c b/video/out/hwdec/hwdec_drmprime.c
new file mode 100644
index 0000000..f7c6250
--- /dev/null
+++ b/video/out/hwdec/hwdec_drmprime.c
@@ -0,0 +1,294 @@
+/*
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include <fcntl.h> +#include <stddef.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_drm.h> +#include <xf86drm.h> + +#include "config.h" + +#include "libmpv/render_gl.h" +#include "options/m_config.h" +#include "video/fmt-conversion.h" +#include "video/out/drm_common.h" +#include "video/out/gpu/hwdec.h" +#include "video/out/hwdec/dmabuf_interop.h" + +extern const struct m_sub_options drm_conf; + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + int *formats; + + struct dmabuf_interop dmabuf_interop; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + if (p->hwctx.driver_name) + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); +} + +const static dmabuf_interop_init interop_inits[] = { +#if HAVE_DMABUF_INTEROP_GL + dmabuf_interop_gl_init, +#endif +#if HAVE_VAAPI + dmabuf_interop_pl_init, +#endif +#if HAVE_DMABUF_WAYLAND + dmabuf_interop_wl_init, +#endif + NULL +}; + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + for (int i = 0; interop_inits[i]; i++) { + if (interop_inits[i](hw, &p->dmabuf_interop)) { + break; + } + } + + if (!p->dmabuf_interop.interop_map || !p->dmabuf_interop.interop_unmap) { + MP_VERBOSE(hw, "drmprime hwdec requires at least one dmabuf interop backend.\n"); + return -1; + } + + /* + * The drm_params resource is not provided when using X11 or Wayland, but + * there are extensions that supposedly provide this information from the + * drivers. Not properly documented. Of course. + */ + mpv_opengl_drm_params_v2 *params = ra_get_native_resource(hw->ra_ctx->ra, + "drm_params_v2"); + + /* + * Respect drm_device option, so there is a way to control this when not + * using a DRM gpu context. If drm_params_v2 are present, they will already + * respect this option. 
+ */ + void *tmp = talloc_new(NULL); + struct drm_opts *drm_opts = mp_get_config_group(tmp, hw->global, &drm_conf); + const char *opt_path = drm_opts->device_path; + + const char *device_path = params && params->render_fd > -1 ? + drmGetRenderDeviceNameFromFd(params->render_fd) : + opt_path ? opt_path : "/dev/dri/renderD128"; + MP_VERBOSE(hw, "Using DRM device: %s\n", device_path); + + int ret = av_hwdevice_ctx_create(&p->hwctx.av_device_ref, + AV_HWDEVICE_TYPE_DRM, + device_path, NULL, 0); + talloc_free(tmp); + if (ret != 0) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx: %s\n", av_err2str(ret)); + return -1; + } + + /* + * At the moment, there is no way to discover compatible formats + * from the hwdevice_ctx, and in fact the ffmpeg hwaccels hard-code + * formats too, so we're not missing out on anything. + */ + int num_formats = 0; + MP_TARRAY_APPEND(p, p->formats, num_formats, IMGFMT_NV12); + MP_TARRAY_APPEND(p, p->formats, num_formats, IMGFMT_420P); + MP_TARRAY_APPEND(p, p->formats, num_formats, pixfmt2imgfmt(AV_PIX_FMT_NV16)); + MP_TARRAY_APPEND(p, p->formats, num_formats, 0); // terminate it + + p->hwctx.hw_imgfmt = IMGFMT_DRMPRIME; + p->hwctx.supported_formats = p->formats; + p->hwctx.driver_name = hw->driver->name; + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct dmabuf_interop_priv *p = mapper->priv; + + p_owner->dmabuf_interop.interop_unmap(mapper); + + if (p->surface_acquired) { + for (int n = 0; n < p->desc.nb_objects; n++) { + if (p->desc.objects[n].fd > -1) + close(p->desc.objects[n].fd); + } + p->surface_acquired = false; + } +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + if (p_owner->dmabuf_interop.interop_uninit) { + p_owner->dmabuf_interop.interop_uninit(mapper); + } +} + +static bool check_fmt(struct ra_hwdec_mapper *mapper, int fmt) +{ + 
struct priv_owner *p_owner = mapper->owner->priv; + for (int n = 0; p_owner->formats && p_owner->formats[n]; n++) { + if (p_owner->formats[n] == fmt) + return true; + } + return false; +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct dmabuf_interop_priv *p = mapper->priv; + + mapper->dst_params = mapper->src_params; + + /* + * rpi4_8 and rpi4_10 function identically to NV12. These two pixel + * formats however are not defined in upstream ffmpeg so a string + * comparison is used to identify them instead of a mpv IMGFMT. + */ + const char* fmt_name = mp_imgfmt_to_name(mapper->src_params.hw_subfmt); + if (strcmp(fmt_name, "rpi4_8") == 0 || strcmp(fmt_name, "rpi4_10") == 0) + mapper->dst_params.imgfmt = IMGFMT_NV12; + else + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + struct ra_imgfmt_desc desc = {0}; + + if (mapper->ra->num_formats && + !ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return -1; + + p->num_planes = desc.num_planes; + mp_image_set_params(&p->layout, &mapper->dst_params); + + if (p_owner->dmabuf_interop.interop_init) + if (!p_owner->dmabuf_interop.interop_init(mapper, &desc)) + return -1; + + if (!check_fmt(mapper, mapper->dst_params.imgfmt)) + { + MP_FATAL(mapper, "unsupported DRM image format %s\n", + mp_imgfmt_to_name(mapper->dst_params.imgfmt)); + return -1; + } + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct dmabuf_interop_priv *p = mapper->priv; + + /* + * Although we use the same AVDRMFrameDescriptor to hold the dmabuf + * properties, we additionally need to dup the fds to ensure the + * frame doesn't disappear out from under us. And then for clarity, + * we copy all the individual fields. 
+ */ + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)mapper->src->planes[0]; + p->desc.nb_layers = desc->nb_layers; + p->desc.nb_objects = desc->nb_objects; + for (int i = 0; i < desc->nb_layers; i++) { + p->desc.layers[i].format = desc->layers[i].format; + p->desc.layers[i].nb_planes = desc->layers[i].nb_planes; + for (int j = 0; j < desc->layers[i].nb_planes; j++) { + p->desc.layers[i].planes[j].object_index = desc->layers[i].planes[j].object_index; + p->desc.layers[i].planes[j].offset = desc->layers[i].planes[j].offset; + p->desc.layers[i].planes[j].pitch = desc->layers[i].planes[j].pitch; + } + } + for (int i = 0; i < desc->nb_objects; i++) { + p->desc.objects[i].format_modifier = desc->objects[i].format_modifier; + p->desc.objects[i].size = desc->objects[i].size; + // Initialise fds to -1 to make partial failure cleanup easier. + p->desc.objects[i].fd = -1; + } + // Surface is now safe to treat as acquired to allow for unmapping to run. + p->surface_acquired = true; + + // Now actually dup the fds + for (int i = 0; i < desc->nb_objects; i++) { + p->desc.objects[i].fd = fcntl(desc->objects[i].fd, F_DUPFD_CLOEXEC, 0); + if (p->desc.objects[i].fd == -1) { + MP_ERR(mapper, "Failed to duplicate dmabuf fd: %s\n", + mp_strerror(errno)); + goto err; + } + } + + // We can handle composed formats if the total number of planes is still + // equal the number of planes we expect. Complex formats with auxiliary + // planes cannot be supported. + + int num_returned_planes = 0; + for (int i = 0; i < p->desc.nb_layers; i++) { + num_returned_planes += p->desc.layers[i].nb_planes; + } + + if (p->num_planes != 0 && p->num_planes != num_returned_planes) { + MP_ERR(mapper, + "Mapped surface with format '%s' has unexpected number of planes. 
" + "(%d layers and %d planes, but expected %d planes)\n", + mp_imgfmt_to_name(mapper->src->params.hw_subfmt), + p->desc.nb_layers, num_returned_planes, p->num_planes); + goto err; + } + + if (!p_owner->dmabuf_interop.interop_map(mapper, &p_owner->dmabuf_interop, + false)) + goto err; + + return 0; + +err: + mapper_unmap(mapper); + + MP_FATAL(mapper, "mapping DRM dmabuf failed\n"); + return -1; +} + +const struct ra_hwdec_driver ra_hwdec_drmprime = { + .name = "drmprime", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_DRMPRIME, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct dmabuf_interop_priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/hwdec/hwdec_drmprime_overlay.c b/video/out/hwdec/hwdec_drmprime_overlay.c new file mode 100644 index 0000000..6b6aae6 --- /dev/null +++ b/video/out/hwdec/hwdec_drmprime_overlay.c @@ -0,0 +1,334 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_drm.h> + +#include "video/hwdec.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "libmpv/render_gl.h" +#include "video/out/drm_atomic.h" +#include "video/out/drm_common.h" +#include "video/out/drm_prime.h" +#include "video/out/gpu/hwdec.h" +#include "video/mp_image.h" + +extern const struct m_sub_options drm_conf; + +struct drm_frame { + struct drm_prime_framebuffer fb; + struct mp_image *image; // associated mpv image +}; + +struct priv { + struct mp_log *log; + struct mp_hwdec_ctx hwctx; + + struct mp_image_params params; + + struct drm_atomic_context *ctx; + struct drm_frame current_frame, last_frame, old_frame; + + struct mp_rect src, dst; + + int display_w, display_h; + + struct drm_prime_handle_refs handle_refs; +}; + +static void set_current_frame(struct ra_hwdec *hw, struct drm_frame *frame) +{ + struct priv *p = hw->priv; + + // frame will be on screen after next vsync + // current_frame is currently the displayed frame and will be replaced + // by frame after next vsync. + // We used old frame as triple buffering to make sure that the drm framebuffer + // is not being displayed when we release it. 
+ + if (p->ctx) { + drm_prime_destroy_framebuffer(p->log, p->ctx->fd, &p->old_frame.fb, &p->handle_refs); + } + + mp_image_setrefp(&p->old_frame.image, p->last_frame.image); + p->old_frame.fb = p->last_frame.fb; + + mp_image_setrefp(&p->last_frame.image, p->current_frame.image); + p->last_frame.fb = p->current_frame.fb; + + if (frame) { + p->current_frame.fb = frame->fb; + mp_image_setrefp(&p->current_frame.image, frame->image); + } else { + memset(&p->current_frame.fb, 0, sizeof(p->current_frame.fb)); + mp_image_setrefp(&p->current_frame.image, NULL); + } +} + +static void scale_dst_rect(struct ra_hwdec *hw, int source_w, int source_h ,struct mp_rect *src, struct mp_rect *dst) +{ + struct priv *p = hw->priv; + + // drm can allow to have a layer that has a different size from framebuffer + // we scale here the destination size to video mode + double hratio = p->display_w / (double)source_w; + double vratio = p->display_h / (double)source_h; + double ratio = hratio <= vratio ? hratio : vratio; + + dst->x0 = src->x0 * ratio; + dst->x1 = src->x1 * ratio; + dst->y0 = src->y0 * ratio; + dst->y1 = src->y1 * ratio; + + int offset_x = (p->display_w - ratio * source_w) / 2; + int offset_y = (p->display_h - ratio * source_h) / 2; + + dst->x0 += offset_x; + dst->x1 += offset_x; + dst->y0 += offset_y; + dst->y1 += offset_y; +} + +static void disable_video_plane(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + if (!p->ctx) + return; + + if (!p->ctx->drmprime_video_plane) + return; + + // Disabling the drmprime video plane is needed on some devices when using + // the primary plane for video. Primary buffer can't be active with no + // framebuffer associated. So we need this function to commit it right away + // as mpv will free all framebuffers on playback end. 
+ drmModeAtomicReqPtr request = drmModeAtomicAlloc(); + if (request) { + drm_object_set_property(request, p->ctx->drmprime_video_plane, "FB_ID", 0); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "CRTC_ID", 0); + + int ret = drmModeAtomicCommit(p->ctx->fd, request, + 0, NULL); + + if (ret) + MP_ERR(hw, "Failed to commit disable plane request (code %d)", ret); + drmModeAtomicFree(request); + } +} + +static int overlay_frame(struct ra_hwdec *hw, struct mp_image *hw_image, + struct mp_rect *src, struct mp_rect *dst, bool newframe) +{ + struct priv *p = hw->priv; + AVDRMFrameDescriptor *desc = NULL; + drmModeAtomicReq *request = NULL; + struct drm_frame next_frame = {0}; + int ret; + + struct ra *ra = hw->ra_ctx->ra; + + // grab atomic request from native resources + if (p->ctx) { + struct mpv_opengl_drm_params_v2 *drm_params; + drm_params = (mpv_opengl_drm_params_v2 *)ra_get_native_resource(ra, "drm_params_v2"); + if (!drm_params) { + MP_ERR(hw, "Failed to retrieve drm params from native resources\n"); + return -1; + } + if (drm_params->atomic_request_ptr) { + request = *drm_params->atomic_request_ptr; + } else { + MP_ERR(hw, "drm params pointer to atomic request is invalid\n"); + return -1; + } + } + + if (hw_image) { + + // grab draw plane windowing info to eventually upscale the overlay + // as egl windows could be upscaled to draw plane. 
+ struct mpv_opengl_drm_draw_surface_size *draw_surface_size = ra_get_native_resource(ra, "drm_draw_surface_size"); + if (draw_surface_size) { + scale_dst_rect(hw, draw_surface_size->width, draw_surface_size->height, dst, &p->dst); + } else { + p->dst = *dst; + } + p->src = *src; + + next_frame.image = hw_image; + desc = (AVDRMFrameDescriptor *)hw_image->planes[0]; + + if (desc) { + int srcw = p->src.x1 - p->src.x0; + int srch = p->src.y1 - p->src.y0; + int dstw = MP_ALIGN_UP(p->dst.x1 - p->dst.x0, 2); + int dsth = MP_ALIGN_UP(p->dst.y1 - p->dst.y0, 2); + + if (drm_prime_create_framebuffer(p->log, p->ctx->fd, desc, srcw, srch, &next_frame.fb, &p->handle_refs)) { + ret = -1; + goto fail; + } + + if (request) { + drm_object_set_property(request, p->ctx->drmprime_video_plane, "FB_ID", next_frame.fb.fb_id); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "CRTC_ID", p->ctx->crtc->id); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "SRC_X", p->src.x0 << 16); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "SRC_Y", p->src.y0 << 16); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "SRC_W", srcw << 16); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "SRC_H", srch << 16); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "CRTC_X", MP_ALIGN_DOWN(p->dst.x0, 2)); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "CRTC_Y", MP_ALIGN_DOWN(p->dst.y0, 2)); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "CRTC_W", dstw); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "CRTC_H", dsth); + drm_object_set_property(request, p->ctx->drmprime_video_plane, "ZPOS", 0); + } else { + ret = drmModeSetPlane(p->ctx->fd, p->ctx->drmprime_video_plane->id, p->ctx->crtc->id, next_frame.fb.fb_id, 0, + MP_ALIGN_DOWN(p->dst.x0, 2), MP_ALIGN_DOWN(p->dst.y0, 2), dstw, dsth, + p->src.x0 << 16, p->src.y0 << 16 , srcw << 16, srch << 16); + if (ret < 0) { + 
MP_ERR(hw, "Failed to set the drmprime video plane %d (buffer %d).\n", + p->ctx->drmprime_video_plane->id, next_frame.fb.fb_id); + goto fail; + } + } + } + } else { + disable_video_plane(hw); + + while (p->old_frame.fb.fb_id) + set_current_frame(hw, NULL); + } + + set_current_frame(hw, &next_frame); + return 0; + + fail: + drm_prime_destroy_framebuffer(p->log, p->ctx->fd, &next_frame.fb, &p->handle_refs); + return ret; +} + +static void uninit(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + + disable_video_plane(hw); + set_current_frame(hw, NULL); + + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); + + if (p->ctx) { + drm_atomic_destroy_context(p->ctx); + p->ctx = NULL; + } +} + +static int init(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + int draw_plane, drmprime_video_plane; + + p->log = hw->log; + + void *tmp = talloc_new(NULL); + struct drm_opts *opts = mp_get_config_group(tmp, hw->global, &drm_conf); + draw_plane = opts->draw_plane; + drmprime_video_plane = opts->drmprime_video_plane; + talloc_free(tmp); + + struct mpv_opengl_drm_params_v2 *drm_params; + + drm_params = ra_get_native_resource(hw->ra_ctx->ra, "drm_params_v2"); + if (drm_params) { + p->ctx = drm_atomic_create_context(p->log, drm_params->fd, drm_params->crtc_id, + drm_params->connector_id, draw_plane, drmprime_video_plane); + if (!p->ctx) { + mp_err(p->log, "Failed to retrieve DRM atomic context.\n"); + goto err; + } + if (!p->ctx->drmprime_video_plane) { + mp_warn(p->log, "No drmprime video plane. 
You might need to specify it manually using --drm-drmprime-video-plane\n"); + goto err; + } + } else { + mp_verbose(p->log, "Failed to retrieve DRM fd from native display.\n"); + goto err; + } + + drmModeCrtcPtr crtc; + crtc = drmModeGetCrtc(p->ctx->fd, p->ctx->crtc->id); + if (crtc) { + p->display_w = crtc->mode.hdisplay; + p->display_h = crtc->mode.vdisplay; + drmModeFreeCrtc(crtc); + } + + uint64_t has_prime; + if (drmGetCap(p->ctx->fd, DRM_CAP_PRIME, &has_prime) < 0) { + MP_ERR(hw, "Card does not support prime handles.\n"); + goto err; + } + + if (has_prime) { + drm_prime_init_handle_ref_count(p, &p->handle_refs); + } + + disable_video_plane(hw); + + p->hwctx = (struct mp_hwdec_ctx) { + .driver_name = hw->driver->name, + .hw_imgfmt = IMGFMT_DRMPRIME, + }; + + char *device = drmGetDeviceNameFromFd2(p->ctx->fd); + int ret = av_hwdevice_ctx_create(&p->hwctx.av_device_ref, + AV_HWDEVICE_TYPE_DRM, device, NULL, 0); + + if (device) + free(device); + + if (ret != 0) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx: %s\n", av_err2str(ret)); + goto err; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; + +err: + uninit(hw); + return -1; +} + +const struct ra_hwdec_driver ra_hwdec_drmprime_overlay = { + .name = "drmprime-overlay", + .priv_size = sizeof(struct priv), + .imgfmts = {IMGFMT_DRMPRIME, 0}, + .init = init, + .overlay_frame = overlay_frame, + .uninit = uninit, +}; diff --git a/video/out/hwdec/hwdec_ios_gl.m b/video/out/hwdec/hwdec_ios_gl.m new file mode 100644 index 0000000..633cc3d --- /dev/null +++ b/video/out/hwdec/hwdec_ios_gl.m @@ -0,0 +1,222 @@ +/* + * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com> + * 2017 Aman Gupta <ffmpeg@tmm1.net> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <assert.h>
+
+#include <CoreVideo/CoreVideo.h>
+#include <OpenGLES/EAGL.h>
+
+#include <libavutil/hwcontext.h>
+
+#include "video/out/gpu/hwdec.h"
+#include "video/mp_image_pool.h"
+#include "video/out/opengl/ra_gl.h"
+#include "hwdec_vt.h"
+
+/*
+ * Check that this interop can be used: the RA must be an OpenGL one, the
+ * context must be at least GLES 2.0, and an EAGLContext must be current.
+ */
+static bool check_hwdec(const struct ra_hwdec *hw)
+{
+    if (!ra_is_gl(hw->ra_ctx->ra))
+        return false;
+
+    GL *gl = ra_gl_get(hw->ra_ctx->ra);
+    if (gl->es < 200) {
+        MP_ERR(hw, "need OpenGLES 2.0 for CVOpenGLESTextureCacheCreateTextureFromImage()\n");
+        return false;
+    }
+
+    if ([EAGLContext currentContext] == nil) {
+        MP_ERR(hw, "need a current EAGLContext set\n");
+        return false;
+    }
+
+    return true;
+}
+
+// In GLES3 mode, CVOpenGLESTextureCacheCreateTextureFromImage()
+// will return error -6683 unless invoked with GL_LUMINANCE and
+// GL_LUMINANCE_ALPHA (http://stackoverflow.com/q/36213994/332798)
+// If a format tries to use GL_RED/GL_RG instead, try to find a format
+// that uses GL_LUMINANCE[_ALPHA] instead.
+// Returns fmt itself if it uses neither GL_RED nor GL_RG, and NULL if no
+// matching luminance format with the same type exists.
+static const struct ra_format *find_la_variant(struct ra *ra,
+                                               const struct ra_format *fmt)
+{
+    GLint internal_format;
+    GLenum format;
+    GLenum type;
+    ra_gl_get_format(fmt, &internal_format, &format, &type);
+
+    if (format == GL_RED) {
+        format = internal_format = GL_LUMINANCE;
+    } else if (format == GL_RG) {
+        format = internal_format = GL_LUMINANCE_ALPHA;
+    } else {
+        return fmt;
+    }
+
+    for (int n = 0; n < ra->num_formats; n++) {
+        const struct ra_format *fmt2 = ra->formats[n];
+        GLint internal_format2;
+        GLenum format2;
+        GLenum type2;
+        ra_gl_get_format(fmt2, &internal_format2, &format2, &type2);
+        if (internal_format2 == internal_format &&
+            format2 == format && type2 == type)
+            return fmt2;
+    }
+
+    return NULL;
+}
+
+/*
+ * Replace every plane format by its luminance variant, reject anything that
+ * is not UNORM, and create the CoreVideo GLES texture cache for the current
+ * EAGLContext. Returns 0 on success and -1 on failure.
+ */
+static int mapper_init(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+
+    for (int n = 0; n < p->desc.num_planes; n++) {
+        p->desc.planes[n] = find_la_variant(mapper->ra, p->desc.planes[n]);
+        if (!p->desc.planes[n] || p->desc.planes[n]->ctype != RA_CTYPE_UNORM) {
+            MP_ERR(mapper, "Format unsupported.\n");
+            return -1;
+        }
+    }
+
+    CVReturn err = CVOpenGLESTextureCacheCreate(
+        kCFAllocatorDefault,
+        NULL,
+        [EAGLContext currentContext],
+        NULL,
+        &p->gl_texture_cache);
+
+    if (err != noErr) {
+        MP_ERR(mapper, "Failure in CVOpenGLESTextureCacheCreate: %d\n", err);
+        return -1;
+    }
+
+    return 0;
+}
+
+/*
+ * Free the wrapped RA textures and release the CoreVideo textures of all
+ * planes, then flush the texture cache.
+ */
+static void mapper_unmap(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+
+    for (int i = 0; i < p->desc.num_planes; i++) {
+        ra_tex_free(mapper->ra, &mapper->tex[i]);
+        if (p->gl_planes[i]) {
+            CFRelease(p->gl_planes[i]);
+            p->gl_planes[i] = NULL;
+        }
+    }
+
+    CVOpenGLESTextureCacheFlush(p->gl_texture_cache, 0);
+}
+
+/*
+ * Map the CVPixelBuffer carried in planes[3] of the source image: for each
+ * plane a GLES texture is created through the texture cache, its sampling
+ * parameters are set, and it is wrapped into an RA texture.
+ *
+ * Returns 0 on success and -1 on failure. Nothing is released here on
+ * failure; mapper_unmap() frees whatever was created.
+ */
+static int mapper_map(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+    GL *gl = ra_gl_get(mapper->ra);
+
+    // Swap the retained pixel buffer for the one of the new frame.
+    CVPixelBufferRelease(p->pbuf);
+    p->pbuf = (CVPixelBufferRef)mapper->src->planes[3];
+    CVPixelBufferRetain(p->pbuf);
+
+    const bool planar = CVPixelBufferIsPlanar(p->pbuf);
+    const int planes = CVPixelBufferGetPlaneCount(p->pbuf);
+    assert((planar && planes == p->desc.num_planes) || p->desc.num_planes == 1);
+
+    for (int i = 0; i < p->desc.num_planes; i++) {
+        const struct ra_format *fmt = p->desc.planes[i];
+
+        GLint internal_format;
+        GLenum format;
+        GLenum type;
+        ra_gl_get_format(fmt, &internal_format, &format, &type);
+
+        CVReturn err = CVOpenGLESTextureCacheCreateTextureFromImage(
+            kCFAllocatorDefault,
+            p->gl_texture_cache,
+            p->pbuf,
+            NULL,
+            GL_TEXTURE_2D,
+            internal_format,
+            CVPixelBufferGetWidthOfPlane(p->pbuf, i),
+            CVPixelBufferGetHeightOfPlane(p->pbuf, i),
+            format,
+            type,
+            i,
+            &p->gl_planes[i]);
+
+        if (err != noErr) {
+            MP_ERR(mapper, "error creating texture for plane %d: %d\n", i, err);
+            return -1;
+        }
+
+        gl->BindTexture(GL_TEXTURE_2D, CVOpenGLESTextureGetName(p->gl_planes[i]));
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE);
+        gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE);
+        gl->BindTexture(GL_TEXTURE_2D, 0);
+
+        struct ra_tex_params params = {
+            .dimensions = 2,
+            .w = CVPixelBufferGetWidthOfPlane(p->pbuf, i),
+            .h = CVPixelBufferGetHeightOfPlane(p->pbuf, i),
+            .d = 1,
+            .format = fmt,
+            .render_src = true,
+            .src_linear = true,
+        };
+
+        mapper->tex[i] = ra_create_wrapped_tex(
+            mapper->ra,
+            &params,
+            CVOpenGLESTextureGetName(p->gl_planes[i])
+        );
+        if (!mapper->tex[i])
+            return -1;
+    }
+
+    return 0;
+}
+
+// Release the retained pixel buffer and the texture cache.
+static void mapper_uninit(struct ra_hwdec_mapper *mapper)
+{
+    struct priv *p = mapper->priv;
+
+    CVPixelBufferRelease(p->pbuf);
+    if (p->gl_texture_cache) {
+        CFRelease(p->gl_texture_cache);
+        p->gl_texture_cache = NULL;
+    }
+}
+
+/*
+ * Install the GLES interop callbacks on the hwdec if check_hwdec() passes.
+ * Returns false (leaving the callbacks untouched) otherwise.
+ */
+bool vt_gl_init(const struct ra_hwdec *hw)
+{
+    struct priv_owner *p = hw->priv;
+
+    if (!check_hwdec(hw))
+        return false;
+
+    p->interop_init = mapper_init;
+    p->interop_uninit = mapper_uninit;
+    p->interop_map = mapper_map;
+    p->interop_unmap = mapper_unmap;
+
+    return true;
+}
diff --git a/video/out/hwdec/hwdec_mac_gl.c b/video/out/hwdec/hwdec_mac_gl.c
new file mode 100644
index 0000000..b73f5b9
--- /dev/null
+++ b/video/out/hwdec/hwdec_mac_gl.c
@@ -0,0 +1,169 @@
+/*
+ * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com>
+ *
+ * This file is part of mpv.
+ *
+ * mpv is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * mpv is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with mpv. If not, see <http://www.gnu.org/licenses/>.
+ */ + +#include <assert.h> + +#include <IOSurface/IOSurface.h> +#include <CoreVideo/CoreVideo.h> +#include <OpenGL/OpenGL.h> +#include <OpenGL/CGLIOSurface.h> + +#include <libavutil/hwcontext.h> + +#include "video/mp_image_pool.h" +#include "video/out/gpu/hwdec.h" +#include "video/out/opengl/ra_gl.h" +#include "hwdec_vt.h" + +static bool check_hwdec(const struct ra_hwdec *hw) +{ + if (!ra_is_gl(hw->ra_ctx->ra)) + return false; + + GL *gl = ra_gl_get(hw->ra_ctx->ra); + if (gl->version < 300) { + MP_ERR(hw, "need >= OpenGL 3.0 for core rectangle texture support\n"); + return false; + } + + if (!CGLGetCurrentContext()) { + MP_ERR(hw, "need cocoa opengl backend to be active"); + return false; + } + + return true; +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + + gl->GenTextures(MP_MAX_PLANES, p->gl_planes); + + for (int n = 0; n < p->desc.num_planes; n++) { + if (p->desc.planes[n]->ctype != RA_CTYPE_UNORM) { + MP_ERR(mapper, "Format unsupported.\n"); + return -1; + } + } + + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + // Is this sane? No idea how to release the texture without deleting it. 
+ CVPixelBufferRelease(p->pbuf); + p->pbuf = NULL; + + for (int i = 0; i < p->desc.num_planes; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + + CVPixelBufferRelease(p->pbuf); + p->pbuf = (CVPixelBufferRef)mapper->src->planes[3]; + CVPixelBufferRetain(p->pbuf); + IOSurfaceRef surface = CVPixelBufferGetIOSurface(p->pbuf); + if (!surface) { + MP_ERR(mapper, "CVPixelBuffer has no IOSurface\n"); + return -1; + } + + const bool planar = CVPixelBufferIsPlanar(p->pbuf); + const int planes = CVPixelBufferGetPlaneCount(p->pbuf); + assert((planar && planes == p->desc.num_planes) || p->desc.num_planes == 1); + + GLenum gl_target = GL_TEXTURE_RECTANGLE; + + for (int i = 0; i < p->desc.num_planes; i++) { + const struct ra_format *fmt = p->desc.planes[i]; + + GLint internal_format; + GLenum format; + GLenum type; + ra_gl_get_format(fmt, &internal_format, &format, &type); + + gl->BindTexture(gl_target, p->gl_planes[i]); + + CGLError err = CGLTexImageIOSurface2D( + CGLGetCurrentContext(), gl_target, + internal_format, + IOSurfaceGetWidthOfPlane(surface, i), + IOSurfaceGetHeightOfPlane(surface, i), + format, type, surface, i); + + gl->BindTexture(gl_target, 0); + + if (err != kCGLNoError) { + MP_ERR(mapper, + "error creating IOSurface texture for plane %d: %s (%x)\n", + i, CGLErrorString(err), gl->GetError()); + return -1; + } + + struct ra_tex_params params = { + .dimensions = 2, + .w = IOSurfaceGetWidthOfPlane(surface, i), + .h = IOSurfaceGetHeightOfPlane(surface, i), + .d = 1, + .format = fmt, + .render_src = true, + .src_linear = true, + .non_normalized = gl_target == GL_TEXTURE_RECTANGLE, + }; + + mapper->tex[i] = ra_create_wrapped_tex(mapper->ra, ¶ms, + p->gl_planes[i]); + if (!mapper->tex[i]) + return -1; + } + + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = 
ra_gl_get(mapper->ra); + + gl->DeleteTextures(MP_MAX_PLANES, p->gl_planes); +} + +bool vt_gl_init(const struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + if (!check_hwdec(hw)) + return false; + + p->interop_init = mapper_init; + p->interop_uninit = mapper_uninit; + p->interop_map = mapper_map; + p->interop_unmap = mapper_unmap; + + return true; +} diff --git a/video/out/hwdec/hwdec_vaapi.c b/video/out/hwdec/hwdec_vaapi.c new file mode 100644 index 0000000..d8a4517 --- /dev/null +++ b/video/out/hwdec/hwdec_vaapi.c @@ -0,0 +1,557 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_vaapi.h> +#include <va/va_drmcommon.h> + +#include "config.h" + +#include "video/out/gpu/hwdec.h" +#include "video/out/hwdec/dmabuf_interop.h" +#include "video/fmt-conversion.h" +#include "video/mp_image_pool.h" +#include "video/vaapi.h" + +#if HAVE_VAAPI_DRM +#include "libmpv/render_gl.h" +#endif + +#if HAVE_VAAPI_X11 +#include <va/va_x11.h> + +static VADisplay *create_x11_va_display(struct ra *ra) +{ + Display *x11 = ra_get_native_resource(ra, "x11"); + return x11 ? 
vaGetDisplay(x11) : NULL; +} +#endif + +#if HAVE_VAAPI_WAYLAND +#include <va/va_wayland.h> + +static VADisplay *create_wayland_va_display(struct ra *ra) +{ + struct wl_display *wl = ra_get_native_resource(ra, "wl"); + + return wl ? vaGetDisplayWl(wl) : NULL; +} +#endif + +#if HAVE_VAAPI_DRM +#include <va/va_drm.h> + +static VADisplay *create_drm_va_display(struct ra *ra) +{ + mpv_opengl_drm_params_v2 *params = ra_get_native_resource(ra, "drm_params_v2"); + if (!params || params->render_fd == -1) + return NULL; + + return vaGetDisplayDRM(params->render_fd); +} +#endif + +struct va_create_native { + const char *name; + VADisplay *(*create)(struct ra *ra); +}; + +static const struct va_create_native create_native_cbs[] = { +#if HAVE_VAAPI_X11 + {"x11", create_x11_va_display}, +#endif +#if HAVE_VAAPI_WAYLAND + {"wayland", create_wayland_va_display}, +#endif +#if HAVE_VAAPI_DRM + {"drm", create_drm_va_display}, +#endif +}; + +static VADisplay *create_native_va_display(struct ra *ra, struct mp_log *log) +{ + for (int n = 0; n < MP_ARRAY_SIZE(create_native_cbs); n++) { + const struct va_create_native *disp = &create_native_cbs[n]; + mp_verbose(log, "Trying to open a %s VA display...\n", disp->name); + VADisplay *display = disp->create(ra); + if (display) + return display; + } + return NULL; +} + +static void determine_working_formats(struct ra_hwdec *hw); + +struct priv_owner { + struct mp_vaapi_ctx *ctx; + VADisplay *display; + int *formats; + bool probing_formats; // temporary during init + + struct dmabuf_interop dmabuf_interop; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + if (p->ctx) { + hwdec_devices_remove(hw->devs, &p->ctx->hwctx); + if (p->ctx->hwctx.conversion_config) { + AVVAAPIHWConfig *hwconfig = p->ctx->hwctx.conversion_config; + vaDestroyConfig(p->ctx->display, hwconfig->config_id); + av_freep(&p->ctx->hwctx.conversion_config); + } + } + va_destroy(p->ctx); +} + +const static dmabuf_interop_init interop_inits[] = { 
+#if HAVE_DMABUF_INTEROP_GL + dmabuf_interop_gl_init, +#endif + dmabuf_interop_pl_init, +#if HAVE_DMABUF_WAYLAND + dmabuf_interop_wl_init, +#endif + NULL +}; + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + VAStatus vas; + + for (int i = 0; interop_inits[i]; i++) { + if (interop_inits[i](hw, &p->dmabuf_interop)) { + break; + } + } + + if (!p->dmabuf_interop.interop_map || !p->dmabuf_interop.interop_unmap) { + MP_VERBOSE(hw, "VAAPI hwdec only works with OpenGL or Vulkan backends.\n"); + return -1; + } + + p->display = create_native_va_display(hw->ra_ctx->ra, hw->log); + if (!p->display) { + MP_VERBOSE(hw, "Could not create a VA display.\n"); + return -1; + } + + p->ctx = va_initialize(p->display, hw->log, true); + if (!p->ctx) { + vaTerminate(p->display); + return -1; + } + if (!p->ctx->av_device_ref) { + MP_VERBOSE(hw, "libavutil vaapi code rejected the driver?\n"); + return -1; + } + + if (hw->probing && va_guess_if_emulated(p->ctx)) { + return -1; + } + + determine_working_formats(hw); + if (!p->formats || !p->formats[0]) { + return -1; + } + + VAConfigID config_id; + AVVAAPIHWConfig *hwconfig = NULL; + vas = vaCreateConfig(p->display, VAProfileNone, VAEntrypointVideoProc, NULL, + 0, &config_id); + if (vas == VA_STATUS_SUCCESS) { + hwconfig = av_hwdevice_hwconfig_alloc(p->ctx->av_device_ref); + hwconfig->config_id = config_id; + } + + // it's now safe to set the display resource + ra_add_native_resource(hw->ra_ctx->ra, "VADisplay", p->display); + + p->ctx->hwctx.hw_imgfmt = IMGFMT_VAAPI; + p->ctx->hwctx.supported_formats = p->formats; + p->ctx->hwctx.driver_name = hw->driver->name; + p->ctx->hwctx.conversion_filter_name = "scale_vaapi"; + p->ctx->hwctx.conversion_config = hwconfig; + hwdec_devices_add(hw->devs, &p->ctx->hwctx); + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct dmabuf_interop_priv *p = mapper->priv; + + 
p_owner->dmabuf_interop.interop_unmap(mapper); + + if (p->surface_acquired) { + for (int n = 0; n < p->desc.nb_objects; n++) + close(p->desc.objects[n].fd); + p->surface_acquired = false; + } +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + if (p_owner->dmabuf_interop.interop_uninit) { + p_owner->dmabuf_interop.interop_uninit(mapper); + } +} + +static bool check_fmt(struct ra_hwdec_mapper *mapper, int fmt) +{ + struct priv_owner *p_owner = mapper->owner->priv; + for (int n = 0; p_owner->formats && p_owner->formats[n]; n++) { + if (p_owner->formats[n] == fmt) + return true; + } + return false; +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct dmabuf_interop_priv *p = mapper->priv; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + struct ra_imgfmt_desc desc = {0}; + + if (mapper->ra->num_formats && + !ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return -1; + + p->num_planes = desc.num_planes; + mp_image_set_params(&p->layout, &mapper->dst_params); + + if (p_owner->dmabuf_interop.interop_init) + if (!p_owner->dmabuf_interop.interop_init(mapper, &desc)) + return -1; + + if (!p_owner->probing_formats && !check_fmt(mapper, mapper->dst_params.imgfmt)) + { + MP_FATAL(mapper, "unsupported VA image format %s\n", + mp_imgfmt_to_name(mapper->dst_params.imgfmt)); + return -1; + } + + return 0; +} + +static void close_file_descriptors(VADRMPRIMESurfaceDescriptor desc) +{ + for (int i = 0; i < desc.num_objects; i++) + close(desc.objects[i].fd); +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct dmabuf_interop_priv *p = mapper->priv; + VAStatus status; + VADisplay *display = p_owner->display; + VADRMPRIMESurfaceDescriptor desc = {0}; + + uint32_t 
flags = p_owner->dmabuf_interop.composed_layers ? + VA_EXPORT_SURFACE_COMPOSED_LAYERS : VA_EXPORT_SURFACE_SEPARATE_LAYERS; + status = vaExportSurfaceHandle(display, va_surface_id(mapper->src), + VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_READ_ONLY | + flags, + &desc); + if (!CHECK_VA_STATUS_LEVEL(mapper, "vaExportSurfaceHandle()", + p_owner->probing_formats ? MSGL_DEBUG : MSGL_ERR)) + { + close_file_descriptors(desc); + goto err; + } + vaSyncSurface(display, va_surface_id(mapper->src)); + // No need to error out if sync fails, but good to know if it did. + CHECK_VA_STATUS(mapper, "vaSyncSurface()"); + p->surface_acquired = true; + + // We use AVDRMFrameDescriptor to store the dmabuf so we need to copy the + // values over. + int num_returned_planes = 0; + p->desc.nb_layers = desc.num_layers; + p->desc.nb_objects = desc.num_objects; + for (int i = 0; i < desc.num_layers; i++) { + p->desc.layers[i].format = desc.layers[i].drm_format; + p->desc.layers[i].nb_planes = desc.layers[i].num_planes; + for (int j = 0; j < desc.layers[i].num_planes; j++) + { + p->desc.layers[i].planes[j].object_index = desc.layers[i].object_index[j]; + p->desc.layers[i].planes[j].offset = desc.layers[i].offset[j]; + p->desc.layers[i].planes[j].pitch = desc.layers[i].pitch[j]; + } + + num_returned_planes += desc.layers[i].num_planes; + } + for (int i = 0; i < desc.num_objects; i++) { + p->desc.objects[i].format_modifier = desc.objects[i].drm_format_modifier; + p->desc.objects[i].fd = desc.objects[i].fd; + p->desc.objects[i].size = desc.objects[i].size; + } + + // We can handle composed formats if the total number of planes is still + // equal the number of planes we expect. Complex formats with auxiliary + // planes cannot be supported. + if (p->num_planes != 0 && p->num_planes != num_returned_planes) { + mp_msg(mapper->log, p_owner->probing_formats ? MSGL_DEBUG : MSGL_ERR, + "Mapped surface with format '%s' has unexpected number of planes. 
" + "(%d layers and %d planes, but expected %d planes)\n", + mp_imgfmt_to_name(mapper->src->params.hw_subfmt), + desc.num_layers, num_returned_planes, p->num_planes); + goto err; + } + + if (!p_owner->dmabuf_interop.interop_map(mapper, &p_owner->dmabuf_interop, + p_owner->probing_formats)) + goto err; + + if (desc.fourcc == VA_FOURCC_YV12) + MPSWAP(struct ra_tex*, mapper->tex[1], mapper->tex[2]); + + return 0; + +err: + mapper_unmap(mapper); + + if (!p_owner->probing_formats) + MP_FATAL(mapper, "mapping VAAPI EGL image failed\n"); + return -1; +} + +static bool try_format_map(struct ra_hwdec *hw, struct mp_image *surface) +{ + struct ra_hwdec_mapper *mapper = ra_hwdec_mapper_create(hw, &surface->params); + if (!mapper) { + MP_DBG(hw, "Failed to create mapper\n"); + return false; + } + + bool ok = ra_hwdec_mapper_map(mapper, surface) >= 0; + ra_hwdec_mapper_free(&mapper); + return ok; +} + +static void try_format_pixfmt(struct ra_hwdec *hw, enum AVPixelFormat pixfmt) +{ + bool supported = false; + struct priv_owner *p = hw->priv; + + int mp_fmt = pixfmt2imgfmt(pixfmt); + if (!mp_fmt) + return; + + int num_formats = 0; + for (int n = 0; p->formats && p->formats[n]; n++) { + if (p->formats[n] == mp_fmt) + return; // already added + num_formats += 1; + } + + AVBufferRef *fref = NULL; + struct mp_image *s = NULL; + AVFrame *frame = NULL; + fref = av_hwframe_ctx_alloc(p->ctx->av_device_ref); + if (!fref) + goto err; + AVHWFramesContext *fctx = (void *)fref->data; + fctx->format = AV_PIX_FMT_VAAPI; + fctx->sw_format = pixfmt; + fctx->width = 128; + fctx->height = 128; + if (av_hwframe_ctx_init(fref) < 0) + goto err; + frame = av_frame_alloc(); + if (!frame) + goto err; + if (av_hwframe_get_buffer(fref, frame, 0) < 0) + goto err; + s = mp_image_from_av_frame(frame); + if (!s || !mp_image_params_valid(&s->params)) + goto err; + if (try_format_map(hw, s)) { + supported = true; + MP_TARRAY_APPEND(p, p->formats, num_formats, mp_fmt); + MP_TARRAY_APPEND(p, p->formats, 
num_formats, 0); // terminate it + } +err: + if (!supported) + MP_DBG(hw, "Unsupported format: %s\n", + mp_imgfmt_to_name(mp_fmt)); + + talloc_free(s); + av_frame_free(&frame); + av_buffer_unref(&fref); +} + +static void try_format_config(struct ra_hwdec *hw, AVVAAPIHWConfig *hwconfig) +{ + struct priv_owner *p = hw->priv; + enum AVPixelFormat *fmts = NULL; + + AVHWFramesConstraints *fc = + av_hwdevice_get_hwframe_constraints(p->ctx->av_device_ref, hwconfig); + if (!fc) { + MP_WARN(hw, "failed to retrieve libavutil frame constraints\n"); + return; + } + + /* + * We need a hwframe_ctx to be able to get the valid formats, but to + * initialise it, we need a format, so we get the first format from the + * hwconfig. We don't care about the other formats in the config because the + * transfer formats list will already include them. + */ + AVBufferRef *fref = NULL; + fref = av_hwframe_ctx_alloc(p->ctx->av_device_ref); + if (!fref) { + MP_WARN(hw, "failed to alloc libavutil frame context\n"); + goto err; + } + AVHWFramesContext *fctx = (void *)fref->data; + fctx->format = AV_PIX_FMT_VAAPI; + fctx->sw_format = fc->valid_sw_formats[0]; + fctx->width = 128; + fctx->height = 128; + if (av_hwframe_ctx_init(fref) < 0) { + MP_WARN(hw, "failed to init libavutil frame context\n"); + goto err; + } + + int ret = av_hwframe_transfer_get_formats(fref, AV_HWFRAME_TRANSFER_DIRECTION_TO, &fmts, 0); + if (ret) { + MP_WARN(hw, "failed to get libavutil frame context supported formats\n"); + goto err; + } + + for (int n = 0; fmts && + fmts[n] != AV_PIX_FMT_NONE; n++) + try_format_pixfmt(hw, fmts[n]); + +err: + av_hwframe_constraints_free(&fc); + av_buffer_unref(&fref); + av_free(fmts); +} + +static void determine_working_formats(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + VAStatus status; + VAProfile *profiles = NULL; + VAEntrypoint *entrypoints = NULL; + + MP_VERBOSE(hw, "Going to probe surface formats (may log bogus errors)...\n"); + p->probing_formats = true; + + 
AVVAAPIHWConfig *hwconfig = av_hwdevice_hwconfig_alloc(p->ctx->av_device_ref); + if (!hwconfig) { + MP_WARN(hw, "Could not allocate FFmpeg AVVAAPIHWConfig\n"); + goto done; + } + + profiles = talloc_zero_array(NULL, VAProfile, vaMaxNumProfiles(p->display)); + entrypoints = talloc_zero_array(NULL, VAEntrypoint, + vaMaxNumEntrypoints(p->display)); + int num_profiles = 0; + status = vaQueryConfigProfiles(p->display, profiles, &num_profiles); + if (!CHECK_VA_STATUS(hw, "vaQueryConfigProfiles()")) + num_profiles = 0; + + /* + * We need to find one declared format to bootstrap probing. So find a valid + * decoding profile and use its config. If try_format_config() finds any + * formats, they will be all the supported formats, and we don't need to + * look at any other profiles. + */ + for (int n = 0; n < num_profiles; n++) { + VAProfile profile = profiles[n]; + if (profile == VAProfileNone) { + // We don't use the None profile. + continue; + } + int num_ep = 0; + status = vaQueryConfigEntrypoints(p->display, profile, entrypoints, + &num_ep); + if (status != VA_STATUS_SUCCESS) { + MP_DBG(hw, "vaQueryConfigEntrypoints(): '%s' for profile %d", + vaErrorStr(status), (int)profile); + continue; + } + for (int ep = 0; ep < num_ep; ep++) { + if (entrypoints[ep] != VAEntrypointVLD) { + // We are only interested in decoding entrypoints. 
+ continue; + } + VAConfigID config = VA_INVALID_ID; + status = vaCreateConfig(p->display, profile, entrypoints[ep], + NULL, 0, &config); + if (status != VA_STATUS_SUCCESS) { + MP_DBG(hw, "vaCreateConfig(): '%s' for profile %d", + vaErrorStr(status), (int)profile); + continue; + } + + hwconfig->config_id = config; + try_format_config(hw, hwconfig); + + vaDestroyConfig(p->display, config); + if (p->formats && p->formats[0]) { + goto done; + } + } + } + +done: + av_free(hwconfig); + talloc_free(profiles); + talloc_free(entrypoints); + + p->probing_formats = false; + + MP_DBG(hw, "Supported formats:\n"); + for (int n = 0; p->formats && p->formats[n]; n++) + MP_DBG(hw, " %s\n", mp_imgfmt_to_name(p->formats[n])); + MP_VERBOSE(hw, "Done probing surface formats.\n"); +} + +const struct ra_hwdec_driver ra_hwdec_vaapi = { + .name = "vaapi", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_VAAPI, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct dmabuf_interop_priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/hwdec/hwdec_vt.c b/video/out/hwdec/hwdec_vt.c new file mode 100644 index 0000000..ab41d02 --- /dev/null +++ b/video/out/hwdec/hwdec_vt.c @@ -0,0 +1,141 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. 
If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <string.h> +#include <assert.h> +#include <unistd.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_videotoolbox.h> + +#include "config.h" + +#include "video/out/gpu/hwdec.h" +#include "video/out/hwdec/hwdec_vt.h" + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); +} + +const static vt_interop_init interop_inits[] = { +#if HAVE_VIDEOTOOLBOX_GL || HAVE_IOS_GL + vt_gl_init, +#endif +#if HAVE_VIDEOTOOLBOX_PL + vt_pl_init, +#endif + NULL +}; + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + for (int i = 0; interop_inits[i]; i++) { + if (interop_inits[i](hw)) { + break; + } + } + + if (!p->interop_map || !p->interop_unmap) { + MP_VERBOSE(hw, "VT hwdec only works with OpenGL or Vulkan backends.\n"); + return -1; + } + + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .hw_imgfmt = IMGFMT_VIDEOTOOLBOX, + }; + + int ret = av_hwdevice_ctx_create(&p->hwctx.av_device_ref, + AV_HWDEVICE_TYPE_VIDEOTOOLBOX, NULL, NULL, 0); + if (ret != 0) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx: %s\n", av_err2str(ret)); + return -1; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + + p_owner->interop_unmap(mapper); +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + if (p_owner->interop_uninit) { + p_owner->interop_uninit(mapper); + } +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct priv *p = mapper->priv; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 
0; + + if (!mapper->dst_params.imgfmt) { + MP_ERR(mapper, "Unsupported CVPixelBuffer format.\n"); + return -1; + } + + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &p->desc)) { + MP_ERR(mapper, "Unsupported texture format.\n"); + return -1; + } + + if (p_owner->interop_init) + return p_owner->interop_init(mapper); + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + + return p_owner->interop_map(mapper); +} + +const struct ra_hwdec_driver ra_hwdec_videotoolbox = { + .name = "videotoolbox", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_VIDEOTOOLBOX, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/hwdec/hwdec_vt.h b/video/out/hwdec/hwdec_vt.h new file mode 100644 index 0000000..b79c641 --- /dev/null +++ b/video/out/hwdec/hwdec_vt.h @@ -0,0 +1,63 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
 */

#pragma once

#include <CoreVideo/CoreVideo.h>

#include "config.h"
#include "video/out/gpu/hwdec.h"

// Per-hwdec state: the registered device context plus the callbacks of the
// interop backend that the vt_*_init() functions install.
struct priv_owner {
    struct mp_hwdec_ctx hwctx;

    int (*interop_init)(struct ra_hwdec_mapper *mapper);
    void (*interop_uninit)(struct ra_hwdec_mapper *mapper);

    int (*interop_map)(struct ra_hwdec_mapper *mapper);
    void (*interop_unmap)(struct ra_hwdec_mapper *mapper);
};

// Stand-in declarations for the Metal texture types when this header is not
// compiled as Objective-C.
#ifndef __OBJC__
typedef struct __CVMetalTextureCache *CVMetalTextureCacheRef;
typedef CVImageBufferRef CVMetalTextureRef;
#endif

// Per-mapper state; which texture members exist depends on the enabled
// backends.
struct priv {
    void *interop_mapper_priv;

    // Pixel buffer of the currently mapped frame.
    CVPixelBufferRef pbuf;

#if HAVE_VIDEOTOOLBOX_GL
    GLuint gl_planes[MP_MAX_PLANES];
#elif HAVE_IOS_GL
    CVOpenGLESTextureCacheRef gl_texture_cache;
    CVOpenGLESTextureRef gl_planes[MP_MAX_PLANES];
#endif

#if HAVE_VIDEOTOOLBOX_PL
    CVMetalTextureCacheRef mtl_texture_cache;
    CVMetalTextureRef mtl_planes[MP_MAX_PLANES];
#endif

    // Plane layout of the destination image format.
    struct ra_imgfmt_desc desc;
};

// Interop initialiser: returns true if it installed its callbacks on hw->priv.
typedef bool (*vt_interop_init)(const struct ra_hwdec *hw);

bool vt_gl_init(const struct ra_hwdec *hw);
bool vt_pl_init(const struct ra_hwdec *hw);
diff --git a/video/out/hwdec/hwdec_vt_pl.m b/video/out/hwdec/hwdec_vt_pl.m new file mode 100644 index 0000000..cd133a0 --- /dev/null +++ b/video/out/hwdec/hwdec_vt_pl.m @@ -0,0 +1,312 @@
/*
 * Copyright (c) 2013 Stefano Pigozzi <stefano.pigozzi@gmail.com>
 * 2017 Aman Gupta <ffmpeg@tmm1.net>
 * 2023 rcombs <rcombs@rcombs.me>
 *
 * This file is part of mpv.
 *
 * mpv is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * mpv is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
 * GNU Lesser General Public License for more details.
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> + +#include <CoreVideo/CoreVideo.h> +#include <Metal/Metal.h> + +#include <libavutil/hwcontext.h> + +#include <libplacebo/renderer.h> + +#include "config.h" + +#include "video/out/gpu/hwdec.h" +#include "video/out/placebo/ra_pl.h" +#include "video/mp_image_pool.h" + +#if HAVE_VULKAN +#include "video/out/vulkan/common.h" +#endif + +#include "hwdec_vt.h" + +static bool check_hwdec(const struct ra_hwdec *hw) +{ + pl_gpu gpu = ra_pl_get(hw->ra_ctx->ra); + if (!gpu) { + // This is not a libplacebo RA; + return false; + } + + if (!(gpu->import_caps.tex & PL_HANDLE_MTL_TEX)) { + MP_VERBOSE(hw, "VideoToolbox libplacebo interop requires support for " + "PL_HANDLE_MTL_TEX import.\n"); + return false; + } + + return true; +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + if (!mapper->dst_params.imgfmt) { + MP_ERR(mapper, "Unsupported CVPixelBuffer format.\n"); + return -1; + } + + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &p->desc)) { + MP_ERR(mapper, "Unsupported texture format.\n"); + return -1; + } + + for (int n = 0; n < p->desc.num_planes; n++) { + if (!p->desc.planes[n] || p->desc.planes[n]->ctype != RA_CTYPE_UNORM) { + MP_ERR(mapper, "Format unsupported.\n"); + return -1; + } + } + + id<MTLDevice> mtl_device = nil; + +#ifdef VK_EXT_METAL_OBJECTS_SPEC_VERSION + pl_gpu gpu = ra_pl_get(mapper->ra); + if (gpu) { + pl_vulkan vulkan = pl_vulkan_get(gpu); + if (vulkan && vulkan->device && vulkan->instance && vulkan->get_proc_addr) { + PFN_vkExportMetalObjectsEXT pExportMetalObjects = (PFN_vkExportMetalObjectsEXT)vulkan->get_proc_addr(vulkan->instance, "vkExportMetalObjectsEXT"); + if 
(pExportMetalObjects) { + VkExportMetalDeviceInfoEXT device_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_METAL_DEVICE_INFO_EXT, + .pNext = NULL, + .mtlDevice = nil, + }; + + VkExportMetalObjectsInfoEXT objects_info = { + .sType = VK_STRUCTURE_TYPE_EXPORT_METAL_OBJECTS_INFO_EXT, + .pNext = &device_info, + }; + + pExportMetalObjects(vulkan->device, &objects_info); + + mtl_device = device_info.mtlDevice; + [mtl_device retain]; + } + } + } +#endif + + if (!mtl_device) { + mtl_device = MTLCreateSystemDefaultDevice(); + } + + CVReturn err = CVMetalTextureCacheCreate( + kCFAllocatorDefault, + NULL, + mtl_device, + NULL, + &p->mtl_texture_cache); + + [mtl_device release]; + + if (err != noErr) { + MP_ERR(mapper, "Failure in CVOpenGLESTextureCacheCreate: %d\n", err); + return -1; + } + + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + for (int i = 0; i < p->desc.num_planes; i++) { + ra_tex_free(mapper->ra, &mapper->tex[i]); + if (p->mtl_planes[i]) { + CFRelease(p->mtl_planes[i]); + p->mtl_planes[i] = NULL; + } + } + + CVMetalTextureCacheFlush(p->mtl_texture_cache, 0); +} + +static const struct { + const char *glsl; + MTLPixelFormat mtl; +} mtl_fmts[] = { + {"r16f", MTLPixelFormatR16Float }, + {"r32f", MTLPixelFormatR32Float }, + {"rg16f", MTLPixelFormatRG16Float }, + {"rg32f", MTLPixelFormatRG32Float }, + {"rgba16f", MTLPixelFormatRGBA16Float }, + {"rgba32f", MTLPixelFormatRGBA32Float }, + {"r11f_g11f_b10f", MTLPixelFormatRG11B10Float }, + + {"r8", MTLPixelFormatR8Unorm }, + {"r16", MTLPixelFormatR16Unorm }, + {"rg8", MTLPixelFormatRG8Unorm }, + {"rg16", MTLPixelFormatRG16Unorm }, + {"rgba8", MTLPixelFormatRGBA8Unorm }, + {"rgba16", MTLPixelFormatRGBA16Unorm }, + {"rgb10_a2", MTLPixelFormatRGB10A2Unorm }, + + {"r8_snorm", MTLPixelFormatR8Snorm }, + {"r16_snorm", MTLPixelFormatR16Snorm }, + {"rg8_snorm", MTLPixelFormatRG8Snorm }, + {"rg16_snorm", MTLPixelFormatRG16Snorm }, + {"rgba8_snorm", 
MTLPixelFormatRGBA8Snorm }, + {"rgba16_snorm", MTLPixelFormatRGBA16Snorm }, + + {"r8ui", MTLPixelFormatR8Uint }, + {"r16ui", MTLPixelFormatR16Uint }, + {"r32ui", MTLPixelFormatR32Uint }, + {"rg8ui", MTLPixelFormatRG8Uint }, + {"rg16ui", MTLPixelFormatRG16Uint }, + {"rg32ui", MTLPixelFormatRG32Uint }, + {"rgba8ui", MTLPixelFormatRGBA8Uint }, + {"rgba16ui", MTLPixelFormatRGBA16Uint }, + {"rgba32ui", MTLPixelFormatRGBA32Uint }, + {"rgb10_a2ui", MTLPixelFormatRGB10A2Uint }, + + {"r8i", MTLPixelFormatR8Sint }, + {"r16i", MTLPixelFormatR16Sint }, + {"r32i", MTLPixelFormatR32Sint }, + {"rg8i", MTLPixelFormatRG8Sint }, + {"rg16i", MTLPixelFormatRG16Sint }, + {"rg32i", MTLPixelFormatRG32Sint }, + {"rgba8i", MTLPixelFormatRGBA8Sint }, + {"rgba16i", MTLPixelFormatRGBA16Sint }, + {"rgba32i", MTLPixelFormatRGBA32Sint }, + + { NULL, MTLPixelFormatInvalid }, +}; + +static MTLPixelFormat get_mtl_fmt(const char* glsl) +{ + if (!glsl) + return MTLPixelFormatInvalid; + + for (int i = 0; mtl_fmts[i].glsl; i++) { + if (!strcmp(glsl, mtl_fmts[i].glsl)) + return mtl_fmts[i].mtl; + } + + return MTLPixelFormatInvalid; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + pl_gpu gpu = ra_pl_get(mapper->owner->ra_ctx->ra); + + CVPixelBufferRelease(p->pbuf); + p->pbuf = (CVPixelBufferRef)mapper->src->planes[3]; + CVPixelBufferRetain(p->pbuf); + + const bool planar = CVPixelBufferIsPlanar(p->pbuf); + const int planes = CVPixelBufferGetPlaneCount(p->pbuf); + assert((planar && planes == p->desc.num_planes) || p->desc.num_planes == 1); + + for (int i = 0; i < p->desc.num_planes; i++) { + const struct ra_format *fmt = p->desc.planes[i]; + + pl_fmt plfmt = ra_pl_fmt_get(fmt); + MTLPixelFormat format = get_mtl_fmt(plfmt->glsl_format); + + if (!format) { + MP_ERR(mapper, "Format unsupported.\n"); + return -1; + } + + size_t width = CVPixelBufferGetWidthOfPlane(p->pbuf, i), + height = CVPixelBufferGetHeightOfPlane(p->pbuf, i); + + CVReturn err = 
CVMetalTextureCacheCreateTextureFromImage( + kCFAllocatorDefault, + p->mtl_texture_cache, + p->pbuf, + NULL, + format, + width, + height, + i, + &p->mtl_planes[i]); + + if (err != noErr) { + MP_ERR(mapper, "error creating texture for plane %d: %d\n", i, err); + return -1; + } + + struct pl_tex_params tex_params = { + .w = width, + .h = height, + .d = 0, + .format = plfmt, + .sampleable = true, + .import_handle = PL_HANDLE_MTL_TEX, + .shared_mem = (struct pl_shared_mem) { + .handle = { + .handle = CVMetalTextureGetTexture(p->mtl_planes[i]), + }, + }, + }; + + pl_tex pltex = pl_tex_create(gpu, &tex_params); + if (!pltex) + return -1; + + struct ra_tex *ratex = talloc_ptrtype(NULL, ratex); + int ret = mppl_wrap_tex(mapper->ra, pltex, ratex); + if (!ret) { + pl_tex_destroy(gpu, &pltex); + talloc_free(ratex); + return -1; + } + mapper->tex[i] = ratex; + } + + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + CVPixelBufferRelease(p->pbuf); + if (p->mtl_texture_cache) { + CFRelease(p->mtl_texture_cache); + p->mtl_texture_cache = NULL; + } +} + +bool vt_pl_init(const struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + if (!check_hwdec(hw)) + return false; + + p->interop_init = mapper_init; + p->interop_uninit = mapper_uninit; + p->interop_map = mapper_map; + p->interop_unmap = mapper_unmap; + + return true; +} diff --git a/video/out/hwdec/hwdec_vulkan.c b/video/out/hwdec/hwdec_vulkan.c new file mode 100644 index 0000000..5f7354d --- /dev/null +++ b/video/out/hwdec/hwdec_vulkan.c @@ -0,0 +1,333 @@ +/* + * Copyright (c) 2022 Philip Langdale <philipl@overt.org> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "config.h" +#include "video/out/gpu/hwdec.h" +#include "video/out/vulkan/context.h" +#include "video/out/placebo/ra_pl.h" + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_vulkan.h> + +struct vulkan_hw_priv { + struct mp_hwdec_ctx hwctx; + pl_gpu gpu; +}; + +struct vulkan_mapper_priv { + struct mp_image layout; + AVVkFrame *vkf; + pl_tex tex[4]; +}; + +static void lock_queue(struct AVHWDeviceContext *ctx, + uint32_t queue_family, uint32_t index) +{ + pl_vulkan vulkan = ctx->user_opaque; + vulkan->lock_queue(vulkan, queue_family, index); +} + +static void unlock_queue(struct AVHWDeviceContext *ctx, + uint32_t queue_family, uint32_t index) +{ + pl_vulkan vulkan = ctx->user_opaque; + vulkan->unlock_queue(vulkan, queue_family, index); +} + +static int vulkan_init(struct ra_hwdec *hw) +{ + AVBufferRef *hw_device_ctx = NULL; + int ret = 0; + struct vulkan_hw_priv *p = hw->priv; + int level = hw->probing ? MSGL_V : MSGL_ERR; + + struct mpvk_ctx *vk = ra_vk_ctx_get(hw->ra_ctx); + if (!vk) { + MP_MSG(hw, level, "This is not a libplacebo vulkan gpu api context.\n"); + return 0; + } + + p->gpu = ra_pl_get(hw->ra_ctx->ra); + if (!p->gpu) { + MP_MSG(hw, level, "Failed to obtain pl_gpu.\n"); + return 0; + } + + /* + * libplacebo initialises all queues, but we still need to discover which + * one is the decode queue. 
+ */ + uint32_t num_qf = 0; + VkQueueFamilyProperties *qf = NULL; + vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, NULL); + if (!num_qf) + goto error; + + qf = talloc_array(NULL, VkQueueFamilyProperties, num_qf); + vkGetPhysicalDeviceQueueFamilyProperties(vk->vulkan->phys_device, &num_qf, qf); + + int decode_index = -1, decode_count = 0; + for (int i = 0; i < num_qf; i++) { + /* + * Pick the first discovered decode queue that we find. Maybe a day will + * come when this needs to be smarter, but I'm sure a bunch of other + * things will have to change too. + */ + if ((qf[i].queueFlags) & VK_QUEUE_VIDEO_DECODE_BIT_KHR) { + decode_index = i; + decode_count = qf[i].queueCount; + } + } + + hw_device_ctx = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VULKAN); + if (!hw_device_ctx) + goto error; + + AVHWDeviceContext *device_ctx = (void *)hw_device_ctx->data; + AVVulkanDeviceContext *device_hwctx = device_ctx->hwctx; + + device_ctx->user_opaque = (void *)vk->vulkan; + device_hwctx->lock_queue = lock_queue; + device_hwctx->unlock_queue = unlock_queue; + device_hwctx->get_proc_addr = vk->vkinst->get_proc_addr; + device_hwctx->inst = vk->vkinst->instance; + device_hwctx->phys_dev = vk->vulkan->phys_device; + device_hwctx->act_dev = vk->vulkan->device; + device_hwctx->device_features = *vk->vulkan->features; + device_hwctx->enabled_inst_extensions = vk->vkinst->extensions; + device_hwctx->nb_enabled_inst_extensions = vk->vkinst->num_extensions; + device_hwctx->enabled_dev_extensions = vk->vulkan->extensions; + device_hwctx->nb_enabled_dev_extensions = vk->vulkan->num_extensions; + device_hwctx->queue_family_index = vk->vulkan->queue_graphics.index; + device_hwctx->nb_graphics_queues = vk->vulkan->queue_graphics.count; + device_hwctx->queue_family_tx_index = vk->vulkan->queue_transfer.index; + device_hwctx->nb_tx_queues = vk->vulkan->queue_transfer.count; + device_hwctx->queue_family_comp_index = vk->vulkan->queue_compute.index; + 
device_hwctx->nb_comp_queues = vk->vulkan->queue_compute.count; + device_hwctx->queue_family_decode_index = decode_index; + device_hwctx->nb_decode_queues = decode_count; + + ret = av_hwdevice_ctx_init(hw_device_ctx); + if (ret < 0) { + MP_MSG(hw, level, "av_hwdevice_ctx_init failed\n"); + goto error; + } + + p->hwctx = (struct mp_hwdec_ctx) { + .driver_name = hw->driver->name, + .av_device_ref = hw_device_ctx, + .hw_imgfmt = IMGFMT_VULKAN, + }; + hwdec_devices_add(hw->devs, &p->hwctx); + + talloc_free(qf); + return 0; + + error: + talloc_free(qf); + av_buffer_unref(&hw_device_ctx); + return -1; +} + +static void vulkan_uninit(struct ra_hwdec *hw) +{ + struct vulkan_hw_priv *p = hw->priv; + + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct vulkan_mapper_priv *p = mapper->priv; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + mp_image_set_params(&p->layout, &mapper->dst_params); + + struct ra_imgfmt_desc desc = {0}; + if (!ra_get_imgfmt_desc(mapper->ra, mapper->dst_params.imgfmt, &desc)) + return -1; + + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct vulkan_hw_priv *p_owner = mapper->owner->priv; + struct vulkan_mapper_priv *p = mapper->priv; + if (!mapper->src) + goto end; + + AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data;; + const AVVulkanFramesContext *vkfc = hwfc->hwctx;; + AVVkFrame *vkf = p->vkf; + + int num_images; + for (num_images = 0; (vkf->img[num_images] != VK_NULL_HANDLE); num_images++); + + for (int i = 0; (p->tex[i] != NULL); i++) { + pl_tex *tex = &p->tex[i]; + if (!*tex) + continue; + + // If we have multiple planes and one image, then that is a multiplane + // frame. 
Anything else is treated as one-image-per-plane. + int index = p->layout.num_planes > 1 && num_images == 1 ? 0 : i; + + // Update AVVkFrame state to reflect current layout + bool ok = pl_vulkan_hold_ex(p_owner->gpu, pl_vulkan_hold_params( + .tex = *tex, + .out_layout = &vkf->layout[index], + .qf = VK_QUEUE_FAMILY_IGNORED, + .semaphore = (pl_vulkan_sem) { + .sem = vkf->sem[index], + .value = vkf->sem_value[index] + 1, + }, + )); + + vkf->access[index] = 0; + vkf->sem_value[index] += !!ok; + *tex = NULL; + } + + vkfc->unlock_frame(hwfc, vkf); + + end: + for (int i = 0; i < p->layout.num_planes; i++) + ra_tex_free(mapper->ra, &mapper->tex[i]); + + p->vkf = NULL; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + bool result = false; + struct vulkan_hw_priv *p_owner = mapper->owner->priv; + struct vulkan_mapper_priv *p = mapper->priv; + pl_vulkan vk = pl_vulkan_get(p_owner->gpu); + if (!vk) + return -1; + + AVHWFramesContext *hwfc = (AVHWFramesContext *) mapper->src->hwctx->data; + const AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *) mapper->src->planes[0]; + + /* + * We need to use the dimensions from the HW Frames Context for the + * textures, as the underlying images may be larger than the logical frame + * size. This most often happens with 1080p content where the actual frame + * height is 1088. + */ + struct mp_image raw_layout; + mp_image_setfmt(&raw_layout, p->layout.params.imgfmt); + mp_image_set_size(&raw_layout, hwfc->width, hwfc->height); + + int num_images; + for (num_images = 0; (vkf->img[num_images] != VK_NULL_HANDLE); num_images++); + const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); + + vkfc->lock_frame(hwfc, vkf); + + for (int i = 0; i < p->layout.num_planes; i++) { + pl_tex *tex = &p->tex[i]; + VkImageAspectFlags aspect = VK_IMAGE_ASPECT_COLOR_BIT; + int index = i; + + // If we have multiple planes and one image, then that is a multiplane + // frame. 
Anything else is treated as one-image-per-plane. + if (p->layout.num_planes > 1 && num_images == 1) { + index = 0; + + switch (i) { + case 0: + aspect = VK_IMAGE_ASPECT_PLANE_0_BIT_KHR; + break; + case 1: + aspect = VK_IMAGE_ASPECT_PLANE_1_BIT_KHR; + break; + case 2: + aspect = VK_IMAGE_ASPECT_PLANE_2_BIT_KHR; + break; + default: + goto error; + } + } + + *tex = pl_vulkan_wrap(p_owner->gpu, pl_vulkan_wrap_params( + .image = vkf->img[index], + .width = mp_image_plane_w(&raw_layout, i), + .height = mp_image_plane_h(&raw_layout, i), + .format = vk_fmt[i], + .usage = vkfc->usage, + .aspect = aspect, + )); + if (!*tex) + goto error; + + pl_vulkan_release_ex(p_owner->gpu, pl_vulkan_release_params( + .tex = p->tex[i], + .layout = vkf->layout[index], + .qf = VK_QUEUE_FAMILY_IGNORED, + .semaphore = (pl_vulkan_sem) { + .sem = vkf->sem[index], + .value = vkf->sem_value[index], + }, + )); + + struct ra_tex *ratex = talloc_ptrtype(NULL, ratex); + result = mppl_wrap_tex(mapper->ra, *tex, ratex); + if (!result) { + pl_tex_destroy(p_owner->gpu, tex); + talloc_free(ratex); + goto error; + } + mapper->tex[i] = ratex; + } + + p->vkf = vkf; + return 0; + + error: + vkfc->unlock_frame(hwfc, vkf); + mapper_unmap(mapper); + return -1; +} + +const struct ra_hwdec_driver ra_hwdec_vulkan = { + .name = "vulkan", + .imgfmts = {IMGFMT_VULKAN, 0}, + .priv_size = sizeof(struct vulkan_hw_priv), + .init = vulkan_init, + .uninit = vulkan_uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct vulkan_mapper_priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/libmpv.h b/video/out/libmpv.h new file mode 100644 index 0000000..a697eaf --- /dev/null +++ b/video/out/libmpv.h @@ -0,0 +1,83 @@ +#pragma once + +#include <stdint.h> +#include <stdbool.h> +#include "libmpv/render.h" +#include "vo.h" + +// Helper for finding a parameter value. 
It returns the direct pointer to the +// value, and if not present, just returns the def argument. In particular, if +// def is not NULL, this never returns NULL (unless a param value is defined +// as accepting NULL, or the libmpv API user is triggering UB). +void *get_mpv_render_param(mpv_render_param *params, mpv_render_param_type type, + void *def); + +#define GET_MPV_RENDER_PARAM(params, type, ctype, def) \ + (*(ctype *)get_mpv_render_param(params, type, &(ctype){(def)})) + +typedef int (*mp_render_cb_control_fn)(struct vo *vo, void *cb_ctx, int *events, + uint32_t request, void *data); +void mp_render_context_set_control_callback(mpv_render_context *ctx, + mp_render_cb_control_fn callback, + void *callback_ctx); +bool mp_render_context_acquire(mpv_render_context *ctx); + +struct render_backend { + struct mpv_global *global; + struct mp_log *log; + const struct render_backend_fns *fns; + + // Set on init, immutable afterwards. + int driver_caps; + struct mp_hwdec_devices *hwdec_devs; + + void *priv; +}; + +// Generic backend for rendering via libmpv. This corresponds to vo/vo_driver, +// except for rendering via the mpv_render_*() API. (As a consequence it's as +// generic as the VO API.) Like with VOs, one backend can support multiple +// underlying GPU APIs. +struct render_backend_fns { + // Returns libmpv error code. In particular, this function has to check for + // MPV_RENDER_PARAM_API_TYPE, and silently return MPV_ERROR_NOT_IMPLEMENTED + // if the API is not included in this backend. + // If this fails, ->destroy() will be called. + int (*init)(struct render_backend *ctx, mpv_render_param *params); + // Check if the passed IMGFMT_ is supported. + bool (*check_format)(struct render_backend *ctx, int imgfmt); + // Implementation of mpv_render_context_set_parameter(). Optional. + int (*set_parameter)(struct render_backend *ctx, mpv_render_param param); + // Like vo_driver.reconfig(). 
+ void (*reconfig)(struct render_backend *ctx, struct mp_image_params *params); + // Like VOCTRL_RESET. + void (*reset)(struct render_backend *ctx); + void (*screenshot)(struct render_backend *ctx, struct vo_frame *frame, + struct voctrl_screenshot *args); + void (*perfdata)(struct render_backend *ctx, + struct voctrl_performance_data *out); + // Like vo_driver.get_image(). + struct mp_image *(*get_image)(struct render_backend *ctx, int imgfmt, + int w, int h, int stride_align, int flags); + // This has two purposes: 1. set queue attributes on VO, 2. update the + // renderer's OSD pointer. Keep in mind that as soon as the caller releases + // the renderer lock, the VO pointer can become invalid. The OSD pointer + // will technically remain valid (even though it's a vo field), until it's + // unset with this function. + // Will be called if vo changes, or if renderer options change. + void (*update_external)(struct render_backend *ctx, struct vo *vo); + // Update screen area. + void (*resize)(struct render_backend *ctx, struct mp_rect *src, + struct mp_rect *dst, struct mp_osd_res *osd); + // Get target surface size from mpv_render_context_render() arguments. + int (*get_target_size)(struct render_backend *ctx, mpv_render_param *params, + int *out_w, int *out_h); + // Implementation of mpv_render_context_render(). + int (*render)(struct render_backend *ctx, mpv_render_param *params, + struct vo_frame *frame); + // Free all data in ctx->priv. 
+ void (*destroy)(struct render_backend *ctx); +}; + +extern const struct render_backend_fns render_backend_gpu; +extern const struct render_backend_fns render_backend_sw; diff --git a/video/out/libmpv_sw.c b/video/out/libmpv_sw.c new file mode 100644 index 0000000..f1b08f0 --- /dev/null +++ b/video/out/libmpv_sw.c @@ -0,0 +1,208 @@ +#include "libmpv/render_gl.h" +#include "libmpv.h" +#include "sub/osd.h" +#include "video/sws_utils.h" + +struct priv { + struct libmpv_gpu_context *context; + + struct mp_sws_context *sws; + struct osd_state *osd; + + struct mp_image_params src_params, dst_params; + struct mp_rect src_rc, dst_rc; + struct mp_osd_res osd_rc; + bool anything_changed; +}; + +static int init(struct render_backend *ctx, mpv_render_param *params) +{ + ctx->priv = talloc_zero(NULL, struct priv); + struct priv *p = ctx->priv; + + char *api = get_mpv_render_param(params, MPV_RENDER_PARAM_API_TYPE, NULL); + if (!api) + return MPV_ERROR_INVALID_PARAMETER; + + if (strcmp(api, MPV_RENDER_API_TYPE_SW) != 0) + return MPV_ERROR_NOT_IMPLEMENTED; + + p->sws = mp_sws_alloc(p); + mp_sws_enable_cmdline_opts(p->sws, ctx->global); + + p->anything_changed = true; + + return 0; +} + +static bool check_format(struct render_backend *ctx, int imgfmt) +{ + struct priv *p = ctx->priv; + + // Note: we don't know the output format yet. Using an arbitrary supported + // format is fine, because we know that any supported input format can + // be converted to any supported output format. 
+ return mp_sws_supports_formats(p->sws, IMGFMT_RGB0, imgfmt); +} + +static int set_parameter(struct render_backend *ctx, mpv_render_param param) +{ + return MPV_ERROR_NOT_IMPLEMENTED; +} + +static void reconfig(struct render_backend *ctx, struct mp_image_params *params) +{ + struct priv *p = ctx->priv; + + p->src_params = *params; + p->anything_changed = true; +} + +static void reset(struct render_backend *ctx) +{ + // stateless +} + +static void update_external(struct render_backend *ctx, struct vo *vo) +{ + struct priv *p = ctx->priv; + + p->osd = vo ? vo->osd : NULL; +} + +static void resize(struct render_backend *ctx, struct mp_rect *src, + struct mp_rect *dst, struct mp_osd_res *osd) +{ + struct priv *p = ctx->priv; + + p->src_rc = *src; + p->dst_rc = *dst; + p->osd_rc = *osd; + p->anything_changed = true; +} + +static int get_target_size(struct render_backend *ctx, mpv_render_param *params, + int *out_w, int *out_h) +{ + int *sz = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_SIZE, NULL); + if (!sz) + return MPV_ERROR_INVALID_PARAMETER; + + *out_w = sz[0]; + *out_h = sz[1]; + return 0; +} + +static int render(struct render_backend *ctx, mpv_render_param *params, + struct vo_frame *frame) +{ + struct priv *p = ctx->priv; + + int *sz = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_SIZE, NULL); + char *fmt = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_FORMAT, NULL); + size_t *stride = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_STRIDE, NULL); + void *ptr = get_mpv_render_param(params, MPV_RENDER_PARAM_SW_POINTER, NULL); + + if (!sz || !fmt || !stride || !ptr) + return MPV_ERROR_INVALID_PARAMETER; + + char *prev_fmt = mp_imgfmt_to_name(p->dst_params.imgfmt); + if (strcmp(prev_fmt, fmt) != 0) + p->anything_changed = true; + + if (sz[0] != p->dst_params.w || sz[1] != p->dst_params.h) + p->anything_changed = true; + + if (p->anything_changed) { + p->dst_params = (struct mp_image_params){ + .imgfmt = mp_imgfmt_from_name(bstr0(fmt)), + .w = sz[0], + 
.h = sz[1], + }; + + // Exclude "problematic" formats. In particular, reject multi-plane and + // hw formats. Exclude non-byte-aligned formats for easier stride + // checking. + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(p->dst_params.imgfmt); + if (!(desc.flags & MP_IMGFLAG_COLOR_RGB) || + !(desc.flags & (MP_IMGFLAG_TYPE_UINT | MP_IMGFLAG_TYPE_FLOAT)) || + (desc.flags & MP_IMGFLAG_TYPE_PAL8) || + !(desc.flags & MP_IMGFLAG_BYTE_ALIGNED) || + desc.num_planes != 1) + return MPV_ERROR_UNSUPPORTED; + + mp_image_params_guess_csp(&p->dst_params); + + // Can be unset if rendering before any video was loaded. + if (p->src_params.imgfmt) { + p->sws->src = p->src_params; + p->sws->src.w = mp_rect_w(p->src_rc); + p->sws->src.h = mp_rect_h(p->src_rc); + + p->sws->dst = p->dst_params; + p->sws->dst.w = mp_rect_w(p->dst_rc); + p->sws->dst.h = mp_rect_h(p->dst_rc); + + if (mp_sws_reinit(p->sws) < 0) + return MPV_ERROR_UNSUPPORTED; // probably + } + + p->anything_changed = false; + } + + struct mp_image wrap_img = {0}; + mp_image_set_params(&wrap_img, &p->dst_params); + + size_t bpp = wrap_img.fmt.bpp[0] / 8; + if (!bpp || bpp * wrap_img.w > *stride || *stride % bpp) + return MPV_ERROR_INVALID_PARAMETER; + + wrap_img.planes[0] = ptr; + wrap_img.stride[0] = *stride; + + struct mp_image *img = frame->current; + if (img) { + assert(p->src_params.imgfmt); + + mp_image_clear_rc_inv(&wrap_img, p->dst_rc); + + struct mp_image src = *img; + struct mp_rect src_rc = p->src_rc; + src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, src.fmt.align_x); + src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, src.fmt.align_y); + mp_image_crop_rc(&src, src_rc); + + struct mp_image dst = wrap_img; + mp_image_crop_rc(&dst, p->dst_rc); + + if (mp_sws_scale(p->sws, &dst, &src) < 0) { + mp_image_clear(&wrap_img, 0, 0, wrap_img.w, wrap_img.h); + return MPV_ERROR_GENERIC; + } + } else { + mp_image_clear(&wrap_img, 0, 0, wrap_img.w, wrap_img.h); + } + + if (p->osd) + osd_draw_on_image(p->osd, p->osd_rc, img ? 
img->pts : 0, 0, &wrap_img); + + return 0; +} + +static void destroy(struct render_backend *ctx) +{ + // nop +} + +const struct render_backend_fns render_backend_sw = { + .init = init, + .check_format = check_format, + .set_parameter = set_parameter, + .reconfig = reconfig, + .reset = reset, + .update_external = update_external, + .resize = resize, + .get_target_size = get_target_size, + .render = render, + .destroy = destroy, +}; diff --git a/video/out/mac/common.swift b/video/out/mac/common.swift new file mode 100644 index 0000000..aac7050 --- /dev/null +++ b/video/out/mac/common.swift @@ -0,0 +1,691 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa +import IOKit.pwr_mgt + +class Common: NSObject { + var mpv: MPVHelper? + var log: LogHelper + let queue: DispatchQueue = DispatchQueue(label: "io.mpv.queue") + + var window: Window? + var view: View? + var titleBar: TitleBar? + + var link: CVDisplayLink? + + let eventsLock = NSLock() + var events: Int = 0 + + var lightSensor: io_connect_t = 0 + var lastLmu: UInt64 = 0 + var lightSensorIOPort: IONotificationPortRef? 
+ + var displaySleepAssertion: IOPMAssertionID = IOPMAssertionID(0) + + var appNotificationObservers: [NSObjectProtocol] = [] + + var cursorVisibilityWanted: Bool = true + + var title: String = "mpv" { + didSet { if let window = window { window.title = title } } + } + + init(_ mpLog: OpaquePointer?) { + log = LogHelper(mpLog) + } + + func initMisc(_ vo: UnsafeMutablePointer<vo>) { + guard let mpv = mpv else { + log.sendError("Something went wrong, no MPVHelper was initialized") + exit(1) + } + + startDisplayLink(vo) + initLightSensor() + addDisplayReconfigureObserver() + addAppNotifications() + mpv.setMacOptionCallback(macOptsWakeupCallback, context: self) + } + + func initApp() { + guard let mpv = mpv else { + log.sendError("Something went wrong, no MPVHelper was initialized") + exit(1) + } + + var policy: NSApplication.ActivationPolicy = .regular + switch mpv.macOpts.macos_app_activation_policy { + case 0: + policy = .regular + case 1: + policy = .accessory + case 2: + policy = .prohibited + default: + break + } + + NSApp.setActivationPolicy(policy) + setAppIcon() + } + + func initWindow(_ vo: UnsafeMutablePointer<vo>, _ previousActiveApp: NSRunningApplication?) 
{ + let (mpv, targetScreen, wr) = getInitProperties(vo) + + guard let view = self.view else { + log.sendError("Something went wrong, no View was initialized") + exit(1) + } + + window = Window(contentRect: wr, screen: targetScreen, view: view, common: self) + guard let window = self.window else { + log.sendError("Something went wrong, no Window was initialized") + exit(1) + } + + window.setOnTop(Bool(mpv.opts.ontop), Int(mpv.opts.ontop_level)) + window.setOnAllWorkspaces(Bool(mpv.opts.all_workspaces)) + window.keepAspect = Bool(mpv.opts.keepaspect_window) + window.title = title + window.border = Bool(mpv.opts.border) + + titleBar = TitleBar(frame: wr, window: window, common: self) + + let minimized = Bool(mpv.opts.window_minimized) + window.isRestorable = false + window.isReleasedWhenClosed = false + window.setMaximized(minimized ? false : Bool(mpv.opts.window_maximized)) + window.setMinimized(minimized) + window.makeMain() + window.makeKey() + + if !minimized { + window.orderFront(nil) + } + + NSApp.activate(ignoringOtherApps: mpv.opts.focus_on_open) + + // workaround for macOS 10.15 to refocus the previous App + if (!mpv.opts.focus_on_open) { + previousActiveApp?.activate(options: .activateAllWindows) + } + } + + func initView(_ vo: UnsafeMutablePointer<vo>, _ layer: CALayer) { + let (_, _, wr) = getInitProperties(vo) + + view = View(frame: wr, common: self) + guard let view = self.view else { + log.sendError("Something went wrong, no View was initialized") + exit(1) + } + + view.layer = layer + view.wantsLayer = true + view.layerContentsPlacement = .scaleProportionallyToFit + } + + func initWindowState() { + if mpv?.opts.fullscreen ?? 
false { + DispatchQueue.main.async { + self.window?.toggleFullScreen(nil) + } + } else { + window?.isMovableByWindowBackground = true + } + } + + func uninitCommon() { + setCursorVisibility(true) + stopDisplaylink() + uninitLightSensor() + removeDisplayReconfigureObserver() + removeAppNotifications() + enableDisplaySleep() + window?.orderOut(nil) + + titleBar?.removeFromSuperview() + view?.removeFromSuperview() + } + + func displayLinkCallback(_ displayLink: CVDisplayLink, + _ inNow: UnsafePointer<CVTimeStamp>, + _ inOutputTime: UnsafePointer<CVTimeStamp>, + _ flagsIn: CVOptionFlags, + _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn + { + return kCVReturnSuccess + } + + func startDisplayLink(_ vo: UnsafeMutablePointer<vo>) { + CVDisplayLinkCreateWithActiveCGDisplays(&link) + + guard let screen = getTargetScreen(forFullscreen: false) ?? NSScreen.main, + let link = self.link else + { + log.sendWarning("Couldn't start DisplayLink, no MPVHelper, Screen or DisplayLink available") + return + } + + CVDisplayLinkSetCurrentCGDisplay(link, screen.displayID) + CVDisplayLinkSetOutputHandler(link) { link, now, out, inFlags, outFlags -> CVReturn in + return self.displayLinkCallback(link, now, out, inFlags, outFlags) + } + CVDisplayLinkStart(link) + } + + func stopDisplaylink() { + if let link = self.link, CVDisplayLinkIsRunning(link) { + CVDisplayLinkStop(link) + } + } + + func updateDisplaylink() { + guard let screen = window?.screen, let link = self.link else { + log.sendWarning("Couldn't update DisplayLink, no Screen or DisplayLink available") + return + } + + CVDisplayLinkSetCurrentCGDisplay(link, screen.displayID) + queue.asyncAfter(deadline: DispatchTime.now() + 0.1) { + self.flagEvents(VO_EVENT_WIN_STATE) + } + } + + func currentFps() -> Double { + if let link = self.link { + var actualFps = CVDisplayLinkGetActualOutputVideoRefreshPeriod(link) + let nominalData = CVDisplayLinkGetNominalOutputVideoRefreshPeriod(link) + + if (nominalData.flags & 
Int32(CVTimeFlags.isIndefinite.rawValue)) < 1 { + let nominalFps = Double(nominalData.timeScale) / Double(nominalData.timeValue) + + if actualFps > 0 { + actualFps = 1/actualFps + } + + if fabs(actualFps - nominalFps) > 0.1 { + log.sendVerbose("Falling back to nominal display refresh rate: \(nominalFps)") + return nominalFps + } else { + return actualFps + } + } + } else { + log.sendWarning("No DisplayLink available") + } + + log.sendWarning("Falling back to standard display refresh rate: 60Hz") + return 60.0 + } + + func enableDisplaySleep() { + IOPMAssertionRelease(displaySleepAssertion) + displaySleepAssertion = IOPMAssertionID(0) + } + + func disableDisplaySleep() { + if displaySleepAssertion != IOPMAssertionID(0) { return } + IOPMAssertionCreateWithName( + kIOPMAssertionTypePreventUserIdleDisplaySleep as CFString, + IOPMAssertionLevel(kIOPMAssertionLevelOn), + "io.mpv.video_playing_back" as CFString, + &displaySleepAssertion) + } + + func lmuToLux(_ v: UInt64) -> Int { + // the polinomial approximation for apple lmu value -> lux was empirically + // derived by firefox developers (Apple provides no documentation). + // https://bugzilla.mozilla.org/show_bug.cgi?id=793728 + let power_c4: Double = 1 / pow(10, 27) + let power_c3: Double = 1 / pow(10, 19) + let power_c2: Double = 1 / pow(10, 12) + let power_c1: Double = 1 / pow(10, 5) + + let lum = Double(v) + let term4: Double = -3.0 * power_c4 * pow(lum, 4.0) + let term3: Double = 2.6 * power_c3 * pow(lum, 3.0) + let term2: Double = -3.4 * power_c2 * pow(lum, 2.0) + let term1: Double = 3.9 * power_c1 * lum + + let lux = Int(ceil(term4 + term3 + term2 + term1 - 0.19)) + return lux > 0 ? 
lux : 0 + } + + var lightSensorCallback: IOServiceInterestCallback = { (ctx, service, messageType, messageArgument) -> Void in + let com = unsafeBitCast(ctx, to: Common.self) + + var outputs: UInt32 = 2 + var values: [UInt64] = [0, 0] + + var kr = IOConnectCallMethod(com.lightSensor, 0, nil, 0, nil, 0, &values, &outputs, nil, nil) + if kr == KERN_SUCCESS { + var mean = (values[0] + values[1]) / 2 + if com.lastLmu != mean { + com.lastLmu = mean + com.lightSensorUpdate() + } + } + } + + func lightSensorUpdate() { + log.sendWarning("lightSensorUpdate not implemented") + } + + func initLightSensor() { + let srv = IOServiceGetMatchingService(kIOMasterPortDefault, IOServiceMatching("AppleLMUController")) + if srv == IO_OBJECT_NULL { + log.sendVerbose("Can't find an ambient light sensor") + return + } + + lightSensorIOPort = IONotificationPortCreate(kIOMasterPortDefault) + IONotificationPortSetDispatchQueue(lightSensorIOPort, queue) + var n = io_object_t() + IOServiceAddInterestNotification(lightSensorIOPort, srv, kIOGeneralInterest, lightSensorCallback, MPVHelper.bridge(obj: self), &n) + let kr = IOServiceOpen(srv, mach_task_self_, 0, &lightSensor) + IOObjectRelease(srv) + + if kr != KERN_SUCCESS { + log.sendVerbose("Can't start ambient light sensor connection") + return + } + lightSensorCallback(MPVHelper.bridge(obj: self), 0, 0, nil) + } + + func uninitLightSensor() { + if lightSensorIOPort != nil { + IONotificationPortDestroy(lightSensorIOPort) + IOObjectRelease(lightSensor) + } + } + + var reconfigureCallback: CGDisplayReconfigurationCallBack = { (display, flags, userInfo) in + if flags.contains(.setModeFlag) { + let com = unsafeBitCast(userInfo, to: Common.self) + let displayID = com.window?.screen?.displayID ?? 
display + + if displayID == display { + com.log.sendVerbose("Detected display mode change, updating screen refresh rate") + com.flagEvents(VO_EVENT_WIN_STATE) + } + } + } + + func addDisplayReconfigureObserver() { + CGDisplayRegisterReconfigurationCallback(reconfigureCallback, MPVHelper.bridge(obj: self)) + } + + func removeDisplayReconfigureObserver() { + CGDisplayRemoveReconfigurationCallback(reconfigureCallback, MPVHelper.bridge(obj: self)) + } + + func addAppNotifications() { + appNotificationObservers.append(NotificationCenter.default.addObserver( + forName: NSApplication.didBecomeActiveNotification, + object: nil, + queue: .main, + using: { [weak self] (_) in self?.appDidBecomeActive() } + )) + appNotificationObservers.append(NotificationCenter.default.addObserver( + forName: NSApplication.didResignActiveNotification, + object: nil, + queue: .main, + using: { [weak self] (_) in self?.appDidResignActive() } + )) + } + + func removeAppNotifications() { + appNotificationObservers.forEach { NotificationCenter.default.removeObserver($0) } + appNotificationObservers.removeAll() + } + + func appDidBecomeActive() { + flagEvents(VO_EVENT_FOCUS) + } + + func appDidResignActive() { + flagEvents(VO_EVENT_FOCUS) + } + + func setAppIcon() { + if let app = NSApp as? Application, + ProcessInfo.processInfo.environment["MPVBUNDLE"] != "true" + { + NSApp.applicationIconImage = app.getMPVIcon() + } + } + + func updateCursorVisibility() { + setCursorVisibility(cursorVisibilityWanted) + } + + func setCursorVisibility(_ visible: Bool) { + NSCursor.setHiddenUntilMouseMoves(!visible && (view?.canHideCursor() ?? false)) + } + + func updateICCProfile() { + log.sendWarning("updateICCProfile not implemented") + } + + func getScreenBy(id screenID: Int) -> NSScreen? 
{ + if screenID >= NSScreen.screens.count { + log.sendInfo("Screen ID \(screenID) does not exist, falling back to current device") + return nil + } else if screenID < 0 { + return nil + } + return NSScreen.screens[screenID] + } + + func getScreenBy(name screenName: String?) -> NSScreen? { + for screen in NSScreen.screens { + if screen.localizedName == screenName { + return screen + } + } + return nil + } + + func getTargetScreen(forFullscreen fs: Bool) -> NSScreen? { + guard let mpv = mpv else { + log.sendWarning("Unexpected nil value in getTargetScreen") + return nil + } + + let screenID = fs ? mpv.opts.fsscreen_id : mpv.opts.screen_id + var name: String? + if let screenName = fs ? mpv.opts.fsscreen_name : mpv.opts.screen_name { + name = String(cString: screenName) + } + return getScreenBy(id: Int(screenID)) ?? getScreenBy(name: name) + } + + func getCurrentScreen() -> NSScreen? { + return window != nil ? window?.screen : + getTargetScreen(forFullscreen: false) ?? + NSScreen.main + } + + func getWindowGeometry(forScreen screen: NSScreen, + videoOut vo: UnsafeMutablePointer<vo>) -> NSRect { + let r = screen.convertRectToBacking(screen.frame) + let targetFrame = (mpv?.macOpts.macos_geometry_calculation ?? Int32(FRAME_VISIBLE)) == FRAME_VISIBLE + ? 
screen.visibleFrame : screen.frame + let rv = screen.convertRectToBacking(targetFrame) + + // convert origin to be relative to target screen + var originY = rv.origin.y - r.origin.y + let originX = rv.origin.x - r.origin.x + // flip the y origin, mp_rect expects the origin at the top-left + // macOS' windowing system operates from the bottom-left + originY = -(originY + rv.size.height) + var screenRC: mp_rect = mp_rect(x0: Int32(originX), + y0: Int32(originY), + x1: Int32(originX + rv.size.width), + y1: Int32(originY + rv.size.height)) + + var geo: vo_win_geometry = vo_win_geometry() + vo_calc_window_geometry2(vo, &screenRC, Double(screen.backingScaleFactor), &geo) + vo_apply_window_geometry(vo, &geo) + + let height = CGFloat(geo.win.y1 - geo.win.y0) + let width = CGFloat(geo.win.x1 - geo.win.x0) + // flip the y origin again + let y = CGFloat(-geo.win.y1) + let x = CGFloat(geo.win.x0) + return screen.convertRectFromBacking(NSMakeRect(x, y, width, height)) + } + + func getInitProperties(_ vo: UnsafeMutablePointer<vo>) -> (MPVHelper, NSScreen, NSRect) { + guard let mpv = mpv else { + log.sendError("Something went wrong, no MPVHelper was initialized") + exit(1) + } + guard let targetScreen = getTargetScreen(forFullscreen: false) ?? NSScreen.main else { + log.sendError("Something went wrong, no Screen was found") + exit(1) + } + + let wr = getWindowGeometry(forScreen: targetScreen, videoOut: vo) + + return (mpv, targetScreen, wr) + } + + // call before initApp, because on macOS +10.15 it changes the active App + func getActiveApp() -> NSRunningApplication? 
{ + return NSWorkspace.shared.runningApplications.first(where: {$0.isActive}) + } + + func flagEvents(_ ev: Int) { + eventsLock.lock() + events |= ev + eventsLock.unlock() + + guard let vout = mpv?.vo else { + log.sendWarning("vo nil in flagEvents") + return + } + vo_wakeup(vout) + } + + func checkEvents() -> Int { + eventsLock.lock() + let ev = events + events = 0 + eventsLock.unlock() + return ev + } + + func windowDidEndAnimation() {} + func windowSetToFullScreen() {} + func windowSetToWindow() {} + func windowDidUpdateFrame() {} + func windowDidChangeScreen() {} + func windowDidChangeScreenProfile() {} + func windowDidChangeBackingProperties() {} + func windowWillStartLiveResize() {} + func windowDidEndLiveResize() {} + func windowDidResize() {} + func windowDidChangeOcclusionState() {} + + @objc func control(_ vo: UnsafeMutablePointer<vo>, + events: UnsafeMutablePointer<Int32>, + request: UInt32, + data: UnsafeMutableRawPointer?) -> Int32 + { + guard let mpv = mpv else { + log.sendWarning("Unexpected nil value in Control Callback") + return VO_FALSE + } + + switch mp_voctrl(request) { + case VOCTRL_CHECK_EVENTS: + events.pointee |= Int32(checkEvents()) + return VO_TRUE + case VOCTRL_VO_OPTS_CHANGED: + var opt: UnsafeMutableRawPointer? 
+ while mpv.nextChangedOption(property: &opt) { + switch opt { + case MPVHelper.getPointer(&mpv.optsPtr.pointee.border): + DispatchQueue.main.async { + self.window?.border = Bool(mpv.opts.border) + } + case MPVHelper.getPointer(&mpv.optsPtr.pointee.fullscreen): + DispatchQueue.main.async { + self.window?.toggleFullScreen(nil) + } + case MPVHelper.getPointer(&mpv.optsPtr.pointee.ontop): fallthrough + case MPVHelper.getPointer(&mpv.optsPtr.pointee.ontop_level): + DispatchQueue.main.async { + self.window?.setOnTop(Bool(mpv.opts.ontop), Int(mpv.opts.ontop_level)) + } + case MPVHelper.getPointer(&mpv.optsPtr.pointee.all_workspaces): + DispatchQueue.main.async { + self.window?.setOnAllWorkspaces(Bool(mpv.opts.all_workspaces)) + } + case MPVHelper.getPointer(&mpv.optsPtr.pointee.keepaspect_window): + DispatchQueue.main.async { + self.window?.keepAspect = Bool(mpv.opts.keepaspect_window) + } + case MPVHelper.getPointer(&mpv.optsPtr.pointee.window_minimized): + DispatchQueue.main.async { + self.window?.setMinimized(Bool(mpv.opts.window_minimized)) + } + case MPVHelper.getPointer(&mpv.optsPtr.pointee.window_maximized): + DispatchQueue.main.async { + self.window?.setMaximized(Bool(mpv.opts.window_maximized)) + } + default: + break + } + } + return VO_TRUE + case VOCTRL_GET_DISPLAY_FPS: + let fps = data!.assumingMemoryBound(to: CDouble.self) + fps.pointee = currentFps() + return VO_TRUE + case VOCTRL_GET_HIDPI_SCALE: + let scaleFactor = data!.assumingMemoryBound(to: CDouble.self) + let screen = getCurrentScreen() + let factor = window?.backingScaleFactor ?? + screen?.backingScaleFactor ?? 
1.0 + scaleFactor.pointee = Double(factor) + return VO_TRUE + case VOCTRL_RESTORE_SCREENSAVER: + enableDisplaySleep() + return VO_TRUE + case VOCTRL_KILL_SCREENSAVER: + disableDisplaySleep() + return VO_TRUE + case VOCTRL_SET_CURSOR_VISIBILITY: + let cursorVisibility = data!.assumingMemoryBound(to: CBool.self) + cursorVisibilityWanted = cursorVisibility.pointee + DispatchQueue.main.async { + self.setCursorVisibility(self.cursorVisibilityWanted) + } + return VO_TRUE + case VOCTRL_GET_ICC_PROFILE: + let screen = getCurrentScreen() + guard var iccData = screen?.colorSpace?.iccProfileData else { + log.sendWarning("No Screen available to retrieve ICC profile") + return VO_TRUE + } + + let icc = data!.assumingMemoryBound(to: bstr.self) + iccData.withUnsafeMutableBytes { (ptr: UnsafeMutableRawBufferPointer) in + guard let baseAddress = ptr.baseAddress, ptr.count > 0 else { return } + let u8Ptr = baseAddress.assumingMemoryBound(to: UInt8.self) + icc.pointee = bstrdup(nil, bstr(start: u8Ptr, len: ptr.count)) + } + return VO_TRUE + case VOCTRL_GET_AMBIENT_LUX: + if lightSensor != 0 { + let lux = data!.assumingMemoryBound(to: Int32.self) + lux.pointee = Int32(lmuToLux(lastLmu)) + return VO_TRUE; + } + return VO_NOTIMPL + case VOCTRL_GET_UNFS_WINDOW_SIZE: + let sizeData = data!.assumingMemoryBound(to: Int32.self) + let size = UnsafeMutableBufferPointer(start: sizeData, count: 2) + var rect = window?.unfsContentFrame ?? 
NSRect(x: 0, y: 0, width: 1280, height: 720) + if let screen = window?.currentScreen, !Bool(mpv.opts.hidpi_window_scale) { + rect = screen.convertRectToBacking(rect) + } + + size[0] = Int32(rect.size.width) + size[1] = Int32(rect.size.height) + return VO_TRUE + case VOCTRL_SET_UNFS_WINDOW_SIZE: + let sizeData = data!.assumingMemoryBound(to: Int32.self) + let size = UnsafeBufferPointer(start: sizeData, count: 2) + var rect = NSMakeRect(0, 0, CGFloat(size[0]), CGFloat(size[1])) + DispatchQueue.main.async { + if let screen = self.window?.currentScreen, !Bool(self.mpv?.opts.hidpi_window_scale ?? true) { + rect = screen.convertRectFromBacking(rect) + } + self.window?.updateSize(rect.size) + } + return VO_TRUE + case VOCTRL_GET_DISPLAY_NAMES: + let dnames = data!.assumingMemoryBound(to: UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>?.self) + var array: UnsafeMutablePointer<UnsafeMutablePointer<Int8>?>? = nil + var count: Int32 = 0 + let displayName = getCurrentScreen()?.localizedName ?? "Unknown" + + SWIFT_TARRAY_STRING_APPEND(nil, &array, &count, ta_xstrdup(nil, displayName)) + SWIFT_TARRAY_STRING_APPEND(nil, &array, &count, nil) + dnames.pointee = array + return VO_TRUE + case VOCTRL_GET_DISPLAY_RES: + guard let screen = getCurrentScreen() else { + log.sendWarning("No Screen available to retrieve frame") + return VO_NOTAVAIL + } + let sizeData = data!.assumingMemoryBound(to: Int32.self) + let size = UnsafeMutableBufferPointer(start: sizeData, count: 2) + let frame = screen.convertRectToBacking(screen.frame) + size[0] = Int32(frame.size.width) + size[1] = Int32(frame.size.height) + return VO_TRUE + case VOCTRL_GET_FOCUSED: + let focus = data!.assumingMemoryBound(to: CBool.self) + focus.pointee = NSApp.isActive + return VO_TRUE + case VOCTRL_UPDATE_WINDOW_TITLE: + let titleData = data!.assumingMemoryBound(to: Int8.self) + let title = NSString(utf8String: titleData) as String? // copy now: the C string is only known to be valid during this call, not when the main queue runs + DispatchQueue.main.async { + self.title = title ?? 
"Unknown Title" + } + return VO_TRUE + default: + return VO_NOTIMPL + } + } + + let macOptsWakeupCallback: swift_wakeup_cb_fn = { ( ctx ) in + let com = unsafeBitCast(ctx, to: Common.self) + DispatchQueue.main.async { + com.macOptsUpdate() + } + } + + func macOptsUpdate() { + guard let mpv = mpv else { + log.sendWarning("Unexpected nil value in mac opts update") + return + } + + var opt: UnsafeMutableRawPointer? + while mpv.nextChangedMacOption(property: &opt) { + switch opt { + case MPVHelper.getPointer(&mpv.macOptsPtr.pointee.macos_title_bar_appearance): + titleBar?.set(appearance: Int(mpv.macOpts.macos_title_bar_appearance)) + case MPVHelper.getPointer(&mpv.macOptsPtr.pointee.macos_title_bar_material): + titleBar?.set(material: Int(mpv.macOpts.macos_title_bar_material)) + case MPVHelper.getPointer(&mpv.macOptsPtr.pointee.macos_title_bar_color): + titleBar?.set(color: mpv.macOpts.macos_title_bar_color) + default: + break + } + } + } +} diff --git a/video/out/mac/gl_layer.swift b/video/out/mac/gl_layer.swift new file mode 100644 index 0000000..dd96af7 --- /dev/null +++ b/video/out/mac/gl_layer.swift @@ -0,0 +1,322 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +import Cocoa +import OpenGL.GL +import OpenGL.GL3 + +let glVersions: [CGLOpenGLProfile] = [ + kCGLOGLPVersion_3_2_Core, + kCGLOGLPVersion_Legacy +] + +let glFormatBase: [CGLPixelFormatAttribute] = [ + kCGLPFAOpenGLProfile, + kCGLPFAAccelerated, + kCGLPFADoubleBuffer +] + +let glFormatSoftwareBase: [CGLPixelFormatAttribute] = [ + kCGLPFAOpenGLProfile, + kCGLPFARendererID, + CGLPixelFormatAttribute(UInt32(kCGLRendererGenericFloatID)), + kCGLPFADoubleBuffer +] + +let glFormatOptional: [[CGLPixelFormatAttribute]] = [ + [kCGLPFABackingStore], + [kCGLPFAAllowOfflineRenderers] +] + +let glFormat10Bit: [CGLPixelFormatAttribute] = [ + kCGLPFAColorSize, + _CGLPixelFormatAttribute(rawValue: 64), + kCGLPFAColorFloat +] + +let glFormatAutoGPU: [CGLPixelFormatAttribute] = [ + kCGLPFASupportsAutomaticGraphicsSwitching +] + +let attributeLookUp: [UInt32:String] = [ + kCGLOGLPVersion_3_2_Core.rawValue: "kCGLOGLPVersion_3_2_Core", + kCGLOGLPVersion_Legacy.rawValue: "kCGLOGLPVersion_Legacy", + kCGLPFAOpenGLProfile.rawValue: "kCGLPFAOpenGLProfile", + UInt32(kCGLRendererGenericFloatID): "kCGLRendererGenericFloatID", + kCGLPFARendererID.rawValue: "kCGLPFARendererID", + kCGLPFAAccelerated.rawValue: "kCGLPFAAccelerated", + kCGLPFADoubleBuffer.rawValue: "kCGLPFADoubleBuffer", + kCGLPFABackingStore.rawValue: "kCGLPFABackingStore", + kCGLPFAColorSize.rawValue: "kCGLPFAColorSize", + kCGLPFAColorFloat.rawValue: "kCGLPFAColorFloat", + kCGLPFAAllowOfflineRenderers.rawValue: "kCGLPFAAllowOfflineRenderers", + kCGLPFASupportsAutomaticGraphicsSwitching.rawValue: "kCGLPFASupportsAutomaticGraphicsSwitching", +] + +class GLLayer: CAOpenGLLayer { + unowned var cocoaCB: CocoaCB + var libmpv: LibmpvHelper { get { return cocoaCB.libmpv } } + + let displayLock = NSLock() + let cglContext: CGLContextObj + let cglPixelFormat: CGLPixelFormatObj + var needsFlip: Bool = false + var forceDraw: Bool = false + var surfaceSize: NSSize = NSSize(width: 0, height: 0) + var bufferDepth: GLint = 8 + + enum Draw: 
Int { case normal = 1, atomic, atomicEnd } + var draw: Draw = .normal + + let queue: DispatchQueue = DispatchQueue(label: "io.mpv.queue.draw") + + var needsICCUpdate: Bool = false { + didSet { + if needsICCUpdate == true { + update() + } + } + } + + var inLiveResize: Bool = false { + didSet { + if inLiveResize { + isAsynchronous = true + } + update(force: true) + } + } + + init(cocoaCB ccb: CocoaCB) { + cocoaCB = ccb + (cglPixelFormat, bufferDepth) = GLLayer.createPixelFormat(ccb) + cglContext = GLLayer.createContext(ccb, cglPixelFormat) + super.init() + autoresizingMask = [.layerWidthSizable, .layerHeightSizable] + backgroundColor = NSColor.black.cgColor + + if bufferDepth > 8 { + contentsFormat = .RGBA16Float + } + + var i: GLint = 1 + CGLSetParameter(cglContext, kCGLCPSwapInterval, &i) + CGLSetCurrentContext(cglContext) + + libmpv.initRender() + libmpv.setRenderUpdateCallback(updateCallback, context: self) + libmpv.setRenderControlCallback(cocoaCB.controlCallback, context: cocoaCB) + } + + // necessary for when the layer containing window changes the screen + override init(layer: Any) { + guard let oldLayer = layer as? GLLayer else { + fatalError("init(layer: Any) passed an invalid layer") + } + cocoaCB = oldLayer.cocoaCB + surfaceSize = oldLayer.surfaceSize + cglPixelFormat = oldLayer.cglPixelFormat + cglContext = oldLayer.cglContext + super.init() + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + override func canDraw(inCGLContext ctx: CGLContextObj, + pixelFormat pf: CGLPixelFormatObj, + forLayerTime t: CFTimeInterval, + displayTime ts: UnsafePointer<CVTimeStamp>?) -> Bool { + if inLiveResize == false { + isAsynchronous = false + } + return cocoaCB.backendState == .initialized && + (forceDraw || libmpv.isRenderUpdateFrame()) + } + + override func draw(inCGLContext ctx: CGLContextObj, + pixelFormat pf: CGLPixelFormatObj, + forLayerTime t: CFTimeInterval, + displayTime ts: UnsafePointer<CVTimeStamp>?) 
{ + needsFlip = false + forceDraw = false + + if draw.rawValue >= Draw.atomic.rawValue { + if draw == .atomic { + draw = .atomicEnd + } else { + atomicDrawingEnd() + } + } + + updateSurfaceSize() + libmpv.drawRender(surfaceSize, bufferDepth, ctx) + + if needsICCUpdate { + needsICCUpdate = false + cocoaCB.updateICCProfile() + } + } + + func updateSurfaceSize() { + var dims: [GLint] = [0, 0, 0, 0] + glGetIntegerv(GLenum(GL_VIEWPORT), &dims) + surfaceSize = NSSize(width: CGFloat(dims[2]), height: CGFloat(dims[3])) + + if NSEqualSizes(surfaceSize, NSZeroSize) { + surfaceSize = bounds.size + surfaceSize.width *= contentsScale + surfaceSize.height *= contentsScale + } + } + + func atomicDrawingStart() { + if draw == .normal { + NSDisableScreenUpdates() + draw = .atomic + } + } + + func atomicDrawingEnd() { + if draw.rawValue >= Draw.atomic.rawValue { + NSEnableScreenUpdates() + draw = .normal + } + } + + override func copyCGLPixelFormat(forDisplayMask mask: UInt32) -> CGLPixelFormatObj { + return cglPixelFormat + } + + override func copyCGLContext(forPixelFormat pf: CGLPixelFormatObj) -> CGLContextObj { + contentsScale = cocoaCB.window?.backingScaleFactor ?? 1.0 + return cglContext + } + + let updateCallback: mpv_render_update_fn = { (ctx) in + let layer: GLLayer = unsafeBitCast(ctx, to: GLLayer.self) + layer.update() + } + + override func display() { + displayLock.lock() + let isUpdate = needsFlip + super.display() + CATransaction.flush() + if isUpdate && needsFlip { + CGLSetCurrentContext(cglContext) + if libmpv.isRenderUpdateFrame() { + libmpv.drawRender(NSZeroSize, bufferDepth, cglContext, skip: true) + } + } + displayLock.unlock() + } + + func update(force: Bool = false) { + if force { forceDraw = true } + queue.async { + if self.forceDraw || !self.inLiveResize { + self.needsFlip = true + self.display() + } + } + } + + class func createPixelFormat(_ ccb: CocoaCB) -> (CGLPixelFormatObj, GLint) { + var pix: CGLPixelFormatObj? 
+ var depth: GLint = 8 + var err: CGLError = CGLError(rawValue: 0) + let swRender = ccb.libmpv.macOpts.cocoa_cb_sw_renderer + + if swRender != 1 { + (pix, depth, err) = GLLayer.findPixelFormat(ccb) + } + + if (err != kCGLNoError || pix == nil) && swRender != 0 { + (pix, depth, err) = GLLayer.findPixelFormat(ccb, software: true) + } + + guard let pixelFormat = pix, err == kCGLNoError else { + ccb.log.sendError("Couldn't create any CGL pixel format") + exit(1) + } + + return (pixelFormat, depth) + } + + class func findPixelFormat(_ ccb: CocoaCB, software: Bool = false) -> (CGLPixelFormatObj?, GLint, CGLError) { + var pix: CGLPixelFormatObj? + var err: CGLError = CGLError(rawValue: 0) + var npix: GLint = 0 + + for ver in glVersions { + var glBase = software ? glFormatSoftwareBase : glFormatBase + glBase.insert(CGLPixelFormatAttribute(ver.rawValue), at: 1) + + var glFormat = [glBase] + if ccb.libmpv.macOpts.cocoa_cb_10bit_context { + glFormat += [glFormat10Bit] + } + glFormat += glFormatOptional + + if !ccb.libmpv.macOpts.macos_force_dedicated_gpu { + glFormat += [glFormatAutoGPU] + } + + for index in stride(from: glFormat.count-1, through: 0, by: -1) { + let format = glFormat.flatMap { $0 } + [_CGLPixelFormatAttribute(rawValue: 0)] + err = CGLChoosePixelFormat(format, &pix, &npix) + + if err == kCGLBadAttribute || err == kCGLBadPixelFormat || pix == nil { + glFormat.remove(at: index) + } else { + let attArray = format.map({ (value: _CGLPixelFormatAttribute) -> String in + return attributeLookUp[value.rawValue] ?? String(value.rawValue) + }) + + ccb.log.sendVerbose("Created CGL pixel format with attributes: " + + "\(attArray.joined(separator: ", "))") + return (pix, glFormat.contains(glFormat10Bit) ? 16 : 8, err) + } + } + } + + let errS = String(cString: CGLErrorString(err)) + ccb.log.sendWarning("Couldn't create a " + + "\(software ? 
"software" : "hardware accelerated") " + + "CGL pixel format: \(errS) (\(err.rawValue))") + if software == false && ccb.libmpv.macOpts.cocoa_cb_sw_renderer == -1 { + ccb.log.sendWarning("Falling back to software renderer") + } + + return (pix, 8, err) + } + + class func createContext(_ ccb: CocoaCB, _ pixelFormat: CGLPixelFormatObj) -> CGLContextObj { + var context: CGLContextObj? + let error = CGLCreateContext(pixelFormat, nil, &context) + + guard let cglContext = context, error == kCGLNoError else { + let errS = String(cString: CGLErrorString(error)) + ccb.log.sendError("Couldn't create a CGLContext: " + errS) + exit(1) + } + + return cglContext + } +} diff --git a/video/out/mac/metal_layer.swift b/video/out/mac/metal_layer.swift new file mode 100644 index 0000000..7cea87c --- /dev/null +++ b/video/out/mac/metal_layer.swift @@ -0,0 +1,43 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa + +class MetalLayer: CAMetalLayer { + unowned var common: MacCommon + + init(common com: MacCommon) { + common = com + super.init() + + pixelFormat = .rgba16Float + backgroundColor = NSColor.black.cgColor + } + + // necessary for when the layer containing window changes the screen + override init(layer: Any) { + guard let oldLayer = layer as? 
MetalLayer else { + fatalError("init(layer: Any) passed an invalid layer") + } + common = oldLayer.common + super.init() + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } +} diff --git a/video/out/mac/title_bar.swift b/video/out/mac/title_bar.swift new file mode 100644 index 0000000..764c1ff --- /dev/null +++ b/video/out/mac/title_bar.swift @@ -0,0 +1,229 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa + +class TitleBar: NSVisualEffectView { + unowned var common: Common + var mpv: MPVHelper? { get { return common.mpv } } + + var systemBar: NSView? 
{ + get { return common.window?.standardWindowButton(.closeButton)?.superview } + } + static var height: CGFloat { + get { return NSWindow.frameRect(forContentRect: CGRect.zero, styleMask: .titled).size.height } + } + var buttons: [NSButton] { + get { return ([.closeButton, .miniaturizeButton, .zoomButton] as [NSWindow.ButtonType]).compactMap { common.window?.standardWindowButton($0) } } + } + + override var material: NSVisualEffectView.Material { + get { return super.material } + set { + super.material = newValue + // fix for broken deprecated materials + if material == .light || material == .dark || material == .mediumLight || + material == .ultraDark + { + state = .active + } else { + state = .followsWindowActiveState + } + + } + } + + init(frame: NSRect, window: NSWindow, common com: Common) { + let f = NSMakeRect(0, frame.size.height - TitleBar.height, + frame.size.width, TitleBar.height + 1) + common = com + super.init(frame: f) + buttons.forEach { $0.isHidden = true } + isHidden = true + alphaValue = 0 + blendingMode = .withinWindow + autoresizingMask = [.width, .minYMargin] + systemBar?.alphaValue = 0 + state = .followsWindowActiveState + wantsLayer = true + + window.contentView?.addSubview(self, positioned: .above, relativeTo: nil) + window.titlebarAppearsTransparent = true + window.styleMask.insert(.fullSizeContentView) + set(appearance: Int(mpv?.macOpts.macos_title_bar_appearance ?? 0)) + set(material: Int(mpv?.macOpts.macos_title_bar_material ?? 0)) + set(color: mpv?.macOpts.macos_title_bar_color ?? 
"#00000000") + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + // catch these events so they are not propagated to the underlying view + override func mouseDown(with event: NSEvent) { } + + override func mouseUp(with event: NSEvent) { + if event.clickCount > 1 { + let def = UserDefaults.standard + var action = def.string(forKey: "AppleActionOnDoubleClick") + + // macOS 10.10 and earlier + if action == nil { + action = def.bool(forKey: "AppleMiniaturizeOnDoubleClick") == true ? + "Minimize" : "Maximize" + } + + if action == "Minimize" { + window?.miniaturize(self) + } else if action == "Maximize" { + window?.zoom(self) + } + } + + common.window?.isMoving = false + } + + func set(appearance: Any) { + if appearance is Int { + window?.appearance = appearanceFrom(string: String(appearance as? Int ?? 0)) + } else { + window?.appearance = appearanceFrom(string: appearance as? String ?? "auto") + } + } + + func set(material: Any) { + if material is Int { + self.material = materialFrom(string: String(material as? Int ?? 0)) + } else { + self.material = materialFrom(string: material as? String ?? "titlebar") + } + } + + func set(color: Any) { + if color is String { + layer?.backgroundColor = NSColor(hex: color as? String ?? "#00000000").cgColor + } else { + let col = color as? m_color ?? 
m_color(r: 0, g: 0, b: 0, a: 0) + let red = CGFloat(col.r)/255 + let green = CGFloat(col.g)/255 + let blue = CGFloat(col.b)/255 + let alpha = CGFloat(col.a)/255 + layer?.backgroundColor = NSColor(calibratedRed: red, green: green, + blue: blue, alpha: alpha).cgColor + } + } + + func show() { + guard let window = common.window else { return } + if !window.border && !window.isInFullscreen { return } + let loc = common.view?.convert(window.mouseLocationOutsideOfEventStream, from: nil) + + buttons.forEach { $0.isHidden = false } + NSAnimationContext.runAnimationGroup({ (context) -> Void in + context.duration = 0.20 + systemBar?.animator().alphaValue = 1 + if !window.isInFullscreen && !window.isAnimating { + animator().alphaValue = 1 + isHidden = false + } + }, completionHandler: nil ) + + if loc?.y ?? 0 > TitleBar.height { + hideDelayed() + } else { + NSObject.cancelPreviousPerformRequests(withTarget: self, selector: #selector(hide), object: nil) + } + } + + @objc func hide(_ duration: TimeInterval = 0.20) { + guard let window = common.window else { return } + if window.isInFullscreen && !window.isAnimating { + alphaValue = 0 + isHidden = true + return + } + NSAnimationContext.runAnimationGroup({ (context) -> Void in + context.duration = duration + systemBar?.animator().alphaValue = 0 + animator().alphaValue = 0 + }, completionHandler: { + self.buttons.forEach { $0.isHidden = true } + self.isHidden = true + }) + } + + func hideDelayed() { + NSObject.cancelPreviousPerformRequests(withTarget: self, + selector: #selector(hide), + object: nil) + perform(#selector(hide), with: nil, afterDelay: 0.5) + } + + func appearanceFrom(string: String) -> NSAppearance? 
{ + switch string { + case "1", "aqua": + return NSAppearance(named: .aqua) + case "2", "darkAqua": + return NSAppearance(named: .darkAqua) + case "3", "vibrantLight": + return NSAppearance(named: .vibrantLight) + case "4", "vibrantDark": + return NSAppearance(named: .vibrantDark) + case "5", "aquaHighContrast": + return NSAppearance(named: .accessibilityHighContrastAqua) + case "6", "darkAquaHighContrast": + return NSAppearance(named: .accessibilityHighContrastDarkAqua) + case "7", "vibrantLightHighContrast": + return NSAppearance(named: .accessibilityHighContrastVibrantLight) + case "8", "vibrantDarkHighContrast": + return NSAppearance(named: .accessibilityHighContrastVibrantDark) + case "0", "auto": fallthrough + default: + return nil + } // NOTE(review): every case above returns, so the AppleInterfaceStyle fallback below is never reached + + + let style = UserDefaults.standard.string(forKey: "AppleInterfaceStyle") + return appearanceFrom(string: style == nil ? "aqua" : "vibrantDark") + } + + func materialFrom(string: String) -> NSVisualEffectView.Material { + switch string { // accepts either the numeric option value (as a string) or the material name + case "0", "titlebar": return .titlebar + case "1", "selection": return .selection + case "2", "menu": return .menu + case "3", "popover": return .popover + case "4", "sidebar": return .sidebar + case "5", "headerView": return .headerView + case "6", "sheet": return .sheet + case "7", "windowBackground": return .windowBackground + case "8", "hudWindow": return .hudWindow + case "9", "fullScreen": return .fullScreenUI + case "10", "toolTip": return .toolTip + case "11", "contentBackground": return .contentBackground + case "12", "underWindowBackground": return .underWindowBackground + case "13", "underPageBackground": return .underPageBackground + case "14", "dark": return .dark + case "15", "light": return .light + case "16", "mediumLight": return .mediumLight + case "17", "ultraDark": return .ultraDark + default: break // unknown values use the .titlebar fallback below + } + + return .titlebar + } +} diff --git a/video/out/mac/view.swift b/video/out/mac/view.swift new file mode 100644 index 0000000..c4776c3 --- /dev/null +++ 
b/video/out/mac/view.swift @@ -0,0 +1,297 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +import Cocoa + +class View: NSView { + unowned var common: Common + var mpv: MPVHelper? { get { return common.mpv } } + + var tracker: NSTrackingArea? + var hasMouseDown: Bool = false + + override var isFlipped: Bool { return true } + override var acceptsFirstResponder: Bool { return true } + + + init(frame: NSRect, common com: Common) { + common = com + super.init(frame: frame) + autoresizingMask = [.width, .height] + wantsBestResolutionOpenGLSurface = true + registerForDraggedTypes([ .fileURL, .URL, .string ]) + } + + required init?(coder: NSCoder) { + fatalError("init(coder:) has not been implemented") + } + + override func updateTrackingAreas() { + if let tracker = self.tracker { + removeTrackingArea(tracker) + } + + tracker = NSTrackingArea(rect: bounds, + options: [.activeAlways, .mouseEnteredAndExited, .mouseMoved, .enabledDuringMouseDrag], + owner: self, userInfo: nil) + // here tracker is guaranteed to be none-nil + addTrackingArea(tracker!) 
+ + if containsMouseLocation() { + cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_LEAVE, 0) + } + } + + override func draggingEntered(_ sender: NSDraggingInfo) -> NSDragOperation { + guard let types = sender.draggingPasteboard.types else { return [] } + if types.contains(.fileURL) || types.contains(.URL) || types.contains(.string) { + return .copy + } + return [] + } + + func isURL(_ str: String) -> Bool { + // force unwrapping is fine here, regex is guaranteed to be valid + let regex = try! NSRegularExpression(pattern: "^(https?|ftp)://[^\\s/$.?#].[^\\s]*$", + options: .caseInsensitive) + let isURL = regex.numberOfMatches(in: str, + options: [], + range: NSRange(location: 0, length: str.utf16.count)) // NSRange counts UTF-16 units, not Characters + return isURL > 0 + } + + override func performDragOperation(_ sender: NSDraggingInfo) -> Bool { + let pb = sender.draggingPasteboard + guard let types = pb.types else { return false } + + if types.contains(.fileURL) || types.contains(.URL) { + if let urls = pb.readObjects(forClasses: [NSURL.self]) as? [URL] { + let files = urls.map { $0.absoluteString } + EventsResponder.sharedInstance().handleFilesArray(files) + return true + } + } else if types.contains(.string) { + guard let str = pb.string(forType: .string) else { return false } + var filesArray: [String] = [] + + for val in str.components(separatedBy: "\n") { + let url = val.trimmingCharacters(in: .whitespacesAndNewlines) + let path = (url as NSString).expandingTildeInPath + if isURL(url) { + filesArray.append(url) + } else if path.starts(with: "/") { + filesArray.append(path) + } + } + EventsResponder.sharedInstance().handleFilesArray(filesArray) + return true + } + return false + } + + override func acceptsFirstMouse(for event: NSEvent?) -> Bool { + return true + } + + override func becomeFirstResponder() -> Bool { + return true + } + + override func resignFirstResponder() -> Bool { + return true + } + + override func mouseEntered(with event: NSEvent) { + if mpv?.mouseEnabled() ?? 
true { + cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_ENTER, 0) + } + common.updateCursorVisibility() + } + + override func mouseExited(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + cocoa_put_key_with_modifiers(SWIFT_KEY_MOUSE_LEAVE, 0) + } + common.titleBar?.hide() + common.setCursorVisibility(true) + } + + override func mouseMoved(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseMovement(event) + } + common.titleBar?.show() + } + + override func mouseDragged(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseMovement(event) + } + } + + override func mouseDown(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseDown(event) + } + } + + override func mouseUp(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseUp(event) + } + common.window?.isMoving = false + } + + override func rightMouseDown(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseDown(event) + } + } + + override func rightMouseUp(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseUp(event) + } + } + + override func otherMouseDown(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseDown(event) + } + } + + override func otherMouseUp(with event: NSEvent) { + if mpv?.mouseEnabled() ?? true { + signalMouseUp(event) + } + } + + override func magnify(with event: NSEvent) { + event.phase == .ended ? 
+ common.windowDidEndLiveResize() : common.windowWillStartLiveResize() + + common.window?.addWindowScale(Double(event.magnification)) + } + + func signalMouseDown(_ event: NSEvent) { + signalMouseEvent(event, MP_KEY_STATE_DOWN) + if event.clickCount > 1 { + signalMouseEvent(event, MP_KEY_STATE_UP) + } + } + + func signalMouseUp(_ event: NSEvent) { + signalMouseEvent(event, MP_KEY_STATE_UP) + } + + func signalMouseEvent(_ event: NSEvent, _ state: UInt32) { + hasMouseDown = state == MP_KEY_STATE_DOWN + let mpkey = getMpvButton(event) + cocoa_put_key_with_modifiers((mpkey | Int32(state)), Int32(event.modifierFlags.rawValue)) + } + + func signalMouseMovement(_ event: NSEvent) { + var point = convert(event.locationInWindow, from: nil) + point = convertToBacking(point) + point.y = -point.y + + common.window?.updateMovableBackground(point) + if !(common.window?.isMoving ?? false) { + mpv?.setMousePosition(point) + } + } + + func preciseScroll(_ event: NSEvent) { + var delta: Double + var cmd: Int32 + + if abs(event.deltaY) >= abs(event.deltaX) { + delta = Double(event.deltaY) * 0.1 + cmd = delta > 0 ? SWIFT_WHEEL_UP : SWIFT_WHEEL_DOWN + } else { + delta = Double(event.deltaX) * 0.1 + cmd = delta > 0 ? SWIFT_WHEEL_LEFT : SWIFT_WHEEL_RIGHT + } + + mpv?.putAxis(cmd, delta: abs(delta)) + } + + override func scrollWheel(with event: NSEvent) { + if !(mpv?.mouseEnabled() ?? true) { + return + } + + if event.hasPreciseScrollingDeltas { + preciseScroll(event) + } else { + let modifiers = event.modifierFlags + let deltaX = modifiers.contains(.shift) ? event.scrollingDeltaY : event.scrollingDeltaX + let deltaY = modifiers.contains(.shift) ? event.scrollingDeltaX : event.scrollingDeltaY + var mpkey: Int32 + + if abs(deltaY) >= abs(deltaX) { + mpkey = deltaY > 0 ? SWIFT_WHEEL_UP : SWIFT_WHEEL_DOWN + } else { + mpkey = deltaX > 0 ? 
SWIFT_WHEEL_LEFT : SWIFT_WHEEL_RIGHT + } + + cocoa_put_key_with_modifiers(mpkey, Int32(modifiers.rawValue)) + } + } + + func containsMouseLocation() -> Bool { + var topMargin: CGFloat = 0.0 + let menuBarHeight = NSApp.mainMenu?.menuBarHeight ?? 23.0 + + guard let window = common.window else { return false } + guard var vF = window.screen?.frame else { return false } + + if window.isInFullscreen && (menuBarHeight > 0) { + topMargin = TitleBar.height + 1 + menuBarHeight + } + + vF.size.height -= topMargin + + let vFW = window.convertFromScreen(vF) + let vFV = convert(vFW, from: nil) + let pt = convert(window.mouseLocationOutsideOfEventStream, from: nil) + + var clippedBounds = bounds.intersection(vFV) + if !window.isInFullscreen { + clippedBounds.origin.y += TitleBar.height + clippedBounds.size.height -= TitleBar.height + } + return clippedBounds.contains(pt) + } + + func canHideCursor() -> Bool { + guard let window = common.window else { return false } + return !hasMouseDown && containsMouseLocation() && window.isKeyWindow + } + + func getMpvButton(_ event: NSEvent) -> Int32 { + let buttonNumber = event.buttonNumber + switch (buttonNumber) { + case 0: return SWIFT_MBTN_LEFT + case 1: return SWIFT_MBTN_RIGHT + case 2: return SWIFT_MBTN_MID + case 3: return SWIFT_MBTN_BACK + case 4: return SWIFT_MBTN_FORWARD + default: return SWIFT_MBTN9 + Int32(buttonNumber - 5) + } + } +} diff --git a/video/out/mac/window.swift b/video/out/mac/window.swift new file mode 100644 index 0000000..7b1a858 --- /dev/null +++ b/video/out/mac/window.swift @@ -0,0 +1,593 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
// NSWindow subclass that is also its own delegate. It tracks the windowed
// ("unfs") content frame and screen, drives both native and custom fullscreen
// transitions, and forwards window notifications to `common` and mpv.
class Window: NSWindow, NSWindowDelegate {
    weak var common: Common! = nil
    var mpv: MPVHelper? { get { return common.mpv } }

    // Screen bookkeeping used by the fullscreen transitions and by
    // windowDidChangeScreen.
    var targetScreen: NSScreen?
    var previousScreen: NSScreen?
    var currentScreen: NSScreen?
    // Screen the window was on when the windowed content frame was last saved.
    var unfScreen: NSScreen?

    // Last known content frame (screen coordinates) while not in fullscreen.
    var unfsContentFrame: NSRect?
    var isInFullscreen: Bool = false
    var isMoving: Bool = false
    // Style mask in effect before the most recent styleMask assignment.
    var previousStyleMask: NSWindow.StyleMask = [.titled, .closable, .miniaturizable, .resizable]

    // isAnimating is written under animationLock; waitForAnimation() blocks on it.
    var isAnimating: Bool = false
    let animationLock: NSCondition = NSCondition()

    // Backing-store (pixel) versions of the windowed content frame and the
    // current frame. Falls back to a 160x90 rect when no frame was saved yet.
    var unfsContentFramePixel: NSRect { get { return convertToBacking(unfsContentFrame ?? NSRect(x: 0, y: 0, width: 160, height: 90)) } }
    var framePixel: NSRect { get { return convertToBacking(frame) } }

    // On change: refreshes the saved windowed content frame (unless in
    // fullscreen), then either locks the content aspect ratio to it or resets
    // the resize increments to 1x1.
    var keepAspect: Bool = true {
        didSet {
            if let contentViewFrame = contentView?.frame, !isInFullscreen {
                unfsContentFrame = convertToScreen(contentViewFrame)
            }

            if keepAspect {
                contentAspectRatio = unfsContentFrame?.size ?? contentAspectRatio
            } else {
                resizeIncrements = NSSize(width: 1.0, height: 1.0)
            }
        }
    }

    // Hides the custom title bar when the border is switched off. Nothing is
    // done here when it is switched back on.
    var border: Bool = true {
        didSet { if !border { common.titleBar?.hide() } }
    }

    override var canBecomeKey: Bool { return true }
    override var canBecomeMain: Bool { return true }

    // Remembers the old mask in previousStyleMask and restores the first
    // responder and the title after the mask has been changed.
    override var styleMask: NSWindow.StyleMask {
        get { return super.styleMask }
        set {
            let responder = firstResponder
            let windowTitle = title
            previousStyleMask = super.styleMask
            super.styleMask = newValue
            makeFirstResponder(responder)
            title = windowTitle
        }
    }

    // Creates the window on `screen`, embeds `view` into the content view,
    // records the initial windowed frame/screens and registers the window size,
    // minimize and zoom selectors with the application's menu bar.
    convenience init(contentRect: NSRect, screen: NSScreen?, view: NSView, common com: Common) {
        self.init(contentRect: contentRect,
                  styleMask: [.titled, .closable, .miniaturizable, .resizable],
                  backing: .buffered, defer: false, screen: screen)

        // workaround for an AppKit bug where the NSWindow can't be placed on a
        // non-main NSScreen outside the main screen's frame bounds
        if let wantedScreen = screen, screen != NSScreen.main {
            var absoluteWantedOrigin = contentRect.origin
            absoluteWantedOrigin.x += wantedScreen.frame.origin.x
            absoluteWantedOrigin.y += wantedScreen.frame.origin.y

            if !NSEqualPoints(absoluteWantedOrigin, self.frame.origin) {
                self.setFrameOrigin(absoluteWantedOrigin)
            }
        }

        common = com
        title = com.title
        minSize = NSMakeSize(160, 90)
        collectionBehavior = .fullScreenPrimary
        delegate = self

        if let cView = contentView {
            cView.addSubview(view)
            view.frame = cView.frame
            unfsContentFrame = convertToScreen(cView.frame)
        }

        targetScreen = screen
        currentScreen = screen
        unfScreen = screen

        if let app = NSApp as? Application {
            app.menuBar.register(#selector(setHalfWindowSize), for: MPM_H_SIZE)
            app.menuBar.register(#selector(setNormalWindowSize), for: MPM_N_SIZE)
            app.menuBar.register(#selector(setDoubleWindowSize), for: MPM_D_SIZE)
            app.menuBar.register(#selector(performMiniaturize(_:)), for: MPM_MINIMIZE)
            app.menuBar.register(#selector(performZoom(_:)), for: MPM_ZOOM)
        }
    }

    // Starts a fullscreen toggle. Ignored while a previous toggle is still
    // animating. Picks the target screen, saves the windowed frame/screen when
    // entering, moves the window to the target screen if needed and then uses
    // either the native AppKit path or the custom setToFullScreen/setToWindow.
    override func toggleFullScreen(_ sender: Any?) {
        if isAnimating {
            return
        }

        animationLock.lock()
        isAnimating = true
        animationLock.unlock()

        targetScreen = common.getTargetScreen(forFullscreen: !isInFullscreen)
        if targetScreen == nil && previousScreen == nil {
            targetScreen = screen
        } else if targetScreen == nil {
            targetScreen = previousScreen
            previousScreen = nil
        } else {
            previousScreen = screen
        }

        if let contentViewFrame = contentView?.frame, !isInFullscreen {
            unfsContentFrame = convertToScreen(contentViewFrame)
            unfScreen = screen
        }
        // move window to target screen when going to fullscreen
        if let tScreen = targetScreen, !isInFullscreen && (tScreen != screen) {
            let frame = calculateWindowPosition(for: tScreen, withoutBounds: false)
            setFrame(frame, display: true)
        }

        // native_fs defaults to true when mpv is unavailable
        if Bool(mpv?.opts.native_fs ?? true) {
            super.toggleFullScreen(sender)
        } else {
            if !isInFullscreen {
                setToFullScreen()
            }
            else {
                setToWindow()
            }
        }
    }

    // Only this window takes part in the custom enter animation.
    func customWindowsToEnterFullScreen(for window: NSWindow) -> [NSWindow]? {
        return [window]
    }

    // Only this window takes part in the custom exit animation.
    func customWindowsToExitFullScreen(for window: NSWindow) -> [NSWindow]? {
        return [window]
    }

    // Custom enter animation: hides the title bar and animates the window to
    // the target screen's full frame.
    func window(_ window: NSWindow, startCustomAnimationToEnterFullScreenWithDuration duration: TimeInterval) {
        guard let tScreen = targetScreen else { return }
        common.view?.layerContentsPlacement = .scaleProportionallyToFit
        common.titleBar?.hide()
        NSAnimationContext.runAnimationGroup({ (context) -> Void in
            context.duration = getFsAnimationDuration(duration - 0.05)
            window.animator().setFrame(tScreen.frame, display: true)
        }, completionHandler: nil)
    }

    // Custom exit animation in two steps: first jump (duration 0) to the
    // windowed frame aspect-fitted into the current screen, then remove the
    // fullscreen style and animate to the final windowed frame.
    func window(_ window: NSWindow, startCustomAnimationToExitFullScreenWithDuration duration: TimeInterval) {
        guard let tScreen = targetScreen, let currentScreen = screen else { return }
        let newFrame = calculateWindowPosition(for: tScreen, withoutBounds: tScreen == screen)
        let intermediateFrame = aspectFit(rect: newFrame, in: currentScreen.frame)
        common.titleBar?.hide(0.0)

        NSAnimationContext.runAnimationGroup({ (context) -> Void in
            context.duration = 0.0
            common.view?.layerContentsPlacement = .scaleProportionallyToFill
            window.animator().setFrame(intermediateFrame, display: true)
        }, completionHandler: {
            NSAnimationContext.runAnimationGroup({ (context) -> Void in
                context.duration = self.getFsAnimationDuration(duration - 0.05)
                self.styleMask.remove(.fullScreen)
                window.animator().setFrame(newFrame, display: true)
            }, completionHandler: nil)
        })
    }

    // Native fullscreen entered: publish the state to mpv and finish the
    // animation using the current frame.
    func windowDidEnterFullScreen(_ notification: Notification) {
        isInFullscreen = true
        mpv?.setOption(fullscreen: isInFullscreen)
        common.updateCursorVisibility()
        endAnimation(frame)
        common.titleBar?.show()
    }

    // Native fullscreen left: publish the state to mpv and finish the
    // animation at the computed windowed frame.
    func windowDidExitFullScreen(_ notification: Notification) {
        guard let tScreen = targetScreen else { return }
        isInFullscreen = false
        mpv?.setOption(fullscreen: isInFullscreen)
        endAnimation(calculateWindowPosition(for: tScreen, withoutBounds: targetScreen == screen))
        common.view?.layerContentsPlacement = .scaleProportionallyToFit
    }

    // Entering fullscreen failed: go back to the windowed frame.
    func windowDidFailToEnterFullScreen(_ window: NSWindow) {
        guard let tScreen = targetScreen else { return }
        let newFrame = calculateWindowPosition(for: tScreen, withoutBounds: targetScreen == screen)
        setFrame(newFrame, display: true)
        endAnimation()
    }

    // Leaving fullscreen failed: keep covering the target screen.
    func windowDidFailToExitFullScreen(_ window: NSWindow) {
        guard let targetFrame = targetScreen?.frame else { return }
        setFrame(targetFrame, display: true)
        endAnimation()
        common.view?.layerContentsPlacement = .scaleProportionallyToFit
    }

    // Marks the transition as finished and wakes up waitForAnimation(). When a
    // non-zero frame is passed while still animating, the window is first
    // animated (0.01s) to that frame.
    func endAnimation(_ newFrame: NSRect = NSZeroRect) {
        if !NSEqualRects(newFrame, NSZeroRect) && isAnimating {
            NSAnimationContext.runAnimationGroup({ (context) -> Void in
                context.duration = 0.01
                self.animator().setFrame(newFrame, display: true)
            }, completionHandler: nil )
        }

        animationLock.lock()
        isAnimating = false
        animationLock.signal()
        animationLock.unlock()
        common.windowDidEndAnimation()
    }

    // Non-native fullscreen: borderless on macOS 11+, otherwise the fullScreen
    // style bit; auto-hides menu bar and dock and covers the target screen.
    func setToFullScreen() {
        guard let targetFrame = targetScreen?.frame else { return }

        if #available(macOS 11.0, *) {
            styleMask = .borderless
            common.titleBar?.hide(0.0)
        } else {
            styleMask.insert(.fullScreen)
        }

        NSApp.presentationOptions = [.autoHideMenuBar, .autoHideDock]
        setFrame(targetFrame, display: true)
        endAnimation()
        isInFullscreen = true
        mpv?.setOption(fullscreen: isInFullscreen)
        common.windowSetToFullScreen()
    }

    // Non-native return to windowed mode: restores the previous style mask
    // (macOS 11+) or clears the fullScreen bit, resets the presentation options
    // and moves the window back to its computed windowed frame.
    func setToWindow() {
        guard let tScreen = targetScreen else { return }

        if #available(macOS 11.0, *) {
            styleMask = previousStyleMask
            common.titleBar?.hide(0.0)
        } else {
            styleMask.remove(.fullScreen)
        }

        let newFrame = calculateWindowPosition(for: tScreen, withoutBounds: targetScreen == screen)
        NSApp.presentationOptions = []
        setFrame(newFrame, display: true)
        endAnimation()
        isInFullscreen = false
        mpv?.setOption(fullscreen: isInFullscreen)
        common.windowSetToWindow()
    }

    // Blocks the calling thread until isAnimating is false (signalled by
    // endAnimation()).
    func waitForAnimation() {
        animationLock.lock()
        while(isAnimating){
            animationLock.wait()
        }
        animationLock.unlock()
    }

    // Returns the configured macos_fs_animation_duration converted from
    // milliseconds to seconds, or `def` when the option is negative/unavailable.
    func getFsAnimationDuration(_ def: Double) -> Double {
        let duration = mpv?.macOpts.macos_fs_animation_duration ?? -1
        if duration < 0 {
            return def
        } else {
            return Double(duration)/1000
        }
    }

    // Sets the window level. With `state` true, ontopLevel -1/-2/-3 select
    // floating, statusBar + 1 and the desktop window level; any other value is
    // used as a raw level. With `state` false the level is reset to normal.
    func setOnTop(_ state: Bool, _ ontopLevel: Int) {
        if state {
            switch ontopLevel {
            case -1:
                level = .floating
            case -2:
                level = .statusBar + 1
            case -3:
                level = NSWindow.Level(Int(CGWindowLevelForKey(.desktopWindow)))
            default:
                level = NSWindow.Level(ontopLevel)
            }
            collectionBehavior.remove(.transient)
            collectionBehavior.insert(.managed)
        } else {
            level = .normal
        }
    }

    // Adds or removes the canJoinAllSpaces collection behavior.
    func setOnAllWorkspaces(_ state: Bool) {
        if state {
            collectionBehavior.insert(.canJoinAllSpaces)
        } else {
            collectionBehavior.remove(.canJoinAllSpaces)
        }
    }

    // Miniaturizes or deminiaturizes the window if it is not already in the
    // wanted state.
    func setMinimized(_ stateWanted: Bool) {
        if isMiniaturized == stateWanted { return }

        if stateWanted {
            performMiniaturize(self)
        } else {
            deminiaturize(self)
        }
    }

    // Toggles zoom if the window is not already in the wanted state.
    func setMaximized(_ stateWanted: Bool) {
        if isZoomed == stateWanted { return }

        zoom(self)
    }

    // Outside fullscreen, dragging by the background is allowed wherever mpv
    // says so (default true); in fullscreen it is always disabled.
    func updateMovableBackground(_ pos: NSPoint) {
        if !isInFullscreen {
            isMovableByWindowBackground = mpv?.canBeDraggedAt(pos) ?? true
        } else {
            isMovableByWindowBackground = false
        }
    }

    // Applies a new content rect: stores it as the windowed content frame and
    // resizes the window to the matching frame rect.
    // NOTE(review): the guard compares the content rect against the window
    // frame rect - confirm this is intended.
    func updateFrame(_ rect: NSRect) {
        if rect != frame {
            let cRect = frameRect(forContentRect: rect)
            unfsContentFrame = rect
            setFrame(cRect, display: true)
            common.windowDidUpdateFrame()
        }
    }

    // Resizes the content to `size`, keeping it centered on the current
    // content rect. In fullscreen only the saved windowed frame is updated.
    func updateSize(_ size: NSSize) {
        if let currentSize = contentView?.frame.size, size != currentSize {
            let newContentFrame = centeredContentSize(for: frame, size: size)
            if !isInFullscreen {
                updateFrame(newContentFrame)
            } else {
                unfsContentFrame = newContentFrame
            }
        }
    }

    // Rejects (and logs) frames smaller than minSize; otherwise sets the frame
    // and re-applies the content aspect ratio when keepAspect is on.
    override func setFrame(_ frameRect: NSRect, display flag: Bool) {
        if frameRect.width < minSize.width || frameRect.height < minSize.height {
            common.log.sendVerbose("tried to set too small window size: \(frameRect.size)")
            return
        }

        super.setFrame(frameRect, display: flag)

        if let size = unfsContentFrame?.size, keepAspect {
            contentAspectRatio = size
        }
    }

    // Returns a rect of size `sz` centered on the content rect of frame `rect`.
    func centeredContentSize(for rect: NSRect, size sz: NSSize) -> NSRect {
        let cRect = contentRect(forFrameRect: rect)
        let dx = (cRect.size.width - sz.width) / 2
        let dy = (cRect.size.height - sz.height) / 2
        return NSInsetRect(cRect, dx, dy)
    }

    // Scales `r` uniformly so that it fits inside `rTarget` and centers it there.
    func aspectFit(rect r: NSRect, in rTarget: NSRect) -> NSRect {
        var s = rTarget.width / r.width
        if r.height*s > rTarget.height {
            s = rTarget.height / r.height
        }
        let w = r.width * s
        let h = r.height * s
        return NSRect(x: rTarget.midX - w/2, y: rTarget.midY - h/2, width: w, height: h)
    }

    // Computes the windowed frame on `tScreen` so that the window keeps the
    // relative position it had on the screen it was saved on (unfScreen).
    // Unless `withoutBounds` is set, the result is additionally clamped to the
    // target screen's frame. Falls back to the current frame when no windowed
    // frame/screen has been saved.
    func calculateWindowPosition(for tScreen: NSScreen, withoutBounds: Bool) -> NSRect {
        guard let contentFrame = unfsContentFrame, let screen = unfScreen else {
            return frame
        }
        var newFrame = frameRect(forContentRect: contentFrame)
        let targetFrame = tScreen.frame
        let targetVisibleFrame = tScreen.visibleFrame
        let unfsScreenFrame = screen.frame
        let visibleWindow = NSIntersectionRect(unfsScreenFrame, newFrame)

        // calculate visible area of every side
        let left = newFrame.origin.x - unfsScreenFrame.origin.x
        let right = unfsScreenFrame.size.width -
            (newFrame.origin.x - unfsScreenFrame.origin.x + newFrame.size.width)
        let bottom = newFrame.origin.y - unfsScreenFrame.origin.y
        let top = unfsScreenFrame.size.height -
            (newFrame.origin.y - unfsScreenFrame.origin.y + newFrame.size.height)

        // normalize visible areas, decide which one to take horizontal/vertical
        var xPer = (unfsScreenFrame.size.width - visibleWindow.size.width)
        var yPer = (unfsScreenFrame.size.height - visibleWindow.size.height)
        if xPer != 0 { xPer = (left >= 0 || right < 0 ? left : right) / xPer }
        if yPer != 0 { yPer = (bottom >= 0 || top < 0 ? bottom : top) / yPer }

        // calculate visible area for every side for target screen
        let xNewLeft = targetFrame.origin.x +
            (targetFrame.size.width - visibleWindow.size.width) * xPer
        let xNewRight = targetFrame.origin.x + targetFrame.size.width -
            (targetFrame.size.width - visibleWindow.size.width) * xPer - newFrame.size.width
        let yNewBottom = targetFrame.origin.y +
            (targetFrame.size.height - visibleWindow.size.height) * yPer
        let yNewTop = targetFrame.origin.y + targetFrame.size.height -
            (targetFrame.size.height - visibleWindow.size.height) * yPer - newFrame.size.height

        // calculate new coordinates, decide which one to take horizontal/vertical
        newFrame.origin.x = left >= 0 || right < 0 ? xNewLeft : xNewRight
        newFrame.origin.y = bottom >= 0 || top < 0 ? yNewBottom : yNewTop

        // don't place new window on top of a visible menubar
        let topMar = targetFrame.size.height -
            (newFrame.origin.y - targetFrame.origin.y + newFrame.size.height)
        let menuBarHeight = targetFrame.size.height -
            (targetVisibleFrame.size.height + targetVisibleFrame.origin.y)
        if topMar < menuBarHeight {
            newFrame.origin.y -= top - menuBarHeight
        }

        if withoutBounds {
            return newFrame
        }

        // screen bounds right and left
        if newFrame.origin.x + newFrame.size.width > targetFrame.origin.x + targetFrame.size.width {
            newFrame.origin.x = targetFrame.origin.x + targetFrame.size.width - newFrame.size.width
        }
        if newFrame.origin.x < targetFrame.origin.x {
            newFrame.origin.x = targetFrame.origin.x
        }

        // screen bounds top and bottom
        if newFrame.origin.y + newFrame.size.height > targetFrame.origin.y + targetFrame.size.height {
            newFrame.origin.y = targetFrame.origin.y + targetFrame.size.height - newFrame.size.height
        }
        if newFrame.origin.y < targetFrame.origin.y {
            newFrame.origin.y = targetFrame.origin.y
        }
        return newFrame
    }

    // Custom frame constraint. The proposed frame is returned untouched while
    // animating into fullscreen, while resting in fullscreen, or when the
    // window sits at the desktop window level. Otherwise the frame is pulled
    // back so it does not leave the visible frame of the relevant screen.
    override func constrainFrameRect(_ frameRect: NSRect, to tScreen: NSScreen?) -> NSRect {
        if (isAnimating && !isInFullscreen) || (!isAnimating && isInFullscreen ||
            level == NSWindow.Level(Int(CGWindowLevelForKey(.desktopWindow))))
        {
            return frameRect
        }

        guard let ts: NSScreen = tScreen ?? screen ?? NSScreen.main else {
            return frameRect
        }
        var nf: NSRect = frameRect
        let of: NSRect = frame
        let vf: NSRect = (isAnimating ? (targetScreen ?? ts) : ts).visibleFrame
        let ncf: NSRect = contentRect(forFrameRect: nf)

        // screen bounds top and bottom
        if NSMaxY(nf) > NSMaxY(vf) {
            nf.origin.y = NSMaxY(vf) - NSHeight(nf)
        }
        if NSMaxY(ncf) < NSMinY(vf) {
            nf.origin.y = NSMinY(vf) + NSMinY(ncf) - NSMaxY(ncf)
        }

        // screen bounds right and left
        if NSMinX(nf) > NSMaxX(vf) {
            nf.origin.x = NSMaxX(vf) - NSWidth(nf)
        }
        if NSMaxX(nf) < NSMinX(vf) {
            nf.origin.x = NSMinX(vf)
        }

        if NSHeight(nf) < NSHeight(vf) && NSHeight(of) > NSHeight(vf) && !isInFullscreen {
            // If the window height is smaller than the visible frame, but it was
            // bigger previously recenter the smaller window vertically. This is
            // needed to counter the 'snap to top' behaviour.
            // NOTE(review): the new y does not add NSMinY(vf); looks like this
            // assumes the visible frame starts at y = 0 - confirm for screens
            // with a dock at the bottom or a non-zero origin.
            nf.origin.y = (NSHeight(vf) - NSHeight(nf)) / 2
        }
        return nf
    }

    // Menu actions registered in init.
    @objc func setNormalWindowSize() { setWindowScale(1.0) }
    @objc func setHalfWindowSize() { setWindowScale(0.5) }
    @objc func setDoubleWindowSize() { setWindowScale(2.0) }

    // Sets mpv's window-scale property via a command string.
    func setWindowScale(_ scale: Double) {
        mpv?.command("set window-scale \(scale)")
    }

    // Adds to mpv's window-scale property; ignored in fullscreen.
    func addWindowScale(_ scale: Double) {
        if !isInFullscreen {
            mpv?.command("add window-scale \(scale)")
        }
    }

    // Tracks screen changes: outside of animations the new screen is stored in
    // previousScreen, and `common` is told to refresh the display link.
    func windowDidChangeScreen(_ notification: Notification) {
        if screen == nil {
            return
        }
        if !isAnimating && (currentScreen != screen) {
            previousScreen = screen
        }
        if currentScreen != screen {
            common.updateDisplaylink()
            common.windowDidChangeScreen()
        }
        currentScreen = screen
    }

    func windowDidChangeScreenProfile(_ notification: Notification) {
        common.windowDidChangeScreenProfile()
    }

    // Forwards the change and additionally flags a DPI event.
    func windowDidChangeBackingProperties(_ notification: Notification) {
        common.windowDidChangeBackingProperties()
        common.flagEvents(VO_EVENT_DPI)
    }

    func windowWillStartLiveResize(_ notification: Notification) {
        common.windowWillStartLiveResize()
    }

    // After a live resize: sync the maximized option and, when neither
    // animating nor in fullscreen, save the new windowed content frame.
    func windowDidEndLiveResize(_ notification: Notification) {
        common.windowDidEndLiveResize()
        mpv?.setOption(maximized: isZoomed)

        if let contentViewFrame = contentView?.frame,
               !isAnimating && !isInFullscreen
        {
            unfsContentFrame = convertToScreen(contentViewFrame)
        }
    }

    func windowDidResize(_ notification: Notification) {
        common.windowDidResize()
    }

    // Never lets AppKit close the window; a close key event is queued instead.
    func windowShouldClose(_ sender: NSWindow) -> Bool {
        cocoa_put_key(MP_KEY_CLOSE_WIN)
        return false
    }

    func windowDidMiniaturize(_ notification: Notification) {
        mpv?.setOption(minimized: true)
    }

    func windowDidDeminiaturize(_ notification: Notification) {
        mpv?.setOption(minimized: false)
    }

    // Always show the cursor once the window is no longer key.
    func windowDidResignKey(_ notification: Notification) {
        common.setCursorVisibility(true)
    }

    func windowDidBecomeKey(_ notification: Notification) {
        common.updateCursorVisibility()
    }

    // Only reacts when the window has become visible.
    func windowDidChangeOcclusionState(_ notification: Notification) {
        if occlusionState.contains(.visible) {
            common.windowDidChangeOcclusionState()
            common.updateCursorVisibility()
        }
    }

    // isMoving is only set here; it is not reset anywhere in this class.
    func windowWillMove(_ notification: Notification) {
        isMoving = true
    }

    func windowDidMove(_ notification: Notification) {
        mpv?.setOption(maximized: isZoomed)
    }
}
// Common subclass for the Metal-layer based macOS backend. It owns the
// MetalLayer and a PreciseTimer, and uses swapLock/swapTime to pace
// swapBuffer() against the display link callback.
class MacCommon: Common {
    @objc var layer: MetalLayer?

    var timer: PreciseTimer?
    // Number of display link signals not yet consumed by swapBuffer();
    // accessed under swapLock.
    var swapTime: UInt64 = 0
    let swapLock: NSCondition = NSCondition()

    // Set when the ICC profile should be refreshed on the next swapBuffer().
    var needsICCUpdate: Bool = false

    // Creates the "mac" log, the mpv helper and the timer, then creates the
    // layer and runs initMisc synchronously on the main queue.
    @objc init(_ vo: UnsafeMutablePointer<vo>) {
        let macLog = mp_log_new(vo, vo.pointee.log, "mac")
        super.init(macLog)
        mpv = MPVHelper(vo, log)
        timer = PreciseTimer(common: self)

        DispatchQueue.main.sync {
            layer = MetalLayer(common: self)
            initMisc(vo)
        }
    }

    // (Re)configures the output on the main queue: initializes the app, creates
    // view and window on first use, resizes the window when the requested size
    // differs from the saved windowed size and schedules an ICC update.
    // Terminates the process when no MetalLayer exists. Always returns true.
    @objc func config(_ vo: UnsafeMutablePointer<vo>) -> Bool {
        mpv?.vo = vo

        DispatchQueue.main.sync {
            let lastActiveApp = getActiveApp()
            initApp()

            let windowRect = getInitProperties(vo).2

            guard let metalLayer = self.layer else {
                log.sendError("Something went wrong, no MetalLayer was initialized")
                exit(1)
            }

            if window == nil {
                initView(vo, metalLayer)
                initWindow(vo, lastActiveApp)
                initWindowState()
            }

            let currentSize = window?.unfsContentFramePixel.size ?? NSZeroSize
            if !NSEqualSizes(currentSize, windowRect.size) {
                window?.updateSize(windowRect.size)
            }

            windowDidResize()
            needsICCUpdate = true
        }

        return true
    }

    // Waits for a running fullscreen animation, stops the timer and closes the
    // window on the main queue before the common teardown.
    @objc func uninit(_ vo: UnsafeMutablePointer<vo>) {
        window?.waitForAnimation()

        timer?.terminate()

        DispatchQueue.main.sync {
            window?.delegate = nil
            window?.close()

            uninitCommon()
        }
    }

    // Unless the system render timer is selected, blocks until the display
    // link callback has signalled at least once, then consumes all signals.
    // Afterwards performs a pending ICC profile update.
    @objc func swapBuffer() {
        let renderTimer = mpv?.macOpts.macos_render_timer ?? Int32(RENDER_TIMER_CALLBACK)
        if renderTimer != RENDER_TIMER_SYSTEM {
            swapLock.lock()
            while swapTime < 1 {
                swapLock.wait()
            }
            swapTime = 0
            swapLock.unlock()
        }

        guard needsICCUpdate else { return }
        needsICCUpdate = false
        updateICCProfile()
    }

    // Publishes the new render size to the vo and flags resize + expose.
    func updateRenderSize(_ size: NSSize) {
        mpv?.vo.pointee.dwidth = Int32(size.width)
        mpv?.vo.pointee.dheight = Int32(size.height)
        flagEvents(VO_EVENT_RESIZE | VO_EVENT_EXPOSE)
    }

    // Display link tick. With the system render timer nothing is signalled.
    // With the precise timer (and a timer instance) the wake-up is scheduled at
    // the output host time; in every other case it is signalled immediately.
    override func displayLinkCallback(_ displayLink: CVDisplayLink,
                                      _ inNow: UnsafePointer<CVTimeStamp>,
                                      _ inOutputTime: UnsafePointer<CVTimeStamp>,
                                      _ flagsIn: CVOptionFlags,
                                      _ flagsOut: UnsafeMutablePointer<CVOptionFlags>) -> CVReturn
    {
        let frameTimer = mpv?.macOpts.macos_render_timer ?? Int32(RENDER_TIMER_CALLBACK)
        let signalSwap = {
            self.swapLock.lock()
            self.swapTime += 1
            self.swapLock.signal()
            self.swapLock.unlock()
        }

        if frameTimer == RENDER_TIMER_SYSTEM {
            return kCVReturnSuccess
        }

        if let preciseTimer = self.timer, frameTimer == RENDER_TIMER_PRECISE {
            preciseTimer.scheduleAt(time: inOutputTime.pointee.hostTime, closure: signalSwap)
        } else {
            signalSwap()
        }

        return kCVReturnSuccess
    }

    // Keeps the timer policy period in sync with the current refresh rate.
    override func startDisplayLink(_ vo: UnsafeMutablePointer<vo>) {
        super.startDisplayLink(vo)
        timer?.updatePolicy(periodSeconds: 1 / currentFps())
    }

    // Keeps the timer policy period in sync with the current refresh rate.
    override func updateDisplaylink() {
        super.updateDisplaylink()
        timer?.updatePolicy(periodSeconds: 1 / currentFps())
    }

    override func lightSensorUpdate() {
        flagEvents(VO_EVENT_AMBIENT_LIGHTING_CHANGED)
    }

    // Applies the color space of the window's screen to the layer and flags an
    // ICC change; logs a warning when no color space is available.
    @objc override func updateICCProfile() {
        if let screenColorSpace = window?.screen?.colorSpace {
            layer?.colorspace = screenColorSpace.cgColorSpace
            flagEvents(VO_EVENT_ICC_PROFILE_CHANGED)
        } else {
            log.sendWarning("Couldn't update ICC Profile, no color space available")
        }
    }

    // Propagates the window's pixel size as the new render size.
    override func windowDidResize() {
        if let currentWindow = window {
            updateRenderSize(currentWindow.framePixel.size)
        } else {
            log.sendWarning("No window available on window resize event")
        }
    }

    // The actual update is deferred to the next swapBuffer().
    override func windowDidChangeScreenProfile() {
        needsICCUpdate = true
    }

    // Adopts the window's backing scale factor (1 without a window) and
    // recomputes the render size.
    override func windowDidChangeBackingProperties() {
        layer?.contentsScale = window?.backingScaleFactor ?? 1
        windowDidResize()
    }
}
output: '@BASENAME@.h', + command: [wayland['scanner'], 'client-header', '@INPUT@', '@OUTPUT@'], + ) +endforeach + +lib_client_protocols = static_library('protocols', + wl_protocols_source + wl_protocols_headers, + dependencies: wayland['deps'][0]) + +client_protocols = declare_dependency(link_with: lib_client_protocols, + sources: wl_protocols_headers) + +dependencies += [client_protocols, wayland['deps']] + +sources += files('wayland_common.c') diff --git a/video/out/opengl/angle_dynamic.c b/video/out/opengl/angle_dynamic.c new file mode 100644 index 0000000..2483828 --- /dev/null +++ b/video/out/opengl/angle_dynamic.c @@ -0,0 +1,39 @@ +#include <windows.h> + +#include "angle_dynamic.h" + +#include "common/common.h" +#include "osdep/threads.h" + +#if HAVE_EGL_ANGLE_LIB +bool angle_load(void) +{ + return true; +} +#else +#define ANGLE_DECL(NAME, VAR) \ + VAR; +ANGLE_FNS(ANGLE_DECL) + +static bool angle_loaded; +static mp_once angle_load_once = MP_STATIC_ONCE_INITIALIZER; + +static void angle_do_load(void) +{ + // Note: we let this handle "leak", as the functions remain valid forever. + HANDLE angle_dll = LoadLibraryW(L"LIBEGL.DLL"); + if (!angle_dll) + return; +#define ANGLE_LOAD_ENTRY(NAME, VAR) \ + NAME = (void *)GetProcAddress(angle_dll, #NAME); \ + if (!NAME) return; + ANGLE_FNS(ANGLE_LOAD_ENTRY) + angle_loaded = true; +} + +bool angle_load(void) +{ + mp_exec_once(&angle_load_once, angle_do_load); + return angle_loaded; +} +#endif diff --git a/video/out/opengl/angle_dynamic.h b/video/out/opengl/angle_dynamic.h new file mode 100644 index 0000000..d419c3f --- /dev/null +++ b/video/out/opengl/angle_dynamic.h @@ -0,0 +1,89 @@ +// Based on Khronos headers, thus MIT licensed. 
+ +#ifndef MP_ANGLE_DYNAMIC_H +#define MP_ANGLE_DYNAMIC_H + +#include <stdbool.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "config.h" + +#define ANGLE_FNS(FN) \ + FN(eglBindAPI, EGLBoolean (*EGLAPIENTRY PFN_eglBindAPI)(EGLenum)) \ + FN(eglBindTexImage, EGLBoolean (*EGLAPIENTRY PFN_eglBindTexImage) \ + (EGLDisplay, EGLSurface, EGLint)) \ + FN(eglChooseConfig, EGLBoolean (*EGLAPIENTRY PFN_eglChooseConfig) \ + (EGLDisplay, const EGLint *, EGLConfig *, EGLint, EGLint *)) \ + FN(eglCreateContext, EGLContext (*EGLAPIENTRY PFN_eglCreateContext) \ + (EGLDisplay, EGLConfig, EGLContext, const EGLint *)) \ + FN(eglCreatePbufferFromClientBuffer, EGLSurface (*EGLAPIENTRY \ + PFN_eglCreatePbufferFromClientBuffer)(EGLDisplay, EGLenum, \ + EGLClientBuffer, EGLConfig, const EGLint *)) \ + FN(eglCreateWindowSurface, EGLSurface (*EGLAPIENTRY \ + PFN_eglCreateWindowSurface)(EGLDisplay, EGLConfig, \ + EGLNativeWindowType, const EGLint *)) \ + FN(eglDestroyContext, EGLBoolean (*EGLAPIENTRY PFN_eglDestroyContext) \ + (EGLDisplay, EGLContext)) \ + FN(eglDestroySurface, EGLBoolean (*EGLAPIENTRY PFN_eglDestroySurface) \ + (EGLDisplay, EGLSurface)) \ + FN(eglGetConfigAttrib, EGLBoolean (*EGLAPIENTRY PFN_eglGetConfigAttrib) \ + (EGLDisplay, EGLConfig, EGLint, EGLint *)) \ + FN(eglGetCurrentContext, EGLContext (*EGLAPIENTRY \ + PFN_eglGetCurrentContext)(void)) \ + FN(eglGetCurrentDisplay, EGLDisplay (*EGLAPIENTRY \ + PFN_eglGetCurrentDisplay)(void)) \ + FN(eglGetDisplay, EGLDisplay (*EGLAPIENTRY PFN_eglGetDisplay) \ + (EGLNativeDisplayType)) \ + FN(eglGetError, EGLint (*EGLAPIENTRY PFN_eglGetError)(void)) \ + FN(eglGetProcAddress, void *(*EGLAPIENTRY \ + PFN_eglGetProcAddress)(const char *)) \ + FN(eglInitialize, EGLBoolean (*EGLAPIENTRY PFN_eglInitialize) \ + (EGLDisplay, EGLint *, EGLint *)) \ + FN(eglMakeCurrent, EGLBoolean (*EGLAPIENTRY PFN_eglMakeCurrent) \ + (EGLDisplay, EGLSurface, EGLSurface, EGLContext)) \ + FN(eglQueryString, const char *(*EGLAPIENTRY 
PFN_eglQueryString) \ + (EGLDisplay, EGLint)) \ + FN(eglSwapBuffers, EGLBoolean (*EGLAPIENTRY PFN_eglSwapBuffers) \ + (EGLDisplay, EGLSurface)) \ + FN(eglSwapInterval, EGLBoolean (*EGLAPIENTRY PFN_eglSwapInterval) \ + (EGLDisplay, EGLint)) \ + FN(eglReleaseTexImage, EGLBoolean (*EGLAPIENTRY PFN_eglReleaseTexImage) \ + (EGLDisplay, EGLSurface, EGLint)) \ + FN(eglTerminate, EGLBoolean (*EGLAPIENTRY PFN_eglTerminate)(EGLDisplay)) \ + FN(eglWaitClient, EGLBoolean (*EGLAPIENTRY PFN_eglWaitClient)(void)) + +#define ANGLE_EXT_DECL(NAME, VAR) \ + extern VAR; +ANGLE_FNS(ANGLE_EXT_DECL) + +bool angle_load(void); + +// Source compatibility to statically linked ANGLE. +#if !HAVE_EGL_ANGLE_LIB +#define eglBindAPI PFN_eglBindAPI +#define eglBindTexImage PFN_eglBindTexImage +#define eglChooseConfig PFN_eglChooseConfig +#define eglCreateContext PFN_eglCreateContext +#define eglCreatePbufferFromClientBuffer PFN_eglCreatePbufferFromClientBuffer +#define eglCreateWindowSurface PFN_eglCreateWindowSurface +#define eglDestroyContext PFN_eglDestroyContext +#define eglDestroySurface PFN_eglDestroySurface +#define eglGetConfigAttrib PFN_eglGetConfigAttrib +#define eglGetCurrentContext PFN_eglGetCurrentContext +#define eglGetCurrentDisplay PFN_eglGetCurrentDisplay +#define eglGetDisplay PFN_eglGetDisplay +#define eglGetError PFN_eglGetError +#define eglGetProcAddress PFN_eglGetProcAddress +#define eglInitialize PFN_eglInitialize +#define eglMakeCurrent PFN_eglMakeCurrent +#define eglQueryString PFN_eglQueryString +#define eglReleaseTexImage PFN_eglReleaseTexImage +#define eglSwapBuffers PFN_eglSwapBuffers +#define eglSwapInterval PFN_eglSwapInterval +#define eglTerminate PFN_eglTerminate +#define eglWaitClient PFN_eglWaitClient +#endif + +#endif diff --git a/video/out/opengl/common.c b/video/out/opengl/common.c new file mode 100644 index 0000000..ee26508 --- /dev/null +++ b/video/out/opengl/common.c @@ -0,0 +1,694 @@ +/* + * common OpenGL routines + * + * copyleft (C) 2005-2010 Reimar 
Döffinger <Reimar.Doeffinger@gmx.de> + * Special thanks go to the xine team and Matthias Hopf, whose video_out_opengl.c + * gave me lots of good ideas. + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <strings.h> +#include <stdbool.h> +#include <math.h> +#include <assert.h> + +#include "common.h" +#include "common/common.h" +#include "utils.h" + +// This guesses if the current GL context is a suspected software renderer. +static bool is_software_gl(GL *gl) +{ + const char *renderer = gl->GetString(GL_RENDERER); + // Note we don't attempt to blacklist Microsoft's fallback implementation. + // It only provides OpenGL 1.1 and will be skipped anyway. 
+ return !renderer || + strcmp(renderer, "Software Rasterizer") == 0 || + strstr(renderer, "llvmpipe") || + strstr(renderer, "softpipe") || + strcmp(renderer, "Mesa X11") == 0 || + strcmp(renderer, "Apple Software Renderer") == 0; +} + +// This guesses whether our DR path is fast or slow +static bool is_fast_dr(GL *gl) +{ + const char *vendor = gl->GetString(GL_VENDOR); + if (!vendor) + return false; + + return strcasecmp(vendor, "AMD") == 0 || + strcasecmp(vendor, "NVIDIA Corporation") == 0 || + strcasecmp(vendor, "ATI Technologies Inc.") == 0; // AMD on Windows +} + +static void GLAPIENTRY dummy_glBindFramebuffer(GLenum target, GLuint framebuffer) +{ + assert(framebuffer == 0); +} + +#define FN_OFFS(name) offsetof(GL, name) + +#define DEF_FN(name) {FN_OFFS(name), "gl" # name} +#define DEF_FN_NAME(name, str) {FN_OFFS(name), str} + +struct gl_function { + ptrdiff_t offset; + char *name; +}; + +struct gl_functions { + const char *extension; // introduced with this extension in any version + int provides; // bitfield of MPGL_CAP_* constants + int ver_core; // introduced as required function + int ver_es_core; // introduced as required GL ES function + int ver_exclude; // not applicable to versions >= ver_exclude + int ver_es_exclude; // same for GLES + const struct gl_function *functions; +}; + +#define MAX_FN_COUNT 100 // max functions per gl_functions section + +// Note: to keep the number of sections low, some functions are in multiple +// sections (if there are tricky combinations of GL/ES versions) +static const struct gl_functions gl_functions[] = { + // GL 2.1+ desktop and GLES 2.0+ (anything we support) + // Probably all of these are in GL 2.0 too, but we require GLSL 120. 
+ { + .ver_core = 210, + .ver_es_core = 200, + .functions = (const struct gl_function[]) { + DEF_FN(ActiveTexture), + DEF_FN(AttachShader), + DEF_FN(BindAttribLocation), + DEF_FN(BindBuffer), + DEF_FN(BindTexture), + DEF_FN(BlendFuncSeparate), + DEF_FN(BufferData), + DEF_FN(BufferSubData), + DEF_FN(Clear), + DEF_FN(ClearColor), + DEF_FN(CompileShader), + DEF_FN(CreateProgram), + DEF_FN(CreateShader), + DEF_FN(DeleteBuffers), + DEF_FN(DeleteProgram), + DEF_FN(DeleteShader), + DEF_FN(DeleteTextures), + DEF_FN(Disable), + DEF_FN(DisableVertexAttribArray), + DEF_FN(DrawArrays), + DEF_FN(Enable), + DEF_FN(EnableVertexAttribArray), + DEF_FN(Finish), + DEF_FN(Flush), + DEF_FN(GenBuffers), + DEF_FN(GenTextures), + DEF_FN(GetAttribLocation), + DEF_FN(GetError), + DEF_FN(GetIntegerv), + DEF_FN(GetProgramInfoLog), + DEF_FN(GetProgramiv), + DEF_FN(GetShaderInfoLog), + DEF_FN(GetShaderiv), + DEF_FN(GetString), + DEF_FN(GetUniformLocation), + DEF_FN(LinkProgram), + DEF_FN(PixelStorei), + DEF_FN(ReadPixels), + DEF_FN(Scissor), + DEF_FN(ShaderSource), + DEF_FN(TexImage2D), + DEF_FN(TexParameteri), + DEF_FN(TexSubImage2D), + DEF_FN(Uniform1f), + DEF_FN(Uniform2f), + DEF_FN(Uniform3f), + DEF_FN(Uniform1i), + DEF_FN(UniformMatrix2fv), + DEF_FN(UniformMatrix3fv), + DEF_FN(UseProgram), + DEF_FN(VertexAttribPointer), + DEF_FN(Viewport), + {0} + }, + }, + // GL 2.1+ desktop only (and GLSL 120 shaders) + { + .ver_core = 210, + .provides = MPGL_CAP_ROW_LENGTH | MPGL_CAP_1D_TEX, + .functions = (const struct gl_function[]) { + DEF_FN(DrawBuffer), + DEF_FN(GetTexLevelParameteriv), + DEF_FN(ReadBuffer), + DEF_FN(TexImage1D), + DEF_FN(UnmapBuffer), + {0} + }, + }, + // GL 2.1 has this as extension only. + { + .ver_exclude = 300, + .ver_es_exclude = 300, + .extension = "GL_ARB_map_buffer_range", + .functions = (const struct gl_function[]) { + DEF_FN(MapBufferRange), + {0} + }, + }, + // GL 3.0+ and ES 3.x core only functions. 
+ { + .ver_core = 300, + .ver_es_core = 300, + .functions = (const struct gl_function[]) { + DEF_FN(BindBufferBase), + DEF_FN(BlitFramebuffer), + DEF_FN(GetStringi), + DEF_FN(MapBufferRange), + // for ES 3.0 + DEF_FN(ReadBuffer), + DEF_FN(UnmapBuffer), + {0} + }, + }, + // For ES 3.1 core + { + .ver_es_core = 310, + .functions = (const struct gl_function[]) { + DEF_FN(GetTexLevelParameteriv), + {0} + }, + }, + { + .ver_core = 210, + .ver_es_core = 300, + .provides = MPGL_CAP_3D_TEX, + .functions = (const struct gl_function[]) { + DEF_FN(TexImage3D), + {0} + }, + }, + // Useful for ES 2.0 + { + .ver_core = 110, + .ver_es_core = 300, + .extension = "GL_EXT_unpack_subimage", + .provides = MPGL_CAP_ROW_LENGTH, + }, + // Framebuffers, extension in GL 2.x, core in GL 3.x core. + { + .ver_core = 300, + .ver_es_core = 200, + .extension = "GL_ARB_framebuffer_object", + .provides = MPGL_CAP_FB, + .functions = (const struct gl_function[]) { + DEF_FN(BindFramebuffer), + DEF_FN(GenFramebuffers), + DEF_FN(DeleteFramebuffers), + DEF_FN(CheckFramebufferStatus), + DEF_FN(FramebufferTexture2D), + DEF_FN(GetFramebufferAttachmentParameteriv), + {0} + }, + }, + // VAOs, extension in GL 2.x, core in GL 3.x core. + { + .ver_core = 300, + .ver_es_core = 300, + .extension = "GL_ARB_vertex_array_object", + .provides = MPGL_CAP_VAO, + .functions = (const struct gl_function[]) { + DEF_FN(GenVertexArrays), + DEF_FN(BindVertexArray), + DEF_FN(DeleteVertexArrays), + {0} + } + }, + // GL_RED / GL_RG textures, extension in GL 2.x, core in GL 3.x core. + { + .ver_core = 300, + .ver_es_core = 300, + .extension = "GL_ARB_texture_rg", + .provides = MPGL_CAP_TEX_RG, + }, + { + .ver_core = 300, + .ver_es_core = 300, + .extension = "GL_EXT_texture_rg", + .provides = MPGL_CAP_TEX_RG, + }, + // GL_R16 etc. 
+ { + .extension = "GL_EXT_texture_norm16", + .provides = MPGL_CAP_EXT16, + .ver_exclude = 1, // never in desktop GL + }, + // Float texture support for GL 2.x + { + .extension = "GL_ARB_texture_float", + .provides = MPGL_CAP_ARB_FLOAT, + .ver_exclude = 300, + .ver_es_exclude = 1, + }, + // 16 bit float textures that can be rendered to in GLES + { + .extension = "GL_EXT_color_buffer_half_float", + .provides = MPGL_CAP_EXT_CR_HFLOAT, + .ver_exclude = 1, + .ver_es_exclude = 320, + }, + { + .ver_core = 320, + .ver_es_core = 300, + .extension = "GL_ARB_sync", + .functions = (const struct gl_function[]) { + DEF_FN(FenceSync), + DEF_FN(ClientWaitSync), + DEF_FN(DeleteSync), + {0} + }, + }, + { + .ver_core = 330, + .extension = "GL_ARB_timer_query", + .functions = (const struct gl_function[]) { + DEF_FN(GenQueries), + DEF_FN(DeleteQueries), + DEF_FN(BeginQuery), + DEF_FN(EndQuery), + DEF_FN(QueryCounter), + DEF_FN(IsQuery), + DEF_FN(GetQueryObjectiv), + DEF_FN(GetQueryObjecti64v), + DEF_FN(GetQueryObjectuiv), + DEF_FN(GetQueryObjectui64v), + {0} + }, + }, + { + .extension = "GL_EXT_disjoint_timer_query", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(GenQueries, "glGenQueriesEXT"), + DEF_FN_NAME(DeleteQueries, "glDeleteQueriesEXT"), + DEF_FN_NAME(BeginQuery, "glBeginQueryEXT"), + DEF_FN_NAME(EndQuery, "glEndQueryEXT"), + DEF_FN_NAME(QueryCounter, "glQueryCounterEXT"), + DEF_FN_NAME(IsQuery, "glIsQueryEXT"), + DEF_FN_NAME(GetQueryObjectiv, "glGetQueryObjectivEXT"), + DEF_FN_NAME(GetQueryObjecti64v, "glGetQueryObjecti64vEXT"), + DEF_FN_NAME(GetQueryObjectuiv, "glGetQueryObjectuivEXT"), + DEF_FN_NAME(GetQueryObjectui64v, "glGetQueryObjectui64vEXT"), + {0} + }, + }, + { + .ver_core = 430, + .extension = "GL_ARB_invalidate_subdata", + .functions = (const struct gl_function[]) { + DEF_FN(InvalidateTexImage), + {0} + }, + }, + { + .ver_core = 430, + .ver_es_core = 300, + .functions = (const struct gl_function[]) { + DEF_FN(InvalidateFramebuffer), + {0} + }, + }, + { 
+ .ver_core = 410, + .ver_es_core = 300, + .extension = "GL_ARB_get_program_binary", + .functions = (const struct gl_function[]) { + DEF_FN(GetProgramBinary), + DEF_FN(ProgramBinary), + {0} + }, + }, + { + .ver_core = 440, + .extension = "GL_ARB_buffer_storage", + .functions = (const struct gl_function[]) { + DEF_FN(BufferStorage), + {0} + }, + }, + // Equivalent extension for ES + { + .extension = "GL_EXT_buffer_storage", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(BufferStorage, "glBufferStorageEXT"), + {0} + }, + }, + { + .ver_core = 420, + .ver_es_core = 310, + .extension = "GL_ARB_shader_image_load_store", + .functions = (const struct gl_function[]) { + DEF_FN(BindImageTexture), + DEF_FN(MemoryBarrier), + {0} + }, + }, + { + .ver_core = 310, + .ver_es_core = 300, + .extension = "GL_ARB_uniform_buffer_object", + .provides = MPGL_CAP_UBO, + }, + { + .ver_core = 430, + .ver_es_core = 310, + .extension = "GL_ARB_shader_storage_buffer_object", + .provides = MPGL_CAP_SSBO, + }, + { + .ver_core = 430, + .ver_es_core = 310, + .extension = "GL_ARB_compute_shader", + .functions = (const struct gl_function[]) { + DEF_FN(DispatchCompute), + {0}, + }, + }, + { + .ver_core = 430, + .extension = "GL_ARB_arrays_of_arrays", + .provides = MPGL_CAP_NESTED_ARRAY, + }, + // Swap control, always an OS specific extension + // The OSX code loads this manually. + { + .extension = "GLX_SGI_swap_control", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(SwapInterval, "glXSwapIntervalSGI"), + {0}, + }, + }, + // This one overrides GLX_SGI_swap_control on platforms using mesa. The + // only difference is that it supports glXSwapInterval(0). 
+ { + .extension = "GLX_MESA_swap_control", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(SwapInterval, "glXSwapIntervalMESA"), + {0}, + }, + }, + { + .extension = "WGL_EXT_swap_control", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(SwapInterval, "wglSwapIntervalEXT"), + {0}, + }, + }, + { + .extension = "GLX_SGI_video_sync", + .functions = (const struct gl_function[]) { + DEF_FN_NAME(GetVideoSync, "glXGetVideoSyncSGI"), + DEF_FN_NAME(WaitVideoSync, "glXWaitVideoSyncSGI"), + {0}, + }, + }, + // For gl_hwdec_vdpau.c + // http://www.opengl.org/registry/specs/NV/vdpau_interop.txt + { + .extension = "GL_NV_vdpau_interop", + .provides = MPGL_CAP_VDPAU, + .functions = (const struct gl_function[]) { + // (only functions needed by us) + DEF_FN(VDPAUInitNV), + DEF_FN(VDPAUFiniNV), + DEF_FN(VDPAURegisterOutputSurfaceNV), + DEF_FN(VDPAURegisterVideoSurfaceNV), + DEF_FN(VDPAUUnregisterSurfaceNV), + DEF_FN(VDPAUSurfaceAccessNV), + DEF_FN(VDPAUMapSurfacesNV), + DEF_FN(VDPAUUnmapSurfacesNV), + {0} + }, + }, +#if HAVE_GL_DXINTEROP + { + .extension = "WGL_NV_DX_interop", + .provides = MPGL_CAP_DXINTEROP, + .functions = (const struct gl_function[]) { + DEF_FN_NAME(DXSetResourceShareHandleNV, "wglDXSetResourceShareHandleNV"), + DEF_FN_NAME(DXOpenDeviceNV, "wglDXOpenDeviceNV"), + DEF_FN_NAME(DXCloseDeviceNV, "wglDXCloseDeviceNV"), + DEF_FN_NAME(DXRegisterObjectNV, "wglDXRegisterObjectNV"), + DEF_FN_NAME(DXUnregisterObjectNV, "wglDXUnregisterObjectNV"), + DEF_FN_NAME(DXLockObjectsNV, "wglDXLockObjectsNV"), + DEF_FN_NAME(DXUnlockObjectsNV, "wglDXUnlockObjectsNV"), + {0} + }, + }, +#endif + // Apple Packed YUV Formats + // For gl_hwdec_vda.c + // http://www.opengl.org/registry/specs/APPLE/rgb_422.txt + { + .extension = "GL_APPLE_rgb_422", + .provides = MPGL_CAP_APPLE_RGB_422, + }, + { + .ver_core = 430, + .extension = "GL_ARB_debug_output", + .provides = MPGL_CAP_DEBUG, + .functions = (const struct gl_function[]) { + // (only functions needed by us) + 
DEF_FN(DebugMessageCallback), + {0} + }, + }, + // ES version uses a different extension. + { + .ver_es_core = 320, + .extension = "GL_KHR_debug", + .provides = MPGL_CAP_DEBUG, + .functions = (const struct gl_function[]) { + // (only functions needed by us) + DEF_FN(DebugMessageCallback), + {0} + }, + }, + { + .extension = "GL_ANGLE_translated_shader_source", + .functions = (const struct gl_function[]) { + DEF_FN(GetTranslatedShaderSourceANGLE), + {0} + }, + }, +}; + +#undef FN_OFFS +#undef DEF_FN_HARD +#undef DEF_FN +#undef DEF_FN_NAME + +// Fill the GL struct with function pointers and extensions from the current +// GL context. Called by the backend. +// get_fn: function to resolve function names +// ext2: an extra extension string +// log: used to output messages +void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), + void *fn_ctx, const char *ext2, struct mp_log *log) +{ + talloc_free(gl->extensions); + *gl = (GL) { + .extensions = talloc_strdup(gl, ext2 ? ext2 : ""), + .get_fn = get_fn, + .fn_ctx = fn_ctx, + }; + + gl->GetString = get_fn(fn_ctx, "glGetString"); + if (!gl->GetString) { + mp_err(log, "Can't load OpenGL functions.\n"); + goto error; + } + + int major = 0, minor = 0; + const char *version_string = gl->GetString(GL_VERSION); + if (!version_string) { + mp_fatal(log, "glGetString(GL_VERSION) returned NULL.\n"); + goto error; + } + mp_verbose(log, "GL_VERSION='%s'\n", version_string); + if (strncmp(version_string, "OpenGL ES ", 10) == 0) { + version_string += 10; + gl->es = 100; + } + if (sscanf(version_string, "%d.%d", &major, &minor) < 2) + goto error; + gl->version = MPGL_VER(major, minor); + mp_verbose(log, "Detected %s %d.%d.\n", gl->es ? 
"GLES" : "desktop OpenGL", + major, minor); + + if (gl->es) { + gl->es = gl->version; + gl->version = 0; + if (gl->es < 200) { + mp_fatal(log, "At least GLESv2 required.\n"); + goto error; + } + } + + mp_verbose(log, "GL_VENDOR='%s'\n", gl->GetString(GL_VENDOR)); + mp_verbose(log, "GL_RENDERER='%s'\n", gl->GetString(GL_RENDERER)); + const char *shader = gl->GetString(GL_SHADING_LANGUAGE_VERSION); + if (shader) + mp_verbose(log, "GL_SHADING_LANGUAGE_VERSION='%s'\n", shader); + + if (gl->version >= 300) { + gl->GetStringi = get_fn(fn_ctx, "glGetStringi"); + gl->GetIntegerv = get_fn(fn_ctx, "glGetIntegerv"); + + if (!(gl->GetStringi && gl->GetIntegerv)) + goto error; + + GLint exts; + gl->GetIntegerv(GL_NUM_EXTENSIONS, &exts); + for (int n = 0; n < exts; n++) { + const char *ext = gl->GetStringi(GL_EXTENSIONS, n); + gl->extensions = talloc_asprintf_append(gl->extensions, " %s", ext); + } + + } else { + const char *ext = (char*)gl->GetString(GL_EXTENSIONS); + gl->extensions = talloc_asprintf_append(gl->extensions, " %s", ext); + } + + mp_dbg(log, "Combined OpenGL extensions string:\n%s\n", gl->extensions); + + for (int n = 0; n < MP_ARRAY_SIZE(gl_functions); n++) { + const struct gl_functions *section = &gl_functions[n]; + int version = gl->es ? gl->es : gl->version; + int ver_core = gl->es ? section->ver_es_core : section->ver_core; + + // NOTE: Function entrypoints can exist, even if they do not work. + // We must always check extension strings and versions. 
+ + if (gl->version && section->ver_exclude && + gl->version >= section->ver_exclude) + continue; + if (gl->es && section->ver_es_exclude && + gl->es >= section->ver_es_exclude) + continue; + + bool exists = false, must_exist = false; + if (ver_core) + must_exist = version >= ver_core; + + if (section->extension) + exists = gl_check_extension(gl->extensions, section->extension); + + exists |= must_exist; + if (!exists) + continue; + + void *loaded[MAX_FN_COUNT] = {0}; + bool all_loaded = true; + const struct gl_function *fnlist = section->functions; + + for (int i = 0; fnlist && fnlist[i].name; i++) { + const struct gl_function *fn = &fnlist[i]; + void *ptr = get_fn(fn_ctx, fn->name); + if (!ptr) { + all_loaded = false; + if (must_exist) { + mp_err(log, "GL %d.%d function %s not found.\n", + MPGL_VER_GET_MAJOR(ver_core), + MPGL_VER_GET_MINOR(ver_core), fn->name); + goto error; + } else { + mp_warn(log, "Function %s from extension %s not found.\n", + fn->name, section->extension); + } + break; + } + assert(i < MAX_FN_COUNT); + loaded[i] = ptr; + } + + if (all_loaded) { + gl->mpgl_caps |= section->provides; + for (int i = 0; fnlist && fnlist[i].name; i++) { + const struct gl_function *fn = &fnlist[i]; + void **funcptr = (void**)(((char*)gl) + fn->offset); + if (loaded[i]) + *funcptr = loaded[i]; + } + if (!must_exist && section->extension) + mp_verbose(log, "Loaded extension %s.\n", section->extension); + } + } + + gl->glsl_version = 0; + if (gl->es) { + if (gl->es >= 200) + gl->glsl_version = 100; + if (gl->es >= 300) + gl->glsl_version = gl->es; + } else { + gl->glsl_version = 120; + int glsl_major = 0, glsl_minor = 0; + if (shader && sscanf(shader, "%d.%d", &glsl_major, &glsl_minor) == 2) + gl->glsl_version = glsl_major * 100 + glsl_minor; + // restrict GLSL version to be forwards compatible + gl->glsl_version = MPMIN(gl->glsl_version, 440); + } + + if (is_software_gl(gl)) { + gl->mpgl_caps |= MPGL_CAP_SW; + mp_verbose(log, "Detected suspected software 
renderer.\n"); + } + + if (!is_fast_dr(gl)) + gl->mpgl_caps |= MPGL_CAP_SLOW_DR; + + // GL_ARB_compute_shader & GL_ARB_shader_image_load_store + if (gl->DispatchCompute && gl->BindImageTexture) + gl->mpgl_caps |= MPGL_CAP_COMPUTE_SHADER; + + // Provided for simpler handling if no framebuffer support is available. + if (!gl->BindFramebuffer) + gl->BindFramebuffer = &dummy_glBindFramebuffer; + return; + +error: + // Signal failure by clearing both version fields and all capabilities. + gl->version = 0; + gl->es = 0; + gl->mpgl_caps = 0; +} + +// Adapter for mpgl_load_functions(): ctx is the caller's getProcAddress +// callback. Returns NULL if no callback was supplied. +static void *get_procaddr_wrapper(void *ctx, const char *name) +{ + void *(*getProcAddress)(const GLubyte *) = ctx; + return getProcAddress ? getProcAddress((const GLubyte*)name) : NULL; +} + +// Same as mpgl_load_functions2(), but resolves symbols through a callback +// that takes only the function name (no context pointer). +void mpgl_load_functions(GL *gl, void *(*getProcAddress)(const GLubyte *), + const char *ext2, struct mp_log *log) +{ + mpgl_load_functions2(gl, get_procaddr_wrapper, getProcAddress, ext2, log); +} diff --git a/video/out/opengl/common.h b/video/out/opengl/common.h new file mode 100644 index 0000000..a6b02c9 --- /dev/null +++ b/video/out/opengl/common.h @@ -0,0 +1,258 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPLAYER_GL_COMMON_H +#define MPLAYER_GL_COMMON_H + +#include <stdio.h> +#include <stdint.h> +#include <stdbool.h> + +#include "config.h" +#include "common/msg.h" +#include "misc/bstr.h" + +#include "video/csputils.h" +#include "video/mp_image.h" +#include "video/out/vo.h" +#include "video/out/gpu/ra.h" + +#include "gl_headers.h" + +#if HAVE_GL_WIN32 +#include <windows.h> +#endif + +struct GL; +typedef struct GL GL; + +enum { + MPGL_CAP_ROW_LENGTH = (1 << 4), // GL_[UN]PACK_ROW_LENGTH + MPGL_CAP_FB = (1 << 5), + MPGL_CAP_VAO = (1 << 6), + MPGL_CAP_TEX_RG = (1 << 10), // GL_ARB_texture_rg / GL 3.x + MPGL_CAP_VDPAU = (1 << 11), // GL_NV_vdpau_interop + MPGL_CAP_APPLE_RGB_422 = (1 << 12), // GL_APPLE_rgb_422 + MPGL_CAP_1D_TEX = (1 << 14), + MPGL_CAP_3D_TEX = (1 << 15), + MPGL_CAP_DEBUG = (1 << 16), + MPGL_CAP_DXINTEROP = (1 << 17), // WGL_NV_DX_interop + MPGL_CAP_EXT16 = (1 << 18), // GL_EXT_texture_norm16 + MPGL_CAP_ARB_FLOAT = (1 << 19), // GL_ARB_texture_float + MPGL_CAP_EXT_CR_HFLOAT = (1 << 20), // GL_EXT_color_buffer_half_float + MPGL_CAP_UBO = (1 << 21), // GL_ARB_uniform_buffer_object + MPGL_CAP_SSBO = (1 << 22), // GL_ARB_shader_storage_buffer_object + MPGL_CAP_COMPUTE_SHADER = (1 << 23), // GL_ARB_compute_shader & GL_ARB_shader_image_load_store + MPGL_CAP_NESTED_ARRAY = (1 << 24), // GL_ARB_arrays_of_arrays + + MPGL_CAP_SLOW_DR = (1 << 29), // direct rendering is assumed to be slow + MPGL_CAP_SW = (1 << 30), // indirect or sw renderer +}; + +// E.g. 310 means 3.1 +// Code doesn't have to use the macros; they are for convenience only. 
+#define MPGL_VER(major, minor) (((major) * 100) + (minor) * 10) +#define MPGL_VER_GET_MAJOR(ver) ((unsigned)(ver) / 100) +#define MPGL_VER_GET_MINOR(ver) ((unsigned)(ver) % 100 / 10) + +#define MPGL_VER_P(ver) MPGL_VER_GET_MAJOR(ver), MPGL_VER_GET_MINOR(ver) + +void mpgl_load_functions(GL *gl, void *(*getProcAddress)(const GLubyte *), + const char *ext2, struct mp_log *log); +void mpgl_load_functions2(GL *gl, void *(*get_fn)(void *ctx, const char *n), + void *fn_ctx, const char *ext2, struct mp_log *log); + +typedef void (GLAPIENTRY *MP_GLDEBUGPROC)(GLenum, GLenum, GLuint, GLenum, + GLsizei, const GLchar *,const void *); + +//function pointers loaded from the OpenGL library +struct GL { + int version; // MPGL_VER() mangled (e.g. 210 for 2.1) + int es; // es version (e.g. 300), 0 for desktop GL + int glsl_version; // e.g. 130 for GLSL 1.30 + char *extensions; // Equivalent to GL_EXTENSIONS + int mpgl_caps; // Bitfield of MPGL_CAP_* constants + bool debug_context; // use of e.g. GLX_CONTEXT_DEBUG_BIT_ARB + + // Set to false if the implementation follows normal GL semantics, which is + // upside down. Set to true if it does *not*, i.e. if rendering is right + // side up + bool flipped; + + // Copy of function pointer used to load GL. + // Caution: Not necessarily valid to use after VO init has completed! 
+ void *(*get_fn)(void *ctx, const char *n); + void *fn_ctx; + + void (GLAPIENTRY *Viewport)(GLint, GLint, GLsizei, GLsizei); + void (GLAPIENTRY *Clear)(GLbitfield); + void (GLAPIENTRY *GenTextures)(GLsizei, GLuint *); + void (GLAPIENTRY *DeleteTextures)(GLsizei, const GLuint *); + void (GLAPIENTRY *ClearColor)(GLclampf, GLclampf, GLclampf, GLclampf); + void (GLAPIENTRY *Enable)(GLenum); + void (GLAPIENTRY *Disable)(GLenum); + const GLubyte *(GLAPIENTRY * GetString)(GLenum); + void (GLAPIENTRY *BlendFuncSeparate)(GLenum, GLenum, GLenum, GLenum); + void (GLAPIENTRY *Flush)(void); + void (GLAPIENTRY *Finish)(void); + void (GLAPIENTRY *PixelStorei)(GLenum, GLint); + void (GLAPIENTRY *TexImage1D)(GLenum, GLint, GLint, GLsizei, GLint, + GLenum, GLenum, const GLvoid *); + void (GLAPIENTRY *TexImage2D)(GLenum, GLint, GLint, GLsizei, GLsizei, + GLint, GLenum, GLenum, const GLvoid *); + void (GLAPIENTRY *TexSubImage2D)(GLenum, GLint, GLint, GLint, + GLsizei, GLsizei, GLenum, GLenum, + const GLvoid *); + void (GLAPIENTRY *TexParameteri)(GLenum, GLenum, GLint); + void (GLAPIENTRY *GetIntegerv)(GLenum, GLint *); + void (GLAPIENTRY *ReadPixels)(GLint, GLint, GLsizei, GLsizei, GLenum, + GLenum, GLvoid *); + void (GLAPIENTRY *ReadBuffer)(GLenum); + void (GLAPIENTRY *DrawBuffer)(GLenum); + void (GLAPIENTRY *DrawArrays)(GLenum, GLint, GLsizei); + GLenum (GLAPIENTRY *GetError)(void); + void (GLAPIENTRY *GetTexLevelParameteriv)(GLenum, GLint, GLenum, GLint *); + void (GLAPIENTRY *Scissor)(GLint, GLint, GLsizei, GLsizei); + + void (GLAPIENTRY *GenBuffers)(GLsizei, GLuint *); + void (GLAPIENTRY *DeleteBuffers)(GLsizei, const GLuint *); + void (GLAPIENTRY *BindBuffer)(GLenum, GLuint); + void (GLAPIENTRY *BindBufferBase)(GLenum, GLuint, GLuint); + GLvoid * (GLAPIENTRY *MapBufferRange)(GLenum, GLintptr, GLsizeiptr, + GLbitfield); + GLboolean (GLAPIENTRY *UnmapBuffer)(GLenum); + void (GLAPIENTRY *BufferData)(GLenum, intptr_t, const GLvoid *, GLenum); + void (GLAPIENTRY 
*BufferSubData)(GLenum, GLintptr, GLsizeiptr, const GLvoid *); + void (GLAPIENTRY *ActiveTexture)(GLenum); + void (GLAPIENTRY *BindTexture)(GLenum, GLuint); + int (GLAPIENTRY *SwapInterval)(int); + void (GLAPIENTRY *TexImage3D)(GLenum, GLint, GLenum, GLsizei, GLsizei, + GLsizei, GLint, GLenum, GLenum, + const GLvoid *); + + void (GLAPIENTRY *GenVertexArrays)(GLsizei, GLuint *); + void (GLAPIENTRY *BindVertexArray)(GLuint); + GLint (GLAPIENTRY *GetAttribLocation)(GLuint, const GLchar *); + void (GLAPIENTRY *EnableVertexAttribArray)(GLuint); + void (GLAPIENTRY *DisableVertexAttribArray)(GLuint); + void (GLAPIENTRY *VertexAttribPointer)(GLuint, GLint, GLenum, GLboolean, + GLsizei, const GLvoid *); + void (GLAPIENTRY *DeleteVertexArrays)(GLsizei, const GLuint *); + void (GLAPIENTRY *UseProgram)(GLuint); + GLint (GLAPIENTRY *GetUniformLocation)(GLuint, const GLchar *); + void (GLAPIENTRY *CompileShader)(GLuint); + GLuint (GLAPIENTRY *CreateProgram)(void); + GLuint (GLAPIENTRY *CreateShader)(GLenum); + void (GLAPIENTRY *ShaderSource)(GLuint, GLsizei, const GLchar **, + const GLint *); + void (GLAPIENTRY *LinkProgram)(GLuint); + void (GLAPIENTRY *AttachShader)(GLuint, GLuint); + void (GLAPIENTRY *DeleteShader)(GLuint); + void (GLAPIENTRY *DeleteProgram)(GLuint); + void (GLAPIENTRY *GetShaderInfoLog)(GLuint, GLsizei, GLsizei *, GLchar *); + void (GLAPIENTRY *GetShaderiv)(GLuint, GLenum, GLint *); + void (GLAPIENTRY *GetProgramInfoLog)(GLuint, GLsizei, GLsizei *, GLchar *); + void (GLAPIENTRY *GetProgramiv)(GLenum, GLenum, GLint *); + void (GLAPIENTRY *GetProgramBinary)(GLuint, GLsizei, GLsizei *, GLenum *, + void *); + void (GLAPIENTRY *ProgramBinary)(GLuint, GLenum, const void *, GLsizei); + + void (GLAPIENTRY *DispatchCompute)(GLuint, GLuint, GLuint); + void (GLAPIENTRY *BindImageTexture)(GLuint, GLuint, GLint, GLboolean, + GLint, GLenum, GLenum); + void (GLAPIENTRY *MemoryBarrier)(GLbitfield); + + const GLubyte* (GLAPIENTRY *GetStringi)(GLenum, GLuint); + void 
(GLAPIENTRY *BindAttribLocation)(GLuint, GLuint, const GLchar *); + void (GLAPIENTRY *BindFramebuffer)(GLenum, GLuint); + void (GLAPIENTRY *GenFramebuffers)(GLsizei, GLuint *); + void (GLAPIENTRY *DeleteFramebuffers)(GLsizei, const GLuint *); + GLenum (GLAPIENTRY *CheckFramebufferStatus)(GLenum); + void (GLAPIENTRY *FramebufferTexture2D)(GLenum, GLenum, GLenum, GLuint, + GLint); + void (GLAPIENTRY *BlitFramebuffer)(GLint, GLint, GLint, GLint, GLint, GLint, + GLint, GLint, GLbitfield, GLenum); + void (GLAPIENTRY *GetFramebufferAttachmentParameteriv)(GLenum, GLenum, + GLenum, GLint *); + + void (GLAPIENTRY *Uniform1f)(GLint, GLfloat); + void (GLAPIENTRY *Uniform2f)(GLint, GLfloat, GLfloat); + void (GLAPIENTRY *Uniform3f)(GLint, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY *Uniform4f)(GLint, GLfloat, GLfloat, GLfloat, GLfloat); + void (GLAPIENTRY *Uniform1i)(GLint, GLint); + void (GLAPIENTRY *UniformMatrix2fv)(GLint, GLsizei, GLboolean, + const GLfloat *); + void (GLAPIENTRY *UniformMatrix3fv)(GLint, GLsizei, GLboolean, + const GLfloat *); + + void (GLAPIENTRY *InvalidateTexImage)(GLuint, GLint); + void (GLAPIENTRY *InvalidateFramebuffer)(GLenum, GLsizei, const GLenum *); + + GLsync (GLAPIENTRY *FenceSync)(GLenum, GLbitfield); + GLenum (GLAPIENTRY *ClientWaitSync)(GLsync, GLbitfield, GLuint64); + void (GLAPIENTRY *DeleteSync)(GLsync sync); + + void (GLAPIENTRY *BufferStorage)(GLenum, intptr_t, const GLvoid *, GLenum); + + void (GLAPIENTRY *GenQueries)(GLsizei, GLuint *); + void (GLAPIENTRY *DeleteQueries)(GLsizei, const GLuint *); + void (GLAPIENTRY *BeginQuery)(GLenum, GLuint); + void (GLAPIENTRY *EndQuery)(GLenum); + void (GLAPIENTRY *QueryCounter)(GLuint, GLenum); + GLboolean (GLAPIENTRY *IsQuery)(GLuint); + void (GLAPIENTRY *GetQueryObjectiv)(GLuint, GLenum, GLint *); + void (GLAPIENTRY *GetQueryObjecti64v)(GLuint, GLenum, GLint64 *); + void (GLAPIENTRY *GetQueryObjectuiv)(GLuint, GLenum, GLuint *); + void (GLAPIENTRY *GetQueryObjectui64v)(GLuint, GLenum, 
GLuint64 *); + + void (GLAPIENTRY *VDPAUInitNV)(const GLvoid *, const GLvoid *); + void (GLAPIENTRY *VDPAUFiniNV)(void); + GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterOutputSurfaceNV) + (GLvoid *, GLenum, GLsizei, const GLuint *); + GLvdpauSurfaceNV (GLAPIENTRY *VDPAURegisterVideoSurfaceNV) + (GLvoid *, GLenum, GLsizei, const GLuint *); + void (GLAPIENTRY *VDPAUUnregisterSurfaceNV)(GLvdpauSurfaceNV); + void (GLAPIENTRY *VDPAUSurfaceAccessNV)(GLvdpauSurfaceNV, GLenum); + void (GLAPIENTRY *VDPAUMapSurfacesNV)(GLsizei, const GLvdpauSurfaceNV *); + void (GLAPIENTRY *VDPAUUnmapSurfacesNV)(GLsizei, const GLvdpauSurfaceNV *); + +#if HAVE_GL_WIN32 + // The HANDLE type might not be present on non-Win32 + BOOL (GLAPIENTRY *DXSetResourceShareHandleNV)(void *dxObject, + HANDLE shareHandle); + HANDLE (GLAPIENTRY *DXOpenDeviceNV)(void *dxDevice); + BOOL (GLAPIENTRY *DXCloseDeviceNV)(HANDLE hDevice); + HANDLE (GLAPIENTRY *DXRegisterObjectNV)(HANDLE hDevice, void *dxObject, + GLuint name, GLenum type, GLenum access); + BOOL (GLAPIENTRY *DXUnregisterObjectNV)(HANDLE hDevice, HANDLE hObject); + BOOL (GLAPIENTRY *DXLockObjectsNV)(HANDLE hDevice, GLint count, + HANDLE *hObjects); + BOOL (GLAPIENTRY *DXUnlockObjectsNV)(HANDLE hDevice, GLint count, + HANDLE *hObjects); +#endif + + GLint (GLAPIENTRY *GetVideoSync)(GLuint *); + GLint (GLAPIENTRY *WaitVideoSync)(GLint, GLint, unsigned int *); + + void (GLAPIENTRY *GetTranslatedShaderSourceANGLE)(GLuint, GLsizei, + GLsizei*, GLchar* source); + + void (GLAPIENTRY *DebugMessageCallback)(MP_GLDEBUGPROC callback, + const void *userParam); +}; + +#endif /* MPLAYER_GL_COMMON_H */ diff --git a/video/out/opengl/context.c b/video/out/opengl/context.c new file mode 100644 index 0000000..05e279b --- /dev/null +++ b/video/out/opengl/context.c @@ -0,0 +1,324 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "options/m_config.h" +#include "context.h" +#include "ra_gl.h" +#include "utils.h" + +// 0-terminated list of desktop GL versions a backend should try to +// initialize. Each entry is the minimum required version. +const int mpgl_min_required_gl_versions[] = { + /* + * Nvidia drivers will not provide the highest supported version + * when 320 core is requested. Instead, it just returns 3.2. This + * would be bad, as we actually want compute shaders that require + * 4.2, so we have to request a sufficiently high version. We use + * 440 to maximise driver compatibility as we don't need anything + * from newer versions. 
+ */ + 440, + 320, + 210, + 0 +}; + +enum { + FLUSH_NO = 0, + FLUSH_YES, + FLUSH_AUTO, +}; + +struct opengl_opts { + bool use_glfinish; + bool waitvsync; + int vsync_pattern[2]; + int swapinterval; + int early_flush; + int gles_mode; +}; + +#define OPT_BASE_STRUCT struct opengl_opts +const struct m_sub_options opengl_conf = { + .opts = (const struct m_option[]) { + {"opengl-glfinish", OPT_BOOL(use_glfinish)}, + {"opengl-waitvsync", OPT_BOOL(waitvsync)}, + {"opengl-swapinterval", OPT_INT(swapinterval)}, + {"opengl-check-pattern-a", OPT_INT(vsync_pattern[0])}, + {"opengl-check-pattern-b", OPT_INT(vsync_pattern[1])}, + {"opengl-es", OPT_CHOICE(gles_mode, + {"auto", GLES_AUTO}, {"yes", GLES_YES}, {"no", GLES_NO})}, + {"opengl-early-flush", OPT_CHOICE(early_flush, + {"no", FLUSH_NO}, {"yes", FLUSH_YES}, {"auto", FLUSH_AUTO})}, + {0}, + }, + .defaults = &(const struct opengl_opts) { + .swapinterval = 1, + }, + .size = sizeof(struct opengl_opts), +}; + +struct priv { + GL *gl; + struct mp_log *log; + struct ra_gl_ctx_params params; + struct opengl_opts *opts; + struct ra_swapchain_fns fns; + GLuint main_fb; + struct ra_tex *wrapped_fb; // corresponds to main_fb + // for debugging: + int frames_rendered; + unsigned int prev_sgi_sync_count; + // for gl_vsync_pattern + int last_pattern; + int matches, mismatches; + // for swapchain_depth simulation + GLsync *vsync_fences; + int num_vsync_fences; +}; + +enum gles_mode ra_gl_ctx_get_glesmode(struct ra_ctx *ctx) +{ + void *tmp = talloc_new(NULL); + struct opengl_opts *opts; + enum gles_mode mode; + + opts = mp_get_config_group(tmp, ctx->global, &opengl_conf); + mode = opts->gles_mode; + + talloc_free(tmp); + return mode; +} + +void ra_gl_ctx_uninit(struct ra_ctx *ctx) +{ + if (ctx->swapchain) { + struct priv *p = ctx->swapchain->priv; + if (ctx->ra && p->wrapped_fb) + ra_tex_free(ctx->ra, &p->wrapped_fb); + talloc_free(ctx->swapchain); + ctx->swapchain = NULL; + } + + // Clean up any potentially left-over debug callback + if 
(ctx->ra) + ra_gl_set_debug(ctx->ra, false); + + ra_free(&ctx->ra); +} + +static const struct ra_swapchain_fns ra_gl_swapchain_fns; + +// Create the swapchain for ctx and an ra instance on top of gl. +// Returns false if gl carries no version (neither desktop nor ES), if a +// suspected software renderer is rejected while probing, or if ra creation +// fails. ctx->swapchain is already set when any of these failures occur. +bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params) +{ + struct ra_swapchain *sw = ctx->swapchain = talloc_ptrtype(NULL, sw); + *sw = (struct ra_swapchain) { + .ctx = ctx, + }; + + struct priv *p = sw->priv = talloc_ptrtype(sw, p); + *p = (struct priv) { + .gl = gl, + .log = ctx->log, + .params = params, + .opts = mp_get_config_group(p, ctx->global, &opengl_conf), + .fns = ra_gl_swapchain_fns, + }; + + sw->fns = &p->fns; + + // Start from the default callbacks copied above, then let the backend + // replace individual ones it provides. + const struct ra_swapchain_fns *ext = p->params.external_swapchain; + if (ext) { + if (ext->color_depth) + p->fns.color_depth = ext->color_depth; + if (ext->start_frame) + p->fns.start_frame = ext->start_frame; + if (ext->submit_frame) + p->fns.submit_frame = ext->submit_frame; + if (ext->swap_buffers) + p->fns.swap_buffers = ext->swap_buffers; + } + + // Both fields zero means no GL or GLES version is recorded in gl. + if (!gl->version && !gl->es) + return false; + + if (gl->mpgl_caps & MPGL_CAP_SW) { + MP_WARN(p, "Suspected software renderer or indirect context.\n"); + if (ctx->opts.probing && !ctx->opts.allow_sw) + return false; + } + + gl->debug_context = ctx->opts.debug; + + if (gl->SwapInterval) { + gl->SwapInterval(p->opts->swapinterval); + } else { + MP_VERBOSE(p, "GL_*_swap_control extension missing.\n"); + } + + ctx->ra = ra_create_gl(p->gl, ctx->log); + return !!ctx->ra; +} + +// Wrap framebuffer fbo at size w x h as the swapchain's target texture. +// Does nothing if the same fbo is already wrapped at that size. +void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo) +{ + struct priv *p = sw->priv; + if (p->main_fb == fbo && p->wrapped_fb && p->wrapped_fb->params.w == w + && p->wrapped_fb->params.h == h) + return; + + if (p->wrapped_fb) + ra_tex_free(sw->ctx->ra, &p->wrapped_fb); + + p->main_fb = fbo; + p->wrapped_fb = ra_create_wrapped_fb(sw->ctx->ra, fbo, w, h); +} + +int ra_gl_ctx_color_depth(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + GL *gl = p->gl; + + if (!p->wrapped_fb) + return 0; + + if ((gl->es < 300 && !gl->version) || !(gl->mpgl_caps & 
MPGL_CAP_FB)) + return 0; + + gl->BindFramebuffer(GL_FRAMEBUFFER, p->main_fb); + + GLenum obj = gl->version ? GL_BACK_LEFT : GL_BACK; + if (p->main_fb) + obj = GL_COLOR_ATTACHMENT0; + + GLint depth_g = 0; + + gl->GetFramebufferAttachmentParameteriv(GL_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &depth_g); + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + return depth_g; +} + +bool ra_gl_ctx_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->priv; + + bool visible = true; + if (p->params.check_visible) + visible = p->params.check_visible(sw->ctx); + + // If out_fbo is NULL, this was called from vo_gpu_next. Bail out. + if (!out_fbo || !visible) + return visible; + + *out_fbo = (struct ra_fbo) { + .tex = p->wrapped_fb, + .flip = !p->gl->flipped, // OpenGL FBs are normally flipped + }; + return true; +} + +bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) +{ + struct priv *p = sw->priv; + GL *gl = p->gl; + + if (p->opts->use_glfinish) + gl->Finish(); + + if (gl->FenceSync && !p->params.external_swapchain) { + GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + if (fence) + MP_TARRAY_APPEND(p, p->vsync_fences, p->num_vsync_fences, fence); + } + + switch (p->opts->early_flush) { + case FLUSH_AUTO: + if (frame->display_synced) + break; + MP_FALLTHROUGH; + case FLUSH_YES: + gl->Flush(); + } + + return true; +} + +static void check_pattern(struct priv *p, int item) +{ + int expected = p->opts->vsync_pattern[p->last_pattern]; + if (item == expected) { + p->last_pattern++; + if (p->last_pattern >= 2) + p->last_pattern = 0; + p->matches++; + } else { + p->mismatches++; + MP_WARN(p, "wrong pattern, expected %d got %d (hit: %d, mis: %d)\n", + expected, item, p->matches, p->mismatches); + } +} + +void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + GL *gl = p->gl; + + p->params.swap_buffers(sw->ctx); + p->frames_rendered++; + + if (p->frames_rendered > 
5 && !sw->ctx->opts.debug) + ra_gl_set_debug(sw->ctx->ra, false); + + if ((p->opts->waitvsync || p->opts->vsync_pattern[0]) + && gl->GetVideoSync) + { + unsigned int n1 = 0, n2 = 0; + gl->GetVideoSync(&n1); + if (p->opts->waitvsync) + gl->WaitVideoSync(2, (n1 + 1) % 2, &n2); + int step = n1 - p->prev_sgi_sync_count; + p->prev_sgi_sync_count = n1; + MP_DBG(p, "Flip counts: %u->%u, step=%d\n", n1, n2, step); + if (p->opts->vsync_pattern[0]) + check_pattern(p, step); + } + + while (p->num_vsync_fences >= sw->ctx->vo->opts->swapchain_depth) { + gl->ClientWaitSync(p->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); + gl->DeleteSync(p->vsync_fences[0]); + MP_TARRAY_REMOVE_AT(p->vsync_fences, p->num_vsync_fences, 0); + } +} + +static void ra_gl_ctx_get_vsync(struct ra_swapchain *sw, + struct vo_vsync_info *info) +{ + struct priv *p = sw->priv; + if (p->params.get_vsync) + p->params.get_vsync(sw->ctx, info); +} + +static const struct ra_swapchain_fns ra_gl_swapchain_fns = { + .color_depth = ra_gl_ctx_color_depth, + .start_frame = ra_gl_ctx_start_frame, + .submit_frame = ra_gl_ctx_submit_frame, + .swap_buffers = ra_gl_ctx_swap_buffers, + .get_vsync = ra_gl_ctx_get_vsync, +}; diff --git a/video/out/opengl/context.h b/video/out/opengl/context.h new file mode 100644 index 0000000..c96450e --- /dev/null +++ b/video/out/opengl/context.h @@ -0,0 +1,58 @@ +#pragma once + +#include "common/global.h" +#include "video/out/gpu/context.h" +#include "common.h" + +extern const int mpgl_min_required_gl_versions[]; + +enum gles_mode { + GLES_AUTO = 0, + GLES_YES, + GLES_NO, +}; + +// Returns the gles mode based on the --opengl opts. +enum gles_mode ra_gl_ctx_get_glesmode(struct ra_ctx *ctx); + +// These are a set of helpers for ra_ctx providers based on ra_gl. +// The init function also initializes ctx->ra and ctx->swapchain, so the user +// doesn't have to do this manually. 
(Similarly, the uninit function will +// clean them up) + +struct ra_gl_ctx_params { + // For special contexts (i.e. wayland) that want to check visibility + // before drawing a frame. + bool (*check_visible)(struct ra_ctx *ctx); + + // Set to the platform-specific function to swap buffers, like + // glXSwapBuffers, eglSwapBuffers etc. This will be called by + // ra_gl_ctx_swap_buffers. Required unless you either never call that + // function or if you override it yourself. + void (*swap_buffers)(struct ra_ctx *ctx); + + // See ra_swapchain_fns.get_vsync. + void (*get_vsync)(struct ra_ctx *ctx, struct vo_vsync_info *info); + + // If this is set to non-NULL, then the ra_gl_ctx will consider the GL + // implementation to be using an external swapchain, which disables the + // software simulation of --swapchain-depth. Any functions defined by this + // ra_swapchain_fns structs will entirely replace the equivalent ra_gl_ctx + // functions in the resulting ra_swapchain. + const struct ra_swapchain_fns *external_swapchain; +}; + +void ra_gl_ctx_uninit(struct ra_ctx *ctx); +bool ra_gl_ctx_init(struct ra_ctx *ctx, GL *gl, struct ra_gl_ctx_params params); + +// Call this any time the window size or main framebuffer changes +void ra_gl_ctx_resize(struct ra_swapchain *sw, int w, int h, int fbo); + +// These functions are normally set in the ra_swapchain->fns, but if an +// implementation has a need to override this fns struct with custom functions +// for whatever reason, these can be used to inherit the original behavior. 
+int ra_gl_ctx_color_depth(struct ra_swapchain *sw); +struct mp_image *ra_gl_ctx_screenshot(struct ra_swapchain *sw); +bool ra_gl_ctx_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo); +bool ra_gl_ctx_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame); +void ra_gl_ctx_swap_buffers(struct ra_swapchain *sw); diff --git a/video/out/opengl/context_android.c b/video/out/opengl/context_android.c new file mode 100644 index 0000000..bc1717c --- /dev/null +++ b/video/out/opengl/context_android.c @@ -0,0 +1,130 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "video/out/android_common.h" +#include "egl_helpers.h" +#include "common/common.h" +#include "context.h" + +struct priv { + struct GL gl; + EGLDisplay egl_display; + EGLContext egl_context; + EGLSurface egl_surface; +}; + +static void android_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static void android_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + if (p->egl_surface) { + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + eglDestroySurface(p->egl_display, p->egl_surface); + } + if (p->egl_context) + eglDestroyContext(p->egl_display, p->egl_context); + + vo_android_uninit(ctx->vo); +} + +static bool android_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + + if (!vo_android_init(ctx->vo)) + goto fail; + + p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + if (!eglInitialize(p->egl_display, NULL, NULL)) { + MP_FATAL(ctx, "EGL failed to initialize.\n"); + goto fail; + } + + EGLConfig config; + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &config)) + goto fail; + + ANativeWindow *native_window = vo_android_native_window(ctx->vo); + EGLint format; + eglGetConfigAttrib(p->egl_display, config, EGL_NATIVE_VISUAL_ID, &format); + ANativeWindow_setBuffersGeometry(native_window, 0, 0, format); + + p->egl_surface = eglCreateWindowSurface(p->egl_display, config, + (EGLNativeWindowType)native_window, NULL); + + if (p->egl_surface == EGL_NO_SURFACE) { + MP_FATAL(ctx, "Could not create EGL surface!\n"); + goto fail; + } + + if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, + p->egl_context)) { + MP_FATAL(ctx, "Failed to set context!\n"); + goto fail; + } + + mpegl_load_functions(&p->gl, ctx->log); + + struct ra_gl_ctx_params params = { + .swap_buffers = android_swap_buffers, + }; + 
+ if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto fail; + + return true; +fail: + android_uninit(ctx); + return false; +} + +static bool android_reconfig(struct ra_ctx *ctx) +{ + int w, h; + if (!vo_android_surface_size(ctx->vo, &w, &h)) + return false; + + ctx->vo->dwidth = w; + ctx->vo->dheight = h; + ra_gl_ctx_resize(ctx->swapchain, w, h, 0); + return true; +} + +static int android_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + return VO_NOTIMPL; +} + +const struct ra_ctx_fns ra_ctx_android = { + .type = "opengl", + .name = "android", + .reconfig = android_reconfig, + .control = android_control, + .init = android_init, + .uninit = android_uninit, +}; diff --git a/video/out/opengl/context_angle.c b/video/out/opengl/context_angle.c new file mode 100644 index 0000000..553718a --- /dev/null +++ b/video/out/opengl/context_angle.c @@ -0,0 +1,653 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <windows.h> +#include <EGL/egl.h> +#include <EGL/eglext.h> +#include <EGL/eglext_angle.h> +#include <d3d11.h> +#include <dxgi1_2.h> +#include <dwmapi.h> + +#include "angle_dynamic.h" +#include "egl_helpers.h" +#include "video/out/gpu/d3d11_helpers.h" + +#include "common/common.h" +#include "options/m_config.h" +#include "video/out/w32_common.h" +#include "osdep/windows_utils.h" +#include "context.h" +#include "utils.h" + +#ifndef EGL_D3D_TEXTURE_ANGLE +#define EGL_D3D_TEXTURE_ANGLE 0x33A3 +#endif +#ifndef EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE +#define EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE 0x33A7 +#define EGL_SURFACE_ORIENTATION_ANGLE 0x33A8 +#define EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE 0x0002 +#endif + +enum { + RENDERER_AUTO, + RENDERER_D3D9, + RENDERER_D3D11, +}; + +struct angle_opts { + int renderer; + int d3d11_warp; + int d3d11_feature_level; + int egl_windowing; + bool flip; +}; + +#define OPT_BASE_STRUCT struct angle_opts +const struct m_sub_options angle_conf = { + .opts = (const struct m_option[]) { + {"angle-renderer", OPT_CHOICE(renderer, + {"auto", RENDERER_AUTO}, + {"d3d9", RENDERER_D3D9}, + {"d3d11", RENDERER_D3D11})}, + {"angle-d3d11-warp", OPT_CHOICE(d3d11_warp, + {"auto", -1}, + {"no", 0}, + {"yes", 1})}, + {"angle-d3d11-feature-level", OPT_CHOICE(d3d11_feature_level, + {"11_0", D3D_FEATURE_LEVEL_11_0}, + {"10_1", D3D_FEATURE_LEVEL_10_1}, + {"10_0", D3D_FEATURE_LEVEL_10_0}, + {"9_3", D3D_FEATURE_LEVEL_9_3})}, + {"angle-egl-windowing", OPT_CHOICE(egl_windowing, + {"auto", -1}, + {"no", 0}, + {"yes", 1})}, + {"angle-flip", OPT_BOOL(flip)}, + {0} + }, + .defaults = &(const struct angle_opts) { + .renderer = RENDERER_AUTO, + .d3d11_warp = -1, + .d3d11_feature_level = D3D_FEATURE_LEVEL_11_0, + .egl_windowing = -1, + .flip = true, + }, + .size = sizeof(struct angle_opts), +}; + +struct priv { + GL gl; + + IDXGISwapChain *dxgi_swapchain; + + ID3D11Device *d3d11_device; + ID3D11DeviceContext *d3d11_context; + ID3D11Texture2D 
*d3d11_backbuffer; + + EGLConfig egl_config; + EGLDisplay egl_display; + EGLDeviceEXT egl_device; + EGLContext egl_context; + EGLSurface egl_window; // For the EGL windowing surface only + EGLSurface egl_backbuffer; // For the DXGI swap chain based surface + + int sc_width, sc_height; // Swap chain width and height + int swapinterval; + bool flipped; + + struct angle_opts *opts; +}; + +static __thread struct ra_ctx *current_ctx; + +static void update_sizes(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + p->sc_width = ctx->vo->dwidth ? ctx->vo->dwidth : 1; + p->sc_height = ctx->vo->dheight ? ctx->vo->dheight : 1; +} + +static void d3d11_backbuffer_release(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + if (p->egl_backbuffer) { + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + eglDestroySurface(p->egl_display, p->egl_backbuffer); + } + p->egl_backbuffer = EGL_NO_SURFACE; + + SAFE_RELEASE(p->d3d11_backbuffer); +} + +static bool d3d11_backbuffer_get(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + HRESULT hr; + + hr = IDXGISwapChain_GetBuffer(p->dxgi_swapchain, 0, &IID_ID3D11Texture2D, + (void**)&p->d3d11_backbuffer); + if (FAILED(hr)) { + MP_FATAL(vo, "Couldn't get swap chain back buffer\n"); + return false; + } + + EGLint pbuffer_attributes[] = { + EGL_TEXTURE_FORMAT, EGL_TEXTURE_RGBA, + EGL_TEXTURE_TARGET, EGL_TEXTURE_2D, + EGL_NONE, + }; + p->egl_backbuffer = eglCreatePbufferFromClientBuffer(p->egl_display, + EGL_D3D_TEXTURE_ANGLE, p->d3d11_backbuffer, p->egl_config, + pbuffer_attributes); + if (!p->egl_backbuffer) { + MP_FATAL(vo, "Couldn't create EGL pbuffer\n"); + return false; + } + + eglMakeCurrent(p->egl_display, p->egl_backbuffer, p->egl_backbuffer, + p->egl_context); + return true; +} + +static void d3d11_backbuffer_resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + HRESULT hr; + + int old_sc_width = p->sc_width; + int old_sc_height = 
p->sc_height; + + update_sizes(ctx); + // Avoid unnecessary resizing + if (old_sc_width == p->sc_width && old_sc_height == p->sc_height) + return; + + // All references to backbuffers must be released before ResizeBuffers + // (including references held by ANGLE) + d3d11_backbuffer_release(ctx); + + // The DirectX runtime may report errors related to the device like + // DXGI_ERROR_DEVICE_REMOVED at this point + hr = IDXGISwapChain_ResizeBuffers(p->dxgi_swapchain, 0, p->sc_width, + p->sc_height, DXGI_FORMAT_UNKNOWN, 0); + if (FAILED(hr)) + MP_FATAL(vo, "Couldn't resize swapchain: %s\n", mp_HRESULT_to_str(hr)); + + if (!d3d11_backbuffer_get(ctx)) + MP_FATAL(vo, "Couldn't get back buffer after resize\n"); +} + +static void d3d11_device_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + PFNEGLRELEASEDEVICEANGLEPROC eglReleaseDeviceANGLE = + (PFNEGLRELEASEDEVICEANGLEPROC)eglGetProcAddress("eglReleaseDeviceANGLE"); + + if (p->egl_display) + eglTerminate(p->egl_display); + p->egl_display = EGL_NO_DISPLAY; + + if (p->egl_device && eglReleaseDeviceANGLE) + eglReleaseDeviceANGLE(p->egl_device); + p->egl_device = 0; + + SAFE_RELEASE(p->d3d11_device); +} + +static bool d3d11_device_create(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + struct angle_opts *o = p->opts; + + struct d3d11_device_opts device_opts = { + .allow_warp = o->d3d11_warp != 0, + .force_warp = o->d3d11_warp == 1, + .max_feature_level = o->d3d11_feature_level, + .min_feature_level = D3D_FEATURE_LEVEL_9_3, + .max_frame_latency = ctx->vo->opts->swapchain_depth, + }; + if (!mp_d3d11_create_present_device(vo->log, &device_opts, &p->d3d11_device)) + return false; + ID3D11Device_GetImmediateContext(p->d3d11_device, &p->d3d11_context); + + PFNEGLGETPLATFORMDISPLAYEXTPROC eglGetPlatformDisplayEXT = + (PFNEGLGETPLATFORMDISPLAYEXTPROC)eglGetProcAddress("eglGetPlatformDisplayEXT"); + if (!eglGetPlatformDisplayEXT) { + MP_FATAL(vo, "Missing EGL_EXT_platform_base\n"); + 
return false; + } + PFNEGLCREATEDEVICEANGLEPROC eglCreateDeviceANGLE = + (PFNEGLCREATEDEVICEANGLEPROC)eglGetProcAddress("eglCreateDeviceANGLE"); + if (!eglCreateDeviceANGLE) { + MP_FATAL(vo, "Missing EGL_EXT_platform_device\n"); + return false; + } + + p->egl_device = eglCreateDeviceANGLE(EGL_D3D11_DEVICE_ANGLE, + p->d3d11_device, NULL); + if (!p->egl_device) { + MP_FATAL(vo, "Couldn't create EGL device\n"); + return false; + } + + p->egl_display = eglGetPlatformDisplayEXT(EGL_PLATFORM_DEVICE_EXT, + p->egl_device, NULL); + if (!p->egl_display) { + MP_FATAL(vo, "Couldn't get EGL display\n"); + return false; + } + + return true; +} + +static void d3d11_swapchain_surface_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + bool had_swapchain = p->dxgi_swapchain; + SAFE_RELEASE(p->dxgi_swapchain); + d3d11_backbuffer_release(ctx); + + // Ensure the swapchain is destroyed by flushing the D3D11 immediate + // context. This is needed because the HWND may be reused. See: + // https://msdn.microsoft.com/en-us/library/windows/desktop/ff476425.aspx + if (had_swapchain && p->d3d11_context) + ID3D11DeviceContext_Flush(p->d3d11_context); +} + +static bool d3d11_swapchain_surface_create(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + struct angle_opts *o = p->opts; + + if (!p->d3d11_device) + goto fail; + + update_sizes(ctx); + struct d3d11_swapchain_opts swapchain_opts = { + .window = vo_w32_hwnd(vo), + .width = p->sc_width, + .height = p->sc_height, + .flip = o->flip, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .length = ctx->vo->opts->swapchain_depth + 2, + .usage = DXGI_USAGE_RENDER_TARGET_OUTPUT | DXGI_USAGE_SHADER_INPUT, + }; + if (!mp_d3d11_create_swapchain(p->d3d11_device, vo->log, &swapchain_opts, + &p->dxgi_swapchain)) + goto fail; + if (!d3d11_backbuffer_get(ctx)) + goto fail; + + p->flipped = true; + return true; + +fail: + 
d3d11_swapchain_surface_destroy(ctx); + return false; +} + +static void d3d9_device_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + if (p->egl_display) + eglTerminate(p->egl_display); + p->egl_display = EGL_NO_DISPLAY; +} + +static bool d3d9_device_create(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + + PFNEGLGETPLATFORMDISPLAYEXTPROC eglGetPlatformDisplayEXT = + (PFNEGLGETPLATFORMDISPLAYEXTPROC)eglGetProcAddress("eglGetPlatformDisplayEXT"); + if (!eglGetPlatformDisplayEXT) { + MP_FATAL(vo, "Missing EGL_EXT_platform_base\n"); + return false; + } + + EGLint display_attributes[] = { + EGL_PLATFORM_ANGLE_TYPE_ANGLE, + EGL_PLATFORM_ANGLE_TYPE_D3D9_ANGLE, + EGL_PLATFORM_ANGLE_DEVICE_TYPE_ANGLE, + EGL_PLATFORM_ANGLE_DEVICE_TYPE_HARDWARE_ANGLE, + EGL_NONE, + }; + p->egl_display = eglGetPlatformDisplayEXT(EGL_PLATFORM_ANGLE_ANGLE, + EGL_DEFAULT_DISPLAY, display_attributes); + if (p->egl_display == EGL_NO_DISPLAY) { + MP_FATAL(vo, "Couldn't get display\n"); + return false; + } + + return true; +} + +static void egl_window_surface_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + if (p->egl_window) { + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + } +} + +static bool egl_window_surface_create(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + + int window_attribs_len = 0; + EGLint *window_attribs = NULL; + + EGLint flip_val; + if (eglGetConfigAttrib(p->egl_display, p->egl_config, + EGL_OPTIMAL_SURFACE_ORIENTATION_ANGLE, &flip_val)) + { + if (flip_val == EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE) { + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, + EGL_SURFACE_ORIENTATION_ANGLE); + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, + EGL_SURFACE_ORIENTATION_INVERT_Y_ANGLE); + p->flipped = true; + MP_VERBOSE(vo, "Rendering flipped.\n"); + } + } + + MP_TARRAY_APPEND(NULL, window_attribs, window_attribs_len, EGL_NONE); + 
p->egl_window = eglCreateWindowSurface(p->egl_display, p->egl_config, + vo_w32_hwnd(vo), window_attribs); + talloc_free(window_attribs); + if (!p->egl_window) { + MP_FATAL(vo, "Could not create EGL surface!\n"); + goto fail; + } + + eglMakeCurrent(p->egl_display, p->egl_window, p->egl_window, + p->egl_context); + return true; +fail: + egl_window_surface_destroy(ctx); + return false; +} + +static void context_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + if (p->egl_context) { + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + eglDestroyContext(p->egl_display, p->egl_context); + } + p->egl_context = EGL_NO_CONTEXT; +} + +static bool context_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + + if (!eglInitialize(p->egl_display, NULL, NULL)) { + MP_FATAL(vo, "Couldn't initialize EGL\n"); + goto fail; + } + + const char *exts = eglQueryString(p->egl_display, EGL_EXTENSIONS); + if (exts) + MP_DBG(vo, "EGL extensions: %s\n", exts); + + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, + &p->egl_config)) + { + MP_FATAL(vo, "Could not create EGL context!\n"); + goto fail; + } + + return true; +fail: + context_destroy(ctx); + return false; +} + +static void angle_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_gl_ctx_uninit(ctx); + + DwmEnableMMCSS(FALSE); + + // Uninit the EGL surface implementation that is being used. Note: This may + // result in the *_destroy function being called twice since it is also + // called when the surface create function fails. This is fine because the + // *_destroy functions are idempotent. 
+ if (p->dxgi_swapchain) + d3d11_swapchain_surface_destroy(ctx); + else + egl_window_surface_destroy(ctx); + + context_destroy(ctx); + + // Uninit the EGL device implementation that is being used + if (p->d3d11_device) + d3d11_device_destroy(ctx); + else + d3d9_device_destroy(ctx); + + vo_w32_uninit(ctx->vo); +} + +static int GLAPIENTRY angle_swap_interval(int interval) +{ + if (!current_ctx) + return 0; + struct priv *p = current_ctx->priv; + + if (p->dxgi_swapchain) { + p->swapinterval = MPCLAMP(interval, 0, 4); + return 1; + } else { + return eglSwapInterval(p->egl_display, interval); + } +} + +static void d3d11_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + // Calling Present() on a flip-sequential swap chain will silently change + // the underlying storage of the back buffer to point to the next buffer in + // the chain. This results in the RTVs for the back buffer becoming + // unbound. Since ANGLE doesn't know we called Present(), it will continue + // using the unbound RTVs, so we must save and restore them ourselves. 
+ ID3D11RenderTargetView *rtvs[D3D11_SIMULTANEOUS_RENDER_TARGET_COUNT] = {0}; + ID3D11DepthStencilView *dsv = NULL; + ID3D11DeviceContext_OMGetRenderTargets(p->d3d11_context, + MP_ARRAY_SIZE(rtvs), rtvs, &dsv); + + HRESULT hr = IDXGISwapChain_Present(p->dxgi_swapchain, p->swapinterval, 0); + if (FAILED(hr)) + MP_FATAL(ctx->vo, "Couldn't present: %s\n", mp_HRESULT_to_str(hr)); + + // Restore the RTVs and release the objects + ID3D11DeviceContext_OMSetRenderTargets(p->d3d11_context, + MP_ARRAY_SIZE(rtvs), rtvs, dsv); + for (int i = 0; i < MP_ARRAY_SIZE(rtvs); i++) + SAFE_RELEASE(rtvs[i]); + SAFE_RELEASE(dsv); +} + +static void egl_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_window); +} + +static void angle_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + if (p->dxgi_swapchain) + d3d11_swap_buffers(ctx); + else + egl_swap_buffers(ctx); +} + + +static int angle_color_depth(struct ra_swapchain *sw) +{ + // Only 8-bit output is supported at the moment + return 8; +} + +static bool angle_submit_frame(struct ra_swapchain *sw, + const struct vo_frame *frame) +{ + struct priv *p = sw->ctx->priv; + bool ret = ra_gl_ctx_submit_frame(sw, frame); + if (p->d3d11_context) { + // DXGI Present doesn't flush the immediate context, which can make + // timers inaccurate, since the end queries might not be sent until the + // next frame. Fix this by flushing the context now. 
+ ID3D11DeviceContext_Flush(p->d3d11_context); + } + return ret; +} + +static bool angle_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct vo *vo = ctx->vo; + GL *gl = &p->gl; + + p->opts = mp_get_config_group(ctx, ctx->global, &angle_conf); + struct angle_opts *o = p->opts; + + if (!angle_load()) { + MP_VERBOSE(vo, "Failed to load LIBEGL.DLL\n"); + goto fail; + } + + // Create the underlying EGL device implementation + bool context_ok = false; + if ((!context_ok && !o->renderer) || o->renderer == RENDERER_D3D11) { + context_ok = d3d11_device_create(ctx); + if (context_ok) { + context_ok = context_init(ctx); + if (!context_ok) + d3d11_device_destroy(ctx); + } + } + if ((!context_ok && !o->renderer) || o->renderer == RENDERER_D3D9) { + context_ok = d3d9_device_create(ctx); + if (context_ok) { + MP_VERBOSE(vo, "Using Direct3D 9\n"); + + context_ok = context_init(ctx); + if (!context_ok) + d3d9_device_destroy(ctx); + } + } + if (!context_ok) + goto fail; + + if (!vo_w32_init(vo)) + goto fail; + + // Create the underlying EGL surface implementation + bool surface_ok = false; + if ((!surface_ok && o->egl_windowing == -1) || o->egl_windowing == 0) { + surface_ok = d3d11_swapchain_surface_create(ctx); + } + if ((!surface_ok && o->egl_windowing == -1) || o->egl_windowing == 1) { + surface_ok = egl_window_surface_create(ctx); + if (surface_ok) + MP_VERBOSE(vo, "Using EGL windowing\n"); + } + if (!surface_ok) + goto fail; + + mpegl_load_functions(gl, vo->log); + + current_ctx = ctx; + gl->SwapInterval = angle_swap_interval; + + // Custom swapchain impl for the D3D11 swapchain-based surface + static const struct ra_swapchain_fns dxgi_swapchain_fns = { + .color_depth = angle_color_depth, + .submit_frame = angle_submit_frame, + }; + struct ra_gl_ctx_params params = { + .swap_buffers = angle_swap_buffers, + .external_swapchain = p->dxgi_swapchain ? 
&dxgi_swapchain_fns : NULL, + }; + + gl->flipped = p->flipped; + if (!ra_gl_ctx_init(ctx, gl, params)) + goto fail; + + DwmEnableMMCSS(TRUE); // DWM MMCSS cargo-cult. The dxgl backend also does this. + + return true; +fail: + angle_uninit(ctx); + return false; +} + +static void resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + if (p->dxgi_swapchain) + d3d11_backbuffer_resize(ctx); + else + eglWaitClient(); // Should get ANGLE to resize its swapchain + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); +} + +static bool angle_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + resize(ctx); + return true; +} + +static int angle_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; +} + +const struct ra_ctx_fns ra_ctx_angle = { + .type = "opengl", + .name = "angle", + .init = angle_init, + .reconfig = angle_reconfig, + .control = angle_control, + .uninit = angle_uninit, +}; diff --git a/video/out/opengl/context_drm_egl.c b/video/out/opengl/context_drm_egl.c new file mode 100644 index 0000000..2db428f --- /dev/null +++ b/video/out/opengl/context_drm_egl.c @@ -0,0 +1,744 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <signal.h> +#include <string.h> +#include <poll.h> +#include <unistd.h> + +#include <gbm.h> +#include <EGL/egl.h> +#include <EGL/eglext.h> +#include <drm_fourcc.h> + +#include "libmpv/render_gl.h" +#include "common/common.h" +#include "osdep/timer.h" +#include "video/out/drm_atomic.h" +#include "video/out/drm_common.h" +#include "video/out/present_sync.h" + +#include "egl_helpers.h" +#include "common.h" +#include "context.h" + +#ifndef EGL_PLATFORM_GBM_MESA +#define EGL_PLATFORM_GBM_MESA 0x31D7 +#endif + +#ifndef EGL_PLATFORM_GBM_KHR +#define EGL_PLATFORM_GBM_KHR 0x31D7 +#endif + +struct gbm_frame { + struct gbm_bo *bo; +}; + +struct gbm { + struct gbm_surface *surface; + struct gbm_device *device; + struct gbm_frame **bo_queue; + unsigned int num_bos; +}; + +struct egl { + EGLDisplay display; + EGLContext context; + EGLSurface surface; +}; + +struct priv { + GL gl; + + struct egl egl; + struct gbm gbm; + + GLsync *vsync_fences; + unsigned int num_vsync_fences; + + uint32_t gbm_format; + uint64_t *gbm_modifiers; + unsigned int num_gbm_modifiers; + + struct mpv_opengl_drm_params_v2 drm_params; + struct mpv_opengl_drm_draw_surface_size draw_surface_size; +}; + +// Not general. 
Limited to only the formats being used in this module +static const char *gbm_format_to_string(uint32_t format) +{ + switch (format) { + case GBM_FORMAT_XRGB8888: + return "GBM_FORMAT_XRGB8888"; + case GBM_FORMAT_ARGB8888: + return "GBM_FORMAT_ARGB8888"; + case GBM_FORMAT_XBGR8888: + return "GBM_FORMAT_XBGR8888"; + case GBM_FORMAT_ABGR8888: + return "GBM_FORMAT_ABGR8888"; + case GBM_FORMAT_XRGB2101010: + return "GBM_FORMAT_XRGB2101010"; + case GBM_FORMAT_ARGB2101010: + return "GBM_FORMAT_ARGB2101010"; + case GBM_FORMAT_XBGR2101010: + return "GBM_FORMAT_XBGR2101010"; + case GBM_FORMAT_ABGR2101010: + return "GBM_FORMAT_ABGR2101010"; + default: + return "UNKNOWN"; + } +} + +// Allow falling back to an ARGB EGLConfig when we have an XRGB framebuffer. +// Also allow falling back to an XRGB EGLConfig for ARGB framebuffers, since +// this seems necessary to work with broken Mali drivers that don't report +// their EGLConfigs as supporting alpha properly. +static uint32_t fallback_format_for(uint32_t format) +{ + switch (format) { + case GBM_FORMAT_XRGB8888: + return GBM_FORMAT_ARGB8888; + case GBM_FORMAT_ARGB8888: + return GBM_FORMAT_XRGB8888; + case GBM_FORMAT_XBGR8888: + return GBM_FORMAT_ABGR8888; + case GBM_FORMAT_ABGR8888: + return GBM_FORMAT_XBGR8888; + case GBM_FORMAT_XRGB2101010: + return GBM_FORMAT_ARGB2101010; + case GBM_FORMAT_ARGB2101010: + return GBM_FORMAT_XRGB2101010; + case GBM_FORMAT_XBGR2101010: + return GBM_FORMAT_ABGR2101010; + case GBM_FORMAT_ABGR2101010: + return GBM_FORMAT_XBGR2101010; + default: + return 0; + } +} + +static int match_config_to_visual(void *user_data, EGLConfig *configs, int num_configs) +{ + struct ra_ctx *ctx = (struct ra_ctx*)user_data; + struct priv *p = ctx->priv; + const EGLint visual_id[] = { + (EGLint)p->gbm_format, + (EGLint)fallback_format_for(p->gbm_format), + 0 + }; + + for (unsigned int i = 0; visual_id[i] != 0; ++i) { + MP_VERBOSE(ctx, "Attempting to find EGLConfig matching %s\n", + gbm_format_to_string(visual_id[i])); 
+ for (unsigned int j = 0; j < num_configs; ++j) { + EGLint id; + + if (!eglGetConfigAttrib(p->egl.display, configs[j], EGL_NATIVE_VISUAL_ID, &id)) + continue; + + if (visual_id[i] == id) { + MP_VERBOSE(ctx, "Found matching EGLConfig for %s\n", + gbm_format_to_string(visual_id[i])); + return j; + } + } + MP_VERBOSE(ctx, "No matching EGLConfig for %s\n", gbm_format_to_string(visual_id[i])); + } + + MP_ERR(ctx, "Could not find EGLConfig matching the GBM visual (%s).\n", + gbm_format_to_string(p->gbm_format)); + return -1; +} + +static EGLDisplay egl_get_display(struct gbm_device *gbm_device) +{ + EGLDisplay ret; + + ret = mpegl_get_display(EGL_PLATFORM_GBM_MESA, "EGL_MESA_platform_gbm", gbm_device); + if (ret != EGL_NO_DISPLAY) + return ret; + + ret = mpegl_get_display(EGL_PLATFORM_GBM_KHR, "EGL_KHR_platform_gbm", gbm_device); + if (ret != EGL_NO_DISPLAY) + return ret; + + return eglGetDisplay(gbm_device); +} + +static bool init_egl(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + MP_VERBOSE(ctx, "Initializing EGL\n"); + p->egl.display = egl_get_display(p->gbm.device); + + if (p->egl.display == EGL_NO_DISPLAY) { + MP_ERR(ctx, "Failed to get EGL display.\n"); + return false; + } + if (!eglInitialize(p->egl.display, NULL, NULL)) { + MP_ERR(ctx, "Failed to initialize EGL.\n"); + return false; + } + EGLConfig config; + if (!mpegl_create_context_cb(ctx, + p->egl.display, + (struct mpegl_cb){match_config_to_visual, ctx}, + &p->egl.context, + &config)) + return false; + + MP_VERBOSE(ctx, "Initializing EGL surface\n"); + p->egl.surface = mpegl_create_window_surface( + p->egl.display, config, p->gbm.surface); + if (p->egl.surface == EGL_NO_SURFACE) { + p->egl.surface = eglCreateWindowSurface( + p->egl.display, config, p->gbm.surface, NULL); + } + if (p->egl.surface == EGL_NO_SURFACE) { + MP_ERR(ctx, "Failed to create EGL surface.\n"); + return false; + } + return true; +} + +static bool init_gbm(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo_drm_state 
*drm = ctx->vo->drm; + MP_VERBOSE(ctx->vo, "Creating GBM device\n"); + p->gbm.device = gbm_create_device(drm->fd); + if (!p->gbm.device) { + MP_ERR(ctx->vo, "Failed to create GBM device.\n"); + return false; + } + + MP_VERBOSE(ctx->vo, "Initializing GBM surface (%d x %d)\n", + p->draw_surface_size.width, p->draw_surface_size.height); + if (p->num_gbm_modifiers == 0) { + p->gbm.surface = gbm_surface_create( + p->gbm.device, + p->draw_surface_size.width, + p->draw_surface_size.height, + p->gbm_format, + GBM_BO_USE_SCANOUT | GBM_BO_USE_RENDERING); + } else { + p->gbm.surface = gbm_surface_create_with_modifiers( + p->gbm.device, + p->draw_surface_size.width, + p->draw_surface_size.height, + p->gbm_format, + p->gbm_modifiers, + p->num_gbm_modifiers); + } + if (!p->gbm.surface) { + MP_ERR(ctx->vo, "Failed to create GBM surface.\n"); + return false; + } + return true; +} + +static void framebuffer_destroy_callback(struct gbm_bo *bo, void *data) +{ + struct framebuffer *fb = data; + if (fb) { + drmModeRmFB(fb->fd, fb->id); + } +} + +static void update_framebuffer_from_bo(struct ra_ctx *ctx, struct gbm_bo *bo) +{ + struct priv *p = ctx->priv; + struct vo_drm_state *drm = ctx->vo->drm; + struct framebuffer *fb = gbm_bo_get_user_data(bo); + if (fb) { + drm->fb = fb; + return; + } + + fb = talloc_zero(ctx, struct framebuffer); + fb->fd = drm->fd; + fb->width = gbm_bo_get_width(bo); + fb->height = gbm_bo_get_height(bo); + uint64_t modifier = gbm_bo_get_modifier(bo); + + int ret; + if (p->num_gbm_modifiers == 0 || modifier == DRM_FORMAT_MOD_INVALID) { + uint32_t stride = gbm_bo_get_stride(bo); + uint32_t handle = gbm_bo_get_handle(bo).u32; + ret = drmModeAddFB2(fb->fd, fb->width, fb->height, + p->gbm_format, + (uint32_t[4]){handle, 0, 0, 0}, + (uint32_t[4]){stride, 0, 0, 0}, + (uint32_t[4]){0, 0, 0, 0}, + &fb->id, 0); + } else { + MP_VERBOSE(ctx, "GBM surface using modifier 0x%"PRIX64"\n", modifier); + + uint32_t handles[4] = {0}; + uint32_t strides[4] = {0}; + uint32_t 
offsets[4] = {0}; + uint64_t modifiers[4] = {0}; + + const int num_planes = gbm_bo_get_plane_count(bo); + for (int i = 0; i < num_planes; ++i) { + handles[i] = gbm_bo_get_handle_for_plane(bo, i).u32; + strides[i] = gbm_bo_get_stride_for_plane(bo, i); + offsets[i] = gbm_bo_get_offset(bo, i); + modifiers[i] = modifier; + } + + ret = drmModeAddFB2WithModifiers(fb->fd, fb->width, fb->height, + p->gbm_format, + handles, strides, offsets, modifiers, + &fb->id, DRM_MODE_FB_MODIFIERS); + } + if (ret) { + MP_ERR(ctx->vo, "Failed to create framebuffer: %s\n", mp_strerror(errno)); + } + gbm_bo_set_user_data(bo, fb, framebuffer_destroy_callback); + drm->fb = fb; +} + +static void queue_flip(struct ra_ctx *ctx, struct gbm_frame *frame) +{ + struct vo_drm_state *drm = ctx->vo->drm; + + update_framebuffer_from_bo(ctx, frame->bo); + + struct drm_atomic_context *atomic_ctx = drm->atomic_context; + drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "FB_ID", drm->fb->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "CRTC_ID", atomic_ctx->crtc->id); + drm_object_set_property(atomic_ctx->request, atomic_ctx->draw_plane, "ZPOS", 1); + + int ret = drmModeAtomicCommit(drm->fd, atomic_ctx->request, + DRM_MODE_ATOMIC_NONBLOCK | DRM_MODE_PAGE_FLIP_EVENT, drm); + + // drmModeAtomicCommit() signals failure with a negative return value, so pass + // errno to mp_strerror() as the other DRM call sites in this file do. + if (ret) + MP_WARN(ctx->vo, "Failed to commit atomic request: %s\n", mp_strerror(errno)); + drm->waiting_for_flip = !ret; + + drmModeAtomicFree(atomic_ctx->request); + atomic_ctx->request = drmModeAtomicAlloc(); +} + +static void enqueue_bo(struct ra_ctx *ctx, struct gbm_bo *bo) +{ + struct priv *p = ctx->priv; + + struct gbm_frame *new_frame = talloc(p, struct gbm_frame); + new_frame->bo = bo; + MP_TARRAY_APPEND(p, p->gbm.bo_queue, p->gbm.num_bos, new_frame); +} + +static void dequeue_bo(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + talloc_free(p->gbm.bo_queue[0]); + MP_TARRAY_REMOVE_AT(p->gbm.bo_queue, p->gbm.num_bos, 0); +} + +static void swapchain_step(struct ra_ctx *ctx) +{ 
+ struct priv *p = ctx->priv; + + if (!(p->gbm.num_bos > 0)) + return; + + if (p->gbm.bo_queue[0]->bo) + gbm_surface_release_buffer(p->gbm.surface, p->gbm.bo_queue[0]->bo); + dequeue_bo(ctx); +} + +static void new_fence(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + if (p->gl.FenceSync) { + GLsync fence = p->gl.FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + if (fence) + MP_TARRAY_APPEND(p, p->vsync_fences, p->num_vsync_fences, fence); + } +} + +static void wait_fence(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + while (p->num_vsync_fences && (p->num_vsync_fences >= p->gbm.num_bos)) { + p->gl.ClientWaitSync(p->vsync_fences[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); + p->gl.DeleteSync(p->vsync_fences[0]); + MP_TARRAY_REMOVE_AT(p->vsync_fences, p->num_vsync_fences, 0); + } +} + +static bool drm_egl_start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct ra_ctx *ctx = sw->ctx; + struct priv *p = ctx->priv; + struct vo_drm_state *drm = ctx->vo->drm; + + if (!drm->atomic_context->request) { + drm->atomic_context->request = drmModeAtomicAlloc(); + p->drm_params.atomic_request_ptr = &drm->atomic_context->request; + } + + return ra_gl_ctx_start_frame(sw, out_fbo); +} + +static bool drm_egl_submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) +{ + struct ra_ctx *ctx = sw->ctx; + struct vo_drm_state *drm = ctx->vo->drm; + + drm->still = frame->still; + + return ra_gl_ctx_submit_frame(sw, frame); +} + +static void drm_egl_swap_buffers(struct ra_swapchain *sw) +{ + struct ra_ctx *ctx = sw->ctx; + struct priv *p = ctx->priv; + struct vo_drm_state *drm = ctx->vo->drm; + const bool drain = drm->paused || drm->still; // True when we need to drain the swapchain + + if (!drm->active) + return; + + wait_fence(ctx); + + eglSwapBuffers(p->egl.display, p->egl.surface); + + struct gbm_bo *new_bo = gbm_surface_lock_front_buffer(p->gbm.surface); + if (!new_bo) { + MP_ERR(ctx->vo, "Couldn't lock front buffer\n"); + return; + } + enqueue_bo(ctx, 
new_bo); + new_fence(ctx); + + while (drain || p->gbm.num_bos > ctx->vo->opts->swapchain_depth || + !gbm_surface_has_free_buffers(p->gbm.surface)) { + if (drm->waiting_for_flip) { + vo_drm_wait_on_flip(drm); + swapchain_step(ctx); + } + if (p->gbm.num_bos <= 1) + break; + if (!p->gbm.bo_queue[1] || !p->gbm.bo_queue[1]->bo) { + MP_ERR(ctx->vo, "Hole in swapchain?\n"); + swapchain_step(ctx); + continue; + } + queue_flip(ctx, p->gbm.bo_queue[1]); + } +} + +static const struct ra_swapchain_fns drm_egl_swapchain = { + .start_frame = drm_egl_start_frame, + .submit_frame = drm_egl_submit_frame, + .swap_buffers = drm_egl_swap_buffers, +}; + +static void drm_egl_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo_drm_state *drm = ctx->vo->drm; + if (drm) { + struct drm_atomic_context *atomic_ctx = drm->atomic_context; + + if (drmModeAtomicCommit(drm->fd, atomic_ctx->request, 0, NULL)) + MP_ERR(ctx->vo, "Failed to commit atomic request: %s\n", + mp_strerror(errno)); + + drmModeAtomicFree(atomic_ctx->request); + } + + ra_gl_ctx_uninit(ctx); + vo_drm_uninit(ctx->vo); + + if (p) { + // According to GBM documentation all BOs must be released + // before gbm_surface_destroy can be called on the surface. + while (p->gbm.num_bos) { + swapchain_step(ctx); + } + + eglMakeCurrent(p->egl.display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + if (p->egl.display != EGL_NO_DISPLAY) { + eglDestroySurface(p->egl.display, p->egl.surface); + eglDestroyContext(p->egl.display, p->egl.context); + } + if (p->gbm.surface) + gbm_surface_destroy(p->gbm.surface); + eglTerminate(p->egl.display); + // This function is also the error path of drm_egl_init(), which can fail + // before the GBM device has been created; do not hand NULL to GBM. + if (p->gbm.device) + gbm_device_destroy(p->gbm.device); + + if (p->drm_params.render_fd != -1) + close(p->drm_params.render_fd); + } +} + +// If the draw plane supports ARGB we want to use that, but if it doesn't we +// fall back on XRGB. 
If we do not have atomic there is no particular reason to +// be using ARGB (drmprime hwdec will not work without atomic, anyway), so we +// fall back to XRGB (another reason is that we do not have the convenient +// atomic_ctx and its convenient plane fields). +static bool probe_gbm_format(struct ra_ctx *ctx, uint32_t argb_format, uint32_t xrgb_format) +{ + struct priv *p = ctx->priv; + struct vo_drm_state *drm = ctx->vo->drm; + + drmModePlane *drmplane = drmModeGetPlane(drm->fd, drm->atomic_context->draw_plane->id); + // The plane lookup can fail; bail out instead of dereferencing NULL below. + if (!drmplane) { + MP_ERR(ctx->vo, "Failed to query draw plane.\n"); + return false; + } + + bool have_argb = false; + bool have_xrgb = false; + bool result = false; + for (unsigned int i = 0; i < drmplane->count_formats; ++i) { + if (drmplane->formats[i] == argb_format) { + have_argb = true; + } else if (drmplane->formats[i] == xrgb_format) { + have_xrgb = true; + } + } + + if (have_argb) { + p->gbm_format = argb_format; + MP_VERBOSE(ctx->vo, "%s supported by draw plane.\n", gbm_format_to_string(argb_format)); + result = true; + } else if (have_xrgb) { + p->gbm_format = xrgb_format; + MP_VERBOSE(ctx->vo, "%s not supported by draw plane: Falling back to %s.\n", + gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format)); + result = true; + } + + drmModeFreePlane(drmplane); + return result; +} + +static bool probe_gbm_modifiers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo_drm_state *drm = ctx->vo->drm; + + drmModePropertyBlobPtr blob = drm_object_get_property_blob(drm->atomic_context->draw_plane, + "IN_FORMATS"); + if (!blob) { + MP_VERBOSE(ctx->vo, "Failed to find IN_FORMATS property\n"); + return false; + } + + struct drm_format_modifier_blob *data = blob->data; + uint32_t *fmts = (uint32_t *)((char *)data + data->formats_offset); + struct drm_format_modifier *mods = + (struct drm_format_modifier *)((char *)data + data->modifiers_offset); + + for (unsigned int j = 0; j < data->count_modifiers; ++j) { + struct drm_format_modifier *mod = &mods[j]; + for (uint64_t k = 0; k < 64; ++k) { + if (mod->formats & 
(1ull << k)) { + uint32_t fmt = fmts[k + mod->offset]; + if (fmt == p->gbm_format) { + MP_TARRAY_APPEND(p, p->gbm_modifiers, + p->num_gbm_modifiers, mod->modifier); + MP_VERBOSE(ctx->vo, "Supported modifier: 0x%"PRIX64"\n", + (uint64_t)mod->modifier); + break; + } + } + } + } + drmModeFreePropertyBlob(blob); + + if (p->num_gbm_modifiers == 0) { + MP_VERBOSE(ctx->vo, "No supported DRM modifiers found.\n"); + } + return true; +} + +static void drm_egl_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info) +{ + struct vo_drm_state *drm = ctx->vo->drm; + present_sync_get_info(drm->present, info); +} + +static bool drm_egl_init(struct ra_ctx *ctx) +{ + if (!vo_drm_init(ctx->vo)) + goto err; + + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct vo_drm_state *drm = ctx->vo->drm; + + // talloc_zero() leaves render_fd at 0, which is a valid descriptor. Mark it + // as unopened so that an early "goto err" does not make drm_egl_uninit() + // close fd 0. + p->drm_params.render_fd = -1; + + if (ctx->vo->drm->opts->draw_surface_size.wh_valid) { + p->draw_surface_size.width = ctx->vo->drm->opts->draw_surface_size.w; + p->draw_surface_size.height = ctx->vo->drm->opts->draw_surface_size.h; + } else { + p->draw_surface_size.width = drm->mode.mode.hdisplay; + p->draw_surface_size.height = drm->mode.mode.vdisplay; + } + + drm->width = p->draw_surface_size.width; + drm->height = p->draw_surface_size.height; + + uint32_t argb_format; + uint32_t xrgb_format; + switch (ctx->vo->drm->opts->drm_format) { + case DRM_OPTS_FORMAT_XRGB2101010: + argb_format = GBM_FORMAT_ARGB2101010; + xrgb_format = GBM_FORMAT_XRGB2101010; + break; + case DRM_OPTS_FORMAT_XBGR2101010: + argb_format = GBM_FORMAT_ABGR2101010; + xrgb_format = GBM_FORMAT_XBGR2101010; + break; + case DRM_OPTS_FORMAT_XBGR8888: + argb_format = GBM_FORMAT_ABGR8888; + xrgb_format = GBM_FORMAT_XBGR8888; + break; + default: + argb_format = GBM_FORMAT_ARGB8888; + xrgb_format = GBM_FORMAT_XRGB8888; + break; + } + + if (!probe_gbm_format(ctx, argb_format, xrgb_format)) { + MP_ERR(ctx->vo, "No suitable format found on draw plane (tried: %s and %s).\n", + gbm_format_to_string(argb_format), gbm_format_to_string(xrgb_format)); 
+ goto err; + } + + // It is not fatal if this fails. We'll just try without modifiers. + probe_gbm_modifiers(ctx); + + if (!init_gbm(ctx)) { + MP_ERR(ctx->vo, "Failed to setup GBM.\n"); + goto err; + } + + if (!init_egl(ctx)) { + MP_ERR(ctx->vo, "Failed to setup EGL.\n"); + goto err; + } + + if (!eglMakeCurrent(p->egl.display, p->egl.surface, p->egl.surface, + p->egl.context)) { + MP_ERR(ctx->vo, "Failed to make context current.\n"); + goto err; + } + + mpegl_load_functions(&p->gl, ctx->vo->log); + // required by gbm_surface_lock_front_buffer + eglSwapBuffers(p->egl.display, p->egl.surface); + + MP_VERBOSE(ctx, "Preparing framebuffer\n"); + struct gbm_bo *new_bo = gbm_surface_lock_front_buffer(p->gbm.surface); + if (!new_bo) { + MP_ERR(ctx, "Failed to lock GBM surface.\n"); + goto err; + } + + enqueue_bo(ctx, new_bo); + update_framebuffer_from_bo(ctx, new_bo); + if (!drm->fb || !drm->fb->id) { + MP_ERR(ctx, "Failed to create framebuffer.\n"); + goto err; + } + + if (!vo_drm_acquire_crtc(ctx->vo->drm)) { + MP_ERR(ctx, "Failed to set CRTC for connector %u: %s\n", + drm->connector->connector_id, mp_strerror(errno)); + goto err; + } + + vo_drm_set_monitor_par(ctx->vo); + + p->drm_params.fd = drm->fd; + p->drm_params.crtc_id = drm->crtc_id; + p->drm_params.connector_id = drm->connector->connector_id; + p->drm_params.atomic_request_ptr = &drm->atomic_context->request; + char *rendernode_path = drmGetRenderDeviceNameFromFd(drm->fd); + if (rendernode_path) { + MP_VERBOSE(ctx, "Opening render node \"%s\"\n", rendernode_path); + p->drm_params.render_fd = open(rendernode_path, O_RDWR | O_CLOEXEC); + if (p->drm_params.render_fd == -1) { + MP_WARN(ctx, "Cannot open render node: %s\n", mp_strerror(errno)); + } + free(rendernode_path); + } else { + p->drm_params.render_fd = -1; + MP_VERBOSE(ctx, "Could not find path to render node.\n"); + } + + struct ra_gl_ctx_params params = { + .external_swapchain = &drm_egl_swapchain, + .get_vsync = &drm_egl_get_vsync, + }; + if 
(!ra_gl_ctx_init(ctx, &p->gl, params)) + goto err; + + ra_add_native_resource(ctx->ra, "drm_params_v2", &p->drm_params); + ra_add_native_resource(ctx->ra, "drm_draw_surface_size", &p->draw_surface_size); + + return true; + +err: + drm_egl_uninit(ctx); + return false; +} + +static bool drm_egl_reconfig(struct ra_ctx *ctx) +{ + struct vo_drm_state *drm = ctx->vo->drm; + ctx->vo->dwidth = drm->fb->width; + ctx->vo->dheight = drm->fb->height; + ra_gl_ctx_resize(ctx->swapchain, drm->fb->width, drm->fb->height, 0); + return true; +} + +static int drm_egl_control(struct ra_ctx *ctx, int *events, int request, + void *arg) +{ + int ret = vo_drm_control(ctx->vo, events, request, arg); + return ret; +} + +static void drm_egl_wait_events(struct ra_ctx *ctx, int64_t until_time_ns) +{ + vo_drm_wait_events(ctx->vo, until_time_ns); +} + +static void drm_egl_wakeup(struct ra_ctx *ctx) +{ + vo_drm_wakeup(ctx->vo); +} + +const struct ra_ctx_fns ra_ctx_drm_egl = { + .type = "opengl", + .name = "drm", + .reconfig = drm_egl_reconfig, + .control = drm_egl_control, + .init = drm_egl_init, + .uninit = drm_egl_uninit, + .wait_events = drm_egl_wait_events, + .wakeup = drm_egl_wakeup, +}; diff --git a/video/out/opengl/context_dxinterop.c b/video/out/opengl/context_dxinterop.c new file mode 100644 index 0000000..cda696f --- /dev/null +++ b/video/out/opengl/context_dxinterop.c @@ -0,0 +1,605 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <windows.h> +#include <versionhelpers.h> +#include <d3d9.h> +#include <dwmapi.h> +#include "osdep/windows_utils.h" +#include "video/out/w32_common.h" +#include "context.h" +#include "utils.h" + +// For WGL_ACCESS_WRITE_DISCARD_NV, etc. +#include <GL/wglext.h> + +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) + +// mingw-w64 header typo? +#ifndef IDirect3DSwapChain9Ex_GetBackBuffer +#define IDirect3DSwapChain9Ex_GetBackBuffer IDirect3DSwapChain9EX_GetBackBuffer +#endif + +struct priv { + GL gl; + + HMODULE d3d9_dll; + HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D); + + // Direct3D9 device and resources + IDirect3D9Ex *d3d9ex; + IDirect3DDevice9Ex *device; + HANDLE device_h; + IDirect3DSwapChain9Ex *swapchain; + IDirect3DSurface9 *backbuffer; + IDirect3DSurface9 *rtarget; + HANDLE rtarget_h; + + // OpenGL offscreen context + HWND os_wnd; + HDC os_dc; + HGLRC os_ctx; + + // OpenGL resources + GLuint texture; + GLuint main_fb; + + // Did we lose the device? 
+ bool lost_device; + + // Requested and current parameters + int requested_swapinterval; + int width, height, swapinterval; +}; + +static __thread struct ra_ctx *current_ctx; + +static void pump_message_loop(void) +{ + // We have a hidden window on this thread (for the OpenGL context,) so pump + // its message loop at regular intervals to be safe + MSG message; + while (PeekMessageW(&message, NULL, 0, 0, PM_REMOVE)) + DispatchMessageW(&message); +} + +static void *w32gpa(const GLubyte *procName) +{ + HMODULE oglmod; + void *res = wglGetProcAddress(procName); + if (res) + return res; + oglmod = GetModuleHandleW(L"opengl32.dll"); + return GetProcAddress(oglmod, procName); +} + +static int os_ctx_create(struct ra_ctx *ctx) +{ + static const wchar_t os_wnd_class[] = L"mpv offscreen gl"; + struct priv *p = ctx->priv; + GL *gl = &p->gl; + HGLRC legacy_context = NULL; + + RegisterClassExW(&(WNDCLASSEXW) { + .cbSize = sizeof(WNDCLASSEXW), + .style = CS_OWNDC, + .lpfnWndProc = DefWindowProc, + .hInstance = HINST_THISCOMPONENT, + .lpszClassName = os_wnd_class, + }); + + // Create a hidden window for an offscreen OpenGL context. It might also be + // possible to use the VO window, but MSDN recommends against drawing to + // the same window with flip mode present and other APIs, so play it safe. + p->os_wnd = CreateWindowExW(0, os_wnd_class, os_wnd_class, 0, 0, 0, 200, + 200, NULL, NULL, HINST_THISCOMPONENT, NULL); + p->os_dc = GetDC(p->os_wnd); + if (!p->os_dc) { + MP_FATAL(ctx->vo, "Couldn't create window for offscreen rendering\n"); + goto fail; + } + + // Choose a pixel format. It probably doesn't matter what this is because + // the primary framebuffer will not be used. 
+ PIXELFORMATDESCRIPTOR pfd = { + .nSize = sizeof pfd, + .nVersion = 1, + .dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER, + .iPixelType = PFD_TYPE_RGBA, + .cColorBits = 24, + .iLayerType = PFD_MAIN_PLANE, + }; + int pf = ChoosePixelFormat(p->os_dc, &pfd); + if (!pf) { + MP_FATAL(ctx->vo, + "Couldn't choose pixelformat for offscreen rendering: %s\n", + mp_LastError_to_str()); + goto fail; + } + SetPixelFormat(p->os_dc, pf, &pfd); + + legacy_context = wglCreateContext(p->os_dc); + if (!legacy_context || !wglMakeCurrent(p->os_dc, legacy_context)) { + MP_FATAL(ctx->vo, "Couldn't create OpenGL context for offscreen rendering: %s\n", + mp_LastError_to_str()); + goto fail; + } + + const char *(GLAPIENTRY *wglGetExtensionsStringARB)(HDC hdc) + = w32gpa((const GLubyte*)"wglGetExtensionsStringARB"); + if (!wglGetExtensionsStringARB) { + MP_FATAL(ctx->vo, "The OpenGL driver does not support OpenGL 3.x\n"); + goto fail; + } + + const char *wgl_exts = wglGetExtensionsStringARB(p->os_dc); + if (!gl_check_extension(wgl_exts, "WGL_ARB_create_context")) { + MP_FATAL(ctx->vo, "The OpenGL driver does not support OpenGL 3.x\n"); + goto fail; + } + + HGLRC (GLAPIENTRY *wglCreateContextAttribsARB)(HDC hDC, HGLRC hShareContext, + const int *attribList) + = w32gpa((const GLubyte*)"wglCreateContextAttribsARB"); + if (!wglCreateContextAttribsARB) { + MP_FATAL(ctx->vo, "The OpenGL driver does not support OpenGL 3.x\n"); + goto fail; + } + + int attribs[] = { + WGL_CONTEXT_MAJOR_VERSION_ARB, 3, + WGL_CONTEXT_MINOR_VERSION_ARB, 0, + WGL_CONTEXT_FLAGS_ARB, 0, + WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, + 0 + }; + + p->os_ctx = wglCreateContextAttribsARB(p->os_dc, 0, attribs); + if (!p->os_ctx) { + // NVidia, instead of ignoring WGL_CONTEXT_PROFILE_MASK_ARB, will error + // out if it's present on pre-3.2 contexts. + // Remove it (attribs[6] and attribs[7]) and retry the context creation. 
+ attribs[6] = attribs[7] = 0; + p->os_ctx = wglCreateContextAttribsARB(p->os_dc, 0, attribs); + } + if (!p->os_ctx) { + MP_FATAL(ctx->vo, + "Couldn't create OpenGL 3.x context for offscreen rendering: %s\n", + mp_LastError_to_str()); + goto fail; + } + + wglMakeCurrent(p->os_dc, NULL); + wglDeleteContext(legacy_context); + legacy_context = NULL; + + if (!wglMakeCurrent(p->os_dc, p->os_ctx)) { + MP_FATAL(ctx->vo, + "Couldn't activate OpenGL 3.x context for offscreen rendering: %s\n", + mp_LastError_to_str()); + goto fail; + } + + mpgl_load_functions(gl, w32gpa, wgl_exts, ctx->vo->log); + if (!(gl->mpgl_caps & MPGL_CAP_DXINTEROP)) { + MP_FATAL(ctx->vo, "WGL_NV_DX_interop is not supported\n"); + goto fail; + } + + return 0; +fail: + if (legacy_context) { + wglMakeCurrent(p->os_dc, NULL); + wglDeleteContext(legacy_context); + } + return -1; +} + +static void os_ctx_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + if (p->os_ctx) { + wglMakeCurrent(p->os_dc, NULL); + wglDeleteContext(p->os_ctx); + } + if (p->os_dc) + ReleaseDC(p->os_wnd, p->os_dc); + if (p->os_wnd) + DestroyWindow(p->os_wnd); +} + +static int d3d_size_dependent_create(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + GL *gl = &p->gl; + HRESULT hr; + + IDirect3DSwapChain9 *sw9; + hr = IDirect3DDevice9Ex_GetSwapChain(p->device, 0, &sw9); + if (FAILED(hr)) { + MP_ERR(ctx->vo, "Couldn't get swap chain: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + hr = IDirect3DSwapChain9_QueryInterface(sw9, &IID_IDirect3DSwapChain9Ex, + (void**)&p->swapchain); + if (FAILED(hr)) { + SAFE_RELEASE(sw9); + MP_ERR(ctx->vo, "Obtained swap chain is not IDirect3DSwapChain9Ex: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + SAFE_RELEASE(sw9); + + hr = IDirect3DSwapChain9Ex_GetBackBuffer(p->swapchain, 0, + D3DBACKBUFFER_TYPE_MONO, &p->backbuffer); + if (FAILED(hr)) { + MP_ERR(ctx->vo, "Couldn't get backbuffer: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + // Get the format of the backbuffer + 
D3DSURFACE_DESC bb_desc = { 0 }; + IDirect3DSurface9_GetDesc(p->backbuffer, &bb_desc); + + MP_VERBOSE(ctx->vo, "DX_interop backbuffer size: %ux%u\n", + (unsigned)bb_desc.Width, (unsigned)bb_desc.Height); + MP_VERBOSE(ctx->vo, "DX_interop backbuffer format: %u\n", + (unsigned)bb_desc.Format); + + // Create a rendertarget with the same format as the backbuffer for + // rendering from OpenGL + HANDLE share_handle = NULL; + hr = IDirect3DDevice9Ex_CreateRenderTarget(p->device, bb_desc.Width, + bb_desc.Height, bb_desc.Format, D3DMULTISAMPLE_NONE, 0, FALSE, + &p->rtarget, &share_handle); + if (FAILED(hr)) { + MP_ERR(ctx->vo, "Couldn't create rendertarget: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + // Register the share handle with WGL_NV_DX_interop. Nvidia does not + // require the use of share handles, but Intel does. + if (share_handle) + gl->DXSetResourceShareHandleNV(p->rtarget, share_handle); + + // Create the OpenGL-side texture + gl->GenTextures(1, &p->texture); + + // Now share the rendertarget with OpenGL as a texture + p->rtarget_h = gl->DXRegisterObjectNV(p->device_h, p->rtarget, p->texture, + GL_TEXTURE_2D, WGL_ACCESS_WRITE_DISCARD_NV); + if (!p->rtarget_h) { + MP_ERR(ctx->vo, "Couldn't share rendertarget with OpenGL: %s\n", + mp_LastError_to_str()); + return -1; + } + + // Lock the rendertarget for use from OpenGL. This will only be unlocked in + // swap_buffers() when it is blitted to the real Direct3D backbuffer. 
+ if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) { + MP_ERR(ctx->vo, "Couldn't lock rendertarget: %s\n", + mp_LastError_to_str()); + return -1; + } + + gl->BindFramebuffer(GL_FRAMEBUFFER, p->main_fb); + gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, p->texture, 0); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + return 0; +} + +static void d3d_size_dependent_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + GL *gl = &p->gl; + + if (p->rtarget_h) { + gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h); + gl->DXUnregisterObjectNV(p->device_h, p->rtarget_h); + } + p->rtarget_h = 0; + if (p->texture) + gl->DeleteTextures(1, &p->texture); + p->texture = 0; + + SAFE_RELEASE(p->rtarget); + SAFE_RELEASE(p->backbuffer); + SAFE_RELEASE(p->swapchain); +} + +static void fill_presentparams(struct ra_ctx *ctx, + D3DPRESENT_PARAMETERS *pparams) +{ + struct priv *p = ctx->priv; + + // Present intervals other than IMMEDIATE and ONE don't seem to work. It's + // possible that they're not compatible with FLIPEX. + UINT presentation_interval; + switch (p->requested_swapinterval) { + case 0: presentation_interval = D3DPRESENT_INTERVAL_IMMEDIATE; break; + case 1: presentation_interval = D3DPRESENT_INTERVAL_ONE; break; + default: presentation_interval = D3DPRESENT_INTERVAL_ONE; break; + } + + *pparams = (D3DPRESENT_PARAMETERS) { + .Windowed = TRUE, + .BackBufferWidth = ctx->vo->dwidth ? ctx->vo->dwidth : 1, + .BackBufferHeight = ctx->vo->dheight ? ctx->vo->dheight : 1, + // Add one frame for the backbuffer and one frame of "slack" to reduce + // contention with the window manager when acquiring the backbuffer + .BackBufferCount = ctx->vo->opts->swapchain_depth + 2, + .SwapEffect = IsWindows7OrGreater() ? 
D3DSWAPEFFECT_FLIPEX : D3DSWAPEFFECT_FLIP, + // Automatically get the backbuffer format from the display format + .BackBufferFormat = D3DFMT_UNKNOWN, + .PresentationInterval = presentation_interval, + .hDeviceWindow = vo_w32_hwnd(ctx->vo), + }; +} + +static int d3d_create(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + GL *gl = &p->gl; + HRESULT hr; + + p->d3d9_dll = LoadLibraryW(L"d3d9.dll"); + if (!p->d3d9_dll) { + MP_FATAL(ctx->vo, "Failed to load \"d3d9.dll\": %s\n", + mp_LastError_to_str()); + return -1; + } + + // WGL_NV_dx_interop requires Direct3D 9Ex on WDDM systems. Direct3D 9Ex + // also enables flip mode present for efficient rendering with the DWM. + p->Direct3DCreate9Ex = (void*)GetProcAddress(p->d3d9_dll, + "Direct3DCreate9Ex"); + if (!p->Direct3DCreate9Ex) { + MP_FATAL(ctx->vo, "Direct3D 9Ex not supported\n"); + return -1; + } + + hr = p->Direct3DCreate9Ex(D3D_SDK_VERSION, &p->d3d9ex); + if (FAILED(hr)) { + MP_FATAL(ctx->vo, "Couldn't create Direct3D9Ex: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + D3DPRESENT_PARAMETERS pparams; + fill_presentparams(ctx, &pparams); + + hr = IDirect3D9Ex_CreateDeviceEx(p->d3d9ex, D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, vo_w32_hwnd(ctx->vo), + D3DCREATE_HARDWARE_VERTEXPROCESSING | D3DCREATE_PUREDEVICE | + D3DCREATE_FPU_PRESERVE | D3DCREATE_MULTITHREADED | + D3DCREATE_NOWINDOWCHANGES, + &pparams, NULL, &p->device); + if (FAILED(hr)) { + MP_FATAL(ctx->vo, "Couldn't create device: %s\n", mp_HRESULT_to_str(hr)); + return -1; + } + + IDirect3DDevice9Ex_SetMaximumFrameLatency(p->device, ctx->vo->opts->swapchain_depth); + + // Register the Direct3D device with WGL_NV_dx_interop + p->device_h = gl->DXOpenDeviceNV(p->device); + if (!p->device_h) { + MP_FATAL(ctx->vo, "Couldn't open Direct3D device from OpenGL: %s\n", + mp_LastError_to_str()); + return -1; + } + + return 0; +} + +static void d3d_destroy(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + GL *gl = &p->gl; + + if (p->device_h) + 
gl->DXCloseDeviceNV(p->device_h); + SAFE_RELEASE(p->device); + SAFE_RELEASE(p->d3d9ex); + if (p->d3d9_dll) + FreeLibrary(p->d3d9_dll); +} + +static void dxgl_uninit(struct ra_ctx *ctx) +{ + ra_gl_ctx_uninit(ctx); + d3d_size_dependent_destroy(ctx); + d3d_destroy(ctx); + os_ctx_destroy(ctx); + vo_w32_uninit(ctx->vo); + DwmEnableMMCSS(FALSE); + pump_message_loop(); +} + +static void dxgl_reset(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + HRESULT hr; + + // Check if the device actually needs to be reset + if (ctx->vo->dwidth == p->width && ctx->vo->dheight == p->height && + p->requested_swapinterval == p->swapinterval && !p->lost_device) + return; + + d3d_size_dependent_destroy(ctx); + + D3DPRESENT_PARAMETERS pparams; + fill_presentparams(ctx, &pparams); + + hr = IDirect3DDevice9Ex_ResetEx(p->device, &pparams, NULL); + if (FAILED(hr)) { + p->lost_device = true; + MP_ERR(ctx->vo, "Couldn't reset device: %s\n", mp_HRESULT_to_str(hr)); + return; + } + + if (d3d_size_dependent_create(ctx) < 0) { + p->lost_device = true; + MP_ERR(ctx->vo, "Couldn't recreate Direct3D objects after reset\n"); + return; + } + + MP_VERBOSE(ctx->vo, "Direct3D device reset\n"); + p->width = ctx->vo->dwidth; + p->height = ctx->vo->dheight; + p->swapinterval = p->requested_swapinterval; + p->lost_device = false; +} + +static int GLAPIENTRY dxgl_swap_interval(int interval) +{ + if (!current_ctx) + return 0; + struct priv *p = current_ctx->priv; + + p->requested_swapinterval = interval; + dxgl_reset(current_ctx); + return 1; +} + +static void dxgl_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + GL *gl = &p->gl; + HRESULT hr; + + pump_message_loop(); + + // If the device is still lost, try to reset it again + if (p->lost_device) + dxgl_reset(ctx); + if (p->lost_device) + return; + + if (!gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h)) { + MP_ERR(ctx->vo, "Couldn't unlock rendertarget for present: %s\n", + mp_LastError_to_str()); + return; + } + + // Blit the OpenGL 
rendertarget to the backbuffer + hr = IDirect3DDevice9Ex_StretchRect(p->device, p->rtarget, NULL, + p->backbuffer, NULL, D3DTEXF_NONE); + if (FAILED(hr)) { + MP_ERR(ctx->vo, "Couldn't stretchrect for present: %s\n", + mp_HRESULT_to_str(hr)); + return; + } + + hr = IDirect3DDevice9Ex_PresentEx(p->device, NULL, NULL, NULL, NULL, 0); + switch (hr) { + case D3DERR_DEVICELOST: + case D3DERR_DEVICEHUNG: + MP_VERBOSE(ctx->vo, "Direct3D device lost! Resetting.\n"); + p->lost_device = true; + dxgl_reset(ctx); + return; + default: + if (FAILED(hr)) + MP_ERR(ctx->vo, "Failed to present: %s\n", mp_HRESULT_to_str(hr)); + } + + if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) { + MP_ERR(ctx->vo, "Couldn't lock rendertarget after present: %s\n", + mp_LastError_to_str()); + } +} + +static bool dxgl_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + GL *gl = &p->gl; + + p->requested_swapinterval = 1; + + if (!vo_w32_init(ctx->vo)) + goto fail; + if (os_ctx_create(ctx) < 0) + goto fail; + + // Create the shared framebuffer + gl->GenFramebuffers(1, &p->main_fb); + + current_ctx = ctx; + gl->SwapInterval = dxgl_swap_interval; + + if (d3d_create(ctx) < 0) + goto fail; + if (d3d_size_dependent_create(ctx) < 0) + goto fail; + + static const struct ra_swapchain_fns empty_swapchain_fns = {0}; + struct ra_gl_ctx_params params = { + .swap_buffers = dxgl_swap_buffers, + .external_swapchain = &empty_swapchain_fns, + }; + + gl->flipped = true; + if (!ra_gl_ctx_init(ctx, gl, params)) + goto fail; + + ra_add_native_resource(ctx->ra, "IDirect3DDevice9Ex", p->device); + ra_add_native_resource(ctx->ra, "dxinterop_device_HANDLE", p->device_h); + + DwmEnableMMCSS(TRUE); + return true; +fail: + dxgl_uninit(ctx); + return false; +} + +static void resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + dxgl_reset(ctx); + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, p->main_fb); +} + +static bool dxgl_reconfig(struct ra_ctx 
*ctx) +{ + vo_w32_config(ctx->vo); + resize(ctx); + return true; +} + +static int dxgl_control(struct ra_ctx *ctx, int *events, int request, + void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; +} + +const struct ra_ctx_fns ra_ctx_dxgl = { + .type = "opengl", + .name = "dxinterop", + .init = dxgl_init, + .reconfig = dxgl_reconfig, + .control = dxgl_control, + .uninit = dxgl_uninit, +}; diff --git a/video/out/opengl/context_glx.c b/video/out/opengl/context_glx.c new file mode 100644 index 0000000..4062224 --- /dev/null +++ b/video/out/opengl/context_glx.c @@ -0,0 +1,351 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <X11/Xlib.h> +#include <GL/glx.h> + +// FreeBSD 10.0-CURRENT lacks the GLX_ARB_create_context extension completely +#ifndef GLX_CONTEXT_MAJOR_VERSION_ARB +#define GLX_CONTEXT_MAJOR_VERSION_ARB 0x2091 +#define GLX_CONTEXT_MINOR_VERSION_ARB 0x2092 +#define GLX_CONTEXT_FLAGS_ARB 0x2094 +#define GLX_CONTEXT_PROFILE_MASK_ARB 0x9126 +#ifndef __APPLE__ +// These are respectively 0x00000001 and 0x00000002 on OSX +#define GLX_CONTEXT_DEBUG_BIT_ARB 0x0001 +#define GLX_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 +#endif +#define GLX_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 +#define GLX_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB 0x00000002 +#endif +// GLX_EXT_create_context_es2_profile +#ifndef GLX_CONTEXT_ES2_PROFILE_BIT_EXT +#define GLX_CONTEXT_ES2_PROFILE_BIT_EXT 0x00000004 +#endif + +#include "osdep/timer.h" +#include "video/out/present_sync.h" +#include "video/out/x11_common.h" +#include "context.h" +#include "utils.h" + +struct priv { + GL gl; + XVisualInfo *vinfo; + GLXContext context; + GLXFBConfig fbc; +}; + +static void glx_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + if (p->vinfo) + XFree(p->vinfo); + if (p->context) { + Display *display = ctx->vo->x11->display; + glXMakeCurrent(display, None, NULL); + glXDestroyContext(display, p->context); + } + + vo_x11_uninit(ctx->vo); +} + +typedef GLXContext (*glXCreateContextAttribsARBProc) + (Display*, GLXFBConfig, GLXContext, Bool, const int*); + +static bool create_context_x11(struct ra_ctx *ctx, GL *gl, bool es) +{ + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + + glXCreateContextAttribsARBProc glXCreateContextAttribsARB = + (glXCreateContextAttribsARBProc) + glXGetProcAddressARB((const GLubyte *)"glXCreateContextAttribsARB"); + + const char *glxstr = + glXQueryExtensionsString(vo->x11->display, vo->x11->screen); + if (!glxstr) { + MP_ERR(ctx, "GLX did not advertise any extensions\n"); + return false; + } + + if (!gl_check_extension(glxstr, 
"GLX_ARB_create_context_profile") || + !glXCreateContextAttribsARB) { + MP_ERR(ctx, "GLX does not support GLX_ARB_create_context_profile\n"); + return false; + } + + int ctx_flags = ctx->opts.debug ? GLX_CONTEXT_DEBUG_BIT_ARB : 0; + int profile_mask = GLX_CONTEXT_CORE_PROFILE_BIT_ARB; + + if (es) { + profile_mask = GLX_CONTEXT_ES2_PROFILE_BIT_EXT; + if (!gl_check_extension(glxstr, "GLX_EXT_create_context_es2_profile")) + return false; + } + + int context_attribs[] = { + GLX_CONTEXT_MAJOR_VERSION_ARB, 0, + GLX_CONTEXT_MINOR_VERSION_ARB, 0, + GLX_CONTEXT_PROFILE_MASK_ARB, profile_mask, + GLX_CONTEXT_FLAGS_ARB, ctx_flags, + None + }; + + GLXContext context; + + if (!es) { + for (int n = 0; mpgl_min_required_gl_versions[n]; n++) { + int version = mpgl_min_required_gl_versions[n]; + MP_VERBOSE(ctx, "Creating OpenGL %d.%d context...\n", + MPGL_VER_P(version)); + + context_attribs[1] = MPGL_VER_GET_MAJOR(version); + context_attribs[3] = MPGL_VER_GET_MINOR(version); + + vo_x11_silence_xlib(1); + context = glXCreateContextAttribsARB(vo->x11->display, + p->fbc, 0, True, + context_attribs); + vo_x11_silence_xlib(-1); + + if (context) + break; + } + } else { + context_attribs[1] = 2; + + vo_x11_silence_xlib(1); + context = glXCreateContextAttribsARB(vo->x11->display, + p->fbc, 0, True, + context_attribs); + vo_x11_silence_xlib(-1); + } + + if (!context) + return false; + + // set context + if (!glXMakeCurrent(vo->x11->display, vo->x11->window, context)) { + MP_FATAL(vo, "Could not set GLX context!\n"); + glXDestroyContext(vo->x11->display, context); + return false; + } + + p->context = context; + + mpgl_load_functions(gl, (void *)glXGetProcAddressARB, glxstr, vo->log); + return true; +} + +// The GL3/FBC initialization code roughly follows/copies from: +// http://www.opengl.org/wiki/Tutorial:_OpenGL_3.0_Context_Creation_(GLX) +// but also uses some of the old code. 
+ +static GLXFBConfig select_fb_config(struct vo *vo, const int *attribs, bool alpha) +{ + int fbcount; + GLXFBConfig *fbc = glXChooseFBConfig(vo->x11->display, vo->x11->screen, + attribs, &fbcount); + if (!fbc) + return NULL; + + // The list in fbc is sorted (so that the first element is the best). + GLXFBConfig fbconfig = fbcount > 0 ? fbc[0] : NULL; + + if (alpha) { + for (int n = 0; n < fbcount; n++) { + XVisualInfo *v = glXGetVisualFromFBConfig(vo->x11->display, fbc[n]); + if (v) { + bool is_rgba = vo_x11_is_rgba_visual(v); + XFree(v); + if (is_rgba) { + fbconfig = fbc[n]; + break; + } + } + } + } + + XFree(fbc); + + return fbconfig; +} + +static void set_glx_attrib(int *attribs, int name, int value) +{ + for (int n = 0; attribs[n * 2 + 0] != None; n++) { + if (attribs[n * 2 + 0] == name) { + attribs[n * 2 + 1] = value; + break; + } + } +} + +static bool glx_check_visible(struct ra_ctx *ctx) +{ + return vo_x11_check_visible(ctx->vo); +} + +static void glx_swap_buffers(struct ra_ctx *ctx) +{ + glXSwapBuffers(ctx->vo->x11->display, ctx->vo->x11->window); + if (ctx->vo->x11->use_present) + present_sync_swap(ctx->vo->x11->present); +} + +static void glx_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info) +{ + struct vo_x11_state *x11 = ctx->vo->x11; + if (ctx->vo->x11->use_present) + present_sync_get_info(x11->present, info); +} + +static bool glx_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct vo *vo = ctx->vo; + GL *gl = &p->gl; + + if (!vo_x11_init(ctx->vo)) + goto uninit; + + int glx_major, glx_minor; + + if (!glXQueryVersion(vo->x11->display, &glx_major, &glx_minor)) { + MP_ERR(ctx, "GLX not found.\n"); + goto uninit; + } + // FBConfigs were added in GLX version 1.3. 
+ if (MPGL_VER(glx_major, glx_minor) < MPGL_VER(1, 3)) { + MP_ERR(ctx, "GLX version older than 1.3.\n"); + goto uninit; + } + + int glx_attribs[] = { + GLX_X_RENDERABLE, True, + GLX_X_VISUAL_TYPE, GLX_TRUE_COLOR, + GLX_RED_SIZE, 1, + GLX_GREEN_SIZE, 1, + GLX_BLUE_SIZE, 1, + GLX_ALPHA_SIZE, 0, + GLX_DOUBLEBUFFER, True, + None + }; + GLXFBConfig fbc = NULL; + if (ctx->opts.want_alpha) { + set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 1); + fbc = select_fb_config(vo, glx_attribs, true); + if (!fbc) + set_glx_attrib(glx_attribs, GLX_ALPHA_SIZE, 0); + } + if (!fbc) + fbc = select_fb_config(vo, glx_attribs, false); + if (!fbc) { + MP_ERR(ctx, "no GLX support present\n"); + goto uninit; + } + + int fbid = -1; + if (!glXGetFBConfigAttrib(vo->x11->display, fbc, GLX_FBCONFIG_ID, &fbid)) + MP_VERBOSE(ctx, "GLX chose FB config with ID 0x%x\n", fbid); + + p->fbc = fbc; + p->vinfo = glXGetVisualFromFBConfig(vo->x11->display, fbc); + if (p->vinfo) { + MP_VERBOSE(ctx, "GLX chose visual with ID 0x%x\n", + (int)p->vinfo->visualid); + } else { + MP_WARN(ctx, "Selected GLX FB config has no associated X visual\n"); + } + + if (!vo_x11_create_vo_window(vo, p->vinfo, "gl")) + goto uninit; + + bool success = false; + enum gles_mode mode = ra_gl_ctx_get_glesmode(ctx); + + if (mode == GLES_NO || mode == GLES_AUTO) + success = create_context_x11(ctx, gl, false); + if (!success && (mode == GLES_YES || mode == GLES_AUTO)) + success = create_context_x11(ctx, gl, true); + if (success && !glXIsDirect(vo->x11->display, p->context)) + gl->mpgl_caps |= MPGL_CAP_SW; + if (!success) + goto uninit; + + struct ra_gl_ctx_params params = { + .check_visible = glx_check_visible, + .swap_buffers = glx_swap_buffers, + .get_vsync = glx_get_vsync, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + goto uninit; + + ra_add_native_resource(ctx->ra, "x11", vo->x11->display); + + return true; + +uninit: + glx_uninit(ctx); + return false; +} + + +static void resize(struct ra_ctx *ctx) +{ + 
ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); +} + +static bool glx_reconfig(struct ra_ctx *ctx) +{ + vo_x11_config_vo_window(ctx->vo); + resize(ctx); + return true; +} + +static int glx_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; +} + +static void glx_wakeup(struct ra_ctx *ctx) +{ + vo_x11_wakeup(ctx->vo); +} + +static void glx_wait_events(struct ra_ctx *ctx, int64_t until_time_ns) +{ + vo_x11_wait_events(ctx->vo, until_time_ns); +} + +const struct ra_ctx_fns ra_ctx_glx = { + .type = "opengl", + .name = "x11", + .reconfig = glx_reconfig, + .control = glx_control, + .wakeup = glx_wakeup, + .wait_events = glx_wait_events, + .init = glx_init, + .uninit = glx_uninit, +}; diff --git a/video/out/opengl/context_rpi.c b/video/out/opengl/context_rpi.c new file mode 100644 index 0000000..0b6babb --- /dev/null +++ b/video/out/opengl/context_rpi.c @@ -0,0 +1,327 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <stdatomic.h> +#include <stddef.h> + +#include <bcm_host.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "common/common.h" +#include "video/out/win_state.h" +#include "context.h" +#include "egl_helpers.h" + +struct priv { + struct GL gl; + DISPMANX_DISPLAY_HANDLE_T display; + DISPMANX_ELEMENT_HANDLE_T window; + DISPMANX_UPDATE_HANDLE_T update; + EGLDisplay egl_display; + EGLConfig egl_config; + EGLContext egl_context; + EGLSurface egl_surface; + // yep, the API keeps a pointer to it + EGL_DISPMANX_WINDOW_T egl_window; + int x, y, w, h; + double display_fps; + atomic_int reload_display; + int win_params[4]; +}; + +static void tv_callback(void *callback_data, uint32_t reason, uint32_t param1, + uint32_t param2) +{ + struct ra_ctx *ctx = callback_data; + struct priv *p = ctx->priv; + atomic_store(&p->reload_display, true); + vo_wakeup(ctx->vo); +} + +static void destroy_dispmanx(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + if (p->egl_surface) { + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + eglDestroySurface(p->egl_display, p->egl_surface); + p->egl_surface = EGL_NO_SURFACE; + } + + if (p->window) + vc_dispmanx_element_remove(p->update, p->window); + p->window = 0; + if (p->display) + vc_dispmanx_display_close(p->display); + p->display = 0; + if (p->update) + vc_dispmanx_update_submit_sync(p->update); + p->update = 0; +} + +static void rpi_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + vc_tv_unregister_callback_full(tv_callback, ctx); + + destroy_dispmanx(ctx); + + if (p->egl_context) + eglDestroyContext(p->egl_display, p->egl_context); + p->egl_context = EGL_NO_CONTEXT; + eglReleaseThread(); + p->egl_display = EGL_NO_DISPLAY; +} + +static bool recreate_dispmanx(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + int display_nr = 0; + int layer = 0; + + MP_VERBOSE(ctx, "Recreating DISPMANX state...\n"); + + 
destroy_dispmanx(ctx); + + p->display = vc_dispmanx_display_open(display_nr); + p->update = vc_dispmanx_update_start(0); + if (!p->display || !p->update) { + MP_FATAL(ctx, "Could not get DISPMANX objects.\n"); + goto fail; + } + + uint32_t dispw, disph; + if (graphics_get_display_size(0, &dispw, &disph) < 0) { + MP_FATAL(ctx, "Could not get display size.\n"); + goto fail; + } + p->w = dispw; + p->h = disph; + + if (ctx->vo->opts->fullscreen) { + p->x = p->y = 0; + } else { + struct vo_win_geometry geo; + struct mp_rect screenrc = {0, 0, p->w, p->h}; + + vo_calc_window_geometry(ctx->vo, &screenrc, &geo); + + mp_rect_intersection(&geo.win, &screenrc); + + p->x = geo.win.x0; + p->y = geo.win.y0; + p->w = geo.win.x1 - geo.win.x0; + p->h = geo.win.y1 - geo.win.y0; + } + + // dispmanx is like a neanderthal version of Wayland - you can add an + // overlay any place on the screen. + VC_RECT_T dst = {.x = p->x, .y = p->y, .width = p->w, .height = p->h}; + VC_RECT_T src = {.width = p->w << 16, .height = p->h << 16}; + VC_DISPMANX_ALPHA_T alpha = { + .flags = DISPMANX_FLAGS_ALPHA_FIXED_ALL_PIXELS, + .opacity = 0xFF, + }; + p->window = vc_dispmanx_element_add(p->update, p->display, layer, &dst, 0, + &src, DISPMANX_PROTECTION_NONE, &alpha, + 0, 0); + if (!p->window) { + MP_FATAL(ctx, "Could not add DISPMANX element.\n"); + goto fail; + } + + vc_dispmanx_update_submit_sync(p->update); + p->update = vc_dispmanx_update_start(0); + + p->egl_window = (EGL_DISPMANX_WINDOW_T){ + .element = p->window, + .width = p->w, + .height = p->h, + }; + p->egl_surface = eglCreateWindowSurface(p->egl_display, p->egl_config, + &p->egl_window, NULL); + + if (p->egl_surface == EGL_NO_SURFACE) { + MP_FATAL(ctx, "Could not create EGL surface!\n"); + goto fail; + } + + if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, + p->egl_context)) + { + MP_FATAL(ctx, "Failed to set context!\n"); + goto fail; + } + + p->display_fps = 0; + TV_GET_STATE_RESP_T tvstate; + TV_DISPLAY_STATE_T 
tvstate_disp; + if (!vc_tv_get_state(&tvstate) && !vc_tv_get_display_state(&tvstate_disp)) { + if (tvstate_disp.state & (VC_HDMI_HDMI | VC_HDMI_DVI)) { + p->display_fps = tvstate_disp.display.hdmi.frame_rate; + + HDMI_PROPERTY_PARAM_T param = { + .property = HDMI_PROPERTY_PIXEL_CLOCK_TYPE, + }; + if (!vc_tv_hdmi_get_property(¶m) && + param.param1 == HDMI_PIXEL_CLOCK_TYPE_NTSC) + p->display_fps = p->display_fps / 1.001; + } else { + p->display_fps = tvstate_disp.display.sdtv.frame_rate; + } + } + + p->win_params[0] = display_nr; + p->win_params[1] = layer; + p->win_params[2] = p->x; + p->win_params[3] = p->y; + + ctx->vo->dwidth = p->w; + ctx->vo->dheight = p->h; + if (ctx->swapchain) + ra_gl_ctx_resize(ctx->swapchain, p->w, p->h, 0); + + ctx->vo->want_redraw = true; + + vo_event(ctx->vo, VO_EVENT_WIN_STATE); + return true; + +fail: + destroy_dispmanx(ctx); + return false; +} + +static void rpi_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + eglSwapBuffers(p->egl_display, p->egl_surface); +} + +static bool rpi_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + + bcm_host_init(); + + vc_tv_register_callback(tv_callback, ctx); + + p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + if (!eglInitialize(p->egl_display, NULL, NULL)) { + MP_FATAL(ctx, "EGL failed to initialize.\n"); + goto fail; + } + + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, &p->egl_config)) + goto fail; + + if (!recreate_dispmanx(ctx)) + goto fail; + + mpegl_load_functions(&p->gl, ctx->log); + + struct ra_gl_ctx_params params = { + .swap_buffers = rpi_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + goto fail; + + ra_add_native_resource(ctx->ra, "MPV_RPI_WINDOW", p->win_params); + + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); + return true; + +fail: + rpi_uninit(ctx); + return false; +} + +static bool rpi_reconfig(struct ra_ctx *ctx) +{ + return recreate_dispmanx(ctx); +} 
+ +static struct mp_image *take_screenshot(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + if (!p->display) + return NULL; + + struct mp_image *img = mp_image_alloc(IMGFMT_BGR0, p->w, p->h); + if (!img) + return NULL; + + DISPMANX_RESOURCE_HANDLE_T resource = + vc_dispmanx_resource_create(VC_IMAGE_ARGB8888, + img->w | ((img->w * 4) << 16), img->h, + &(int32_t){0}); + if (!resource) + goto fail; + + if (vc_dispmanx_snapshot(p->display, resource, 0)) + goto fail; + + VC_RECT_T rc = {.width = img->w, .height = img->h}; + if (vc_dispmanx_resource_read_data(resource, &rc, img->planes[0], img->stride[0])) + goto fail; + + vc_dispmanx_resource_delete(resource); + return img; + +fail: + vc_dispmanx_resource_delete(resource); + talloc_free(img); + return NULL; +} + +static int rpi_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + struct priv *p = ctx->priv; + + switch (request) { + case VOCTRL_SCREENSHOT_WIN: + *(struct mp_image **)arg = take_screenshot(ctx); + return VO_TRUE; + case VOCTRL_CHECK_EVENTS: + if (atomic_fetch_and(&p->reload_display, 0)) { + MP_WARN(ctx, "Recovering from display mode switch...\n"); + recreate_dispmanx(ctx); + } + return VO_TRUE; + case VOCTRL_GET_DISPLAY_FPS: + *(double *)arg = p->display_fps; + return VO_TRUE; + } + + return VO_NOTIMPL; +} + +const struct ra_ctx_fns ra_ctx_rpi = { + .type = "opengl", + .name = "rpi", + .reconfig = rpi_reconfig, + .control = rpi_control, + .init = rpi_init, + .uninit = rpi_uninit, +}; diff --git a/video/out/opengl/context_wayland.c b/video/out/opengl/context_wayland.c new file mode 100644 index 0000000..26c5268 --- /dev/null +++ b/video/out/opengl/context_wayland.c @@ -0,0 +1,230 @@ +/* + * This file is part of mpv video player. 
+ * Copyright © 2013 Alexander Preisinger <alexander.preisinger@gmail.com> + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <wayland-egl.h> +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "video/out/present_sync.h" +#include "video/out/wayland_common.h" +#include "context.h" +#include "egl_helpers.h" +#include "utils.h" + +#define EGL_PLATFORM_WAYLAND_EXT 0x31D8 + +struct priv { + GL gl; + EGLDisplay egl_display; + EGLContext egl_context; + EGLSurface egl_surface; + EGLConfig egl_config; + struct wl_egl_window *egl_window; +}; + +static void resize(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo_wayland_state *wl = ctx->vo->wl; + + MP_VERBOSE(wl, "Handling resize on the egl side\n"); + + const int32_t width = mp_rect_w(wl->geometry); + const int32_t height = mp_rect_h(wl->geometry); + + vo_wayland_set_opaque_region(wl, ctx->opts.want_alpha); + if (p->egl_window) + wl_egl_window_resize(p->egl_window, width, height, 0, 0); + + wl->vo->dwidth = width; + wl->vo->dheight = height; + + vo_wayland_handle_fractional_scale(wl); +} + +static bool wayland_egl_check_visible(struct ra_ctx *ctx) +{ + return vo_wayland_check_visible(ctx->vo); +} + +static void wayland_egl_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo_wayland_state *wl = ctx->vo->wl; + + eglSwapBuffers(p->egl_display, p->egl_surface); + + if 
(!wl->opts->disable_vsync) + vo_wayland_wait_frame(wl); + + if (wl->use_present) + present_sync_swap(wl->present); +} + +static void wayland_egl_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info) +{ + struct vo_wayland_state *wl = ctx->vo->wl; + if (wl->use_present) + present_sync_get_info(wl->present, info); +} + +static bool egl_create_context(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct vo_wayland_state *wl = ctx->vo->wl; + + if (!(p->egl_display = mpegl_get_display(EGL_PLATFORM_WAYLAND_EXT, + "EGL_EXT_platform_wayland", + wl->display))) + return false; + + if (eglInitialize(p->egl_display, NULL, NULL) != EGL_TRUE) + return false; + + if (!mpegl_create_context(ctx, p->egl_display, &p->egl_context, + &p->egl_config)) + return false; + + eglMakeCurrent(p->egl_display, NULL, NULL, p->egl_context); + + mpegl_load_functions(&p->gl, wl->log); + + struct ra_gl_ctx_params params = { + .check_visible = wayland_egl_check_visible, + .swap_buffers = wayland_egl_swap_buffers, + .get_vsync = wayland_egl_get_vsync, + }; + + if (!ra_gl_ctx_init(ctx, &p->gl, params)) + return false; + + ra_add_native_resource(ctx->ra, "wl", wl->display); + + return true; +} + +static void egl_create_window(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + struct vo_wayland_state *wl = ctx->vo->wl; + + p->egl_window = wl_egl_window_create(wl->surface, + mp_rect_w(wl->geometry), + mp_rect_h(wl->geometry)); + + p->egl_surface = mpegl_create_window_surface( + p->egl_display, p->egl_config, p->egl_window); + if (p->egl_surface == EGL_NO_SURFACE) { + p->egl_surface = eglCreateWindowSurface( + p->egl_display, p->egl_config, p->egl_window, NULL); + } + + eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, p->egl_context); + // eglMakeCurrent may not configure the draw or read buffers if the context + // has been made current previously. 
On nvidia GL_NONE is bound because EGL_NO_SURFACE + // is used initially and we must bind the read and draw buffers here. + if(!p->gl.es) { + p->gl.ReadBuffer(GL_BACK); + p->gl.DrawBuffer(GL_BACK); + } + + eglSwapInterval(p->egl_display, 0); +} + +static bool wayland_egl_reconfig(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + if (!vo_wayland_reconfig(ctx->vo)) + return false; + + if (!p->egl_window) + egl_create_window(ctx); + + return true; +} + +static void wayland_egl_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_gl_ctx_uninit(ctx); + + if (p->egl_context) { + eglReleaseThread(); + if (p->egl_window) + wl_egl_window_destroy(p->egl_window); + eglDestroySurface(p->egl_display, p->egl_surface); + eglMakeCurrent(p->egl_display, NULL, NULL, EGL_NO_CONTEXT); + eglDestroyContext(p->egl_display, p->egl_context); + p->egl_context = NULL; + } + eglTerminate(p->egl_display); + + vo_wayland_uninit(ctx->vo); +} + +static int wayland_egl_control(struct ra_ctx *ctx, int *events, int request, + void *data) +{ + struct vo_wayland_state *wl = ctx->vo->wl; + int r = vo_wayland_control(ctx->vo, events, request, data); + + if (*events & VO_EVENT_RESIZE) { + resize(ctx); + ra_gl_ctx_resize(ctx->swapchain, wl->vo->dwidth, wl->vo->dheight, 0); + } + + return r; +} + +static void wayland_egl_wakeup(struct ra_ctx *ctx) +{ + vo_wayland_wakeup(ctx->vo); +} + +static void wayland_egl_wait_events(struct ra_ctx *ctx, int64_t until_time_ns) +{ + vo_wayland_wait_events(ctx->vo, until_time_ns); +} + +static void wayland_egl_update_render_opts(struct ra_ctx *ctx) +{ + struct vo_wayland_state *wl = ctx->vo->wl; + vo_wayland_set_opaque_region(wl, ctx->opts.want_alpha); + wl_surface_commit(wl->surface); +} + +static bool wayland_egl_init(struct ra_ctx *ctx) +{ + if (!vo_wayland_init(ctx->vo)) + return false; + return egl_create_context(ctx); +} + +const struct ra_ctx_fns ra_ctx_wayland_egl = { + .type = "opengl", + .name = "wayland", + .reconfig = wayland_egl_reconfig, 
+ .control = wayland_egl_control, + .wakeup = wayland_egl_wakeup, + .wait_events = wayland_egl_wait_events, + .update_render_opts = wayland_egl_update_render_opts, + .init = wayland_egl_init, + .uninit = wayland_egl_uninit, +}; diff --git a/video/out/opengl/context_win.c b/video/out/opengl/context_win.c new file mode 100644 index 0000000..968b176 --- /dev/null +++ b/video/out/opengl/context_win.c @@ -0,0 +1,378 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <windows.h> +#include <dwmapi.h> + +#include "options/m_config.h" +#include "video/out/w32_common.h" +#include "context.h" +#include "utils.h" + +#if !defined(WGL_CONTEXT_MAJOR_VERSION_ARB) +/* these are supposed to be defined in wingdi.h but mingw's is too old */ +/* only the bits actually used by mplayer are defined */ +/* reference: http://www.opengl.org/registry/specs/ARB/wgl_create_context.txt */ + +#define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091 +#define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092 +#define WGL_CONTEXT_FLAGS_ARB 0x2094 +#define WGL_CONTEXT_PROFILE_MASK_ARB 0x9126 +#define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002 +#define WGL_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001 +#endif + +struct wingl_opts { + int wingl_dwm_flush; +}; + +#define OPT_BASE_STRUCT struct wingl_opts +const struct m_sub_options wingl_conf = { + .opts = (const struct m_option[]) { + {"opengl-dwmflush", OPT_CHOICE(wingl_dwm_flush, + {"no", -1}, {"auto", 0}, {"windowed", 1}, {"yes", 2})}, + {0} + }, + .size = sizeof(struct wingl_opts), +}; + +struct priv { + GL gl; + + int opt_swapinterval; + int current_swapinterval; + + int (GLAPIENTRY *real_wglSwapInterval)(int); + struct m_config_cache *opts_cache; + struct wingl_opts *opts; + + HGLRC context; + HDC hdc; +}; + +static void wgl_uninit(struct ra_ctx *ctx); + +static __thread struct priv *current_wgl_context; + +static int GLAPIENTRY wgl_swap_interval(int interval) +{ + if (current_wgl_context) + current_wgl_context->opt_swapinterval = interval; + return 0; +} + +static bool create_dc(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + HWND win = vo_w32_hwnd(ctx->vo); + + if (p->hdc) + return true; + + HDC hdc = GetDC(win); + if (!hdc) + return false; + + PIXELFORMATDESCRIPTOR pfd; + memset(&pfd, 0, sizeof pfd); + pfd.nSize = sizeof pfd; + pfd.nVersion = 1; + pfd.dwFlags = PFD_DRAW_TO_WINDOW | PFD_SUPPORT_OPENGL | PFD_DOUBLEBUFFER; + + pfd.iPixelType = PFD_TYPE_RGBA; + pfd.cColorBits = 24; + 
pfd.iLayerType = PFD_MAIN_PLANE; + int pf = ChoosePixelFormat(hdc, &pfd); + + if (!pf) { + MP_ERR(ctx->vo, "unable to select a valid pixel format!\n"); + ReleaseDC(win, hdc); + return false; + } + + SetPixelFormat(hdc, pf, &pfd); + + p->hdc = hdc; + return true; +} + +static void *wglgpa(const GLubyte *procName) +{ + HMODULE oglmod; + void *res = wglGetProcAddress(procName); + if (res) + return res; + oglmod = GetModuleHandle(L"opengl32.dll"); + return GetProcAddress(oglmod, procName); +} + +static bool create_context_wgl_old(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + HDC windc = p->hdc; + bool res = false; + + HGLRC context = wglCreateContext(windc); + if (!context) { + MP_FATAL(ctx->vo, "Could not create GL context!\n"); + return res; + } + + if (!wglMakeCurrent(windc, context)) { + MP_FATAL(ctx->vo, "Could not set GL context!\n"); + wglDeleteContext(context); + return res; + } + + p->context = context; + return true; +} + +static bool create_context_wgl_gl3(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + HDC windc = p->hdc; + HGLRC context = 0; + + // A legacy context is needed to get access to the new functions. 
+ HGLRC legacy_context = wglCreateContext(windc); + if (!legacy_context) { + MP_FATAL(ctx->vo, "Could not create GL context!\n"); + return false; + } + + // set context + if (!wglMakeCurrent(windc, legacy_context)) { + MP_FATAL(ctx->vo, "Could not set GL context!\n"); + goto out; + } + + const char *(GLAPIENTRY *wglGetExtensionsStringARB)(HDC hdc) + = wglgpa((const GLubyte*)"wglGetExtensionsStringARB"); + + if (!wglGetExtensionsStringARB) + goto unsupported; + + const char *wgl_exts = wglGetExtensionsStringARB(windc); + if (!gl_check_extension(wgl_exts, "WGL_ARB_create_context")) + goto unsupported; + + HGLRC (GLAPIENTRY *wglCreateContextAttribsARB)(HDC hDC, HGLRC hShareContext, + const int *attribList) + = wglgpa((const GLubyte*)"wglCreateContextAttribsARB"); + + if (!wglCreateContextAttribsARB) + goto unsupported; + + int attribs[] = { + WGL_CONTEXT_MAJOR_VERSION_ARB, 3, + WGL_CONTEXT_MINOR_VERSION_ARB, 0, + WGL_CONTEXT_FLAGS_ARB, 0, + WGL_CONTEXT_PROFILE_MASK_ARB, WGL_CONTEXT_CORE_PROFILE_BIT_ARB, + 0 + }; + + context = wglCreateContextAttribsARB(windc, 0, attribs); + if (!context) { + // NVidia, instead of ignoring WGL_CONTEXT_FLAGS_ARB, will error out if + // it's present on pre-3.2 contexts. + // Remove it from attribs and retry the context creation. 
+ attribs[6] = attribs[7] = 0; + context = wglCreateContextAttribsARB(windc, 0, attribs); + } + if (!context) { + int err = GetLastError(); + MP_FATAL(ctx->vo, "Could not create an OpenGL 3.x context: error 0x%x\n", err); + goto out; + } + + wglMakeCurrent(windc, NULL); + wglDeleteContext(legacy_context); + + if (!wglMakeCurrent(windc, context)) { + MP_FATAL(ctx->vo, "Could not set GL3 context!\n"); + wglDeleteContext(context); + return false; + } + + p->context = context; + return true; + +unsupported: + MP_ERR(ctx->vo, "The OpenGL driver does not support OpenGL 3.x \n"); +out: + wglMakeCurrent(windc, NULL); + wglDeleteContext(legacy_context); + return false; +} + +static void create_ctx(void *ptr) +{ + struct ra_ctx *ctx = ptr; + struct priv *p = ctx->priv; + + if (!create_dc(ctx)) + return; + + create_context_wgl_gl3(ctx); + if (!p->context) + create_context_wgl_old(ctx); + + wglMakeCurrent(p->hdc, NULL); +} + +static bool compositor_active(struct ra_ctx *ctx) +{ + // For Windows 7. + BOOL enabled = 0; + if (FAILED(DwmIsCompositionEnabled(&enabled)) || !enabled) + return false; + + // This works at least on Windows 8.1: it returns an error in fullscreen, + // which is also when we get consistent timings without DwmFlush. Might + // be cargo-cult. 
+ DWM_TIMING_INFO info = { .cbSize = sizeof(DWM_TIMING_INFO) }; + if (FAILED(DwmGetCompositionTimingInfo(0, &info))) + return false; + + return true; +} + +static void wgl_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + SwapBuffers(p->hdc); + + // default if we don't DwmFLush + int new_swapinterval = p->opt_swapinterval; + + if (p->opts->wingl_dwm_flush >= 0) { + if ((p->opts->wingl_dwm_flush == 1 && !ctx->vo->opts->fullscreen) || + (p->opts->wingl_dwm_flush == 2) || + (p->opts->wingl_dwm_flush == 0 && compositor_active(ctx))) + { + if (DwmFlush() == S_OK) + new_swapinterval = 0; + } + } + + if (new_swapinterval != p->current_swapinterval && + p->real_wglSwapInterval) + { + p->real_wglSwapInterval(new_swapinterval); + MP_VERBOSE(ctx->vo, "set SwapInterval(%d)\n", new_swapinterval); + } + p->current_swapinterval = new_swapinterval; +} + +static bool wgl_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + GL *gl = &p->gl; + + p->opts_cache = m_config_cache_alloc(ctx, ctx->global, &wingl_conf); + p->opts = p->opts_cache->opts; + + if (!vo_w32_init(ctx->vo)) + goto fail; + + vo_w32_run_on_thread(ctx->vo, create_ctx, ctx); + if (!p->context) + goto fail; + + current_wgl_context = p; + wglMakeCurrent(p->hdc, p->context); + + mpgl_load_functions(gl, wglgpa, NULL, ctx->vo->log); + + if (!gl->SwapInterval) + MP_VERBOSE(ctx->vo, "WGL_EXT_swap_control missing.\n"); + p->real_wglSwapInterval = gl->SwapInterval; + gl->SwapInterval = wgl_swap_interval; + p->current_swapinterval = -1; + + struct ra_gl_ctx_params params = { + .swap_buffers = wgl_swap_buffers, + }; + + if (!ra_gl_ctx_init(ctx, gl, params)) + goto fail; + + DwmEnableMMCSS(TRUE); + return true; + +fail: + wgl_uninit(ctx); + return false; +} + +static void resize(struct ra_ctx *ctx) +{ + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); +} + +static bool wgl_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + resize(ctx); + 
return true; +} + +static void destroy_gl(void *ptr) +{ + struct ra_ctx *ctx = ptr; + struct priv *p = ctx->priv; + if (p->context) + wglDeleteContext(p->context); + p->context = 0; + if (p->hdc) + ReleaseDC(vo_w32_hwnd(ctx->vo), p->hdc); + p->hdc = NULL; + current_wgl_context = NULL; +} + +static void wgl_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + if (p->context) + wglMakeCurrent(p->hdc, 0); + vo_w32_run_on_thread(ctx->vo, destroy_gl, ctx); + + DwmEnableMMCSS(FALSE); + vo_w32_uninit(ctx->vo); +} + +static int wgl_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; +} + +const struct ra_ctx_fns ra_ctx_wgl = { + .type = "opengl", + .name = "win", + .init = wgl_init, + .reconfig = wgl_reconfig, + .control = wgl_control, + .uninit = wgl_uninit, +}; diff --git a/video/out/opengl/context_x11egl.c b/video/out/opengl/context_x11egl.c new file mode 100644 index 0000000..3201f29 --- /dev/null +++ b/video/out/opengl/context_x11egl.c @@ -0,0 +1,225 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> + +#include <X11/Xlib.h> +#include <X11/extensions/Xpresent.h> +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "common/common.h" +#include "video/out/present_sync.h" +#include "video/out/x11_common.h" +#include "context.h" +#include "egl_helpers.h" +#include "utils.h" + +#define EGL_PLATFORM_X11_EXT 0x31D5 + +struct priv { + GL gl; + EGLDisplay egl_display; + EGLContext egl_context; + EGLSurface egl_surface; +}; + +static void mpegl_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_gl_ctx_uninit(ctx); + + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + eglTerminate(p->egl_display); + vo_x11_uninit(ctx->vo); +} + +static int pick_xrgba_config(void *user_data, EGLConfig *configs, int num_configs) +{ + struct ra_ctx *ctx = user_data; + struct priv *p = ctx->priv; + struct vo *vo = ctx->vo; + + for (int n = 0; n < num_configs; n++) { + int vID = 0, num; + eglGetConfigAttrib(p->egl_display, configs[n], EGL_NATIVE_VISUAL_ID, &vID); + XVisualInfo template = {.visualid = vID}; + XVisualInfo *vi = XGetVisualInfo(vo->x11->display, VisualIDMask, + &template, &num); + if (vi) { + bool is_rgba = vo_x11_is_rgba_visual(vi); + XFree(vi); + if (is_rgba) + return n; + } + } + + return 0; +} + +static bool mpegl_check_visible(struct ra_ctx *ctx) +{ + return vo_x11_check_visible(ctx->vo); +} + +static void mpegl_swap_buffers(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + eglSwapBuffers(p->egl_display, p->egl_surface); + if (ctx->vo->x11->use_present) + present_sync_swap(ctx->vo->x11->present); +} + +static void mpegl_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info) +{ + struct vo_x11_state *x11 = ctx->vo->x11; + if (ctx->vo->x11->use_present) + present_sync_get_info(x11->present, info); +} + +static bool mpegl_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct vo *vo = ctx->vo; + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_FATAL; + + if (!vo_x11_init(vo)) + goto uninit; + + p->egl_display = mpegl_get_display(EGL_PLATFORM_X11_EXT, + "EGL_EXT_platform_x11", + vo->x11->display); + if (!eglInitialize(p->egl_display, NULL, NULL)) { + MP_MSG(ctx, msgl, "Could not initialize EGL.\n"); + goto uninit; + } + + struct mpegl_cb cb = { + .user_data = ctx, + .refine_config = ctx->opts.want_alpha ? pick_xrgba_config : NULL, + }; + + EGLConfig config; + if (!mpegl_create_context_cb(ctx, p->egl_display, cb, &p->egl_context, &config)) + goto uninit; + + int cid, vID, n; + if (!eglGetConfigAttrib(p->egl_display, config, EGL_CONFIG_ID, &cid)) { + MP_FATAL(ctx, "Getting EGL_CONFIG_ID failed!\n"); + goto uninit; + } + if (!eglGetConfigAttrib(p->egl_display, config, EGL_NATIVE_VISUAL_ID, &vID)) { + MP_FATAL(ctx, "Getting X visual ID failed!\n"); + goto uninit; + } + MP_VERBOSE(ctx, "Choosing visual EGL config 0x%x, visual ID 0x%x\n", cid, vID); + XVisualInfo template = {.visualid = vID}; + XVisualInfo *vi = XGetVisualInfo(vo->x11->display, VisualIDMask, &template, &n); + + if (!vi) { + MP_FATAL(ctx, "Getting X visual failed!\n"); + goto uninit; + } + + if (!vo_x11_create_vo_window(vo, vi, "gl")) { + XFree(vi); + goto uninit; + } + + XFree(vi); + + p->egl_surface = mpegl_create_window_surface( + p->egl_display, config, &vo->x11->window); + if (p->egl_surface == EGL_NO_SURFACE) { + p->egl_surface = eglCreateWindowSurface( + p->egl_display, config, (EGLNativeWindowType)vo->x11->window, NULL); + } + if (p->egl_surface == EGL_NO_SURFACE) { + MP_FATAL(ctx, "Could not create EGL surface!\n"); + goto uninit; + } + + if (!eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, + p->egl_context)) + { + MP_FATAL(ctx, "Could not make context current!\n"); + goto uninit; + } + + mpegl_load_functions(&p->gl, ctx->log); + + struct ra_gl_ctx_params params = { + .check_visible = mpegl_check_visible, + .swap_buffers = mpegl_swap_buffers, + .get_vsync = mpegl_get_vsync, + }; + + if (!ra_gl_ctx_init(ctx, 
&p->gl, params)) + goto uninit; + + ra_add_native_resource(ctx->ra, "x11", vo->x11->display); + + return true; + +uninit: + mpegl_uninit(ctx); + return false; +} + +static void resize(struct ra_ctx *ctx) +{ + ra_gl_ctx_resize(ctx->swapchain, ctx->vo->dwidth, ctx->vo->dheight, 0); +} + +static bool mpegl_reconfig(struct ra_ctx *ctx) +{ + vo_x11_config_vo_window(ctx->vo); + resize(ctx); + return true; +} + +static int mpegl_control(struct ra_ctx *ctx, int *events, int request, + void *arg) +{ + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) + resize(ctx); + return ret; +} + +static void mpegl_wakeup(struct ra_ctx *ctx) +{ + vo_x11_wakeup(ctx->vo); +} + +static void mpegl_wait_events(struct ra_ctx *ctx, int64_t until_time_ns) +{ + vo_x11_wait_events(ctx->vo, until_time_ns); +} + +const struct ra_ctx_fns ra_ctx_x11_egl = { + .type = "opengl", + .name = "x11egl", + .reconfig = mpegl_reconfig, + .control = mpegl_control, + .wakeup = mpegl_wakeup, + .wait_events = mpegl_wait_events, + .init = mpegl_init, + .uninit = mpegl_uninit, +}; diff --git a/video/out/opengl/egl_helpers.c b/video/out/opengl/egl_helpers.c new file mode 100644 index 0000000..3bf6239 --- /dev/null +++ b/video/out/opengl/egl_helpers.c @@ -0,0 +1,381 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "config.h" + +#if HAVE_LIBDL +#include <dlfcn.h> +#endif + +#include "common/common.h" + +#include "egl_helpers.h" +#include "common.h" +#include "utils.h" +#include "context.h" + +#if HAVE_EGL_ANGLE +// On Windows, egl_helpers.c is only used by ANGLE, where the EGL functions may +// be loaded dynamically from ANGLE DLLs +#include "angle_dynamic.h" +#endif + +// EGL 1.5 +#ifndef EGL_CONTEXT_OPENGL_PROFILE_MASK +#define EGL_CONTEXT_MAJOR_VERSION 0x3098 +#define EGL_CONTEXT_MINOR_VERSION 0x30FB +#define EGL_CONTEXT_OPENGL_PROFILE_MASK 0x30FD +#define EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT 0x00000001 +#define EGL_CONTEXT_OPENGL_FORWARD_COMPATIBLE 0x31B1 +typedef intptr_t EGLAttrib; +#endif + +// Not every EGL provider (like RPI) has these. +#ifndef EGL_CONTEXT_FLAGS_KHR +#define EGL_CONTEXT_FLAGS_KHR EGL_NONE +#endif + +#ifndef EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR +#define EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR 0 +#endif + +struct mp_egl_config_attr { + int attrib; + const char *name; +}; + +#define MP_EGL_ATTRIB(id) {id, # id} + +static const struct mp_egl_config_attr mp_egl_attribs[] = { + MP_EGL_ATTRIB(EGL_CONFIG_ID), + MP_EGL_ATTRIB(EGL_RED_SIZE), + MP_EGL_ATTRIB(EGL_GREEN_SIZE), + MP_EGL_ATTRIB(EGL_BLUE_SIZE), + MP_EGL_ATTRIB(EGL_ALPHA_SIZE), + MP_EGL_ATTRIB(EGL_COLOR_BUFFER_TYPE), + MP_EGL_ATTRIB(EGL_CONFIG_CAVEAT), + MP_EGL_ATTRIB(EGL_CONFORMANT), + MP_EGL_ATTRIB(EGL_NATIVE_VISUAL_ID), +}; + +static void dump_egl_config(struct mp_log *log, int msgl, EGLDisplay display, + EGLConfig config) +{ + for (int n = 0; n < MP_ARRAY_SIZE(mp_egl_attribs); n++) { + const char *name = mp_egl_attribs[n].name; + EGLint v = -1; + if (eglGetConfigAttrib(display, config, mp_egl_attribs[n].attrib, &v)) { + mp_msg(log, msgl, " %s=0x%x\n", name, v); + } else { + mp_msg(log, msgl, " %s=<error>\n", name); + } + } +} + +static void *mpegl_get_proc_address(void *ctx, const char *name) +{ + void *p = eglGetProcAddress(name); +#if defined(__GLIBC__) && HAVE_LIBDL + // Some crappy 
ARM/Linux things do not provide EGL 1.5, so above call does + // not necessarily return function pointers for core functions. Try to get + // them from a loaded GLES lib. As POSIX leaves RTLD_DEFAULT "reserved", + // use it only with glibc. + if (!p) + p = dlsym(RTLD_DEFAULT, name); +#endif + return p; +} + +static bool create_context(struct ra_ctx *ctx, EGLDisplay display, + bool es, struct mpegl_cb cb, + EGLContext *out_context, EGLConfig *out_config) +{ + int msgl = ctx->opts.probing ? MSGL_V : MSGL_FATAL; + + EGLenum api; + EGLint rend; + const char *name; + + if (!es) { + api = EGL_OPENGL_API; + rend = EGL_OPENGL_BIT; + name = "Desktop OpenGL"; + } else { + api = EGL_OPENGL_ES_API; + rend = EGL_OPENGL_ES2_BIT; + name = "GLES 2.x +"; + } + + MP_VERBOSE(ctx, "Trying to create %s context.\n", name); + + if (!eglBindAPI(api)) { + MP_VERBOSE(ctx, "Could not bind API!\n"); + return false; + } + + EGLint attributes[] = { + EGL_SURFACE_TYPE, EGL_WINDOW_BIT, + EGL_RED_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_BLUE_SIZE, 8, + EGL_ALPHA_SIZE, ctx->opts.want_alpha ? 
8 : 0, + EGL_RENDERABLE_TYPE, rend, + EGL_NONE + }; + + EGLint num_configs; + if (!eglChooseConfig(display, attributes, NULL, 0, &num_configs)) + num_configs = 0; + + EGLConfig *configs = talloc_array(NULL, EGLConfig, num_configs); + if (!eglChooseConfig(display, attributes, configs, num_configs, &num_configs)) + num_configs = 0; + + if (!num_configs) { + talloc_free(configs); + MP_MSG(ctx, msgl, "Could not choose EGLConfig for %s!\n", name); + return false; + } + + for (int n = 0; n < num_configs; n++) + dump_egl_config(ctx->log, MSGL_TRACE, display, configs[n]); + + int chosen = 0; + if (cb.refine_config) + chosen = cb.refine_config(cb.user_data, configs, num_configs); + if (chosen < 0) { + talloc_free(configs); + MP_MSG(ctx, msgl, "Could not refine EGLConfig for %s!\n", name); + return false; + } + EGLConfig config = configs[chosen]; + + talloc_free(configs); + + MP_DBG(ctx, "Chosen EGLConfig:\n"); + dump_egl_config(ctx->log, MSGL_DEBUG, display, config); + + int ctx_flags = ctx->opts.debug ? EGL_CONTEXT_OPENGL_DEBUG_BIT_KHR : 0; + EGLContext *egl_ctx = NULL; + + if (!es) { + for (int n = 0; mpgl_min_required_gl_versions[n]; n++) { + int ver = mpgl_min_required_gl_versions[n]; + + EGLint attrs[] = { + EGL_CONTEXT_MAJOR_VERSION, MPGL_VER_GET_MAJOR(ver), + EGL_CONTEXT_MINOR_VERSION, MPGL_VER_GET_MINOR(ver), + EGL_CONTEXT_OPENGL_PROFILE_MASK, + ver >= 320 ? EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT : 0, + EGL_CONTEXT_FLAGS_KHR, ctx_flags, + EGL_NONE + }; + + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + if (egl_ctx) + break; + } + } + if (!egl_ctx) { + // Fallback for EGL 1.4 without EGL_KHR_create_context or GLES + // Add the context flags only for GLES - GL has been attempted above + EGLint attrs[] = { + EGL_CONTEXT_CLIENT_VERSION, 2, + es ? 
EGL_CONTEXT_FLAGS_KHR : EGL_NONE, ctx_flags, + EGL_NONE + }; + + egl_ctx = eglCreateContext(display, config, EGL_NO_CONTEXT, attrs); + } + + if (!egl_ctx) { + MP_MSG(ctx, msgl, "Could not create EGL context for %s!\n", name); + return false; + } + + *out_context = egl_ctx; + *out_config = config; + return true; +} + +#define STR_OR_ERR(s) ((s) ? (s) : "(error)") + +// Create a context and return it and the config it was created with. If it +// returns false, the out_* pointers are set to NULL. +// vo_flags is a combination of VOFLAG_* values. +bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display, + EGLContext *out_context, EGLConfig *out_config) +{ + return mpegl_create_context_cb(ctx, display, (struct mpegl_cb){0}, + out_context, out_config); +} + +// Create a context and return it and the config it was created with. If it +// returns false, the out_* pointers are set to NULL. +bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display, + struct mpegl_cb cb, EGLContext *out_context, + EGLConfig *out_config) +{ + *out_context = NULL; + *out_config = NULL; + + const char *version = eglQueryString(display, EGL_VERSION); + const char *vendor = eglQueryString(display, EGL_VENDOR); + const char *apis = eglQueryString(display, EGL_CLIENT_APIS); + MP_VERBOSE(ctx, "EGL_VERSION=%s\nEGL_VENDOR=%s\nEGL_CLIENT_APIS=%s\n", + STR_OR_ERR(version), STR_OR_ERR(vendor), STR_OR_ERR(apis)); + + enum gles_mode mode = ra_gl_ctx_get_glesmode(ctx); + + if ((mode == GLES_NO || mode == GLES_AUTO) && + create_context(ctx, display, false, cb, out_context, out_config)) + return true; + + if ((mode == GLES_YES || mode == GLES_AUTO) && + create_context(ctx, display, true, cb, out_context, out_config)) + return true; + + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; + MP_MSG(ctx, msgl, "Could not create a GL context.\n"); + return false; +} + +static int GLAPIENTRY swap_interval(int interval) +{ + EGLDisplay display = eglGetCurrentDisplay(); + if (!display) + return 1; + return !eglSwapInterval(display, interval); +} + +// Load gl version and function pointers into *gl. +// Expects a current EGL context set. +void mpegl_load_functions(struct GL *gl, struct mp_log *log) +{ + const char *egl_exts = ""; + EGLDisplay display = eglGetCurrentDisplay(); + if (display != EGL_NO_DISPLAY) + egl_exts = eglQueryString(display, EGL_EXTENSIONS); + + mpgl_load_functions2(gl, mpegl_get_proc_address, NULL, egl_exts, log); + if (!gl->SwapInterval) + gl->SwapInterval = swap_interval; +} + +static bool is_egl15(void) +{ + // It appears that EGL 1.4 is specified to _require_ an initialized display + // for EGL_VERSION, while EGL 1.5 is _required_ to return the EGL version. + const char *ver = eglQueryString(EGL_NO_DISPLAY, EGL_VERSION); + // Of course we have to go through the excruciating pain of parsing a + // version string, since EGL provides no other way without a display. In + // theory version!=NULL is already proof enough that it's 1.5, but be + // extra defensive, since this should have been true for EGL_EXTENSIONS as + // well, but then they added an extension that modified standard behavior. + int ma = 0, mi = 0; + return ver && sscanf(ver, "%d.%d", &ma, &mi) == 2 && (ma > 1 || mi >= 5); +} + +// This is similar to eglGetPlatformDisplay(platform, native_display, NULL), +// except that it 1. may use eglGetPlatformDisplayEXT, 2. checks for the +// platform client extension platform_ext_name, and 3. does not support passing +// an attrib list, because the type for that parameter is different in the EXT +// and standard functions (EGL can't not fuck up, no matter what). +// platform: e.g. EGL_PLATFORM_X11_KHR +// platform_ext_name: e.g. "EGL_KHR_platform_x11" +// native_display: e.g. 
X11 Display* +// Returns EGL_NO_DISPLAY on failure. +// Warning: the EGL version can be different at runtime depending on the chosen +// platform, so this might return a display corresponding to some older EGL +// version (often 1.4). +// Often, there are two extension variants of a platform (KHR and EXT). If you +// need to check both, call this function twice. (Why do they define them twice? +// They're crazy.) +EGLDisplay mpegl_get_display(EGLenum platform, const char *platform_ext_name, + void *native_display) +{ + // EGL is awful. Designed as ultra-portable library, it fails at dealing + // with slightly more complex environment than its short-sighted design + // could deal with. So they invented an awful, awful kludge that modifies + // EGL standard behavior, the EGL_EXT_client_extensions extension. EGL 1.4 + // normally is to return NULL when querying EGL_EXTENSIONS on EGL_NO_DISPLAY, + // however, with that extension, it'll return the set of "client extensions", + // which may include EGL_EXT_platform_base. + + // Prerequisite: check the platform extension. + // If this is either EGL 1.5, or 1.4 with EGL_EXT_client_extensions, then + // this must return a valid extension string. + const char *exts = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); + if (!gl_check_extension(exts, platform_ext_name)) + return EGL_NO_DISPLAY; + + // Before we go through the EGL 1.4 BS, try if we can use native EGL 1.5 + if (is_egl15()) { + // This is EGL 1.5. It must support querying standard functions through + // eglGetProcAddress(). Note that on EGL 1.4, even if the function is + // unknown, it could return non-NULL anyway (because EGL is crazy). + EGLDisplay (EGLAPIENTRYP GetPlatformDisplay) + (EGLenum, void *, const EGLAttrib *) = + (void *)eglGetProcAddress("eglGetPlatformDisplay"); + // (It should be impossible to be NULL, but uh.) 
+ if (GetPlatformDisplay) + return GetPlatformDisplay(platform, native_display, NULL); + } + + if (!gl_check_extension(exts, "EGL_EXT_platform_base")) + return EGL_NO_DISPLAY; + + EGLDisplay (EGLAPIENTRYP GetPlatformDisplayEXT)(EGLenum, void*, const EGLint*) + = (void *)eglGetProcAddress("eglGetPlatformDisplayEXT"); + + // (It should be impossible to be NULL, but uh.) + if (GetPlatformDisplayEXT) + return GetPlatformDisplayEXT(platform, native_display, NULL); + + return EGL_NO_DISPLAY; +} + +// The same mess but with eglCreatePlatformWindowSurface(EXT) +// again no support for an attribute list because the type differs +// Returns EGL_NO_SURFACE on failure. +EGLSurface mpegl_create_window_surface(EGLDisplay dpy, EGLConfig config, + void *native_window) +{ + // Use the EGL 1.5 function if possible + if (is_egl15()) { + EGLSurface (EGLAPIENTRYP CreatePlatformWindowSurface) + (EGLDisplay, EGLConfig, void *, const EGLAttrib *) = + (void *)eglGetProcAddress("eglCreatePlatformWindowSurface"); + // (It should be impossible to be NULL, but uh.) + if (CreatePlatformWindowSurface) + return CreatePlatformWindowSurface(dpy, config, native_window, NULL); + } + + // Check the extension that provides the *EXT function + const char *exts = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS); + if (!gl_check_extension(exts, "EGL_EXT_platform_base")) + return EGL_NO_SURFACE; + + EGLSurface (EGLAPIENTRYP CreatePlatformWindowSurfaceEXT) + (EGLDisplay, EGLConfig, void *, const EGLint *) = + (void *)eglGetProcAddress("eglCreatePlatformWindowSurfaceEXT"); + // (It should be impossible to be NULL, but uh.) 
+ if (CreatePlatformWindowSurfaceEXT) + return CreatePlatformWindowSurfaceEXT(dpy, config, native_window, NULL); + + return EGL_NO_SURFACE; +} diff --git a/video/out/opengl/egl_helpers.h b/video/out/opengl/egl_helpers.h new file mode 100644 index 0000000..32ec5d1 --- /dev/null +++ b/video/out/opengl/egl_helpers.h @@ -0,0 +1,38 @@ +#ifndef MP_GL_EGL_HELPERS_H +#define MP_GL_EGL_HELPERS_H + +#include <stdbool.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "video/out/gpu/context.h" + +struct mp_log; + +bool mpegl_create_context(struct ra_ctx *ctx, EGLDisplay display, + EGLContext *out_context, EGLConfig *out_config); + +struct mpegl_cb { + // if set, pick the desired config from the given list and return its index + // defaults to 0 (they are sorted by eglChooseConfig). return a negative + // number to indicate an error condition or that no suitable configs could + // be found. + int (*refine_config)(void *user_data, EGLConfig *configs, int num_configs); + void *user_data; +}; + +bool mpegl_create_context_cb(struct ra_ctx *ctx, EGLDisplay display, + struct mpegl_cb cb, EGLContext *out_context, + EGLConfig *out_config); + +struct GL; +void mpegl_load_functions(struct GL *gl, struct mp_log *log); + +EGLDisplay mpegl_get_display(EGLenum platform, const char *platform_ext_name, + void *native_display); + +EGLSurface mpegl_create_window_surface(EGLDisplay dpy, EGLConfig config, + void *native_window); + +#endif diff --git a/video/out/opengl/formats.c b/video/out/opengl/formats.c new file mode 100644 index 0000000..a0b79e2 --- /dev/null +++ b/video/out/opengl/formats.c @@ -0,0 +1,196 @@ +#include "common/common.h" +#include "formats.h" + +enum { + // --- GL type aliases (for readability) + T_U8 = GL_UNSIGNED_BYTE, + T_U16 = GL_UNSIGNED_SHORT, + T_FL = GL_FLOAT, +}; + +// List of allowed formats, and their usability for bilinear filtering and FBOs. +// This is limited to combinations that are useful for our renderer. 
+const struct gl_format gl_formats[] = { + // These are used for desktop GL 3+, and GLES 3+ with GL_EXT_texture_norm16. + {"r8", GL_R8, GL_RED, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"rg8", GL_RG8, GL_RG, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"rgb8", GL_RGB8, GL_RGB, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"rgba8", GL_RGBA8, GL_RGBA, T_U8, F_CF | F_GL3 | F_GL2F | F_ES3}, + {"r16", GL_R16, GL_RED, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + {"rg16", GL_RG16, GL_RG, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + {"rgb16", GL_RGB16, GL_RGB, T_U16, F_CF | F_GL3 | F_GL2F}, + {"rgba16", GL_RGBA16, GL_RGBA, T_U16, F_CF | F_GL3 | F_GL2F | F_EXT16}, + + // Specifically not color-renderable. + {"rgb16", GL_RGB16, GL_RGB, T_U16, F_TF | F_EXT16}, + + // GL2 legacy. Ignores possibly present FBO extensions (no CF flag set). + {"l8", GL_LUMINANCE8, GL_LUMINANCE, T_U8, F_TF | F_GL2}, + {"la8", GL_LUMINANCE8_ALPHA8, GL_LUMINANCE_ALPHA, T_U8, F_TF | F_GL2}, + {"rgb8", GL_RGB8, GL_RGB, T_U8, F_TF | F_GL2}, + {"rgba8", GL_RGBA8, GL_RGBA, T_U8, F_TF | F_GL2}, + {"l16", GL_LUMINANCE16, GL_LUMINANCE, T_U16, F_TF | F_GL2}, + {"la16", GL_LUMINANCE16_ALPHA16, GL_LUMINANCE_ALPHA, T_U16, F_TF | F_GL2}, + {"rgb16", GL_RGB16, GL_RGB, T_U16, F_TF | F_GL2}, + {"rgba16", GL_RGBA16, GL_RGBA, T_U16, F_TF | F_GL2}, + + // ES3 legacy. This is literally to compensate for Apple bugs in their iOS + // interop (can they do anything right?). ES3 still allows these formats, + // but they are deprecated. + {"l" , GL_LUMINANCE,GL_LUMINANCE, T_U8, F_CF | F_ES3}, + {"la",GL_LUMINANCE_ALPHA,GL_LUMINANCE_ALPHA, T_U8, F_CF | F_ES3}, + + // ES2 legacy + {"l" , GL_LUMINANCE,GL_LUMINANCE, T_U8, F_TF | F_ES2}, + {"la",GL_LUMINANCE_ALPHA,GL_LUMINANCE_ALPHA, T_U8, F_TF | F_ES2}, + {"rgb", GL_RGB, GL_RGB, T_U8, F_TF | F_ES2}, + {"rgba", GL_RGBA, GL_RGBA, T_U8, F_TF | F_ES2}, + + // Non-normalized integer formats. + // Follows ES 3.0 as to which are color-renderable. 
+ {"r8ui", GL_R8UI, GL_RED_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {"rg8ui", GL_RG8UI, GL_RG_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {"rgb8ui", GL_RGB8UI, GL_RGB_INTEGER, T_U8, F_GL3 | F_ES3}, + {"rgba8ui", GL_RGBA8UI, GL_RGBA_INTEGER, T_U8, F_CR | F_GL3 | F_ES3}, + {"r16ui", GL_R16UI, GL_RED_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + {"rg16ui", GL_RG16UI, GL_RG_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + {"rgb16ui", GL_RGB16UI, GL_RGB_INTEGER, T_U16, F_GL3 | F_ES3}, + {"rgba16ui",GL_RGBA16UI, GL_RGBA_INTEGER, T_U16, F_CR | F_GL3 | F_ES3}, + + // On GL3+ or GL2.1 with GL_ARB_texture_float, floats work fully. + {"r16f", GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"rg16f", GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"rgb16f", GL_RGB16F, GL_RGB, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"rgba16f", GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_GL3 | F_GL2F}, + {"r32f", GL_R32F, GL_RED, T_FL, F_CF | F_GL3 | F_GL2F}, + {"rg32f", GL_RG32F, GL_RG, T_FL, F_CF | F_GL3 | F_GL2F}, + {"rgb32f", GL_RGB32F, GL_RGB, T_FL, F_CF | F_GL3 | F_GL2F}, + {"rgba32f", GL_RGBA32F, GL_RGBA, T_FL, F_CF | F_GL3 | F_GL2F}, + + // Note: we simply don't support float anything on ES2, despite extensions. + // We also don't bother with non-filterable float formats, and we ignore + // 32 bit float formats that are not blendable when rendering to them. + + // On ES3.2+, both 16 bit floats work fully (except 3-component formats). + // F_EXTF16 implies extensions that also enable 16 bit floats fully. + {"r16f", GL_R16F, GL_RED, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + {"rg16f", GL_RG16F, GL_RG, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + {"rgb16f", GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES32 | F_EXTF16}, + {"rgba16f", GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_CF | F_ES32 | F_EXTF16}, + + // On ES3.0+, 16 bit floats are texture-filterable. + // Don't bother with 32 bit floats; they exist but are neither CR nor TF. 
+ {"r16f", GL_R16F, GL_RED, T_FL, F_F16 | F_TF | F_ES3}, + {"rg16f", GL_RG16F, GL_RG, T_FL, F_F16 | F_TF | F_ES3}, + {"rgb16f", GL_RGB16F, GL_RGB, T_FL, F_F16 | F_TF | F_ES3}, + {"rgba16f", GL_RGBA16F, GL_RGBA, T_FL, F_F16 | F_TF | F_ES3}, + + // These might be useful as FBO formats. + {"rgb10_a2",GL_RGB10_A2, GL_RGBA, + GL_UNSIGNED_INT_2_10_10_10_REV, F_CF | F_GL3 | F_ES3}, + {"rgba12", GL_RGBA12, GL_RGBA, T_U16, F_CF | F_GL2 | F_GL3}, + {"rgb10", GL_RGB10, GL_RGB, T_U16, F_CF | F_GL2 | F_GL3}, + + // Special formats. + {"rgb565", GL_RGB8, GL_RGB, + GL_UNSIGNED_SHORT_5_6_5, F_TF | F_GL2 | F_GL3}, + // Worthless, but needed by OSX videotoolbox interop on old Apple hardware. + {"appleyp", GL_RGB, GL_RGB_422_APPLE, + GL_UNSIGNED_SHORT_8_8_APPLE, F_TF | F_APPL}, + + {0} +}; + +// Return an or-ed combination of all F_ flags that apply. +int gl_format_feature_flags(GL *gl) +{ + return (gl->version == 210 ? F_GL2 : 0) + | (gl->version >= 300 ? F_GL3 : 0) + | (gl->es == 200 ? F_ES2 : 0) + | (gl->es >= 300 ? F_ES3 : 0) + | (gl->es >= 320 ? F_ES32 : 0) + | (gl->mpgl_caps & MPGL_CAP_EXT16 ? F_EXT16 : 0) + | ((gl->es >= 300 && + (gl->mpgl_caps & MPGL_CAP_EXT_CR_HFLOAT)) ? F_EXTF16 : 0) + | ((gl->version == 210 && + (gl->mpgl_caps & MPGL_CAP_ARB_FLOAT) && + (gl->mpgl_caps & MPGL_CAP_TEX_RG) && + (gl->mpgl_caps & MPGL_CAP_FB)) ? F_GL2F : 0) + | (gl->mpgl_caps & MPGL_CAP_APPLE_RGB_422 ? F_APPL : 0); +} + +int gl_format_type(const struct gl_format *format) +{ + if (!format) + return 0; + if (format->type == GL_FLOAT) + return MPGL_TYPE_FLOAT; + if (gl_integer_format_to_base(format->format)) + return MPGL_TYPE_UINT; + return MPGL_TYPE_UNORM; +} + +// Return base internal format of an integer format, or 0 if it's not integer. +// "format" is like in struct gl_format. 
+GLenum gl_integer_format_to_base(GLenum format) +{ + switch (format) { + case GL_RED_INTEGER: return GL_RED; + case GL_RG_INTEGER: return GL_RG; + case GL_RGB_INTEGER: return GL_RGB; + case GL_RGBA_INTEGER: return GL_RGBA; + } + return 0; +} + +// Return the number of bytes per component this format implies. +// Returns 0 for formats with non-byte alignments and formats which +// merge multiple components (like GL_UNSIGNED_SHORT_5_6_5). +// "type" is like in struct gl_format. +int gl_component_size(GLenum type) +{ + switch (type) { + case GL_UNSIGNED_BYTE: return 1; + case GL_UNSIGNED_SHORT: return 2; + case GL_FLOAT: return 4; + } + return 0; +} + +// Return the number of separate color components. +// "format" is like in struct gl_format. +int gl_format_components(GLenum format) +{ + switch (format) { + case GL_RED: + case GL_RED_INTEGER: + case GL_LUMINANCE: + return 1; + case GL_RG: + case GL_RG_INTEGER: + case GL_LUMINANCE_ALPHA: + return 2; + case GL_RGB: + case GL_RGB_INTEGER: + return 3; + case GL_RGBA: + case GL_RGBA_INTEGER: + return 4; + } + return 0; +} + +// Return the number of bytes per pixel for the given format. +// Parameter names like in struct gl_format. +int gl_bytes_per_pixel(GLenum format, GLenum type) +{ + // Formats with merged components are special. 
+ switch (type) { + case GL_UNSIGNED_INT_2_10_10_10_REV: return 4; + case GL_UNSIGNED_SHORT_5_6_5: return 2; + case GL_UNSIGNED_SHORT_8_8_APPLE: return 2; + case GL_UNSIGNED_SHORT_8_8_REV_APPLE: return 2; + } + + return gl_component_size(type) * gl_format_components(format); +} diff --git a/video/out/opengl/formats.h b/video/out/opengl/formats.h new file mode 100644 index 0000000..f727a3b --- /dev/null +++ b/video/out/opengl/formats.h @@ -0,0 +1,51 @@ +#ifndef MPGL_FORMATS_H_ +#define MPGL_FORMATS_H_ + +#include "common.h" + +struct gl_format { + const char *name; // symbolic name for user interaction/debugging + GLint internal_format; // glTexImage argument + GLenum format; // glTexImage argument + GLenum type; // e.g. GL_UNSIGNED_SHORT + int flags; // F_* flags +}; + +enum { + // --- gl_format.flags + + // Version flags. If at least 1 flag matches, the format entry is considered + // supported on the current GL context. + F_GL2 = 1 << 0, // GL2.1-only + F_GL3 = 1 << 1, // GL3.0 or later + F_ES2 = 1 << 2, // ES2-only + F_ES3 = 1 << 3, // ES3.0 or later + F_ES32 = 1 << 4, // ES3.2 or later + F_EXT16 = 1 << 5, // ES with GL_EXT_texture_norm16 + F_EXTF16 = 1 << 6, // GL_EXT_color_buffer_half_float + F_GL2F = 1 << 7, // GL2.1-only with texture_rg + texture_float + FBOs + F_APPL = 1 << 8, // GL_APPLE_rgb_422 + + // Feature flags. They are additional and signal presence of features. + F_CR = 1 << 16, // color-renderable + F_TF = 1 << 17, // texture-filterable with GL_LINEAR + F_CF = F_CR | F_TF, + F_F16 = 1 << 18, // uses half-floats (16 bit) internally, even though + // the format is still GL_FLOAT (32 bit) + + // --- Other constants. 
+ MPGL_TYPE_UNORM = RA_CTYPE_UNORM, // normalized integer (fixed point) formats + MPGL_TYPE_UINT = RA_CTYPE_UINT, // full integer formats + MPGL_TYPE_FLOAT = RA_CTYPE_FLOAT, // float formats (both full and half) +}; + +extern const struct gl_format gl_formats[]; + +int gl_format_feature_flags(GL *gl); +int gl_format_type(const struct gl_format *format); +GLenum gl_integer_format_to_base(GLenum format); +int gl_component_size(GLenum type); +int gl_format_components(GLenum format); +int gl_bytes_per_pixel(GLenum format, GLenum type); + +#endif diff --git a/video/out/opengl/gl_headers.h b/video/out/opengl/gl_headers.h new file mode 100644 index 0000000..5c36718 --- /dev/null +++ b/video/out/opengl/gl_headers.h @@ -0,0 +1,799 @@ +/* + * Parts of OpenGL(ES) needed by the OpenGL renderer. + * + * This excludes function declarations. + * + * This header is based on: + * - Khronos GLES headers (MIT) + * - mpv or MPlayer code (LGPL 2.1 or later) + * - probably Mesa GL headers (MIT) + */ + +#ifndef MPV_GL_HEADERS_H +#define MPV_GL_HEADERS_H + +#include <stdint.h> + +// Enable this to use system headers instead. +#if 0 +#include <GL/gl.h> +#include <GLES3/gl3.h> +#endif + +#ifndef GLAPIENTRY +#ifdef _WIN32 +#define GLAPIENTRY __stdcall +#else +#define GLAPIENTRY +#endif +#endif + +// Typedefs. This needs to work with system headers too (consider GLX), and +// before C11, duplicated typedefs were an error. So try to tolerate at least +// Mesa. +#ifdef GL_TRUE + // Tolerate old Mesa which has only definitions up to GL 2.0. + #define MP_GET_GL_TYPES_2_0 0 + #ifdef GL_VERSION_3_2 + #define MP_GET_GL_TYPES_3_2 0 + #else + #define MP_GET_GL_TYPES_3_2 1 + #endif +#else + // Get them all. 
+ #define MP_GET_GL_TYPES_2_0 1 + #define MP_GET_GL_TYPES_3_2 1 +#endif + +#if MP_GET_GL_TYPES_2_0 +// GL_VERSION_1_0, GL_ES_VERSION_2_0 +typedef unsigned int GLbitfield; +typedef unsigned char GLboolean; +typedef unsigned int GLenum; +typedef float GLfloat; +typedef int GLint; +typedef int GLsizei; +typedef uint8_t GLubyte; +typedef unsigned int GLuint; +typedef void GLvoid; +// GL 1.1 GL_VERSION_1_1, GL_ES_VERSION_2_0 +typedef float GLclampf; +// GL 1.5 GL_VERSION_1_5, GL_ES_VERSION_2_0 +typedef intptr_t GLintptr; +typedef ptrdiff_t GLsizeiptr; +// GL 2.0 GL_VERSION_2_0, GL_ES_VERSION_2_0 +typedef int8_t GLbyte; +typedef char GLchar; +typedef short GLshort; +typedef unsigned short GLushort; +#endif + +#if MP_GET_GL_TYPES_3_2 +// GL 3.2 GL_VERSION_3_2, GL_ES_VERSION_2_0 +typedef int64_t GLint64; +typedef struct __GLsync *GLsync; +typedef uint64_t GLuint64; +#endif + +// --- GL 1.1 + +#define GL_BACK_LEFT 0x0402 +#define GL_TEXTURE_1D 0x0DE0 +#define GL_RGB16 0x8054 +#define GL_RGB10 0x8052 +#define GL_RGBA12 0x805A +#define GL_RGBA16 0x805B +#define GL_TEXTURE_RED_SIZE 0x805C +#define GL_TEXTURE_GREEN_SIZE 0x805D +#define GL_TEXTURE_BLUE_SIZE 0x805E +#define GL_TEXTURE_ALPHA_SIZE 0x805F + +// --- GL 1.1 (removed from 3.0 core and not in GLES 2/3) + +#define GL_TEXTURE_LUMINANCE_SIZE 0x8060 +#define GL_LUMINANCE8 0x8040 +#define GL_LUMINANCE8_ALPHA8 0x8045 +#define GL_LUMINANCE16 0x8042 +#define GL_LUMINANCE16_ALPHA16 0x8048 + +// --- GL 1.5 + +#define GL_READ_ONLY 0x88B8 +#define GL_WRITE_ONLY 0x88B9 +#define GL_READ_WRITE 0x88BA + +// --- GL 3.0 + +#define GL_R16 0x822A +#define GL_RG16 0x822C + +// --- GL 3.1 + +#define GL_TEXTURE_RECTANGLE 0x84F5 + +// --- GL 3.3 or GL_ARB_timer_query + +#define GL_TIME_ELAPSED 0x88BF +#define GL_TIMESTAMP 0x8E28 + +// --- GL 4.3 or GL_ARB_debug_output + +#define GL_DEBUG_SEVERITY_HIGH 0x9146 +#define GL_DEBUG_SEVERITY_MEDIUM 0x9147 +#define GL_DEBUG_SEVERITY_LOW 0x9148 +#define GL_DEBUG_SEVERITY_NOTIFICATION 0x826B + +// --- 
GL 4.4 or GL_ARB_buffer_storage + +#define GL_MAP_PERSISTENT_BIT 0x0040 +#define GL_MAP_COHERENT_BIT 0x0080 +#define GL_DYNAMIC_STORAGE_BIT 0x0100 +#define GL_CLIENT_STORAGE_BIT 0x0200 + +// --- GL 4.2 or GL_ARB_image_load_store + +#define GL_TEXTURE_FETCH_BARRIER_BIT 0x00000008 + +// --- GL 4.3 or GL_ARB_compute_shader + +#define GL_COMPUTE_SHADER 0x91B9 +#define GL_MAX_COMPUTE_SHARED_MEMORY_SIZE 0x8262 +#define GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS 0x90EB + +// --- GL 4.3 or GL_ARB_shader_storage_buffer_object + +#define GL_SHADER_STORAGE_BUFFER 0x90D2 +#define GL_SHADER_STORAGE_BARRIER_BIT 0x00002000 + +// --- GL_NV_vdpau_interop + +#define GLvdpauSurfaceNV GLintptr +#define GL_WRITE_DISCARD_NV 0x88BE + +// --- GL_OES_EGL_image_external, GL_NV_EGL_stream_consumer_external + +#define GL_TEXTURE_EXTERNAL_OES 0x8D65 + +// --- GL_APPLE_rgb_422 + +#define GL_RGB_422_APPLE 0x8A1F +#define GL_UNSIGNED_SHORT_8_8_APPLE 0x85BA +#define GL_UNSIGNED_SHORT_8_8_REV_APPLE 0x85BB + +// --- GL_ANGLE_translated_shader_source + +#define GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE 0x93A0 + +// ---- GLES 2 + +#define GL_DEPTH_BUFFER_BIT 0x00000100 +#define GL_STENCIL_BUFFER_BIT 0x00000400 +#define GL_COLOR_BUFFER_BIT 0x00004000 +#define GL_FALSE 0 +#define GL_TRUE 1 +#define GL_POINTS 0x0000 +#define GL_LINES 0x0001 +#define GL_LINE_LOOP 0x0002 +#define GL_LINE_STRIP 0x0003 +#define GL_TRIANGLES 0x0004 +#define GL_TRIANGLE_STRIP 0x0005 +#define GL_TRIANGLE_FAN 0x0006 +#define GL_ZERO 0 +#define GL_ONE 1 +#define GL_SRC_COLOR 0x0300 +#define GL_ONE_MINUS_SRC_COLOR 0x0301 +#define GL_SRC_ALPHA 0x0302 +#define GL_ONE_MINUS_SRC_ALPHA 0x0303 +#define GL_DST_ALPHA 0x0304 +#define GL_ONE_MINUS_DST_ALPHA 0x0305 +#define GL_DST_COLOR 0x0306 +#define GL_ONE_MINUS_DST_COLOR 0x0307 +#define GL_SRC_ALPHA_SATURATE 0x0308 +#define GL_FUNC_ADD 0x8006 +#define GL_BLEND_EQUATION 0x8009 +#define GL_BLEND_EQUATION_RGB 0x8009 +#define GL_BLEND_EQUATION_ALPHA 0x883D +#define GL_FUNC_SUBTRACT 0x800A +#define 
GL_FUNC_REVERSE_SUBTRACT 0x800B +#define GL_BLEND_DST_RGB 0x80C8 +#define GL_BLEND_SRC_RGB 0x80C9 +#define GL_BLEND_DST_ALPHA 0x80CA +#define GL_BLEND_SRC_ALPHA 0x80CB +#define GL_CONSTANT_COLOR 0x8001 +#define GL_ONE_MINUS_CONSTANT_COLOR 0x8002 +#define GL_CONSTANT_ALPHA 0x8003 +#define GL_ONE_MINUS_CONSTANT_ALPHA 0x8004 +#define GL_BLEND_COLOR 0x8005 +#define GL_ARRAY_BUFFER 0x8892 +#define GL_ELEMENT_ARRAY_BUFFER 0x8893 +#define GL_ARRAY_BUFFER_BINDING 0x8894 +#define GL_ELEMENT_ARRAY_BUFFER_BINDING 0x8895 +#define GL_STREAM_DRAW 0x88E0 +#define GL_STATIC_DRAW 0x88E4 +#define GL_DYNAMIC_DRAW 0x88E8 +#define GL_BUFFER_SIZE 0x8764 +#define GL_BUFFER_USAGE 0x8765 +#define GL_CURRENT_VERTEX_ATTRIB 0x8626 +#define GL_FRONT 0x0404 +#define GL_BACK 0x0405 +#define GL_FRONT_AND_BACK 0x0408 +#define GL_TEXTURE_2D 0x0DE1 +#define GL_CULL_FACE 0x0B44 +#define GL_BLEND 0x0BE2 +#define GL_DITHER 0x0BD0 +#define GL_STENCIL_TEST 0x0B90 +#define GL_DEPTH_TEST 0x0B71 +#define GL_SCISSOR_TEST 0x0C11 +#define GL_POLYGON_OFFSET_FILL 0x8037 +#define GL_SAMPLE_ALPHA_TO_COVERAGE 0x809E +#define GL_SAMPLE_COVERAGE 0x80A0 +#define GL_NO_ERROR 0 +#define GL_INVALID_ENUM 0x0500 +#define GL_INVALID_VALUE 0x0501 +#define GL_INVALID_OPERATION 0x0502 +#define GL_OUT_OF_MEMORY 0x0505 +#define GL_CW 0x0900 +#define GL_CCW 0x0901 +#define GL_LINE_WIDTH 0x0B21 +#define GL_ALIASED_POINT_SIZE_RANGE 0x846D +#define GL_ALIASED_LINE_WIDTH_RANGE 0x846E +#define GL_CULL_FACE_MODE 0x0B45 +#define GL_FRONT_FACE 0x0B46 +#define GL_DEPTH_RANGE 0x0B70 +#define GL_DEPTH_WRITEMASK 0x0B72 +#define GL_DEPTH_CLEAR_VALUE 0x0B73 +#define GL_DEPTH_FUNC 0x0B74 +#define GL_STENCIL_CLEAR_VALUE 0x0B91 +#define GL_STENCIL_FUNC 0x0B92 +#define GL_STENCIL_FAIL 0x0B94 +#define GL_STENCIL_PASS_DEPTH_FAIL 0x0B95 +#define GL_STENCIL_PASS_DEPTH_PASS 0x0B96 +#define GL_STENCIL_REF 0x0B97 +#define GL_STENCIL_VALUE_MASK 0x0B93 +#define GL_STENCIL_WRITEMASK 0x0B98 +#define GL_STENCIL_BACK_FUNC 0x8800 +#define GL_STENCIL_BACK_FAIL 
0x8801 +#define GL_STENCIL_BACK_PASS_DEPTH_FAIL 0x8802 +#define GL_STENCIL_BACK_PASS_DEPTH_PASS 0x8803 +#define GL_STENCIL_BACK_REF 0x8CA3 +#define GL_STENCIL_BACK_VALUE_MASK 0x8CA4 +#define GL_STENCIL_BACK_WRITEMASK 0x8CA5 +#define GL_VIEWPORT 0x0BA2 +#define GL_SCISSOR_BOX 0x0C10 +#define GL_COLOR_CLEAR_VALUE 0x0C22 +#define GL_COLOR_WRITEMASK 0x0C23 +#define GL_UNPACK_ALIGNMENT 0x0CF5 +#define GL_PACK_ALIGNMENT 0x0D05 +#define GL_MAX_TEXTURE_SIZE 0x0D33 +#define GL_MAX_VIEWPORT_DIMS 0x0D3A +#define GL_SUBPIXEL_BITS 0x0D50 +#define GL_RED_BITS 0x0D52 +#define GL_GREEN_BITS 0x0D53 +#define GL_BLUE_BITS 0x0D54 +#define GL_ALPHA_BITS 0x0D55 +#define GL_DEPTH_BITS 0x0D56 +#define GL_STENCIL_BITS 0x0D57 +#define GL_POLYGON_OFFSET_UNITS 0x2A00 +#define GL_POLYGON_OFFSET_FACTOR 0x8038 +#define GL_TEXTURE_BINDING_2D 0x8069 +#define GL_SAMPLE_BUFFERS 0x80A8 +#define GL_SAMPLES 0x80A9 +#define GL_SAMPLE_COVERAGE_VALUE 0x80AA +#define GL_SAMPLE_COVERAGE_INVERT 0x80AB +#define GL_NUM_COMPRESSED_TEXTURE_FORMATS 0x86A2 +#define GL_COMPRESSED_TEXTURE_FORMATS 0x86A3 +#define GL_DONT_CARE 0x1100 +#define GL_FASTEST 0x1101 +#define GL_NICEST 0x1102 +#define GL_GENERATE_MIPMAP_HINT 0x8192 +#define GL_BYTE 0x1400 +#define GL_UNSIGNED_BYTE 0x1401 +#define GL_SHORT 0x1402 +#define GL_UNSIGNED_SHORT 0x1403 +#define GL_INT 0x1404 +#define GL_UNSIGNED_INT 0x1405 +#define GL_FLOAT 0x1406 +#define GL_FIXED 0x140C +#define GL_DEPTH_COMPONENT 0x1902 +#define GL_ALPHA 0x1906 +#define GL_RGB 0x1907 +#define GL_RGBA 0x1908 +#define GL_LUMINANCE 0x1909 +#define GL_LUMINANCE_ALPHA 0x190A +#define GL_UNSIGNED_SHORT_4_4_4_4 0x8033 +#define GL_UNSIGNED_SHORT_5_5_5_1 0x8034 +#define GL_UNSIGNED_SHORT_5_6_5 0x8363 +#define GL_FRAGMENT_SHADER 0x8B30 +#define GL_VERTEX_SHADER 0x8B31 +#define GL_MAX_VERTEX_ATTRIBS 0x8869 +#define GL_MAX_VERTEX_UNIFORM_VECTORS 0x8DFB +#define GL_MAX_VARYING_VECTORS 0x8DFC +#define GL_MAX_COMBINED_TEXTURE_IMAGE_UNITS 0x8B4D +#define GL_MAX_VERTEX_TEXTURE_IMAGE_UNITS 0x8B4C 
+#define GL_MAX_TEXTURE_IMAGE_UNITS 0x8872 +#define GL_MAX_FRAGMENT_UNIFORM_VECTORS 0x8DFD +#define GL_SHADER_TYPE 0x8B4F +#define GL_DELETE_STATUS 0x8B80 +#define GL_LINK_STATUS 0x8B82 +#define GL_VALIDATE_STATUS 0x8B83 +#define GL_ATTACHED_SHADERS 0x8B85 +#define GL_ACTIVE_UNIFORMS 0x8B86 +#define GL_ACTIVE_UNIFORM_MAX_LENGTH 0x8B87 +#define GL_ACTIVE_ATTRIBUTES 0x8B89 +#define GL_ACTIVE_ATTRIBUTE_MAX_LENGTH 0x8B8A +#define GL_SHADING_LANGUAGE_VERSION 0x8B8C +#define GL_CURRENT_PROGRAM 0x8B8D +#define GL_NEVER 0x0200 +#define GL_LESS 0x0201 +#define GL_EQUAL 0x0202 +#define GL_LEQUAL 0x0203 +#define GL_GREATER 0x0204 +#define GL_NOTEQUAL 0x0205 +#define GL_GEQUAL 0x0206 +#define GL_ALWAYS 0x0207 +#define GL_KEEP 0x1E00 +#define GL_REPLACE 0x1E01 +#define GL_INCR 0x1E02 +#define GL_DECR 0x1E03 +#define GL_INVERT 0x150A +#define GL_INCR_WRAP 0x8507 +#define GL_DECR_WRAP 0x8508 +#define GL_VENDOR 0x1F00 +#define GL_RENDERER 0x1F01 +#define GL_VERSION 0x1F02 +#define GL_EXTENSIONS 0x1F03 +#define GL_NEAREST 0x2600 +#define GL_LINEAR 0x2601 +#define GL_NEAREST_MIPMAP_NEAREST 0x2700 +#define GL_LINEAR_MIPMAP_NEAREST 0x2701 +#define GL_NEAREST_MIPMAP_LINEAR 0x2702 +#define GL_LINEAR_MIPMAP_LINEAR 0x2703 +#define GL_TEXTURE_MAG_FILTER 0x2800 +#define GL_TEXTURE_MIN_FILTER 0x2801 +#define GL_TEXTURE_WRAP_S 0x2802 +#define GL_TEXTURE_WRAP_T 0x2803 +#define GL_TEXTURE 0x1702 +#define GL_TEXTURE_CUBE_MAP 0x8513 +#define GL_TEXTURE_BINDING_CUBE_MAP 0x8514 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_X 0x8516 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Y 0x8517 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Y 0x8518 +#define GL_TEXTURE_CUBE_MAP_POSITIVE_Z 0x8519 +#define GL_TEXTURE_CUBE_MAP_NEGATIVE_Z 0x851A +#define GL_MAX_CUBE_MAP_TEXTURE_SIZE 0x851C +#define GL_TEXTURE0 0x84C0 +#define GL_TEXTURE1 0x84C1 +#define GL_TEXTURE2 0x84C2 +#define GL_TEXTURE3 0x84C3 +#define GL_TEXTURE4 0x84C4 +#define GL_TEXTURE5 0x84C5 +#define GL_TEXTURE6 0x84C6 
+#define GL_TEXTURE7 0x84C7 +#define GL_TEXTURE8 0x84C8 +#define GL_TEXTURE9 0x84C9 +#define GL_TEXTURE10 0x84CA +#define GL_TEXTURE11 0x84CB +#define GL_TEXTURE12 0x84CC +#define GL_TEXTURE13 0x84CD +#define GL_TEXTURE14 0x84CE +#define GL_TEXTURE15 0x84CF +#define GL_TEXTURE16 0x84D0 +#define GL_TEXTURE17 0x84D1 +#define GL_TEXTURE18 0x84D2 +#define GL_TEXTURE19 0x84D3 +#define GL_TEXTURE20 0x84D4 +#define GL_TEXTURE21 0x84D5 +#define GL_TEXTURE22 0x84D6 +#define GL_TEXTURE23 0x84D7 +#define GL_TEXTURE24 0x84D8 +#define GL_TEXTURE25 0x84D9 +#define GL_TEXTURE26 0x84DA +#define GL_TEXTURE27 0x84DB +#define GL_TEXTURE28 0x84DC +#define GL_TEXTURE29 0x84DD +#define GL_TEXTURE30 0x84DE +#define GL_TEXTURE31 0x84DF +#define GL_ACTIVE_TEXTURE 0x84E0 +#define GL_REPEAT 0x2901 +#define GL_CLAMP_TO_EDGE 0x812F +#define GL_MIRRORED_REPEAT 0x8370 +#define GL_FLOAT_VEC2 0x8B50 +#define GL_FLOAT_VEC3 0x8B51 +#define GL_FLOAT_VEC4 0x8B52 +#define GL_INT_VEC2 0x8B53 +#define GL_INT_VEC3 0x8B54 +#define GL_INT_VEC4 0x8B55 +#define GL_BOOL 0x8B56 +#define GL_BOOL_VEC2 0x8B57 +#define GL_BOOL_VEC3 0x8B58 +#define GL_BOOL_VEC4 0x8B59 +#define GL_FLOAT_MAT2 0x8B5A +#define GL_FLOAT_MAT3 0x8B5B +#define GL_FLOAT_MAT4 0x8B5C +#define GL_SAMPLER_2D 0x8B5E +#define GL_SAMPLER_CUBE 0x8B60 +#define GL_VERTEX_ATTRIB_ARRAY_ENABLED 0x8622 +#define GL_VERTEX_ATTRIB_ARRAY_SIZE 0x8623 +#define GL_VERTEX_ATTRIB_ARRAY_STRIDE 0x8624 +#define GL_VERTEX_ATTRIB_ARRAY_TYPE 0x8625 +#define GL_VERTEX_ATTRIB_ARRAY_NORMALIZED 0x886A +#define GL_VERTEX_ATTRIB_ARRAY_POINTER 0x8645 +#define GL_VERTEX_ATTRIB_ARRAY_BUFFER_BINDING 0x889F +#define GL_IMPLEMENTATION_COLOR_READ_TYPE 0x8B9A +#define GL_IMPLEMENTATION_COLOR_READ_FORMAT 0x8B9B +#define GL_COMPILE_STATUS 0x8B81 +#define GL_INFO_LOG_LENGTH 0x8B84 +#define GL_SHADER_SOURCE_LENGTH 0x8B88 +#define GL_SHADER_COMPILER 0x8DFA +#define GL_SHADER_BINARY_FORMATS 0x8DF8 +#define GL_NUM_SHADER_BINARY_FORMATS 0x8DF9 +#define GL_LOW_FLOAT 0x8DF0 +#define 
GL_MEDIUM_FLOAT 0x8DF1 +#define GL_HIGH_FLOAT 0x8DF2 +#define GL_LOW_INT 0x8DF3 +#define GL_MEDIUM_INT 0x8DF4 +#define GL_HIGH_INT 0x8DF5 +#define GL_FRAMEBUFFER 0x8D40 +#define GL_RENDERBUFFER 0x8D41 +#define GL_RGBA4 0x8056 +#define GL_RGB5_A1 0x8057 +#define GL_RGB565 0x8D62 +#define GL_DEPTH_COMPONENT16 0x81A5 +#define GL_STENCIL_INDEX8 0x8D48 +#define GL_RENDERBUFFER_WIDTH 0x8D42 +#define GL_RENDERBUFFER_HEIGHT 0x8D43 +#define GL_RENDERBUFFER_INTERNAL_FORMAT 0x8D44 +#define GL_RENDERBUFFER_RED_SIZE 0x8D50 +#define GL_RENDERBUFFER_GREEN_SIZE 0x8D51 +#define GL_RENDERBUFFER_BLUE_SIZE 0x8D52 +#define GL_RENDERBUFFER_ALPHA_SIZE 0x8D53 +#define GL_RENDERBUFFER_DEPTH_SIZE 0x8D54 +#define GL_RENDERBUFFER_STENCIL_SIZE 0x8D55 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_TYPE 0x8CD0 +#define GL_FRAMEBUFFER_ATTACHMENT_OBJECT_NAME 0x8CD1 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LEVEL 0x8CD2 +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_CUBE_MAP_FACE 0x8CD3 +#define GL_COLOR_ATTACHMENT0 0x8CE0 +#define GL_DEPTH_ATTACHMENT 0x8D00 +#define GL_STENCIL_ATTACHMENT 0x8D20 +#define GL_NONE 0 +#define GL_FRAMEBUFFER_COMPLETE 0x8CD5 +#define GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT 0x8CD6 +#define GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT 0x8CD7 +#define GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS 0x8CD9 +#define GL_FRAMEBUFFER_UNSUPPORTED 0x8CDD +#define GL_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_RENDERBUFFER_BINDING 0x8CA7 +#define GL_MAX_RENDERBUFFER_SIZE 0x84E8 +#define GL_INVALID_FRAMEBUFFER_OPERATION 0x0506 + +// ---- GLES 3 + +#ifndef GL_READ_BUFFER +typedef unsigned short GLhalf; +#endif + +#define GL_READ_BUFFER 0x0C02 +#define GL_UNPACK_ROW_LENGTH 0x0CF2 +#define GL_UNPACK_SKIP_ROWS 0x0CF3 +#define GL_UNPACK_SKIP_PIXELS 0x0CF4 +#define GL_PACK_ROW_LENGTH 0x0D02 +#define GL_PACK_SKIP_ROWS 0x0D03 +#define GL_PACK_SKIP_PIXELS 0x0D04 +#define GL_COLOR 0x1800 +#define GL_DEPTH 0x1801 +#define GL_STENCIL 0x1802 +#define GL_RED 0x1903 +#define GL_RGB8 0x8051 +#define GL_RGBA8 0x8058 +#define 
GL_RGB10_A2 0x8059 +#define GL_TEXTURE_BINDING_3D 0x806A +#define GL_UNPACK_SKIP_IMAGES 0x806D +#define GL_UNPACK_IMAGE_HEIGHT 0x806E +#define GL_TEXTURE_3D 0x806F +#define GL_TEXTURE_WRAP_R 0x8072 +#define GL_MAX_3D_TEXTURE_SIZE 0x8073 +#define GL_UNSIGNED_INT_2_10_10_10_REV 0x8368 +#define GL_MAX_ELEMENTS_VERTICES 0x80E8 +#define GL_MAX_ELEMENTS_INDICES 0x80E9 +#define GL_TEXTURE_MIN_LOD 0x813A +#define GL_TEXTURE_MAX_LOD 0x813B +#define GL_TEXTURE_BASE_LEVEL 0x813C +#define GL_TEXTURE_MAX_LEVEL 0x813D +#define GL_MIN 0x8007 +#define GL_MAX 0x8008 +#define GL_DEPTH_COMPONENT24 0x81A6 +#define GL_MAX_TEXTURE_LOD_BIAS 0x84FD +#define GL_TEXTURE_COMPARE_MODE 0x884C +#define GL_TEXTURE_COMPARE_FUNC 0x884D +#define GL_CURRENT_QUERY 0x8865 +#define GL_QUERY_RESULT 0x8866 +#define GL_QUERY_RESULT_AVAILABLE 0x8867 +#define GL_BUFFER_MAPPED 0x88BC +#define GL_BUFFER_MAP_POINTER 0x88BD +#define GL_STREAM_READ 0x88E1 +#define GL_STREAM_COPY 0x88E2 +#define GL_STATIC_READ 0x88E5 +#define GL_STATIC_COPY 0x88E6 +#define GL_DYNAMIC_READ 0x88E9 +#define GL_DYNAMIC_COPY 0x88EA +#define GL_MAX_DRAW_BUFFERS 0x8824 +#define GL_DRAW_BUFFER0 0x8825 +#define GL_DRAW_BUFFER1 0x8826 +#define GL_DRAW_BUFFER2 0x8827 +#define GL_DRAW_BUFFER3 0x8828 +#define GL_DRAW_BUFFER4 0x8829 +#define GL_DRAW_BUFFER5 0x882A +#define GL_DRAW_BUFFER6 0x882B +#define GL_DRAW_BUFFER7 0x882C +#define GL_DRAW_BUFFER8 0x882D +#define GL_DRAW_BUFFER9 0x882E +#define GL_DRAW_BUFFER10 0x882F +#define GL_DRAW_BUFFER11 0x8830 +#define GL_DRAW_BUFFER12 0x8831 +#define GL_DRAW_BUFFER13 0x8832 +#define GL_DRAW_BUFFER14 0x8833 +#define GL_DRAW_BUFFER15 0x8834 +#define GL_MAX_FRAGMENT_UNIFORM_COMPONENTS 0x8B49 +#define GL_MAX_VERTEX_UNIFORM_COMPONENTS 0x8B4A +#define GL_SAMPLER_3D 0x8B5F +#define GL_SAMPLER_2D_SHADOW 0x8B62 +#define GL_FRAGMENT_SHADER_DERIVATIVE_HINT 0x8B8B +#define GL_PIXEL_PACK_BUFFER 0x88EB +#define GL_PIXEL_UNPACK_BUFFER 0x88EC +#define GL_PIXEL_PACK_BUFFER_BINDING 0x88ED +#define 
GL_PIXEL_UNPACK_BUFFER_BINDING 0x88EF +#define GL_FLOAT_MAT2x3 0x8B65 +#define GL_FLOAT_MAT2x4 0x8B66 +#define GL_FLOAT_MAT3x2 0x8B67 +#define GL_FLOAT_MAT3x4 0x8B68 +#define GL_FLOAT_MAT4x2 0x8B69 +#define GL_FLOAT_MAT4x3 0x8B6A +#define GL_SRGB 0x8C40 +#define GL_SRGB8 0x8C41 +#define GL_SRGB8_ALPHA8 0x8C43 +#define GL_COMPARE_REF_TO_TEXTURE 0x884E +#define GL_MAJOR_VERSION 0x821B +#define GL_MINOR_VERSION 0x821C +#define GL_NUM_EXTENSIONS 0x821D +#define GL_RGBA32F 0x8814 +#define GL_RGB32F 0x8815 +#define GL_RGBA16F 0x881A +#define GL_RGB16F 0x881B +#define GL_VERTEX_ATTRIB_ARRAY_INTEGER 0x88FD +#define GL_MAX_ARRAY_TEXTURE_LAYERS 0x88FF +#define GL_MIN_PROGRAM_TEXEL_OFFSET 0x8904 +#define GL_MAX_PROGRAM_TEXEL_OFFSET 0x8905 +#define GL_MAX_VARYING_COMPONENTS 0x8B4B +#define GL_TEXTURE_2D_ARRAY 0x8C1A +#define GL_TEXTURE_BINDING_2D_ARRAY 0x8C1D +#define GL_R11F_G11F_B10F 0x8C3A +#define GL_UNSIGNED_INT_10F_11F_11F_REV 0x8C3B +#define GL_RGB9_E5 0x8C3D +#define GL_UNSIGNED_INT_5_9_9_9_REV 0x8C3E +#define GL_TRANSFORM_FEEDBACK_VARYING_MAX_LENGTH 0x8C76 +#define GL_TRANSFORM_FEEDBACK_BUFFER_MODE 0x8C7F +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_COMPONENTS 0x8C80 +#define GL_TRANSFORM_FEEDBACK_VARYINGS 0x8C83 +#define GL_TRANSFORM_FEEDBACK_BUFFER_START 0x8C84 +#define GL_TRANSFORM_FEEDBACK_BUFFER_SIZE 0x8C85 +#define GL_TRANSFORM_FEEDBACK_PRIMITIVES_WRITTEN 0x8C88 +#define GL_RASTERIZER_DISCARD 0x8C89 +#define GL_MAX_TRANSFORM_FEEDBACK_INTERLEAVED_COMPONENTS 0x8C8A +#define GL_MAX_TRANSFORM_FEEDBACK_SEPARATE_ATTRIBS 0x8C8B +#define GL_INTERLEAVED_ATTRIBS 0x8C8C +#define GL_SEPARATE_ATTRIBS 0x8C8D +#define GL_TRANSFORM_FEEDBACK_BUFFER 0x8C8E +#define GL_TRANSFORM_FEEDBACK_BUFFER_BINDING 0x8C8F +#define GL_RGBA32UI 0x8D70 +#define GL_RGB32UI 0x8D71 +#define GL_RGBA16UI 0x8D76 +#define GL_RGB16UI 0x8D77 +#define GL_RGBA8UI 0x8D7C +#define GL_RGB8UI 0x8D7D +#define GL_RGBA32I 0x8D82 +#define GL_RGB32I 0x8D83 +#define GL_RGBA16I 0x8D88 +#define GL_RGB16I 0x8D89 +#define 
GL_RGBA8I 0x8D8E +#define GL_RGB8I 0x8D8F +#define GL_RED_INTEGER 0x8D94 +#define GL_RGB_INTEGER 0x8D98 +#define GL_RGBA_INTEGER 0x8D99 +#define GL_SAMPLER_2D_ARRAY 0x8DC1 +#define GL_SAMPLER_2D_ARRAY_SHADOW 0x8DC4 +#define GL_SAMPLER_CUBE_SHADOW 0x8DC5 +#define GL_UNSIGNED_INT_VEC2 0x8DC6 +#define GL_UNSIGNED_INT_VEC3 0x8DC7 +#define GL_UNSIGNED_INT_VEC4 0x8DC8 +#define GL_INT_SAMPLER_2D 0x8DCA +#define GL_INT_SAMPLER_3D 0x8DCB +#define GL_INT_SAMPLER_CUBE 0x8DCC +#define GL_INT_SAMPLER_2D_ARRAY 0x8DCF +#define GL_UNSIGNED_INT_SAMPLER_2D 0x8DD2 +#define GL_UNSIGNED_INT_SAMPLER_3D 0x8DD3 +#define GL_UNSIGNED_INT_SAMPLER_CUBE 0x8DD4 +#define GL_UNSIGNED_INT_SAMPLER_2D_ARRAY 0x8DD7 +#define GL_BUFFER_ACCESS_FLAGS 0x911F +#define GL_BUFFER_MAP_LENGTH 0x9120 +#define GL_BUFFER_MAP_OFFSET 0x9121 +#define GL_DEPTH_COMPONENT32F 0x8CAC +#define GL_DEPTH32F_STENCIL8 0x8CAD +#define GL_FLOAT_32_UNSIGNED_INT_24_8_REV 0x8DAD +#define GL_FRAMEBUFFER_ATTACHMENT_COLOR_ENCODING 0x8210 +#define GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE 0x8211 +#define GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE 0x8212 +#define GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE 0x8213 +#define GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE 0x8214 +#define GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE 0x8215 +#define GL_FRAMEBUFFER_ATTACHMENT_DEPTH_SIZE 0x8216 +#define GL_FRAMEBUFFER_ATTACHMENT_STENCIL_SIZE 0x8217 +#define GL_FRAMEBUFFER_DEFAULT 0x8218 +#define GL_FRAMEBUFFER_UNDEFINED 0x8219 +#define GL_DEPTH_STENCIL_ATTACHMENT 0x821A +#define GL_DEPTH_STENCIL 0x84F9 +#define GL_UNSIGNED_INT_24_8 0x84FA +#define GL_DEPTH24_STENCIL8 0x88F0 +#define GL_UNSIGNED_NORMALIZED 0x8C17 +#define GL_DRAW_FRAMEBUFFER_BINDING 0x8CA6 +#define GL_READ_FRAMEBUFFER 0x8CA8 +#define GL_DRAW_FRAMEBUFFER 0x8CA9 +#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA +#define GL_RENDERBUFFER_SAMPLES 0x8CAB +#define GL_FRAMEBUFFER_ATTACHMENT_TEXTURE_LAYER 0x8CD4 +#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF +#define GL_COLOR_ATTACHMENT1 0x8CE1 +#define GL_COLOR_ATTACHMENT2 0x8CE2 
+#define GL_COLOR_ATTACHMENT3 0x8CE3 +#define GL_COLOR_ATTACHMENT4 0x8CE4 +#define GL_COLOR_ATTACHMENT5 0x8CE5 +#define GL_COLOR_ATTACHMENT6 0x8CE6 +#define GL_COLOR_ATTACHMENT7 0x8CE7 +#define GL_COLOR_ATTACHMENT8 0x8CE8 +#define GL_COLOR_ATTACHMENT9 0x8CE9 +#define GL_COLOR_ATTACHMENT10 0x8CEA +#define GL_COLOR_ATTACHMENT11 0x8CEB +#define GL_COLOR_ATTACHMENT12 0x8CEC +#define GL_COLOR_ATTACHMENT13 0x8CED +#define GL_COLOR_ATTACHMENT14 0x8CEE +#define GL_COLOR_ATTACHMENT15 0x8CEF +#define GL_COLOR_ATTACHMENT16 0x8CF0 +#define GL_COLOR_ATTACHMENT17 0x8CF1 +#define GL_COLOR_ATTACHMENT18 0x8CF2 +#define GL_COLOR_ATTACHMENT19 0x8CF3 +#define GL_COLOR_ATTACHMENT20 0x8CF4 +#define GL_COLOR_ATTACHMENT21 0x8CF5 +#define GL_COLOR_ATTACHMENT22 0x8CF6 +#define GL_COLOR_ATTACHMENT23 0x8CF7 +#define GL_COLOR_ATTACHMENT24 0x8CF8 +#define GL_COLOR_ATTACHMENT25 0x8CF9 +#define GL_COLOR_ATTACHMENT26 0x8CFA +#define GL_COLOR_ATTACHMENT27 0x8CFB +#define GL_COLOR_ATTACHMENT28 0x8CFC +#define GL_COLOR_ATTACHMENT29 0x8CFD +#define GL_COLOR_ATTACHMENT30 0x8CFE +#define GL_COLOR_ATTACHMENT31 0x8CFF +#define GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE 0x8D56 +#define GL_MAX_SAMPLES 0x8D57 +#define GL_HALF_FLOAT 0x140B +#define GL_MAP_READ_BIT 0x0001 +#define GL_MAP_WRITE_BIT 0x0002 +#define GL_MAP_INVALIDATE_RANGE_BIT 0x0004 +#define GL_MAP_INVALIDATE_BUFFER_BIT 0x0008 +#define GL_MAP_FLUSH_EXPLICIT_BIT 0x0010 +#define GL_MAP_UNSYNCHRONIZED_BIT 0x0020 +#define GL_RG 0x8227 +#define GL_RG_INTEGER 0x8228 +#define GL_R8 0x8229 +#define GL_RG8 0x822B +#define GL_R16F 0x822D +#define GL_R32F 0x822E +#define GL_RG16F 0x822F +#define GL_RG32F 0x8230 +#define GL_R8I 0x8231 +#define GL_R8UI 0x8232 +#define GL_R16I 0x8233 +#define GL_R16UI 0x8234 +#define GL_R32I 0x8235 +#define GL_R32UI 0x8236 +#define GL_RG8I 0x8237 +#define GL_RG8UI 0x8238 +#define GL_RG16I 0x8239 +#define GL_RG16UI 0x823A +#define GL_RG32I 0x823B +#define GL_RG32UI 0x823C +#define GL_VERTEX_ARRAY_BINDING 0x85B5 +#define GL_R8_SNORM 
0x8F94 +#define GL_RG8_SNORM 0x8F95 +#define GL_RGB8_SNORM 0x8F96 +#define GL_RGBA8_SNORM 0x8F97 +#define GL_SIGNED_NORMALIZED 0x8F9C +#define GL_PRIMITIVE_RESTART_FIXED_INDEX 0x8D69 +#define GL_COPY_READ_BUFFER 0x8F36 +#define GL_COPY_WRITE_BUFFER 0x8F37 +#define GL_COPY_READ_BUFFER_BINDING 0x8F36 +#define GL_COPY_WRITE_BUFFER_BINDING 0x8F37 +#define GL_UNIFORM_BUFFER 0x8A11 +#define GL_UNIFORM_BUFFER_BINDING 0x8A28 +#define GL_UNIFORM_BUFFER_START 0x8A29 +#define GL_UNIFORM_BUFFER_SIZE 0x8A2A +#define GL_MAX_VERTEX_UNIFORM_BLOCKS 0x8A2B +#define GL_MAX_FRAGMENT_UNIFORM_BLOCKS 0x8A2D +#define GL_MAX_COMBINED_UNIFORM_BLOCKS 0x8A2E +#define GL_MAX_UNIFORM_BUFFER_BINDINGS 0x8A2F +#define GL_MAX_UNIFORM_BLOCK_SIZE 0x8A30 +#define GL_MAX_COMBINED_VERTEX_UNIFORM_COMPONENTS 0x8A31 +#define GL_MAX_COMBINED_FRAGMENT_UNIFORM_COMPONENTS 0x8A33 +#define GL_UNIFORM_BUFFER_OFFSET_ALIGNMENT 0x8A34 +#define GL_ACTIVE_UNIFORM_BLOCK_MAX_NAME_LENGTH 0x8A35 +#define GL_ACTIVE_UNIFORM_BLOCKS 0x8A36 +#define GL_UNIFORM_TYPE 0x8A37 +#define GL_UNIFORM_SIZE 0x8A38 +#define GL_UNIFORM_NAME_LENGTH 0x8A39 +#define GL_UNIFORM_BLOCK_INDEX 0x8A3A +#define GL_UNIFORM_OFFSET 0x8A3B +#define GL_UNIFORM_ARRAY_STRIDE 0x8A3C +#define GL_UNIFORM_MATRIX_STRIDE 0x8A3D +#define GL_UNIFORM_IS_ROW_MAJOR 0x8A3E +#define GL_UNIFORM_BLOCK_BINDING 0x8A3F +#define GL_UNIFORM_BLOCK_DATA_SIZE 0x8A40 +#define GL_UNIFORM_BLOCK_NAME_LENGTH 0x8A41 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORMS 0x8A42 +#define GL_UNIFORM_BLOCK_ACTIVE_UNIFORM_INDICES 0x8A43 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_VERTEX_SHADER 0x8A44 +#define GL_UNIFORM_BLOCK_REFERENCED_BY_FRAGMENT_SHADER 0x8A46 +#define GL_INVALID_INDEX 0xFFFFFFFFu +#define GL_MAX_VERTEX_OUTPUT_COMPONENTS 0x9122 +#define GL_MAX_FRAGMENT_INPUT_COMPONENTS 0x9125 +#define GL_MAX_SERVER_WAIT_TIMEOUT 0x9111 +#define GL_OBJECT_TYPE 0x9112 +#define GL_SYNC_CONDITION 0x9113 +#define GL_SYNC_STATUS 0x9114 +#define GL_SYNC_FLAGS 0x9115 +#define GL_SYNC_FENCE 0x9116 +#define 
GL_SYNC_GPU_COMMANDS_COMPLETE 0x9117 +#define GL_UNSIGNALED 0x9118 +#define GL_SIGNALED 0x9119 +#define GL_ALREADY_SIGNALED 0x911A +#define GL_TIMEOUT_EXPIRED 0x911B +#define GL_CONDITION_SATISFIED 0x911C +#define GL_WAIT_FAILED 0x911D +#define GL_SYNC_FLUSH_COMMANDS_BIT 0x00000001 +#define GL_TIMEOUT_IGNORED 0xFFFFFFFFFFFFFFFFull +#define GL_VERTEX_ATTRIB_ARRAY_DIVISOR 0x88FE +#define GL_ANY_SAMPLES_PASSED 0x8C2F +#define GL_ANY_SAMPLES_PASSED_CONSERVATIVE 0x8D6A +#define GL_SAMPLER_BINDING 0x8919 +#define GL_RGB10_A2UI 0x906F +#define GL_TEXTURE_SWIZZLE_R 0x8E42 +#define GL_TEXTURE_SWIZZLE_G 0x8E43 +#define GL_TEXTURE_SWIZZLE_B 0x8E44 +#define GL_TEXTURE_SWIZZLE_A 0x8E45 +#define GL_GREEN 0x1904 +#define GL_BLUE 0x1905 +#define GL_INT_2_10_10_10_REV 0x8D9F +#define GL_TRANSFORM_FEEDBACK 0x8E22 +#define GL_TRANSFORM_FEEDBACK_PAUSED 0x8E23 +#define GL_TRANSFORM_FEEDBACK_ACTIVE 0x8E24 +#define GL_TRANSFORM_FEEDBACK_BINDING 0x8E25 +#define GL_PROGRAM_BINARY_RETRIEVABLE_HINT 0x8257 +#define GL_PROGRAM_BINARY_LENGTH 0x8741 +#define GL_NUM_PROGRAM_BINARY_FORMATS 0x87FE +#define GL_PROGRAM_BINARY_FORMATS 0x87FF +#define GL_COMPRESSED_R11_EAC 0x9270 +#define GL_COMPRESSED_SIGNED_R11_EAC 0x9271 +#define GL_COMPRESSED_RG11_EAC 0x9272 +#define GL_COMPRESSED_SIGNED_RG11_EAC 0x9273 +#define GL_COMPRESSED_RGB8_ETC2 0x9274 +#define GL_COMPRESSED_SRGB8_ETC2 0x9275 +#define GL_COMPRESSED_RGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9276 +#define GL_COMPRESSED_SRGB8_PUNCHTHROUGH_ALPHA1_ETC2 0x9277 +#define GL_COMPRESSED_RGBA8_ETC2_EAC 0x9278 +#define GL_COMPRESSED_SRGB8_ALPHA8_ETC2_EAC 0x9279 +#define GL_TEXTURE_IMMUTABLE_FORMAT 0x912F +#define GL_MAX_ELEMENT_INDEX 0x8D6B +#define GL_NUM_SAMPLE_COUNTS 0x9380 +#define GL_TEXTURE_IMMUTABLE_LEVELS 0x82DF + +#endif diff --git a/video/out/opengl/hwdec_d3d11egl.c b/video/out/opengl/hwdec_d3d11egl.c new file mode 100644 index 0000000..c312091 --- /dev/null +++ b/video/out/opengl/hwdec_d3d11egl.c @@ -0,0 +1,363 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <windows.h> +#include <d3d11.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "angle_dynamic.h" + +#include "common/common.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "video/out/gpu/hwdec.h" +#include "ra_gl.h" +#include "video/hwdec.h" +#include "video/d3d.h" + +#ifndef EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE +#define EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE 0x33AB +#endif + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + + ID3D11Device *d3d11_device; + EGLDisplay egl_display; + + // EGL_KHR_stream + EGLStreamKHR (EGLAPIENTRY *CreateStreamKHR)(EGLDisplay dpy, + const EGLint *attrib_list); + EGLBoolean (EGLAPIENTRY *DestroyStreamKHR)(EGLDisplay dpy, + EGLStreamKHR stream); + + // EGL_KHR_stream_consumer_gltexture + EGLBoolean (EGLAPIENTRY *StreamConsumerAcquireKHR) + (EGLDisplay dpy, EGLStreamKHR stream); + EGLBoolean (EGLAPIENTRY *StreamConsumerReleaseKHR) + (EGLDisplay dpy, EGLStreamKHR stream); + + // EGL_NV_stream_consumer_gltexture_yuv + EGLBoolean (EGLAPIENTRY *StreamConsumerGLTextureExternalAttribsNV) + (EGLDisplay dpy, EGLStreamKHR stream, EGLAttrib *attrib_list); + + // EGL_ANGLE_stream_producer_d3d_texture + EGLBoolean (EGLAPIENTRY *CreateStreamProducerD3DTextureANGLE) + (EGLDisplay dpy, EGLStreamKHR stream, const EGLAttrib 
*attrib_list); + EGLBoolean (EGLAPIENTRY *StreamPostD3DTextureANGLE) + (EGLDisplay dpy, EGLStreamKHR stream, void *texture, + const EGLAttrib *attrib_list); +}; + +struct priv { + EGLStreamKHR egl_stream; + GLuint gl_textures[2]; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + hwdec_devices_remove(hw->devs, &p->hwctx); + + if (p->d3d11_device) + ID3D11Device_Release(p->d3d11_device); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + HRESULT hr; + + if (!ra_is_gl(hw->ra_ctx->ra)) + return -1; + if (!angle_load()) + return -1; + + EGLDisplay egl_display = eglGetCurrentDisplay(); + if (!egl_display) + return -1; + + if (!eglGetCurrentContext()) + return -1; + + GL *gl = ra_gl_get(hw->ra_ctx->ra); + + const char *exts = eglQueryString(egl_display, EGL_EXTENSIONS); + if (!gl_check_extension(exts, "EGL_ANGLE_d3d_share_handle_client_buffer") || + !gl_check_extension(exts, "EGL_ANGLE_stream_producer_d3d_texture") || + !(gl_check_extension(gl->extensions, "GL_OES_EGL_image_external_essl3") || + gl->es == 200) || + !gl_check_extension(exts, "EGL_EXT_device_query") || + !(gl->mpgl_caps & MPGL_CAP_TEX_RG)) + return -1; + + p->egl_display = egl_display; + + p->CreateStreamKHR = (void *)eglGetProcAddress("eglCreateStreamKHR"); + p->DestroyStreamKHR = (void *)eglGetProcAddress("eglDestroyStreamKHR"); + p->StreamConsumerAcquireKHR = + (void *)eglGetProcAddress("eglStreamConsumerAcquireKHR"); + p->StreamConsumerReleaseKHR = + (void *)eglGetProcAddress("eglStreamConsumerReleaseKHR"); + p->StreamConsumerGLTextureExternalAttribsNV = + (void *)eglGetProcAddress("eglStreamConsumerGLTextureExternalAttribsNV"); + p->CreateStreamProducerD3DTextureANGLE = + (void *)eglGetProcAddress("eglCreateStreamProducerD3DTextureANGLE"); + p->StreamPostD3DTextureANGLE = + (void *)eglGetProcAddress("eglStreamPostD3DTextureANGLE"); + + if (!p->CreateStreamKHR || !p->DestroyStreamKHR || + !p->StreamConsumerAcquireKHR || 
!p->StreamConsumerReleaseKHR || + !p->StreamConsumerGLTextureExternalAttribsNV || + !p->CreateStreamProducerD3DTextureANGLE || + !p->StreamPostD3DTextureANGLE) + { + MP_ERR(hw, "Failed to load some EGLStream functions.\n"); + goto fail; + } + + static const char *es2_exts[] = {"GL_NV_EGL_stream_consumer_external", 0}; + static const char *es3_exts[] = {"GL_NV_EGL_stream_consumer_external", + "GL_OES_EGL_image_external_essl3", 0}; + hw->glsl_extensions = gl->es == 200 ? es2_exts : es3_exts; + + PFNEGLQUERYDISPLAYATTRIBEXTPROC p_eglQueryDisplayAttribEXT = + (void *)eglGetProcAddress("eglQueryDisplayAttribEXT"); + PFNEGLQUERYDEVICEATTRIBEXTPROC p_eglQueryDeviceAttribEXT = + (void *)eglGetProcAddress("eglQueryDeviceAttribEXT"); + if (!p_eglQueryDisplayAttribEXT || !p_eglQueryDeviceAttribEXT) + goto fail; + + EGLAttrib device = 0; + if (!p_eglQueryDisplayAttribEXT(egl_display, EGL_DEVICE_EXT, &device)) + goto fail; + EGLAttrib d3d_device = 0; + if (!p_eglQueryDeviceAttribEXT((EGLDeviceEXT)device, + EGL_D3D11_DEVICE_ANGLE, &d3d_device)) + { + MP_ERR(hw, "Could not get EGL_D3D11_DEVICE_ANGLE from ANGLE.\n"); + goto fail; + } + + p->d3d11_device = (ID3D11Device *)d3d_device; + if (!p->d3d11_device) + goto fail; + ID3D11Device_AddRef(p->d3d11_device); + + if (!d3d11_check_decoding(p->d3d11_device)) { + MP_VERBOSE(hw, "D3D11 video decoding not supported on this system.\n"); + goto fail; + } + + ID3D10Multithread *multithread; + hr = ID3D11Device_QueryInterface(p->d3d11_device, &IID_ID3D10Multithread, + (void **)&multithread); + if (FAILED(hr)) { + MP_ERR(hw, "Failed to get Multithread interface: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + ID3D10Multithread_SetMultithreadProtected(multithread, TRUE); + ID3D10Multithread_Release(multithread); + + static const int subfmts[] = {IMGFMT_NV12, IMGFMT_P010, 0}; + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .av_device_ref = d3d11_wrap_device_ref(p->d3d11_device), + .supported_formats = subfmts, + 
.hw_imgfmt = IMGFMT_D3D11, + }; + + if (!p->hwctx.av_device_ref) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n"); + return -1; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; +fail: + return -1; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + + if (p->egl_stream) + o->DestroyStreamKHR(o->egl_display, p->egl_stream); + p->egl_stream = 0; + + gl->DeleteTextures(2, p->gl_textures); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + + struct ra_imgfmt_desc desc = {0}; + + ra_get_imgfmt_desc(mapper->ra, mapper->src_params.hw_subfmt, &desc); + + // ANGLE hardcodes the list of accepted formats. This is a subset. + if ((mapper->src_params.hw_subfmt != IMGFMT_NV12 && + mapper->src_params.hw_subfmt != IMGFMT_P010) || + desc.num_planes < 1 || desc.num_planes > 2) + { + MP_FATAL(mapper, "Format not supported.\n"); + return -1; + } + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = mapper->src_params.hw_subfmt; + mapper->dst_params.hw_subfmt = 0; + + // The texture units need to be bound during init only, and are free for + // use again after the initialization here is done. 
+ int texunits = 0; // [texunits, texunits + num_planes) + int num_planes = desc.num_planes; + int gl_target = GL_TEXTURE_EXTERNAL_OES; + + p->egl_stream = o->CreateStreamKHR(o->egl_display, (EGLint[]){EGL_NONE}); + if (!p->egl_stream) + goto fail; + + EGLAttrib attrs[(2 + 2 + 1) * 2] = { + EGL_COLOR_BUFFER_TYPE, EGL_YUV_BUFFER_EXT, + EGL_YUV_NUMBER_OF_PLANES_EXT, num_planes, + }; + + for (int n = 0; n < num_planes; n++) { + gl->ActiveTexture(GL_TEXTURE0 + texunits + n); + gl->GenTextures(1, &p->gl_textures[n]); + gl->BindTexture(gl_target, p->gl_textures[n]); + gl->TexParameteri(gl_target, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(gl_target, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(gl_target, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + attrs[(2 + n) * 2 + 0] = EGL_YUV_PLANE0_TEXTURE_UNIT_NV + n; + attrs[(2 + n) * 2 + 1] = texunits + n; + } + + attrs[(2 + num_planes) * 2 + 0] = EGL_NONE; + + if (!o->StreamConsumerGLTextureExternalAttribsNV(o->egl_display, p->egl_stream, + attrs)) + goto fail; + + if (!o->CreateStreamProducerD3DTextureANGLE(o->egl_display, p->egl_stream, + (EGLAttrib[]){EGL_NONE})) + goto fail; + + for (int n = 0; n < num_planes; n++) { + gl->ActiveTexture(GL_TEXTURE0 + texunits + n); + gl->BindTexture(gl_target, 0); + } + gl->ActiveTexture(GL_TEXTURE0); + return 0; +fail: + gl->ActiveTexture(GL_TEXTURE0); + MP_ERR(mapper, "Failed to create EGLStream\n"); + return -1; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + + ID3D11Texture2D *d3d_tex = (void *)mapper->src->planes[0]; + int d3d_subindex = (intptr_t)mapper->src->planes[1]; + if (!d3d_tex) + return -1; + + EGLAttrib attrs[] = { + EGL_D3D_TEXTURE_SUBRESOURCE_ID_ANGLE, d3d_subindex, + EGL_NONE, + }; + if (!o->StreamPostD3DTextureANGLE(o->egl_display, p->egl_stream, + (void *)d3d_tex, attrs)) + { + // ANGLE 
changed the enum ID of this without warning at one point. + attrs[0] = attrs[0] == 0x33AB ? 0x3AAB : 0x33AB; + if (!o->StreamPostD3DTextureANGLE(o->egl_display, p->egl_stream, + (void *)d3d_tex, attrs)) + return -1; + } + + if (!o->StreamConsumerAcquireKHR(o->egl_display, p->egl_stream)) + return -1; + + D3D11_TEXTURE2D_DESC texdesc; + ID3D11Texture2D_GetDesc(d3d_tex, &texdesc); + + for (int n = 0; n < 2; n++) { + struct ra_tex_params params = { + .dimensions = 2, + .w = texdesc.Width / (n ? 2 : 1), + .h = texdesc.Height / (n ? 2 : 1), + .d = 1, + .format = ra_find_unorm_format(mapper->ra, 1, n ? 2 : 1), + .render_src = true, + .src_linear = true, + .external_oes = true, + }; + if (!params.format) + return -1; + + mapper->tex[n] = ra_create_wrapped_tex(mapper->ra, ¶ms, + p->gl_textures[n]); + if (!mapper->tex[n]) + return -1; + } + + return 0; +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *o = mapper->owner->priv; + struct priv *p = mapper->priv; + + for (int n = 0; n < 2; n++) + ra_tex_free(mapper->ra, &mapper->tex[n]); + if (p->egl_stream) + o->StreamConsumerReleaseKHR(o->egl_display, p->egl_stream); +} + +const struct ra_hwdec_driver ra_hwdec_d3d11egl = { + .name = "d3d11-egl", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_D3D11, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/opengl/hwdec_dxva2egl.c b/video/out/opengl/hwdec_dxva2egl.c new file mode 100644 index 0000000..979ef59 --- /dev/null +++ b/video/out/opengl/hwdec_dxva2egl.c @@ -0,0 +1,384 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <windows.h> +#include <d3d9.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include "angle_dynamic.h" + +#include "common/common.h" +#include "osdep/timer.h" +#include "osdep/windows_utils.h" +#include "video/out/gpu/hwdec.h" +#include "ra_gl.h" +#include "video/hwdec.h" +#include "video/d3d.h" + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + IDirect3D9Ex *d3d9ex; + IDirect3DDevice9Ex *device9ex; + + EGLDisplay egl_display; + EGLConfig egl_config; + EGLint alpha; +}; + +struct priv { + IDirect3DDevice9Ex *device9ex; // (no own reference) + IDirect3DQuery9 *query9; + IDirect3DTexture9 *texture9; + IDirect3DSurface9 *surface9; + + EGLDisplay egl_display; + EGLSurface egl_surface; + + GLuint gl_texture; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); + + if (p->device9ex) + IDirect3DDevice9Ex_Release(p->device9ex); + + if (p->d3d9ex) + IDirect3D9Ex_Release(p->d3d9ex); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + HRESULT hr; + + if (!ra_is_gl(hw->ra_ctx->ra)) + return -1; + if (!angle_load()) + return -1; + + d3d_load_dlls(); + + EGLDisplay egl_display = eglGetCurrentDisplay(); + if (!egl_display) + return -1; + 
+ if (!eglGetCurrentContext()) + return -1; + + const char *exts = eglQueryString(egl_display, EGL_EXTENSIONS); + if (!gl_check_extension(exts, "EGL_ANGLE_d3d_share_handle_client_buffer")) { + return -1; + } + + p->egl_display = egl_display; + + if (!d3d9_dll) { + MP_FATAL(hw, "Failed to load \"d3d9.dll\": %s\n", + mp_LastError_to_str()); + goto fail; + } + + HRESULT (WINAPI *Direct3DCreate9Ex)(UINT SDKVersion, IDirect3D9Ex **ppD3D); + Direct3DCreate9Ex = (void *)GetProcAddress(d3d9_dll, "Direct3DCreate9Ex"); + if (!Direct3DCreate9Ex) { + MP_FATAL(hw, "Direct3D 9Ex not supported\n"); + goto fail; + } + + hr = Direct3DCreate9Ex(D3D_SDK_VERSION, &p->d3d9ex); + if (FAILED(hr)) { + MP_FATAL(hw, "Couldn't create Direct3D9Ex: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + // We must create our own Direct3D9Ex device. ANGLE can give us the device + // it's using, but that's probably a ID3D11Device. + // (copied from chromium dxva_video_decode_accelerator_win.cc) + D3DPRESENT_PARAMETERS present_params = { + .BackBufferWidth = 1, + .BackBufferHeight = 1, + .BackBufferFormat = D3DFMT_UNKNOWN, + .BackBufferCount = 1, + .SwapEffect = D3DSWAPEFFECT_DISCARD, + .hDeviceWindow = NULL, + .Windowed = TRUE, + .Flags = D3DPRESENTFLAG_VIDEO, + .FullScreen_RefreshRateInHz = 0, + .PresentationInterval = 0, + }; + hr = IDirect3D9Ex_CreateDeviceEx(p->d3d9ex, + D3DADAPTER_DEFAULT, + D3DDEVTYPE_HAL, + NULL, + D3DCREATE_FPU_PRESERVE | + D3DCREATE_HARDWARE_VERTEXPROCESSING | + D3DCREATE_DISABLE_PSGP_THREADING | + D3DCREATE_MULTITHREADED, + &present_params, + NULL, + &p->device9ex); + if (FAILED(hr)) { + MP_FATAL(hw, "Failed to create Direct3D9Ex device: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + EGLint attrs[] = { + EGL_BUFFER_SIZE, 32, + EGL_RED_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_BLUE_SIZE, 8, + EGL_SURFACE_TYPE, EGL_PBUFFER_BIT, + EGL_ALPHA_SIZE, 0, + EGL_NONE + }; + EGLint count; + if (!eglChooseConfig(p->egl_display, attrs, &p->egl_config, 1, &count) || + !count) { + 
MP_ERR(hw, "Failed to get EGL surface configuration\n"); + goto fail; + } + + if (!eglGetConfigAttrib(p->egl_display, p->egl_config, + EGL_BIND_TO_TEXTURE_RGBA, &p->alpha)) { + MP_FATAL(hw, "Failed to query EGL surface alpha\n"); + goto fail; + } + + struct mp_image_params dummy_params = { + .imgfmt = IMGFMT_DXVA2, + .w = 256, + .h = 256, + }; + struct ra_hwdec_mapper *mapper = ra_hwdec_mapper_create(hw, &dummy_params); + if (!mapper) + goto fail; + ra_hwdec_mapper_free(&mapper); + + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->device9ex), + .hw_imgfmt = IMGFMT_DXVA2, + }; + + if (!p->hwctx.av_device_ref) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n"); + goto fail; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + + return 0; +fail: + return -1; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + + ra_tex_free(mapper->ra, &mapper->tex[0]); + gl->DeleteTextures(1, &p->gl_texture); + + if (p->egl_display && p->egl_surface) { + eglReleaseTexImage(p->egl_display, p->egl_surface, EGL_BACK_BUFFER); + eglDestroySurface(p->egl_display, p->egl_surface); + } + + if (p->surface9) + IDirect3DSurface9_Release(p->surface9); + + if (p->texture9) + IDirect3DTexture9_Release(p->texture9); + + if (p->query9) + IDirect3DQuery9_Release(p->query9); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + HRESULT hr; + + p->device9ex = p_owner->device9ex; + p->egl_display = p_owner->egl_display; + + hr = IDirect3DDevice9_CreateQuery(p->device9ex, D3DQUERYTYPE_EVENT, + &p->query9); + if (FAILED(hr)) { + MP_FATAL(mapper, "Failed to create Direct3D query interface: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + // Test the query API + hr = IDirect3DQuery9_Issue(p->query9, 
D3DISSUE_END); + if (FAILED(hr)) { + MP_FATAL(mapper, "Failed to issue Direct3D END test query: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + HANDLE share_handle = NULL; + hr = IDirect3DDevice9Ex_CreateTexture(p->device9ex, + mapper->src_params.w, + mapper->src_params.h, + 1, D3DUSAGE_RENDERTARGET, + p_owner->alpha ? + D3DFMT_A8R8G8B8 : D3DFMT_X8R8G8B8, + D3DPOOL_DEFAULT, + &p->texture9, + &share_handle); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to create Direct3D9 texture: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + hr = IDirect3DTexture9_GetSurfaceLevel(p->texture9, 0, &p->surface9); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to get Direct3D9 surface from texture: %s\n", + mp_HRESULT_to_str(hr)); + goto fail; + } + + EGLint attrib_list[] = { + EGL_WIDTH, mapper->src_params.w, + EGL_HEIGHT, mapper->src_params.h, + EGL_TEXTURE_FORMAT, p_owner->alpha ? EGL_TEXTURE_RGBA : EGL_TEXTURE_RGB, + EGL_TEXTURE_TARGET, EGL_TEXTURE_2D, + EGL_NONE + }; + p->egl_surface = eglCreatePbufferFromClientBuffer( + p->egl_display, EGL_D3D_TEXTURE_2D_SHARE_HANDLE_ANGLE, + share_handle, p_owner->egl_config, attrib_list); + if (p->egl_surface == EGL_NO_SURFACE) { + MP_ERR(mapper, "Failed to create EGL surface\n"); + goto fail; + } + + gl->GenTextures(1, &p->gl_texture); + gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->BindTexture(GL_TEXTURE_2D, 0); + + struct ra_tex_params params = { + .dimensions = 2, + .w = mapper->src_params.w, + .h = mapper->src_params.h, + .d = 1, + .format = ra_find_unorm_format(mapper->ra, 1, p_owner->alpha ? 
4 : 3), + .render_src = true, + .src_linear = true, + }; + if (!params.format) + goto fail; + + mapper->tex[0] = ra_create_wrapped_tex(mapper->ra, ¶ms, p->gl_texture); + if (!mapper->tex[0]) + goto fail; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = IMGFMT_RGB0; + mapper->dst_params.hw_subfmt = 0; + return 0; +fail: + return -1; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + + HRESULT hr; + RECT rc = {0, 0, mapper->src->w, mapper->src->h}; + IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)mapper->src->planes[3]; + hr = IDirect3DDevice9Ex_StretchRect(p->device9ex, + hw_surface, &rc, + p->surface9, &rc, + D3DTEXF_NONE); + if (FAILED(hr)) { + MP_ERR(mapper, "Direct3D RGB conversion failed: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + hr = IDirect3DQuery9_Issue(p->query9, D3DISSUE_END); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to issue Direct3D END query\n"); + return -1; + } + + // There doesn't appear to be an efficient way to do a blocking flush + // of the above StretchRect. Timeout of 8ms is required to reliably + // render 4k on Intel Haswell, Ivybridge and Cherry Trail Atom. 
+ const int max_retries = 8; + const int64_t wait_ns = MP_TIME_MS_TO_NS(1); + int retries = 0; + while (true) { + hr = IDirect3DQuery9_GetData(p->query9, NULL, 0, D3DGETDATA_FLUSH); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed to query Direct3D flush state\n"); + return -1; + } else if (hr == S_FALSE) { + if (++retries > max_retries) { + MP_VERBOSE(mapper, "Failed to flush frame after %lld ms\n", + (long long)MP_TIME_MS_TO_NS(wait_ns * max_retries)); + break; + } + mp_sleep_ns(wait_ns); + } else { + break; + } + } + + gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); + eglBindTexImage(p->egl_display, p->egl_surface, EGL_BACK_BUFFER); + gl->BindTexture(GL_TEXTURE_2D, 0); + + return 0; +} + +const struct ra_hwdec_driver ra_hwdec_dxva2egl = { + .name = "dxva2-egl", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_DXVA2, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + }, +}; diff --git a/video/out/opengl/hwdec_dxva2gldx.c b/video/out/opengl/hwdec_dxva2gldx.c new file mode 100644 index 0000000..0172813 --- /dev/null +++ b/video/out/opengl/hwdec_dxva2gldx.c @@ -0,0 +1,247 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <d3d9.h> +#include <assert.h> + +#include "common/common.h" +#include "osdep/windows_utils.h" +#include "video/out/gpu/hwdec.h" +#include "ra_gl.h" +#include "video/hwdec.h" +#include "video/d3d.h" + +// for WGL_ACCESS_READ_ONLY_NV +#include <GL/wglext.h> + +#define SHARED_SURFACE_D3DFMT D3DFMT_X8R8G8B8 + +struct priv_owner { + struct mp_hwdec_ctx hwctx; + IDirect3DDevice9Ex *device; + HANDLE device_h; +}; + +struct priv { + IDirect3DDevice9Ex *device; + HANDLE device_h; + IDirect3DSurface9 *rtarget; + HANDLE rtarget_h; + GLuint texture; +}; + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + hwdec_devices_remove(hw->devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); + + if (p->device) + IDirect3DDevice9Ex_Release(p->device); +} + +static int init(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + struct ra *ra = hw->ra_ctx->ra; + + if (!ra_is_gl(ra)) + return -1; + GL *gl = ra_gl_get(ra); + if (!(gl->mpgl_caps & MPGL_CAP_DXINTEROP)) + return -1; + + // AMD drivers won't open multiple dxinterop HANDLES on the same D3D device, + // so we request the one already in use by context_dxinterop + p->device_h = ra_get_native_resource(ra, "dxinterop_device_HANDLE"); + if (!p->device_h) + return -1; + + // But we also still need the actual D3D device + p->device = ra_get_native_resource(ra, "IDirect3DDevice9Ex"); + if (!p->device) + return -1; + IDirect3DDevice9Ex_AddRef(p->device); + + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = hw->driver->name, + .av_device_ref = d3d9_wrap_device_ref((IDirect3DDevice9 *)p->device), + .hw_imgfmt = IMGFMT_DXVA2, + }; + + if (!p->hwctx.av_device_ref) { + MP_VERBOSE(hw, "Failed to create hwdevice_ctx\n"); + return -1; + } + + hwdec_devices_add(hw->devs, &p->hwctx); + return 0; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + + if (p->rtarget_h && p->device_h) { + if 
(!gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h)) { + MP_ERR(mapper, "Failed unlocking texture for access by OpenGL: %s\n", + mp_LastError_to_str()); + } + } + + if (p->rtarget_h) { + if (!gl->DXUnregisterObjectNV(p->device_h, p->rtarget_h)) { + MP_ERR(mapper, "Failed to unregister Direct3D surface with OpenGL: %s\n", + mp_LastError_to_str()); + } else { + p->rtarget_h = 0; + } + } + + gl->DeleteTextures(1, &p->texture); + p->texture = 0; + + if (p->rtarget) { + IDirect3DSurface9_Release(p->rtarget); + p->rtarget = NULL; + } + + ra_tex_free(mapper->ra, &mapper->tex[0]); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + HRESULT hr; + + p->device = p_owner->device; + p->device_h = p_owner->device_h; + + HANDLE share_handle = NULL; + hr = IDirect3DDevice9Ex_CreateRenderTarget( + p->device, + mapper->src_params.w, mapper->src_params.h, + SHARED_SURFACE_D3DFMT, D3DMULTISAMPLE_NONE, 0, FALSE, + &p->rtarget, &share_handle); + if (FAILED(hr)) { + MP_ERR(mapper, "Failed creating offscreen Direct3D surface: %s\n", + mp_HRESULT_to_str(hr)); + return -1; + } + + if (share_handle && + !gl->DXSetResourceShareHandleNV(p->rtarget, share_handle)) { + MP_ERR(mapper, "Failed setting Direct3D/OpenGL share handle for surface: %s\n", + mp_LastError_to_str()); + return -1; + } + + gl->GenTextures(1, &p->texture); + gl->BindTexture(GL_TEXTURE_2D, p->texture); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->BindTexture(GL_TEXTURE_2D, 0); + + p->rtarget_h = gl->DXRegisterObjectNV(p->device_h, p->rtarget, p->texture, + GL_TEXTURE_2D, + WGL_ACCESS_READ_ONLY_NV); + if (!p->rtarget_h) { + MP_ERR(mapper, "Failed to 
register Direct3D surface with OpenGL: %s\n", + mp_LastError_to_str()); + return -1; + } + + if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) { + MP_ERR(mapper, "Failed locking texture for access by OpenGL %s\n", + mp_LastError_to_str()); + return -1; + } + + struct ra_tex_params params = { + .dimensions = 2, + .w = mapper->src_params.w, + .h = mapper->src_params.h, + .d = 1, + .format = ra_find_unorm_format(mapper->ra, 1, 4), + .render_src = true, + .src_linear = true, + }; + if (!params.format) + return -1; + + mapper->tex[0] = ra_create_wrapped_tex(mapper->ra, ¶ms, p->texture); + if (!mapper->tex[0]) + return -1; + + mapper->dst_params = mapper->src_params; + mapper->dst_params.imgfmt = IMGFMT_RGB0; + mapper->dst_params.hw_subfmt = 0; + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = ra_gl_get(mapper->ra); + HRESULT hr; + + if (!gl->DXUnlockObjectsNV(p->device_h, 1, &p->rtarget_h)) { + MP_ERR(mapper, "Failed unlocking texture for access by OpenGL: %s\n", + mp_LastError_to_str()); + return -1; + } + + IDirect3DSurface9* hw_surface = (IDirect3DSurface9 *)mapper->src->planes[3]; + RECT rc = {0, 0, mapper->src->w, mapper->src->h}; + hr = IDirect3DDevice9Ex_StretchRect(p->device, + hw_surface, &rc, + p->rtarget, &rc, + D3DTEXF_NONE); + if (FAILED(hr)) { + MP_ERR(mapper, "Direct3D RGB conversion failed: %s", mp_HRESULT_to_str(hr)); + return -1; + } + + if (!gl->DXLockObjectsNV(p->device_h, 1, &p->rtarget_h)) { + MP_ERR(mapper, "Failed locking texture for access by OpenGL: %s\n", + mp_LastError_to_str()); + return -1; + } + + return 0; +} + +const struct ra_hwdec_driver ra_hwdec_dxva2gldx = { + .name = "dxva2-dxinterop", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_DXVA2, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + }, 
+}; diff --git a/video/out/opengl/hwdec_rpi.c b/video/out/opengl/hwdec_rpi.c new file mode 100644 index 0000000..5362832 --- /dev/null +++ b/video/out/opengl/hwdec_rpi.c @@ -0,0 +1,384 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> +#include <assert.h> + +#include <bcm_host.h> +#include <interface/mmal/mmal.h> +#include <interface/mmal/util/mmal_util.h> +#include <interface/mmal/util/mmal_default_components.h> +#include <interface/mmal/vc/mmal_vc_api.h> + +#include <libavutil/rational.h> + +#include "common/common.h" +#include "common/msg.h" +#include "video/mp_image.h" +#include "video/out/gpu/hwdec.h" + +#include "common.h" + +struct priv { + struct mp_log *log; + + struct mp_image_params params; + + MMAL_COMPONENT_T *renderer; + bool renderer_enabled; + + // for RAM input + MMAL_POOL_T *swpool; + + struct mp_image *current_frame; + + struct mp_rect src, dst; + int cur_window[4]; // raw user params +}; + +// Magic alignments (in pixels) expected by the MMAL internals. +#define ALIGN_W 32 +#define ALIGN_H 16 + +// Make mpi point to buffer, assuming MMAL_ENCODING_I420. +// buffer can be NULL. +// Return the required buffer space. 
+static size_t layout_buffer(struct mp_image *mpi, MMAL_BUFFER_HEADER_T *buffer, + struct mp_image_params *params) +{ + assert(params->imgfmt == IMGFMT_420P); + mp_image_set_params(mpi, params); + int w = MP_ALIGN_UP(params->w, ALIGN_W); + int h = MP_ALIGN_UP(params->h, ALIGN_H); + uint8_t *cur = buffer ? buffer->data : NULL; + size_t size = 0; + for (int i = 0; i < 3; i++) { + int div = i ? 2 : 1; + mpi->planes[i] = cur; + mpi->stride[i] = w / div; + size_t plane_size = h / div * mpi->stride[i]; + if (cur) + cur += plane_size; + size += plane_size; + } + return size; +} + +static MMAL_FOURCC_T map_csp(enum mp_csp csp) +{ + switch (csp) { + case MP_CSP_BT_601: return MMAL_COLOR_SPACE_ITUR_BT601; + case MP_CSP_BT_709: return MMAL_COLOR_SPACE_ITUR_BT709; + case MP_CSP_SMPTE_240M: return MMAL_COLOR_SPACE_SMPTE240M; + default: return MMAL_COLOR_SPACE_UNKNOWN; + } +} + +static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) +{ + mmal_buffer_header_release(buffer); +} + +static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) +{ + struct mp_image *mpi = buffer->user_data; + talloc_free(mpi); +} + +static void disable_renderer(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + + if (p->renderer_enabled) { + mmal_port_disable(p->renderer->control); + mmal_port_disable(p->renderer->input[0]); + + mmal_port_flush(p->renderer->control); + mmal_port_flush(p->renderer->input[0]); + + mmal_component_disable(p->renderer); + } + mmal_pool_destroy(p->swpool); + p->swpool = NULL; + p->renderer_enabled = false; +} + +// check_window_only: assume params and dst/src rc are unchanged +static void update_overlay(struct ra_hwdec *hw, bool check_window_only) +{ + struct priv *p = hw->priv; + MMAL_PORT_T *input = p->renderer->input[0]; + struct mp_rect src = p->src; + struct mp_rect dst = p->dst; + + int defs[4] = {0, 0, 0, 0}; + int *z = ra_get_native_resource(hw->ra_ctx->ra, "MPV_RPI_WINDOW"); + if (!z) + z = defs; + + // As documented in the 
libmpv openglcb headers. + int display = z[0]; + int layer = z[1]; + int x = z[2]; + int y = z[3]; + + if (check_window_only && memcmp(z, p->cur_window, sizeof(p->cur_window)) == 0) + return; + + memcpy(p->cur_window, z, sizeof(p->cur_window)); + + int rotate[] = {MMAL_DISPLAY_ROT0, + MMAL_DISPLAY_ROT90, + MMAL_DISPLAY_ROT180, + MMAL_DISPLAY_ROT270}; + + int src_w = src.x1 - src.x0, src_h = src.y1 - src.y0, + dst_w = dst.x1 - dst.x0, dst_h = dst.y1 - dst.y0; + int p_x, p_y; + av_reduce(&p_x, &p_y, dst_w * src_h, src_w * dst_h, 16000); + MMAL_DISPLAYREGION_T dr = { + .hdr = { .id = MMAL_PARAMETER_DISPLAYREGION, + .size = sizeof(MMAL_DISPLAYREGION_T), }, + .src_rect = { .x = src.x0, .y = src.y0, + .width = src_w, .height = src_h }, + .dest_rect = { .x = dst.x0 + x, .y = dst.y0 + y, + .width = dst_w, .height = dst_h }, + .layer = layer - 1, // under the GL layer + .display_num = display, + .pixel_x = p_x, + .pixel_y = p_y, + .transform = rotate[p->params.rotate / 90], + .fullscreen = 0, + .set = MMAL_DISPLAY_SET_SRC_RECT | MMAL_DISPLAY_SET_DEST_RECT | + MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_NUM | + MMAL_DISPLAY_SET_PIXEL | MMAL_DISPLAY_SET_TRANSFORM | + MMAL_DISPLAY_SET_FULLSCREEN, + }; + + if (p->params.rotate % 180 == 90) { + MPSWAP(int, dr.src_rect.x, dr.src_rect.y); + MPSWAP(int, dr.src_rect.width, dr.src_rect.height); + } + + if (mmal_port_parameter_set(input, &dr.hdr)) + MP_WARN(p, "could not set video rectangle\n"); +} + +static int enable_renderer(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + MMAL_PORT_T *input = p->renderer->input[0]; + struct mp_image_params *params = &p->params; + + if (p->renderer_enabled) + return 0; + + if (!params->imgfmt) + return -1; + + bool opaque = params->imgfmt == IMGFMT_MMAL; + + input->format->encoding = opaque ? 
MMAL_ENCODING_OPAQUE : MMAL_ENCODING_I420; + input->format->es->video.width = MP_ALIGN_UP(params->w, ALIGN_W); + input->format->es->video.height = MP_ALIGN_UP(params->h, ALIGN_H); + input->format->es->video.crop = (MMAL_RECT_T){0, 0, params->w, params->h}; + input->format->es->video.par = (MMAL_RATIONAL_T){params->p_w, params->p_h}; + input->format->es->video.color_space = map_csp(params->color.space); + + if (mmal_port_format_commit(input)) + return -1; + + input->buffer_num = MPMAX(input->buffer_num_min, + input->buffer_num_recommended) + 3; + input->buffer_size = MPMAX(input->buffer_size_min, + input->buffer_size_recommended); + + if (!opaque) { + size_t size = layout_buffer(&(struct mp_image){0}, NULL, params); + if (input->buffer_size != size) { + MP_FATAL(hw, "We disagree with MMAL about buffer sizes.\n"); + return -1; + } + + p->swpool = mmal_pool_create(input->buffer_num, input->buffer_size); + if (!p->swpool) { + MP_FATAL(hw, "Could not allocate buffer pool.\n"); + return -1; + } + } + + update_overlay(hw, false); + + p->renderer_enabled = true; + + if (mmal_port_enable(p->renderer->control, control_port_cb)) + return -1; + + if (mmal_port_enable(input, input_port_cb)) + return -1; + + if (mmal_component_enable(p->renderer)) { + MP_FATAL(hw, "Failed to enable video renderer.\n"); + return -1; + } + + return 0; +} + +static void free_mmal_buffer(void *arg) +{ + MMAL_BUFFER_HEADER_T *buffer = arg; + mmal_buffer_header_release(buffer); +} + +static struct mp_image *upload(struct ra_hwdec *hw, struct mp_image *hw_image) +{ + struct priv *p = hw->priv; + + MMAL_BUFFER_HEADER_T *buffer = mmal_queue_wait(p->swpool->queue); + if (!buffer) { + MP_ERR(hw, "Can't allocate buffer.\n"); + return NULL; + } + mmal_buffer_header_reset(buffer); + + struct mp_image *new_ref = mp_image_new_custom_ref(NULL, buffer, + free_mmal_buffer); + if (!new_ref) { + mmal_buffer_header_release(buffer); + MP_ERR(hw, "Out of memory.\n"); + return NULL; + } + + mp_image_setfmt(new_ref, 
IMGFMT_MMAL); + new_ref->planes[3] = (void *)buffer; + + struct mp_image dmpi = {0}; + buffer->length = layout_buffer(&dmpi, buffer, &p->params); + mp_image_copy(&dmpi, hw_image); + + return new_ref; +} + +static int overlay_frame(struct ra_hwdec *hw, struct mp_image *hw_image, + struct mp_rect *src, struct mp_rect *dst, bool newframe) +{ + struct priv *p = hw->priv; + + if (hw_image && !mp_image_params_equal(&p->params, &hw_image->params)) { + p->params = hw_image->params; + + disable_renderer(hw); + mp_image_unrefp(&p->current_frame); + + if (enable_renderer(hw) < 0) + return -1; + } + + if (hw_image && p->current_frame && !newframe) { + if (!mp_rect_equals(&p->src, src) ||mp_rect_equals(&p->dst, dst)) { + p->src = *src; + p->dst = *dst; + update_overlay(hw, false); + } + return 0; // don't reupload + } + + mp_image_unrefp(&p->current_frame); + + if (!hw_image) { + disable_renderer(hw); + return 0; + } + + if (enable_renderer(hw) < 0) + return -1; + + update_overlay(hw, true); + + struct mp_image *mpi = NULL; + if (hw_image->imgfmt == IMGFMT_MMAL) { + mpi = mp_image_new_ref(hw_image); + } else { + mpi = upload(hw, hw_image); + } + + if (!mpi) { + disable_renderer(hw); + return -1; + } + + MMAL_BUFFER_HEADER_T *ref = (void *)mpi->planes[3]; + + // Assume this field is free for use by us. 
+ ref->user_data = mpi; + + if (mmal_port_send_buffer(p->renderer->input[0], ref)) { + MP_ERR(hw, "could not queue picture!\n"); + talloc_free(mpi); + return -1; + } + + return 0; +} + +static void destroy(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + + disable_renderer(hw); + + if (p->renderer) + mmal_component_release(p->renderer); + + mmal_vc_deinit(); +} + +static int create(struct ra_hwdec *hw) +{ + struct priv *p = hw->priv; + p->log = hw->log; + + bcm_host_init(); + + if (mmal_vc_init()) { + MP_FATAL(hw, "Could not initialize MMAL.\n"); + return -1; + } + + if (mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &p->renderer)) + { + MP_FATAL(hw, "Could not create MMAL renderer.\n"); + mmal_vc_deinit(); + return -1; + } + + return 0; +} + +const struct ra_hwdec_driver ra_hwdec_rpi_overlay = { + .name = "rpi-overlay", + .priv_size = sizeof(struct priv), + .imgfmts = {IMGFMT_MMAL, IMGFMT_420P, 0}, + .init = create, + .overlay_frame = overlay_frame, + .uninit = destroy, +}; diff --git a/video/out/opengl/hwdec_vdpau.c b/video/out/opengl/hwdec_vdpau.c new file mode 100644 index 0000000..acdc703 --- /dev/null +++ b/video/out/opengl/hwdec_vdpau.c @@ -0,0 +1,251 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stddef.h> +#include <assert.h> + +#include "video/out/gpu/hwdec.h" +#include "ra_gl.h" +#include "video/vdpau.h" +#include "video/vdpau_mixer.h" + +// This is a GL_NV_vdpau_interop specification bug, and headers (unfortunately) +// follow it. I'm not sure about the original nvidia headers. +#define BRAINDEATH(x) ((void *)(uintptr_t)(x)) + +struct priv_owner { + struct mp_vdpau_ctx *ctx; +}; + +struct priv { + struct mp_vdpau_ctx *ctx; + GL *gl; + uint64_t preemption_counter; + GLuint gl_texture; + bool vdpgl_initialized; + GLvdpauSurfaceNV vdpgl_surface; + VdpOutputSurface vdp_surface; + struct mp_vdpau_mixer *mixer; + struct ra_imgfmt_desc direct_desc; + bool mapped; +}; + +static int init(struct ra_hwdec *hw) +{ + struct ra *ra = hw->ra_ctx->ra; + Display *x11disp = ra_get_native_resource(ra, "x11"); + if (!x11disp || !ra_is_gl(ra)) + return -1; + GL *gl = ra_gl_get(ra); + if (!(gl->mpgl_caps & MPGL_CAP_VDPAU)) + return -1; + struct priv_owner *p = hw->priv; + p->ctx = mp_vdpau_create_device_x11(hw->log, x11disp, true); + if (!p->ctx) + return -1; + if (mp_vdpau_handle_preemption(p->ctx, NULL) < 1) + return -1; + if (hw->probing && mp_vdpau_guess_if_emulated(p->ctx)) + return -1; + p->ctx->hwctx.driver_name = hw->driver->name; + p->ctx->hwctx.hw_imgfmt = IMGFMT_VDPAU; + hwdec_devices_add(hw->devs, &p->ctx->hwctx); + return 0; +} + +static void uninit(struct ra_hwdec *hw) +{ + struct priv_owner *p = hw->priv; + + if (p->ctx) + hwdec_devices_remove(hw->devs, &p->ctx->hwctx); + mp_vdpau_destroy(p->ctx); +} + +static void mapper_unmap(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = p->gl; + + for (int n = 0; n < 4; n++) + ra_tex_free(mapper->ra, &mapper->tex[n]); + + if (p->mapped) { + gl->VDPAUUnmapSurfacesNV(1, &p->vdpgl_surface); + } + p->mapped = false; +} + +static void mark_vdpau_objects_uninitialized(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + + p->vdp_surface = VDP_INVALID_HANDLE; + 
p->mapped = false; +} + +static void mapper_uninit(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = p->gl; + struct vdp_functions *vdp = &p->ctx->vdp; + VdpStatus vdp_st; + + assert(!p->mapped); + + if (p->vdpgl_surface) + gl->VDPAUUnregisterSurfaceNV(p->vdpgl_surface); + p->vdpgl_surface = 0; + + gl->DeleteTextures(1, &p->gl_texture); + + if (p->vdp_surface != VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_destroy(p->vdp_surface); + CHECK_VDP_WARNING(mapper, "Error when calling vdp_output_surface_destroy"); + } + p->vdp_surface = VDP_INVALID_HANDLE; + + gl_check_error(gl, mapper->log, "Before uninitializing OpenGL interop"); + + if (p->vdpgl_initialized) + gl->VDPAUFiniNV(); + + p->vdpgl_initialized = false; + + gl_check_error(gl, mapper->log, "After uninitializing OpenGL interop"); + + mp_vdpau_mixer_destroy(p->mixer); +} + +static int mapper_init(struct ra_hwdec_mapper *mapper) +{ + struct priv_owner *p_owner = mapper->owner->priv; + struct priv *p = mapper->priv; + + p->gl = ra_gl_get(mapper->ra); + p->ctx = p_owner->ctx; + + GL *gl = p->gl; + struct vdp_functions *vdp = &p->ctx->vdp; + VdpStatus vdp_st; + + p->vdp_surface = VDP_INVALID_HANDLE; + p->mixer = mp_vdpau_mixer_create(p->ctx, mapper->log); + if (!p->mixer) + return -1; + + mapper->dst_params = mapper->src_params; + + if (mp_vdpau_handle_preemption(p->ctx, &p->preemption_counter) < 0) + return -1; + + gl->VDPAUInitNV(BRAINDEATH(p->ctx->vdp_device), p->ctx->get_proc_address); + + p->vdpgl_initialized = true; + + gl->GenTextures(1, &p->gl_texture); + + gl->BindTexture(GL_TEXTURE_2D, p->gl_texture); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_S, GL_CLAMP_TO_EDGE); + gl->TexParameteri(GL_TEXTURE_2D, GL_TEXTURE_WRAP_T, GL_CLAMP_TO_EDGE); + gl->BindTexture(GL_TEXTURE_2D, 0); + + vdp_st = 
vdp->output_surface_create(p->ctx->vdp_device, + VDP_RGBA_FORMAT_B8G8R8A8, + mapper->src_params.w, + mapper->src_params.h, + &p->vdp_surface); + CHECK_VDP_ERROR(mapper, "Error when calling vdp_output_surface_create"); + + p->vdpgl_surface = gl->VDPAURegisterOutputSurfaceNV(BRAINDEATH(p->vdp_surface), + GL_TEXTURE_2D, + 1, &p->gl_texture); + if (!p->vdpgl_surface) + return -1; + + gl->VDPAUSurfaceAccessNV(p->vdpgl_surface, GL_READ_ONLY); + + mapper->dst_params.imgfmt = IMGFMT_RGB0; + mapper->dst_params.hw_subfmt = 0; + + gl_check_error(gl, mapper->log, "After initializing vdpau OpenGL interop"); + + return 0; +} + +static int mapper_map(struct ra_hwdec_mapper *mapper) +{ + struct priv *p = mapper->priv; + GL *gl = p->gl; + + int pe = mp_vdpau_handle_preemption(p->ctx, &p->preemption_counter); + if (pe < 1) { + mark_vdpau_objects_uninitialized(mapper); + if (pe < 0) + return -1; + mapper_uninit(mapper); + if (mapper_init(mapper) < 0) + return -1; + } + + if (!p->vdpgl_surface) + return -1; + + mp_vdpau_mixer_render(p->mixer, NULL, p->vdp_surface, NULL, mapper->src, + NULL); + + gl->VDPAUMapSurfacesNV(1, &p->vdpgl_surface); + + p->mapped = true; + + struct ra_tex_params params = { + .dimensions = 2, + .w = mapper->src_params.w, + .h = mapper->src_params.h, + .d = 1, + .format = ra_find_unorm_format(mapper->ra, 1, 4), + .render_src = true, + .src_linear = true, + }; + + if (!params.format) + return -1; + + mapper->tex[0] = + ra_create_wrapped_tex(mapper->ra, ¶ms, p->gl_texture); + if (!mapper->tex[0]) + return -1; + + return 0; +} + +const struct ra_hwdec_driver ra_hwdec_vdpau = { + .name = "vdpau-gl", + .priv_size = sizeof(struct priv_owner), + .imgfmts = {IMGFMT_VDPAU, 0}, + .init = init, + .uninit = uninit, + .mapper = &(const struct ra_hwdec_mapper_driver){ + .priv_size = sizeof(struct priv), + .init = mapper_init, + .uninit = mapper_uninit, + .map = mapper_map, + .unmap = mapper_unmap, + }, +}; diff --git a/video/out/opengl/libmpv_gl.c 
b/video/out/opengl/libmpv_gl.c new file mode 100644 index 0000000..c297c13 --- /dev/null +++ b/video/out/opengl/libmpv_gl.c @@ -0,0 +1,114 @@ +#include "common.h" +#include "context.h" +#include "ra_gl.h" +#include "options/m_config.h" +#include "libmpv/render_gl.h" +#include "video/out/gpu/libmpv_gpu.h" +#include "video/out/gpu/ra.h" + +struct priv { + GL *gl; + struct ra_ctx *ra_ctx; +}; + +static int init(struct libmpv_gpu_context *ctx, mpv_render_param *params) +{ + ctx->priv = talloc_zero(NULL, struct priv); + struct priv *p = ctx->priv; + + mpv_opengl_init_params *init_params = + get_mpv_render_param(params, MPV_RENDER_PARAM_OPENGL_INIT_PARAMS, NULL); + if (!init_params) + return MPV_ERROR_INVALID_PARAMETER; + + p->gl = talloc_zero(p, GL); + + mpgl_load_functions2(p->gl, init_params->get_proc_address, + init_params->get_proc_address_ctx, + NULL, ctx->log); + if (!p->gl->version && !p->gl->es) { + MP_FATAL(ctx, "OpenGL not initialized.\n"); + return MPV_ERROR_UNSUPPORTED; + } + + // initialize a blank ra_ctx to reuse ra_gl_ctx + p->ra_ctx = talloc_zero(p, struct ra_ctx); + p->ra_ctx->log = ctx->log; + p->ra_ctx->global = ctx->global; + p->ra_ctx->opts = (struct ra_ctx_opts) { + .allow_sw = true, + }; + + static const struct ra_swapchain_fns empty_swapchain_fns = {0}; + struct ra_gl_ctx_params gl_params = { + // vo_libmpv is essentially like a gigantic external swapchain where + // the user is in charge of presentation / swapping etc. 
But we don't + // actually need to provide any of these functions, since we can just + // not call them to begin with - so just set it to an empty object to + // signal to ra_gl_p that we don't care about its latency emulation + // functionality + .external_swapchain = &empty_swapchain_fns + }; + + p->gl->SwapInterval = NULL; // we shouldn't randomly change this, so lock it + if (!ra_gl_ctx_init(p->ra_ctx, p->gl, gl_params)) + return MPV_ERROR_UNSUPPORTED; + + struct ra_ctx_opts *ctx_opts = mp_get_config_group(ctx, ctx->global, &ra_ctx_conf); + p->ra_ctx->opts.debug = ctx_opts->debug; + p->gl->debug_context = ctx_opts->debug; + ra_gl_set_debug(p->ra_ctx->ra, ctx_opts->debug); + talloc_free(ctx_opts); + + ctx->ra_ctx = p->ra_ctx; + + return 0; +} + +static int wrap_fbo(struct libmpv_gpu_context *ctx, mpv_render_param *params, + struct ra_tex **out) +{ + struct priv *p = ctx->priv; + + mpv_opengl_fbo *fbo = + get_mpv_render_param(params, MPV_RENDER_PARAM_OPENGL_FBO, NULL); + if (!fbo) + return MPV_ERROR_INVALID_PARAMETER; + + if (fbo->fbo && !(p->gl->mpgl_caps & MPGL_CAP_FB)) { + MP_FATAL(ctx, "Rendering to FBO requested, but no FBO extension found!\n"); + return MPV_ERROR_UNSUPPORTED; + } + + struct ra_swapchain *sw = p->ra_ctx->swapchain; + struct ra_fbo target; + ra_gl_ctx_resize(sw, fbo->w, fbo->h, fbo->fbo); + ra_gl_ctx_start_frame(sw, &target); + *out = target.tex; + return 0; +} + +static void done_frame(struct libmpv_gpu_context *ctx, bool ds) +{ + struct priv *p = ctx->priv; + + struct ra_swapchain *sw = p->ra_ctx->swapchain; + struct vo_frame dummy = {.display_synced = ds}; + ra_gl_ctx_submit_frame(sw, &dummy); +} + +static void destroy(struct libmpv_gpu_context *ctx) +{ + struct priv *p = ctx->priv; + + if (p->ra_ctx) + ra_gl_ctx_uninit(p->ra_ctx); +} + +const struct libmpv_gpu_context_fns libmpv_gpu_context_gl = { + .api_name = MPV_RENDER_API_TYPE_OPENGL, + .init = init, + .wrap_fbo = wrap_fbo, + .done_frame = done_frame, + .destroy = destroy, +}; diff 
--git a/video/out/opengl/ra_gl.c b/video/out/opengl/ra_gl.c new file mode 100644 index 0000000..f535f1f --- /dev/null +++ b/video/out/opengl/ra_gl.c @@ -0,0 +1,1208 @@ +#include <libavutil/intreadwrite.h> + +#include "formats.h" +#include "utils.h" +#include "ra_gl.h" + +static struct ra_fns ra_fns_gl; + +// For ra.priv +struct ra_gl { + GL *gl; + bool debug_enable; + bool timer_active; // hack for GL_TIME_ELAPSED limitations +}; + +// For ra_tex.priv +struct ra_tex_gl { + struct ra_buf_pool pbo; // for ra.use_pbo + bool own_objects; + GLenum target; + GLuint texture; // 0 if no texture data associated + GLuint fbo; // 0 if no rendering requested, or it default framebuffer + // These 3 fields can be 0 if unknown. + GLint internal_format; + GLenum format; + GLenum type; +}; + +// For ra_buf.priv +struct ra_buf_gl { + GLenum target; + GLuint buffer; + GLsync fence; +}; + +// For ra_renderpass.priv +struct ra_renderpass_gl { + GLuint program; + // 1 entry for each ra_renderpass_params.inputs[] entry + GLint *uniform_loc; + int num_uniform_loc; // == ra_renderpass_params.num_inputs + struct gl_vao vao; +}; + +// (Init time only.) +static void probe_real_size(GL *gl, struct ra_format *fmt) +{ + const struct gl_format *gl_fmt = fmt->priv; + + if (!gl->GetTexLevelParameteriv) + return; // GLES + + bool is_la = gl_fmt->format == GL_LUMINANCE || + gl_fmt->format == GL_LUMINANCE_ALPHA; + if (is_la && gl->es) + return; // GLES doesn't provide GL_TEXTURE_LUMINANCE_SIZE. + + GLuint tex; + gl->GenTextures(1, &tex); + gl->BindTexture(GL_TEXTURE_2D, tex); + gl->TexImage2D(GL_TEXTURE_2D, 0, gl_fmt->internal_format, 64, 64, 0, + gl_fmt->format, gl_fmt->type, NULL); + for (int i = 0; i < fmt->num_components; i++) { + const GLenum pnames[] = { + GL_TEXTURE_RED_SIZE, + GL_TEXTURE_GREEN_SIZE, + GL_TEXTURE_BLUE_SIZE, + GL_TEXTURE_ALPHA_SIZE, + GL_TEXTURE_LUMINANCE_SIZE, + GL_TEXTURE_ALPHA_SIZE, + }; + int comp = is_la ? 
i + 4 : i; + assert(comp < MP_ARRAY_SIZE(pnames)); + GLint param = -1; + gl->GetTexLevelParameteriv(GL_TEXTURE_2D, 0, pnames[comp], ¶m); + fmt->component_depth[i] = param > 0 ? param : 0; + } + gl->DeleteTextures(1, &tex); +} + +static int ra_init_gl(struct ra *ra, GL *gl) +{ + if (gl->version < 210 && gl->es < 200) { + MP_ERR(ra, "At least OpenGL 2.1 or OpenGL ES 2.0 required.\n"); + return -1; + } + + struct ra_gl *p = ra->priv = talloc_zero(NULL, struct ra_gl); + p->gl = gl; + + ra_gl_set_debug(ra, true); + + ra->fns = &ra_fns_gl; + ra->glsl_version = gl->glsl_version; + ra->glsl_es = gl->es > 0; + + static const int caps_map[][2] = { + {RA_CAP_DIRECT_UPLOAD, 0}, + {RA_CAP_GLOBAL_UNIFORM, 0}, + {RA_CAP_FRAGCOORD, 0}, + {RA_CAP_TEX_1D, MPGL_CAP_1D_TEX}, + {RA_CAP_TEX_3D, MPGL_CAP_3D_TEX}, + {RA_CAP_COMPUTE, MPGL_CAP_COMPUTE_SHADER}, + {RA_CAP_NUM_GROUPS, MPGL_CAP_COMPUTE_SHADER}, + {RA_CAP_NESTED_ARRAY, MPGL_CAP_NESTED_ARRAY}, + {RA_CAP_SLOW_DR, MPGL_CAP_SLOW_DR}, + }; + + for (int i = 0; i < MP_ARRAY_SIZE(caps_map); i++) { + if ((gl->mpgl_caps & caps_map[i][1]) == caps_map[i][1]) + ra->caps |= caps_map[i][0]; + } + + if (gl->BindBufferBase) { + if (gl->mpgl_caps & MPGL_CAP_UBO) + ra->caps |= RA_CAP_BUF_RO; + if (gl->mpgl_caps & MPGL_CAP_SSBO) + ra->caps |= RA_CAP_BUF_RW; + } + + // textureGather is only supported in GLSL 400+ / ES 310+ + if (ra->glsl_version >= (ra->glsl_es ? 310 : 400)) + ra->caps |= RA_CAP_GATHER; + + if (gl->BlitFramebuffer) + ra->caps |= RA_CAP_BLIT; + + // Disable compute shaders for GLSL < 420. This work-around is needed since + // some buggy OpenGL drivers expose compute shaders for lower GLSL versions, + // despite the spec requiring 420+. + if (ra->glsl_version < (ra->glsl_es ? 310 : 420)) { + ra->caps &= ~RA_CAP_COMPUTE; + } + + // While we can handle compute shaders on GLES the spec (intentionally) + // does not support binding textures for writing, which all uses inside mpv + // would require. So disable it unconditionally anyway. 
+ if (ra->glsl_es) + ra->caps &= ~RA_CAP_COMPUTE; + + int gl_fmt_features = gl_format_feature_flags(gl); + + for (int n = 0; gl_formats[n].internal_format; n++) { + const struct gl_format *gl_fmt = &gl_formats[n]; + + if (!(gl_fmt->flags & gl_fmt_features)) + continue; + + struct ra_format *fmt = talloc_zero(ra, struct ra_format); + *fmt = (struct ra_format){ + .name = gl_fmt->name, + .priv = (void *)gl_fmt, + .ctype = gl_format_type(gl_fmt), + .num_components = gl_format_components(gl_fmt->format), + .ordered = gl_fmt->format != GL_RGB_422_APPLE, + .pixel_size = gl_bytes_per_pixel(gl_fmt->format, gl_fmt->type), + .luminance_alpha = gl_fmt->format == GL_LUMINANCE_ALPHA, + .linear_filter = gl_fmt->flags & F_TF, + .renderable = (gl_fmt->flags & F_CR) && + (gl->mpgl_caps & MPGL_CAP_FB), + // TODO: Check whether it's a storable format + // https://www.khronos.org/opengl/wiki/Image_Load_Store + .storable = true, + }; + + int csize = gl_component_size(gl_fmt->type) * 8; + int depth = csize; + + if (gl_fmt->flags & F_F16) { + depth = 16; + csize = 32; // always upload as GL_FLOAT (simpler for us) + } + + for (int i = 0; i < fmt->num_components; i++) { + fmt->component_size[i] = csize; + fmt->component_depth[i] = depth; + } + + if (fmt->ctype == RA_CTYPE_UNORM && depth != 8) + probe_real_size(gl, fmt); + + // Special formats for which OpenGL happens to have direct support. 
+ if (strcmp(fmt->name, "rgb565") == 0) { + fmt->special_imgfmt = IMGFMT_RGB565; + struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc); + fmt->special_imgfmt_desc = desc; + desc->num_planes = 1; + desc->planes[0] = fmt; + for (int i = 0; i < 3; i++) + desc->components[0][i] = i + 1; + desc->chroma_w = desc->chroma_h = 1; + } + if (strcmp(fmt->name, "rgb10_a2") == 0) { + fmt->special_imgfmt = IMGFMT_RGB30; + struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc); + fmt->special_imgfmt_desc = desc; + desc->component_bits = 10; + desc->num_planes = 1; + desc->planes[0] = fmt; + for (int i = 0; i < 3; i++) + desc->components[0][i] = 3 - i; + desc->chroma_w = desc->chroma_h = 1; + } + if (strcmp(fmt->name, "appleyp") == 0) { + fmt->special_imgfmt = IMGFMT_UYVY; + struct ra_imgfmt_desc *desc = talloc_zero(fmt, struct ra_imgfmt_desc); + fmt->special_imgfmt_desc = desc; + desc->num_planes = 1; + desc->planes[0] = fmt; + desc->components[0][0] = 3; + desc->components[0][1] = 1; + desc->components[0][2] = 2; + desc->chroma_w = desc->chroma_h = 1; + } + + fmt->glsl_format = ra_fmt_glsl_format(fmt); + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, fmt); + } + + GLint ival; + gl->GetIntegerv(GL_MAX_TEXTURE_SIZE, &ival); + ra->max_texture_wh = ival; + + if (ra->caps & RA_CAP_COMPUTE) { + gl->GetIntegerv(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &ival); + ra->max_shmem = ival; + gl->GetIntegerv(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &ival); + ra->max_compute_group_threads = ival; + } + + gl->Disable(GL_DITHER); + + if (!ra_find_unorm_format(ra, 2, 1)) + MP_VERBOSE(ra, "16 bit UNORM textures not available.\n"); + + return 0; +} + +struct ra *ra_create_gl(GL *gl, struct mp_log *log) +{ + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + if (ra_init_gl(ra, gl) < 0) { + talloc_free(ra); + return NULL; + } + return ra; +} + +static void gl_destroy(struct ra *ra) +{ + talloc_free(ra->priv); +} + +void ra_gl_set_debug(struct ra *ra, bool 
enable) +{ + struct ra_gl *p = ra->priv; + GL *gl = ra_gl_get(ra); + + p->debug_enable = enable; + if (gl->debug_context) + gl_set_debug_logger(gl, enable ? ra->log : NULL); +} + +static void gl_tex_destroy(struct ra *ra, struct ra_tex *tex) +{ + GL *gl = ra_gl_get(ra); + struct ra_tex_gl *tex_gl = tex->priv; + + ra_buf_pool_uninit(ra, &tex_gl->pbo); + + if (tex_gl->own_objects) { + if (tex_gl->fbo) + gl->DeleteFramebuffers(1, &tex_gl->fbo); + + gl->DeleteTextures(1, &tex_gl->texture); + } + talloc_free(tex_gl); + talloc_free(tex); +} + +static struct ra_tex *gl_tex_create_blank(struct ra *ra, + const struct ra_tex_params *params) +{ + struct ra_tex *tex = talloc_zero(NULL, struct ra_tex); + tex->params = *params; + tex->params.initial_data = NULL; + struct ra_tex_gl *tex_gl = tex->priv = talloc_zero(NULL, struct ra_tex_gl); + + const struct gl_format *fmt = params->format->priv; + tex_gl->internal_format = fmt->internal_format; + tex_gl->format = fmt->format; + tex_gl->type = fmt->type; + switch (params->dimensions) { + case 1: tex_gl->target = GL_TEXTURE_1D; break; + case 2: tex_gl->target = GL_TEXTURE_2D; break; + case 3: tex_gl->target = GL_TEXTURE_3D; break; + default: MP_ASSERT_UNREACHABLE(); + } + if (params->non_normalized) { + assert(params->dimensions == 2); + tex_gl->target = GL_TEXTURE_RECTANGLE; + } + if (params->external_oes) { + assert(params->dimensions == 2 && !params->non_normalized); + tex_gl->target = GL_TEXTURE_EXTERNAL_OES; + } + + if (params->downloadable && !(params->dimensions == 2 && + params->format->renderable)) + { + gl_tex_destroy(ra, tex); + return NULL; + } + + return tex; +} + +static struct ra_tex *gl_tex_create(struct ra *ra, + const struct ra_tex_params *params) +{ + GL *gl = ra_gl_get(ra); + assert(!params->format->dummy_format); + + struct ra_tex *tex = gl_tex_create_blank(ra, params); + if (!tex) + return NULL; + struct ra_tex_gl *tex_gl = tex->priv; + + tex_gl->own_objects = true; + + gl->GenTextures(1, &tex_gl->texture); + 
gl->BindTexture(tex_gl->target, tex_gl->texture); + + GLint filter = params->src_linear ? GL_LINEAR : GL_NEAREST; + GLint wrap = params->src_repeat ? GL_REPEAT : GL_CLAMP_TO_EDGE; + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter); + gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap); + if (params->dimensions > 1) + gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap); + if (params->dimensions > 2) + gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap); + + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1); + switch (params->dimensions) { + case 1: + gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format, params->w, + 0, tex_gl->format, tex_gl->type, params->initial_data); + break; + case 2: + gl->TexImage2D(tex_gl->target, 0, tex_gl->internal_format, params->w, + params->h, 0, tex_gl->format, tex_gl->type, + params->initial_data); + break; + case 3: + gl->TexImage3D(tex_gl->target, 0, tex_gl->internal_format, params->w, + params->h, params->d, 0, tex_gl->format, tex_gl->type, + params->initial_data); + break; + } + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); + + gl->BindTexture(tex_gl->target, 0); + + gl_check_error(gl, ra->log, "after creating texture"); + + // Even blitting needs an FBO in OpenGL for strange reasons. + // Download is handled by reading from an FBO. 
+ if (tex->params.render_dst || tex->params.blit_src || + tex->params.blit_dst || tex->params.downloadable) + { + if (!tex->params.format->renderable) { + MP_ERR(ra, "Trying to create renderable texture with unsupported " + "format.\n"); + ra_tex_free(ra, &tex); + return NULL; + } + + assert(gl->mpgl_caps & MPGL_CAP_FB); + + gl->GenFramebuffers(1, &tex_gl->fbo); + gl->BindFramebuffer(GL_FRAMEBUFFER, tex_gl->fbo); + gl->FramebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, tex_gl->texture, 0); + GLenum err = gl->CheckFramebufferStatus(GL_FRAMEBUFFER); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + + if (err != GL_FRAMEBUFFER_COMPLETE) { + MP_ERR(ra, "Error: framebuffer completeness check failed (error=%d).\n", + (int)err); + ra_tex_free(ra, &tex); + return NULL; + } + + + gl_check_error(gl, ra->log, "after creating framebuffer"); + } + + return tex; +} + +// Create a ra_tex that merely wraps an existing texture. The returned object +// is freed with ra_tex_free(), but this will not delete the texture passed to +// this function. +// Some features are unsupported, e.g. setting params->initial_data or render_dst. +struct ra_tex *ra_create_wrapped_tex(struct ra *ra, + const struct ra_tex_params *params, + GLuint gl_texture) +{ + struct ra_tex *tex = gl_tex_create_blank(ra, params); + if (!tex) + return NULL; + struct ra_tex_gl *tex_gl = tex->priv; + tex_gl->texture = gl_texture; + return tex; +} + +static const struct ra_format fbo_dummy_format = { + .name = "unknown_fbo", + .priv = (void *)&(const struct gl_format){ + .name = "unknown", + .format = GL_RGBA, + .flags = F_CR, + }, + .renderable = true, + .dummy_format = true, +}; + +// Create a ra_tex that merely wraps an existing framebuffer. gl_fbo can be 0 +// to wrap the default framebuffer. +// The returned object is freed with ra_tex_free(), but this will not delete +// the framebuffer object passed to this function. 
+struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h) +{ + struct ra_tex *tex = talloc_zero(ra, struct ra_tex); + *tex = (struct ra_tex){ + .params = { + .dimensions = 2, + .w = w, .h = h, .d = 1, + .format = &fbo_dummy_format, + .render_dst = true, + .blit_src = true, + .blit_dst = true, + }, + }; + + struct ra_tex_gl *tex_gl = tex->priv = talloc_zero(NULL, struct ra_tex_gl); + *tex_gl = (struct ra_tex_gl){ + .fbo = gl_fbo, + .internal_format = 0, + .format = GL_RGBA, + .type = 0, + }; + + return tex; +} + +GL *ra_gl_get(struct ra *ra) +{ + struct ra_gl *p = ra->priv; + return p->gl; +} + +// Return the associate glTexImage arguments for the given format. Sets all +// fields to 0 on failure. +void ra_gl_get_format(const struct ra_format *fmt, GLint *out_internal_format, + GLenum *out_format, GLenum *out_type) +{ + const struct gl_format *gl_format = fmt->priv; + *out_internal_format = gl_format->internal_format; + *out_format = gl_format->format; + *out_type = gl_format->type; +} + +void ra_gl_get_raw_tex(struct ra *ra, struct ra_tex *tex, + GLuint *out_texture, GLenum *out_target) +{ + struct ra_tex_gl *tex_gl = tex->priv; + *out_texture = tex_gl->texture; + *out_target = tex_gl->target; +} + +// Return whether the ra instance was created with ra_create_gl(). This is the +// _only_ function that can be called on a ra instance of any type. +bool ra_is_gl(struct ra *ra) +{ + return ra->fns == &ra_fns_gl; +} + +static bool gl_tex_upload(struct ra *ra, + const struct ra_tex_upload_params *params) +{ + GL *gl = ra_gl_get(ra); + struct ra_tex *tex = params->tex; + struct ra_buf *buf = params->buf; + struct ra_tex_gl *tex_gl = tex->priv; + struct ra_buf_gl *buf_gl = buf ? 
buf->priv : NULL; + assert(tex->params.host_mutable); + assert(!params->buf || !params->src); + + if (ra->use_pbo && !params->buf) + return ra_tex_upload_pbo(ra, &tex_gl->pbo, params); + + const void *src = params->src; + if (buf) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer); + src = (void *)params->buf_offset; + } + + gl->BindTexture(tex_gl->target, tex_gl->texture); + if (params->invalidate && gl->InvalidateTexImage) + gl->InvalidateTexImage(tex_gl->texture, 0); + + switch (tex->params.dimensions) { + case 1: + gl->TexImage1D(tex_gl->target, 0, tex_gl->internal_format, + tex->params.w, 0, tex_gl->format, tex_gl->type, src); + break; + case 2: { + struct mp_rect rc = {0, 0, tex->params.w, tex->params.h}; + if (params->rc) + rc = *params->rc; + gl_upload_tex(gl, tex_gl->target, tex_gl->format, tex_gl->type, + src, params->stride, rc.x0, rc.y0, rc.x1 - rc.x0, + rc.y1 - rc.y0); + break; + } + case 3: + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1); + gl->TexImage3D(GL_TEXTURE_3D, 0, tex_gl->internal_format, tex->params.w, + tex->params.h, tex->params.d, 0, tex_gl->format, + tex_gl->type, src); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); + break; + } + + gl->BindTexture(tex_gl->target, 0); + + if (buf) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + if (buf->params.host_mapped) { + // Make sure the PBO is not reused until GL is done with it. If a + // previous operation is pending, "update" it by creating a new + // fence that will cover the previous operation as well. 
+ gl->DeleteSync(buf_gl->fence); + buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } + } + + return true; +} + +static bool gl_tex_download(struct ra *ra, struct ra_tex_download_params *params) +{ + GL *gl = ra_gl_get(ra); + struct ra_tex *tex = params->tex; + struct ra_tex_gl *tex_gl = tex->priv; + if (!tex_gl->fbo) + return false; + return gl_read_fbo_contents(gl, tex_gl->fbo, 1, tex_gl->format, tex_gl->type, + tex->params.w, tex->params.h, params->dst, + params->stride); +} + +static void gl_buf_destroy(struct ra *ra, struct ra_buf *buf) +{ + if (!buf) + return; + + GL *gl = ra_gl_get(ra); + struct ra_buf_gl *buf_gl = buf->priv; + + if (buf_gl->fence) + gl->DeleteSync(buf_gl->fence); + + if (buf->data) { + gl->BindBuffer(buf_gl->target, buf_gl->buffer); + gl->UnmapBuffer(buf_gl->target); + gl->BindBuffer(buf_gl->target, 0); + } + gl->DeleteBuffers(1, &buf_gl->buffer); + + talloc_free(buf_gl); + talloc_free(buf); +} + +static struct ra_buf *gl_buf_create(struct ra *ra, + const struct ra_buf_params *params) +{ + GL *gl = ra_gl_get(ra); + + if (params->host_mapped && !gl->BufferStorage) + return NULL; + + struct ra_buf *buf = talloc_zero(NULL, struct ra_buf); + buf->params = *params; + buf->params.initial_data = NULL; + + struct ra_buf_gl *buf_gl = buf->priv = talloc_zero(NULL, struct ra_buf_gl); + gl->GenBuffers(1, &buf_gl->buffer); + + switch (params->type) { + case RA_BUF_TYPE_TEX_UPLOAD: buf_gl->target = GL_PIXEL_UNPACK_BUFFER; break; + case RA_BUF_TYPE_SHADER_STORAGE: buf_gl->target = GL_SHADER_STORAGE_BUFFER; break; + case RA_BUF_TYPE_UNIFORM: buf_gl->target = GL_UNIFORM_BUFFER; break; + default: abort(); + }; + + gl->BindBuffer(buf_gl->target, buf_gl->buffer); + + if (params->host_mapped) { + unsigned flags = GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT | + GL_MAP_READ_BIT | GL_MAP_WRITE_BIT; + + unsigned storflags = flags; + if (params->type == RA_BUF_TYPE_TEX_UPLOAD) + storflags |= GL_CLIENT_STORAGE_BIT; + + 
gl->BufferStorage(buf_gl->target, params->size, params->initial_data, + storflags); + buf->data = gl->MapBufferRange(buf_gl->target, 0, params->size, flags); + if (!buf->data) { + gl_check_error(gl, ra->log, "mapping buffer"); + gl_buf_destroy(ra, buf); + buf = NULL; + } + } else { + GLenum hint; + switch (params->type) { + case RA_BUF_TYPE_TEX_UPLOAD: hint = GL_STREAM_DRAW; break; + case RA_BUF_TYPE_SHADER_STORAGE: hint = GL_STREAM_COPY; break; + case RA_BUF_TYPE_UNIFORM: hint = GL_STATIC_DRAW; break; + default: MP_ASSERT_UNREACHABLE(); + } + + gl->BufferData(buf_gl->target, params->size, params->initial_data, hint); + } + + gl->BindBuffer(buf_gl->target, 0); + return buf; +} + +static void gl_buf_update(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size) +{ + GL *gl = ra_gl_get(ra); + struct ra_buf_gl *buf_gl = buf->priv; + assert(buf->params.host_mutable); + + gl->BindBuffer(buf_gl->target, buf_gl->buffer); + gl->BufferSubData(buf_gl->target, offset, size, data); + gl->BindBuffer(buf_gl->target, 0); +} + +static bool gl_buf_poll(struct ra *ra, struct ra_buf *buf) +{ + // Non-persistently mapped buffers are always implicitly reusable in OpenGL, + // the implementation will create more buffers under the hood if needed. 
+ if (!buf->data) + return true; + + GL *gl = ra_gl_get(ra); + struct ra_buf_gl *buf_gl = buf->priv; + + if (buf_gl->fence) { + GLenum res = gl->ClientWaitSync(buf_gl->fence, 0, 0); // non-blocking + if (res == GL_ALREADY_SIGNALED) { + gl->DeleteSync(buf_gl->fence); + buf_gl->fence = NULL; + } + } + + return !buf_gl->fence; +} + +static void gl_clear(struct ra *ra, struct ra_tex *dst, float color[4], + struct mp_rect *scissor) +{ + GL *gl = ra_gl_get(ra); + + assert(dst->params.render_dst); + struct ra_tex_gl *dst_gl = dst->priv; + + gl->BindFramebuffer(GL_FRAMEBUFFER, dst_gl->fbo); + + gl->Scissor(scissor->x0, scissor->y0, + scissor->x1 - scissor->x0, + scissor->y1 - scissor->y0); + + gl->Enable(GL_SCISSOR_TEST); + gl->ClearColor(color[0], color[1], color[2], color[3]); + gl->Clear(GL_COLOR_BUFFER_BIT); + gl->Disable(GL_SCISSOR_TEST); + + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); +} + +static void gl_blit(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc) +{ + GL *gl = ra_gl_get(ra); + + assert(src->params.blit_src); + assert(dst->params.blit_dst); + + struct ra_tex_gl *src_gl = src->priv; + struct ra_tex_gl *dst_gl = dst->priv; + + gl->BindFramebuffer(GL_READ_FRAMEBUFFER, src_gl->fbo); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_gl->fbo); + gl->BlitFramebuffer(src_rc->x0, src_rc->y0, src_rc->x1, src_rc->y1, + dst_rc->x0, dst_rc->y0, dst_rc->x1, dst_rc->y1, + GL_COLOR_BUFFER_BIT, GL_NEAREST); + gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); +} + +static int gl_desc_namespace(struct ra *ra, enum ra_vartype type) +{ + return type; +} + +static void gl_renderpass_destroy(struct ra *ra, struct ra_renderpass *pass) +{ + GL *gl = ra_gl_get(ra); + struct ra_renderpass_gl *pass_gl = pass->priv; + gl->DeleteProgram(pass_gl->program); + gl_vao_uninit(&pass_gl->vao); + + talloc_free(pass_gl); + talloc_free(pass); +} + +static const char *shader_typestr(GLenum type) +{ + switch 
(type) { + case GL_VERTEX_SHADER: return "vertex"; + case GL_FRAGMENT_SHADER: return "fragment"; + case GL_COMPUTE_SHADER: return "compute"; + default: MP_ASSERT_UNREACHABLE(); + } +} + +static void compile_attach_shader(struct ra *ra, GLuint program, + GLenum type, const char *source, bool *ok) +{ + GL *gl = ra_gl_get(ra); + + GLuint shader = gl->CreateShader(type); + gl->ShaderSource(shader, 1, &source, NULL); + gl->CompileShader(shader); + GLint status = 0; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); + GLint log_length = 0; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? MSGL_V : MSGL_DEBUG) : MSGL_ERR; + const char *typestr = shader_typestr(type); + if (mp_msg_test(ra->log, pri)) { + MP_MSG(ra, pri, "%s shader source:\n", typestr); + mp_log_source(ra->log, pri, source); + } + if (log_length > 1) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetShaderInfoLog(shader, log_length, NULL, logstr); + MP_MSG(ra, pri, "%s shader compile log (status=%d):\n%s\n", + typestr, status, logstr); + talloc_free(logstr); + } + if (gl->GetTranslatedShaderSourceANGLE && mp_msg_test(ra->log, MSGL_DEBUG)) { + GLint len = 0; + gl->GetShaderiv(shader, GL_TRANSLATED_SHADER_SOURCE_LENGTH_ANGLE, &len); + if (len > 0) { + GLchar *sstr = talloc_zero_size(NULL, len + 1); + gl->GetTranslatedShaderSourceANGLE(shader, len, NULL, sstr); + MP_DBG(ra, "Translated shader:\n"); + mp_log_source(ra->log, MSGL_DEBUG, sstr); + } + } + + gl->AttachShader(program, shader); + gl->DeleteShader(shader); + + *ok &= status; +} + +static void link_shader(struct ra *ra, GLuint program, bool *ok) +{ + GL *gl = ra_gl_get(ra); + + gl->LinkProgram(program); + GLint status = 0; + gl->GetProgramiv(program, GL_LINK_STATUS, &status); + GLint log_length = 0; + gl->GetProgramiv(program, GL_INFO_LOG_LENGTH, &log_length); + + int pri = status ? (log_length > 1 ? 
MSGL_V : MSGL_DEBUG) : MSGL_ERR; + if (mp_msg_test(ra->log, pri)) { + GLchar *logstr = talloc_zero_size(NULL, log_length + 1); + gl->GetProgramInfoLog(program, log_length, NULL, logstr); + MP_MSG(ra, pri, "shader link log (status=%d): %s\n", status, logstr); + talloc_free(logstr); + } + + *ok &= status; +} + +// either 'compute' or both 'vertex' and 'frag' are needed +static GLuint compile_program(struct ra *ra, const struct ra_renderpass_params *p) +{ + GL *gl = ra_gl_get(ra); + + GLuint prog = gl->CreateProgram(); + bool ok = true; + if (p->type == RA_RENDERPASS_TYPE_COMPUTE) + compile_attach_shader(ra, prog, GL_COMPUTE_SHADER, p->compute_shader, &ok); + if (p->type == RA_RENDERPASS_TYPE_RASTER) { + compile_attach_shader(ra, prog, GL_VERTEX_SHADER, p->vertex_shader, &ok); + compile_attach_shader(ra, prog, GL_FRAGMENT_SHADER, p->frag_shader, &ok); + for (int n = 0; n < p->num_vertex_attribs; n++) + gl->BindAttribLocation(prog, n, p->vertex_attribs[n].name); + } + link_shader(ra, prog, &ok); + if (!ok) { + gl->DeleteProgram(prog); + prog = 0; + } + return prog; +} + +static GLuint load_program(struct ra *ra, const struct ra_renderpass_params *p, + bstr *out_cached_data) +{ + GL *gl = ra_gl_get(ra); + + GLuint prog = 0; + + if (gl->ProgramBinary && p->cached_program.len > 4) { + GLenum format = AV_RL32(p->cached_program.start); + prog = gl->CreateProgram(); + gl_check_error(gl, ra->log, "before loading program"); + gl->ProgramBinary(prog, format, p->cached_program.start + 4, + p->cached_program.len - 4); + gl->GetError(); // discard potential useless error + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + if (status) { + MP_DBG(ra, "Loading binary program succeeded.\n"); + } else { + gl->DeleteProgram(prog); + prog = 0; + } + } + + if (!prog) { + prog = compile_program(ra, p); + + if (gl->GetProgramBinary && prog) { + GLint size = 0; + gl->GetProgramiv(prog, GL_PROGRAM_BINARY_LENGTH, &size); + uint8_t *buffer = talloc_size(NULL, size + 4); + 
GLsizei actual_size = 0; + GLenum binary_format = 0; + if (size > 0) { + gl->GetProgramBinary(prog, size, &actual_size, &binary_format, + buffer + 4); + } + AV_WL32(buffer, binary_format); + if (actual_size) { + *out_cached_data = (bstr){buffer, actual_size + 4}; + } else { + talloc_free(buffer); + } + } + } + + return prog; +} + +static struct ra_renderpass *gl_renderpass_create(struct ra *ra, + const struct ra_renderpass_params *params) +{ + GL *gl = ra_gl_get(ra); + + struct ra_renderpass *pass = talloc_zero(NULL, struct ra_renderpass); + pass->params = *ra_renderpass_params_copy(pass, params); + pass->params.cached_program = (bstr){0}; + struct ra_renderpass_gl *pass_gl = pass->priv = + talloc_zero(NULL, struct ra_renderpass_gl); + + bstr cached = {0}; + pass_gl->program = load_program(ra, params, &cached); + if (!pass_gl->program) { + gl_renderpass_destroy(ra, pass); + return NULL; + } + + talloc_steal(pass, cached.start); + pass->params.cached_program = cached; + + gl->UseProgram(pass_gl->program); + for (int n = 0; n < params->num_inputs; n++) { + GLint loc = + gl->GetUniformLocation(pass_gl->program, params->inputs[n].name); + MP_TARRAY_APPEND(pass_gl, pass_gl->uniform_loc, pass_gl->num_uniform_loc, + loc); + + // For compatibility with older OpenGL, we need to explicitly update + // the texture/image unit bindings after creating the shader program, + // since specifying it directly requires GLSL 4.20+ + switch (params->inputs[n].type) { + case RA_VARTYPE_TEX: + case RA_VARTYPE_IMG_W: + gl->Uniform1i(loc, params->inputs[n].binding); + break; + } + } + gl->UseProgram(0); + + gl_vao_init(&pass_gl->vao, gl, pass->params.vertex_stride, + pass->params.vertex_attribs, pass->params.num_vertex_attribs); + + return pass; +} + +static GLenum map_blend(enum ra_blend blend) +{ + switch (blend) { + case RA_BLEND_ZERO: return GL_ZERO; + case RA_BLEND_ONE: return GL_ONE; + case RA_BLEND_SRC_ALPHA: return GL_SRC_ALPHA; + case RA_BLEND_ONE_MINUS_SRC_ALPHA: return 
GL_ONE_MINUS_SRC_ALPHA; + default: return 0; + } +} + +// Assumes program is current (gl->UseProgram(program)). +static void update_uniform(struct ra *ra, struct ra_renderpass *pass, + struct ra_renderpass_input_val *val) +{ + GL *gl = ra_gl_get(ra); + struct ra_renderpass_gl *pass_gl = pass->priv; + + struct ra_renderpass_input *input = &pass->params.inputs[val->index]; + assert(val->index >= 0 && val->index < pass_gl->num_uniform_loc); + GLint loc = pass_gl->uniform_loc[val->index]; + + switch (input->type) { + case RA_VARTYPE_INT: { + assert(input->dim_v * input->dim_m == 1); + if (loc < 0) + break; + gl->Uniform1i(loc, *(int *)val->data); + break; + } + case RA_VARTYPE_FLOAT: { + float *f = val->data; + if (loc < 0) + break; + if (input->dim_m == 1) { + switch (input->dim_v) { + case 1: gl->Uniform1f(loc, f[0]); break; + case 2: gl->Uniform2f(loc, f[0], f[1]); break; + case 3: gl->Uniform3f(loc, f[0], f[1], f[2]); break; + case 4: gl->Uniform4f(loc, f[0], f[1], f[2], f[3]); break; + default: MP_ASSERT_UNREACHABLE(); + } + } else if (input->dim_v == 2 && input->dim_m == 2) { + gl->UniformMatrix2fv(loc, 1, GL_FALSE, f); + } else if (input->dim_v == 3 && input->dim_m == 3) { + gl->UniformMatrix3fv(loc, 1, GL_FALSE, f); + } else { + MP_ASSERT_UNREACHABLE(); + } + break; + } + case RA_VARTYPE_IMG_W: { + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct ra_tex_gl *tex_gl = tex->priv; + assert(tex->params.storage_dst); + gl->BindImageTexture(input->binding, tex_gl->texture, 0, GL_FALSE, 0, + GL_WRITE_ONLY, tex_gl->internal_format); + break; + } + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct ra_tex_gl *tex_gl = tex->priv; + assert(tex->params.render_src); + gl->ActiveTexture(GL_TEXTURE0 + input->binding); + gl->BindTexture(tex_gl->target, tex_gl->texture); + break; + } + case RA_VARTYPE_BUF_RO: // fall through + case RA_VARTYPE_BUF_RW: { + struct ra_buf *buf = *(struct ra_buf **)val->data; + struct ra_buf_gl *buf_gl = 
buf->priv; + gl->BindBufferBase(buf_gl->target, input->binding, buf_gl->buffer); + // SSBOs are not implicitly coherent in OpengL + if (input->type == RA_VARTYPE_BUF_RW) + gl->MemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + break; + } + default: + MP_ASSERT_UNREACHABLE(); + } +} + +static void disable_binding(struct ra *ra, struct ra_renderpass *pass, + struct ra_renderpass_input_val *val) +{ + GL *gl = ra_gl_get(ra); + + struct ra_renderpass_input *input = &pass->params.inputs[val->index]; + + switch (input->type) { + case RA_VARTYPE_IMG_W: /* fall through */ + case RA_VARTYPE_TEX: { + struct ra_tex *tex = *(struct ra_tex **)val->data; + struct ra_tex_gl *tex_gl = tex->priv; + assert(tex->params.render_src); + if (input->type == RA_VARTYPE_TEX) { + gl->ActiveTexture(GL_TEXTURE0 + input->binding); + gl->BindTexture(tex_gl->target, 0); + } else { + gl->BindImageTexture(input->binding, 0, 0, GL_FALSE, 0, + GL_WRITE_ONLY, tex_gl->internal_format); + } + break; + } + case RA_VARTYPE_BUF_RW: + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, input->binding, 0); + break; + } +} + +static void gl_renderpass_run(struct ra *ra, + const struct ra_renderpass_run_params *params) +{ + GL *gl = ra_gl_get(ra); + struct ra_renderpass *pass = params->pass; + struct ra_renderpass_gl *pass_gl = pass->priv; + + gl->UseProgram(pass_gl->program); + + for (int n = 0; n < params->num_values; n++) + update_uniform(ra, pass, ¶ms->values[n]); + gl->ActiveTexture(GL_TEXTURE0); + + switch (pass->params.type) { + case RA_RENDERPASS_TYPE_RASTER: { + struct ra_tex_gl *target_gl = params->target->priv; + assert(params->target->params.render_dst); + assert(params->target->params.format == pass->params.target_format); + gl->BindFramebuffer(GL_FRAMEBUFFER, target_gl->fbo); + if (pass->params.invalidate_target && gl->InvalidateFramebuffer) { + GLenum fb = target_gl->fbo ? 
GL_COLOR_ATTACHMENT0 : GL_COLOR; + gl->InvalidateFramebuffer(GL_FRAMEBUFFER, 1, &fb); + } + gl->Viewport(params->viewport.x0, params->viewport.y0, + mp_rect_w(params->viewport), + mp_rect_h(params->viewport)); + gl->Scissor(params->scissors.x0, params->scissors.y0, + mp_rect_w(params->scissors), + mp_rect_h(params->scissors)); + gl->Enable(GL_SCISSOR_TEST); + if (pass->params.enable_blend) { + gl->BlendFuncSeparate(map_blend(pass->params.blend_src_rgb), + map_blend(pass->params.blend_dst_rgb), + map_blend(pass->params.blend_src_alpha), + map_blend(pass->params.blend_dst_alpha)); + gl->Enable(GL_BLEND); + } + gl_vao_draw_data(&pass_gl->vao, GL_TRIANGLES, params->vertex_data, + params->vertex_count); + gl->Disable(GL_SCISSOR_TEST); + gl->Disable(GL_BLEND); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + break; + } + case RA_RENDERPASS_TYPE_COMPUTE: { + gl->DispatchCompute(params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + + gl->MemoryBarrier(GL_TEXTURE_FETCH_BARRIER_BIT); + break; + } + default: MP_ASSERT_UNREACHABLE(); + } + + for (int n = 0; n < params->num_values; n++) + disable_binding(ra, pass, ¶ms->values[n]); + gl->ActiveTexture(GL_TEXTURE0); + + gl->UseProgram(0); +} + +// Timers in GL use query objects, and are asynchronous. So pool a few of +// these together. GL_QUERY_OBJECT_NUM should be large enough to avoid this +// ever blocking. We can afford to throw query objects around, there's no +// practical limit on them and their overhead is small. 
+ +#define GL_QUERY_OBJECT_NUM 8 + +struct gl_timer { + GLuint query[GL_QUERY_OBJECT_NUM]; + int idx; + uint64_t result; + bool active; +}; + +static ra_timer *gl_timer_create(struct ra *ra) +{ + GL *gl = ra_gl_get(ra); + + if (!gl->GenQueries) + return NULL; + + struct gl_timer *timer = talloc_zero(NULL, struct gl_timer); + gl->GenQueries(GL_QUERY_OBJECT_NUM, timer->query); + + return (ra_timer *)timer; +} + +static void gl_timer_destroy(struct ra *ra, ra_timer *ratimer) +{ + if (!ratimer) + return; + + GL *gl = ra_gl_get(ra); + struct gl_timer *timer = ratimer; + + gl->DeleteQueries(GL_QUERY_OBJECT_NUM, timer->query); + talloc_free(timer); +} + +static void gl_timer_start(struct ra *ra, ra_timer *ratimer) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct gl_timer *timer = ratimer; + + // GL_TIME_ELAPSED queries are not re-entrant, so just do nothing instead + // of crashing. Work-around for shitty GL limitations + if (p->timer_active) + return; + + // If this query object already contains a result, we need to retrieve it + timer->result = 0; + if (gl->IsQuery(timer->query[timer->idx])) { + gl->GetQueryObjectui64v(timer->query[timer->idx], GL_QUERY_RESULT, + &timer->result); + } + + gl->BeginQuery(GL_TIME_ELAPSED, timer->query[timer->idx++]); + timer->idx %= GL_QUERY_OBJECT_NUM; + + p->timer_active = timer->active = true; +} + +static uint64_t gl_timer_stop(struct ra *ra, ra_timer *ratimer) +{ + struct ra_gl *p = ra->priv; + GL *gl = p->gl; + struct gl_timer *timer = ratimer; + + if (!timer->active) + return 0; + + gl->EndQuery(GL_TIME_ELAPSED); + p->timer_active = timer->active = false; + + return timer->result; +} + +static void gl_debug_marker(struct ra *ra, const char *msg) +{ + struct ra_gl *p = ra->priv; + + if (p->debug_enable) + gl_check_error(p->gl, ra->log, msg); +} + +static struct ra_fns ra_fns_gl = { + .destroy = gl_destroy, + .tex_create = gl_tex_create, + .tex_destroy = gl_tex_destroy, + .tex_upload = gl_tex_upload, + .tex_download = 
gl_tex_download, + .buf_create = gl_buf_create, + .buf_destroy = gl_buf_destroy, + .buf_update = gl_buf_update, + .buf_poll = gl_buf_poll, + .clear = gl_clear, + .blit = gl_blit, + .uniform_layout = std140_layout, + .desc_namespace = gl_desc_namespace, + .renderpass_create = gl_renderpass_create, + .renderpass_destroy = gl_renderpass_destroy, + .renderpass_run = gl_renderpass_run, + .timer_create = gl_timer_create, + .timer_destroy = gl_timer_destroy, + .timer_start = gl_timer_start, + .timer_stop = gl_timer_stop, + .debug_marker = gl_debug_marker, +}; diff --git a/video/out/opengl/ra_gl.h b/video/out/opengl/ra_gl.h new file mode 100644 index 0000000..9844977 --- /dev/null +++ b/video/out/opengl/ra_gl.h @@ -0,0 +1,17 @@ +#pragma once + +#include "common.h" +#include "utils.h" + +struct ra *ra_create_gl(GL *gl, struct mp_log *log); +struct ra_tex *ra_create_wrapped_tex(struct ra *ra, + const struct ra_tex_params *params, + GLuint gl_texture); +struct ra_tex *ra_create_wrapped_fb(struct ra *ra, GLuint gl_fbo, int w, int h); +GL *ra_gl_get(struct ra *ra); +void ra_gl_set_debug(struct ra *ra, bool enable); +void ra_gl_get_format(const struct ra_format *fmt, GLint *out_internal_format, + GLenum *out_format, GLenum *out_type); +void ra_gl_get_raw_tex(struct ra *ra, struct ra_tex *tex, + GLuint *out_texture, GLenum *out_target); +bool ra_is_gl(struct ra *ra); diff --git a/video/out/opengl/utils.c b/video/out/opengl/utils.c new file mode 100644 index 0000000..a551ce4 --- /dev/null +++ b/video/out/opengl/utils.c @@ -0,0 +1,282 @@ +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stddef.h> +#include <stdint.h> +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include <assert.h> + +#include <libavutil/sha.h> +#include <libavutil/intreadwrite.h> +#include <libavutil/mem.h> + +#include "osdep/io.h" + +#include "common/common.h" +#include "options/path.h" +#include "stream/stream.h" +#include "formats.h" +#include "utils.h" + +// GLU has this as gluErrorString (we don't use GLU, as it is legacy-OpenGL) +static const char *gl_error_to_string(GLenum error) +{ + switch (error) { + case GL_INVALID_ENUM: return "INVALID_ENUM"; + case GL_INVALID_VALUE: return "INVALID_VALUE"; + case GL_INVALID_OPERATION: return "INVALID_OPERATION"; + case GL_INVALID_FRAMEBUFFER_OPERATION: return "INVALID_FRAMEBUFFER_OPERATION"; + case GL_OUT_OF_MEMORY: return "OUT_OF_MEMORY"; + default: return "unknown"; + } +} + +void gl_check_error(GL *gl, struct mp_log *log, const char *info) +{ + for (;;) { + GLenum error = gl->GetError(); + if (error == GL_NO_ERROR) + break; + mp_msg(log, MSGL_ERR, "%s: OpenGL error %s.\n", info, + gl_error_to_string(error)); + } +} + +static int get_alignment(int stride) +{ + if (stride % 8 == 0) + return 8; + if (stride % 4 == 0) + return 4; + if (stride % 2 == 0) + return 2; + return 1; +} + +// upload a texture, handling things like stride and slices +// target: texture target, usually GL_TEXTURE_2D +// format, type: texture parameters +// dataptr, stride: image data +// x, y, width, height: part of the image to upload +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int 
stride, + int x, int y, int w, int h) +{ + int bpp = gl_bytes_per_pixel(format, type); + const uint8_t *data = dataptr; + int y_max = y + h; + if (w <= 0 || h <= 0 || !bpp) + return; + assert(stride > 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(stride)); + int slice = h; + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) { + // this is not always correct, but should work for MPlayer + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride / bpp); + } else { + if (stride != bpp * w) + slice = 1; // very inefficient, but at least it works + } + for (; y + slice <= y_max; y += slice) { + gl->TexSubImage2D(target, 0, x, y, w, slice, format, type, data); + data += stride * slice; + } + if (y < y_max) + gl->TexSubImage2D(target, 0, x, y, w, y_max - y, format, type, data); + if (gl->mpgl_caps & MPGL_CAP_ROW_LENGTH) + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); +} + +bool gl_read_fbo_contents(GL *gl, int fbo, int dir, GLenum format, GLenum type, + int w, int h, uint8_t *dst, int dst_stride) +{ + assert(dir == 1 || dir == -1); + if (fbo == 0 && gl->es) + return false; // ES can't read from front buffer + gl->BindFramebuffer(GL_FRAMEBUFFER, fbo); + GLenum obj = fbo ? GL_COLOR_ATTACHMENT0 : GL_FRONT; + gl->PixelStorei(GL_PACK_ALIGNMENT, 1); + gl->ReadBuffer(obj); + // reading by line allows flipping, and avoids stride-related trouble + int y1 = dir > 0 ? 
0 : h; + for (int y = 0; y < h; y++) + gl->ReadPixels(0, y, w, 1, format, type, dst + (y1 + dir * y) * dst_stride); + gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->BindFramebuffer(GL_FRAMEBUFFER, 0); + return true; +} + +static void gl_vao_enable_attribs(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + for (int n = 0; n < vao->num_entries; n++) { + const struct ra_renderpass_input *e = &vao->entries[n]; + GLenum type = 0; + bool normalized = false; + switch (e->type) { + case RA_VARTYPE_INT: + type = GL_INT; + break; + case RA_VARTYPE_FLOAT: + type = GL_FLOAT; + break; + case RA_VARTYPE_BYTE_UNORM: + type = GL_UNSIGNED_BYTE; + normalized = true; + break; + default: + abort(); + } + assert(e->dim_m == 1); + + gl->EnableVertexAttribArray(n); + gl->VertexAttribPointer(n, e->dim_v, type, normalized, + vao->stride, (void *)(intptr_t)e->offset); + } +} + +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct ra_renderpass_input *entries, + int num_entries) +{ + assert(!vao->vao); + assert(!vao->buffer); + + *vao = (struct gl_vao){ + .gl = gl, + .stride = stride, + .entries = entries, + .num_entries = num_entries, + }; + + gl->GenBuffers(1, &vao->buffer); + + if (gl->BindVertexArray) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + + gl->GenVertexArrays(1, &vao->vao); + gl->BindVertexArray(vao->vao); + gl_vao_enable_attribs(vao); + gl->BindVertexArray(0); + + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } +} + +void gl_vao_uninit(struct gl_vao *vao) +{ + GL *gl = vao->gl; + if (!gl) + return; + + if (gl->DeleteVertexArrays) + gl->DeleteVertexArrays(1, &vao->vao); + gl->DeleteBuffers(1, &vao->buffer); + + *vao = (struct gl_vao){0}; +} + +static void gl_vao_bind(struct gl_vao *vao) +{ + GL *gl = vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(vao->vao); + } else { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl_vao_enable_attribs(vao); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } +} + +static void gl_vao_unbind(struct gl_vao *vao) +{ + GL *gl = 
vao->gl; + + if (gl->BindVertexArray) { + gl->BindVertexArray(0); + } else { + for (int n = 0; n < vao->num_entries; n++) + gl->DisableVertexAttribArray(n); + } +} + +// Draw the vertex data (as described by the gl_vao_entry entries) in ptr +// to the screen. num is the number of vertexes. prim is usually GL_TRIANGLES. +// If ptr is NULL, then skip the upload, and use the data uploaded with the +// previous call. +void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num) +{ + GL *gl = vao->gl; + + if (ptr) { + gl->BindBuffer(GL_ARRAY_BUFFER, vao->buffer); + gl->BufferData(GL_ARRAY_BUFFER, num * vao->stride, ptr, GL_STREAM_DRAW); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } + + gl_vao_bind(vao); + + gl->DrawArrays(prim, 0, num); + + gl_vao_unbind(vao); +} + +static void GLAPIENTRY gl_debug_cb(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar *message, const void *userParam) +{ + // keep in mind that the debug callback can be asynchronous + struct mp_log *log = (void *)userParam; + int level = MSGL_ERR; + switch (severity) { + case GL_DEBUG_SEVERITY_NOTIFICATION:level = MSGL_V; break; + case GL_DEBUG_SEVERITY_LOW: level = MSGL_INFO; break; + case GL_DEBUG_SEVERITY_MEDIUM: level = MSGL_WARN; break; + case GL_DEBUG_SEVERITY_HIGH: level = MSGL_ERR; break; + } + mp_msg(log, level, "GL: %s\n", message); +} + +void gl_set_debug_logger(GL *gl, struct mp_log *log) +{ + if (gl->DebugMessageCallback) + gl->DebugMessageCallback(log ? gl_debug_cb : NULL, log); +} + +// Given a GL combined extension string in extensions, find out whether ext +// is included in it. Basically, a word search. 
+bool gl_check_extension(const char *extensions, const char *ext) +{ + int len = strlen(ext); + const char *cur = extensions; + while (cur) { + cur = strstr(cur, ext); + if (!cur) + break; + if ((cur == extensions || cur[-1] == ' ') && + (cur[len] == '\0' || cur[len] == ' ')) + return true; + cur += len; + } + return false; +} diff --git a/video/out/opengl/utils.h b/video/out/opengl/utils.h new file mode 100644 index 0000000..9bcadae --- /dev/null +++ b/video/out/opengl/utils.h @@ -0,0 +1,57 @@ +/* + * This file is part of mpv. + * Parts based on MPlayer code by Reimar Döffinger. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MP_GL_UTILS_ +#define MP_GL_UTILS_ + +#include <math.h> + +#include "video/out/gpu/utils.h" +#include "common.h" + +struct mp_log; + +void gl_check_error(GL *gl, struct mp_log *log, const char *info); + +void gl_upload_tex(GL *gl, GLenum target, GLenum format, GLenum type, + const void *dataptr, int stride, + int x, int y, int w, int h); + +bool gl_read_fbo_contents(GL *gl, int fbo, int dir, GLenum format, GLenum type, + int w, int h, uint8_t *dst, int dst_stride); + +struct gl_vao { + GL *gl; + GLuint vao; // the VAO object, or 0 if unsupported by driver + GLuint buffer; // GL_ARRAY_BUFFER used for the data + int stride; // size of each element (interleaved elements are assumed) + const struct ra_renderpass_input *entries; + int num_entries; +}; + +void gl_vao_init(struct gl_vao *vao, GL *gl, int stride, + const struct ra_renderpass_input *entries, + int num_entries); +void gl_vao_uninit(struct gl_vao *vao); +void gl_vao_draw_data(struct gl_vao *vao, GLenum prim, void *ptr, size_t num); + +void gl_set_debug_logger(GL *gl, struct mp_log *log); + +bool gl_check_extension(const char *extensions, const char *ext); + +#endif diff --git a/video/out/placebo/ra_pl.c b/video/out/placebo/ra_pl.c new file mode 100644 index 0000000..6259651 --- /dev/null +++ b/video/out/placebo/ra_pl.c @@ -0,0 +1,677 @@ +#include "common/common.h" +#include "common/msg.h" + +#include "ra_pl.h" +#include "utils.h" + +struct ra_pl { + pl_gpu gpu; + struct ra_timer_pl *active_timer; +}; + +static inline pl_gpu get_gpu(const struct ra *ra) +{ + struct ra_pl *p = ra->priv; + return p->gpu; +} + +static struct ra_fns ra_fns_pl; + +pl_gpu ra_pl_get(const struct ra *ra) +{ + return ra->fns == &ra_fns_pl ? 
get_gpu(ra) : NULL; +} + +static pl_timer get_active_timer(const struct ra *ra); + +struct ra *ra_create_pl(pl_gpu gpu, struct mp_log *log) +{ + assert(gpu); + + struct ra *ra = talloc_zero(NULL, struct ra); + ra->log = log; + ra->fns = &ra_fns_pl; + + struct ra_pl *p = ra->priv = talloc_zero(ra, struct ra_pl); + p->gpu = gpu; + + ra->glsl_version = gpu->glsl.version; + ra->glsl_vulkan = gpu->glsl.vulkan; + ra->glsl_es = gpu->glsl.gles; + + ra->caps = RA_CAP_DIRECT_UPLOAD | RA_CAP_NESTED_ARRAY | RA_CAP_FRAGCOORD; + + if (gpu->glsl.compute) + ra->caps |= RA_CAP_COMPUTE | RA_CAP_NUM_GROUPS; + if (gpu->limits.compute_queues > gpu->limits.fragment_queues) + ra->caps |= RA_CAP_PARALLEL_COMPUTE; + if (gpu->limits.max_variable_comps) + ra->caps |= RA_CAP_GLOBAL_UNIFORM; + if (!gpu->limits.host_cached) + ra->caps |= RA_CAP_SLOW_DR; + + if (gpu->limits.max_tex_1d_dim) + ra->caps |= RA_CAP_TEX_1D; + if (gpu->limits.max_tex_3d_dim) + ra->caps |= RA_CAP_TEX_3D; + if (gpu->limits.max_ubo_size) + ra->caps |= RA_CAP_BUF_RO; + if (gpu->limits.max_ssbo_size) + ra->caps |= RA_CAP_BUF_RW; + if (gpu->glsl.min_gather_offset && gpu->glsl.max_gather_offset) + ra->caps |= RA_CAP_GATHER; + + // Semi-hack: assume all textures are blittable if r8 is + pl_fmt r8 = pl_find_named_fmt(gpu, "r8"); + if (r8->caps & PL_FMT_CAP_BLITTABLE) + ra->caps |= RA_CAP_BLIT; + + ra->max_texture_wh = gpu->limits.max_tex_2d_dim; + ra->max_pushc_size = gpu->limits.max_pushc_size; + ra->max_compute_group_threads = gpu->glsl.max_group_threads; + ra->max_shmem = gpu->glsl.max_shmem_size; + + // Set up format wrappers + for (int i = 0; i < gpu->num_formats; i++) { + pl_fmt plfmt = gpu->formats[i]; + static const enum ra_ctype fmt_type_map[PL_FMT_TYPE_COUNT] = { + [PL_FMT_UNORM] = RA_CTYPE_UNORM, + [PL_FMT_UINT] = RA_CTYPE_UINT, + [PL_FMT_FLOAT] = RA_CTYPE_FLOAT, + }; + + enum ra_ctype type = fmt_type_map[plfmt->type]; + if (!type || !(plfmt->caps & PL_FMT_CAP_SAMPLEABLE)) + continue; + + struct ra_format *rafmt = 
talloc_zero(ra, struct ra_format); + *rafmt = (struct ra_format) { + .name = plfmt->name, + .priv = (void *) plfmt, + .ctype = type, + .ordered = pl_fmt_is_ordered(plfmt), + .num_components = plfmt->num_components, + .pixel_size = plfmt->texel_size, + .linear_filter = plfmt->caps & PL_FMT_CAP_LINEAR, + .renderable = plfmt->caps & PL_FMT_CAP_RENDERABLE, + .storable = plfmt->caps & PL_FMT_CAP_STORABLE, + .glsl_format = plfmt->glsl_format, + }; + + for (int c = 0; c < plfmt->num_components; c++) { + rafmt->component_size[c] = plfmt->host_bits[c]; + rafmt->component_depth[c] = plfmt->component_depth[c]; + } + + MP_TARRAY_APPEND(ra, ra->formats, ra->num_formats, rafmt); + } + + return ra; +} + +static void destroy_ra_pl(struct ra *ra) +{ + talloc_free(ra); +} + +static struct ra_format *map_fmt(struct ra *ra, pl_fmt plfmt) +{ + for (int i = 0; i < ra->num_formats; i++) { + if (ra->formats[i]->priv == plfmt) + return ra->formats[i]; + } + + MP_ERR(ra, "Failed mapping pl_fmt '%s' to ra_fmt?\n", plfmt->name); + return NULL; +} + +bool mppl_wrap_tex(struct ra *ra, pl_tex pltex, struct ra_tex *out_tex) +{ + if (!pltex) + return false; + + *out_tex = (struct ra_tex) { + .params = { + .dimensions = pl_tex_params_dimension(pltex->params), + .w = pltex->params.w, + .h = pltex->params.h, + .d = pltex->params.d, + .format = map_fmt(ra, pltex->params.format), + .render_src = pltex->params.sampleable, + .render_dst = pltex->params.renderable, + .storage_dst = pltex->params.storable, + .blit_src = pltex->params.blit_src, + .blit_dst = pltex->params.blit_dst, + .host_mutable = pltex->params.host_writable, + .downloadable = pltex->params.host_readable, + // These don't exist upstream, so just pick something reasonable + .src_linear = pltex->params.format->caps & PL_FMT_CAP_LINEAR, + .src_repeat = false, + }, + .priv = (void *) pltex, + }; + + return !!out_tex->params.format; +} + +static struct ra_tex *tex_create_pl(struct ra *ra, + const struct ra_tex_params *params) +{ + pl_gpu gpu = 
get_gpu(ra); + pl_tex pltex = pl_tex_create(gpu, &(struct pl_tex_params) { + .w = params->w, + .h = params->dimensions >= 2 ? params->h : 0, + .d = params->dimensions >= 3 ? params->d : 0, + .format = params->format->priv, + .sampleable = params->render_src, + .renderable = params->render_dst, + .storable = params->storage_dst, + .blit_src = params->blit_src, + .blit_dst = params->blit_dst || params->render_dst, + .host_writable = params->host_mutable, + .host_readable = params->downloadable, + .initial_data = params->initial_data, + }); + + struct ra_tex *ratex = talloc_ptrtype(NULL, ratex); + if (!mppl_wrap_tex(ra, pltex, ratex)) { + pl_tex_destroy(gpu, &pltex); + talloc_free(ratex); + return NULL; + } + + // Keep track of these, so we can correctly bind them later + ratex->params.src_repeat = params->src_repeat; + ratex->params.src_linear = params->src_linear; + + return ratex; +} + +static void tex_destroy_pl(struct ra *ra, struct ra_tex *tex) +{ + if (!tex) + return; + + pl_tex_destroy(get_gpu(ra), (pl_tex *) &tex->priv); + talloc_free(tex); +} + +static bool tex_upload_pl(struct ra *ra, const struct ra_tex_upload_params *params) +{ + pl_gpu gpu = get_gpu(ra); + pl_tex tex = params->tex->priv; + struct pl_tex_transfer_params pl_params = { + .tex = tex, + .buf = params->buf ? 
params->buf->priv : NULL, + .buf_offset = params->buf_offset, + .ptr = (void *) params->src, + .timer = get_active_timer(ra), + }; + + pl_buf staging = NULL; + if (params->tex->params.dimensions == 2) { + if (params->rc) { + pl_params.rc = (struct pl_rect3d) { + .x0 = params->rc->x0, .x1 = params->rc->x1, + .y0 = params->rc->y0, .y1 = params->rc->y1, + }; + } + + pl_params.row_pitch = params->stride; + } + + bool ok = pl_tex_upload(gpu, &pl_params); + pl_buf_destroy(gpu, &staging); + return ok; +} + +static bool tex_download_pl(struct ra *ra, struct ra_tex_download_params *params) +{ + pl_tex tex = params->tex->priv; + struct pl_tex_transfer_params pl_params = { + .tex = tex, + .ptr = params->dst, + .timer = get_active_timer(ra), + .row_pitch = params->stride, + }; + + return pl_tex_download(get_gpu(ra), &pl_params); +} + +static struct ra_buf *buf_create_pl(struct ra *ra, + const struct ra_buf_params *params) +{ + pl_buf plbuf = pl_buf_create(get_gpu(ra), &(struct pl_buf_params) { + .size = params->size, + .uniform = params->type == RA_BUF_TYPE_UNIFORM, + .storable = params->type == RA_BUF_TYPE_SHADER_STORAGE, + .host_mapped = params->host_mapped, + .host_writable = params->host_mutable, + .initial_data = params->initial_data, + }); + + if (!plbuf) + return NULL; + + struct ra_buf *rabuf = talloc_ptrtype(NULL, rabuf); + *rabuf = (struct ra_buf) { + .params = *params, + .data = plbuf->data, + .priv = (void *) plbuf, + }; + + rabuf->params.initial_data = NULL; + return rabuf; +} + +static void buf_destroy_pl(struct ra *ra, struct ra_buf *buf) +{ + if (!buf) + return; + + pl_buf_destroy(get_gpu(ra), (pl_buf *) &buf->priv); + talloc_free(buf); +} + +static void buf_update_pl(struct ra *ra, struct ra_buf *buf, ptrdiff_t offset, + const void *data, size_t size) +{ + pl_buf_write(get_gpu(ra), buf->priv, offset, data, size); +} + +static bool buf_poll_pl(struct ra *ra, struct ra_buf *buf) +{ + return !pl_buf_poll(get_gpu(ra), buf->priv, 0); +} + +static void 
clear_pl(struct ra *ra, struct ra_tex *dst, float color[4], + struct mp_rect *scissor) +{ + // TODO: implement scissor clearing by bltting a 1x1 tex instead + pl_tex_clear(get_gpu(ra), dst->priv, color); +} + +static void blit_pl(struct ra *ra, struct ra_tex *dst, struct ra_tex *src, + struct mp_rect *dst_rc, struct mp_rect *src_rc) +{ + struct pl_rect3d plsrc = {0}, pldst = {0}; + if (src_rc) { + plsrc.x0 = MPMIN(MPMAX(src_rc->x0, 0), src->params.w); + plsrc.y0 = MPMIN(MPMAX(src_rc->y0, 0), src->params.h); + plsrc.x1 = MPMIN(MPMAX(src_rc->x1, 0), src->params.w); + plsrc.y1 = MPMIN(MPMAX(src_rc->y1, 0), src->params.h); + } + + if (dst_rc) { + pldst.x0 = MPMIN(MPMAX(dst_rc->x0, 0), dst->params.w); + pldst.y0 = MPMIN(MPMAX(dst_rc->y0, 0), dst->params.h); + pldst.x1 = MPMIN(MPMAX(dst_rc->x1, 0), dst->params.w); + pldst.y1 = MPMIN(MPMAX(dst_rc->y1, 0), dst->params.h); + } + + pl_tex_blit(get_gpu(ra), &(struct pl_tex_blit_params) { + .src = src->priv, + .dst = dst->priv, + .src_rc = plsrc, + .dst_rc = pldst, + .sample_mode = src->params.src_linear ? 
PL_TEX_SAMPLE_LINEAR + : PL_TEX_SAMPLE_NEAREST, + }); +} + +static const enum pl_var_type var_type[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_INT] = PL_VAR_SINT, + [RA_VARTYPE_FLOAT] = PL_VAR_FLOAT, +}; + +static const enum pl_desc_type desc_type[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_TEX] = PL_DESC_SAMPLED_TEX, + [RA_VARTYPE_IMG_W] = PL_DESC_STORAGE_IMG, + [RA_VARTYPE_BUF_RO] = PL_DESC_BUF_UNIFORM, + [RA_VARTYPE_BUF_RW] = PL_DESC_BUF_STORAGE, +}; + +static const enum pl_fmt_type fmt_type[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_INT] = PL_FMT_SINT, + [RA_VARTYPE_FLOAT] = PL_FMT_FLOAT, + [RA_VARTYPE_BYTE_UNORM] = PL_FMT_UNORM, +}; + +static const size_t var_size[RA_VARTYPE_COUNT] = { + [RA_VARTYPE_INT] = sizeof(int), + [RA_VARTYPE_FLOAT] = sizeof(float), + [RA_VARTYPE_BYTE_UNORM] = sizeof(uint8_t), +}; + +static struct ra_layout uniform_layout_pl(struct ra_renderpass_input *inp) +{ + // To get the alignment requirements, we try laying this out with + // an offset of 1 and then see where it ends up. This will always be + // the minimum alignment requirement. 
+ struct pl_var_layout layout = pl_buf_uniform_layout(1, &(struct pl_var) { + .name = inp->name, + .type = var_type[inp->type], + .dim_v = inp->dim_v, + .dim_m = inp->dim_m, + .dim_a = 1, + }); + + return (struct ra_layout) { + .align = layout.offset, + .stride = layout.stride, + .size = layout.size, + }; +} + +static struct ra_layout push_constant_layout_pl(struct ra_renderpass_input *inp) +{ + struct pl_var_layout layout = pl_push_constant_layout(1, &(struct pl_var) { + .name = inp->name, + .type = var_type[inp->type], + .dim_v = inp->dim_v, + .dim_m = inp->dim_m, + .dim_a = 1, + }); + + return (struct ra_layout) { + .align = layout.offset, + .stride = layout.stride, + .size = layout.size, + }; +} + +static int desc_namespace_pl(struct ra *ra, enum ra_vartype type) +{ + return pl_desc_namespace(get_gpu(ra), desc_type[type]); +} + +struct pass_priv { + pl_pass pass; + uint16_t *inp_index; // index translation map + // Space to hold the descriptor bindings and variable updates + struct pl_desc_binding *binds; + struct pl_var_update *varups; + int num_varups; +}; + +static struct ra_renderpass *renderpass_create_pl(struct ra *ra, + const struct ra_renderpass_params *params) +{ + void *tmp = talloc_new(NULL); + pl_gpu gpu = get_gpu(ra); + struct ra_renderpass *pass = NULL; + + static const enum pl_pass_type pass_type[] = { + [RA_RENDERPASS_TYPE_RASTER] = PL_PASS_RASTER, + [RA_RENDERPASS_TYPE_COMPUTE] = PL_PASS_COMPUTE, + }; + + struct pl_var *vars = NULL; + struct pl_desc *descs = NULL; + int num_vars = 0, num_descs = 0; + + struct pass_priv *priv = talloc_ptrtype(tmp, priv); + priv->inp_index = talloc_zero_array(priv, uint16_t, params->num_inputs); + + for (int i = 0; i < params->num_inputs; i++) { + const struct ra_renderpass_input *inp = ¶ms->inputs[i]; + if (var_type[inp->type]) { + priv->inp_index[i] = num_vars; + MP_TARRAY_APPEND(tmp, vars, num_vars, (struct pl_var) { + .name = inp->name, + .type = var_type[inp->type], + .dim_v = inp->dim_v, + .dim_m = 
inp->dim_m, + .dim_a = 1, + }); + } else if (desc_type[inp->type]) { + priv->inp_index[i] = num_descs; + MP_TARRAY_APPEND(tmp, descs, num_descs, (struct pl_desc) { + .name = inp->name, + .type = desc_type[inp->type], + .binding = inp->binding, + .access = inp->type == RA_VARTYPE_IMG_W ? PL_DESC_ACCESS_WRITEONLY + : inp->type == RA_VARTYPE_BUF_RW ? PL_DESC_ACCESS_READWRITE + : PL_DESC_ACCESS_READONLY, + }); + } + } + + // Allocate space to store the bindings map persistently + priv->binds = talloc_zero_array(priv, struct pl_desc_binding, num_descs); + + struct pl_pass_params pl_params = { + .type = pass_type[params->type], + .variables = vars, + .num_variables = num_vars, + .descriptors = descs, + .num_descriptors = num_descs, + .push_constants_size = params->push_constants_size, + .glsl_shader = params->type == RA_RENDERPASS_TYPE_COMPUTE + ? params->compute_shader + : params->frag_shader, + }; + + struct pl_blend_params blend_params; + + if (params->type == RA_RENDERPASS_TYPE_RASTER) { + pl_params.vertex_shader = params->vertex_shader; + pl_params.vertex_type = PL_PRIM_TRIANGLE_LIST; + pl_params.vertex_stride = params->vertex_stride; + pl_params.load_target = !params->invalidate_target; + pl_params.target_format = params->target_format->priv; + + if (params->enable_blend) { + pl_params.blend_params = &blend_params; + blend_params = (struct pl_blend_params) { + // Same enum order as ra_blend + .src_rgb = (enum pl_blend_mode) params->blend_src_rgb, + .dst_rgb = (enum pl_blend_mode) params->blend_dst_rgb, + .src_alpha = (enum pl_blend_mode) params->blend_src_alpha, + .dst_alpha = (enum pl_blend_mode) params->blend_dst_alpha, + }; + } + + for (int i = 0; i < params->num_vertex_attribs; i++) { + const struct ra_renderpass_input *inp = ¶ms->vertex_attribs[i]; + struct pl_vertex_attrib attrib = { + .name = inp->name, + .offset = inp->offset, + .location = i, + .fmt = pl_find_fmt(gpu, fmt_type[inp->type], inp->dim_v, 0, + var_size[inp->type] * 8, PL_FMT_CAP_VERTEX), + }; + 
+ if (!attrib.fmt) { + MP_ERR(ra, "Failed mapping vertex attrib '%s' to pl_fmt?\n", + inp->name); + goto error; + } + + MP_TARRAY_APPEND(tmp, pl_params.vertex_attribs, + pl_params.num_vertex_attribs, attrib); + } + } + + priv->pass = pl_pass_create(gpu, &pl_params); + if (!priv->pass) + goto error; + + pass = talloc_ptrtype(NULL, pass); + *pass = (struct ra_renderpass) { + .params = *ra_renderpass_params_copy(pass, params), + .priv = talloc_steal(pass, priv), + }; + + // fall through +error: + talloc_free(tmp); + return pass; +} + +static void renderpass_destroy_pl(struct ra *ra, struct ra_renderpass *pass) +{ + if (!pass) + return; + + struct pass_priv *priv = pass->priv; + pl_pass_destroy(get_gpu(ra), (pl_pass *) &priv->pass); + talloc_free(pass); +} + +static void renderpass_run_pl(struct ra *ra, + const struct ra_renderpass_run_params *params) +{ + struct pass_priv *p = params->pass->priv; + p->num_varups = 0; + + for (int i = 0; i < params->num_values; i++) { + const struct ra_renderpass_input_val *val = ¶ms->values[i]; + const struct ra_renderpass_input *inp = ¶ms->pass->params.inputs[i]; + if (var_type[inp->type]) { + MP_TARRAY_APPEND(p, p->varups, p->num_varups, (struct pl_var_update) { + .index = p->inp_index[val->index], + .data = val->data, + }); + } else { + struct pl_desc_binding bind; + switch (inp->type) { + case RA_VARTYPE_TEX: + case RA_VARTYPE_IMG_W: { + struct ra_tex *tex = *((struct ra_tex **) val->data); + bind.object = tex->priv; + bind.sample_mode = tex->params.src_linear ? PL_TEX_SAMPLE_LINEAR + : PL_TEX_SAMPLE_NEAREST; + bind.address_mode = tex->params.src_repeat ? 
PL_TEX_ADDRESS_REPEAT + : PL_TEX_ADDRESS_CLAMP; + break; + } + case RA_VARTYPE_BUF_RO: + case RA_VARTYPE_BUF_RW: + bind.object = (* (struct ra_buf **) val->data)->priv; + break; + default: MP_ASSERT_UNREACHABLE(); + }; + + p->binds[p->inp_index[val->index]] = bind; + }; + } + + struct pl_pass_run_params pl_params = { + .pass = p->pass, + .var_updates = p->varups, + .num_var_updates = p->num_varups, + .desc_bindings = p->binds, + .push_constants = params->push_constants, + .timer = get_active_timer(ra), + }; + + if (p->pass->params.type == PL_PASS_RASTER) { + pl_params.target = params->target->priv; + pl_params.viewport = mp_rect2d_to_pl(params->viewport); + pl_params.scissors = mp_rect2d_to_pl(params->scissors); + pl_params.vertex_data = params->vertex_data; + pl_params.vertex_count = params->vertex_count; + } else { + for (int i = 0; i < MP_ARRAY_SIZE(pl_params.compute_groups); i++) + pl_params.compute_groups[i] = params->compute_groups[i]; + } + + pl_pass_run(get_gpu(ra), &pl_params); +} + +struct ra_timer_pl { + // Because libpplacebo only supports one operation per timer, we need + // to use multiple pl_timers to sum up multiple passes/transfers + pl_timer *timers; + int num_timers; + int idx_timers; +}; + +static ra_timer *timer_create_pl(struct ra *ra) +{ + struct ra_timer_pl *t = talloc_zero(ra, struct ra_timer_pl); + return t; +} + +static void timer_destroy_pl(struct ra *ra, ra_timer *timer) +{ + pl_gpu gpu = get_gpu(ra); + struct ra_timer_pl *t = timer; + + for (int i = 0; i < t->num_timers; i++) + pl_timer_destroy(gpu, &t->timers[i]); + + talloc_free(t); +} + +static void timer_start_pl(struct ra *ra, ra_timer *timer) +{ + struct ra_pl *p = ra->priv; + struct ra_timer_pl *t = timer; + + // There's nothing easy we can do in this case, since libplacebo only + // supports one timer object per operation; so just ignore "inner" timers + // when the user is nesting different timer queries + if (p->active_timer) + return; + + p->active_timer = t; + 
t->idx_timers = 0; +} + +static uint64_t timer_stop_pl(struct ra *ra, ra_timer *timer) +{ + struct ra_pl *p = ra->priv; + struct ra_timer_pl *t = timer; + + if (p->active_timer != t) + return 0; + + p->active_timer = NULL; + + // Sum up all of the active results + uint64_t res = 0; + for (int i = 0; i < t->idx_timers; i++) + res += pl_timer_query(p->gpu, t->timers[i]); + + return res; +} + +static pl_timer get_active_timer(const struct ra *ra) +{ + struct ra_pl *p = ra->priv; + if (!p->active_timer) + return NULL; + + struct ra_timer_pl *t = p->active_timer; + if (t->idx_timers == t->num_timers) + MP_TARRAY_APPEND(t, t->timers, t->num_timers, pl_timer_create(p->gpu)); + + return t->timers[t->idx_timers++]; +} + +static struct ra_fns ra_fns_pl = { + .destroy = destroy_ra_pl, + .tex_create = tex_create_pl, + .tex_destroy = tex_destroy_pl, + .tex_upload = tex_upload_pl, + .tex_download = tex_download_pl, + .buf_create = buf_create_pl, + .buf_destroy = buf_destroy_pl, + .buf_update = buf_update_pl, + .buf_poll = buf_poll_pl, + .clear = clear_pl, + .blit = blit_pl, + .uniform_layout = uniform_layout_pl, + .push_constant_layout = push_constant_layout_pl, + .desc_namespace = desc_namespace_pl, + .renderpass_create = renderpass_create_pl, + .renderpass_destroy = renderpass_destroy_pl, + .renderpass_run = renderpass_run_pl, + .timer_create = timer_create_pl, + .timer_destroy = timer_destroy_pl, + .timer_start = timer_start_pl, + .timer_stop = timer_stop_pl, +}; + diff --git a/video/out/placebo/ra_pl.h b/video/out/placebo/ra_pl.h new file mode 100644 index 0000000..1290c9c --- /dev/null +++ b/video/out/placebo/ra_pl.h @@ -0,0 +1,16 @@ +#pragma once + +#include "video/out/gpu/ra.h" +#include <libplacebo/gpu.h> + +struct ra *ra_create_pl(pl_gpu gpu, struct mp_log *log); + +pl_gpu ra_pl_get(const struct ra *ra); + +static inline pl_fmt ra_pl_fmt_get(const struct ra_format *format) +{ + return format->priv; +} + +// Wrap a pl_tex into a ra_tex struct, returns if successful +bool 
mppl_wrap_tex(struct ra *ra, pl_tex pltex, struct ra_tex *out_tex); diff --git a/video/out/placebo/utils.c b/video/out/placebo/utils.c new file mode 100644 index 0000000..1209b72 --- /dev/null +++ b/video/out/placebo/utils.c @@ -0,0 +1,263 @@ +#include "common/common.h" +#include "utils.h" + +#include <libplacebo/utils/dolbyvision.h> + +static const int pl_log_to_msg_lev[PL_LOG_ALL+1] = { + [PL_LOG_FATAL] = MSGL_FATAL, + [PL_LOG_ERR] = MSGL_ERR, + [PL_LOG_WARN] = MSGL_WARN, + [PL_LOG_INFO] = MSGL_V, + [PL_LOG_DEBUG] = MSGL_DEBUG, + [PL_LOG_TRACE] = MSGL_TRACE, +}; + +static const enum pl_log_level msg_lev_to_pl_log[MSGL_MAX+1] = { + [MSGL_FATAL] = PL_LOG_FATAL, + [MSGL_ERR] = PL_LOG_ERR, + [MSGL_WARN] = PL_LOG_WARN, + [MSGL_INFO] = PL_LOG_WARN, + [MSGL_STATUS] = PL_LOG_WARN, + [MSGL_V] = PL_LOG_INFO, + [MSGL_DEBUG] = PL_LOG_DEBUG, + [MSGL_TRACE] = PL_LOG_TRACE, + [MSGL_MAX] = PL_LOG_ALL, +}; + +// translates log levels while probing +static const enum pl_log_level probing_map(enum pl_log_level level) +{ + switch (level) { + case PL_LOG_FATAL: + case PL_LOG_ERR: + case PL_LOG_WARN: + return PL_LOG_INFO; + + default: + return level; + } +} + +static void log_cb(void *priv, enum pl_log_level level, const char *msg) +{ + struct mp_log *log = priv; + mp_msg(log, pl_log_to_msg_lev[level], "%s\n", msg); +} + +static void log_cb_probing(void *priv, enum pl_log_level level, const char *msg) +{ + struct mp_log *log = priv; + mp_msg(log, pl_log_to_msg_lev[probing_map(level)], "%s\n", msg); +} + +pl_log mppl_log_create(void *tactx, struct mp_log *log) +{ + return pl_log_create(PL_API_VER, &(struct pl_log_params) { + .log_cb = log_cb, + .log_level = msg_lev_to_pl_log[mp_msg_level(log)], + .log_priv = mp_log_new(tactx, log, "libplacebo"), + }); +} + +void mppl_log_set_probing(pl_log log, bool probing) +{ + struct pl_log_params params = log->params; + params.log_cb = probing ? 
log_cb_probing : log_cb; + pl_log_update(log, ¶ms); +} + +enum pl_color_primaries mp_prim_to_pl(enum mp_csp_prim prim) +{ + switch (prim) { + case MP_CSP_PRIM_AUTO: return PL_COLOR_PRIM_UNKNOWN; + case MP_CSP_PRIM_BT_601_525: return PL_COLOR_PRIM_BT_601_525; + case MP_CSP_PRIM_BT_601_625: return PL_COLOR_PRIM_BT_601_625; + case MP_CSP_PRIM_BT_709: return PL_COLOR_PRIM_BT_709; + case MP_CSP_PRIM_BT_2020: return PL_COLOR_PRIM_BT_2020; + case MP_CSP_PRIM_BT_470M: return PL_COLOR_PRIM_BT_470M; + case MP_CSP_PRIM_APPLE: return PL_COLOR_PRIM_APPLE; + case MP_CSP_PRIM_ADOBE: return PL_COLOR_PRIM_ADOBE; + case MP_CSP_PRIM_PRO_PHOTO: return PL_COLOR_PRIM_PRO_PHOTO; + case MP_CSP_PRIM_CIE_1931: return PL_COLOR_PRIM_CIE_1931; + case MP_CSP_PRIM_DCI_P3: return PL_COLOR_PRIM_DCI_P3; + case MP_CSP_PRIM_DISPLAY_P3: return PL_COLOR_PRIM_DISPLAY_P3; + case MP_CSP_PRIM_V_GAMUT: return PL_COLOR_PRIM_V_GAMUT; + case MP_CSP_PRIM_S_GAMUT: return PL_COLOR_PRIM_S_GAMUT; + case MP_CSP_PRIM_EBU_3213: return PL_COLOR_PRIM_EBU_3213; + case MP_CSP_PRIM_FILM_C: return PL_COLOR_PRIM_FILM_C; + case MP_CSP_PRIM_ACES_AP0: return PL_COLOR_PRIM_ACES_AP0; + case MP_CSP_PRIM_ACES_AP1: return PL_COLOR_PRIM_ACES_AP1; + case MP_CSP_PRIM_COUNT: return PL_COLOR_PRIM_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum mp_csp_prim mp_prim_from_pl(enum pl_color_primaries prim) +{ + switch (prim){ + case PL_COLOR_PRIM_UNKNOWN: return MP_CSP_PRIM_AUTO; + case PL_COLOR_PRIM_BT_601_525: return MP_CSP_PRIM_BT_601_525; + case PL_COLOR_PRIM_BT_601_625: return MP_CSP_PRIM_BT_601_625; + case PL_COLOR_PRIM_BT_709: return MP_CSP_PRIM_BT_709; + case PL_COLOR_PRIM_BT_2020: return MP_CSP_PRIM_BT_2020; + case PL_COLOR_PRIM_BT_470M: return MP_CSP_PRIM_BT_470M; + case PL_COLOR_PRIM_APPLE: return MP_CSP_PRIM_APPLE; + case PL_COLOR_PRIM_ADOBE: return MP_CSP_PRIM_ADOBE; + case PL_COLOR_PRIM_PRO_PHOTO: return MP_CSP_PRIM_PRO_PHOTO; + case PL_COLOR_PRIM_CIE_1931: return MP_CSP_PRIM_CIE_1931; + case PL_COLOR_PRIM_DCI_P3: return 
MP_CSP_PRIM_DCI_P3; + case PL_COLOR_PRIM_DISPLAY_P3: return MP_CSP_PRIM_DISPLAY_P3; + case PL_COLOR_PRIM_V_GAMUT: return MP_CSP_PRIM_V_GAMUT; + case PL_COLOR_PRIM_S_GAMUT: return MP_CSP_PRIM_S_GAMUT; + case PL_COLOR_PRIM_EBU_3213: return MP_CSP_PRIM_EBU_3213; + case PL_COLOR_PRIM_FILM_C: return MP_CSP_PRIM_FILM_C; + case PL_COLOR_PRIM_ACES_AP0: return MP_CSP_PRIM_ACES_AP0; + case PL_COLOR_PRIM_ACES_AP1: return MP_CSP_PRIM_ACES_AP1; + case PL_COLOR_PRIM_COUNT: return MP_CSP_PRIM_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum pl_color_transfer mp_trc_to_pl(enum mp_csp_trc trc) +{ + switch (trc) { + case MP_CSP_TRC_AUTO: return PL_COLOR_TRC_UNKNOWN; + case MP_CSP_TRC_BT_1886: return PL_COLOR_TRC_BT_1886; + case MP_CSP_TRC_SRGB: return PL_COLOR_TRC_SRGB; + case MP_CSP_TRC_LINEAR: return PL_COLOR_TRC_LINEAR; + case MP_CSP_TRC_GAMMA18: return PL_COLOR_TRC_GAMMA18; + case MP_CSP_TRC_GAMMA20: return PL_COLOR_TRC_GAMMA20; + case MP_CSP_TRC_GAMMA22: return PL_COLOR_TRC_GAMMA22; + case MP_CSP_TRC_GAMMA24: return PL_COLOR_TRC_GAMMA24; + case MP_CSP_TRC_GAMMA26: return PL_COLOR_TRC_GAMMA26; + case MP_CSP_TRC_GAMMA28: return PL_COLOR_TRC_GAMMA28; + case MP_CSP_TRC_PRO_PHOTO: return PL_COLOR_TRC_PRO_PHOTO; + case MP_CSP_TRC_PQ: return PL_COLOR_TRC_PQ; + case MP_CSP_TRC_HLG: return PL_COLOR_TRC_HLG; + case MP_CSP_TRC_V_LOG: return PL_COLOR_TRC_V_LOG; + case MP_CSP_TRC_S_LOG1: return PL_COLOR_TRC_S_LOG1; + case MP_CSP_TRC_S_LOG2: return PL_COLOR_TRC_S_LOG2; + case MP_CSP_TRC_ST428: return PL_COLOR_TRC_ST428; + case MP_CSP_TRC_COUNT: return PL_COLOR_TRC_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum mp_csp_trc mp_trc_from_pl(enum pl_color_transfer trc) +{ + switch (trc){ + case PL_COLOR_TRC_UNKNOWN: return MP_CSP_TRC_AUTO; + case PL_COLOR_TRC_BT_1886: return MP_CSP_TRC_BT_1886; + case PL_COLOR_TRC_SRGB: return MP_CSP_TRC_SRGB; + case PL_COLOR_TRC_LINEAR: return MP_CSP_TRC_LINEAR; + case PL_COLOR_TRC_GAMMA18: return MP_CSP_TRC_GAMMA18; + case PL_COLOR_TRC_GAMMA20: return 
MP_CSP_TRC_GAMMA20; + case PL_COLOR_TRC_GAMMA22: return MP_CSP_TRC_GAMMA22; + case PL_COLOR_TRC_GAMMA24: return MP_CSP_TRC_GAMMA24; + case PL_COLOR_TRC_GAMMA26: return MP_CSP_TRC_GAMMA26; + case PL_COLOR_TRC_GAMMA28: return MP_CSP_TRC_GAMMA28; + case PL_COLOR_TRC_PRO_PHOTO: return MP_CSP_TRC_PRO_PHOTO; + case PL_COLOR_TRC_PQ: return MP_CSP_TRC_PQ; + case PL_COLOR_TRC_HLG: return MP_CSP_TRC_HLG; + case PL_COLOR_TRC_V_LOG: return MP_CSP_TRC_V_LOG; + case PL_COLOR_TRC_S_LOG1: return MP_CSP_TRC_S_LOG1; + case PL_COLOR_TRC_S_LOG2: return MP_CSP_TRC_S_LOG2; + case PL_COLOR_TRC_ST428: return MP_CSP_TRC_ST428; + case PL_COLOR_TRC_COUNT: return MP_CSP_TRC_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum pl_color_system mp_csp_to_pl(enum mp_csp csp) +{ + switch (csp) { + case MP_CSP_AUTO: return PL_COLOR_SYSTEM_UNKNOWN; + case MP_CSP_BT_601: return PL_COLOR_SYSTEM_BT_601; + case MP_CSP_BT_709: return PL_COLOR_SYSTEM_BT_709; + case MP_CSP_SMPTE_240M: return PL_COLOR_SYSTEM_SMPTE_240M; + case MP_CSP_BT_2020_NC: return PL_COLOR_SYSTEM_BT_2020_NC; + case MP_CSP_BT_2020_C: return PL_COLOR_SYSTEM_BT_2020_C; + case MP_CSP_RGB: return PL_COLOR_SYSTEM_RGB; + case MP_CSP_XYZ: return PL_COLOR_SYSTEM_XYZ; + case MP_CSP_YCGCO: return PL_COLOR_SYSTEM_YCGCO; + case MP_CSP_COUNT: return PL_COLOR_SYSTEM_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum pl_color_levels mp_levels_to_pl(enum mp_csp_levels levels) +{ + switch (levels) { + case MP_CSP_LEVELS_AUTO: return PL_COLOR_LEVELS_UNKNOWN; + case MP_CSP_LEVELS_TV: return PL_COLOR_LEVELS_TV; + case MP_CSP_LEVELS_PC: return PL_COLOR_LEVELS_PC; + case MP_CSP_LEVELS_COUNT: return PL_COLOR_LEVELS_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum mp_csp_levels mp_levels_from_pl(enum pl_color_levels levels) +{ + switch (levels){ + case PL_COLOR_LEVELS_UNKNOWN: return MP_CSP_LEVELS_AUTO; + case PL_COLOR_LEVELS_TV: return MP_CSP_LEVELS_TV; + case PL_COLOR_LEVELS_PC: return MP_CSP_LEVELS_PC; + case PL_COLOR_LEVELS_COUNT: return 
MP_CSP_LEVELS_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum pl_alpha_mode mp_alpha_to_pl(enum mp_alpha_type alpha) +{ + switch (alpha) { + case MP_ALPHA_AUTO: return PL_ALPHA_UNKNOWN; + case MP_ALPHA_STRAIGHT: return PL_ALPHA_INDEPENDENT; + case MP_ALPHA_PREMUL: return PL_ALPHA_PREMULTIPLIED; + } + + MP_ASSERT_UNREACHABLE(); +} + +enum pl_chroma_location mp_chroma_to_pl(enum mp_chroma_location chroma) +{ + switch (chroma) { + case MP_CHROMA_AUTO: return PL_CHROMA_UNKNOWN; + case MP_CHROMA_TOPLEFT: return PL_CHROMA_TOP_LEFT; + case MP_CHROMA_LEFT: return PL_CHROMA_LEFT; + case MP_CHROMA_CENTER: return PL_CHROMA_CENTER; + case MP_CHROMA_COUNT: return PL_CHROMA_COUNT; + } + + MP_ASSERT_UNREACHABLE(); +} + +void mp_map_dovi_metadata_to_pl(struct mp_image *mpi, + struct pl_frame *frame) +{ +#ifdef PL_HAVE_LAV_DOLBY_VISION + if (mpi->dovi) { + const AVDOVIMetadata *metadata = (AVDOVIMetadata *) mpi->dovi->data; + const AVDOVIRpuDataHeader *header = av_dovi_get_header(metadata); + + if (header->disable_residual_flag) { + // Only automatically map DoVi RPUs that don't require an EL + struct pl_dovi_metadata *dovi = talloc_ptrtype(mpi, dovi); + pl_frame_map_avdovi_metadata(frame, dovi, metadata); + } + } + +#if defined(PL_HAVE_LIBDOVI) + if (mpi->dovi_buf) + pl_hdr_metadata_from_dovi_rpu(&frame->color.hdr, mpi->dovi_buf->data, + mpi->dovi_buf->size); +#endif + +#endif // PL_HAVE_LAV_DOLBY_VISION +} diff --git a/video/out/placebo/utils.h b/video/out/placebo/utils.h new file mode 100644 index 0000000..bf780a8 --- /dev/null +++ b/video/out/placebo/utils.h @@ -0,0 +1,41 @@ +#pragma once + +#include "config.h" +#include "common/common.h" +#include "common/msg.h" +#include "video/csputils.h" +#include "video/mp_image.h" + +#include <libavutil/buffer.h> + +#include <libplacebo/common.h> +#include <libplacebo/log.h> +#include <libplacebo/colorspace.h> +#include <libplacebo/renderer.h> +#include <libplacebo/utils/libav.h> + +pl_log mppl_log_create(void *tactx, struct mp_log 
*log); +void mppl_log_set_probing(pl_log log, bool probing); + +static inline struct pl_rect2d mp_rect2d_to_pl(struct mp_rect rc) +{ + return (struct pl_rect2d) { + .x0 = rc.x0, + .y0 = rc.y0, + .x1 = rc.x1, + .y1 = rc.y1, + }; +} + +enum pl_color_primaries mp_prim_to_pl(enum mp_csp_prim prim); +enum mp_csp_prim mp_prim_from_pl(enum pl_color_primaries prim); +enum pl_color_transfer mp_trc_to_pl(enum mp_csp_trc trc); +enum mp_csp_trc mp_trc_from_pl(enum pl_color_transfer trc); +enum pl_color_system mp_csp_to_pl(enum mp_csp csp); +enum pl_color_levels mp_levels_to_pl(enum mp_csp_levels levels); +enum mp_csp_levels mp_levels_from_pl(enum pl_color_levels levels); +enum pl_alpha_mode mp_alpha_to_pl(enum mp_alpha_type alpha); +enum pl_chroma_location mp_chroma_to_pl(enum mp_chroma_location chroma); + +void mp_map_dovi_metadata_to_pl(struct mp_image *mpi, + struct pl_frame *frame); diff --git a/video/out/present_sync.c b/video/out/present_sync.c new file mode 100644 index 0000000..a3b1089 --- /dev/null +++ b/video/out/present_sync.c @@ -0,0 +1,126 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <time.h> + +#include "misc/linked_list.h" +#include "mpv_talloc.h" +#include "osdep/timer.h" +#include "present_sync.h" + +/* General nonsense about this mechanism. 
+ * + * This requires that the caller has access to two related values: + * (ust, msc): clock time and incrementing counter of last vsync (this is + * increased continuously, even if we don't swap) + * + * Note that this concept originates from the GLX_OML_sync_control extension + * which includes another parameter: sbc (swap counter of frame that was + * last displayed). Both the xorg present extension and wayland's + * presentation-time protocol do not include sbc values so they are omitted + * from this mechanism. mpv does not need to keep track of sbc calls and can + * have reliable presentation without it. + */ + +void present_sync_get_info(struct mp_present *present, struct vo_vsync_info *info) +{ + struct mp_present_entry *cur = present->head; + while (cur) { + if (cur->queue_display_time) + break; + cur = cur->list_node.next; + } + if (!cur) + return; + + info->vsync_duration = cur->vsync_duration; + info->skipped_vsyncs = cur->skipped_vsyncs; + info->last_queue_display_time = cur->queue_display_time; + + // Remove from the list, zero out everything, and append at the end + LL_REMOVE(list_node, present, cur); + *cur = (struct mp_present_entry){0}; + LL_APPEND(list_node, present, cur); +} + +struct mp_present *mp_present_initialize(void *talloc_ctx, struct mp_vo_opts *opts, int entries) +{ + struct mp_present *present = talloc_zero(talloc_ctx, struct mp_present); + for (int i = 0; i < entries; i++) { + struct mp_present_entry *entry = talloc_zero(present, struct mp_present_entry); + LL_APPEND(list_node, present, entry); + } + present->opts = opts; + return present; +} + +void present_sync_swap(struct mp_present *present) +{ + struct mp_present_entry *cur = present->head; + while (cur) { + if (!cur->queue_display_time) + break; + cur = cur->list_node.next; + } + if (!cur) + return; + + int64_t ust = cur->ust; + int64_t msc = cur->msc; + int64_t last_ust = cur->list_node.prev ? cur->list_node.prev->ust : 0; + int64_t last_msc = cur->list_node.prev ? 
cur->list_node.prev->msc : 0; + + // Avoid attempting to use any presentation statistics if the ust is 0 or has + // not actually updated (i.e. the last_ust is equal to ust). + if (!ust || ust == last_ust) { + cur->skipped_vsyncs = -1; + cur->vsync_duration = -1; + cur->queue_display_time = -1; + return; + } + + cur->skipped_vsyncs = 0; + int64_t ust_passed = ust ? ust - last_ust: 0; + int64_t msc_passed = msc ? msc - last_msc: 0; + if (msc_passed && ust_passed) + cur->vsync_duration = ust_passed / msc_passed; + + struct timespec ts; + if (clock_gettime(CLOCK_MONOTONIC, &ts)) + return; + + int64_t now_monotonic = MP_TIME_S_TO_NS(ts.tv_sec) + ts.tv_nsec; + int64_t ust_mp_time = mp_time_ns() - (now_monotonic - ust); + cur->queue_display_time = ust_mp_time; +} + +void present_sync_update_values(struct mp_present *present, int64_t ust, + int64_t msc) +{ + struct mp_present_entry *cur = present->head; + int index = 0; + while (cur && ++index) { + if (!cur->ust || index == present->opts->swapchain_depth) + break; + cur = cur->list_node.next; + } + if (!cur) + return; + + cur->ust = ust; + cur->msc = msc; +} diff --git a/video/out/present_sync.h b/video/out/present_sync.h new file mode 100644 index 0000000..ba6d0b3 --- /dev/null +++ b/video/out/present_sync.h @@ -0,0 +1,57 @@ +/* + * This file is part of mpv video player. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MP_PRESENT_SYNC_H +#define MP_PRESENT_SYNC_H + +#include <stdbool.h> +#include <stdint.h> +#include "vo.h" + +/* Generic helpers for obtaining presentation feedback from + * backend APIs. This requires ust/msc values. */ + +struct mp_present_entry { + int64_t ust; + int64_t msc; + int64_t vsync_duration; + int64_t skipped_vsyncs; + int64_t queue_display_time; + + struct { + struct mp_present_entry *next, *prev; + } list_node; +}; + +struct mp_present { + struct mp_present_entry *head, *tail; + struct mp_vo_opts *opts; +}; + +struct mp_present *mp_present_initialize(void *talloc_ctx, struct mp_vo_opts *opts, int entries); + +// Used during the get_vsync call to deliver the presentation statistics to the VO. +void present_sync_get_info(struct mp_present *present, struct vo_vsync_info *info); + +// Called after every buffer swap to update presentation statistics. +void present_sync_swap(struct mp_present *present); + +// Called anytime the backend delivers new ust/msc values. +void present_sync_update_values(struct mp_present *present, int64_t ust, + int64_t msc); + +#endif /* MP_PRESENT_SYNC_H */ diff --git a/video/out/vo.c b/video/out/vo.c new file mode 100644 index 0000000..50129fb --- /dev/null +++ b/video/out/vo.c @@ -0,0 +1,1441 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> +#include <math.h> +#include <stdatomic.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mpv_talloc.h" + +#include "config.h" +#include "osdep/timer.h" +#include "osdep/threads.h" +#include "misc/dispatch.h" +#include "misc/rendezvous.h" +#include "options/options.h" +#include "misc/bstr.h" +#include "vo.h" +#include "aspect.h" +#include "dr_helper.h" +#include "input/input.h" +#include "options/m_config.h" +#include "common/msg.h" +#include "common/global.h" +#include "common/stats.h" +#include "video/hwdec.h" +#include "video/mp_image.h" +#include "sub/osd.h" +#include "osdep/io.h" +#include "osdep/threads.h" + +extern const struct vo_driver video_out_mediacodec_embed; +extern const struct vo_driver video_out_x11; +extern const struct vo_driver video_out_vdpau; +extern const struct vo_driver video_out_xv; +extern const struct vo_driver video_out_gpu; +extern const struct vo_driver video_out_gpu_next; +extern const struct vo_driver video_out_libmpv; +extern const struct vo_driver video_out_null; +extern const struct vo_driver video_out_image; +extern const struct vo_driver video_out_lavc; +extern const struct vo_driver video_out_caca; +extern const struct vo_driver video_out_drm; +extern const struct vo_driver video_out_direct3d; +extern const struct vo_driver video_out_sdl; +extern const struct vo_driver video_out_vaapi; +extern const struct vo_driver video_out_dmabuf_wayland; +extern const struct vo_driver video_out_wlshm; +extern const struct vo_driver video_out_rpi; +extern const struct vo_driver video_out_tct; +extern const struct vo_driver video_out_sixel; +extern const struct vo_driver video_out_kitty; + +static const struct vo_driver *const video_out_drivers[] = +{ + &video_out_libmpv, +#if HAVE_ANDROID + &video_out_mediacodec_embed, +#endif + &video_out_gpu, + &video_out_gpu_next, +#if HAVE_VDPAU + &video_out_vdpau, +#endif +#if HAVE_DIRECT3D + &video_out_direct3d, +#endif +#if 
HAVE_WAYLAND && HAVE_MEMFD_CREATE + &video_out_wlshm, +#endif +#if HAVE_XV + &video_out_xv, +#endif +#if HAVE_SDL2_VIDEO + &video_out_sdl, +#endif +#if HAVE_DMABUF_WAYLAND + &video_out_dmabuf_wayland, +#endif +#if HAVE_VAAPI_X11 && HAVE_GPL + &video_out_vaapi, +#endif +#if HAVE_X11 + &video_out_x11, +#endif + &video_out_null, + // should not be auto-selected + &video_out_image, + &video_out_tct, +#if HAVE_CACA + &video_out_caca, +#endif +#if HAVE_DRM + &video_out_drm, +#endif +#if HAVE_RPI_MMAL + &video_out_rpi, +#endif +#if HAVE_SIXEL + &video_out_sixel, +#endif + &video_out_kitty, + &video_out_lavc, +}; + +struct vo_internal { + mp_thread thread; + struct mp_dispatch_queue *dispatch; + struct dr_helper *dr_helper; + + // --- The following fields are protected by lock + mp_mutex lock; + mp_cond wakeup; + + bool need_wakeup; + bool terminate; + + bool hasframe; + bool hasframe_rendered; + bool request_redraw; // redraw request from player to VO + bool want_redraw; // redraw request from VO to player + bool send_reset; // send VOCTRL_RESET + bool paused; + int queued_events; // event mask for the user + int internal_events; // event mask for us + + double nominal_vsync_interval; + + double vsync_interval; + int64_t *vsync_samples; + int num_vsync_samples; + int64_t num_total_vsync_samples; + int64_t prev_vsync; + double base_vsync; + int drop_point; + double estimated_vsync_interval; + double estimated_vsync_jitter; + bool expecting_vsync; + int64_t num_successive_vsyncs; + + int64_t flip_queue_offset; // queue flip events at most this much in advance + int64_t timing_offset; // same (but from options; not VO configured) + + int64_t delayed_count; + int64_t drop_count; + bool dropped_frame; // the previous frame was dropped + + struct vo_frame *current_frame; // last frame queued to the VO + + int64_t wakeup_pts; // time at which to pull frame from decoder + + bool rendering; // true if an image is being rendered + struct vo_frame *frame_queued; // should be drawn 
next + int req_frames; // VO's requested value of num_frames + uint64_t current_frame_id; + + double display_fps; + double reported_display_fps; + + struct stats_ctx *stats; +}; + +extern const struct m_sub_options gl_video_conf; + +static void forget_frames(struct vo *vo); +static MP_THREAD_VOID vo_thread(void *ptr); + +static bool get_desc(struct m_obj_desc *dst, int index) +{ + if (index >= MP_ARRAY_SIZE(video_out_drivers)) + return false; + const struct vo_driver *vo = video_out_drivers[index]; + *dst = (struct m_obj_desc) { + .name = vo->name, + .description = vo->description, + .priv_size = vo->priv_size, + .priv_defaults = vo->priv_defaults, + .options = vo->options, + .options_prefix = vo->options_prefix, + .global_opts = vo->global_opts, + .hidden = vo->encode, + .p = vo, + }; + return true; +} + +// For the vo option +const struct m_obj_list vo_obj_list = { + .get_desc = get_desc, + .description = "video outputs", + .aliases = { + {"gl", "gpu"}, + {"direct3d_shaders", "direct3d"}, + {"opengl", "gpu"}, + {"opengl-cb", "libmpv"}, + {0} + }, + .allow_trailer = true, + .disallow_positional_parameters = true, + .use_global_options = true, +}; + +static void dispatch_wakeup_cb(void *ptr) +{ + struct vo *vo = ptr; + vo_wakeup(vo); +} + +// Initialize or update options from vo->opts +static void read_opts(struct vo *vo) +{ + struct vo_internal *in = vo->in; + + mp_mutex_lock(&in->lock); + in->timing_offset = (uint64_t)(MP_TIME_S_TO_NS(vo->opts->timing_offset)); + mp_mutex_unlock(&in->lock); +} + +static void update_opts(void *p) +{ + struct vo *vo = p; + + if (m_config_cache_update(vo->opts_cache)) { + read_opts(vo); + + if (vo->driver->control) { + vo->driver->control(vo, VOCTRL_VO_OPTS_CHANGED, NULL); + // "Legacy" update of video position related options. + // Unlike VOCTRL_VO_OPTS_CHANGED, often not propagated to backends. 
+ vo->driver->control(vo, VOCTRL_SET_PANSCAN, NULL); + } + } + + if (vo->gl_opts_cache && m_config_cache_update(vo->gl_opts_cache)) { + // "Legacy" update of video GL renderer related options. + if (vo->driver->control) + vo->driver->control(vo, VOCTRL_UPDATE_RENDER_OPTS, NULL); + } + + if (m_config_cache_update(vo->eq_opts_cache)) { + // "Legacy" update of video equalizer related options. + if (vo->driver->control) + vo->driver->control(vo, VOCTRL_SET_EQUALIZER, NULL); + } +} + +// Does not include thread- and VO uninit. +static void dealloc_vo(struct vo *vo) +{ + forget_frames(vo); // implicitly synchronized + + // These must be free'd before vo->in->dispatch. + talloc_free(vo->opts_cache); + talloc_free(vo->gl_opts_cache); + talloc_free(vo->eq_opts_cache); + + mp_mutex_destroy(&vo->in->lock); + mp_cond_destroy(&vo->in->wakeup); + talloc_free(vo); +} + +static struct vo *vo_create(bool probing, struct mpv_global *global, + struct vo_extra *ex, char *name) +{ + assert(ex->wakeup_cb); + + struct mp_log *log = mp_log_new(NULL, global->log, "vo"); + struct m_obj_desc desc; + if (!m_obj_list_find(&desc, &vo_obj_list, bstr0(name))) { + mp_msg(log, MSGL_ERR, "Video output %s not found!\n", name); + talloc_free(log); + return NULL; + }; + struct vo *vo = talloc_ptrtype(NULL, vo); + *vo = (struct vo) { + .log = mp_log_new(vo, log, name), + .driver = desc.p, + .global = global, + .encode_lavc_ctx = ex->encode_lavc_ctx, + .input_ctx = ex->input_ctx, + .osd = ex->osd, + .monitor_par = 1, + .extra = *ex, + .probing = probing, + .in = talloc(vo, struct vo_internal), + }; + talloc_steal(vo, log); + *vo->in = (struct vo_internal) { + .dispatch = mp_dispatch_create(vo), + .req_frames = 1, + .estimated_vsync_jitter = -1, + .stats = stats_ctx_create(vo, global, "vo"), + }; + mp_dispatch_set_wakeup_fn(vo->in->dispatch, dispatch_wakeup_cb, vo); + mp_mutex_init(&vo->in->lock); + mp_cond_init(&vo->in->wakeup); + + vo->opts_cache = m_config_cache_alloc(NULL, global, &vo_sub_opts); + 
vo->opts = vo->opts_cache->opts; + + m_config_cache_set_dispatch_change_cb(vo->opts_cache, vo->in->dispatch, + update_opts, vo); + + vo->gl_opts_cache = m_config_cache_alloc(NULL, global, &gl_video_conf); + m_config_cache_set_dispatch_change_cb(vo->gl_opts_cache, vo->in->dispatch, + update_opts, vo); + + vo->eq_opts_cache = m_config_cache_alloc(NULL, global, &mp_csp_equalizer_conf); + m_config_cache_set_dispatch_change_cb(vo->eq_opts_cache, vo->in->dispatch, + update_opts, vo); + + mp_input_set_mouse_transform(vo->input_ctx, NULL, NULL); + if (vo->driver->encode != !!vo->encode_lavc_ctx) + goto error; + vo->priv = m_config_group_from_desc(vo, vo->log, global, &desc, name); + if (!vo->priv) + goto error; + + if (mp_thread_create(&vo->in->thread, vo_thread, vo)) + goto error; + if (mp_rendezvous(vo, 0) < 0) { // init barrier + mp_thread_join(vo->in->thread); + goto error; + } + return vo; + +error: + dealloc_vo(vo); + return NULL; +} + +struct vo *init_best_video_out(struct mpv_global *global, struct vo_extra *ex) +{ + struct mp_vo_opts *opts = mp_get_config_group(NULL, global, &vo_sub_opts); + struct m_obj_settings *vo_list = opts->video_driver_list; + struct vo *vo = NULL; + // first try the preferred drivers, with their optional subdevice param: + if (vo_list && vo_list[0].name) { + for (int n = 0; vo_list[n].name; n++) { + // Something like "-vo name," allows fallback to autoprobing. + if (strlen(vo_list[n].name) == 0) + goto autoprobe; + bool p = !!vo_list[n + 1].name; + vo = vo_create(p, global, ex, vo_list[n].name); + if (vo) + goto done; + } + goto done; + } +autoprobe: + // now try the rest... 
+ for (int i = 0; i < MP_ARRAY_SIZE(video_out_drivers); i++) { + const struct vo_driver *driver = video_out_drivers[i]; + if (driver == &video_out_null) + break; + vo = vo_create(true, global, ex, (char *)driver->name); + if (vo) + goto done; + } +done: + talloc_free(opts); + return vo; +} + +static void terminate_vo(void *p) +{ + struct vo *vo = p; + struct vo_internal *in = vo->in; + in->terminate = true; +} + +void vo_destroy(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_dispatch_run(in->dispatch, terminate_vo, vo); + mp_thread_join(vo->in->thread); + dealloc_vo(vo); +} + +// Wakeup the playloop to queue new video frames etc. +static void wakeup_core(struct vo *vo) +{ + vo->extra.wakeup_cb(vo->extra.wakeup_ctx); +} + +// Drop timing information on discontinuities like seeking. +// Always called locked. +static void reset_vsync_timings(struct vo *vo) +{ + struct vo_internal *in = vo->in; + in->drop_point = 0; + in->base_vsync = 0; + in->expecting_vsync = false; + in->num_successive_vsyncs = 0; +} + +static double vsync_stddef(struct vo *vo, double ref_vsync) +{ + struct vo_internal *in = vo->in; + double jitter = 0; + for (int n = 0; n < in->num_vsync_samples; n++) { + double diff = in->vsync_samples[n] - ref_vsync; + jitter += diff * diff; + } + return sqrt(jitter / in->num_vsync_samples); +} + +#define MAX_VSYNC_SAMPLES 1000 +#define DELAY_VSYNC_SAMPLES 10 + +// Check if we should switch to measured average display FPS if it seems +// "better" then the system-reported one. (Note that small differences are +// handled as drift instead.) 
static void check_estimated_display_fps(struct vo *vo)
{
    struct vo_internal *in = vo->in;

    bool use_estimated = false;
    // Only consider the measured interval once at least MAX_VSYNC_SAMPLES / 2
    // samples were collected in total, and only if it corresponds to a refresh
    // rate between 20 Hz and 400 Hz (intervals are in nanoseconds).
    if (in->num_total_vsync_samples >= MAX_VSYNC_SAMPLES / 2 &&
        in->estimated_vsync_interval <= 1e9 / 20.0 &&
        in->estimated_vsync_interval >= 1e9 / 400.0)
    {
        // Reject the estimate if any single sample deviates from it by a
        // quarter of the estimated interval or more.
        for (int n = 0; n < in->num_vsync_samples; n++) {
            if (fabs(in->vsync_samples[n] - in->estimated_vsync_interval)
                >= in->estimated_vsync_interval / 4)
                goto done;
        }
        // Prefer the estimate only if the samples scatter measurably less
        // (by more than 1%) around it than around the nominal interval.
        double mjitter = vsync_stddef(vo, in->estimated_vsync_interval);
        double njitter = vsync_stddef(vo, in->nominal_vsync_interval);
        if (mjitter * 1.01 < njitter)
            use_estimated = true;
        done: ;
    }
    // Trace output only; the decision itself is applied below.
    // NOTE(review): the 1e9 threshold equals one second in nanoseconds, so the
    // fabs() test looks like it is practically always true -- confirm whether
    // a much smaller epsilon was intended here.
    if (use_estimated == (fabs(in->vsync_interval - in->nominal_vsync_interval) < 1e9)) {
        if (use_estimated) {
            MP_TRACE(vo, "adjusting display FPS to a value closer to %.3f Hz\n",
                     1e9 / in->estimated_vsync_interval);
        } else {
            MP_TRACE(vo, "switching back to assuming display fps = %.3f Hz\n",
                     1e9 / in->nominal_vsync_interval);
        }
    }
    in->vsync_interval = use_estimated ? in->estimated_vsync_interval
                                       : in->nominal_vsync_interval;
}

// Attempt to detect vsyncs delayed/skipped by the driver. This tries to deal
// with strong jitter too, because some drivers have crap vsync timing.
static void vsync_skip_detection(struct vo *vo)
{
    struct vo_internal *in = vo->in;

    int window = 4;
    // t_r walks backwards through the measured vsync times, t_e through the
    // expected ones (base_vsync minus multiples of vsync_interval); diff
    // accumulates the difference between both over the last drop_point samples.
    double t_r = in->prev_vsync, t_e = in->base_vsync, diff = 0.0, desync_early = 0.0;
    for (int n = 0; n < in->drop_point; n++) {
        diff += t_r - t_e;
        t_r -= in->vsync_samples[n];
        t_e -= in->vsync_interval;
        // Snapshot of the accumulated difference over the most recent samples.
        if (n == window + 1)
            desync_early = diff / window;
    }
    double desync = diff / in->num_vsync_samples;
    if (in->drop_point > window * 2 &&
        fabs(desync - desync_early) >= in->vsync_interval * 3 / 4)
    {
        // Assume a drop. An underflow can technically speaking not be a drop
        // (it's up to the driver what this is supposed to mean), but no reason
        // to treat it differently.
        in->base_vsync = in->prev_vsync;
        in->delayed_count += 1;
        in->drop_point = 0;
        MP_STATS(vo, "vo-delayed");
    }
    if (in->drop_point > 10)
        in->base_vsync += desync / 10; // smooth out drift
}

// Update the vsync statistics after a frame was flipped: records the interval
// since the previous vsync as a new sample and refreshes the estimated
// interval and jitter.
// Always called locked.
static void update_vsync_timing_after_swap(struct vo *vo,
                                           struct vo_vsync_info *vsync)
{
    struct vo_internal *in = vo->in;

    int64_t vsync_time = vsync->last_queue_display_time;
    int64_t prev_vsync = in->prev_vsync;
    in->prev_vsync = vsync_time;

    if (!in->expecting_vsync) {
        reset_vsync_timings(vo);
        return;
    }

    // Ignore the first DELAY_VSYNC_SAMPLES vsyncs of a run.
    in->num_successive_vsyncs++;
    if (in->num_successive_vsyncs <= DELAY_VSYNC_SAMPLES)
        return;

    // Invalid or non-monotonic timestamp: forget the previous vsync so the
    // next call does not compute an interval from it.
    if (vsync_time <= 0 || vsync_time <= prev_vsync) {
        in->prev_vsync = 0;
        return;
    }

    if (prev_vsync <= 0)
        return;

    // Keep at most MAX_VSYNC_SAMPLES samples; the newest one is inserted at
    // index 0, so decrementing the count discards the oldest.
    if (in->num_vsync_samples >= MAX_VSYNC_SAMPLES)
        in->num_vsync_samples -= 1;
    MP_TARRAY_INSERT_AT(in, in->vsync_samples, in->num_vsync_samples, 0,
                        vsync_time - prev_vsync);
    in->drop_point = MPMIN(in->drop_point + 1, in->num_vsync_samples);
    in->num_total_vsync_samples += 1;
    if (in->base_vsync) {
        in->base_vsync += in->vsync_interval;
    } else {
        in->base_vsync = vsync_time;
    }

    double avg = 0;
    for (int n = 0; n < in->num_vsync_samples; n++) {
        assert(in->vsync_samples[n] > 0);
        avg += in->vsync_samples[n];
    }
    in->estimated_vsync_interval = avg / in->num_vsync_samples;
    // Jitter is stored relative to the interval currently in use.
    in->estimated_vsync_jitter =
        vsync_stddef(vo, in->vsync_interval) / in->vsync_interval;

    check_estimated_display_fps(vo);
    vsync_skip_detection(vo);

    MP_STATS(vo, "value %f jitter", in->estimated_vsync_jitter);
    MP_STATS(vo, "value %f vsync-diff", MP_TIME_NS_TO_S(in->vsync_samples[0]));
}

// Re-query the display FPS from the driver if a VO_EVENT_WIN_STATE event is
// pending, apply the display_fps_override option, and notify the player if
// the effective value changed.
// to be called from VO thread only
static void update_display_fps(struct vo *vo)
{
    struct vo_internal *in = vo->in;
    mp_mutex_lock(&in->lock);
    if (in->internal_events & VO_EVENT_WIN_STATE) {
        in->internal_events &= ~(unsigned)VO_EVENT_WIN_STATE;

        // The driver is queried with the lock released.
        mp_mutex_unlock(&in->lock);

        double fps = 0;
        vo->driver->control(vo, VOCTRL_GET_DISPLAY_FPS, &fps);

        mp_mutex_lock(&in->lock);

        in->reported_display_fps = fps;
    }

    // A positive override takes precedence over the driver-reported value.
    double display_fps = vo->opts->display_fps_override;
    if (display_fps <= 0)
        display_fps = in->reported_display_fps;

    if (in->display_fps != display_fps) {
        in->nominal_vsync_interval = display_fps > 0 ? 1e9 / display_fps : 0;
        // Never let the interval in use drop below 1 (it is used as divisor).
        in->vsync_interval = MPMAX(in->nominal_vsync_interval, 1);
        in->display_fps = display_fps;

        MP_VERBOSE(vo, "Assuming %f FPS for display sync.\n", display_fps);

        // make sure to update the player
        in->queued_events |= VO_EVENT_WIN_STATE;
        wakeup_core(vo);
    }

    mp_mutex_unlock(&in->lock);
}

// Warn if the video is flagged as rotated, but the rotation is not a multiple
// of 90 degrees or the driver lacks VO_CAP_ROTATE90.
static void check_vo_caps(struct vo *vo)
{
    int rot = vo->params->rotate;
    if (rot) {
        bool ok = rot % 90 ? false : (vo->driver->caps & VO_CAP_ROTATE90);
        if (!ok) {
            MP_WARN(vo, "Video is flagged as rotated by %d degrees, but the "
                    "video output does not support this.\n", rot);
        }
    }
}

// Dispatch callback behind vo_reconfig()/vo_reconfig2().
// p is a void*[3]: {struct vo *, struct mp_image *, int *result}.
static void run_reconfig(void *p)
{
    void **pp = p;
    struct vo *vo = pp[0];
    struct mp_image *img = pp[1];
    int *ret = pp[2];

    struct mp_image_params *params = &img->params;

    struct vo_internal *in = vo->in;

    MP_VERBOSE(vo, "reconfig to %s\n", mp_image_params_to_str(params));

    update_opts(vo);

    mp_image_params_get_dsize(params, &vo->dwidth, &vo->dheight);

    // Replace the stored parameters with a copy owned by vo.
    talloc_free(vo->params);
    vo->params = talloc_dup(vo, params);

    // Prefer the image-based reconfig2 entry point if the driver has one.
    if (vo->driver->reconfig2) {
        *ret = vo->driver->reconfig2(vo, img);
    } else {
        *ret = vo->driver->reconfig(vo, vo->params);
    }
    vo->config_ok = *ret >= 0;
    if (vo->config_ok) {
        check_vo_caps(vo);
    } else {
        talloc_free(vo->params);
        vo->params = NULL;
    }

    // Drop all frame state, including the frame kept for redraws.
    mp_mutex_lock(&in->lock);
    talloc_free(in->current_frame);
    in->current_frame = NULL;
    forget_frames(vo);
    reset_vsync_timings(vo);
    mp_mutex_unlock(&in->lock);

    update_display_fps(vo);
}

// Reconfigure the VO for the given image parameters. Returns the value
// reported by the driver's reconfig callback (negative means failure, see
// vo->config_ok in run_reconfig()).
int vo_reconfig(struct vo *vo, struct mp_image_params *params)
{
    int ret;
    // Wrap the parameters in a dummy image so both entry points can share
    // run_reconfig().
    struct mp_image dummy = {0};
    mp_image_set_params(&dummy, params);
    void *p[] = {vo, &dummy, &ret};
    mp_dispatch_run(vo->in->dispatch, run_reconfig, p);
    return ret;
}

// Like vo_reconfig(), but passes a full image to the driver.
int vo_reconfig2(struct vo *vo, struct mp_image *img)
{
    int ret;
    void *p[] = {vo, img, &ret};
    mp_dispatch_run(vo->in->dispatch, run_reconfig, p);
    return ret;
}

// Dispatch callback behind vo_control()/vo_control_async().
// p is a void*[4]: {struct vo *, request (as intptr_t), data, int *result}.
// The result pointer may be NULL (vo_control_async() passes NULL).
static void run_control(void *p)
{
    void **pp = p;
    struct vo *vo = pp[0];
    int request = (intptr_t)pp[1];
    void *data = pp[2];
    update_opts(vo);
    int ret = vo->driver->control(vo, request, data);
    if (pp[3])
        *(int *)pp[3] = ret;
}

// Run a VOCTRL through the VO's dispatch queue and return the driver's result.
int vo_control(struct vo *vo, int request, void *data)
{
    int ret;
    void *p[] = {vo, (void *)(intptr_t)request, data, &ret};
    mp_dispatch_run(vo->in->dispatch, run_control, p);
    return ret;
}

// Run vo_control() without waiting for a reply.
// (Only works for some VOCTRLs.)
void vo_control_async(struct vo *vo, int request, void *data)
{
    void *p[4] = {vo, (void *)(intptr_t)request, NULL, NULL};
    // The argument array must outlive this call, so it is copied to the heap.
    void **d = talloc_memdup(NULL, p, sizeof(p));

    switch (request) {
    case VOCTRL_UPDATE_PLAYBACK_STATE:
        // Copy the payload as a talloc child of d, so it is freed with d.
        d[2] = talloc_dup(d, (struct voctrl_playback_state *)data);
        break;
    case VOCTRL_KILL_SCREENSAVER:
    case VOCTRL_RESTORE_SCREENSAVER:
        // These requests are sent without a data payload.
        break;
    default:
        abort(); // requires explicit support
    }

    // presumably the dispatch queue frees d after run_control() ran -- confirm
    // against mp_dispatch_enqueue_autofree().
    mp_dispatch_enqueue_autofree(vo->in->dispatch, run_control, d);
}

// Reset the per-playback frame state (flags, counters, queued frame).
// must be called locked
static void forget_frames(struct vo *vo)
{
    struct vo_internal *in = vo->in;
    in->hasframe = false;
    in->hasframe_rendered = false;
    in->drop_count = 0;
    in->delayed_count = 0;
    talloc_free(in->frame_queued);
    in->frame_queued = NULL;
    // Skip ahead in the frame ID space (IDs are assigned in vo_queue_frame()).
    in->current_frame_id += VO_MAX_REQ_FRAMES + 1;
    // don't unref current_frame; we always want to be able to redraw it
    if (in->current_frame) {
        in->current_frame->num_vsyncs = 0; // but reset future repeats
        in->current_frame->display_synced = false; // mark discontinuity
    }
}

// VOs which have no special requirements on UI event loops etc.
can set the +// vo_driver.wait_events callback to this (and leave vo_driver.wakeup unset). +// This function must not be used or called for other purposes. +void vo_wait_default(struct vo *vo, int64_t until_time) +{ + struct vo_internal *in = vo->in; + + mp_mutex_lock(&in->lock); + if (!in->need_wakeup) + mp_cond_timedwait_until(&in->wakeup, &in->lock, until_time); + mp_mutex_unlock(&in->lock); +} + +// Called unlocked. +static void wait_vo(struct vo *vo, int64_t until_time) +{ + struct vo_internal *in = vo->in; + + if (vo->driver->wait_events) { + vo->driver->wait_events(vo, until_time); + } else { + vo_wait_default(vo, until_time); + } + mp_mutex_lock(&in->lock); + in->need_wakeup = false; + mp_mutex_unlock(&in->lock); +} + +static void wakeup_locked(struct vo *vo) +{ + struct vo_internal *in = vo->in; + + mp_cond_broadcast(&in->wakeup); + if (vo->driver->wakeup) + vo->driver->wakeup(vo); + in->need_wakeup = true; +} + +// Wakeup VO thread, and make it check for new events with VOCTRL_CHECK_EVENTS. +// To be used by threaded VO backends. +void vo_wakeup(struct vo *vo) +{ + struct vo_internal *in = vo->in; + + mp_mutex_lock(&in->lock); + wakeup_locked(vo); + mp_mutex_unlock(&in->lock); +} + +// Whether vo_queue_frame() can be called. If the VO is not ready yet, the +// function will return false, and the VO will call the wakeup callback once +// it's ready. +// next_pts is the exact time when the next frame should be displayed. If the +// VO is ready, but the time is too "early", return false, and call the wakeup +// callback once the time is right. +// If next_pts is negative, disable any timing and draw the frame as fast as +// possible. 
+bool vo_is_ready_for_frame(struct vo *vo, int64_t next_pts) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + bool blocked = vo->driver->initially_blocked && + !(in->internal_events & VO_EVENT_INITIAL_UNBLOCK); + bool r = vo->config_ok && !in->frame_queued && !blocked && + (!in->current_frame || in->current_frame->num_vsyncs < 1); + if (r && next_pts >= 0) { + // Don't show the frame too early - it would basically freeze the + // display by disallowing OSD redrawing or VO interaction. + // Actually render the frame at earliest the given offset before target + // time. + next_pts -= in->timing_offset; + next_pts -= in->flip_queue_offset; + int64_t now = mp_time_ns(); + if (next_pts > now) + r = false; + if (!in->wakeup_pts || next_pts < in->wakeup_pts) { + in->wakeup_pts = next_pts; + // If we have to wait, update the vo thread's timer. + if (!r) + wakeup_locked(vo); + } + } + mp_mutex_unlock(&in->lock); + return r; +} + +// Direct the VO thread to put the currently queued image on the screen. +// vo_is_ready_for_frame() must have returned true before this call. +// Ownership of frame is handed to the vo. +void vo_queue_frame(struct vo *vo, struct vo_frame *frame) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + assert(vo->config_ok && !in->frame_queued && + (!in->current_frame || in->current_frame->num_vsyncs < 1)); + in->hasframe = true; + frame->frame_id = ++(in->current_frame_id); + in->frame_queued = frame; + in->wakeup_pts = frame->display_synced + ? 0 : frame->pts + MPMAX(frame->duration, 0); + wakeup_locked(vo); + mp_mutex_unlock(&in->lock); +} + +// If a frame is currently being rendered (or queued), wait until it's done. +// Otherwise, return immediately. 
void vo_wait_frame(struct vo *vo)
{
    struct vo_internal *in = vo->in;
    mp_mutex_lock(&in->lock);
    // render_frame() broadcasts in->wakeup after each rendered frame.
    while (in->frame_queued || in->rendering)
        mp_cond_wait(&in->wakeup, &in->lock);
    mp_mutex_unlock(&in->lock);
}

// Wait until realtime is >= target. The wait ends early if a
// VO_EVENT_LIVE_RESIZING event is queued, or if the timed wait returns
// non-zero.
// called without lock
static void wait_until(struct vo *vo, int64_t target)
{
    struct vo_internal *in = vo->in;
    mp_mutex_lock(&in->lock);
    while (target > mp_time_ns()) {
        if (in->queued_events & VO_EVENT_LIVE_RESIZING)
            break;
        if (mp_cond_timedwait_until(&in->wakeup, &in->lock, target))
            break;
    }
    mp_mutex_unlock(&in->lock);
}

// Render (or drop) the next frame on the VO thread: promotes a queued frame
// to current_frame, decides whether to drop it, and otherwise draws and flips
// it with the lock released.
// Returns true if more display-synced work is pending, in which case
// vo_thread() does not sleep before the next iteration.
static bool render_frame(struct vo *vo)
{
    struct vo_internal *in = vo->in;
    struct vo_frame *frame = NULL;
    bool more_frames = false;

    update_display_fps(vo);

    mp_mutex_lock(&in->lock);

    if (in->frame_queued) {
        // A new frame replaces the current one.
        talloc_free(in->current_frame);
        in->current_frame = in->frame_queued;
        in->frame_queued = NULL;
    } else if (in->paused || !in->current_frame || !in->hasframe ||
               (in->current_frame->display_synced && in->current_frame->num_vsyncs < 1) ||
               !in->current_frame->display_synced)
    {
        // Nothing new to show: only a display-synced frame with vsyncs left
        // is repeated here.
        goto done;
    }

    // Work on a private reference, since the lock is released while drawing.
    frame = vo_frame_ref(in->current_frame);
    assert(frame);

    if (frame->display_synced) {
        frame->pts = 0;
        frame->duration = -1;
    }

    int64_t now = mp_time_ns();
    int64_t pts = frame->pts;
    int64_t duration = frame->duration;
    int64_t end_time = pts + duration;

    // Time at which we should flip_page on the VO.
    int64_t target = frame->display_synced ? 0 : pts - in->flip_queue_offset;

    // "normal" strict drop threshold.
    in->dropped_frame = duration >= 0 && end_time < now;

    // Each of the following conditions can veto the drop.
    in->dropped_frame &= !frame->display_synced;
    in->dropped_frame &= !(vo->driver->caps & VO_CAP_FRAMEDROP);
    in->dropped_frame &= frame->can_drop;
    // Even if we're hopelessly behind, rather degrade to 10 FPS playback,
    // instead of just freezing the display forever.
    in->dropped_frame &= now - in->prev_vsync < MP_TIME_MS_TO_NS(100);
    in->dropped_frame &= in->hasframe_rendered;

    // Setup parameters for the next time this frame is drawn. ("frame" is the
    // frame currently drawn, while in->current_frame is the potentially next.)
    in->current_frame->repeat = true;
    if (frame->display_synced) {
        // Increment the offset only if it's not the last vsync. The current_frame
        // can still be reused. This is mostly important for redraws that might
        // overshoot the target vsync point.
        if (in->current_frame->num_vsyncs > 1) {
            in->current_frame->vsync_offset += in->current_frame->vsync_interval;
            in->current_frame->ideal_frame_vsync += in->current_frame->ideal_frame_vsync_duration;
        }
        in->dropped_frame |= in->current_frame->num_vsyncs < 1;
    }
    if (in->current_frame->num_vsyncs > 0)
        in->current_frame->num_vsyncs -= 1;

    // Always render when paused (it's typically the last frame for a while).
    in->dropped_frame &= !in->paused;

    bool use_vsync = in->current_frame->display_synced && !in->paused;
    if (use_vsync && !in->expecting_vsync) // first DS frame in a row
        in->prev_vsync = now;
    in->expecting_vsync = use_vsync;

    // Store the initial value before we unlock.
    bool request_redraw = in->request_redraw;

    if (in->dropped_frame) {
        in->drop_count += 1;
    } else {
        in->rendering = true;
        in->hasframe_rendered = true;
        int64_t prev_drop_count = vo->in->drop_count;
        // Can the core queue new video now? Non-display-sync uses a separate
        // timer instead, but possibly benefits from preparing a frame early.
        bool can_queue = !in->frame_queued &&
            (in->current_frame->num_vsyncs < 1 || !use_vsync);
        mp_mutex_unlock(&in->lock);

        if (can_queue)
            wakeup_core(vo);

        stats_time_start(in->stats, "video-draw");

        vo->driver->draw_frame(vo, frame);

        stats_time_end(in->stats, "video-draw");

        wait_until(vo, target);

        stats_time_start(in->stats, "video-flip");

        vo->driver->flip_page(vo);

        // -1 marks the fields as not provided by the driver.
        struct vo_vsync_info vsync = {
            .last_queue_display_time = -1,
            .skipped_vsyncs = -1,
        };
        if (vo->driver->get_vsync)
            vo->driver->get_vsync(vo, &vsync);

        // Make up some crap if presentation feedback is missing.
        if (vsync.last_queue_display_time <= 0)
            vsync.last_queue_display_time = mp_time_ns();

        stats_time_end(in->stats, "video-flip");

        mp_mutex_lock(&in->lock);
        // The drop counter can be raised while unlocked (see
        // vo_increment_drop_count()); such a change counts as a drop.
        in->dropped_frame = prev_drop_count < vo->in->drop_count;
        in->rendering = false;

        update_vsync_timing_after_swap(vo, &vsync);
    }

    if (vo->driver->caps & VO_CAP_NORETAIN) {
        talloc_free(in->current_frame);
        in->current_frame = NULL;
    }

    if (in->dropped_frame) {
        MP_STATS(vo, "drop-vo");
    } else {
        // If the initial redraw request was true or mpv is still playing,
        // then we can clear it here since we just performed a redraw, or the
        // next loop will draw what we need. However if there initially is
        // no redraw request, then something can change this (i.e. the OSD)
        // while the vo was unlocked. If we are paused, don't touch
        // in->request_redraw in that case.
        if (request_redraw || !in->paused)
            in->request_redraw = false;
    }

    if (in->current_frame && in->current_frame->num_vsyncs &&
        in->current_frame->display_synced)
        more_frames = true;

    if (in->frame_queued && in->frame_queued->display_synced)
        more_frames = true;

    mp_cond_broadcast(&in->wakeup); // for vo_wait_frame()
    wakeup_core(vo);

done:
    // frame is NULL if nothing was rendered. If vo_driver.frame_owner is set,
    // the reference is not freed here.
    if (!vo->driver->frame_owner)
        talloc_free(frame);
    mp_mutex_unlock(&in->lock);

    return more_frames;
}

// Redraw the current frame (or a dummy frame if there is none, or if the
// driver is untimed). Does nothing if the VO is not configured or cannot
// retain frames.
static void do_redraw(struct vo *vo)
{
    struct vo_internal *in = vo->in;

    if (!vo->config_ok || (vo->driver->caps & VO_CAP_NORETAIN))
        return;

    mp_mutex_lock(&in->lock);
    in->request_redraw = false;
    bool full_redraw = in->dropped_frame;
    struct vo_frame *frame = NULL;
    if (!vo->driver->untimed)
        frame = vo_frame_ref(in->current_frame);
    if (frame)
        in->dropped_frame = false;
    // Fall back to an empty stack-allocated frame; it must not be freed.
    struct vo_frame dummy = {0};
    if (!frame)
        frame = &dummy;
    frame->redraw = !full_redraw; // unconditionally redraw if it was dropped
    frame->repeat = false;
    frame->still = true;
    frame->pts = 0;
    frame->duration = -1;
    mp_mutex_unlock(&in->lock);

    vo->driver->draw_frame(vo, frame);
    vo->driver->flip_page(vo);

    if (frame != &dummy && !vo->driver->frame_owner)
        talloc_free(frame);
}

// Adapter passed to dr_helper_create(): forwards to the driver's get_image
// callback, with the VO as context.
static struct mp_image *get_image_vo(void *ctx, int imgfmt, int w, int h,
                                     int stride_align, int flags)
{
    struct vo *vo = ctx;
    return vo->driver->get_image(vo, imgfmt, w, h, stride_align, flags);
}

// Entry point of the VO thread: initializes the driver, then loops over
// dispatch processing, event checks and rendering until in->terminate is set.
static MP_THREAD_VOID vo_thread(void *ptr)
{
    struct vo *vo = ptr;
    struct vo_internal *in = vo->in;
    bool vo_paused = false;

    mp_thread_set_name("vo");

    if (vo->driver->get_image) {
        in->dr_helper = dr_helper_create(in->dispatch, get_image_vo, vo);
        dr_helper_acquire_thread(in->dr_helper);
    }

    // Report the preinit result to the creating thread (which waits on the
    // same rendezvous in vo_create()).
    int r = vo->driver->preinit(vo) ? -1 : 0;
    mp_rendezvous(vo, r); // init barrier
    if (r < 0)
        goto done;

    read_opts(vo);
    update_display_fps(vo);
    vo_event(vo, VO_EVENT_WIN_STATE);

    while (1) {
        mp_dispatch_queue_process(vo->in->dispatch, 0);
        if (in->terminate)
            break;
        stats_event(in->stats, "iterations");
        vo->driver->control(vo, VOCTRL_CHECK_EVENTS, NULL);
        bool working = render_frame(vo);
        int64_t now = mp_time_ns();
        // Don't sleep at all while rendering is ongoing; otherwise use a
        // long (1000 seconds) default timeout.
        int64_t wait_until = now + MP_TIME_S_TO_NS(working ? 0 : 1000);

        mp_mutex_lock(&in->lock);
        if (in->wakeup_pts) {
            if (in->wakeup_pts > now) {
                wait_until = MPMIN(wait_until, in->wakeup_pts);
            } else {
                // The requested wakeup time has passed: notify the core.
                in->wakeup_pts = 0;
                wakeup_core(vo);
            }
        }
        // Transfer a redraw wish set on the vo struct to the locked state
        // that vo_want_redraw() reports.
        if (vo->want_redraw && !in->want_redraw) {
            in->want_redraw = true;
            wakeup_core(vo);
        }
        vo->want_redraw = false;
        bool redraw = in->request_redraw;
        bool send_reset = in->send_reset;
        in->send_reset = false;
        bool send_pause = in->paused != vo_paused;
        vo_paused = in->paused;
        mp_mutex_unlock(&in->lock);

        // Driver calls are made with the lock released.
        if (send_reset)
            vo->driver->control(vo, VOCTRL_RESET, NULL);
        if (send_pause)
            vo->driver->control(vo, vo_paused ? VOCTRL_PAUSE : VOCTRL_RESUME, NULL);
        if (wait_until > now && redraw) {
            do_redraw(vo); // now is a good time
            continue;
        }
        if (vo->want_redraw) // might have been set by VOCTRLs
            wait_until = 0;

        if (wait_until <= now)
            continue;

        wait_vo(vo, wait_until);
    }
    forget_frames(vo); // implicitly synchronized
    talloc_free(in->current_frame);
    in->current_frame = NULL;
    vo->driver->uninit(vo);
done:
    TA_FREEP(&in->dr_helper);
    MP_THREAD_RETURN();
}

// Set the paused state. On a change, the vsync timings are reset and the VO
// thread is woken up; if the last frame was dropped, a redraw is requested
// when pausing.
void vo_set_paused(struct vo *vo, bool paused)
{
    struct vo_internal *in = vo->in;
    mp_mutex_lock(&in->lock);
    if (in->paused != paused) {
        in->paused = paused;
        if (in->paused && in->dropped_frame) {
            in->request_redraw = true;
            wakeup_core(vo);
        }
        reset_vsync_timings(vo);
        wakeup_locked(vo);
    }
    mp_mutex_unlock(&in->lock);
}

// Return the current drop counter (reset by forget_frames()).
int64_t vo_get_drop_count(struct vo *vo)
{
    mp_mutex_lock(&vo->in->lock);
    int64_t r = vo->in->drop_count;
    mp_mutex_unlock(&vo->in->lock);
    return r;
}

// Add n to the drop counter.
void vo_increment_drop_count(struct vo *vo, int64_t n)
{
    mp_mutex_lock(&vo->in->lock);
    vo->in->drop_count += n;
    mp_mutex_unlock(&vo->in->lock);
}

// Make the VO redraw the OSD at some point in the future.
void vo_redraw(struct vo *vo)
{
    struct vo_internal *in = vo->in;
    mp_mutex_lock(&in->lock);
    if (!in->request_redraw) {
        in->request_redraw = true;
        in->want_redraw = false;
        wakeup_locked(vo);
    }
    mp_mutex_unlock(&in->lock);
}

// Whether the VO has asked for a redraw (flag set by vo_thread(), cleared by
// vo_redraw()).
bool vo_want_redraw(struct vo *vo)
{
    struct vo_internal *in = vo->in;
    mp_mutex_lock(&in->lock);
    bool r = in->want_redraw;
    mp_mutex_unlock(&in->lock);
    return r;
}

// Reset frame and timing state, and make the VO thread send VOCTRL_RESET to
// the driver.
void vo_seek_reset(struct vo *vo)
{
    struct vo_internal *in = vo->in;
    mp_mutex_lock(&in->lock);
    forget_frames(vo);
    reset_vsync_timings(vo);
    in->send_reset = true;
    wakeup_locked(vo);
    mp_mutex_unlock(&in->lock);
}

// Return true if there is still a frame being displayed (or queued).
// If this returns true, a wakeup some time in the future is guaranteed.
+bool vo_still_displaying(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + bool working = in->rendering || in->frame_queued; + mp_mutex_unlock(&in->lock); + return working && in->hasframe; +} + +// Whether at least 1 frame was queued or rendered since last seek or reconfig. +bool vo_has_frame(struct vo *vo) +{ + return vo->in->hasframe; +} + +static void run_query_format(void *p) +{ + void **pp = p; + struct vo *vo = pp[0]; + uint8_t *list = pp[1]; + for (int format = IMGFMT_START; format < IMGFMT_END; format++) + list[format - IMGFMT_START] = vo->driver->query_format(vo, format); +} + +// For each item in the list (allocated as uint8_t[IMGFMT_END - IMGFMT_START]), +// set the supported format flags. +void vo_query_formats(struct vo *vo, uint8_t *list) +{ + void *p[] = {vo, list}; + mp_dispatch_run(vo->in->dispatch, run_query_format, p); +} + +// Calculate the appropriate source and destination rectangle to +// get a correctly scaled picture, including pan-scan. +// out_src: visible part of the video +// out_dst: area of screen covered by the video source rectangle +// out_osd: OSD size, OSD margins, etc. +// Must be called from the VO thread only. +void vo_get_src_dst_rects(struct vo *vo, struct mp_rect *out_src, + struct mp_rect *out_dst, struct mp_osd_res *out_osd) +{ + if (!vo->params) { + *out_src = *out_dst = (struct mp_rect){0}; + *out_osd = (struct mp_osd_res){0}; + return; + } + mp_get_src_dst_rects(vo->log, vo->opts, vo->driver->caps, vo->params, + vo->dwidth, vo->dheight, vo->monitor_par, + out_src, out_dst, out_osd); +} + +// flip_page[_timed] will be called offset_us nanoseconds too early. +// (For vo_vdpau, which does its own timing.) +// num_req_frames set the requested number of requested vo_frame.frames. +// (For vo_gpu interpolation.) 
+void vo_set_queue_params(struct vo *vo, int64_t offset_ns, int num_req_frames) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + in->flip_queue_offset = offset_ns; + in->req_frames = MPCLAMP(num_req_frames, 1, VO_MAX_REQ_FRAMES); + mp_mutex_unlock(&in->lock); +} + +int vo_get_num_req_frames(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + int res = in->req_frames; + mp_mutex_unlock(&in->lock); + return res; +} + +double vo_get_vsync_interval(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + double res = vo->in->vsync_interval > 1 ? vo->in->vsync_interval : -1; + mp_mutex_unlock(&in->lock); + return res; +} + +double vo_get_estimated_vsync_interval(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + double res = in->estimated_vsync_interval; + mp_mutex_unlock(&in->lock); + return res; +} + +double vo_get_estimated_vsync_jitter(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + double res = in->estimated_vsync_jitter; + mp_mutex_unlock(&in->lock); + return res; +} + +// Get the time in seconds at after which the currently rendering frame will +// end. Returns positive values if the frame is yet to be finished, negative +// values if it already finished. +// This can only be called while no new frame is queued (after +// vo_is_ready_for_frame). Returns 0 for non-display synced frames, or if the +// deadline for continuous display was missed. +double vo_get_delay(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + assert (!in->frame_queued); + int64_t res = 0; + if (in->base_vsync && in->vsync_interval > 1 && in->current_frame) { + res = in->base_vsync; + int extra = !!in->rendering; + res += (in->current_frame->num_vsyncs + extra) * in->vsync_interval; + if (!in->current_frame->display_synced) + res = 0; + } + mp_mutex_unlock(&in->lock); + return res ? 
MP_TIME_NS_TO_S(res - mp_time_ns()) : 0; +} + +void vo_discard_timing_info(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + reset_vsync_timings(vo); + mp_mutex_unlock(&in->lock); +} + +int64_t vo_get_delayed_count(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + int64_t res = vo->in->delayed_count; + mp_mutex_unlock(&in->lock); + return res; +} + +double vo_get_display_fps(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + double res = vo->in->display_fps; + mp_mutex_unlock(&in->lock); + return res; +} + +// Set specific event flags, and wakeup the playback core if needed. +// vo_query_and_reset_events() can retrieve the events again. +void vo_event(struct vo *vo, int event) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + if ((in->queued_events & event & VO_EVENTS_USER) != (event & VO_EVENTS_USER)) + wakeup_core(vo); + if (event) + wakeup_locked(vo); + in->queued_events |= event; + in->internal_events |= event; + mp_mutex_unlock(&in->lock); +} + +// Check event flags set with vo_event(). Return the mask of events that was +// set and included in the events parameter. Clear the returned events. 
+int vo_query_and_reset_events(struct vo *vo, int events) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + int r = in->queued_events & events; + in->queued_events &= ~(unsigned)r; + mp_mutex_unlock(&in->lock); + return r; +} + +struct mp_image *vo_get_current_frame(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + struct mp_image *r = NULL; + if (vo->in->current_frame) + r = mp_image_new_ref(vo->in->current_frame->current); + mp_mutex_unlock(&in->lock); + return r; +} + +struct vo_frame *vo_get_current_vo_frame(struct vo *vo) +{ + struct vo_internal *in = vo->in; + mp_mutex_lock(&in->lock); + struct vo_frame *r = vo_frame_ref(vo->in->current_frame); + mp_mutex_unlock(&in->lock); + return r; +} + +struct mp_image *vo_get_image(struct vo *vo, int imgfmt, int w, int h, + int stride_align, int flags) +{ + if (vo->driver->get_image_ts) + return vo->driver->get_image_ts(vo, imgfmt, w, h, stride_align, flags); + if (vo->in->dr_helper) + return dr_helper_get_image(vo->in->dr_helper, imgfmt, w, h, stride_align, flags); + return NULL; +} + +static void destroy_frame(void *p) +{ + struct vo_frame *frame = p; + for (int n = 0; n < frame->num_frames; n++) + talloc_free(frame->frames[n]); +} + +// Return a new reference to the given frame. The image pointers are also new +// references. Calling talloc_free() on the frame unrefs all currently set +// image references. (Assuming current==frames[0].) +struct vo_frame *vo_frame_ref(struct vo_frame *frame) +{ + if (!frame) + return NULL; + + struct vo_frame *new = talloc_ptrtype(NULL, new); + talloc_set_destructor(new, destroy_frame); + *new = *frame; + for (int n = 0; n < frame->num_frames; n++) + new->frames[n] = mp_image_new_ref(frame->frames[n]); + new->current = new->num_frames ? 
new->frames[0] : NULL; + return new; +} + +/* + * lookup an integer in a table, table must have 0 as the last key + * param: key key to search for + * returns translation corresponding to key or "to" value of last mapping + * if not found. + */ +int lookup_keymap_table(const struct mp_keymap *map, int key) +{ + while (map->from && map->from != key) + map++; + return map->to; +} + +struct mp_image_params vo_get_current_params(struct vo *vo) +{ + struct mp_image_params p = {0}; + mp_mutex_lock(&vo->in->lock); + if (vo->params) + p = *vo->params; + mp_mutex_unlock(&vo->in->lock); + return p; +} diff --git a/video/out/vo.h b/video/out/vo.h new file mode 100644 index 0000000..e38dcf8 --- /dev/null +++ b/video/out/vo.h @@ -0,0 +1,544 @@ +/* + * Copyright (C) Aaron Holtzman - Aug 1999 + * + * Strongly modified, most parts rewritten: A'rpi/ESP-team - 2000-2001 + * (C) MPlayer developers + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
 */

#ifndef MPLAYER_VIDEO_OUT_H
#define MPLAYER_VIDEO_OUT_H

#include <inttypes.h>
#include <stdbool.h>

#include "video/img_format.h"
#include "common/common.h"
#include "options/options.h"

// VO event flags. Each value is a distinct bit, so several events can be
// combined in one mask (see vo_event() and vo_query_and_reset_events()).
enum {
    // VO needs to redraw
    VO_EVENT_EXPOSE                     = 1 << 0,
    // VO needs to update state to a new window size
    VO_EVENT_RESIZE                     = 1 << 1,
    // The ICC profile needs to be reloaded
    VO_EVENT_ICC_PROFILE_CHANGED        = 1 << 2,
    // Some other window state changed (position, window state, fps)
    VO_EVENT_WIN_STATE                  = 1 << 3,
    // The ambient light conditions changed and need to be reloaded
    VO_EVENT_AMBIENT_LIGHTING_CHANGED   = 1 << 4,
    // Special mechanism for making resizing with Cocoa react faster
    VO_EVENT_LIVE_RESIZING              = 1 << 5,
    // For VOCTRL_GET_HIDPI_SCALE changes.
    VO_EVENT_DPI                        = 1 << 6,
    // Special thing for encode mode (vo_driver.initially_blocked).
    // Part of VO_EVENTS_USER to make vo_is_ready_for_frame() work properly.
    VO_EVENT_INITIAL_UNBLOCK            = 1 << 7,
    // presumably signals a window focus change (cf. VOCTRL_GET_FOCUSED) --
    // TODO confirm
    VO_EVENT_FOCUS                      = 1 << 8,

    // Set of events the player core may be interested in.
    VO_EVENTS_USER = VO_EVENT_RESIZE | VO_EVENT_WIN_STATE | VO_EVENT_DPI |
                     VO_EVENT_INITIAL_UNBLOCK | VO_EVENT_FOCUS,
};

// Request codes for vo_driver.control(). Where a request takes a data
// argument, its type is noted in the trailing comment of the entry.
enum mp_voctrl {
    /* signal a device reset seek */
    VOCTRL_RESET = 1,
    /* Handle input and redraw events, called by vo_check_events() */
    VOCTRL_CHECK_EVENTS,
    /* signal a device pause */
    VOCTRL_PAUSE,
    /* start/resume playback */
    VOCTRL_RESUME,

    VOCTRL_SET_PANSCAN,
    VOCTRL_SET_EQUALIZER,

    // Triggered by any change to mp_vo_opts. This is for convenience. In theory,
    // you could install your own listener.
    VOCTRL_VO_OPTS_CHANGED,

    /* private to vo_gpu */
    VOCTRL_LOAD_HWDEC_API,

    // Only used internally in vo_libmpv
    VOCTRL_PREINIT,
    VOCTRL_UNINIT,
    VOCTRL_RECONFIG,

    VOCTRL_UPDATE_WINDOW_TITLE,         // char*
    VOCTRL_UPDATE_PLAYBACK_STATE,       // struct voctrl_playback_state*

    VOCTRL_PERFORMANCE_DATA,            // struct voctrl_performance_data*

    VOCTRL_SET_CURSOR_VISIBILITY,       // bool*

    VOCTRL_CONTENT_TYPE,                // enum mp_content_type*

    VOCTRL_KILL_SCREENSAVER,
    VOCTRL_RESTORE_SCREENSAVER,

    // Return or set window size (not-fullscreen mode only - if fullscreened,
    // these must access the not-fullscreened window size only).
    VOCTRL_GET_UNFS_WINDOW_SIZE,        // int[2] (w/h)
    VOCTRL_SET_UNFS_WINDOW_SIZE,        // int[2] (w/h)

    VOCTRL_GET_FOCUSED,                 // bool*

    // char *** (NULL terminated array compatible with CONF_TYPE_STRING_LIST)
    // names for displays the window is on
    VOCTRL_GET_DISPLAY_NAMES,

    // Retrieve window contents. (Normal screenshots use vo_get_current_frame().)
    // Deprecated for VOCTRL_SCREENSHOT with corresponding flags.
    VOCTRL_SCREENSHOT_WIN,              // struct mp_image**

    // A normal screenshot - VOs can react to this if vo_get_current_frame() is
    // not sufficient.
    VOCTRL_SCREENSHOT,                  // struct voctrl_screenshot*

    // Sent when the GL renderer options changed (see update_opts() in vo.c).
    VOCTRL_UPDATE_RENDER_OPTS,

    VOCTRL_GET_ICC_PROFILE,             // bstr*
    VOCTRL_GET_AMBIENT_LUX,             // int*
    VOCTRL_GET_DISPLAY_FPS,             // double*
    VOCTRL_GET_HIDPI_SCALE,             // double*
    VOCTRL_GET_DISPLAY_RES,             // int[2]
    VOCTRL_GET_WINDOW_ID,               // int64_t*

    /* private to vo_gpu and vo_gpu_next */
    VOCTRL_EXTERNAL_RESIZE,
};

// Helper to expose what kind of content is currently playing to the VO.
// Describes one frame handed to vo_driver.draw_frame(): the image to draw
// (plus a window of upcoming images) together with the timing information
// the VO needs in order to present it.
struct vo_frame {
    // If > 0, realtime when frame should be shown, in mp_time_ns() units.
    // If 0, present immediately.
    int64_t pts;
    // Approximate frame duration, in ns.
    // NOTE(review): this is a plain int; where int is 32 bits a nanosecond
    // count tops out at roughly 2.1 seconds - confirm callers clamp.
    int duration;
    // Realtime of estimated distance between 2 vsync events.
    double vsync_interval;
    // "ideal" display time within the vsync
    double vsync_offset;
    // "ideal" frame duration (can be different from num_vsyncs*vsync_interval
    // up to a vsync) - valid for the entire frame, i.e. not changed for repeats
    double ideal_frame_duration;
    // "ideal" frame vsync point relative to the pts
    double ideal_frame_vsync;
    // "ideal" frame duration relative to the pts
    double ideal_frame_vsync_duration;
    // how often the frame will be repeated (does not include OSD redraws)
    int num_vsyncs;
    // Set if the current frame is repeated from the previous. It's guaranteed
    // that the current is the same as the previous one, even if the image
    // pointer is different.
    // The repeat flag is set if exactly the same frame should be rendered
    // again (and the OSD does not need to be redrawn).
    // A repeat frame can be redrawn, in which case repeat==redraw==true, and
    // OSD should be updated.
    bool redraw, repeat;
    // The frame is not in movement - e.g. redrawing while paused.
    bool still;
    // Frames are output as fast as possible, with implied vsync blocking.
    bool display_synced;
    // Dropping the frame is allowed if the VO is behind.
    bool can_drop;
    // The current frame to be drawn.
    // Warning: When OSD should be redrawn in --force-window --idle mode, this
    //          can be NULL. The VO should draw a black background, OSD on top.
    struct mp_image *current;
    // List of future images, starting with the current one. This does not
    // care about repeated frames - it simply contains the next real frames.
    // vo_set_queue_params() sets how many future frames this should include.
    // The actual number of frames delivered to the VO can be lower.
    // frames[0] is current, frames[1] is the next frame.
    // Note that some future frames may never be sent as current frame to the
    // VO if frames are dropped.
    // num_frames is the number of valid entries in frames[]; the array itself
    // has room for VO_MAX_REQ_FRAMES entries.
    int num_frames;
    struct mp_image *frames[VO_MAX_REQ_FRAMES];
    // Speed unadjusted, approximate frame duration inferred from past frames
    double approx_duration;
    // ID for frames[0] (== current). If current==NULL, the number is
    // meaningless. Otherwise, it's a unique ID for the frame. The ID for
    // a frame is guaranteed not to change (instant redraws will use the same
    // ID). frames[n] has the ID frame_id+n, with the guarantee that frame
    // drops or reconfigs will keep the guarantee.
    // The ID is never 0 (unless num_frames==0). IDs are strictly monotonic.
    uint64_t frame_id;
};
+ int64_t skipped_vsyncs; +}; + +struct vo_driver { + // Encoding functionality, which can be invoked via --o only. + bool encode; + + // This requires waiting for a VO_EVENT_INITIAL_UNBLOCK event before the + // first frame can be sent. Doing vo_reconfig*() calls is allowed though. + // Encode mode uses this, the core uses vo_is_ready_for_frame() to + // implicitly check for this. + bool initially_blocked; + + // VO_CAP_* bits + int caps; + + // Disable video timing, push frames as quickly as possible, never redraw. + bool untimed; + + // The VO is responsible for freeing frames. + bool frame_owner; + + const char *name; + const char *description; + + /* + * returns: zero on successful initialization, non-zero on error. + */ + int (*preinit)(struct vo *vo); + + /* + * Whether the given image format is supported and config() will succeed. + * format: one of IMGFMT_* + * returns: 0 on not supported, otherwise 1 + */ + int (*query_format)(struct vo *vo, int format); + + /* + * Initialize or reconfigure the display driver. + * params: video parameters, like pixel format and frame size + * returns: < 0 on error, >= 0 on success + */ + int (*reconfig)(struct vo *vo, struct mp_image_params *params); + + /* + * Like reconfig(), but provides the whole mp_image for which the change is + * required. (The image doesn't have to have real data.) + */ + int (*reconfig2)(struct vo *vo, struct mp_image *img); + + /* + * Control interface + */ + int (*control)(struct vo *vo, uint32_t request, void *data); + + /* + * lavc callback for direct rendering + * + * Optional. To make implementation easier, the callback is always run on + * the VO thread. The returned mp_image's destructor callback is also called + * on the VO thread, even if it's actually unref'ed from another thread. + * + * It is guaranteed that the last reference to an image is destroyed before + * ->uninit is called (except it's not - libmpv screenshots can hold the + * reference longer, fuck). 
+ * + * The allocated image - or a part of it, can be passed to draw_frame(). The + * point of this mechanism is that the decoder directly renders to GPU + * staging memory, to avoid a memcpy on frame upload. But this is not a + * guarantee. A filter could change the data pointers or return a newly + * allocated image. It's even possible that only 1 plane uses the buffer + * allocated by the get_image function. The VO has to check for this. + * + * stride_align is always a value >=1. The stride values of the returned + * image must be divisible by this value. This may be a non power of two. + * + * flags is a combination of VO_DR_FLAG_* flags. + * + * Currently, the returned image must have exactly 1 AVBufferRef set, for + * internal implementation simplicity. + * + * returns: an allocated, refcounted image; if NULL is returned, the caller + * will silently fallback to a default allocator + */ + struct mp_image *(*get_image)(struct vo *vo, int imgfmt, int w, int h, + int stride_align, int flags); + + /* + * Thread-safe variant of get_image. Set at most one of these callbacks. + * This excludes _all_ synchronization magic. The only guarantee is that + * vo_driver.uninit is not called before this function returns. + */ + struct mp_image *(*get_image_ts)(struct vo *vo, int imgfmt, int w, int h, + int stride_align, int flags); + + /* Render the given frame. Note that this is also called when repeating + * or redrawing frames. + * + * frame is freed by the caller if the callee did not assume ownership + * of the frames, but in any case the callee can still modify the + * contained data and references. + */ + void (*draw_frame)(struct vo *vo, struct vo_frame *frame); + + /* + * Blit/Flip buffer to the screen. Must be called after each frame! + */ + void (*flip_page)(struct vo *vo); + + /* + * Return presentation feedback. The implementation should not touch fields + * it doesn't support; the info fields are preinitialized to neutral values. 
// Per-instance state of a video output. The fields are grouped by the
// access rules stated in the "---" section comments below.
struct vo {
    const struct vo_driver *driver; // backend implementing this VO
    struct mp_log *log;             // Using e.g. "[vo/vdpau]" as prefix
    // Driver-private data (drivers read it back as their own struct type).
    // vo_driver.priv_size / priv_defaults describe how it is allocated.
    void *priv;
    struct mpv_global *global;
    // Windowing-backend state pointers.
    // NOTE(review): presumably only the one belonging to the active backend
    // is set - confirm against the backends.
    struct vo_x11_state *x11;
    struct vo_w32_state *w32;
    struct vo_wayland_state *wl;
    struct vo_android_state *android;
    struct vo_drm_state *drm;
    struct mp_hwdec_devices *hwdec_devs;
    struct input_ctx *input_ctx;
    struct osd_state *osd;
    struct encode_lavc_context *encode_lavc_ctx;
    struct vo_internal *in;         // internal state; holds the lock (in->lock)
    struct vo_extra extra;

    // --- The following fields are generally only changed during initialization.

    bool probing;

    // --- The following fields are only changed with vo_reconfig(), and can
    //     be accessed unsynchronized (read-only).
    // NOTE(review): vo_get_current_params() nevertheless reads "params" while
    // holding in->lock; confirm which rule callers on other threads must follow.

    int config_ok;      // Last config call was successful?
    struct mp_image_params *params; // Configured parameters (as in vo_reconfig)

    // --- The following fields can be accessed only by the VO thread, or from
    //     anywhere _if_ the VO thread is suspended (use vo->dispatch).
    // NOTE(review): this struct declares no "dispatch" member; the reference
    // above looks stale - confirm.

    struct m_config_cache *opts_cache; // cache for ->opts
    struct mp_vo_opts *opts;
    struct m_config_cache *gl_opts_cache;
    struct m_config_cache *eq_opts_cache;

    bool want_redraw;   // redraw as soon as possible

    // current window state
    int dwidth;
    int dheight;
    float monitor_par;
};
// One entry of an integer translation table for lookup_keymap_table().
// A table is an array of these entries. lookup_keymap_table() stops scanning
// at the first entry whose "from" is 0, so every table must end with one.
struct mp_keymap {
    int from;   // value to match; 0 marks the end of the table
    int to;     // translation for a match; the terminating entry's "to" is
                // what a lookup returns when nothing matches
};
// Returns "to" of the first entry with from == key, otherwise "to" of the
// terminating entry.
int lookup_keymap_table(const struct mp_keymap *map, int key);
+ * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> +#include <string.h> +#include <time.h> +#include <errno.h> +#include <assert.h> +#include <caca.h> + +#include "config.h" +#include "vo.h" +#include "video/mp_image.h" + +#include "input/keycodes.h" +#include "input/input.h" +#include "common/msg.h" +#include "input/input.h" + +#include "config.h" +#if !HAVE_GPL +#error GPL only +#endif + +struct priv { + caca_canvas_t *canvas; + caca_display_t *display; + caca_dither_t *dither; + uint8_t *dither_buffer; + const char *dither_antialias; + const char *dither_charset; + const char *dither_color; + const char *dither_algo; + + /* image infos */ + int image_format; + int image_width; + int image_height; + + int screen_w, screen_h; +}; + +/* We want 24bpp always for now */ +static const unsigned int bpp = 24; +static const unsigned int depth = 3; +static const unsigned int rmask = 0xff0000; +static const unsigned int gmask = 0x00ff00; +static const unsigned int bmask = 0x0000ff; +static const unsigned int amask = 0; + +static int resize(struct vo *vo) +{ + struct priv *priv = vo->priv; + priv->screen_w = caca_get_canvas_width(priv->canvas); + priv->screen_h = caca_get_canvas_height(priv->canvas); + + caca_free_dither(priv->dither); + talloc_free(priv->dither_buffer); + + priv->dither = caca_create_dither(bpp, 
priv->image_width, priv->image_height, + depth * priv->image_width, + rmask, gmask, bmask, amask); + if (priv->dither == NULL) { + MP_FATAL(vo, "caca_create_dither failed!\n"); + return -1; + } + priv->dither_buffer = + talloc_array(NULL, uint8_t, depth * priv->image_width * priv->image_height); + + /* Default libcaca features */ + caca_set_dither_antialias(priv->dither, priv->dither_antialias); + caca_set_dither_charset(priv->dither, priv->dither_charset); + caca_set_dither_color(priv->dither, priv->dither_color); + caca_set_dither_algorithm(priv->dither, priv->dither_algo); + + return 0; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *priv = vo->priv; + priv->image_height = params->h; + priv->image_width = params->w; + priv->image_format = params->imgfmt; + + return resize(vo); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *priv = vo->priv; + struct mp_image *mpi = frame->current; + if (!mpi) + return; + memcpy_pic(priv->dither_buffer, mpi->planes[0], priv->image_width * depth, priv->image_height, + priv->image_width * depth, mpi->stride[0]); + caca_dither_bitmap(priv->canvas, 0, 0, priv->screen_w, priv->screen_h, priv->dither, priv->dither_buffer); +} + +static void flip_page(struct vo *vo) +{ + struct priv *priv = vo->priv; + caca_refresh_display(priv->display); +} + +static void set_next_str(const char * const *list, const char **str, + const char **msg) +{ + int ind; + for (ind = 0; list[ind]; ind += 2) { + if (strcmp(list[ind], *str) == 0) { + if (list[ind + 2] == NULL) + ind = -2; + *str = list[ind + 2]; + *msg = list[ind + 3]; + return; + } + } + + *str = list[0]; + *msg = list[1]; +} + +static const struct mp_keymap keysym_map[] = { + {CACA_KEY_RETURN, MP_KEY_ENTER}, {CACA_KEY_ESCAPE, MP_KEY_ESC}, + {CACA_KEY_UP, MP_KEY_UP}, {CACA_KEY_DOWN, MP_KEY_DOWN}, + {CACA_KEY_LEFT, MP_KEY_LEFT}, {CACA_KEY_RIGHT, MP_KEY_RIGHT}, + {CACA_KEY_PAGEUP, MP_KEY_PAGE_UP}, {CACA_KEY_PAGEDOWN, 
MP_KEY_PAGE_DOWN}, + {CACA_KEY_HOME, MP_KEY_HOME}, {CACA_KEY_END, MP_KEY_END}, + {CACA_KEY_INSERT, MP_KEY_INSERT}, {CACA_KEY_DELETE, MP_KEY_DELETE}, + {CACA_KEY_BACKSPACE, MP_KEY_BACKSPACE}, {CACA_KEY_TAB, MP_KEY_TAB}, + {CACA_KEY_PAUSE, MP_KEY_PAUSE}, + {CACA_KEY_F1, MP_KEY_F+1}, {CACA_KEY_F2, MP_KEY_F+2}, + {CACA_KEY_F3, MP_KEY_F+3}, {CACA_KEY_F4, MP_KEY_F+4}, + {CACA_KEY_F5, MP_KEY_F+5}, {CACA_KEY_F6, MP_KEY_F+6}, + {CACA_KEY_F7, MP_KEY_F+7}, {CACA_KEY_F8, MP_KEY_F+8}, + {CACA_KEY_F9, MP_KEY_F+9}, {CACA_KEY_F10, MP_KEY_F+10}, + {CACA_KEY_F11, MP_KEY_F+11}, {CACA_KEY_F12, MP_KEY_F+12}, + {CACA_KEY_F13, MP_KEY_F+13}, {CACA_KEY_F14, MP_KEY_F+14}, + {CACA_KEY_F15, MP_KEY_F+15}, + {0, 0} +}; + +static void check_events(struct vo *vo) +{ + struct priv *priv = vo->priv; + + caca_event_t cev; + while (caca_get_event(priv->display, CACA_EVENT_ANY, &cev, 0)) { + + switch (cev.type) { + case CACA_EVENT_RESIZE: + caca_refresh_display(priv->display); + resize(vo); + break; + case CACA_EVENT_QUIT: + mp_input_put_key(vo->input_ctx, MP_KEY_CLOSE_WIN); + break; + case CACA_EVENT_MOUSE_MOTION: + mp_input_set_mouse_pos(vo->input_ctx, cev.data.mouse.x, cev.data.mouse.y); + break; + case CACA_EVENT_MOUSE_PRESS: + mp_input_put_key(vo->input_ctx, + (MP_MBTN_BASE + cev.data.mouse.button - 1) | MP_KEY_STATE_DOWN); + break; + case CACA_EVENT_MOUSE_RELEASE: + mp_input_put_key(vo->input_ctx, + (MP_MBTN_BASE + cev.data.mouse.button - 1) | MP_KEY_STATE_UP); + break; + case CACA_EVENT_KEY_PRESS: + { + int key = cev.data.key.ch; + int mpkey = lookup_keymap_table(keysym_map, key); + const char *msg_name; + + if (mpkey) + mp_input_put_key(vo->input_ctx, mpkey); + else + switch (key) { + case 'd': + case 'D': + /* Toggle dithering algorithm */ + set_next_str(caca_get_dither_algorithm_list(priv->dither), + &priv->dither_algo, &msg_name); + caca_set_dither_algorithm(priv->dither, priv->dither_algo); + break; + + case 'a': + case 'A': + /* Toggle antialiasing method */ + 
set_next_str(caca_get_dither_antialias_list(priv->dither), + &priv->dither_antialias, &msg_name); + caca_set_dither_antialias(priv->dither, priv->dither_antialias); + break; + + case 'h': + case 'H': + /* Toggle charset method */ + set_next_str(caca_get_dither_charset_list(priv->dither), + &priv->dither_charset, &msg_name); + caca_set_dither_charset(priv->dither, priv->dither_charset); + break; + + case 'c': + case 'C': + /* Toggle color method */ + set_next_str(caca_get_dither_color_list(priv->dither), + &priv->dither_color, &msg_name); + caca_set_dither_color(priv->dither, priv->dither_color); + break; + + default: + if (key <= 255) + mp_input_put_key(vo->input_ctx, key); + break; + } + } + } + } +} + +static void uninit(struct vo *vo) +{ + struct priv *priv = vo->priv; + caca_free_dither(priv->dither); + priv->dither = NULL; + talloc_free(priv->dither_buffer); + priv->dither_buffer = NULL; + caca_free_display(priv->display); + caca_free_canvas(priv->canvas); +} + +static int preinit(struct vo *vo) +{ + struct priv *priv = vo->priv; + + priv->dither_antialias = "default"; + priv->dither_charset = "default"; + priv->dither_color = "default"; + priv->dither_algo = "none"; + + priv->canvas = caca_create_canvas(0, 0); + if (priv->canvas == NULL) { + MP_ERR(vo, "failed to create canvas\n"); + return ENOSYS; + } + + priv->display = caca_create_display(priv->canvas); + + if (priv->display == NULL) { + MP_ERR(vo, "failed to create display\n"); + caca_free_canvas(priv->canvas); + return ENOSYS; + } + + return 0; +} + +static int query_format(struct vo *vo, int format) +{ + return format == IMGFMT_BGR24; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct priv *priv = vo->priv; + switch (request) { + case VOCTRL_CHECK_EVENTS: + check_events(vo); + return VO_TRUE; + case VOCTRL_UPDATE_WINDOW_TITLE: + caca_set_display_title(priv->display, (char *)data); + return VO_TRUE; + } + return VO_NOTIMPL; +} + +const struct vo_driver video_out_caca = { + 
.name = "caca", + .description = "libcaca", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct priv), +}; diff --git a/video/out/vo_direct3d.c b/video/out/vo_direct3d.c new file mode 100644 index 0000000..16936bb --- /dev/null +++ b/video/out/vo_direct3d.c @@ -0,0 +1,1247 @@ +/* + * Copyright (c) 2008 Georgi Petrov (gogothebee) <gogothebee@gmail.com> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. 
// Flexible vertex format flags for OSD vertices: a position (XYZ) plus one
// set of texture coordinates.
#define D3DFVF_OSD_VERTEX (D3DFVF_XYZ | D3DFVF_TEX1)

// One OSD vertex: position followed by a texture coordinate pair.
// NOTE(review): the field order presumably has to stay in sync with
// D3DFVF_OSD_VERTEX - confirm before reordering.
typedef struct {
    float x, y, z;
    float tu, tv;
} vertex_osd;

// A texture as managed by d3dtex_allocate()/d3dtex_release(): a texture that
// is always present after a successful allocation ("system") plus an optional
// second one used for rendering ("device").
struct d3dtex {
    // user-requested size
    int w, h;
    // allocated texture size (d3d_fix_texture_size() only ever enlarges the
    // requested size; d3dtex_release() resets these to 0)
    int tex_w, tex_h;
    // D3DPOOL_SYSTEMMEM (or others) texture:
    // - can be locked in order to write (and even read) data
    // - can _not_ (probably) be used as texture for rendering
    // This is always non-NULL if d3dtex_allocate succeeds.
    IDirect3DTexture9 *system;
    // D3DPOOL_DEFAULT texture:
    // - can't be locked (Probably.)
    // - must be used for rendering
    // This can be NULL if the system one can be both locked and mapped.
    IDirect3DTexture9 *device;
};
/* Private state of the direct3d VO: options, the Direct3D objects, cached
 * device capabilities and the OSD rendering state.
 */
typedef struct d3d_priv {
    struct mp_log *log;         /**< used by the MP_ERR()/MP_VERBOSE() calls
                                that take priv as their first argument */

    bool opt_disable_texture_align; /**< if set, d3d_fix_texture_size() skips
                                    rounding sizes up to a multiple of 16 */
    // debugging
    bool opt_force_power_of_2;
    int opt_texture_memory;     /**< texture pool selection used by
                                d3dtex_allocate():
                                1 = D3DPOOL_MANAGED, no shadow texture
                                2 = D3DPOOL_DEFAULT, no shadow texture
                                3 = D3DPOOL_DEFAULT, with shadow texture
                                4 = D3DPOOL_SCRATCH, with shadow texture
                                other = D3DPOOL_SYSTEMMEM, shadow texture
                                only if !device_texture_sys */
    bool opt_swap_discard;
    bool opt_exact_backbuffer;

    struct vo *vo;              /**< owning VO */

    bool have_image;
    double osd_pts;

    D3DLOCKED_RECT locked_rect; /**< The locked offscreen surface */
    RECT fs_movie_rect;         /**< Rect (upscaled) of the movie when displayed
                                in fullscreen */
    RECT fs_panscan_rect;       /**< PanScan source surface cropping in
                                fullscreen */
    int src_width;              /**< Source (movie) width */
    int src_height;             /**< Source (movie) height */
    struct mp_osd_res osd_res;  /**< filled in by vo_get_src_dst_rects() in
                                calc_fs_rect() */
    int image_format;           /**< mplayer image format */
    struct mp_image_params params;

    D3DFORMAT movie_src_fmt;        /**< Movie colorspace format (depends on
                                    the movie's codec) */
    D3DFORMAT desktop_fmt;          /**< Desktop (screen) colorspace format.
                                    Usually XRGB */

    HANDLE d3d9_dll;                /**< d3d9 Library HANDLE */
    IDirect3D9 * (WINAPI *pDirect3DCreate9)(UINT); /**< pointer to Direct3DCreate9 function */

    LPDIRECT3D9        d3d_handle;  /**< Direct3D Handle */
    LPDIRECT3DDEVICE9  d3d_device;  /**< The Direct3D device (target of the
                                    IDirect3DDevice9_* calls) */
    bool d3d_in_scene;              /**< BeginScene was called, EndScene not */
    IDirect3DSurface9 *d3d_surface; /**< Offscreen Direct3D Surface. MPlayer
                                    renders inside it. Uses colorspace
                                    priv->movie_src_fmt */
    IDirect3DSurface9 *d3d_backbuf; /**< Video card's back buffer (used to
                                    display next frame) */
    int cur_backbuf_width;          /**< Current backbuffer width */
    int cur_backbuf_height;         /**< Current backbuffer height */
    int device_caps_power2_only;    /**< 1 = texture sizes have to be power 2
                                    0 = texture sizes can be anything */
    int device_caps_square_only;    /**< 1 = textures have to be square
                                    0 = textures do not have to be square */
    int device_texture_sys;         /**< 1 = device can texture from system memory
                                    0 = device requires shadow */
    int max_texture_width;          /**< from the device capabilities */
    int max_texture_height;         /**< from the device capabilities */

    D3DMATRIX d3d_colormatrix;

    struct mp_draw_sub_cache *osd_cache;
    struct d3dtex osd_texture;
    int osd_num_vertices;
    /* NOTE(review): 6 vertices per OSD rectangle, presumably two triangles -
       confirm against the OSD drawing code. */
    vertex_osd osd_vertices[MAX_OSD_RECTS * 6];
} d3d_priv;
*/ + +static const struct fmt_entry fmt_table[] = { + // planar YUV + {IMGFMT_420P, MAKEFOURCC('Y','V','1','2')}, + {IMGFMT_420P, MAKEFOURCC('I','4','2','0')}, + {IMGFMT_420P, MAKEFOURCC('I','Y','U','V')}, + {IMGFMT_NV12, MAKEFOURCC('N','V','1','2')}, + // packed YUV + {IMGFMT_UYVY, D3DFMT_UYVY}, + // packed RGB + {IMGFMT_BGR0, D3DFMT_X8R8G8B8}, + {IMGFMT_RGB0, D3DFMT_X8B8G8R8}, + {IMGFMT_BGR24, D3DFMT_R8G8B8}, //untested + {IMGFMT_RGB565, D3DFMT_R5G6B5}, + {0}, +}; + + +static bool resize_d3d(d3d_priv *priv); +static void uninit(struct vo *vo); +static void flip_page(struct vo *vo); +static mp_image_t *get_window_screenshot(d3d_priv *priv); +static void draw_osd(struct vo *vo); +static bool change_d3d_backbuffer(d3d_priv *priv); + +static void d3d_matrix_identity(D3DMATRIX *m) +{ + memset(m, 0, sizeof(D3DMATRIX)); + m->_11 = m->_22 = m->_33 = m->_44 = 1.0f; +} + +static void d3d_matrix_ortho(D3DMATRIX *m, float left, float right, + float bottom, float top) +{ + d3d_matrix_identity(m); + m->_11 = 2.0f / (right - left); + m->_22 = 2.0f / (top - bottom); + m->_33 = 1.0f; + m->_41 = -(right + left) / (right - left); + m->_42 = -(top + bottom) / (top - bottom); + m->_43 = 0; + m->_44 = 1.0f; +} + +/**************************************************************************** + * * + * * + * * + * Direct3D specific implementation functions * + * * + * * + * * + ****************************************************************************/ + +static bool d3d_begin_scene(d3d_priv *priv) +{ + if (!priv->d3d_in_scene) { + if (FAILED(IDirect3DDevice9_BeginScene(priv->d3d_device))) { + MP_ERR(priv, "BeginScene failed.\n"); + return false; + } + priv->d3d_in_scene = true; + } + return true; +} + +/** @brief Calculate scaled fullscreen movie rectangle with + * preserved aspect ratio. 
+ */ +static void calc_fs_rect(d3d_priv *priv) +{ + struct mp_rect src_rect; + struct mp_rect dst_rect; + vo_get_src_dst_rects(priv->vo, &src_rect, &dst_rect, &priv->osd_res); + + priv->fs_movie_rect.left = dst_rect.x0; + priv->fs_movie_rect.right = dst_rect.x1; + priv->fs_movie_rect.top = dst_rect.y0; + priv->fs_movie_rect.bottom = dst_rect.y1; + priv->fs_panscan_rect.left = src_rect.x0; + priv->fs_panscan_rect.right = src_rect.x1; + priv->fs_panscan_rect.top = src_rect.y0; + priv->fs_panscan_rect.bottom = src_rect.y1; +} + +// Adjust the texture size *width/*height to fit the requirements of the D3D +// device. The texture size is only increased. +static void d3d_fix_texture_size(d3d_priv *priv, int *width, int *height) +{ + int tex_width = *width; + int tex_height = *height; + + // avoid nasty special cases with 0-sized textures and texture sizes + tex_width = MPMAX(tex_width, 1); + tex_height = MPMAX(tex_height, 1); + + if (priv->device_caps_power2_only) { + tex_width = 1; + tex_height = 1; + while (tex_width < *width) tex_width <<= 1; + while (tex_height < *height) tex_height <<= 1; + } + if (priv->device_caps_square_only) + /* device only supports square textures */ + tex_width = tex_height = MPMAX(tex_width, tex_height); + // better round up to a multiple of 16 + if (!priv->opt_disable_texture_align) { + tex_width = (tex_width + 15) & ~15; + tex_height = (tex_height + 15) & ~15; + } + + *width = tex_width; + *height = tex_height; +} + +static void d3dtex_release(d3d_priv *priv, struct d3dtex *tex) +{ + if (tex->system) + IDirect3DTexture9_Release(tex->system); + tex->system = NULL; + + if (tex->device) + IDirect3DTexture9_Release(tex->device); + tex->device = NULL; + + tex->tex_w = tex->tex_h = 0; +} + +static bool d3dtex_allocate(d3d_priv *priv, struct d3dtex *tex, D3DFORMAT fmt, + int w, int h) +{ + d3dtex_release(priv, tex); + + tex->w = w; + tex->h = h; + + int tw = w, th = h; + d3d_fix_texture_size(priv, &tw, &th); + + bool use_sh = 
!priv->device_texture_sys; + int memtype = D3DPOOL_SYSTEMMEM; + switch (priv->opt_texture_memory) { + case 1: memtype = D3DPOOL_MANAGED; use_sh = false; break; + case 2: memtype = D3DPOOL_DEFAULT; use_sh = false; break; + case 3: memtype = D3DPOOL_DEFAULT; use_sh = true; break; + case 4: memtype = D3DPOOL_SCRATCH; use_sh = true; break; + } + + if (FAILED(IDirect3DDevice9_CreateTexture(priv->d3d_device, tw, th, 1, + D3DUSAGE_DYNAMIC, fmt, memtype, &tex->system, NULL))) + { + MP_ERR(priv, "Allocating %dx%d texture in system RAM failed.\n", w, h); + goto error_exit; + } + + if (use_sh) { + if (FAILED(IDirect3DDevice9_CreateTexture(priv->d3d_device, tw, th, 1, + D3DUSAGE_DYNAMIC, fmt, D3DPOOL_DEFAULT, &tex->device, NULL))) + { + MP_ERR(priv, "Allocating %dx%d texture in video RAM failed.\n", w, h); + goto error_exit; + } + } + + tex->tex_w = tw; + tex->tex_h = th; + + return true; + +error_exit: + d3dtex_release(priv, tex); + return false; +} + +static IDirect3DBaseTexture9 *d3dtex_get_render_texture(d3d_priv *priv, + struct d3dtex *tex) +{ + return (IDirect3DBaseTexture9 *)(tex->device ? tex->device : tex->system); +} + +// Copy system texture contents to device texture. +static bool d3dtex_update(d3d_priv *priv, struct d3dtex *tex) +{ + if (!tex->device) + return true; + return !FAILED(IDirect3DDevice9_UpdateTexture(priv->d3d_device, + (IDirect3DBaseTexture9 *)tex->system, + (IDirect3DBaseTexture9 *)tex->device)); +} + +static void d3d_unlock_video_objects(d3d_priv *priv) +{ + if (priv->locked_rect.pBits) { + if (FAILED(IDirect3DSurface9_UnlockRect(priv->d3d_surface))) + MP_VERBOSE(priv, "Unlocking video objects failed.\n"); + } + priv->locked_rect.pBits = NULL; +} + +// Free video surface/textures, etc. +static void d3d_destroy_video_objects(d3d_priv *priv) +{ + d3d_unlock_video_objects(priv); + + if (priv->d3d_surface) + IDirect3DSurface9_Release(priv->d3d_surface); + priv->d3d_surface = NULL; +} + +/** @brief Destroy D3D Offscreen and Backbuffer surfaces. 
+ */ +static void destroy_d3d_surfaces(d3d_priv *priv) +{ + MP_VERBOSE(priv, "destroy_d3d_surfaces called.\n"); + + d3d_destroy_video_objects(priv); + d3dtex_release(priv, &priv->osd_texture); + + if (priv->d3d_backbuf) + IDirect3DSurface9_Release(priv->d3d_backbuf); + priv->d3d_backbuf = NULL; + + priv->d3d_in_scene = false; +} + +// Allocate video surface. +static bool d3d_configure_video_objects(d3d_priv *priv) +{ + assert(priv->image_format != 0); + + if (!priv->d3d_surface && + FAILED(IDirect3DDevice9_CreateOffscreenPlainSurface( + priv->d3d_device, priv->src_width, priv->src_height, + priv->movie_src_fmt, D3DPOOL_DEFAULT, &priv->d3d_surface, NULL))) + { + MP_ERR(priv, "Allocating offscreen surface failed.\n"); + return false; + } + + return true; +} + +// Recreate and initialize D3D objects if necessary. The amount of work that +// needs to be done can be quite different: it could be that full initialization +// is required, or that some objects need to be created, or that nothing is +// done. 
+static bool create_d3d_surfaces(d3d_priv *priv) +{ + MP_VERBOSE(priv, "create_d3d_surfaces called.\n"); + + if (!priv->d3d_backbuf && + FAILED(IDirect3DDevice9_GetBackBuffer(priv->d3d_device, 0, 0, + D3DBACKBUFFER_TYPE_MONO, + &priv->d3d_backbuf))) { + MP_ERR(priv, "Allocating backbuffer failed.\n"); + return 0; + } + + if (!d3d_configure_video_objects(priv)) + return 0; + + /* setup default renderstate */ + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_SRCBLEND, D3DBLEND_SRCALPHA); + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_DESTBLEND, D3DBLEND_INVSRCALPHA); + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_ALPHAFUNC, D3DCMP_GREATER); + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_ALPHAREF, (DWORD)0x0); + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_LIGHTING, FALSE); + + // we use up to 3 samplers for up to 3 YUV planes + // TODO + /* + for (int n = 0; n < 3; n++) { + IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_MINFILTER, + D3DTEXF_LINEAR); + IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_MAGFILTER, + D3DTEXF_LINEAR); + IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_ADDRESSU, + D3DTADDRESS_CLAMP); + IDirect3DDevice9_SetSamplerState(priv->d3d_device, n, D3DSAMP_ADDRESSV, + D3DTADDRESS_CLAMP); + } + */ + + return 1; +} + +static bool init_d3d(d3d_priv *priv) +{ + D3DDISPLAYMODE disp_mode; + D3DCAPS9 disp_caps; + DWORD texture_caps; + DWORD dev_caps; + + priv->d3d_handle = priv->pDirect3DCreate9(D3D_SDK_VERSION); + if (!priv->d3d_handle) { + MP_ERR(priv, "Initializing Direct3D failed.\n"); + return false; + } + + if (FAILED(IDirect3D9_GetAdapterDisplayMode(priv->d3d_handle, + D3DADAPTER_DEFAULT, + &disp_mode))) { + MP_ERR(priv, "Reading display mode failed.\n"); + return false; + } + + priv->desktop_fmt = disp_mode.Format; + priv->cur_backbuf_width = disp_mode.Width; + priv->cur_backbuf_height = disp_mode.Height; + + MP_VERBOSE(priv, "Setting backbuffer 
dimensions to (%dx%d).\n", + disp_mode.Width, disp_mode.Height); + + if (FAILED(IDirect3D9_GetDeviceCaps(priv->d3d_handle, + D3DADAPTER_DEFAULT, + DEVTYPE, + &disp_caps))) + { + MP_ERR(priv, "Reading display capabilities failed.\n"); + return false; + } + + /* Store relevant information reguarding caps of device */ + texture_caps = disp_caps.TextureCaps; + dev_caps = disp_caps.DevCaps; + priv->device_caps_power2_only = (texture_caps & D3DPTEXTURECAPS_POW2) && + !(texture_caps & D3DPTEXTURECAPS_NONPOW2CONDITIONAL); + priv->device_caps_square_only = texture_caps & D3DPTEXTURECAPS_SQUAREONLY; + priv->device_texture_sys = dev_caps & D3DDEVCAPS_TEXTURESYSTEMMEMORY; + priv->max_texture_width = disp_caps.MaxTextureWidth; + priv->max_texture_height = disp_caps.MaxTextureHeight; + + if (priv->opt_force_power_of_2) + priv->device_caps_power2_only = 1; + + if (FAILED(IDirect3D9_CheckDeviceFormat(priv->d3d_handle, + D3DADAPTER_DEFAULT, + DEVTYPE, + priv->desktop_fmt, + D3DUSAGE_DYNAMIC | D3DUSAGE_QUERY_FILTER, + D3DRTYPE_TEXTURE, + D3DFMT_A8R8G8B8))) + { + MP_ERR(priv, "OSD texture format not supported.\n"); + return false; + } + + if (!change_d3d_backbuffer(priv)) + return false; + + MP_VERBOSE(priv, "device_caps_power2_only %d, device_caps_square_only %d\n" + "device_texture_sys %d\n" + "max_texture_width %d, max_texture_height %d\n", + priv->device_caps_power2_only, priv->device_caps_square_only, + priv->device_texture_sys, priv->max_texture_width, + priv->max_texture_height); + + return true; +} + +/** @brief Fill D3D Presentation parameters + */ +static void fill_d3d_presentparams(d3d_priv *priv, + D3DPRESENT_PARAMETERS *present_params) +{ + /* Prepare Direct3D initialization parameters. */ + memset(present_params, 0, sizeof(D3DPRESENT_PARAMETERS)); + present_params->Windowed = TRUE; + present_params->SwapEffect = + priv->opt_swap_discard ? 
D3DSWAPEFFECT_DISCARD : D3DSWAPEFFECT_COPY; + present_params->Flags = D3DPRESENTFLAG_VIDEO; + present_params->hDeviceWindow = vo_w32_hwnd(priv->vo); + present_params->BackBufferWidth = priv->cur_backbuf_width; + present_params->BackBufferHeight = priv->cur_backbuf_height; + present_params->MultiSampleType = D3DMULTISAMPLE_NONE; + present_params->PresentationInterval = D3DPRESENT_INTERVAL_ONE; + present_params->BackBufferFormat = priv->desktop_fmt; + present_params->BackBufferCount = 1; + present_params->EnableAutoDepthStencil = FALSE; +} + + +// Create a new backbuffer. Create or Reset the D3D device. +static bool change_d3d_backbuffer(d3d_priv *priv) +{ + int window_w = priv->vo->dwidth; + int window_h = priv->vo->dheight; + + /* Grow the backbuffer in the required dimension. */ + if (window_w > priv->cur_backbuf_width) + priv->cur_backbuf_width = window_w; + + if (window_h > priv->cur_backbuf_height) + priv->cur_backbuf_height = window_h; + + if (priv->opt_exact_backbuffer) { + priv->cur_backbuf_width = window_w; + priv->cur_backbuf_height = window_h; + } + + /* The grown backbuffer dimensions are ready and fill_d3d_presentparams + * will use them, so we can reset the device. 
+ */ + D3DPRESENT_PARAMETERS present_params; + fill_d3d_presentparams(priv, &present_params); + + if (!priv->d3d_device) { + if (FAILED(IDirect3D9_CreateDevice(priv->d3d_handle, + D3DADAPTER_DEFAULT, + DEVTYPE, vo_w32_hwnd(priv->vo), + D3DCREATE_SOFTWARE_VERTEXPROCESSING + | D3DCREATE_FPU_PRESERVE + | D3DCREATE_MULTITHREADED, + &present_params, &priv->d3d_device))) + { + MP_VERBOSE(priv, "Creating Direct3D device failed.\n"); + return 0; + } + } else { + if (FAILED(IDirect3DDevice9_Reset(priv->d3d_device, &present_params))) { + MP_ERR(priv, "Resetting Direct3D device failed.\n"); + return 0; + } + } + + MP_VERBOSE(priv, "New backbuffer (%dx%d), VO (%dx%d)\n", + present_params.BackBufferWidth, present_params.BackBufferHeight, + window_w, window_h); + + return 1; +} + +static void destroy_d3d(d3d_priv *priv) +{ + destroy_d3d_surfaces(priv); + + if (priv->d3d_device) + IDirect3DDevice9_Release(priv->d3d_device); + priv->d3d_device = NULL; + + if (priv->d3d_handle) { + MP_VERBOSE(priv, "Stopping Direct3D.\n"); + IDirect3D9_Release(priv->d3d_handle); + } + priv->d3d_handle = NULL; +} + +/** @brief Reconfigure the whole Direct3D. Called only + * when the video adapter becomes uncooperative. ("Lost" devices) + * @return 1 on success, 0 on failure + */ +static int reconfigure_d3d(d3d_priv *priv) +{ + MP_VERBOSE(priv, "reconfigure_d3d called.\n"); + + // Force complete destruction of the D3D state. + // Note: this step could be omitted. The resize_d3d call below would detect + // that d3d_device is NULL, and would properly recreate it. I'm not sure why + // the following code to release and recreate the d3d_handle exists. + destroy_d3d(priv); + if (!init_d3d(priv)) + return 0; + + // Proper re-initialization. + if (!resize_d3d(priv)) + return 0; + + return 1; +} + +// Resize Direct3D context on window resize. +// This function also is called when major initializations need to be done. 
+static bool resize_d3d(d3d_priv *priv) +{ + D3DVIEWPORT9 vp = {0, 0, priv->vo->dwidth, priv->vo->dheight, 0, 1}; + + MP_VERBOSE(priv, "resize_d3d %dx%d called.\n", + priv->vo->dwidth, priv->vo->dheight); + + /* Make sure that backbuffer is large enough to accommodate the new + viewport dimensions. Grow it if necessary. */ + + bool backbuf_resize = priv->vo->dwidth > priv->cur_backbuf_width || + priv->vo->dheight > priv->cur_backbuf_height; + + if (priv->opt_exact_backbuffer) { + backbuf_resize = priv->vo->dwidth != priv->cur_backbuf_width || + priv->vo->dheight != priv->cur_backbuf_height; + } + + if (backbuf_resize || !priv->d3d_device) + { + destroy_d3d_surfaces(priv); + if (!change_d3d_backbuffer(priv)) + return 0; + } + + if (!priv->d3d_device || !priv->image_format) + return 1; + + if (!create_d3d_surfaces(priv)) + return 0; + + if (FAILED(IDirect3DDevice9_SetViewport(priv->d3d_device, &vp))) { + MP_ERR(priv, "Setting viewport failed.\n"); + return 0; + } + + // so that screen coordinates map to D3D ones + D3DMATRIX view; + d3d_matrix_ortho(&view, 0.5f, vp.Width + 0.5f, vp.Height + 0.5f, 0.5f); + IDirect3DDevice9_SetTransform(priv->d3d_device, D3DTS_VIEW, &view); + + calc_fs_rect(priv); + priv->vo->want_redraw = true; + + return 1; +} + +/** @brief Uninitialize Direct3D and close the window. 
+ */ +static void uninit_d3d(d3d_priv *priv) +{ + MP_VERBOSE(priv, "uninit_d3d called.\n"); + + destroy_d3d(priv); +} + +static uint32_t d3d_draw_frame(d3d_priv *priv) +{ + if (!priv->d3d_device) + return VO_TRUE; + + if (!d3d_begin_scene(priv)) + return VO_ERROR; + + IDirect3DDevice9_Clear(priv->d3d_device, 0, NULL, D3DCLEAR_TARGET, 0, 0, 0); + + if (!priv->have_image) + goto render_osd; + + RECT rm = priv->fs_movie_rect; + RECT rs = priv->fs_panscan_rect; + + rs.left &= ~(ULONG)1; + rs.top &= ~(ULONG)1; + rs.right &= ~(ULONG)1; + rs.bottom &= ~(ULONG)1; + if (FAILED(IDirect3DDevice9_StretchRect(priv->d3d_device, + priv->d3d_surface, + &rs, + priv->d3d_backbuf, + &rm, + D3DTEXF_LINEAR))) { + MP_ERR(priv, "Copying frame to the backbuffer failed.\n"); + return VO_ERROR; + } + +render_osd: + + draw_osd(priv->vo); + + return VO_TRUE; +} + +static D3DFORMAT check_format(d3d_priv *priv, uint32_t movie_fmt) +{ + const struct fmt_entry *cur = &fmt_table[0]; + + while (cur->mplayer_fmt) { + if (cur->mplayer_fmt == movie_fmt) { + HRESULT res; + /* Test conversion from Movie colorspace to + * display's target colorspace. */ + res = IDirect3D9_CheckDeviceFormatConversion(priv->d3d_handle, + D3DADAPTER_DEFAULT, + DEVTYPE, + cur->fourcc, + priv->desktop_fmt); + if (FAILED(res)) { + MP_VERBOSE(priv, "Rejected image format: %s\n", + vo_format_name(cur->mplayer_fmt)); + return 0; + } + + MP_DBG(priv, "Accepted image format: %s\n", + vo_format_name(cur->mplayer_fmt)); + + return cur->fourcc; + } + cur++; + } + + return 0; +} + +// Return if the image format can be used. If it can, decide which rendering +// and conversion mode to use. +// If initialize is true, actually setup all variables to use the picked +// rendering mode. 
+static bool init_rendering_mode(d3d_priv *priv, uint32_t fmt, bool initialize) +{ + int blit_d3dfmt = check_format(priv, fmt); + + if (!blit_d3dfmt) + return false; + + MP_VERBOSE(priv, "Accepted rendering methods for " + "format='%s': StretchRect=%#x.\n", + vo_format_name(fmt), blit_d3dfmt); + + if (!initialize) + return true; + + // initialization doesn't fail beyond this point + + priv->movie_src_fmt = 0; + priv->image_format = fmt; + + priv->movie_src_fmt = blit_d3dfmt; + + return true; +} + +/** @brief Query if movie colorspace is supported by the HW. + * @return 0 on failure, device capabilities (not probed + * currently) on success. + */ +static int query_format(struct vo *vo, int movie_fmt) +{ + d3d_priv *priv = vo->priv; + if (!init_rendering_mode(priv, movie_fmt, false)) + return 0; + + return 1; +} + +/**************************************************************************** + * * + * * + * * + * libvo Control / Callback functions * + * * + * * + * * + ****************************************************************************/ + + +/** @brief libvo Callback: Preinitialize the video card. + * Preinit the hardware just enough to be queried about + * supported formats. + * + * @return 0 on success, -1 on failure + */ + +static int preinit(struct vo *vo) +{ + d3d_priv *priv = vo->priv; + priv->vo = vo; + priv->log = vo->log; + + priv->d3d9_dll = LoadLibraryA("d3d9.dll"); + if (!priv->d3d9_dll) { + MP_ERR(priv, "Unable to dynamically load d3d9.dll\n"); + goto err_out; + } + + priv->pDirect3DCreate9 = (void *)GetProcAddress(priv->d3d9_dll, + "Direct3DCreate9"); + if (!priv->pDirect3DCreate9) { + MP_ERR(priv, "Unable to find entry point of Direct3DCreate9\n"); + goto err_out; + } + + /* w32_common framework call. Configures window on the screen, gets + * fullscreen dimensions and does other useful stuff. 
+ */ + if (!vo_w32_init(vo)) { + MP_VERBOSE(priv, "Configuring onscreen window failed.\n"); + goto err_out; + } + + if (!init_d3d(priv)) + goto err_out; + + return 0; + +err_out: + uninit(vo); + return -1; +} + +/** @brief libvo Callback: Handle control requests. + * @return VO_TRUE on success, VO_NOTIMPL when not implemented + */ +static int control(struct vo *vo, uint32_t request, void *data) +{ + d3d_priv *priv = vo->priv; + + switch (request) { + case VOCTRL_SET_PANSCAN: + calc_fs_rect(priv); + priv->vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SCREENSHOT_WIN: + *(struct mp_image **)data = get_window_screenshot(priv); + return VO_TRUE; + } + + int events = 0; + int r = vo_w32_control(vo, &events, request, data); + + if (events & VO_EVENT_RESIZE) + resize_d3d(priv); + + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + + vo_event(vo, events); + + return r; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + d3d_priv *priv = vo->priv; + + priv->have_image = false; + + vo_w32_config(vo); + + if ((priv->image_format != params->imgfmt) + || (priv->src_width != params->w) + || (priv->src_height != params->h)) + { + d3d_destroy_video_objects(priv); + + priv->src_width = params->w; + priv->src_height = params->h; + priv->params = *params; + init_rendering_mode(priv, params->imgfmt, true); + } + + if (!resize_d3d(priv)) + return VO_ERROR; + + return 0; /* Success */ +} + +/** @brief libvo Callback: Flip next already drawn frame on the + * screen. 
+ */ +static void flip_page(struct vo *vo) +{ + d3d_priv *priv = vo->priv; + + if (priv->d3d_device && priv->d3d_in_scene) { + if (FAILED(IDirect3DDevice9_EndScene(priv->d3d_device))) { + MP_ERR(priv, "EndScene failed.\n"); + } + } + priv->d3d_in_scene = false; + + RECT rect = {0, 0, vo->dwidth, vo->dheight}; + if (!priv->d3d_device || + FAILED(IDirect3DDevice9_Present(priv->d3d_device, &rect, 0, 0, 0))) { + MP_VERBOSE(priv, "Trying to reinitialize uncooperative video adapter.\n"); + if (!reconfigure_d3d(priv)) { + MP_VERBOSE(priv, "Reinitialization failed.\n"); + return; + } else { + MP_VERBOSE(priv, "Video adapter reinitialized.\n"); + } + } +} + +/** @brief libvo Callback: Uninitializes all pointers and closes + * all D3D related stuff, + */ +static void uninit(struct vo *vo) +{ + d3d_priv *priv = vo->priv; + + MP_VERBOSE(priv, "uninit called.\n"); + + uninit_d3d(priv); + vo_w32_uninit(vo); + if (priv->d3d9_dll) + FreeLibrary(priv->d3d9_dll); + priv->d3d9_dll = NULL; +} + +// Lock buffers and fill out to point to them. +// Must call d3d_unlock_video_objects() to unlock the buffers again. 
+static bool get_video_buffer(d3d_priv *priv, struct mp_image *out) +{ + *out = (struct mp_image) {0}; + mp_image_set_size(out, priv->src_width, priv->src_height); + mp_image_setfmt(out, priv->image_format); + + if (!priv->d3d_device) + return false; + + if (!priv->locked_rect.pBits) { + if (FAILED(IDirect3DSurface9_LockRect(priv->d3d_surface, + &priv->locked_rect, NULL, 0))) + { + MP_ERR(priv, "Surface lock failed.\n"); + return false; + } + } + + uint8_t *base = priv->locked_rect.pBits; + size_t stride = priv->locked_rect.Pitch; + + out->planes[0] = base; + out->stride[0] = stride; + + if (out->num_planes == 2) { + // NV12, NV21 + out->planes[1] = base + stride * out->h; + out->stride[1] = stride; + } + + if (out->num_planes == 3) { + bool swap = priv->movie_src_fmt == MAKEFOURCC('Y','V','1','2'); + + size_t uv_stride = stride / 2; + uint8_t *u = base + out->h * stride; + uint8_t *v = u + (out->h / 2) * uv_stride; + + out->planes[1] = swap ? v : u; + out->planes[2] = swap ? u : v; + + out->stride[1] = out->stride[2] = uv_stride; + } + + return true; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + d3d_priv *priv = vo->priv; + if (!priv->d3d_device) + return; + + struct mp_image buffer; + if (!get_video_buffer(priv, &buffer)) + return; + + if (!frame->current) + return; + + mp_image_copy(&buffer, frame->current); + + d3d_unlock_video_objects(priv); + + priv->have_image = true; + priv->osd_pts = frame->current->pts; + + d3d_draw_frame(priv); +} + +static mp_image_t *get_window_screenshot(d3d_priv *priv) +{ + D3DDISPLAYMODE mode; + mp_image_t *image = NULL; + RECT window_rc; + RECT screen_rc; + RECT visible; + POINT pt; + D3DLOCKED_RECT locked_rect; + int width, height; + IDirect3DSurface9 *surface = NULL; + + if (FAILED(IDirect3DDevice9_GetDisplayMode(priv->d3d_device, 0, &mode))) { + MP_ERR(priv, "GetDisplayMode failed.\n"); + goto error_exit; + } + + if (FAILED(IDirect3DDevice9_CreateOffscreenPlainSurface(priv->d3d_device, + mode.Width, 
mode.Height, D3DFMT_A8R8G8B8, D3DPOOL_SYSTEMMEM, &surface, + NULL))) + { + MP_ERR(priv, "Couldn't create surface.\n"); + goto error_exit; + } + + if (FAILED(IDirect3DDevice9_GetFrontBufferData(priv->d3d_device, 0, + surface))) + { + MP_ERR(priv, "Couldn't copy frontbuffer.\n"); + goto error_exit; + } + + GetClientRect(vo_w32_hwnd(priv->vo), &window_rc); + pt = (POINT) { 0, 0 }; + ClientToScreen(vo_w32_hwnd(priv->vo), &pt); + window_rc.left = pt.x; + window_rc.top = pt.y; + window_rc.right += window_rc.left; + window_rc.bottom += window_rc.top; + + screen_rc = (RECT) { 0, 0, mode.Width, mode.Height }; + + if (!IntersectRect(&visible, &screen_rc, &window_rc)) + goto error_exit; + width = visible.right - visible.left; + height = visible.bottom - visible.top; + if (width < 1 || height < 1) + goto error_exit; + + image = mp_image_alloc(IMGFMT_BGR0, width, height); + if (!image) + goto error_exit; + + IDirect3DSurface9_LockRect(surface, &locked_rect, NULL, 0); + + memcpy_pic(image->planes[0], (char*)locked_rect.pBits + visible.top * + locked_rect.Pitch + visible.left * 4, width * 4, height, + image->stride[0], locked_rect.Pitch); + + IDirect3DSurface9_UnlockRect(surface); + IDirect3DSurface9_Release(surface); + + return image; + +error_exit: + talloc_free(image); + if (surface) + IDirect3DSurface9_Release(surface); + return NULL; +} + +static void update_osd(d3d_priv *priv) +{ + if (!priv->osd_cache) + priv->osd_cache = mp_draw_sub_alloc(priv, priv->vo->global); + + struct sub_bitmap_list *sbs = osd_render(priv->vo->osd, priv->osd_res, + priv->osd_pts, 0, mp_draw_sub_formats); + + struct mp_rect act_rc[MAX_OSD_RECTS], mod_rc[64]; + int num_act_rc = 0, num_mod_rc = 0; + + struct mp_image *osd = mp_draw_sub_overlay(priv->osd_cache, sbs, + act_rc, MP_ARRAY_SIZE(act_rc), &num_act_rc, + mod_rc, MP_ARRAY_SIZE(mod_rc), &num_mod_rc); + + talloc_free(sbs); + + if (!osd) { + MP_ERR(priv, "Failed to render OSD.\n"); + return; + } + + if (!num_mod_rc && priv->osd_texture.system) + 
return; // nothing changed + + priv->osd_num_vertices = 0; + + if (osd->w > priv->osd_texture.tex_w || osd->h > priv->osd_texture.tex_h) { + int new_w = osd->w; + int new_h = osd->h; + d3d_fix_texture_size(priv, &new_w, &new_h); + + MP_DBG(priv, "reallocate OSD surface to %dx%d.\n", new_w, new_h); + + d3dtex_release(priv, &priv->osd_texture); + if (!d3dtex_allocate(priv, &priv->osd_texture, D3DFMT_A8R8G8B8, + new_w, new_h)) + return; + } + + // Lazy; could/should use the bounding rect, or perform multiple lock calls. + // The previous approach (fully packed texture) was more efficient. + RECT dirty_rc = { 0, 0, priv->osd_texture.w, priv->osd_texture.h }; + + D3DLOCKED_RECT locked_rect; + + if (FAILED(IDirect3DTexture9_LockRect(priv->osd_texture.system, 0, &locked_rect, + &dirty_rc, 0))) + { + MP_ERR(priv, "OSD texture lock failed.\n"); + return; + } + + for (int n = 0; n < num_mod_rc; n++) { + struct mp_rect rc = mod_rc[n]; + int w = mp_rect_w(rc); + int h = mp_rect_h(rc); + void *src = mp_image_pixel_ptr(osd, 0, rc.x0, rc.y0); + void *dst = (char *)locked_rect.pBits + locked_rect.Pitch * rc.y0 + + rc.x0 * 4; + memcpy_pic(dst, src, w * 4, h, locked_rect.Pitch, osd->stride[0]); + } + + if (FAILED(IDirect3DTexture9_UnlockRect(priv->osd_texture.system, 0))) { + MP_ERR(priv, "OSD texture unlock failed.\n"); + return; + } + + if (!d3dtex_update(priv, &priv->osd_texture)) + return; + + // We need 2 primitives per quad which makes 6 vertices. 
+ priv->osd_num_vertices = num_act_rc * 6; + + float tex_w = priv->osd_texture.tex_w; + float tex_h = priv->osd_texture.tex_h; + + for (int n = 0; n < num_act_rc; n++) { + struct mp_rect rc = act_rc[n]; + + float tx0 = rc.x0 / tex_w; + float ty0 = rc.y0 / tex_h; + float tx1 = rc.x1 / tex_w; + float ty1 = rc.y1 / tex_h; + + vertex_osd *v = &priv->osd_vertices[n * 6]; + v[0] = (vertex_osd) { rc.x0, rc.y0, 0, tx0, ty0 }; + v[1] = (vertex_osd) { rc.x1, rc.y0, 0, tx1, ty0 }; + v[2] = (vertex_osd) { rc.x0, rc.y1, 0, tx0, ty1 }; + v[3] = (vertex_osd) { rc.x1, rc.y1, 0, tx1, ty1 }; + v[4] = v[2]; + v[5] = v[1]; + } +} + +static void draw_osd(struct vo *vo) +{ + d3d_priv *priv = vo->priv; + if (!priv->d3d_device) + return; + + update_osd(priv); + + if (!priv->osd_num_vertices) + return; + + d3d_begin_scene(priv); + + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_ALPHABLENDENABLE, TRUE); + + IDirect3DDevice9_SetTexture(priv->d3d_device, 0, + d3dtex_get_render_texture(priv, &priv->osd_texture)); + + IDirect3DDevice9_SetRenderState(priv->d3d_device, D3DRS_SRCBLEND, + D3DBLEND_ONE); + + IDirect3DDevice9_SetFVF(priv->d3d_device, D3DFVF_OSD_VERTEX); + IDirect3DDevice9_DrawPrimitiveUP(priv->d3d_device, D3DPT_TRIANGLELIST, + priv->osd_num_vertices / 3, + priv->osd_vertices, sizeof(vertex_osd)); + + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_SRCBLEND, D3DBLEND_SRCALPHA); + + IDirect3DDevice9_SetTexture(priv->d3d_device, 0, NULL); + + IDirect3DDevice9_SetRenderState(priv->d3d_device, + D3DRS_ALPHABLENDENABLE, FALSE); +} + +#define OPT_BASE_STRUCT d3d_priv + +static const struct m_option opts[] = { + {"force-power-of-2", OPT_BOOL(opt_force_power_of_2)}, + {"disable-texture-align", OPT_BOOL(opt_disable_texture_align)}, + {"texture-memory", OPT_CHOICE(opt_texture_memory, + {"default", 0}, + {"managed", 1}, + {"default-pool", 2}, + {"default-pool-shadow", 3}, + {"scratch", 4})}, + {"swap-discard", OPT_BOOL(opt_swap_discard)}, + {"exact-backbuffer", 
OPT_BOOL(opt_exact_backbuffer)}, + {0} +}; + +const struct vo_driver video_out_direct3d = { + .description = "Direct3D 9 Renderer", + .name = "direct3d", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(d3d_priv), + .options = opts, + .options_prefix = "vo-direct3d", +}; diff --git a/video/out/vo_dmabuf_wayland.c b/video/out/vo_dmabuf_wayland.c new file mode 100644 index 0000000..e04ff5d --- /dev/null +++ b/video/out/vo_dmabuf_wayland.c @@ -0,0 +1,872 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <libavutil/hwcontext_drm.h> +#include <sys/mman.h> +#include <unistd.h> +#include "config.h" + +#if HAVE_VAAPI +#include <va/va_drmcommon.h> +#endif + +#include "common/global.h" +#include "gpu/hwdec.h" +#include "gpu/video.h" +#include "mpv_talloc.h" +#include "present_sync.h" +#include "sub/draw_bmp.h" +#include "video/fmt-conversion.h" +#include "video/mp_image.h" +#include "vo.h" +#include "wayland_common.h" +#include "wldmabuf/ra_wldmabuf.h" + +#if HAVE_VAAPI +#include "video/vaapi.h" +#endif + +// Generated from wayland-protocols +#include "linux-dmabuf-unstable-v1.h" +#include "viewporter.h" + +#if HAVE_WAYLAND_PROTOCOLS_1_27 +#include "single-pixel-buffer-v1.h" +#endif + +// We need at least enough buffers to avoid a +// flickering artifact in certain formats. +#define WL_BUFFERS_WANTED 15 + +enum hwdec_type { + HWDEC_NONE, + HWDEC_VAAPI, + HWDEC_DRMPRIME, +}; + +struct buffer { + struct vo *vo; + struct wl_buffer *buffer; + struct wl_list link; + struct vo_frame *frame; + + uint32_t drm_format; + uintptr_t id; +}; + +struct osd_buffer { + struct vo *vo; + struct wl_buffer *buffer; + struct wl_list link; + struct mp_image image; + size_t size; +}; + +struct priv { + struct mp_log *log; + struct mp_rect src; + struct mpv_global *global; + + struct ra_ctx *ctx; + struct ra_hwdec_ctx hwdec_ctx; + + struct wl_shm_pool *solid_buffer_pool; + struct wl_buffer *solid_buffer; + struct wl_list buffer_list; + struct wl_list osd_buffer_list; + + struct wl_shm_pool *osd_shm_pool; + uint8_t *osd_shm_data; + int osd_shm_width; + int osd_shm_stride; + int osd_shm_height; + bool osd_surface_is_mapped; + bool osd_surface_has_contents; + + struct osd_buffer *osd_buffer; + struct mp_draw_sub_cache *osd_cache; + struct mp_osd_res screen_osd_res; + + bool destroy_buffers; + bool force_window; + enum hwdec_type hwdec_type; + uint32_t drm_format; + uint64_t drm_modifier; +}; + +static void buffer_handle_release(void *data, struct wl_buffer *wl_buffer) +{ + struct 
buffer *buf = data; + if (buf->frame) { + talloc_free(buf->frame); + buf->frame = NULL; + } +} + +static const struct wl_buffer_listener buffer_listener = { + buffer_handle_release, +}; + +static void osd_buffer_handle_release(void *data, struct wl_buffer *wl_buffer) +{ + struct osd_buffer *osd_buf = data; + wl_list_remove(&osd_buf->link); + if (osd_buf->buffer) { + wl_buffer_destroy(osd_buf->buffer); + osd_buf->buffer = NULL; + } + talloc_free(osd_buf); +} + +static const struct wl_buffer_listener osd_buffer_listener = { + osd_buffer_handle_release, +}; + +#if HAVE_VAAPI +static void close_file_descriptors(VADRMPRIMESurfaceDescriptor desc) +{ + for (int i = 0; i < desc.num_objects; i++) + close(desc.objects[i].fd); +} +#endif + +static uintptr_t vaapi_surface_id(struct mp_image *src) +{ + uintptr_t id = 0; +#if HAVE_VAAPI + id = (uintptr_t)va_surface_id(src); +#endif + return id; +} + +static bool vaapi_drm_format(struct vo *vo, struct mp_image *src) +{ + bool format = false; +#if HAVE_VAAPI + struct priv *p = vo->priv; + VADRMPRIMESurfaceDescriptor desc = {0}; + + uintptr_t id = vaapi_surface_id(src); + VADisplay display = ra_get_native_resource(p->ctx->ra, "VADisplay"); + VAStatus status = vaExportSurfaceHandle(display, id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_COMPOSED_LAYERS | VA_EXPORT_SURFACE_READ_ONLY, &desc); + + if (!CHECK_VA_STATUS(vo, "vaExportSurfaceHandle()")) { + /* invalid surface warning => composed layers not supported */ + if (status == VA_STATUS_ERROR_INVALID_SURFACE) + MP_VERBOSE(vo, "vaExportSurfaceHandle: composed layers not supported.\n"); + goto done; + } + p->drm_format = desc.layers[0].drm_format; + p->drm_modifier = desc.objects[0].drm_format_modifier; + format = true; +done: + close_file_descriptors(desc); +#endif + return format; +} + +static void vaapi_dmabuf_importer(struct buffer *buf, struct mp_image *src, + struct zwp_linux_buffer_params_v1 *params) +{ +#if HAVE_VAAPI + struct vo *vo = buf->vo; + struct priv 
*p = vo->priv; + VADRMPRIMESurfaceDescriptor desc = {0}; + VADisplay display = ra_get_native_resource(p->ctx->ra, "VADisplay"); + + /* composed has single layer */ + int layer_no = 0; + buf->id = vaapi_surface_id(src); + VAStatus status = vaExportSurfaceHandle(display, buf->id, VA_SURFACE_ATTRIB_MEM_TYPE_DRM_PRIME_2, + VA_EXPORT_SURFACE_COMPOSED_LAYERS | VA_EXPORT_SURFACE_READ_ONLY, &desc); + + if (!CHECK_VA_STATUS(vo, "vaExportSurfaceHandle()")) { + /* invalid surface warning => composed layers not supported */ + if (status == VA_STATUS_ERROR_INVALID_SURFACE) + MP_VERBOSE(vo, "vaExportSurfaceHandle: composed layers not supported.\n"); + goto done; + } + buf->drm_format = desc.layers[layer_no].drm_format; + if (!ra_compatible_format(p->ctx->ra, buf->drm_format, desc.objects[0].drm_format_modifier)) { + MP_VERBOSE(vo, "%s(%016lx) is not supported.\n", + mp_tag_str(buf->drm_format), desc.objects[0].drm_format_modifier); + buf->drm_format = 0; + goto done; + } + for (int plane_no = 0; plane_no < desc.layers[layer_no].num_planes; ++plane_no) { + int object = desc.layers[layer_no].object_index[plane_no]; + uint64_t modifier = desc.objects[object].drm_format_modifier; + zwp_linux_buffer_params_v1_add(params, desc.objects[object].fd, plane_no, desc.layers[layer_no].offset[plane_no], + desc.layers[layer_no].pitch[plane_no], modifier >> 32, modifier & 0xffffffff); + } + +done: + close_file_descriptors(desc); +#endif +} + +static uintptr_t drmprime_surface_id(struct mp_image *src) +{ + uintptr_t id = 0; + struct AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->planes[0]; + + AVDRMObjectDescriptor object = desc->objects[0]; + id = (uintptr_t)object.fd; + return id; +} + +static bool drmprime_drm_format(struct vo *vo, struct mp_image *src) +{ + struct priv *p = vo->priv; + struct AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->planes[0]; + if (!desc) + return false; + + // Just check the very first layer/plane. 
+ p->drm_format = desc->layers[0].format; + int object_index = desc->layers[0].planes[0].object_index; + p->drm_modifier = desc->objects[object_index].format_modifier; + return true; +} + +static void drmprime_dmabuf_importer(struct buffer *buf, struct mp_image *src, + struct zwp_linux_buffer_params_v1 *params) +{ + int layer_no, plane_no; + int max_planes = 0; + const AVDRMFrameDescriptor *desc = (AVDRMFrameDescriptor *)src->planes[0]; + if (!desc) + return; + + buf->id = drmprime_surface_id(src); + for (layer_no = 0; layer_no < desc->nb_layers; layer_no++) { + AVDRMLayerDescriptor layer = desc->layers[layer_no]; + + buf->drm_format = layer.format; + max_planes = MPMAX(max_planes, layer.nb_planes); + for (plane_no = 0; plane_no < layer.nb_planes; ++plane_no) { + AVDRMPlaneDescriptor plane = layer.planes[plane_no]; + int object_index = plane.object_index; + AVDRMObjectDescriptor object = desc->objects[object_index]; + uint64_t modifier = object.format_modifier; + + zwp_linux_buffer_params_v1_add(params, object.fd, plane_no, plane.offset, + plane.pitch, modifier >> 32, modifier & 0xffffffff); + } + } +} + +static intptr_t surface_id(struct vo *vo, struct mp_image *src) +{ + struct priv *p = vo->priv; + switch(p->hwdec_type) { + case HWDEC_VAAPI: + return vaapi_surface_id(src); + case HWDEC_DRMPRIME: + return drmprime_surface_id(src); + default: + return 0; + } +} + +static bool drm_format_check(struct vo *vo, struct mp_image *src) +{ + struct priv *p = vo->priv; + switch(p->hwdec_type) { + case HWDEC_VAAPI: + return vaapi_drm_format(vo, src); + case HWDEC_DRMPRIME: + return drmprime_drm_format(vo, src); + } + return false; +} + +static struct buffer *buffer_check(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + + /* Make more buffers if we're not at the desired amount yet. 
*/ + if (wl_list_length(&p->buffer_list) < WL_BUFFERS_WANTED) + goto done; + + uintptr_t id = surface_id(vo, frame->current); + struct buffer *buf; + wl_list_for_each(buf, &p->buffer_list, link) { + if (buf->id == id) { + if (buf->frame) + talloc_free(buf->frame); + buf->frame = frame; + return buf; + } + } + +done: + return NULL; +} + +static struct buffer *buffer_create(struct vo *vo, struct vo_frame *frame) +{ + struct vo_wayland_state *wl = vo->wl; + struct priv *p = vo->priv; + + struct buffer *buf = talloc_zero(vo, struct buffer); + buf->vo = vo; + buf->frame = frame; + + struct mp_image *image = buf->frame->current; + struct zwp_linux_buffer_params_v1 *params = zwp_linux_dmabuf_v1_create_params(wl->dmabuf); + + switch(p->hwdec_type) { + case HWDEC_VAAPI: + vaapi_dmabuf_importer(buf, image, params); + break; + case HWDEC_DRMPRIME: + drmprime_dmabuf_importer(buf, image, params); + break; + } + + if (!buf->drm_format) { + talloc_free(buf->frame); + talloc_free(buf); + zwp_linux_buffer_params_v1_destroy(params); + return NULL; + } + + buf->buffer = zwp_linux_buffer_params_v1_create_immed(params, image->params.w, image->params.h, + buf->drm_format, 0); + zwp_linux_buffer_params_v1_destroy(params); + wl_buffer_add_listener(buf->buffer, &buffer_listener, buf); + wl_list_insert(&p->buffer_list, &buf->link); + return buf; +} + +static struct buffer *buffer_get(struct vo *vo, struct vo_frame *frame) +{ + /* Reuse existing buffer if possible. 
*/ + struct buffer *buf = buffer_check(vo, frame); + if (buf) { + return buf; + } else { + return buffer_create(vo, frame); + } +} + +static void destroy_buffers(struct vo *vo) +{ + struct priv *p = vo->priv; + struct buffer *buf, *tmp; + p->destroy_buffers = false; + wl_list_for_each_safe(buf, tmp, &p->buffer_list, link) { + wl_list_remove(&buf->link); + if (buf->frame) { + talloc_free(buf->frame); + buf->frame = NULL; + } + if (buf->buffer) { + wl_buffer_destroy(buf->buffer); + buf->buffer = NULL; + } + talloc_free(buf); + } +} + +static void destroy_osd_buffers(struct vo *vo) +{ + if (!vo->wl) + return; + + // Remove any existing buffer before we destroy them. + wl_surface_attach(vo->wl->osd_surface, NULL, 0, 0); + wl_surface_commit(vo->wl->osd_surface); + + struct priv *p = vo->priv; + struct osd_buffer *osd_buf, *tmp; + wl_list_for_each_safe(osd_buf, tmp, &p->osd_buffer_list, link) { + wl_list_remove(&osd_buf->link); + munmap(osd_buf->image.planes[0], osd_buf->size); + if (osd_buf->buffer) { + wl_buffer_destroy(osd_buf->buffer); + osd_buf->buffer = NULL; + } + } +} + +static struct osd_buffer *osd_buffer_check(struct vo *vo) +{ + struct priv *p = vo->priv; + struct osd_buffer *osd_buf; + wl_list_for_each(osd_buf, &p->osd_buffer_list, link) { + return osd_buf; + } + return NULL; +} + +static struct osd_buffer *osd_buffer_create(struct vo *vo) +{ + struct priv *p = vo->priv; + struct osd_buffer *osd_buf = talloc_zero(vo, struct osd_buffer); + + osd_buf->vo = vo; + osd_buf->size = p->osd_shm_height * p->osd_shm_stride; + mp_image_set_size(&osd_buf->image, p->osd_shm_width, p->osd_shm_height); + osd_buf->image.planes[0] = p->osd_shm_data; + osd_buf->image.stride[0] = p->osd_shm_stride; + osd_buf->buffer = wl_shm_pool_create_buffer(p->osd_shm_pool, 0, + p->osd_shm_width, p->osd_shm_height, + p->osd_shm_stride, WL_SHM_FORMAT_ARGB8888); + + if (!osd_buf->buffer) { + talloc_free(osd_buf); + return NULL; + } + + wl_list_insert(&p->osd_buffer_list, &osd_buf->link); + 
wl_buffer_add_listener(osd_buf->buffer, &osd_buffer_listener, osd_buf); + return osd_buf; +} + +static struct osd_buffer *osd_buffer_get(struct vo *vo) +{ + struct osd_buffer *osd_buf = osd_buffer_check(vo); + if (osd_buf) { + return osd_buf; + } else { + return osd_buffer_create(vo); + } +} + +static void create_shm_pool(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + struct priv *p = vo->priv; + + int stride = MP_ALIGN_UP(vo->dwidth * 4, 16); + size_t size = vo->dheight * stride; + int fd = vo_wayland_allocate_memfd(vo, size); + if (fd < 0) + return; + uint8_t *data = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (data == MAP_FAILED) + goto error1; + struct wl_shm_pool *pool = wl_shm_create_pool(wl->shm, fd, size); + if (!pool) + goto error2; + close(fd); + + destroy_osd_buffers(vo); + + if (p->osd_shm_pool) + wl_shm_pool_destroy(p->osd_shm_pool); + p->osd_shm_pool = pool; + p->osd_shm_width = vo->dwidth; + p->osd_shm_height = vo->dheight; + p->osd_shm_stride = stride; + p->osd_shm_data = data; + return; + +error2: + munmap(data, size); +error1: + close(fd); +} + +static void set_viewport_source(struct vo *vo, struct mp_rect src) +{ + struct priv *p = vo->priv; + struct vo_wayland_state *wl = vo->wl; + + if (p->force_window) + return; + + if (wl->video_viewport && !mp_rect_equals(&p->src, &src)) { + wp_viewport_set_source(wl->video_viewport, src.x0 << 8, + src.y0 << 8, mp_rect_w(src) << 8, + mp_rect_h(src) << 8); + p->src = src; + } +} + +static void resize(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + struct priv *p = vo->priv; + + struct mp_rect src; + struct mp_rect dst; + struct mp_vo_opts *vo_opts = wl->vo_opts; + + const int width = mp_rect_w(wl->geometry); + const int height = mp_rect_h(wl->geometry); + + if (width == 0 || height == 0) + return; + + vo_wayland_set_opaque_region(wl, false); + vo->dwidth = width; + vo->dheight = height; + + create_shm_pool(vo); + + // top level viewport is calculated with pan set 
to zero + vo->opts->pan_x = 0; + vo->opts->pan_y = 0; + vo_get_src_dst_rects(vo, &src, &dst, &p->screen_osd_res); + int window_w = p->screen_osd_res.ml + p->screen_osd_res.mr + mp_rect_w(dst); + int window_h = p->screen_osd_res.mt + p->screen_osd_res.mb + mp_rect_h(dst); + wp_viewport_set_destination(wl->viewport, window_w, window_h); + + //now we restore pan for video viewport calculation + vo->opts->pan_x = vo_opts->pan_x; + vo->opts->pan_y = vo_opts->pan_y; + vo_get_src_dst_rects(vo, &src, &dst, &p->screen_osd_res); + wp_viewport_set_destination(wl->video_viewport, mp_rect_w(dst), mp_rect_h(dst)); + wl_subsurface_set_position(wl->video_subsurface, dst.x0, dst.y0); + wp_viewport_set_destination(wl->osd_viewport, vo->dwidth, vo->dheight); + wl_subsurface_set_position(wl->osd_subsurface, 0 - dst.x0, 0 - dst.y0); + set_viewport_source(vo, src); +} + +static bool draw_osd(struct vo *vo, struct mp_image *cur, double pts) +{ + struct priv *p = vo->priv; + struct mp_osd_res *res = &p->screen_osd_res; + bool draw = false; + + struct sub_bitmap_list *sbs = osd_render(vo->osd, *res, pts, 0, mp_draw_sub_formats); + + if (!sbs) + return draw; + + struct mp_rect act_rc[1], mod_rc[64]; + int num_act_rc = 0, num_mod_rc = 0; + + if (!p->osd_cache) + p->osd_cache = mp_draw_sub_alloc(p, vo->global); + + struct mp_image *osd = mp_draw_sub_overlay(p->osd_cache, sbs, act_rc, + MP_ARRAY_SIZE(act_rc), &num_act_rc, + mod_rc, MP_ARRAY_SIZE(mod_rc), &num_mod_rc); + + p->osd_surface_has_contents = num_act_rc > 0; + + if (!osd || !num_mod_rc) + goto done; + + for (int n = 0; n < num_mod_rc; n++) { + struct mp_rect rc = mod_rc[n]; + + int rw = mp_rect_w(rc); + int rh = mp_rect_h(rc); + + void *src = mp_image_pixel_ptr(osd, 0, rc.x0, rc.y0); + void *dst = cur->planes[0] + rc.x0 * 4 + rc.y0 * cur->stride[0]; + + memcpy_pic(dst, src, rw * 4, rh, cur->stride[0], osd->stride[0]); + } + + draw = true; +done: + talloc_free(sbs); + return draw; +} + +static void draw_frame(struct vo *vo, struct 
vo_frame *frame) +{ + struct priv *p = vo->priv; + struct vo_wayland_state *wl = vo->wl; + struct buffer *buf; + struct osd_buffer *osd_buf; + double pts; + + if (!vo_wayland_check_visible(vo)) { + if (frame->current) + talloc_free(frame); + return; + } + + if (p->destroy_buffers) + destroy_buffers(vo); + + // Reuse the solid buffer so the osd can be visible + if (p->force_window) { + wl_surface_attach(wl->video_surface, p->solid_buffer, 0, 0); + wl_surface_damage_buffer(wl->video_surface, 0, 0, 1, 1); + } + + pts = frame->current ? frame->current->pts : 0; + if (frame->current) { + buf = buffer_get(vo, frame); + + if (buf && buf->frame) { + struct mp_image *image = buf->frame->current; + wl_surface_attach(wl->video_surface, buf->buffer, 0, 0); + wl_surface_damage_buffer(wl->video_surface, 0, 0, image->w, + image->h); + + } + } + + osd_buf = osd_buffer_get(vo); + if (osd_buf && osd_buf->buffer) { + if (draw_osd(vo, &osd_buf->image, pts) && p->osd_surface_has_contents) { + wl_surface_attach(wl->osd_surface, osd_buf->buffer, 0, 0); + wl_surface_damage_buffer(wl->osd_surface, 0, 0, osd_buf->image.w, + osd_buf->image.h); + p->osd_surface_is_mapped = true; + } else if (!p->osd_surface_has_contents && p->osd_surface_is_mapped) { + wl_surface_attach(wl->osd_surface, NULL, 0, 0); + p->osd_surface_is_mapped = false; + } + } +} + +static void flip_page(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + + wl_surface_commit(wl->video_surface); + wl_surface_commit(wl->osd_surface); + wl_surface_commit(wl->surface); + + if (!wl->opts->disable_vsync) + vo_wayland_wait_frame(wl); + + if (wl->use_present) + present_sync_swap(wl->present); +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct vo_wayland_state *wl = vo->wl; + if (wl->use_present) + present_sync_get_info(wl->present, info); +} + +static bool is_supported_fmt(int fmt) +{ + return (fmt == IMGFMT_DRMPRIME || fmt == IMGFMT_VAAPI); +} + +static int query_format(struct vo *vo, int 
format) +{ + return is_supported_fmt(format); +} + +static int reconfig(struct vo *vo, struct mp_image *img) +{ + struct priv *p = vo->priv; + + if (img->params.force_window) { + p->force_window = true; + goto done; + } + + if (!drm_format_check(vo, img)) { + MP_ERR(vo, "Unable to get drm format from hardware decoding!\n"); + return VO_ERROR; + } + + if (!ra_compatible_format(p->ctx->ra, p->drm_format, p->drm_modifier)) { + MP_ERR(vo, "Format '%s' with modifier '(%016lx)' is not supported by" + " the compositor.\n", mp_tag_str(p->drm_format), p->drm_modifier); + return VO_ERROR; + } + + p->force_window = false; +done: + if (!vo_wayland_reconfig(vo)) + return VO_ERROR; + + // mpv rotates clockwise but the wayland spec has counter-clockwise rotations + // swap 1 and 3 to match mpv's direction + int transform = (360 - img->params.rotate) % 360 / 90; + wl_surface_set_buffer_transform(vo->wl->video_surface, transform); + + // Immediately destroy all buffers if params change. + destroy_buffers(vo); + return 0; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct priv *p = vo->priv; + int events = 0; + int ret; + + switch (request) { + case VOCTRL_RESET: + p->destroy_buffers = true; + return VO_TRUE; + case VOCTRL_SET_PANSCAN: + resize(vo); + return VO_TRUE; + } + + ret = vo_wayland_control(vo, &events, request, data); + if (events & VO_EVENT_RESIZE) + resize(vo); + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + vo_event(vo, events); + + return ret; +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + + destroy_buffers(vo); + destroy_osd_buffers(vo); + if (p->osd_shm_pool) + wl_shm_pool_destroy(p->osd_shm_pool); + if (p->solid_buffer_pool) + wl_shm_pool_destroy(p->solid_buffer_pool); + if (p->solid_buffer) + wl_buffer_destroy(p->solid_buffer); + ra_hwdec_ctx_uninit(&p->hwdec_ctx); + if (vo->hwdec_devs) { + hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL); + hwdec_devices_destroy(vo->hwdec_devs); + } + + 
vo_wayland_uninit(vo); + ra_ctx_destroy(&p->ctx); +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + + p->log = vo->log; + p->global = vo->global; + p->ctx = ra_ctx_create_by_name(vo, "wldmabuf"); + wl_list_init(&p->buffer_list); + wl_list_init(&p->osd_buffer_list); + if (!p->ctx) + goto err; + + assert(p->ctx->ra); + + if (!vo->wl->dmabuf || !vo->wl->dmabuf_feedback) { + MP_FATAL(vo->wl, "Compositor doesn't support the %s (ver. 4) protocol!\n", + zwp_linux_dmabuf_v1_interface.name); + goto err; + } + + if (!vo->wl->shm) { + MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n", + wl_shm_interface.name); + goto err; + } + + if (!vo->wl->video_subsurface) { + MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n", + wl_subcompositor_interface.name); + goto err; + } + + if (!vo->wl->viewport) { + MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n", + wp_viewporter_interface.name); + goto err; + } + + if (vo->wl->single_pixel_manager) { +#if HAVE_WAYLAND_PROTOCOLS_1_27 + p->solid_buffer = wp_single_pixel_buffer_manager_v1_create_u32_rgba_buffer( + vo->wl->single_pixel_manager, 0, 0, 0, UINT32_MAX); /* R, G, B, A */ +#endif + } else { + int width = 1; + int height = 1; + int stride = MP_ALIGN_UP(width * 4, 16); + int fd = vo_wayland_allocate_memfd(vo, stride); + if (fd < 0) + goto err; + p->solid_buffer_pool = wl_shm_create_pool(vo->wl->shm, fd, height * stride); + close(fd); + if (!p->solid_buffer_pool) + goto err; + p->solid_buffer = wl_shm_pool_create_buffer( + p->solid_buffer_pool, 0, width, height, stride, WL_SHM_FORMAT_XRGB8888); + } + if (!p->solid_buffer) + goto err; + + wl_surface_attach(vo->wl->surface, p->solid_buffer, 0, 0); + + vo->hwdec_devs = hwdec_devices_create(); + p->hwdec_ctx = (struct ra_hwdec_ctx) { + .log = p->log, + .global = p->global, + .ra_ctx = p->ctx, + }; + ra_hwdec_ctx_init(&p->hwdec_ctx, vo->hwdec_devs, NULL, true); + + // Loop through hardware accelerated formats and only request 
known + // supported formats. + for (int i = IMGFMT_VDPAU_OUTPUT; i < IMGFMT_AVPIXFMT_START; ++i) { + if (is_supported_fmt(i)) { + struct hwdec_imgfmt_request params = { + .imgfmt = i, + .probing = false, + }; + ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, vo->hwdec_devs, ¶ms); + } + } + + for (int i = 0; i < p->hwdec_ctx.num_hwdecs; i++) { + struct ra_hwdec *hw = p->hwdec_ctx.hwdecs[i]; + if (ra_get_native_resource(p->ctx->ra, "VADisplay")) { + p->hwdec_type = HWDEC_VAAPI; + } else if (strcmp(hw->driver->name, "drmprime") == 0) { + p->hwdec_type = HWDEC_DRMPRIME; + } + } + + if (p->hwdec_type == HWDEC_NONE) { + MP_ERR(vo, "No valid hardware decoding driver could be loaded!\n"); + goto err; + } + + p->src = (struct mp_rect){0, 0, 0, 0}; + return 0; + +err: + uninit(vo); + return -1; +} + +const struct vo_driver video_out_dmabuf_wayland = { + .description = "Wayland dmabuf video output", + .name = "dmabuf-wayland", + .caps = VO_CAP_ROTATE90, + .frame_owner = true, + .preinit = preinit, + .query_format = query_format, + .reconfig2 = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .get_vsync = get_vsync, + .wakeup = vo_wayland_wakeup, + .wait_events = vo_wayland_wait_events, + .uninit = uninit, + .priv_size = sizeof(struct priv), +}; diff --git a/video/out/vo_drm.c b/video/out/vo_drm.c new file mode 100644 index 0000000..aae73f7 --- /dev/null +++ b/video/out/vo_drm.c @@ -0,0 +1,458 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <errno.h> +#include <fcntl.h> +#include <stdbool.h> +#include <sys/mman.h> +#include <poll.h> +#include <unistd.h> + +#include <drm_fourcc.h> +#include <libswscale/swscale.h> + +#include "common/msg.h" +#include "drm_atomic.h" +#include "drm_common.h" +#include "osdep/timer.h" +#include "sub/osd.h" +#include "video/fmt-conversion.h" +#include "video/mp_image.h" +#include "video/out/present_sync.h" +#include "video/sws_utils.h" +#include "vo.h" + +#define IMGFMT_XRGB8888 IMGFMT_BGR0 +#if BYTE_ORDER == BIG_ENDIAN +#define IMGFMT_XRGB2101010 pixfmt2imgfmt(AV_PIX_FMT_GBRP10BE) +#else +#define IMGFMT_XRGB2101010 pixfmt2imgfmt(AV_PIX_FMT_GBRP10LE) +#endif + +#define BYTES_PER_PIXEL 4 +#define BITS_PER_PIXEL 32 + +struct drm_frame { + struct framebuffer *fb; +}; + +struct priv { + struct drm_frame **fb_queue; + unsigned int fb_queue_len; + + uint32_t drm_format; + enum mp_imgfmt imgfmt; + + struct mp_image *last_input; + struct mp_image *cur_frame; + struct mp_image *cur_frame_cropped; + struct mp_rect src; + struct mp_rect dst; + struct mp_osd_res osd; + struct mp_sws_context *sws; + + struct framebuffer **bufs; + int front_buf; + int buf_count; +}; + +static void destroy_framebuffer(int fd, struct framebuffer *fb) +{ + if (!fb) + return; + + if (fb->map) { + munmap(fb->map, fb->size); + } + if (fb->id) { + drmModeRmFB(fd, fb->id); + } + if (fb->handle) { + struct drm_mode_destroy_dumb dreq = { + .handle = fb->handle, + }; + drmIoctl(fd, DRM_IOCTL_MODE_DESTROY_DUMB, &dreq); + } +} + +static struct framebuffer *setup_framebuffer(struct vo *vo) +{ + struct priv *p = vo->priv; + struct vo_drm_state *drm = vo->drm; + + struct framebuffer *fb = talloc_zero(drm, struct framebuffer); + fb->width = drm->mode.mode.hdisplay; + fb->height = drm->mode.mode.vdisplay; + fb->fd = drm->fd; + fb->handle = 0; 
+ + // create dumb buffer + struct drm_mode_create_dumb creq = { + .width = fb->width, + .height = fb->height, + .bpp = BITS_PER_PIXEL, + }; + + if (drmIoctl(drm->fd, DRM_IOCTL_MODE_CREATE_DUMB, &creq) < 0) { + MP_ERR(vo, "Cannot create dumb buffer: %s\n", mp_strerror(errno)); + goto err; + } + + fb->stride = creq.pitch; + fb->size = creq.size; + fb->handle = creq.handle; + + // select format + if (drm->opts->drm_format == DRM_OPTS_FORMAT_XRGB2101010) { + p->drm_format = DRM_FORMAT_XRGB2101010; + p->imgfmt = IMGFMT_XRGB2101010; + } else { + p->drm_format = DRM_FORMAT_XRGB8888;; + p->imgfmt = IMGFMT_XRGB8888; + } + + // create framebuffer object for the dumb-buffer + int ret = drmModeAddFB2(fb->fd, fb->width, fb->height, + p->drm_format, + (uint32_t[4]){fb->handle, 0, 0, 0}, + (uint32_t[4]){fb->stride, 0, 0, 0}, + (uint32_t[4]){0, 0, 0, 0}, + &fb->id, 0); + if (ret) { + MP_ERR(vo, "Cannot create framebuffer: %s\n", mp_strerror(errno)); + goto err; + } + + // prepare buffer for memory mapping + struct drm_mode_map_dumb mreq = { + .handle = fb->handle, + }; + if (drmIoctl(drm->fd, DRM_IOCTL_MODE_MAP_DUMB, &mreq)) { + MP_ERR(vo, "Cannot map dumb buffer: %s\n", mp_strerror(errno)); + goto err; + } + + // perform actual memory mapping + fb->map = mmap(0, fb->size, PROT_READ | PROT_WRITE, MAP_SHARED, + drm->fd, mreq.offset); + if (fb->map == MAP_FAILED) { + MP_ERR(vo, "Cannot map dumb buffer: %s\n", mp_strerror(errno)); + goto err; + } + + memset(fb->map, 0, fb->size); + return fb; + +err: + destroy_framebuffer(drm->fd, fb); + return NULL; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *p = vo->priv; + struct vo_drm_state *drm = vo->drm; + + vo->dwidth =drm->fb->width; + vo->dheight = drm->fb->height; + vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd); + + int w = p->dst.x1 - p->dst.x0; + int h = p->dst.y1 - p->dst.y0; + + p->sws->src = *params; + p->sws->dst = (struct mp_image_params) { + .imgfmt = p->imgfmt, + .w = w, + .h = 
h, + .p_w = 1, + .p_h = 1, + }; + + talloc_free(p->cur_frame); + p->cur_frame = mp_image_alloc(p->imgfmt, drm->fb->width, drm->fb->height); + mp_image_params_guess_csp(&p->sws->dst); + mp_image_set_params(p->cur_frame, &p->sws->dst); + mp_image_set_size(p->cur_frame, drm->fb->width, drm->fb->height); + + talloc_free(p->cur_frame_cropped); + p->cur_frame_cropped = mp_image_new_dummy_ref(p->cur_frame); + mp_image_crop_rc(p->cur_frame_cropped, p->dst); + + talloc_free(p->last_input); + p->last_input = NULL; + + if (mp_sws_reinit(p->sws) < 0) + return -1; + + vo->want_redraw = true; + return 0; +} + +static struct framebuffer *get_new_fb(struct vo *vo) +{ + struct priv *p = vo->priv; + + p->front_buf++; + p->front_buf %= p->buf_count; + + return p->bufs[p->front_buf]; +} + +static void draw_image(struct vo *vo, mp_image_t *mpi, struct framebuffer *buf) +{ + struct priv *p = vo->priv; + struct vo_drm_state *drm = vo->drm; + + if (drm->active && buf != NULL) { + if (mpi) { + struct mp_image src = *mpi; + struct mp_rect src_rc = p->src; + src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, mpi->fmt.align_x); + src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, mpi->fmt.align_y); + mp_image_crop_rc(&src, src_rc); + + mp_image_clear(p->cur_frame, 0, 0, p->cur_frame->w, p->dst.y0); + mp_image_clear(p->cur_frame, 0, p->dst.y1, p->cur_frame->w, p->cur_frame->h); + mp_image_clear(p->cur_frame, 0, p->dst.y0, p->dst.x0, p->dst.y1); + mp_image_clear(p->cur_frame, p->dst.x1, p->dst.y0, p->cur_frame->w, p->dst.y1); + + mp_sws_scale(p->sws, p->cur_frame_cropped, &src); + osd_draw_on_image(vo->osd, p->osd, src.pts, 0, p->cur_frame); + } else { + mp_image_clear(p->cur_frame, 0, 0, p->cur_frame->w, p->cur_frame->h); + osd_draw_on_image(vo->osd, p->osd, 0, 0, p->cur_frame); + } + + if (p->drm_format == DRM_FORMAT_XRGB2101010) { + // Pack GBRP10 image into XRGB2101010 for DRM + const int w = p->cur_frame->w; + const int h = p->cur_frame->h; + + const int g_padding = p->cur_frame->stride[0]/sizeof(uint16_t) - w; + 
const int b_padding = p->cur_frame->stride[1]/sizeof(uint16_t) - w; + const int r_padding = p->cur_frame->stride[2]/sizeof(uint16_t) - w; + const int fbuf_padding = buf->stride/sizeof(uint32_t) - w; + + uint16_t *g_ptr = (uint16_t*)p->cur_frame->planes[0]; + uint16_t *b_ptr = (uint16_t*)p->cur_frame->planes[1]; + uint16_t *r_ptr = (uint16_t*)p->cur_frame->planes[2]; + uint32_t *fbuf_ptr = (uint32_t*)buf->map; + for (unsigned y = 0; y < h; ++y) { + for (unsigned x = 0; x < w; ++x) { + *fbuf_ptr++ = (*r_ptr++ << 20) | (*g_ptr++ << 10) | (*b_ptr++); + } + g_ptr += g_padding; + b_ptr += b_padding; + r_ptr += r_padding; + fbuf_ptr += fbuf_padding; + } + } else { // p->drm_format == DRM_FORMAT_XRGB8888 + memcpy_pic(buf->map, p->cur_frame->planes[0], + p->cur_frame->w * BYTES_PER_PIXEL, p->cur_frame->h, + buf->stride, + p->cur_frame->stride[0]); + } + } + + if (mpi != p->last_input) { + talloc_free(p->last_input); + p->last_input = mpi; + } +} + +static void enqueue_frame(struct vo *vo, struct framebuffer *fb) +{ + struct priv *p = vo->priv; + + struct drm_frame *new_frame = talloc(p, struct drm_frame); + new_frame->fb = fb; + MP_TARRAY_APPEND(p, p->fb_queue, p->fb_queue_len, new_frame); +} + +static void dequeue_frame(struct vo *vo) +{ + struct priv *p = vo->priv; + + talloc_free(p->fb_queue[0]); + MP_TARRAY_REMOVE_AT(p->fb_queue, p->fb_queue_len, 0); +} + +static void swapchain_step(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (p->fb_queue_len > 0) { + dequeue_frame(vo); + } +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct vo_drm_state *drm = vo->drm; + struct priv *p = vo->priv; + + if (!drm->active) + return; + + drm->still = frame->still; + + // we redraw the entire image when OSD needs to be redrawn + struct framebuffer *fb = p->bufs[p->front_buf]; + const bool repeat = frame->repeat && !frame->redraw; + if (!repeat) { + fb = get_new_fb(vo); + draw_image(vo, mp_image_new_ref(frame->current), fb); + } + + enqueue_frame(vo, fb); 
+} + +static void queue_flip(struct vo *vo, struct drm_frame *frame) +{ + struct vo_drm_state *drm = vo->drm; + + drm->fb = frame->fb; + + int ret = drmModePageFlip(drm->fd, drm->crtc_id, + drm->fb->id, DRM_MODE_PAGE_FLIP_EVENT, drm); + if (ret) + MP_WARN(vo, "Failed to queue page flip: %s\n", mp_strerror(errno)); + drm->waiting_for_flip = !ret; +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + struct vo_drm_state *drm = vo->drm; + const bool drain = drm->paused || drm->still; + + if (!drm->active) + return; + + while (drain || p->fb_queue_len > vo->opts->swapchain_depth) { + if (drm->waiting_for_flip) { + vo_drm_wait_on_flip(vo->drm); + swapchain_step(vo); + } + if (p->fb_queue_len <= 1) + break; + if (!p->fb_queue[1] || !p->fb_queue[1]->fb) { + MP_ERR(vo, "Hole in swapchain?\n"); + swapchain_step(vo); + continue; + } + queue_flip(vo, p->fb_queue[1]); + } +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct vo_drm_state *drm = vo->drm; + present_sync_get_info(drm->present, info); +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + + vo_drm_uninit(vo); + + while (p->fb_queue_len > 0) { + swapchain_step(vo); + } + + talloc_free(p->last_input); + talloc_free(p->cur_frame); + talloc_free(p->cur_frame_cropped); +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (!vo_drm_init(vo)) + goto err; + + struct vo_drm_state *drm = vo->drm; + p->buf_count = vo->opts->swapchain_depth + 1; + p->bufs = talloc_zero_array(p, struct framebuffer *, p->buf_count); + + p->front_buf = 0; + for (int i = 0; i < p->buf_count; i++) { + p->bufs[i] = setup_framebuffer(vo); + if (!p->bufs[i]) + goto err; + } + drm->fb = p->bufs[0]; + + vo->drm->width = vo->drm->fb->width; + vo->drm->height = vo->drm->fb->height; + + if (!vo_drm_acquire_crtc(vo->drm)) { + MP_ERR(vo, "Failed to set CRTC for connector %u: %s\n", + vo->drm->connector->connector_id, mp_strerror(errno)); + goto err; + } + + 
vo_drm_set_monitor_par(vo); + p->sws = mp_sws_alloc(vo); + p->sws->log = vo->log; + mp_sws_enable_cmdline_opts(p->sws, vo->global); + return 0; + +err: + uninit(vo); + return -1; +} + +static int query_format(struct vo *vo, int format) +{ + return sws_isSupportedInput(imgfmt2pixfmt(format)); +} + +static int control(struct vo *vo, uint32_t request, void *arg) +{ + switch (request) { + case VOCTRL_SET_PANSCAN: + if (vo->config_ok) + reconfig(vo, vo->params); + return VO_TRUE; + } + + int events = 0; + int ret = vo_drm_control(vo, &events, request, arg); + vo_event(vo, events); + return ret; +} + +const struct vo_driver video_out_drm = { + .name = "drm", + .description = "Direct Rendering Manager (software scaling)", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .get_vsync = get_vsync, + .uninit = uninit, + .wait_events = vo_drm_wait_events, + .wakeup = vo_drm_wakeup, + .priv_size = sizeof(struct priv), +}; diff --git a/video/out/vo_gpu.c b/video/out/vo_gpu.c new file mode 100644 index 0000000..c02e6e7 --- /dev/null +++ b/video/out/vo_gpu.c @@ -0,0 +1,336 @@ +/* + * Based on vo_gl.c by Reimar Doeffinger. + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> +#include <assert.h> + +#include <libavutil/common.h> + +#include "mpv_talloc.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "common/msg.h" +#include "common/global.h" +#include "options/m_config.h" +#include "vo.h" +#include "video/mp_image.h" +#include "sub/osd.h" + +#include "gpu/context.h" +#include "gpu/hwdec.h" +#include "gpu/video.h" + +struct gpu_priv { + struct mp_log *log; + struct ra_ctx *ctx; + + char *context_name; + char *context_type; + struct gl_video *renderer; + + int events; +}; +static void resize(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + + MP_VERBOSE(vo, "Resize: %dx%d\n", vo->dwidth, vo->dheight); + + struct mp_rect src, dst; + struct mp_osd_res osd; + vo_get_src_dst_rects(vo, &src, &dst, &osd); + + gl_video_resize(p->renderer, &src, &dst, &osd); + + int fb_depth = sw->fns->color_depth ? 
sw->fns->color_depth(sw) : 0; + if (fb_depth) + MP_VERBOSE(p, "Reported display depth: %d\n", fb_depth); + gl_video_set_fb_depth(p->renderer, fb_depth); + + vo->want_redraw = true; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + + struct ra_fbo fbo; + if (!sw->fns->start_frame(sw, &fbo)) + return; + + gl_video_render_frame(p->renderer, frame, fbo, RENDER_FRAME_DEF); + if (!sw->fns->submit_frame(sw, frame)) { + MP_ERR(vo, "Failed presenting frame!\n"); + return; + } +} + +static void flip_page(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + sw->fns->swap_buffers(sw); +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct gpu_priv *p = vo->priv; + struct ra_swapchain *sw = p->ctx->swapchain; + if (sw->fns->get_vsync) + sw->fns->get_vsync(sw, info); +} + +static int query_format(struct vo *vo, int format) +{ + struct gpu_priv *p = vo->priv; + if (!gl_video_check_format(p->renderer, format)) + return 0; + return 1; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct gpu_priv *p = vo->priv; + + if (!p->ctx->fns->reconfig(p->ctx)) + return -1; + + resize(vo); + gl_video_config(p->renderer, params); + + return 0; +} + +static void request_hwdec_api(struct vo *vo, void *data) +{ + struct gpu_priv *p = vo->priv; + gl_video_load_hwdecs_for_img_fmt(p->renderer, vo->hwdec_devs, data); +} + +static void call_request_hwdec_api(void *ctx, + struct hwdec_imgfmt_request *params) +{ + // Roundabout way to run hwdec loading on the VO thread. + // Redirects to request_hwdec_api(). 
+ vo_control(ctx, VOCTRL_LOAD_HWDEC_API, params); +} + +static void get_and_update_icc_profile(struct gpu_priv *p) +{ + if (gl_video_icc_auto_enabled(p->renderer)) { + MP_VERBOSE(p, "Querying ICC profile...\n"); + bstr icc = bstr0(NULL); + int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_ICC_PROFILE, &icc); + + if (r != VO_NOTAVAIL) { + if (r == VO_FALSE) { + MP_WARN(p, "Could not retrieve an ICC profile.\n"); + } else if (r == VO_NOTIMPL) { + MP_ERR(p, "icc-profile-auto not implemented on this platform.\n"); + } + + gl_video_set_icc_profile(p->renderer, icc); + } + } +} + +static void get_and_update_ambient_lighting(struct gpu_priv *p) +{ + int lux; + int r = p->ctx->fns->control(p->ctx, &p->events, VOCTRL_GET_AMBIENT_LUX, &lux); + if (r == VO_TRUE) { + gl_video_set_ambient_lux(p->renderer, lux); + } + if (r != VO_TRUE && gl_video_gamma_auto_enabled(p->renderer)) { + MP_ERR(p, "gamma_auto option provided, but querying for ambient" + " lighting is not supported on this platform\n"); + } +} + +static void update_ra_ctx_options(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + + /* Only the alpha option has any runtime toggle ability. 
*/ + struct gl_video_opts *gl_opts = mp_get_config_group(p->ctx, vo->global, &gl_video_conf); + p->ctx->opts.want_alpha = gl_opts->alpha_mode == 1; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct gpu_priv *p = vo->priv; + + switch (request) { + case VOCTRL_SET_PANSCAN: + resize(vo); + return VO_TRUE; + case VOCTRL_SET_EQUALIZER: + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SCREENSHOT: { + struct vo_frame *frame = vo_get_current_vo_frame(vo); + if (frame) + gl_video_screenshot(p->renderer, frame, data); + talloc_free(frame); + return true; + } + case VOCTRL_LOAD_HWDEC_API: + request_hwdec_api(vo, data); + return true; + case VOCTRL_UPDATE_RENDER_OPTS: { + update_ra_ctx_options(vo); + gl_video_configure_queue(p->renderer, vo); + get_and_update_icc_profile(p); + if (p->ctx->fns->update_render_opts) + p->ctx->fns->update_render_opts(p->ctx); + vo->want_redraw = true; + return true; + } + case VOCTRL_RESET: + gl_video_reset(p->renderer); + return true; + case VOCTRL_PAUSE: + if (gl_video_showing_interpolated_frame(p->renderer)) + vo->want_redraw = true; + return true; + case VOCTRL_PERFORMANCE_DATA: + gl_video_perfdata(p->renderer, (struct voctrl_performance_data *)data); + return true; + case VOCTRL_EXTERNAL_RESIZE: + p->ctx->fns->reconfig(p->ctx); + resize(vo); + return true; + } + + int events = 0; + int r = p->ctx->fns->control(p->ctx, &events, request, data); + if (events & VO_EVENT_ICC_PROFILE_CHANGED) { + get_and_update_icc_profile(p); + vo->want_redraw = true; + } + if (events & VO_EVENT_AMBIENT_LIGHTING_CHANGED) { + get_and_update_ambient_lighting(p); + vo->want_redraw = true; + } + events |= p->events; + p->events = 0; + if (events & VO_EVENT_RESIZE) + resize(vo); + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + vo_event(vo, events); + + return r; +} + +static void wakeup(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + if (p->ctx && p->ctx->fns->wakeup) + p->ctx->fns->wakeup(p->ctx); +} + +static 
void wait_events(struct vo *vo, int64_t until_time_ns) +{ + struct gpu_priv *p = vo->priv; + if (p->ctx && p->ctx->fns->wait_events) { + p->ctx->fns->wait_events(p->ctx, until_time_ns); + } else { + vo_wait_default(vo, until_time_ns); + } +} + +static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h, + int stride_align, int flags) +{ + struct gpu_priv *p = vo->priv; + + return gl_video_get_image(p->renderer, imgfmt, w, h, stride_align, flags); +} + +static void uninit(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + + gl_video_uninit(p->renderer); + if (vo->hwdec_devs) { + hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL); + hwdec_devices_destroy(vo->hwdec_devs); + } + ra_ctx_destroy(&p->ctx); +} + +static int preinit(struct vo *vo) +{ + struct gpu_priv *p = vo->priv; + p->log = vo->log; + + struct ra_ctx_opts *ctx_opts = mp_get_config_group(vo, vo->global, &ra_ctx_conf); + struct gl_video_opts *gl_opts = mp_get_config_group(vo, vo->global, &gl_video_conf); + struct ra_ctx_opts opts = *ctx_opts; + opts.want_alpha = gl_opts->alpha_mode == 1; + p->ctx = ra_ctx_create(vo, opts); + talloc_free(ctx_opts); + talloc_free(gl_opts); + if (!p->ctx) + goto err_out; + assert(p->ctx->ra); + assert(p->ctx->swapchain); + + p->renderer = gl_video_init(p->ctx->ra, vo->log, vo->global); + gl_video_set_osd_source(p->renderer, vo->osd); + gl_video_configure_queue(p->renderer, vo); + + get_and_update_icc_profile(p); + + vo->hwdec_devs = hwdec_devices_create(); + hwdec_devices_set_loader(vo->hwdec_devs, call_request_hwdec_api, vo); + + gl_video_init_hwdecs(p->renderer, p->ctx, vo->hwdec_devs, false); + + return 0; + +err_out: + uninit(vo); + return -1; +} + +const struct vo_driver video_out_gpu = { + .description = "Shader-based GPU Renderer", + .name = "gpu", + .caps = VO_CAP_ROTATE90, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .get_image = get_image, + .draw_frame = draw_frame, + .flip_page = 
flip_page, + .get_vsync = get_vsync, + .wait_events = wait_events, + .wakeup = wakeup, + .uninit = uninit, + .priv_size = sizeof(struct gpu_priv), +}; diff --git a/video/out/vo_gpu_next.c b/video/out/vo_gpu_next.c new file mode 100644 index 0000000..1dc1b18 --- /dev/null +++ b/video/out/vo_gpu_next.c @@ -0,0 +1,2104 @@ +/* + * Copyright (C) 2021 Niklas Haas + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <unistd.h> + +#include <libplacebo/colorspace.h> +#include <libplacebo/options.h> +#include <libplacebo/renderer.h> +#include <libplacebo/shaders/lut.h> +#include <libplacebo/shaders/icc.h> +#include <libplacebo/utils/libav.h> +#include <libplacebo/utils/frame_queue.h> + +#include "config.h" +#include "common/common.h" +#include "options/m_config.h" +#include "options/path.h" +#include "osdep/io.h" +#include "osdep/threads.h" +#include "stream/stream.h" +#include "video/fmt-conversion.h" +#include "video/mp_image.h" +#include "video/out/placebo/ra_pl.h" +#include "placebo/utils.h" +#include "gpu/context.h" +#include "gpu/hwdec.h" +#include "gpu/video.h" +#include "gpu/video_shaders.h" +#include "sub/osd.h" +#include "gpu_next/context.h" + +#if HAVE_GL && defined(PL_HAVE_OPENGL) +#include <libplacebo/opengl.h> +#include "video/out/opengl/ra_gl.h" +#endif + +#if HAVE_D3D11 && defined(PL_HAVE_D3D11) +#include <libplacebo/d3d11.h> +#include "video/out/d3d11/ra_d3d11.h" +#include "osdep/windows_utils.h" +#endif + + +struct osd_entry { + pl_tex tex; + struct pl_overlay_part *parts; + int num_parts; +}; + +struct osd_state { + struct osd_entry entries[MAX_OSD_PARTS]; + struct pl_overlay overlays[MAX_OSD_PARTS]; +}; + +struct scaler_params { + struct pl_filter_config config; +}; + +struct user_hook { + char *path; + const struct pl_hook *hook; +}; + +struct user_lut { + char *opt; + char *path; + int type; + struct pl_custom_lut *lut; +}; + +struct frame_info { + int count; + struct pl_dispatch_info info[VO_PASS_PERF_MAX]; +}; + +struct cache { + char *path; + pl_cache cache; + uint64_t sig; +}; + +struct priv { + struct mp_log *log; + struct mpv_global *global; + struct ra_ctx *ra_ctx; + struct gpu_ctx *context; + struct ra_hwdec_ctx hwdec_ctx; + struct ra_hwdec_mapper *hwdec_mapper; + + // Allocated DR buffers + mp_mutex dr_lock; + pl_buf *dr_buffers; + int num_dr_buffers; + + pl_log pllog; + pl_gpu gpu; + pl_renderer rr; + pl_queue queue; + pl_swapchain 
sw; + pl_fmt osd_fmt[SUBBITMAP_COUNT]; + pl_tex *sub_tex; + int num_sub_tex; + + struct mp_rect src, dst; + struct mp_osd_res osd_res; + struct osd_state osd_state; + + uint64_t last_id; + uint64_t osd_sync; + double last_pts; + bool is_interpolated; + bool want_reset; + bool frame_pending; + bool redraw; + + pl_options pars; + struct m_config_cache *opts_cache; + struct cache shader_cache, icc_cache; + struct mp_csp_equalizer_state *video_eq; + struct scaler_params scalers[SCALER_COUNT]; + const struct pl_hook **hooks; // storage for `params.hooks` + enum mp_csp_levels output_levels; + char **raw_opts; + + struct pl_icc_params icc_params; + char *icc_path; + pl_icc_object icc_profile; + + struct user_lut image_lut; + struct user_lut target_lut; + struct user_lut lut; + + // Cached shaders, preserved across options updates + struct user_hook *user_hooks; + int num_user_hooks; + + // Performance data of last frame + struct frame_info perf_fresh; + struct frame_info perf_redraw; + + bool delayed_peak; + bool inter_preserve; + bool target_hint; + + float corner_rounding; +}; + +static void update_render_options(struct vo *vo); +static void update_lut(struct priv *p, struct user_lut *lut); + +static pl_buf get_dr_buf(struct priv *p, const uint8_t *ptr) +{ + mp_mutex_lock(&p->dr_lock); + + for (int i = 0; i < p->num_dr_buffers; i++) { + pl_buf buf = p->dr_buffers[i]; + if (ptr >= buf->data && ptr < buf->data + buf->params.size) { + mp_mutex_unlock(&p->dr_lock); + return buf; + } + } + + mp_mutex_unlock(&p->dr_lock); + return NULL; +} + +static void free_dr_buf(void *opaque, uint8_t *data) +{ + struct priv *p = opaque; + mp_mutex_lock(&p->dr_lock); + + for (int i = 0; i < p->num_dr_buffers; i++) { + if (p->dr_buffers[i]->data == data) { + pl_buf_destroy(p->gpu, &p->dr_buffers[i]); + MP_TARRAY_REMOVE_AT(p->dr_buffers, p->num_dr_buffers, i); + mp_mutex_unlock(&p->dr_lock); + return; + } + } + + MP_ASSERT_UNREACHABLE(); +} + +static struct mp_image *get_image(struct vo *vo, 
int imgfmt, int w, int h, + int stride_align, int flags) +{ + struct priv *p = vo->priv; + pl_gpu gpu = p->gpu; + if (!gpu->limits.thread_safe || !gpu->limits.max_mapped_size) + return NULL; + + if ((flags & VO_DR_FLAG_HOST_CACHED) && !gpu->limits.host_cached) + return NULL; + + stride_align = mp_lcm(stride_align, gpu->limits.align_tex_xfer_pitch); + stride_align = mp_lcm(stride_align, gpu->limits.align_tex_xfer_offset); + int size = mp_image_get_alloc_size(imgfmt, w, h, stride_align); + if (size < 0) + return NULL; + + pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) { + .memory_type = PL_BUF_MEM_HOST, + .host_mapped = true, + .size = size + stride_align, + }); + + if (!buf) + return NULL; + + struct mp_image *mpi = mp_image_from_buffer(imgfmt, w, h, stride_align, + buf->data, buf->params.size, + p, free_dr_buf); + if (!mpi) { + pl_buf_destroy(gpu, &buf); + return NULL; + } + + mp_mutex_lock(&p->dr_lock); + MP_TARRAY_APPEND(p, p->dr_buffers, p->num_dr_buffers, buf); + mp_mutex_unlock(&p->dr_lock); + + return mpi; +} + +static struct pl_color_space get_mpi_csp(struct vo *vo, struct mp_image *mpi); + +static void update_overlays(struct vo *vo, struct mp_osd_res res, + int flags, enum pl_overlay_coords coords, + struct osd_state *state, struct pl_frame *frame, + struct mp_image *src) +{ + struct priv *p = vo->priv; + static const bool subfmt_all[SUBBITMAP_COUNT] = { + [SUBBITMAP_LIBASS] = true, + [SUBBITMAP_BGRA] = true, + }; + + double pts = src ? 
src->pts : 0; + struct sub_bitmap_list *subs = osd_render(vo->osd, res, pts, flags, subfmt_all); + + frame->overlays = state->overlays; + frame->num_overlays = 0; + + for (int n = 0; n < subs->num_items; n++) { + const struct sub_bitmaps *item = subs->items[n]; + if (!item->num_parts || !item->packed) + continue; + struct osd_entry *entry = &state->entries[item->render_index]; + pl_fmt tex_fmt = p->osd_fmt[item->format]; + if (!entry->tex) + MP_TARRAY_POP(p->sub_tex, p->num_sub_tex, &entry->tex); + bool ok = pl_tex_recreate(p->gpu, &entry->tex, &(struct pl_tex_params) { + .format = tex_fmt, + .w = MPMAX(item->packed_w, entry->tex ? entry->tex->params.w : 0), + .h = MPMAX(item->packed_h, entry->tex ? entry->tex->params.h : 0), + .host_writable = true, + .sampleable = true, + }); + if (!ok) { + MP_ERR(vo, "Failed recreating OSD texture!\n"); + break; + } + ok = pl_tex_upload(p->gpu, &(struct pl_tex_transfer_params) { + .tex = entry->tex, + .rc = { .x1 = item->packed_w, .y1 = item->packed_h, }, + .row_pitch = item->packed->stride[0], + .ptr = item->packed->planes[0], + }); + if (!ok) { + MP_ERR(vo, "Failed uploading OSD texture!\n"); + break; + } + + entry->num_parts = 0; + for (int i = 0; i < item->num_parts; i++) { + const struct sub_bitmap *b = &item->parts[i]; + uint32_t c = b->libass.color; + struct pl_overlay_part part = { + .src = { b->src_x, b->src_y, b->src_x + b->w, b->src_y + b->h }, + .dst = { b->x, b->y, b->x + b->dw, b->y + b->dh }, + .color = { + (c >> 24) / 255.0, + ((c >> 16) & 0xFF) / 255.0, + ((c >> 8) & 0xFF) / 255.0, + 1.0 - (c & 0xFF) / 255.0, + } + }; + MP_TARRAY_APPEND(p, entry->parts, entry->num_parts, part); + } + + struct pl_overlay *ol = &state->overlays[frame->num_overlays++]; + *ol = (struct pl_overlay) { + .tex = entry->tex, + .parts = entry->parts, + .num_parts = entry->num_parts, + .color = { + .primaries = PL_COLOR_PRIM_BT_709, + .transfer = PL_COLOR_TRC_SRGB, + }, + .coords = coords, + }; + + switch (item->format) { + case 
SUBBITMAP_BGRA: + ol->mode = PL_OVERLAY_NORMAL; + ol->repr.alpha = PL_ALPHA_PREMULTIPLIED; + // Infer bitmap colorspace from source + if (src) { + ol->color = get_mpi_csp(vo, src); + // Seems like HDR subtitles are targeting SDR white + if (pl_color_transfer_is_hdr(ol->color.transfer)) { + ol->color.hdr = (struct pl_hdr_metadata) { + .max_luma = PL_COLOR_SDR_WHITE, + }; + } + } + break; + case SUBBITMAP_LIBASS: + ol->mode = PL_OVERLAY_MONOCHROME; + ol->repr.alpha = PL_ALPHA_INDEPENDENT; + break; + } + } + + talloc_free(subs); +} + +struct frame_priv { + struct vo *vo; + struct osd_state subs; + uint64_t osd_sync; + struct ra_hwdec *hwdec; +}; + +static int plane_data_from_imgfmt(struct pl_plane_data out_data[4], + struct pl_bit_encoding *out_bits, + enum mp_imgfmt imgfmt) +{ + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(imgfmt); + if (!desc.num_planes || !(desc.flags & MP_IMGFLAG_HAS_COMPS)) + return 0; + + if (desc.flags & MP_IMGFLAG_HWACCEL) + return 0; // HW-accelerated frames need to be mapped differently + + if (!(desc.flags & MP_IMGFLAG_NE)) + return 0; // GPU endianness follows the host's + + if (desc.flags & MP_IMGFLAG_PAL) + return 0; // Palette formats (currently) not supported in libplacebo + + if ((desc.flags & MP_IMGFLAG_TYPE_FLOAT) && (desc.flags & MP_IMGFLAG_YUV)) + return 0; // Floating-point YUV (currently) unsupported + + bool has_bits = false; + bool any_padded = false; + + for (int p = 0; p < desc.num_planes; p++) { + struct pl_plane_data *data = &out_data[p]; + struct mp_imgfmt_comp_desc sorted[MP_NUM_COMPONENTS]; + int num_comps = 0; + if (desc.bpp[p] % 8) + return 0; // Pixel size is not byte-aligned + + for (int c = 0; c < mp_imgfmt_desc_get_num_comps(&desc); c++) { + if (desc.comps[c].plane != p) + continue; + + data->component_map[num_comps] = c; + sorted[num_comps] = desc.comps[c]; + num_comps++; + + // Sort components by offset order, while keeping track of the + // semantic mapping in `data->component_map` + for (int i = num_comps - 
1; i > 0; i--) { + if (sorted[i].offset >= sorted[i - 1].offset) + break; + MPSWAP(struct mp_imgfmt_comp_desc, sorted[i], sorted[i - 1]); + MPSWAP(int, data->component_map[i], data->component_map[i - 1]); + } + } + + uint64_t total_bits = 0; + + // Fill in the pl_plane_data fields for each component + memset(data->component_size, 0, sizeof(data->component_size)); + for (int c = 0; c < num_comps; c++) { + data->component_size[c] = sorted[c].size; + data->component_pad[c] = sorted[c].offset - total_bits; + total_bits += data->component_pad[c] + data->component_size[c]; + any_padded |= sorted[c].pad; + + // Ignore bit encoding of alpha channel + if (!out_bits || data->component_map[c] == PL_CHANNEL_A) + continue; + + struct pl_bit_encoding bits = { + .sample_depth = data->component_size[c], + .color_depth = sorted[c].size - abs(sorted[c].pad), + .bit_shift = MPMAX(sorted[c].pad, 0), + }; + + if (!has_bits) { + *out_bits = bits; + has_bits = true; + } else { + if (!pl_bit_encoding_equal(out_bits, &bits)) { + // Bit encoding differs between components/planes, + // cannot handle this + *out_bits = (struct pl_bit_encoding) {0}; + out_bits = NULL; + } + } + } + + data->pixel_stride = desc.bpp[p] / 8; + data->type = (desc.flags & MP_IMGFLAG_TYPE_FLOAT) + ? 
PL_FMT_FLOAT + : PL_FMT_UNORM; + } + + if (any_padded && !out_bits) + return 0; // can't handle padded components without `pl_bit_encoding` + + return desc.num_planes; +} + +static struct pl_color_space get_mpi_csp(struct vo *vo, struct mp_image *mpi) +{ + struct pl_color_space csp = { + .primaries = mp_prim_to_pl(mpi->params.color.primaries), + .transfer = mp_trc_to_pl(mpi->params.color.gamma), + .hdr = mpi->params.color.hdr, + }; + return csp; +} + +static bool hwdec_reconfig(struct priv *p, struct ra_hwdec *hwdec, + const struct mp_image_params *par) +{ + if (p->hwdec_mapper) { + if (mp_image_params_equal(par, &p->hwdec_mapper->src_params)) { + return p->hwdec_mapper; + } else { + ra_hwdec_mapper_free(&p->hwdec_mapper); + } + } + + p->hwdec_mapper = ra_hwdec_mapper_create(hwdec, par); + if (!p->hwdec_mapper) { + MP_ERR(p, "Initializing texture for hardware decoding failed.\n"); + return NULL; + } + + return p->hwdec_mapper; +} + +// For RAs not based on ra_pl, this creates a new pl_tex wrapper +static pl_tex hwdec_get_tex(struct priv *p, int n) +{ + struct ra_tex *ratex = p->hwdec_mapper->tex[n]; + struct ra *ra = p->hwdec_mapper->ra; + if (ra_pl_get(ra)) + return (pl_tex) ratex->priv; + +#if HAVE_GL && defined(PL_HAVE_OPENGL) + if (ra_is_gl(ra) && pl_opengl_get(p->gpu)) { + struct pl_opengl_wrap_params par = { + .width = ratex->params.w, + .height = ratex->params.h, + }; + + ra_gl_get_format(ratex->params.format, &par.iformat, + &(GLenum){0}, &(GLenum){0}); + ra_gl_get_raw_tex(ra, ratex, &par.texture, &par.target); + return pl_opengl_wrap(p->gpu, &par); + } +#endif + +#if HAVE_D3D11 && defined(PL_HAVE_D3D11) + if (ra_is_d3d11(ra)) { + int array_slice = 0; + ID3D11Resource *res = ra_d3d11_get_raw_tex(ra, ratex, &array_slice); + pl_tex tex = pl_d3d11_wrap(p->gpu, pl_d3d11_wrap_params( + .tex = res, + .array_slice = array_slice, + .fmt = ra_d3d11_get_format(ratex->params.format), + .w = ratex->params.w, + .h = ratex->params.h, + )); + SAFE_RELEASE(res); + return 
tex; + } +#endif + + MP_ERR(p, "Failed mapping hwdec frame? Open a bug!\n"); + return false; +} + +static bool hwdec_acquire(pl_gpu gpu, struct pl_frame *frame) +{ + struct mp_image *mpi = frame->user_data; + struct frame_priv *fp = mpi->priv; + struct priv *p = fp->vo->priv; + if (!hwdec_reconfig(p, fp->hwdec, &mpi->params)) + return false; + + if (ra_hwdec_mapper_map(p->hwdec_mapper, mpi) < 0) { + MP_ERR(p, "Mapping hardware decoded surface failed.\n"); + return false; + } + + for (int n = 0; n < frame->num_planes; n++) { + if (!(frame->planes[n].texture = hwdec_get_tex(p, n))) + return false; + } + + return true; +} + +static void hwdec_release(pl_gpu gpu, struct pl_frame *frame) +{ + struct mp_image *mpi = frame->user_data; + struct frame_priv *fp = mpi->priv; + struct priv *p = fp->vo->priv; + if (!ra_pl_get(p->hwdec_mapper->ra)) { + for (int n = 0; n < frame->num_planes; n++) + pl_tex_destroy(p->gpu, &frame->planes[n].texture); + } + + ra_hwdec_mapper_unmap(p->hwdec_mapper); +} + +static bool map_frame(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, + struct pl_frame *frame) +{ + struct mp_image *mpi = src->frame_data; + const struct mp_image_params *par = &mpi->params; + struct frame_priv *fp = mpi->priv; + struct vo *vo = fp->vo; + struct priv *p = vo->priv; + + fp->hwdec = ra_hwdec_get(&p->hwdec_ctx, mpi->imgfmt); + if (fp->hwdec) { + // Note: We don't actually need the mapper to map the frame yet, we + // only reconfig the mapper here (potentially creating it) to access + // `dst_params`. In practice, though, this should not matter unless the + // image format changes mid-stream. 
+ if (!hwdec_reconfig(p, fp->hwdec, &mpi->params)) { + talloc_free(mpi); + return false; + } + + par = &p->hwdec_mapper->dst_params; + } + + *frame = (struct pl_frame) { + .color = get_mpi_csp(vo, mpi), + .repr = { + .sys = mp_csp_to_pl(par->color.space), + .levels = mp_levels_to_pl(par->color.levels), + .alpha = mp_alpha_to_pl(par->alpha), + }, + .profile = { + .data = mpi->icc_profile ? mpi->icc_profile->data : NULL, + .len = mpi->icc_profile ? mpi->icc_profile->size : 0, + }, + .rotation = par->rotate / 90, + .user_data = mpi, + }; + + // mp_image, like AVFrame, likes communicating RGB/XYZ/YCbCr status + // implicitly via the image format, rather than the actual tagging. + switch (mp_imgfmt_get_forced_csp(par->imgfmt)) { + case MP_CSP_RGB: + frame->repr.sys = PL_COLOR_SYSTEM_RGB; + frame->repr.levels = PL_COLOR_LEVELS_FULL; + break; + case MP_CSP_XYZ: + frame->repr.sys = PL_COLOR_SYSTEM_XYZ; + break; + case MP_CSP_AUTO: + if (!frame->repr.sys) + frame->repr.sys = pl_color_system_guess_ycbcr(par->w, par->h); + break; + default: break; + } + + if (fp->hwdec) { + + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(par->imgfmt); + frame->acquire = hwdec_acquire; + frame->release = hwdec_release; + frame->num_planes = desc.num_planes; + for (int n = 0; n < frame->num_planes; n++) { + struct pl_plane *plane = &frame->planes[n]; + int *map = plane->component_mapping; + for (int c = 0; c < mp_imgfmt_desc_get_num_comps(&desc); c++) { + if (desc.comps[c].plane != n) + continue; + + // Sort by component offset + uint8_t offset = desc.comps[c].offset; + int index = plane->components++; + while (index > 0 && desc.comps[map[index - 1]].offset > offset) { + map[index] = map[index - 1]; + index--; + } + map[index] = c; + } + } + + } else { // swdec + + struct pl_plane_data data[4] = {0}; + frame->num_planes = plane_data_from_imgfmt(data, &frame->repr.bits, mpi->imgfmt); + for (int n = 0; n < frame->num_planes; n++) { + struct pl_plane *plane = &frame->planes[n]; + data[n].width = 
mp_image_plane_w(mpi, n); + data[n].height = mp_image_plane_h(mpi, n); + if (mpi->stride[n] < 0) { + data[n].pixels = mpi->planes[n] + (data[n].height - 1) * mpi->stride[n]; + data[n].row_stride = -mpi->stride[n]; + plane->flipped = true; + } else { + data[n].pixels = mpi->planes[n]; + data[n].row_stride = mpi->stride[n]; + } + + pl_buf buf = get_dr_buf(p, data[n].pixels); + if (buf) { + data[n].buf = buf; + data[n].buf_offset = (uint8_t *) data[n].pixels - buf->data; + data[n].pixels = NULL; + } else if (gpu->limits.callbacks) { + data[n].callback = talloc_free; + data[n].priv = mp_image_new_ref(mpi); + } + + if (!pl_upload_plane(gpu, plane, &tex[n], &data[n])) { + MP_ERR(vo, "Failed uploading frame!\n"); + talloc_free(data[n].priv); + talloc_free(mpi); + return false; + } + } + + } + + // Update chroma location, must be done after initializing planes + pl_frame_set_chroma_location(frame, mp_chroma_to_pl(par->chroma_location)); + + // Set the frame DOVI metadata + mp_map_dovi_metadata_to_pl(mpi, frame); + + if (mpi->film_grain) + pl_film_grain_from_av(&frame->film_grain, (AVFilmGrainParams *) mpi->film_grain->data); + + // Compute a unique signature for any attached ICC profile. Wasteful in + // theory if the ICC profile is the same for multiple frames, but in + // practice ICC profiles are overwhelmingly going to be attached to + // still images so it shouldn't matter. 
+ pl_icc_profile_compute_signature(&frame->profile); + + // Update LUT attached to this frame + update_lut(p, &p->image_lut); + frame->lut = p->image_lut.lut; + frame->lut_type = p->image_lut.type; + return true; +} + +static void unmap_frame(pl_gpu gpu, struct pl_frame *frame, + const struct pl_source_frame *src) +{ + struct mp_image *mpi = src->frame_data; + struct frame_priv *fp = mpi->priv; + struct priv *p = fp->vo->priv; + for (int i = 0; i < MP_ARRAY_SIZE(fp->subs.entries); i++) { + pl_tex tex = fp->subs.entries[i].tex; + if (tex) + MP_TARRAY_APPEND(p, p->sub_tex, p->num_sub_tex, tex); + } + talloc_free(mpi); +} + +static void discard_frame(const struct pl_source_frame *src) +{ + struct mp_image *mpi = src->frame_data; + talloc_free(mpi); +} + +static void info_callback(void *priv, const struct pl_render_info *info) +{ + struct vo *vo = priv; + struct priv *p = vo->priv; + if (info->index >= VO_PASS_PERF_MAX) + return; // silently ignore clipped passes, whatever + + struct frame_info *frame; + switch (info->stage) { + case PL_RENDER_STAGE_FRAME: frame = &p->perf_fresh; break; + case PL_RENDER_STAGE_BLEND: frame = &p->perf_redraw; break; + default: abort(); + } + + frame->count = info->index + 1; + pl_dispatch_info_move(&frame->info[info->index], info->pass); +} + +static void update_options(struct vo *vo) +{ + struct priv *p = vo->priv; + pl_options pars = p->pars; + if (m_config_cache_update(p->opts_cache)) + update_render_options(vo); + + update_lut(p, &p->lut); + pars->params.lut = p->lut.lut; + pars->params.lut_type = p->lut.type; + + // Update equalizer state + struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; + mp_csp_equalizer_state_get(p->video_eq, &cparams); + pars->color_adjustment.brightness = cparams.brightness; + pars->color_adjustment.contrast = cparams.contrast; + pars->color_adjustment.hue = cparams.hue; + pars->color_adjustment.saturation = cparams.saturation; + pars->color_adjustment.gamma = cparams.gamma; + p->output_levels = 
cparams.levels_out; + + for (char **kv = p->raw_opts; kv && kv[0]; kv += 2) + pl_options_set_str(pars, kv[0], kv[1]); +} + +static void apply_target_contrast(struct priv *p, struct pl_color_space *color) +{ + const struct gl_video_opts *opts = p->opts_cache->opts; + + // Auto mode, leave as is + if (!opts->target_contrast) + return; + + // Infinite contrast + if (opts->target_contrast == -1) { + color->hdr.min_luma = 1e-7; + return; + } + + // Infer max_luma for current pl_color_space + pl_color_space_nominal_luma_ex(pl_nominal_luma_params( + .color = color, + // with HDR10 meta to respect value if already set + .metadata = PL_HDR_METADATA_HDR10, + .scaling = PL_HDR_NITS, + .out_max = &color->hdr.max_luma + )); + + color->hdr.min_luma = color->hdr.max_luma / opts->target_contrast; +} + +static void apply_target_options(struct priv *p, struct pl_frame *target) +{ + update_lut(p, &p->target_lut); + target->lut = p->target_lut.lut; + target->lut_type = p->target_lut.type; + + // Colorspace overrides + const struct gl_video_opts *opts = p->opts_cache->opts; + if (p->output_levels) + target->repr.levels = mp_levels_to_pl(p->output_levels); + if (opts->target_prim) + target->color.primaries = mp_prim_to_pl(opts->target_prim); + if (opts->target_trc) + target->color.transfer = mp_trc_to_pl(opts->target_trc); + // If swapchain returned a value use this, override is used in hint + if (opts->target_peak && !target->color.hdr.max_luma) + target->color.hdr.max_luma = opts->target_peak; + if (!target->color.hdr.min_luma) + apply_target_contrast(p, &target->color); + if (opts->target_gamut) { + // Ensure resulting gamut still fits inside container + const struct pl_raw_primaries *gamut, *container; + gamut = pl_raw_primaries_get(mp_prim_to_pl(opts->target_gamut)); + container = pl_raw_primaries_get(target->color.primaries); + target->color.hdr.prim = pl_primaries_clip(gamut, container); + } + if (opts->dither_depth > 0) { + struct pl_bit_encoding *tbits = &target->repr.bits; + 
tbits->color_depth += opts->dither_depth - tbits->sample_depth; + tbits->sample_depth = opts->dither_depth; + } + + if (opts->icc_opts->icc_use_luma) { + p->icc_params.max_luma = 0.0f; + } else { + pl_color_space_nominal_luma_ex(pl_nominal_luma_params( + .color = &target->color, + .metadata = PL_HDR_METADATA_HDR10, // use only static HDR nits + .scaling = PL_HDR_NITS, + .out_max = &p->icc_params.max_luma, + )); + } + + pl_icc_update(p->pllog, &p->icc_profile, NULL, &p->icc_params); + target->icc = p->icc_profile; +} + +static void apply_crop(struct pl_frame *frame, struct mp_rect crop, + int width, int height) +{ + frame->crop = (struct pl_rect2df) { + .x0 = crop.x0, + .y0 = crop.y0, + .x1 = crop.x1, + .y1 = crop.y1, + }; + + // mpv gives us rotated/flipped rects, libplacebo expects unrotated + pl_rect2df_rotate(&frame->crop, -frame->rotation); + if (frame->crop.x1 < frame->crop.x0) { + frame->crop.x0 = width - frame->crop.x0; + frame->crop.x1 = width - frame->crop.x1; + } + + if (frame->crop.y1 < frame->crop.y0) { + frame->crop.y0 = height - frame->crop.y0; + frame->crop.y1 = height - frame->crop.y1; + } +} + +static void update_tm_viz(struct pl_color_map_params *params, + const struct pl_frame *target) +{ + if (!params->visualize_lut) + return; + + // Use right half of screen for TM visualization, constrain to 1:1 AR + const float out_w = fabsf(pl_rect_w(target->crop)); + const float out_h = fabsf(pl_rect_h(target->crop)); + const float size = MPMIN(out_w / 2.0f, out_h); + params->visualize_rect = (pl_rect2df) { + .x0 = 1.0f - size / out_w, + .x1 = 1.0f, + .y0 = 0.0f, + .y1 = size / out_h, + }; + + // Visualize red-blue plane + params->visualize_hue = M_PI / 4.0; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + pl_options pars = p->pars; + pl_gpu gpu = p->gpu; + update_options(vo); + + struct pl_render_params params = pars->params; + const struct gl_video_opts *opts = p->opts_cache->opts; + bool will_redraw =
frame->display_synced && frame->num_vsyncs > 1; + bool cache_frame = will_redraw || frame->still; + bool can_interpolate = opts->interpolation && frame->display_synced && + !frame->still && frame->num_frames > 1; + double pts_offset = can_interpolate ? frame->ideal_frame_vsync : 0; + params.info_callback = info_callback; + params.info_priv = vo; + params.skip_caching_single_frame = !cache_frame; + params.preserve_mixing_cache = p->inter_preserve && !frame->still; + if (frame->still) + params.frame_mixer = NULL; + + // pl_queue advances its internal virtual PTS and culls available frames + // based on this value and the VPS/FPS ratio. Requesting a non-monotonic PTS + // is an invalid use of pl_queue. Reset it if this happens in an attempt to + // recover as much as possible. Ideally, this should never occur, and if it + // does, it should be corrected. The ideal_frame_vsync may be negative if + // the last draw did not align perfectly with the vsync. In this case, we + // should have the previous frame available in pl_queue, or a reset is + // already requested. Clamp the check to 0, as we don't have the previous + // frame in vo_frame anyway. 
+ struct pl_source_frame vpts; + if (frame->current && !p->want_reset) { + if (pl_queue_peek(p->queue, 0, &vpts) && + frame->current->pts + MPMAX(0, pts_offset) < vpts.pts) + { + MP_VERBOSE(vo, "Forcing queue refill, PTS(%f + %f | %f) < VPTS(%f)\n", + frame->current->pts, pts_offset, + frame->ideal_frame_vsync_duration, vpts.pts); + p->want_reset = true; + } + } + + // Push all incoming frames into the frame queue + for (int n = 0; n < frame->num_frames; n++) { + int id = frame->frame_id + n; + + if (p->want_reset) { + pl_renderer_flush_cache(p->rr); + pl_queue_reset(p->queue); + p->last_pts = 0.0; + p->last_id = 0; + p->want_reset = false; + } + + if (id <= p->last_id) + continue; // ignore already seen frames + + struct mp_image *mpi = mp_image_new_ref(frame->frames[n]); + struct frame_priv *fp = talloc_zero(mpi, struct frame_priv); + mpi->priv = fp; + fp->vo = vo; + + pl_queue_push(p->queue, &(struct pl_source_frame) { + .pts = mpi->pts, + .duration = can_interpolate ? frame->approx_duration : 0, + .frame_data = mpi, + .map = map_frame, + .unmap = unmap_frame, + .discard = discard_frame, + }); + + p->last_id = id; + } + + if (p->target_hint && frame->current) { + struct pl_color_space hint = get_mpi_csp(vo, frame->current); + if (opts->target_prim) + hint.primaries = mp_prim_to_pl(opts->target_prim); + if (opts->target_trc) + hint.transfer = mp_trc_to_pl(opts->target_trc); + if (opts->target_peak) + hint.hdr.max_luma = opts->target_peak; + apply_target_contrast(p, &hint); + pl_swapchain_colorspace_hint(p->sw, &hint); + } else if (!p->target_hint) { + pl_swapchain_colorspace_hint(p->sw, NULL); + } + + struct pl_swapchain_frame swframe; + struct ra_swapchain *sw = p->ra_ctx->swapchain; + bool should_draw = sw->fns->start_frame(sw, NULL); // for wayland logic + if (!should_draw || !pl_swapchain_start_frame(p->sw, &swframe)) { + if (frame->current) { + // Advance the queue state to the current PTS to discard unused frames + pl_queue_update(p->queue, NULL, 
pl_queue_params( + .pts = frame->current->pts + pts_offset, + .radius = pl_frame_mix_radius(&params), + .vsync_duration = can_interpolate ? frame->ideal_frame_vsync_duration : 0, +#if PL_API_VER >= 340 + .drift_compensation = 0, +#endif + )); + } + return; + } + + bool valid = false; + p->is_interpolated = false; + + // Calculate target + struct pl_frame target; + pl_frame_from_swapchain(&target, &swframe); + apply_target_options(p, &target); + update_overlays(vo, p->osd_res, + (frame->current && opts->blend_subs) ? OSD_DRAW_OSD_ONLY : 0, + PL_OVERLAY_COORDS_DST_FRAME, &p->osd_state, &target, frame->current); + apply_crop(&target, p->dst, swframe.fbo->params.w, swframe.fbo->params.h); + update_tm_viz(&pars->color_map_params, &target); + + struct pl_frame_mix mix = {0}; + if (frame->current) { + // Update queue state + struct pl_queue_params qparams = *pl_queue_params( + .pts = frame->current->pts + pts_offset, + .radius = pl_frame_mix_radius(&params), + .vsync_duration = can_interpolate ? frame->ideal_frame_vsync_duration : 0, + .interpolation_threshold = opts->interpolation_threshold, +#if PL_API_VER >= 340 + .drift_compensation = 0, +#endif + ); + + // Depending on the vsync ratio, we may be up to half of the vsync + // duration before the current frame time. This works fine because + // pl_queue will have this frame, unless it's after a reset event. In + // this case, start from the first available frame. 
+ struct pl_source_frame first; + if (pl_queue_peek(p->queue, 0, &first) && qparams.pts < first.pts) { + if (first.pts != frame->current->pts) + MP_VERBOSE(vo, "Current PTS(%f) != VPTS(%f)\n", frame->current->pts, first.pts); + MP_VERBOSE(vo, "Clamping first frame PTS from %f to %f\n", qparams.pts, first.pts); + qparams.pts = first.pts; + } + p->last_pts = qparams.pts; + + switch (pl_queue_update(p->queue, &mix, &qparams)) { + case PL_QUEUE_ERR: + MP_ERR(vo, "Failed updating frames!\n"); + goto done; + case PL_QUEUE_EOF: + abort(); // we never signal EOF + case PL_QUEUE_MORE: + // This is expected to happen semi-frequently near the start and + // end of a file, so only log it at high verbosity and move on. + MP_DBG(vo, "Render queue underrun.\n"); + break; + case PL_QUEUE_OK: + break; + } + + // Update source crop and overlays on all existing frames. We + // technically own the `pl_frame` struct so this is kosher. This could + // be partially avoided by instead flushing the queue on resizes, but + // doing it this way avoids unnecessarily re-uploading frames. 
+ for (int i = 0; i < mix.num_frames; i++) { + struct pl_frame *image = (struct pl_frame *) mix.frames[i]; + struct mp_image *mpi = image->user_data; + struct frame_priv *fp = mpi->priv; + apply_crop(image, p->src, vo->params->w, vo->params->h); + if (opts->blend_subs) { + if (frame->redraw || fp->osd_sync < p->osd_sync) { + float rx = pl_rect_w(p->dst) / pl_rect_w(image->crop); + float ry = pl_rect_h(p->dst) / pl_rect_h(image->crop); + struct mp_osd_res res = { + .w = pl_rect_w(p->dst), + .h = pl_rect_h(p->dst), + .ml = -image->crop.x0 * rx, + .mr = (image->crop.x1 - vo->params->w) * rx, + .mt = -image->crop.y0 * ry, + .mb = (image->crop.y1 - vo->params->h) * ry, + .display_par = 1.0, + }; + // TODO: fix this doing pointless updates + if (frame->redraw) + p->osd_sync++; + update_overlays(vo, res, OSD_DRAW_SUB_ONLY, + PL_OVERLAY_COORDS_DST_CROP, + &fp->subs, image, mpi); + fp->osd_sync = p->osd_sync; + } + } else { + // Disable overlays when blend_subs is disabled + image->num_overlays = 0; + fp->osd_sync = 0; + } + + // Update the frame signature to include the current OSD sync + // value, in order to disambiguate between identical frames with + // modified OSD. Shift the OSD sync value by a lot to avoid + // collisions with low signature values. + // + // This is safe to do because `pl_frame_mix.signature` lives in + // temporary memory that is only valid for this `pl_queue_update`. 
+ ((uint64_t *) mix.signatures)[i] ^= fp->osd_sync << 48; + } + } + + // Render frame + if (!pl_render_image_mix(p->rr, &mix, &target, &params)) { + MP_ERR(vo, "Failed rendering frame!\n"); + goto done; + } + + const struct pl_frame *cur_frame = pl_frame_mix_nearest(&mix); + if (cur_frame && vo->params) { + vo->params->color.hdr = cur_frame->color.hdr; + // Augment metadata with peak detection max_pq_y / avg_pq_y + pl_renderer_get_hdr_metadata(p->rr, &vo->params->color.hdr); + } + + p->is_interpolated = pts_offset != 0 && mix.num_frames > 1; + valid = true; + // fall through + +done: + if (!valid) // clear with purple to indicate error + pl_tex_clear(gpu, swframe.fbo, (float[4]){ 0.5, 0.0, 1.0, 1.0 }); + + pl_gpu_flush(gpu); + p->frame_pending = true; +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + struct ra_swapchain *sw = p->ra_ctx->swapchain; + + if (p->frame_pending) { + if (!pl_swapchain_submit_frame(p->sw)) + MP_ERR(vo, "Failed presenting frame!\n"); + p->frame_pending = false; + } + + sw->fns->swap_buffers(sw); +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct priv *p = vo->priv; + struct ra_swapchain *sw = p->ra_ctx->swapchain; + if (sw->fns->get_vsync) + sw->fns->get_vsync(sw, info); +} + +static int query_format(struct vo *vo, int format) +{ + struct priv *p = vo->priv; + if (ra_hwdec_get(&p->hwdec_ctx, format)) + return true; + + struct pl_bit_encoding bits; + struct pl_plane_data data[4] = {0}; + int planes = plane_data_from_imgfmt(data, &bits, format); + if (!planes) + return false; + + for (int i = 0; i < planes; i++) { + if (!pl_plane_find_fmt(p->gpu, NULL, &data[i])) + return false; + } + + return true; +} + +static void resize(struct vo *vo) +{ + struct priv *p = vo->priv; + struct mp_rect src, dst; + struct mp_osd_res osd; + vo_get_src_dst_rects(vo, &src, &dst, &osd); + if (vo->dwidth && vo->dheight) { + gpu_ctx_resize(p->context, vo->dwidth, vo->dheight); + vo->want_redraw = true; + } + + 
if (mp_rect_equals(&p->src, &src) && + mp_rect_equals(&p->dst, &dst) && + osd_res_equals(p->osd_res, osd)) + return; + + p->osd_sync++; + p->osd_res = osd; + p->src = src; + p->dst = dst; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *p = vo->priv; + if (!p->ra_ctx->fns->reconfig(p->ra_ctx)) + return -1; + + resize(vo); + return 0; +} + +// Takes over ownership of `icc`. Can be used to unload profile (icc.len == 0) +static bool update_icc(struct priv *p, struct bstr icc) +{ + struct pl_icc_profile profile = { + .data = icc.start, + .len = icc.len, + }; + + pl_icc_profile_compute_signature(&profile); + + bool ok = pl_icc_update(p->pllog, &p->icc_profile, &profile, &p->icc_params); + talloc_free(icc.start); + return ok; +} + +// Returns whether the ICC profile was updated (even on failure) +static bool update_auto_profile(struct priv *p, int *events) +{ + const struct gl_video_opts *opts = p->opts_cache->opts; + if (!opts->icc_opts || !opts->icc_opts->profile_auto || p->icc_path) + return false; + + MP_VERBOSE(p, "Querying ICC profile...\n"); + bstr icc = {0}; + int r = p->ra_ctx->fns->control(p->ra_ctx, events, VOCTRL_GET_ICC_PROFILE, &icc); + + if (r != VO_NOTAVAIL) { + if (r == VO_FALSE) { + MP_WARN(p, "Could not retrieve an ICC profile.\n"); + } else if (r == VO_NOTIMPL) { + MP_ERR(p, "icc-profile-auto not implemented on this platform.\n"); + } + + update_icc(p, icc); + return true; + } + + return false; +} + +static void video_screenshot(struct vo *vo, struct voctrl_screenshot *args) +{ + struct priv *p = vo->priv; + pl_options pars = p->pars; + pl_gpu gpu = p->gpu; + pl_tex fbo = NULL; + args->res = NULL; + + update_options(vo); + struct pl_render_params params = pars->params; + params.info_callback = NULL; + params.skip_caching_single_frame = true; + params.preserve_mixing_cache = false; + params.frame_mixer = NULL; + + struct pl_peak_detect_params peak_params; + if (params.peak_detect_params) { + peak_params = 
*params.peak_detect_params; + params.peak_detect_params = &peak_params; + peak_params.allow_delayed = false; + } + + // Retrieve the current frame from the frame queue + struct pl_frame_mix mix; + enum pl_queue_status status; + status = pl_queue_update(p->queue, &mix, pl_queue_params( + .pts = p->last_pts, +#if PL_API_VER >= 340 + .drift_compensation = 0, +#endif + )); + assert(status != PL_QUEUE_EOF); + if (status == PL_QUEUE_ERR) { + MP_ERR(vo, "Unknown error occurred while trying to take screenshot!\n"); + return; + } + if (!mix.num_frames) { + MP_ERR(vo, "No frames available to take screenshot of, is a file loaded?\n"); + return; + } + + // Passing an interpolation radius of 0 guarantees that the first frame in + // the resulting mix is the correct frame for this PTS + struct pl_frame image = *(struct pl_frame *) mix.frames[0]; + struct mp_image *mpi = image.user_data; + struct mp_rect src = p->src, dst = p->dst; + struct mp_osd_res osd = p->osd_res; + if (!args->scaled) { + int w, h; + mp_image_params_get_dsize(&mpi->params, &w, &h); + if (w < 1 || h < 1) + return; + + int src_w = mpi->params.w; + int src_h = mpi->params.h; + src = (struct mp_rect) {0, 0, src_w, src_h}; + dst = (struct mp_rect) {0, 0, w, h}; + + if (mp_image_crop_valid(&mpi->params)) + src = mpi->params.crop; + + if (mpi->params.rotate % 180 == 90) { + MPSWAP(int, w, h); + MPSWAP(int, src_w, src_h); + } + mp_rect_rotate(&src, src_w, src_h, mpi->params.rotate); + mp_rect_rotate(&dst, w, h, mpi->params.rotate); + + osd = (struct mp_osd_res) { + .display_par = 1.0, + .w = mp_rect_w(dst), + .h = mp_rect_h(dst), + }; + } + + // Create target FBO, try high bit depth first + int mpfmt; + for (int depth = args->high_bit_depth ? 16 : 8; depth; depth -= 8) { + if (depth == 16) { + mpfmt = IMGFMT_RGBA64; + } else { + mpfmt = p->ra_ctx->opts.want_alpha ? 
IMGFMT_RGBA : IMGFMT_RGB0; + } + pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 4, depth, depth, + PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE); + if (!fmt) + continue; + + fbo = pl_tex_create(gpu, pl_tex_params( + .w = osd.w, + .h = osd.h, + .format = fmt, + .blit_dst = true, + .renderable = true, + .host_readable = true, + .storable = fmt->caps & PL_FMT_CAP_STORABLE, + )); + if (fbo) + break; + } + + if (!fbo) { + MP_ERR(vo, "Failed creating target FBO for screenshot!\n"); + return; + } + + struct pl_frame target = { + .repr = pl_color_repr_rgb, + .num_planes = 1, + .planes[0] = { + .texture = fbo, + .components = 4, + .component_mapping = {0, 1, 2, 3}, + }, + }; + + if (args->scaled) { + // Apply target LUT, ICC profile and CSP override only in window mode + apply_target_options(p, &target); + } else if (args->native_csp) { + target.color = image.color; + } else { + target.color = pl_color_space_srgb; + } + + apply_crop(&image, src, mpi->params.w, mpi->params.h); + apply_crop(&target, dst, fbo->params.w, fbo->params.h); + update_tm_viz(&pars->color_map_params, &target); + + int osd_flags = 0; + if (!args->subs) + osd_flags |= OSD_DRAW_OSD_ONLY; + if (!args->osd) + osd_flags |= OSD_DRAW_SUB_ONLY; + + const struct gl_video_opts *opts = p->opts_cache->opts; + struct frame_priv *fp = mpi->priv; + if (opts->blend_subs) { + float rx = pl_rect_w(dst) / pl_rect_w(image.crop); + float ry = pl_rect_h(dst) / pl_rect_h(image.crop); + struct mp_osd_res res = { + .w = pl_rect_w(dst), + .h = pl_rect_h(dst), + .ml = -image.crop.x0 * rx, + .mr = (image.crop.x1 - vo->params->w) * rx, + .mt = -image.crop.y0 * ry, + .mb = (image.crop.y1 - vo->params->h) * ry, + .display_par = 1.0, + }; + update_overlays(vo, res, osd_flags, + PL_OVERLAY_COORDS_DST_CROP, + &fp->subs, &image, mpi); + } else { + // Disable overlays when blend_subs is disabled + update_overlays(vo, osd, osd_flags, PL_OVERLAY_COORDS_DST_FRAME, + &p->osd_state, &target, mpi); + image.num_overlays = 0; + } + + if 
(!pl_render_image(p->rr, &image, &target, &params)) { + MP_ERR(vo, "Failed rendering frame!\n"); + goto done; + } + + args->res = mp_image_alloc(mpfmt, fbo->params.w, fbo->params.h); + if (!args->res) + goto done; + + args->res->params.color.primaries = mp_prim_from_pl(target.color.primaries); + args->res->params.color.gamma = mp_trc_from_pl(target.color.transfer); + args->res->params.color.levels = mp_levels_from_pl(target.repr.levels); + args->res->params.color.hdr = target.color.hdr; + if (args->scaled) + args->res->params.p_w = args->res->params.p_h = 1; + + bool ok = pl_tex_download(gpu, pl_tex_transfer_params( + .tex = fbo, + .ptr = args->res->planes[0], + .row_pitch = args->res->stride[0], + )); + + if (!ok) + TA_FREEP(&args->res); + + // fall through +done: + pl_tex_destroy(gpu, &fbo); +} + +static inline void copy_frame_info_to_mp(struct frame_info *pl, + struct mp_frame_perf *mp) { + static_assert(MP_ARRAY_SIZE(pl->info) == MP_ARRAY_SIZE(mp->perf), ""); + assert(pl->count <= VO_PASS_PERF_MAX); + mp->count = MPMIN(pl->count, VO_PASS_PERF_MAX); + + for (int i = 0; i < mp->count; ++i) { + const struct pl_dispatch_info *pass = &pl->info[i]; + + static_assert(VO_PERF_SAMPLE_COUNT >= MP_ARRAY_SIZE(pass->samples), ""); + assert(pass->num_samples <= MP_ARRAY_SIZE(pass->samples)); + + struct mp_pass_perf *perf = &mp->perf[i]; + perf->count = MPMIN(pass->num_samples, VO_PERF_SAMPLE_COUNT); + memcpy(perf->samples, pass->samples, perf->count * sizeof(pass->samples[0])); + perf->last = pass->last; + perf->peak = pass->peak; + perf->avg = pass->average; + + strncpy(mp->desc[i], pass->shader->description, sizeof(mp->desc[i]) - 1); + mp->desc[i][sizeof(mp->desc[i]) - 1] = '\0'; + } +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct priv *p = vo->priv; + + switch (request) { + case VOCTRL_SET_PANSCAN: + resize(vo); + return VO_TRUE; + case VOCTRL_SET_EQUALIZER: + case VOCTRL_PAUSE: + if (p->is_interpolated) + vo->want_redraw = true; + return 
VO_TRUE; + + case VOCTRL_UPDATE_RENDER_OPTS: { + m_config_cache_update(p->opts_cache); + const struct gl_video_opts *opts = p->opts_cache->opts; + p->ra_ctx->opts.want_alpha = opts->alpha_mode == ALPHA_YES; + if (p->ra_ctx->fns->update_render_opts) + p->ra_ctx->fns->update_render_opts(p->ra_ctx); + update_render_options(vo); + vo->want_redraw = true; + + // Also re-query the auto profile, in case `update_render_options` + // unloaded a manually specified icc profile in favor of + // icc-profile-auto + int events = 0; + update_auto_profile(p, &events); + vo_event(vo, events); + return VO_TRUE; + } + + case VOCTRL_RESET: + // Defer until the first new frame (unique ID) actually arrives + p->want_reset = true; + return VO_TRUE; + + case VOCTRL_PERFORMANCE_DATA: { + struct voctrl_performance_data *perf = data; + copy_frame_info_to_mp(&p->perf_fresh, &perf->fresh); + copy_frame_info_to_mp(&p->perf_redraw, &perf->redraw); + return true; + } + + case VOCTRL_SCREENSHOT: + video_screenshot(vo, data); + return true; + + case VOCTRL_EXTERNAL_RESIZE: + reconfig(vo, NULL); + return true; + + case VOCTRL_LOAD_HWDEC_API: + ra_hwdec_ctx_load_fmt(&p->hwdec_ctx, vo->hwdec_devs, data); + return true; + } + + int events = 0; + int r = p->ra_ctx->fns->control(p->ra_ctx, &events, request, data); + if (events & VO_EVENT_ICC_PROFILE_CHANGED) { + if (update_auto_profile(p, &events)) + vo->want_redraw = true; + } + if (events & VO_EVENT_RESIZE) + resize(vo); + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + vo_event(vo, events); + + return r; +} + +static void wakeup(struct vo *vo) +{ + struct priv *p = vo->priv; + if (p->ra_ctx && p->ra_ctx->fns->wakeup) + p->ra_ctx->fns->wakeup(p->ra_ctx); +} + +static void wait_events(struct vo *vo, int64_t until_time_ns) +{ + struct priv *p = vo->priv; + if (p->ra_ctx && p->ra_ctx->fns->wait_events) { + p->ra_ctx->fns->wait_events(p->ra_ctx, until_time_ns); + } else { + vo_wait_default(vo, until_time_ns); + } +} + +#if PL_API_VER < 342 +static 
inline void xor_hash(void *hash, pl_cache_obj obj) +{ + *((uint64_t *) hash) ^= obj.key; +} + +static inline uint64_t pl_cache_signature(pl_cache cache) +{ + uint64_t hash = 0; + pl_cache_iterate(cache, xor_hash, &hash); + return hash; +} +#endif + +static void cache_init(struct vo *vo, struct cache *cache, size_t max_size, + const char *dir_opt) +{ + struct priv *p = vo->priv; + const char *name = cache == &p->shader_cache ? "shader.cache" : "icc.cache"; + + char *dir; + if (dir_opt && dir_opt[0]) { + dir = mp_get_user_path(NULL, p->global, dir_opt); + } else { + dir = mp_find_user_file(NULL, p->global, "cache", ""); + } + if (!dir || !dir[0]) + goto done; + + mp_mkdirp(dir); + cache->path = mp_path_join(vo, dir, name); + cache->cache = pl_cache_create(pl_cache_params( + .log = p->pllog, + .max_total_size = max_size, + )); + + FILE *file = fopen(cache->path, "rb"); + if (file) { + int ret = pl_cache_load_file(cache->cache, file); + fclose(file); + if (ret < 0) + MP_WARN(p, "Failed loading cache from %s\n", cache->path); + } + + cache->sig = pl_cache_signature(cache->cache); +done: + talloc_free(dir); +} + +static void cache_uninit(struct priv *p, struct cache *cache) +{ + if (!cache->cache) + goto done; + if (pl_cache_signature(cache->cache) == cache->sig) + goto done; // skip re-saving identical cache + + assert(cache->path); + char *tmp = talloc_asprintf(cache->path, "%sXXXXXX", cache->path); + int fd = mkstemp(tmp); + if (fd < 0) + goto done; + FILE *file = fdopen(fd, "wb"); + if (!file) { + close(fd); + unlink(tmp); + goto done; + } + int ret = pl_cache_save_file(cache->cache, file); + fclose(file); + if (ret >= 0) + ret = rename(tmp, cache->path); + if (ret < 0) { + MP_WARN(p, "Failed saving cache to %s\n", cache->path); + unlink(tmp); + } + + // fall through +done: + pl_cache_destroy(&cache->cache); +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + pl_queue_destroy(&p->queue); // destroy this first + for (int i = 0; i < 
MP_ARRAY_SIZE(p->osd_state.entries); i++) + pl_tex_destroy(p->gpu, &p->osd_state.entries[i].tex); + for (int i = 0; i < p->num_sub_tex; i++) + pl_tex_destroy(p->gpu, &p->sub_tex[i]); + for (int i = 0; i < p->num_user_hooks; i++) + pl_mpv_user_shader_destroy(&p->user_hooks[i].hook); + + if (vo->hwdec_devs) { + ra_hwdec_mapper_free(&p->hwdec_mapper); + ra_hwdec_ctx_uninit(&p->hwdec_ctx); + hwdec_devices_set_loader(vo->hwdec_devs, NULL, NULL); + hwdec_devices_destroy(vo->hwdec_devs); + } + + assert(p->num_dr_buffers == 0); + mp_mutex_destroy(&p->dr_lock); + + cache_uninit(p, &p->shader_cache); + cache_uninit(p, &p->icc_cache); + + pl_icc_close(&p->icc_profile); + pl_renderer_destroy(&p->rr); + + for (int i = 0; i < VO_PASS_PERF_MAX; ++i) { + pl_shader_info_deref(&p->perf_fresh.info[i].shader); + pl_shader_info_deref(&p->perf_redraw.info[i].shader); + } + + pl_options_free(&p->pars); + + p->ra_ctx = NULL; + p->pllog = NULL; + p->gpu = NULL; + p->sw = NULL; + gpu_ctx_destroy(&p->context); +} + +static void load_hwdec_api(void *ctx, struct hwdec_imgfmt_request *params) +{ + vo_control(ctx, VOCTRL_LOAD_HWDEC_API, params); +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + p->opts_cache = m_config_cache_alloc(p, vo->global, &gl_video_conf); + p->video_eq = mp_csp_equalizer_create(p, vo->global); + p->global = vo->global; + p->log = vo->log; + + struct gl_video_opts *gl_opts = p->opts_cache->opts; + p->context = gpu_ctx_create(vo, gl_opts); + if (!p->context) + goto err_out; + // For the time being + p->ra_ctx = p->context->ra_ctx; + p->pllog = p->context->pllog; + p->gpu = p->context->gpu; + p->sw = p->context->swapchain; + p->hwdec_ctx = (struct ra_hwdec_ctx) { + .log = p->log, + .global = p->global, + .ra_ctx = p->ra_ctx, + }; + + vo->hwdec_devs = hwdec_devices_create(); + hwdec_devices_set_loader(vo->hwdec_devs, load_hwdec_api, vo); + ra_hwdec_ctx_init(&p->hwdec_ctx, vo->hwdec_devs, gl_opts->hwdec_interop, false); + mp_mutex_init(&p->dr_lock); + + 
if (gl_opts->shader_cache) + cache_init(vo, &p->shader_cache, 10 << 20, gl_opts->shader_cache_dir); + if (gl_opts->icc_opts->cache) + cache_init(vo, &p->icc_cache, 20 << 20, gl_opts->icc_opts->cache_dir); + + pl_gpu_set_cache(p->gpu, p->shader_cache.cache); + p->rr = pl_renderer_create(p->pllog, p->gpu); + p->queue = pl_queue_create(p->gpu); + p->osd_fmt[SUBBITMAP_LIBASS] = pl_find_named_fmt(p->gpu, "r8"); + p->osd_fmt[SUBBITMAP_BGRA] = pl_find_named_fmt(p->gpu, "bgra8"); + p->osd_sync = 1; + + p->pars = pl_options_alloc(p->pllog); + update_render_options(vo); + return 0; + +err_out: + uninit(vo); + return -1; +} + +static const struct pl_filter_config *map_scaler(struct priv *p, + enum scaler_unit unit) +{ + const struct pl_filter_preset fixed_scalers[] = { + { "bilinear", &pl_filter_bilinear }, + { "bicubic_fast", &pl_filter_bicubic }, + { "nearest", &pl_filter_nearest }, + { "oversample", &pl_filter_oversample }, + {0}, + }; + + const struct pl_filter_preset fixed_frame_mixers[] = { + { "linear", &pl_filter_bilinear }, + { "oversample", &pl_filter_oversample }, + {0}, + }; + + const struct pl_filter_preset *fixed_presets = + unit == SCALER_TSCALE ? 
fixed_frame_mixers : fixed_scalers; + + const struct gl_video_opts *opts = p->opts_cache->opts; + const struct scaler_config *cfg = &opts->scaler[unit]; + if (unit == SCALER_DSCALE && (!cfg->kernel.name || !cfg->kernel.name[0])) + cfg = &opts->scaler[SCALER_SCALE]; + if (unit == SCALER_CSCALE && (!cfg->kernel.name || !cfg->kernel.name[0])) + cfg = &opts->scaler[SCALER_SCALE]; + + for (int i = 0; fixed_presets[i].name; i++) { + if (strcmp(cfg->kernel.name, fixed_presets[i].name) == 0) + return fixed_presets[i].filter; + } + + // Attempt loading filter preset first, fall back to raw filter function + struct scaler_params *par = &p->scalers[unit]; + const struct pl_filter_preset *preset; + const struct pl_filter_function_preset *fpreset; + if ((preset = pl_find_filter_preset(cfg->kernel.name))) { + par->config = *preset->filter; + } else if ((fpreset = pl_find_filter_function_preset(cfg->kernel.name))) { + par->config = (struct pl_filter_config) { + .kernel = fpreset->function, + .params[0] = fpreset->function->params[0], + .params[1] = fpreset->function->params[1], + }; + } else { + MP_ERR(p, "Failed mapping filter function '%s', no libplacebo analog?\n", + cfg->kernel.name); + return &pl_filter_bilinear; + } + + const struct pl_filter_function_preset *wpreset; + if ((wpreset = pl_find_filter_function_preset(cfg->window.name))) { + par->config.window = wpreset->function; + par->config.wparams[0] = wpreset->function->params[0]; + par->config.wparams[1] = wpreset->function->params[1]; + } + + for (int i = 0; i < 2; i++) { + if (!isnan(cfg->kernel.params[i])) + par->config.params[i] = cfg->kernel.params[i]; + if (!isnan(cfg->window.params[i])) + par->config.wparams[i] = cfg->window.params[i]; + } + + par->config.clamp = cfg->clamp; + if (cfg->kernel.blur > 0.0) + par->config.blur = cfg->kernel.blur; + if (cfg->kernel.taper > 0.0) + par->config.taper = cfg->kernel.taper; + if (cfg->radius > 0.0) { + if (par->config.kernel->resizable) { + par->config.radius = cfg->radius; 
+ } else { + MP_WARN(p, "Filter radius specified but filter '%s' is not " + "resizable, ignoring\n", cfg->kernel.name); + } + } + + return &par->config; +} + +static const struct pl_hook *load_hook(struct priv *p, const char *path) +{ + if (!path || !path[0]) + return NULL; + + for (int i = 0; i < p->num_user_hooks; i++) { + if (strcmp(p->user_hooks[i].path, path) == 0) + return p->user_hooks[i].hook; + } + + char *fname = mp_get_user_path(NULL, p->global, path); + bstr shader = stream_read_file(fname, p, p->global, 1000000000); // 1GB + talloc_free(fname); + + const struct pl_hook *hook = NULL; + if (shader.len) + hook = pl_mpv_user_shader_parse(p->gpu, shader.start, shader.len); + + MP_TARRAY_APPEND(p, p->user_hooks, p->num_user_hooks, (struct user_hook) { + .path = talloc_strdup(p, path), + .hook = hook, + }); + + return hook; +} + +static void update_icc_opts(struct priv *p, const struct mp_icc_opts *opts) +{ + if (!opts) + return; + + if (!opts->profile_auto && !p->icc_path) { + // Un-set any auto-loaded profiles if icc-profile-auto was disabled + update_icc(p, (bstr) {0}); + } + + int s_r = 0, s_g = 0, s_b = 0; + gl_parse_3dlut_size(opts->size_str, &s_r, &s_g, &s_b); + p->icc_params = pl_icc_default_params; + p->icc_params.intent = opts->intent; + p->icc_params.size_r = s_r; + p->icc_params.size_g = s_g; + p->icc_params.size_b = s_b; + p->icc_params.cache = p->icc_cache.cache; + + if (!opts->profile || !opts->profile[0]) { + // No profile enabled, un-load any existing profiles + update_icc(p, (bstr) {0}); + TA_FREEP(&p->icc_path); + return; + } + + if (p->icc_path && strcmp(opts->profile, p->icc_path) == 0) + return; // ICC profile hasn't changed + + char *fname = mp_get_user_path(NULL, p->global, opts->profile); + MP_VERBOSE(p, "Opening ICC profile '%s'\n", fname); + struct bstr icc = stream_read_file(fname, p, p->global, 100000000); // 100 MB + talloc_free(fname); + update_icc(p, icc); + + // Update cached path + talloc_free(p->icc_path); + p->icc_path = 
talloc_strdup(p, opts->profile); +} + +static void update_lut(struct priv *p, struct user_lut *lut) +{ + if (!lut->opt) { + pl_lut_free(&lut->lut); + TA_FREEP(&lut->path); + return; + } + + if (lut->path && strcmp(lut->path, lut->opt) == 0) + return; // no change + + // Update cached path + pl_lut_free(&lut->lut); + talloc_free(lut->path); + lut->path = talloc_strdup(p, lut->opt); + + // Load LUT file + char *fname = mp_get_user_path(NULL, p->global, lut->path); + MP_VERBOSE(p, "Loading custom LUT '%s'\n", fname); + struct bstr lutdata = stream_read_file(fname, p, p->global, 100000000); // 100 MB + lut->lut = pl_lut_parse_cube(p->pllog, lutdata.start, lutdata.len); + talloc_free(lutdata.start); +} + +static void update_hook_opts(struct priv *p, char **opts, const char *shaderpath, + const struct pl_hook *hook) +{ + if (!opts) + return; + + const char *basename = mp_basename(shaderpath); + struct bstr shadername; + if (!mp_splitext(basename, &shadername)) + shadername = bstr0(basename); + + for (int n = 0; opts[n * 2]; n++) { + struct bstr k = bstr0(opts[n * 2 + 0]); + struct bstr v = bstr0(opts[n * 2 + 1]); + int pos; + if ((pos = bstrchr(k, '/')) >= 0) { + if (!bstr_equals(bstr_splice(k, 0, pos), shadername)) + continue; + k = bstr_cut(k, pos + 1); + } + + for (int i = 0; i < hook->num_parameters; i++) { + const struct pl_hook_par *hp = &hook->parameters[i]; + if (!bstr_equals0(k, hp->name) != 0) + continue; + + m_option_t opt = { + .name = hp->name, + }; + + if (hp->names) { + for (int j = hp->minimum.i; j <= hp->maximum.i; j++) { + if (bstr_equals0(v, hp->names[j])) { + hp->data->i = j; + goto next_hook; + } + } + } + + switch (hp->type) { + case PL_VAR_FLOAT: + opt.type = &m_option_type_float; + opt.min = hp->minimum.f; + opt.max = hp->maximum.f; + break; + case PL_VAR_SINT: + opt.type = &m_option_type_int; + opt.min = hp->minimum.i; + opt.max = hp->maximum.i; + break; + case PL_VAR_UINT: + opt.type = &m_option_type_int; + opt.min = MPMIN(hp->minimum.u, 
INT_MAX); + opt.max = MPMIN(hp->maximum.u, INT_MAX); + break; + } + + if (!opt.type) + goto next_hook; + + opt.type->parse(p->log, &opt, k, v, hp->data); + goto next_hook; + } + + next_hook:; + } +} + +static void update_render_options(struct vo *vo) +{ + struct priv *p = vo->priv; + pl_options pars = p->pars; + const struct gl_video_opts *opts = p->opts_cache->opts; + pars->params.antiringing_strength = opts->scaler[0].antiring; + pars->params.background_color[0] = opts->background.r / 255.0; + pars->params.background_color[1] = opts->background.g / 255.0; + pars->params.background_color[2] = opts->background.b / 255.0; + pars->params.background_transparency = 1.0 - opts->background.a / 255.0; + pars->params.skip_anti_aliasing = !opts->correct_downscaling; + pars->params.disable_linear_scaling = !opts->linear_downscaling && !opts->linear_upscaling; + pars->params.disable_fbos = opts->dumb_mode == 1; + pars->params.blend_against_tiles = opts->alpha_mode == ALPHA_BLEND_TILES; + pars->params.corner_rounding = p->corner_rounding; + pars->params.correct_subpixel_offsets = !opts->scaler_resizes_only; + + // Map scaler options as best we can + pars->params.upscaler = map_scaler(p, SCALER_SCALE); + pars->params.downscaler = map_scaler(p, SCALER_DSCALE); + pars->params.plane_upscaler = map_scaler(p, SCALER_CSCALE); + pars->params.frame_mixer = opts->interpolation ? map_scaler(p, SCALER_TSCALE) : NULL; + + // Request as many frames as required from the decoder, depending on the + // speed VPS/FPS ratio libplacebo may need more frames. Request frames up to + // ratio of 1/2, but only if anti aliasing is enabled. + int req_frames = 2; + if (pars->params.frame_mixer) { + req_frames += ceilf(pars->params.frame_mixer->kernel->radius) * + (pars->params.skip_anti_aliasing ? 1 : 2); + } + vo_set_queue_params(vo, 0, MPMIN(VO_MAX_REQ_FRAMES, req_frames)); + + pars->params.deband_params = opts->deband ? 
&pars->deband_params : NULL; + pars->deband_params.iterations = opts->deband_opts->iterations; + pars->deband_params.radius = opts->deband_opts->range; + pars->deband_params.threshold = opts->deband_opts->threshold / 16.384; + pars->deband_params.grain = opts->deband_opts->grain / 8.192; + + pars->params.sigmoid_params = opts->sigmoid_upscaling ? &pars->sigmoid_params : NULL; + pars->sigmoid_params.center = opts->sigmoid_center; + pars->sigmoid_params.slope = opts->sigmoid_slope; + + pars->params.peak_detect_params = opts->tone_map.compute_peak >= 0 ? &pars->peak_detect_params : NULL; + pars->peak_detect_params.smoothing_period = opts->tone_map.decay_rate; + pars->peak_detect_params.scene_threshold_low = opts->tone_map.scene_threshold_low; + pars->peak_detect_params.scene_threshold_high = opts->tone_map.scene_threshold_high; + pars->peak_detect_params.percentile = opts->tone_map.peak_percentile; + pars->peak_detect_params.allow_delayed = p->delayed_peak; + + const struct pl_tone_map_function * const tone_map_funs[] = { + [TONE_MAPPING_AUTO] = &pl_tone_map_auto, + [TONE_MAPPING_CLIP] = &pl_tone_map_clip, + [TONE_MAPPING_MOBIUS] = &pl_tone_map_mobius, + [TONE_MAPPING_REINHARD] = &pl_tone_map_reinhard, + [TONE_MAPPING_HABLE] = &pl_tone_map_hable, + [TONE_MAPPING_GAMMA] = &pl_tone_map_gamma, + [TONE_MAPPING_LINEAR] = &pl_tone_map_linear, + [TONE_MAPPING_SPLINE] = &pl_tone_map_spline, + [TONE_MAPPING_BT_2390] = &pl_tone_map_bt2390, + [TONE_MAPPING_BT_2446A] = &pl_tone_map_bt2446a, + [TONE_MAPPING_ST2094_40] = &pl_tone_map_st2094_40, + [TONE_MAPPING_ST2094_10] = &pl_tone_map_st2094_10, + }; + + const struct pl_gamut_map_function * const gamut_modes[] = { + [GAMUT_AUTO] = pl_color_map_default_params.gamut_mapping, + [GAMUT_CLIP] = &pl_gamut_map_clip, + [GAMUT_PERCEPTUAL] = &pl_gamut_map_perceptual, + [GAMUT_RELATIVE] = &pl_gamut_map_relative, + [GAMUT_SATURATION] = &pl_gamut_map_saturation, + [GAMUT_ABSOLUTE] = &pl_gamut_map_absolute, + [GAMUT_DESATURATE] = 
&pl_gamut_map_desaturate, + [GAMUT_DARKEN] = &pl_gamut_map_darken, + [GAMUT_WARN] = &pl_gamut_map_highlight, + [GAMUT_LINEAR] = &pl_gamut_map_linear, + }; + + pars->color_map_params.tone_mapping_function = tone_map_funs[opts->tone_map.curve]; + pars->color_map_params.tone_mapping_param = opts->tone_map.curve_param; + if (isnan(pars->color_map_params.tone_mapping_param)) // vo_gpu compatibility + pars->color_map_params.tone_mapping_param = 0.0; + pars->color_map_params.inverse_tone_mapping = opts->tone_map.inverse; + pars->color_map_params.contrast_recovery = opts->tone_map.contrast_recovery; + pars->color_map_params.visualize_lut = opts->tone_map.visualize; + pars->color_map_params.contrast_smoothness = opts->tone_map.contrast_smoothness; + pars->color_map_params.gamut_mapping = gamut_modes[opts->tone_map.gamut_mode]; + + switch (opts->dither_algo) { + case DITHER_NONE: + pars->params.dither_params = NULL; + break; + case DITHER_ERROR_DIFFUSION: + pars->params.error_diffusion = pl_find_error_diffusion_kernel(opts->error_diffusion); + if (!pars->params.error_diffusion) { + MP_WARN(p, "Could not find error diffusion kernel '%s', falling " + "back to fruit.\n", opts->error_diffusion); + } + MP_FALLTHROUGH; + case DITHER_ORDERED: + case DITHER_FRUIT: + pars->params.dither_params = &pars->dither_params; + pars->dither_params.method = opts->dither_algo == DITHER_ORDERED + ? 
PL_DITHER_ORDERED_FIXED + : PL_DITHER_BLUE_NOISE; + pars->dither_params.lut_size = opts->dither_size; + pars->dither_params.temporal = opts->temporal_dither; + break; + } + + if (opts->dither_depth < 0) + pars->params.dither_params = NULL; + + update_icc_opts(p, opts->icc_opts); + + pars->params.num_hooks = 0; + const struct pl_hook *hook; + for (int i = 0; opts->user_shaders && opts->user_shaders[i]; i++) { + if ((hook = load_hook(p, opts->user_shaders[i]))) { + MP_TARRAY_APPEND(p, p->hooks, pars->params.num_hooks, hook); + update_hook_opts(p, opts->user_shader_opts, opts->user_shaders[i], hook); + } + } + + pars->params.hooks = p->hooks; +} + +#define OPT_BASE_STRUCT struct priv + +const struct m_opt_choice_alternatives lut_types[] = { + {"auto", PL_LUT_UNKNOWN}, + {"native", PL_LUT_NATIVE}, + {"normalized", PL_LUT_NORMALIZED}, + {"conversion", PL_LUT_CONVERSION}, + {0} +}; + +const struct vo_driver video_out_gpu_next = { + .description = "Video output based on libplacebo", + .name = "gpu-next", + .caps = VO_CAP_ROTATE90 | + VO_CAP_FILM_GRAIN | + 0x0, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .get_image_ts = get_image, + .draw_frame = draw_frame, + .flip_page = flip_page, + .get_vsync = get_vsync, + .wait_events = wait_events, + .wakeup = wakeup, + .uninit = uninit, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .inter_preserve = true, + }, + + .options = (const struct m_option[]) { + {"allow-delayed-peak-detect", OPT_BOOL(delayed_peak)}, + {"corner-rounding", OPT_FLOAT(corner_rounding), M_RANGE(0, 1)}, + {"interpolation-preserve", OPT_BOOL(inter_preserve)}, + {"lut", OPT_STRING(lut.opt), .flags = M_OPT_FILE}, + {"lut-type", OPT_CHOICE_C(lut.type, lut_types)}, + {"image-lut", OPT_STRING(image_lut.opt), .flags = M_OPT_FILE}, + {"image-lut-type", OPT_CHOICE_C(image_lut.type, lut_types)}, + {"target-lut", OPT_STRING(target_lut.opt), .flags = M_OPT_FILE}, + 
{"target-colorspace-hint", OPT_BOOL(target_hint)}, + // No `target-lut-type` because we don't support non-RGB targets + {"libplacebo-opts", OPT_KEYVALUELIST(raw_opts)}, + {0} + }, +}; diff --git a/video/out/vo_image.c b/video/out/vo_image.c new file mode 100644 index 0000000..cc48ab3 --- /dev/null +++ b/video/out/vo_image.c @@ -0,0 +1,165 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> +#include <sys/stat.h> + +#include <libswscale/swscale.h> + +#include "misc/bstr.h" +#include "osdep/io.h" +#include "options/m_config.h" +#include "options/path.h" +#include "mpv_talloc.h" +#include "common/common.h" +#include "common/msg.h" +#include "video/out/vo.h" +#include "video/csputils.h" +#include "video/mp_image.h" +#include "video/fmt-conversion.h" +#include "video/image_writer.h" +#include "video/sws_utils.h" +#include "sub/osd.h" +#include "options/m_option.h" + +static const struct m_sub_options image_writer_conf = { + .opts = image_writer_opts, + .size = sizeof(struct image_writer_opts), + .defaults = &image_writer_opts_defaults, +}; + +struct vo_image_opts { + struct image_writer_opts *opts; + char *outdir; +}; + +#define OPT_BASE_STRUCT struct vo_image_opts + +static const struct m_sub_options vo_image_conf = { + .opts = (const struct m_option[]) { + {"vo-image", OPT_SUBSTRUCT(opts, image_writer_conf)}, + {"vo-image-outdir", OPT_STRING(outdir), .flags = M_OPT_FILE}, + {0}, + }, + .size = sizeof(struct vo_image_opts), +}; + +struct priv { + struct vo_image_opts *opts; + + struct mp_image *current; + int frame; +}; + +static bool checked_mkdir(struct vo *vo, const char *buf) +{ + MP_INFO(vo, "Creating output directory '%s'...\n", buf); + if (mkdir(buf, 0755) < 0) { + char *errstr = mp_strerror(errno); + if (errno == EEXIST) { + struct stat stat_p; + if (stat(buf, &stat_p ) == 0 && S_ISDIR(stat_p.st_mode)) + return true; + } + MP_ERR(vo, "Error creating output directory: %s\n", errstr); + return false; + } + return true; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + return 0; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + if (!frame->current) + return; + + p->current = frame->current; + + struct mp_osd_res dim = osd_res_from_image_params(vo->params); + 
osd_draw_on_image(vo->osd, dim, frame->current->pts, OSD_DRAW_SUB_ONLY, p->current); +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + if (!p->current) + return; + + (p->frame)++; + + void *t = talloc_new(NULL); + char *filename = talloc_asprintf(t, "%08d.%s", p->frame, + image_writer_file_ext(p->opts->opts)); + + if (p->opts->outdir && strlen(p->opts->outdir)) + filename = mp_path_join(t, p->opts->outdir, filename); + + MP_INFO(vo, "Saving %s\n", filename); + write_image(p->current, p->opts->opts, filename, vo->global, vo->log); + + talloc_free(t); +} + +static int query_format(struct vo *vo, int fmt) +{ + if (mp_sws_supported_format(fmt)) + return 1; + return 0; +} + +static void uninit(struct vo *vo) +{ +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + p->opts = mp_get_config_group(vo, vo->global, &vo_image_conf); + if (p->opts->outdir && !checked_mkdir(vo, p->opts->outdir)) + return -1; + return 0; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + return VO_NOTIMPL; +} + +const struct vo_driver video_out_image = +{ + .description = "Write video frames to image files", + .name = "image", + .untimed = true, + .priv_size = sizeof(struct priv), + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .global_opts = &vo_image_conf, +}; diff --git a/video/out/vo_kitty.c b/video/out/vo_kitty.c new file mode 100644 index 0000000..7d548c7 --- /dev/null +++ b/video/out/vo_kitty.c @@ -0,0 +1,433 @@ +/* + * Video output device using the kitty terminal graphics protocol + * See https://sw.kovidgoyal.net/kitty/graphics-protocol/ + * + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> + +#include "config.h" + +#if HAVE_POSIX +#include <fcntl.h> +#include <sys/mman.h> +#include <sys/stat.h> +#include <unistd.h> +#endif + +#include <libswscale/swscale.h> +#include <libavutil/base64.h> + +#include "options/m_config.h" +#include "osdep/terminal.h" +#include "sub/osd.h" +#include "vo.h" +#include "video/sws_utils.h" +#include "video/mp_image.h" + +#define IMGFMT IMGFMT_RGB24 +#define BYTES_PER_PX 3 +#define DEFAULT_WIDTH_PX 320 +#define DEFAULT_HEIGHT_PX 240 +#define DEFAULT_WIDTH 80 +#define DEFAULT_HEIGHT 25 + +static inline void write_str(const char *s) +{ + // On POSIX platforms, write() is the fastest method. It also is the only + // one that allows atomic writes so mpv’s output will not be interrupted + // by other processes or threads that write to stdout, which would cause + // screen corruption. POSIX does not guarantee atomicity for writes + // exceeding PIPE_BUF, but at least Linux does seem to implement it that + // way. 
+#if HAVE_POSIX + int remain = strlen(s); + while (remain > 0) { + ssize_t written = write(STDOUT_FILENO, s, remain); + if (written < 0) + return; + remain -= written; + s += written; + } +#else + printf("%s", s); + fflush(stdout); +#endif +} + +#define KITTY_ESC_IMG "\033_Ga=T,f=24,s=%d,v=%d,C=1,q=2,m=1;" +#define KITTY_ESC_IMG_SHM "\033_Ga=T,t=s,f=24,s=%d,v=%d,C=1,q=2,m=1;%s\033\\" +#define KITTY_ESC_CONTINUE "\033_Gm=%d;" +#define KITTY_ESC_END "\033\\" +#define KITTY_ESC_DELETE_ALL "\033_Ga=d;\033\\" + +struct vo_kitty_opts { + int width, height, top, left, rows, cols; + bool config_clear, alt_screen; + bool use_shm; +}; + +struct priv { + struct vo_kitty_opts opts; + + uint8_t *buffer; + char *output; + char *shm_path, *shm_path_b64; + int buffer_size, output_size; + int shm_fd; + + int left, top, width, height, cols, rows; + + struct mp_rect src; + struct mp_rect dst; + struct mp_osd_res osd; + struct mp_image *frame; + struct mp_sws_context *sws; +}; + +#if HAVE_POSIX +static struct sigaction saved_sigaction = {0}; +static bool resized; +#endif + +static void close_shm(struct priv *p) +{ +#if HAVE_POSIX_SHM + if (p->buffer != NULL) { + munmap(p->buffer, p->buffer_size); + p->buffer = NULL; + } + if (p->shm_fd != -1) { + close(p->shm_fd); + p->shm_fd = -1; + } +#endif +} + +static void free_bufs(struct vo* vo) +{ + struct priv* p = vo->priv; + + talloc_free(p->frame); + talloc_free(p->output); + + if (p->opts.use_shm) { + close_shm(p); + } else { + talloc_free(p->buffer); + } +} + +static void get_win_size(struct vo *vo, int *out_rows, int *out_cols, + int *out_width, int *out_height) +{ + struct priv *p = vo->priv; + *out_rows = DEFAULT_HEIGHT; + *out_cols = DEFAULT_WIDTH; + *out_width = DEFAULT_WIDTH_PX; + *out_height = DEFAULT_HEIGHT_PX; + + terminal_get_size2(out_rows, out_cols, out_width, out_height); + + *out_rows = p->opts.rows > 0 ? p->opts.rows : *out_rows; + *out_cols = p->opts.cols > 0 ? p->opts.cols : *out_cols; + *out_width = p->opts.width > 0 ? 
p->opts.width : *out_width; + *out_height = p->opts.height > 0 ? p->opts.height : *out_height; +} + +static void set_out_params(struct vo *vo) +{ + struct priv *p = vo->priv; + + vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd); + + p->width = p->dst.x1 - p->dst.x0; + p->height = p->dst.y1 - p->dst.y0; + p->top = p->opts.top > 0 ? + p->opts.top : p->rows * p->dst.y0 / vo->dheight; + p->left = p->opts.left > 0 ? + p->opts.left : p->cols * p->dst.x0 / vo->dwidth; + + p->buffer_size = 3 * p->width * p->height; + p->output_size = AV_BASE64_SIZE(p->buffer_size); +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *p = vo->priv; + + vo->want_redraw = true; + write_str(KITTY_ESC_DELETE_ALL); + if (p->opts.config_clear) + write_str(TERM_ESC_CLEAR_SCREEN); + + get_win_size(vo, &p->rows, &p->cols, &vo->dwidth, &vo->dheight); + set_out_params(vo); + free_bufs(vo); + + p->sws->src = *params; + p->sws->src.w = mp_rect_w(p->src); + p->sws->src.h = mp_rect_h(p->src); + p->sws->dst = (struct mp_image_params) { + .imgfmt = IMGFMT, + .w = p->width, + .h = p->height, + .p_w = 1, + .p_h = 1, + }; + + p->frame = mp_image_alloc(IMGFMT, p->width, p->height); + if (!p->frame) + return -1; + + if (mp_sws_reinit(p->sws) < 0) + return -1; + + if (!p->opts.use_shm) { + p->buffer = talloc_array(NULL, uint8_t, p->buffer_size); + p->output = talloc_array(NULL, char, p->output_size); + } + + return 0; +} + +static int create_shm(struct vo *vo) +{ +#if HAVE_POSIX_SHM + struct priv *p = vo->priv; + p->shm_fd = shm_open(p->shm_path, O_CREAT | O_RDWR, S_IRUSR | S_IWUSR); + if (p->shm_fd == -1) { + MP_ERR(vo, "Failed to create shared memory object"); + return 0; + } + + if (ftruncate(p->shm_fd, p->buffer_size) == -1) { + MP_ERR(vo, "Failed to truncate shared memory object"); + shm_unlink(p->shm_path); + close(p->shm_fd); + return 0; + } + + p->buffer = mmap(NULL, p->buffer_size, + PROT_READ | PROT_WRITE, MAP_SHARED, p->shm_fd, 0); + + if (p->buffer == 
MAP_FAILED) { + MP_ERR(vo, "Failed to mmap shared memory object"); + shm_unlink(p->shm_path); + close(p->shm_fd); + return 0; + } + return 1; +#else + return 0; +#endif +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + mp_image_t *mpi = NULL; + +#if !HAVE_POSIX + int prev_height = vo->dheight; + int prev_width = vo->dwidth; + get_win_size(vo, &p->rows, &p->cols, &vo->dwidth, &vo->dheight); + bool resized = (prev_width != vo->dwidth || prev_height != vo->dheight); +#endif + + if (resized) + reconfig(vo, vo->params); + + resized = false; + + if (frame->current) { + mpi = mp_image_new_ref(frame->current); + struct mp_rect src_rc = p->src; + src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, mpi->fmt.align_x); + src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, mpi->fmt.align_y); + mp_image_crop_rc(mpi, src_rc); + + mp_sws_scale(p->sws, p->frame, mpi); + } else { + mp_image_clear(p->frame, 0, 0, p->width, p->height); + } + + struct mp_osd_res res = { .w = p->width, .h = p->height }; + osd_draw_on_image(vo->osd, res, mpi ? 
mpi->pts : 0, 0, p->frame); + + + if (p->opts.use_shm && !create_shm(vo)) + return; + + memcpy_pic(p->buffer, p->frame->planes[0], p->width * BYTES_PER_PX, + p->height, p->width * BYTES_PER_PX, p->frame->stride[0]); + + if (!p->opts.use_shm) + av_base64_encode(p->output, p->output_size, p->buffer, p->buffer_size); + + talloc_free(mpi); +} + +static void flip_page(struct vo *vo) +{ + struct priv* p = vo->priv; + + if (p->buffer == NULL) + return; + + char *cmd = talloc_asprintf(NULL, TERM_ESC_GOTO_YX, p->top, p->left); + + if (p->opts.use_shm) { + cmd = talloc_asprintf_append(cmd, KITTY_ESC_IMG_SHM, p->width, p->height, p->shm_path_b64); + } else { + if (p->output == NULL) { + talloc_free(cmd); + return; + } + + cmd = talloc_asprintf_append(cmd, KITTY_ESC_IMG, p->width, p->height); + for (int offset = 0, noffset;; offset += noffset) { + if (offset) + cmd = talloc_asprintf_append(cmd, KITTY_ESC_CONTINUE, offset < p->output_size); + noffset = MPMIN(4096, p->output_size - offset); + cmd = talloc_strndup_append(cmd, p->output + offset, noffset); + cmd = talloc_strdup_append(cmd, KITTY_ESC_END); + + if (offset >= p->output_size) + break; + } + } + + write_str(cmd); + talloc_free(cmd); + +#if HAVE_POSIX + if (p->opts.use_shm) + close_shm(p); +#endif +} + +#if HAVE_POSIX +static void handle_winch(int sig) { + resized = true; + if (saved_sigaction.sa_handler) + saved_sigaction.sa_handler(sig); +} +#endif + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + + p->sws = mp_sws_alloc(vo); + p->sws->log = vo->log; + mp_sws_enable_cmdline_opts(p->sws, vo->global); + +#if HAVE_POSIX + struct sigaction sa; + sa.sa_handler = handle_winch; + sigaction(SIGWINCH, &sa, &saved_sigaction); +#endif + +#if HAVE_POSIX_SHM + if (p->opts.use_shm) { + p->shm_path = talloc_asprintf(vo, "/mpv-kitty-%p", vo); + int p_size = strlen(p->shm_path) - 1; + int b64_size = AV_BASE64_SIZE(p_size); + p->shm_path_b64 = talloc_array(vo, char, b64_size); + av_base64_encode(p->shm_path_b64, 
b64_size, p->shm_path + 1, p_size); + } +#else + if (p->opts.use_shm) { + MP_ERR(vo, "Shared memory support is not available on this platform."); + return -1; + } +#endif + + write_str(TERM_ESC_HIDE_CURSOR); + if (p->opts.alt_screen) + write_str(TERM_ESC_ALT_SCREEN); + + return 0; +} + +static int query_format(struct vo *vo, int format) +{ + return format == IMGFMT; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + if (request == VOCTRL_SET_PANSCAN) + return (vo->config_ok && !reconfig(vo, vo->params)) ? VO_TRUE : VO_FALSE; + return VO_NOTIMPL; +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + +#if HAVE_POSIX + sigaction(SIGWINCH, &saved_sigaction, NULL); +#endif + + write_str(TERM_ESC_RESTORE_CURSOR); + + if (p->opts.alt_screen) { + write_str(TERM_ESC_NORMAL_SCREEN); + } else { + char *cmd = talloc_asprintf(vo, TERM_ESC_GOTO_YX, p->cols, 0); + write_str(cmd); + } + + free_bufs(vo); +} + +#define OPT_BASE_STRUCT struct priv + +const struct vo_driver video_out_kitty = { + .name = "kitty", + .description = "Kitty terminal graphics protocol", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .shm_fd = -1, + .opts.config_clear = true, + .opts.alt_screen = true, + }, + .options = (const m_option_t[]) { + {"width", OPT_INT(opts.width)}, + {"height", OPT_INT(opts.height)}, + {"top", OPT_INT(opts.top)}, + {"left", OPT_INT(opts.left)}, + {"rows", OPT_INT(opts.rows)}, + {"cols", OPT_INT(opts.cols)}, + {"config-clear", OPT_BOOL(opts.config_clear), }, + {"alt-screen", OPT_BOOL(opts.alt_screen), }, + {"use-shm", OPT_BOOL(opts.use_shm), }, + {0} + }, + .options_prefix = "vo-kitty", +}; diff --git a/video/out/vo_lavc.c b/video/out/vo_lavc.c new file mode 100644 index 0000000..7170c1d --- /dev/null +++ b/video/out/vo_lavc.c @@ -0,0 
+1,262 @@ +/* + * video encoding using libavformat + * + * Copyright (C) 2010 Nicolas George <george@nsup.org> + * Copyright (C) 2011-2012 Rudolf Polzer <divVerent@xonotic.org> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include "common/common.h" +#include "options/options.h" +#include "video/fmt-conversion.h" +#include "video/mp_image.h" +#include "mpv_talloc.h" +#include "vo.h" + +#include "common/encode_lavc.h" + +#include "sub/osd.h" + +struct priv { + struct encoder_context *enc; + + bool shutdown; +}; + +static int preinit(struct vo *vo) +{ + struct priv *vc = vo->priv; + vc->enc = encoder_context_alloc(vo->encode_lavc_ctx, STREAM_VIDEO, vo->log); + if (!vc->enc) + return -1; + talloc_steal(vc, vc->enc); + return 0; +} + +static void uninit(struct vo *vo) +{ + struct priv *vc = vo->priv; + struct encoder_context *enc = vc->enc; + + if (!vc->shutdown) + encoder_encode(enc, NULL); // finish encoding +} + +static void on_ready(void *ptr) +{ + struct vo *vo = ptr; + + vo_event(vo, VO_EVENT_INITIAL_UNBLOCK); +} + +static int reconfig2(struct vo *vo, struct mp_image *img) +{ + struct priv *vc = vo->priv; + AVCodecContext *encoder = vc->enc->encoder; + + struct mp_image_params *params = &img->params; + enum AVPixelFormat pix_fmt = imgfmt2pixfmt(params->imgfmt); + AVRational aspect 
= {params->p_w, params->p_h}; + int width = params->w; + int height = params->h; + + if (vc->shutdown) + return -1; + + if (avcodec_is_open(encoder)) { + if (width == encoder->width && height == encoder->height && + pix_fmt == encoder->pix_fmt) + { + // consider these changes not critical + MP_ERR(vo, "Ignoring mid-stream parameter changes!\n"); + return 0; + } + + /* FIXME Is it possible with raw video? */ + MP_ERR(vo, "resolution changes not supported.\n"); + goto error; + } + + // When we get here, this must be the first call to reconfigure(). Thus, we + // can rely on no existing data in vc having been allocated yet. + // Reason: + // - Second calls after reconfigure() already failed once fail (due to the + // vc->shutdown check above). + // - Second calls after reconfigure() already succeeded once return early + // (due to the avcodec_is_open() check above). + + if (pix_fmt == AV_PIX_FMT_NONE) { + MP_FATAL(vo, "Format %s not supported by lavc.\n", + mp_imgfmt_to_name(params->imgfmt)); + goto error; + } + + encoder->sample_aspect_ratio = aspect; + encoder->width = width; + encoder->height = height; + encoder->pix_fmt = pix_fmt; + encoder->colorspace = mp_csp_to_avcol_spc(params->color.space); + encoder->color_range = mp_csp_levels_to_avcol_range(params->color.levels); + + AVRational tb; + + // we want to handle: + // 1/25 + // 1001/24000 + // 1001/30000 + // for this we would need 120000fps... + // however, mpeg-4 only allows 16bit values + // so let's take 1001/30000 out + tb.num = 24000; + tb.den = 1; + + const AVRational *rates = encoder->codec->supported_framerates; + if (rates && rates[0].den) + tb = rates[av_find_nearest_q_idx(tb, rates)]; + + encoder->time_base = av_inv_q(tb); + + // Used for rate control, level selection, etc. + // Usually it's not too catastrophic if this isn't exactly correct, + // as long as it's not off by orders of magnitude. 
+ // If we don't set anything, encoders will use the time base, + // and 24000 is so high that the output can end up extremely screwy (see #11215), + // so we default to 240 if we don't have a real value. + if (img->nominal_fps > 0) + encoder->framerate = av_d2q(img->nominal_fps, img->nominal_fps * 1001 + 2); // Hopefully give exact results for NTSC rates + else + encoder->framerate = (AVRational){ 240, 1 }; + + if (!encoder_init_codec_and_muxer(vc->enc, on_ready, vo)) + goto error; + + return 0; + +error: + vc->shutdown = true; + return -1; +} + +static int query_format(struct vo *vo, int format) +{ + struct priv *vc = vo->priv; + + enum AVPixelFormat pix_fmt = imgfmt2pixfmt(format); + const enum AVPixelFormat *p = vc->enc->encoder->codec->pix_fmts; + + if (!p) + return 1; + + while (*p != AV_PIX_FMT_NONE) { + if (*p == pix_fmt) + return 1; + p++; + } + + return 0; +} + +static void draw_frame(struct vo *vo, struct vo_frame *voframe) +{ + struct priv *vc = vo->priv; + struct encoder_context *enc = vc->enc; + struct encode_lavc_context *ectx = enc->encode_lavc_ctx; + AVCodecContext *avc = enc->encoder; + + if (voframe->redraw || voframe->repeat || voframe->num_frames < 1) + return; + + struct mp_image *mpi = voframe->frames[0]; + + struct mp_osd_res dim = osd_res_from_image_params(vo->params); + osd_draw_on_image(vo->osd, dim, mpi->pts, OSD_DRAW_SUB_ONLY, mpi); + + if (vc->shutdown) + return; + + // Lock for shared timestamp fields. 
+ mp_mutex_lock(&ectx->lock); + + double pts = mpi->pts; + double outpts = pts; + if (!enc->options->rawts) { + // fix the discontinuity pts offset + if (ectx->discontinuity_pts_offset == MP_NOPTS_VALUE) { + ectx->discontinuity_pts_offset = ectx->next_in_pts - pts; + } else if (fabs(pts + ectx->discontinuity_pts_offset - + ectx->next_in_pts) > 30) + { + MP_WARN(vo, "detected an unexpected discontinuity (pts jumped by " + "%f seconds)\n", + pts + ectx->discontinuity_pts_offset - ectx->next_in_pts); + ectx->discontinuity_pts_offset = ectx->next_in_pts - pts; + } + + outpts = pts + ectx->discontinuity_pts_offset; + } + + if (!enc->options->rawts) { + // calculate expected pts of next video frame + double timeunit = av_q2d(avc->time_base); + double expected_next_pts = pts + timeunit; + // set next allowed output pts value + double nextpts = expected_next_pts + ectx->discontinuity_pts_offset; + if (nextpts > ectx->next_in_pts) + ectx->next_in_pts = nextpts; + } + + mp_mutex_unlock(&ectx->lock); + + AVFrame *frame = mp_image_to_av_frame(mpi); + MP_HANDLE_OOM(frame); + + frame->pts = rint(outpts * av_q2d(av_inv_q(avc->time_base))); + frame->pict_type = 0; // keep this at unknown/undefined + frame->quality = avc->global_quality; + encoder_encode(enc, frame); + av_frame_free(&frame); +} + +static void flip_page(struct vo *vo) +{ +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + return VO_NOTIMPL; +} + +const struct vo_driver video_out_lavc = { + .encode = true, + .description = "video encoding using libavcodec", + .name = "lavc", + .initially_blocked = true, + .untimed = true, + .priv_size = sizeof(struct priv), + .preinit = preinit, + .query_format = query_format, + .reconfig2 = reconfig2, + .control = control, + .uninit = uninit, + .draw_frame = draw_frame, + .flip_page = flip_page, +}; + +// vim: sw=4 ts=4 et tw=80 diff --git a/video/out/vo_libmpv.c b/video/out/vo_libmpv.c new file mode 100644 index 0000000..972588e --- /dev/null +++ 
b/video/out/vo_libmpv.c @@ -0,0 +1,748 @@ +#include <assert.h> +#include <limits.h> +#include <math.h> +#include <stdatomic.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "mpv_talloc.h" +#include "common/common.h" +#include "misc/bstr.h" +#include "misc/dispatch.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "options/options.h" +#include "aspect.h" +#include "dr_helper.h" +#include "vo.h" +#include "video/mp_image.h" +#include "sub/osd.h" +#include "osdep/threads.h" +#include "osdep/timer.h" + +#include "common/global.h" +#include "player/client.h" + +#include "libmpv.h" + +/* + * mpv_render_context is managed by the host application - the host application + * can access it any time, even if the VO is destroyed (or not created yet). + * + * - the libmpv user can mix render API and normal API; thus render API + * functions can wait on the core, but not the reverse + * - the core does blocking calls into the VO thread, thus the VO functions + * can't wait on the user calling the API functions + * - to make video timing work like it should, the VO thread waits on the + * render API user anyway, and the (unlikely) deadlock is avoided with + * a timeout + * + * Locking: mpv core > VO > mpv_render_context.lock > mp_client_api.lock + * > mpv_render_context.update_lock + * And: render thread > VO (wait for present) + * VO > render thread (wait for present done, via timeout) + * + * Locking gets more complex with advanced_control enabled. Use + * mpv_render_context.dispatch with care; synchronous calls can add lock + * dependencies. 
+ */ + +struct vo_priv { + struct mpv_render_context *ctx; // immutable after init +}; + +struct mpv_render_context { + struct mp_log *log; + struct mpv_global *global; + struct mp_client_api *client_api; + + atomic_bool in_use; + + // --- Immutable after init + struct mp_dispatch_queue *dispatch; + bool advanced_control; + struct dr_helper *dr; // NULL if advanced_control disabled + + mp_mutex control_lock; + // --- Protected by control_lock + mp_render_cb_control_fn control_cb; + void *control_cb_ctx; + + mp_mutex update_lock; + mp_cond update_cond; // paired with update_lock + + // --- Protected by update_lock + mpv_render_update_fn update_cb; + void *update_cb_ctx; + + mp_mutex lock; + mp_cond video_wait; // paired with lock + + // --- Protected by lock + struct vo_frame *next_frame; // next frame to draw + int64_t present_count; // incremented when next frame can be shown + int64_t expected_flip_count; // next vsync event for next_frame + bool redrawing; // next_frame was a redraw request + int64_t flip_count; + struct vo_frame *cur_frame; + struct mp_image_params img_params; + int vp_w, vp_h; + bool flip; + bool imgfmt_supported[IMGFMT_END - IMGFMT_START]; + bool need_reconfig; + bool need_resize; + bool need_reset; + bool need_update_external; + struct vo *vo; + + // --- Mostly immutable after init. + struct mp_hwdec_devices *hwdec_devs; + + // --- All of these can only be accessed from mpv_render_*() API, for + // which the user makes sure they're called synchronized. 
+ struct render_backend *renderer; + struct m_config_cache *vo_opts_cache; + struct mp_vo_opts *vo_opts; +}; + +const struct render_backend_fns *render_backends[] = { + &render_backend_gpu, + &render_backend_sw, + NULL +}; + +static void update(struct mpv_render_context *ctx) +{ + mp_mutex_lock(&ctx->update_lock); + if (ctx->update_cb) + ctx->update_cb(ctx->update_cb_ctx); + + mp_cond_broadcast(&ctx->update_cond); + mp_mutex_unlock(&ctx->update_lock); +} + +void *get_mpv_render_param(mpv_render_param *params, mpv_render_param_type type, + void *def) +{ + for (int n = 0; params && params[n].type; n++) { + if (params[n].type == type) + return params[n].data; + } + return def; +} + +static void forget_frames(struct mpv_render_context *ctx, bool all) +{ + mp_cond_broadcast(&ctx->video_wait); + if (all) { + talloc_free(ctx->cur_frame); + ctx->cur_frame = NULL; + } +} + +static void dispatch_wakeup(void *ptr) +{ + struct mpv_render_context *ctx = ptr; + + update(ctx); +} + +static struct mp_image *render_get_image(void *ptr, int imgfmt, int w, int h, + int stride_align, int flags) +{ + struct mpv_render_context *ctx = ptr; + + return ctx->renderer->fns->get_image(ctx->renderer, imgfmt, w, h, stride_align, flags); +} + +int mpv_render_context_create(mpv_render_context **res, mpv_handle *mpv, + mpv_render_param *params) +{ + mpv_render_context *ctx = talloc_zero(NULL, mpv_render_context); + mp_mutex_init(&ctx->control_lock); + mp_mutex_init(&ctx->lock); + mp_mutex_init(&ctx->update_lock); + mp_cond_init(&ctx->update_cond); + mp_cond_init(&ctx->video_wait); + + ctx->global = mp_client_get_global(mpv); + ctx->client_api = ctx->global->client_api; + ctx->log = mp_log_new(ctx, ctx->global->log, "libmpv_render"); + + ctx->vo_opts_cache = m_config_cache_alloc(ctx, ctx->global, &vo_sub_opts); + ctx->vo_opts = ctx->vo_opts_cache->opts; + + ctx->dispatch = mp_dispatch_create(ctx); + mp_dispatch_set_wakeup_fn(ctx->dispatch, dispatch_wakeup, ctx); + + if (GET_MPV_RENDER_PARAM(params, 
MPV_RENDER_PARAM_ADVANCED_CONTROL, int, 0)) + ctx->advanced_control = true; + + int err = MPV_ERROR_NOT_IMPLEMENTED; + for (int n = 0; render_backends[n]; n++) { + ctx->renderer = talloc_zero(NULL, struct render_backend); + *ctx->renderer = (struct render_backend){ + .global = ctx->global, + .log = ctx->log, + .fns = render_backends[n], + }; + err = ctx->renderer->fns->init(ctx->renderer, params); + if (err >= 0) + break; + ctx->renderer->fns->destroy(ctx->renderer); + talloc_free(ctx->renderer->priv); + TA_FREEP(&ctx->renderer); + if (err != MPV_ERROR_NOT_IMPLEMENTED) + break; + } + + if (err < 0) { + mpv_render_context_free(ctx); + return err; + } + + ctx->hwdec_devs = ctx->renderer->hwdec_devs; + + for (int n = IMGFMT_START; n < IMGFMT_END; n++) { + ctx->imgfmt_supported[n - IMGFMT_START] = + ctx->renderer->fns->check_format(ctx->renderer, n); + } + + if (ctx->renderer->fns->get_image && ctx->advanced_control) + ctx->dr = dr_helper_create(ctx->dispatch, render_get_image, ctx); + + if (!mp_set_main_render_context(ctx->client_api, ctx, true)) { + MP_ERR(ctx, "There is already a mpv_render_context set.\n"); + mpv_render_context_free(ctx); + return MPV_ERROR_GENERIC; + } + + *res = ctx; + return 0; +} + +void mpv_render_context_set_update_callback(mpv_render_context *ctx, + mpv_render_update_fn callback, + void *callback_ctx) +{ + mp_mutex_lock(&ctx->update_lock); + ctx->update_cb = callback; + ctx->update_cb_ctx = callback_ctx; + if (ctx->update_cb) + ctx->update_cb(ctx->update_cb_ctx); + mp_mutex_unlock(&ctx->update_lock); +} + +void mp_render_context_set_control_callback(mpv_render_context *ctx, + mp_render_cb_control_fn callback, + void *callback_ctx) +{ + mp_mutex_lock(&ctx->control_lock); + ctx->control_cb = callback; + ctx->control_cb_ctx = callback_ctx; + mp_mutex_unlock(&ctx->control_lock); +} + +void mpv_render_context_free(mpv_render_context *ctx) +{ + if (!ctx) + return; + + // From here on, ctx becomes invisible and cannot be newly acquired. 
Only + // a VO could still hold a reference. + mp_set_main_render_context(ctx->client_api, ctx, false); + + if (atomic_load(&ctx->in_use)) { + // Start destroy the VO, and also bring down the decoder etc., which + // still might be using the hwdec context or use DR images. The above + // mp_set_main_render_context() call guarantees it can't come back (so + // ctx->vo can't change to non-NULL). + // In theory, this races with vo_libmpv exiting and another VO being + // used, which is a harmless grotesque corner case. + kill_video_async(ctx->client_api); + + while (atomic_load(&ctx->in_use)) { + // As a nasty detail, we need to wait until the VO is released, but + // also need to react to update() calls during it (the update calls + // are supposed to trigger processing ctx->dispatch). We solve this + // by making the VO uninit function call mp_dispatch_interrupt(). + // + // Other than that, processing ctx->dispatch is needed to serve the + // video decoder, which might still not be fully destroyed, and e.g. + // performs calls to release DR images (or, as a grotesque corner + // case may even try to allocate new ones). + // + // Once the VO is released, ctx->dispatch becomes truly inactive. + // (The libmpv API user could call mpv_render_context_update() while + // mpv_render_context_free() is being called, but of course this is + // invalid.) + mp_dispatch_queue_process(ctx->dispatch, INFINITY); + } + } + + mp_mutex_lock(&ctx->lock); + // Barrier - guarantee uninit() has left the lock region. It will access ctx + // until the lock has been released, so we must not proceed with destruction + // before we can acquire the lock. (The opposite, uninit() acquiring the + // lock, can not happen anymore at this point - we've waited for VO uninit, + // and prevented that new VOs can be created.) 
+ mp_mutex_unlock(&ctx->lock); + + assert(!atomic_load(&ctx->in_use)); + assert(!ctx->vo); + + // With the dispatch queue not being served anymore, allow frame free + // requests from this thread to be served directly. + if (ctx->dr) + dr_helper_acquire_thread(ctx->dr); + + // Possibly remaining outstanding work. + mp_dispatch_queue_process(ctx->dispatch, 0); + + forget_frames(ctx, true); + + if (ctx->renderer) { + ctx->renderer->fns->destroy(ctx->renderer); + talloc_free(ctx->renderer->priv); + talloc_free(ctx->renderer); + } + talloc_free(ctx->dr); + talloc_free(ctx->dispatch); + + mp_cond_destroy(&ctx->update_cond); + mp_cond_destroy(&ctx->video_wait); + mp_mutex_destroy(&ctx->update_lock); + mp_mutex_destroy(&ctx->lock); + mp_mutex_destroy(&ctx->control_lock); + + talloc_free(ctx); +} + +// Try to mark the context as "in exclusive use" (e.g. by a VO). +// Note: the function must not acquire any locks, because it's called with an +// external leaf lock held. +bool mp_render_context_acquire(mpv_render_context *ctx) +{ + bool prev = false; + return atomic_compare_exchange_strong(&ctx->in_use, &prev, true); +} + +int mpv_render_context_render(mpv_render_context *ctx, mpv_render_param *params) +{ + mp_mutex_lock(&ctx->lock); + + int do_render = + !GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_SKIP_RENDERING, int, 0); + + if (do_render) { + int vp_w, vp_h; + int err = ctx->renderer->fns->get_target_size(ctx->renderer, params, + &vp_w, &vp_h); + if (err < 0) { + mp_mutex_unlock(&ctx->lock); + return err; + } + + if (ctx->vo && (ctx->vp_w != vp_w || ctx->vp_h != vp_h || + ctx->need_resize)) + { + ctx->vp_w = vp_w; + ctx->vp_h = vp_h; + + m_config_cache_update(ctx->vo_opts_cache); + + struct mp_rect src, dst; + struct mp_osd_res osd; + mp_get_src_dst_rects(ctx->log, ctx->vo_opts, ctx->vo->driver->caps, + &ctx->img_params, vp_w, abs(vp_h), + 1.0, &src, &dst, &osd); + + ctx->renderer->fns->resize(ctx->renderer, &src, &dst, &osd); + } + ctx->need_resize = false; + } + + if 
(ctx->need_reconfig) + ctx->renderer->fns->reconfig(ctx->renderer, &ctx->img_params); + ctx->need_reconfig = false; + + if (ctx->need_update_external) + ctx->renderer->fns->update_external(ctx->renderer, ctx->vo); + ctx->need_update_external = false; + + if (ctx->need_reset) { + ctx->renderer->fns->reset(ctx->renderer); + if (ctx->cur_frame) + ctx->cur_frame->still = true; + } + ctx->need_reset = false; + + struct vo_frame *frame = ctx->next_frame; + int64_t wait_present_count = ctx->present_count; + if (frame) { + ctx->next_frame = NULL; + if (!(frame->redraw || !frame->current)) + wait_present_count += 1; + mp_cond_broadcast(&ctx->video_wait); + talloc_free(ctx->cur_frame); + ctx->cur_frame = vo_frame_ref(frame); + } else { + frame = vo_frame_ref(ctx->cur_frame); + if (frame) + frame->redraw = true; + MP_STATS(ctx, "glcb-noframe"); + } + struct vo_frame dummy = {0}; + if (!frame) + frame = &dummy; + + mp_mutex_unlock(&ctx->lock); + + MP_STATS(ctx, "glcb-render"); + + int err = 0; + + if (do_render) + err = ctx->renderer->fns->render(ctx->renderer, params, frame); + + if (frame != &dummy) + talloc_free(frame); + + if (GET_MPV_RENDER_PARAM(params, MPV_RENDER_PARAM_BLOCK_FOR_TARGET_TIME, + int, 1)) + { + mp_mutex_lock(&ctx->lock); + while (wait_present_count > ctx->present_count) + mp_cond_wait(&ctx->video_wait, &ctx->lock); + mp_mutex_unlock(&ctx->lock); + } + + return err; +} + +void mpv_render_context_report_swap(mpv_render_context *ctx) +{ + MP_STATS(ctx, "glcb-reportflip"); + + mp_mutex_lock(&ctx->lock); + ctx->flip_count += 1; + mp_cond_broadcast(&ctx->video_wait); + mp_mutex_unlock(&ctx->lock); +} + +uint64_t mpv_render_context_update(mpv_render_context *ctx) +{ + uint64_t res = 0; + + mp_dispatch_queue_process(ctx->dispatch, 0); + + mp_mutex_lock(&ctx->lock); + if (ctx->next_frame) + res |= MPV_RENDER_UPDATE_FRAME; + mp_mutex_unlock(&ctx->lock); + return res; +} + +int mpv_render_context_set_parameter(mpv_render_context *ctx, + mpv_render_param param) +{ + 
return ctx->renderer->fns->set_parameter(ctx->renderer, param); +} + +int mpv_render_context_get_info(mpv_render_context *ctx, + mpv_render_param param) +{ + int res = MPV_ERROR_NOT_IMPLEMENTED; + mp_mutex_lock(&ctx->lock); + + switch (param.type) { + case MPV_RENDER_PARAM_NEXT_FRAME_INFO: { + mpv_render_frame_info *info = param.data; + *info = (mpv_render_frame_info){0}; + struct vo_frame *frame = ctx->next_frame; + if (frame) { + info->flags = + MPV_RENDER_FRAME_INFO_PRESENT | + (frame->redraw ? MPV_RENDER_FRAME_INFO_REDRAW : 0) | + (frame->repeat ? MPV_RENDER_FRAME_INFO_REPEAT : 0) | + (frame->display_synced && !frame->redraw ? + MPV_RENDER_FRAME_INFO_BLOCK_VSYNC : 0); + info->target_time = frame->pts; + } + res = 0; + break; + } + default:; + } + + mp_mutex_unlock(&ctx->lock); + return res; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + mp_mutex_lock(&ctx->lock); + assert(!ctx->next_frame); + ctx->next_frame = vo_frame_ref(frame); + ctx->expected_flip_count = ctx->flip_count + 1; + ctx->redrawing = frame->redraw || !frame->current; + mp_mutex_unlock(&ctx->lock); + + update(ctx); +} + +static void flip_page(struct vo *vo) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + int64_t until = mp_time_ns() + MP_TIME_MS_TO_NS(200); + + mp_mutex_lock(&ctx->lock); + + // Wait until frame was rendered + while (ctx->next_frame) { + if (mp_cond_timedwait_until(&ctx->video_wait, &ctx->lock, until)) { + if (ctx->next_frame) { + MP_VERBOSE(vo, "mpv_render_context_render() not being called " + "or stuck.\n"); + goto done; + } + } + } + + // Unblock mpv_render_context_render(). 
+ ctx->present_count += 1; + mp_cond_broadcast(&ctx->video_wait); + + if (ctx->redrawing) + goto done; // do not block for redrawing + + // Wait until frame was presented + while (ctx->expected_flip_count > ctx->flip_count) { + // mpv_render_report_swap() is declared as optional API. + // Assume the user calls it consistently _if_ it's called at all. + if (!ctx->flip_count) + break; + if (mp_cond_timedwait_until(&ctx->video_wait, &ctx->lock, until)) { + MP_VERBOSE(vo, "mpv_render_report_swap() not being called.\n"); + goto done; + } + } + +done: + + // Cleanup after the API user is not reacting, or is being unusually slow. + if (ctx->next_frame) { + talloc_free(ctx->cur_frame); + ctx->cur_frame = ctx->next_frame; + ctx->next_frame = NULL; + ctx->present_count += 2; + mp_cond_signal(&ctx->video_wait); + vo_increment_drop_count(vo, 1); + } + + mp_mutex_unlock(&ctx->lock); +} + +static int query_format(struct vo *vo, int format) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + bool ok = false; + mp_mutex_lock(&ctx->lock); + if (format >= IMGFMT_START && format < IMGFMT_END) + ok = ctx->imgfmt_supported[format - IMGFMT_START]; + mp_mutex_unlock(&ctx->lock); + return ok; +} + +static void run_control_on_render_thread(void *p) +{ + void **args = p; + struct mpv_render_context *ctx = args[0]; + int request = (intptr_t)args[1]; + void *data = args[2]; + int ret = VO_NOTIMPL; + + switch (request) { + case VOCTRL_SCREENSHOT: { + mp_mutex_lock(&ctx->lock); + struct vo_frame *frame = vo_frame_ref(ctx->cur_frame); + mp_mutex_unlock(&ctx->lock); + if (frame && ctx->renderer->fns->screenshot) + ctx->renderer->fns->screenshot(ctx->renderer, frame, data); + talloc_free(frame); + break; + } + case VOCTRL_PERFORMANCE_DATA: { + if (ctx->renderer->fns->perfdata) { + ctx->renderer->fns->perfdata(ctx->renderer, data); + ret = VO_TRUE; + } + break; + } + } + + *(int *)args[3] = ret; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + 
struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + switch (request) { + case VOCTRL_RESET: + mp_mutex_lock(&ctx->lock); + forget_frames(ctx, false); + ctx->need_reset = true; + mp_mutex_unlock(&ctx->lock); + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_PAUSE: + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SET_EQUALIZER: + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_SET_PANSCAN: + mp_mutex_lock(&ctx->lock); + ctx->need_resize = true; + mp_mutex_unlock(&ctx->lock); + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_UPDATE_RENDER_OPTS: + mp_mutex_lock(&ctx->lock); + ctx->need_update_external = true; + mp_mutex_unlock(&ctx->lock); + vo->want_redraw = true; + return VO_TRUE; + } + + // VOCTRLs to be run on the renderer thread (if possible at all). + if (ctx->advanced_control) { + switch (request) { + case VOCTRL_SCREENSHOT: + case VOCTRL_PERFORMANCE_DATA: { + int ret; + void *args[] = {ctx, (void *)(intptr_t)request, data, &ret}; + mp_dispatch_run(ctx->dispatch, run_control_on_render_thread, args); + return ret; + } + } + } + + int r = VO_NOTIMPL; + mp_mutex_lock(&ctx->control_lock); + if (ctx->control_cb) { + int events = 0; + r = p->ctx->control_cb(vo, p->ctx->control_cb_ctx, + &events, request, data); + vo_event(vo, events); + } + mp_mutex_unlock(&ctx->control_lock); + + return r; +} + +static struct mp_image *get_image(struct vo *vo, int imgfmt, int w, int h, + int stride_align, int flags) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + if (ctx->dr) + return dr_helper_get_image(ctx->dr, imgfmt, w, h, stride_align, flags); + + return NULL; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + mp_mutex_lock(&ctx->lock); + forget_frames(ctx, true); + ctx->img_params = *params; + ctx->need_reconfig = true; + ctx->need_resize = true; + mp_mutex_unlock(&ctx->lock); + + 
control(vo, VOCTRL_RECONFIG, NULL); + + return 0; +} + +static void uninit(struct vo *vo) +{ + struct vo_priv *p = vo->priv; + struct mpv_render_context *ctx = p->ctx; + + control(vo, VOCTRL_UNINIT, NULL); + + mp_mutex_lock(&ctx->lock); + + forget_frames(ctx, true); + ctx->img_params = (struct mp_image_params){0}; + ctx->need_reconfig = true; + ctx->need_resize = true; + ctx->need_update_external = true; + ctx->need_reset = true; + ctx->vo = NULL; + + // The following do not normally need ctx->lock, however, ctx itself may + // become invalid once we release ctx->lock. + bool prev_in_use = atomic_exchange(&ctx->in_use, false); + assert(prev_in_use); // obviously must have been set + mp_dispatch_interrupt(ctx->dispatch); + + mp_mutex_unlock(&ctx->lock); +} + +static int preinit(struct vo *vo) +{ + struct vo_priv *p = vo->priv; + + struct mpv_render_context *ctx = + mp_client_api_acquire_render_context(vo->global->client_api); + p->ctx = ctx; + + if (!ctx) { + if (!vo->probing) + MP_FATAL(vo, "No render context set.\n"); + return -1; + } + + mp_mutex_lock(&ctx->lock); + ctx->vo = vo; + ctx->need_resize = true; + ctx->need_update_external = true; + mp_mutex_unlock(&ctx->lock); + + vo->hwdec_devs = ctx->hwdec_devs; + control(vo, VOCTRL_PREINIT, NULL); + + return 0; +} + +const struct vo_driver video_out_libmpv = { + .description = "render API for libmpv", + .name = "libmpv", + .caps = VO_CAP_ROTATE90, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .get_image_ts = get_image, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct vo_priv), +}; diff --git a/video/out/vo_mediacodec_embed.c b/video/out/vo_mediacodec_embed.c new file mode 100644 index 0000000..08d3866 --- /dev/null +++ b/video/out/vo_mediacodec_embed.c @@ -0,0 +1,127 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <libavcodec/mediacodec.h> +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_mediacodec.h> + +#include "common/common.h" +#include "vo.h" +#include "video/mp_image.h" +#include "video/hwdec.h" + +struct priv { + struct mp_image *next_image; + struct mp_hwdec_ctx hwctx; +}; + +static AVBufferRef *create_mediacodec_device_ref(struct vo *vo) +{ + AVBufferRef *device_ref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_MEDIACODEC); + if (!device_ref) + return NULL; + + AVHWDeviceContext *ctx = (void *)device_ref->data; + AVMediaCodecDeviceContext *hwctx = ctx->hwctx; + assert(vo->opts->WinID != 0 && vo->opts->WinID != -1); + hwctx->surface = (void *)(intptr_t)(vo->opts->WinID); + + if (av_hwdevice_ctx_init(device_ref) < 0) + av_buffer_unref(&device_ref); + + return device_ref; +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + vo->hwdec_devs = hwdec_devices_create(); + p->hwctx = (struct mp_hwdec_ctx){ + .driver_name = "mediacodec_embed", + .av_device_ref = create_mediacodec_device_ref(vo), + .hw_imgfmt = IMGFMT_MEDIACODEC, + }; + + if (!p->hwctx.av_device_ref) { + MP_VERBOSE(vo, "Failed to create hwdevice_ctx\n"); + return -1; + } + + hwdec_devices_add(vo->hwdec_devs, &p->hwctx); + return 0; +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + if 
(!p->next_image) + return; + + AVMediaCodecBuffer *buffer = (AVMediaCodecBuffer *)p->next_image->planes[3]; + av_mediacodec_release_buffer(buffer, 1); + mp_image_unrefp(&p->next_image); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + + mp_image_t *mpi = NULL; + if (!frame->redraw && !frame->repeat) + mpi = mp_image_new_ref(frame->current); + + talloc_free(p->next_image); + p->next_image = mpi; +} + +static int query_format(struct vo *vo, int format) +{ + return format == IMGFMT_MEDIACODEC; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + return VO_NOTIMPL; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + return 0; +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + mp_image_unrefp(&p->next_image); + + hwdec_devices_remove(vo->hwdec_devs, &p->hwctx); + av_buffer_unref(&p->hwctx.av_device_ref); +} + +const struct vo_driver video_out_mediacodec_embed = { + .description = "Android (Embedded MediaCodec Surface)", + .name = "mediacodec_embed", + .caps = VO_CAP_NORETAIN, + .preinit = preinit, + .query_format = query_format, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .reconfig = reconfig, + .uninit = uninit, + .priv_size = sizeof(struct priv), +}; diff --git a/video/out/vo_null.c b/video/out/vo_null.c new file mode 100644 index 0000000..0c49062 --- /dev/null +++ b/video/out/vo_null.c @@ -0,0 +1,104 @@ +/* + * based on video_out_null.c from mpeg2dec + * + * Copyright (C) Aaron Holtzman - June 2000 + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdlib.h> +#include "common/msg.h" +#include "vo.h" +#include "video/mp_image.h" +#include "osdep/timer.h" +#include "options/m_option.h" + +struct priv { + int64_t last_vsync; + + double cfg_fps; +}; + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + if (p->cfg_fps) { + int64_t ft = 1e9 / p->cfg_fps; + int64_t prev_vsync = mp_time_ns() / ft; + int64_t target_time = (prev_vsync + 1) * ft; + for (;;) { + int64_t now = mp_time_ns(); + if (now >= target_time) + break; + mp_sleep_ns(target_time - now); + } + } +} + +static int query_format(struct vo *vo, int format) +{ + return 1; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + return 0; +} + +static void uninit(struct vo *vo) +{ +} + +static int preinit(struct vo *vo) +{ + return 0; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct priv *p = vo->priv; + switch (request) { + case VOCTRL_GET_DISPLAY_FPS: + if (!p->cfg_fps) + break; + *(double *)data = p->cfg_fps; + return VO_TRUE; + } + return VO_NOTIMPL; +} + +#define OPT_BASE_STRUCT struct priv +const struct vo_driver video_out_null = { + .description = "Null video output", + .name = "null", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct priv), + .options = (const struct m_option[]) { + {"fps", OPT_DOUBLE(cfg_fps), M_RANGE(0, 10000)}, + {0}, + }, + 
.options_prefix = "vo-null", +}; diff --git a/video/out/vo_rpi.c b/video/out/vo_rpi.c new file mode 100644 index 0000000..55f1a68 --- /dev/null +++ b/video/out/vo_rpi.c @@ -0,0 +1,938 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <math.h> +#include <stdbool.h> +#include <assert.h> + +#include <bcm_host.h> +#include <interface/mmal/mmal.h> +#include <interface/mmal/util/mmal_util.h> +#include <interface/mmal/util/mmal_default_components.h> +#include <interface/mmal/vc/mmal_vc_api.h> + +#include <EGL/egl.h> +#include <EGL/eglext.h> + +#include <libavutil/rational.h> + +#include "common/common.h" +#include "common/msg.h" +#include "opengl/common.h" +#include "options/m_config.h" +#include "osdep/timer.h" +#include "vo.h" +#include "win_state.h" +#include "video/mp_image.h" +#include "sub/osd.h" + +#include "opengl/ra_gl.h" +#include "gpu/video.h" + +struct mp_egl_rpi { + struct mp_log *log; + struct GL *gl; + struct ra *ra; + EGLDisplay egl_display; + EGLConfig egl_config; + EGLContext egl_context; + EGLSurface egl_surface; + // yep, the API keeps a pointer to it + EGL_DISPMANX_WINDOW_T egl_window; +}; + +struct priv { + DISPMANX_DISPLAY_HANDLE_T display; + DISPMANX_ELEMENT_HANDLE_T window; + DISPMANX_ELEMENT_HANDLE_T osd_overlay; + 
DISPMANX_UPDATE_HANDLE_T update; + uint32_t w, h; + uint32_t x, y; + double display_fps; + + double osd_pts; + struct mp_osd_res osd_res; + struct m_config_cache *opts_cache; + + struct mp_egl_rpi egl; + struct gl_video *gl_video; + struct mpgl_osd *osd; + + MMAL_COMPONENT_T *renderer; + bool renderer_enabled; + + bool display_synced, skip_osd; + struct mp_image *next_image; + + // for RAM input + MMAL_POOL_T *swpool; + + mp_mutex display_mutex; + mp_cond display_cond; + int64_t vsync_counter; + bool reload_display; + + int background_layer; + int video_layer; + int osd_layer; + + int display_nr; + int layer; + bool background; + bool enable_osd; +}; + +// Magic alignments (in pixels) expected by the MMAL internals. +#define ALIGN_W 32 +#define ALIGN_H 16 + +static void recreate_renderer(struct vo *vo); + +static void *get_proc_address(const GLubyte *name) +{ + void *p = eglGetProcAddress(name); + // EGL 1.4 (supported by the RPI firmware) does not necessarily return + // function pointers for core functions. 
+ if (!p) { + void *h = dlopen("/opt/vc/lib/libbrcmGLESv2.so", RTLD_LAZY); + if (h) { + p = dlsym(h, name); + dlclose(h); + } + } + return p; +} + +static EGLConfig select_fb_config_egl(struct mp_egl_rpi *p) +{ + EGLint attributes[] = { + EGL_SURFACE_TYPE, EGL_WINDOW_BIT, + EGL_RED_SIZE, 8, + EGL_GREEN_SIZE, 8, + EGL_BLUE_SIZE, 8, + EGL_DEPTH_SIZE, 0, + EGL_RENDERABLE_TYPE, EGL_OPENGL_ES2_BIT, + EGL_NONE + }; + + EGLint config_count; + EGLConfig config; + + eglChooseConfig(p->egl_display, attributes, &config, 1, &config_count); + + if (!config_count) { + MP_FATAL(p, "Could find EGL configuration!\n"); + return NULL; + } + + return config; +} + +static void mp_egl_rpi_destroy(struct mp_egl_rpi *p) +{ + if (p->egl_display) { + eglMakeCurrent(p->egl_display, EGL_NO_SURFACE, EGL_NO_SURFACE, + EGL_NO_CONTEXT); + } + if (p->egl_surface) + eglDestroySurface(p->egl_display, p->egl_surface); + if (p->egl_context) + eglDestroyContext(p->egl_display, p->egl_context); + p->egl_context = EGL_NO_CONTEXT; + eglReleaseThread(); + p->egl_display = EGL_NO_DISPLAY; + talloc_free(p->gl); + p->gl = NULL; +} + +static int mp_egl_rpi_init(struct mp_egl_rpi *p, DISPMANX_ELEMENT_HANDLE_T window, + int w, int h) +{ + p->egl_display = eglGetDisplay(EGL_DEFAULT_DISPLAY); + if (!eglInitialize(p->egl_display, NULL, NULL)) { + MP_FATAL(p, "EGL failed to initialize.\n"); + goto fail; + } + + eglBindAPI(EGL_OPENGL_ES_API); + + EGLConfig config = select_fb_config_egl(p); + if (!config) + goto fail; + + p->egl_window = (EGL_DISPMANX_WINDOW_T){ + .element = window, + .width = w, + .height = h, + }; + p->egl_surface = eglCreateWindowSurface(p->egl_display, config, + &p->egl_window, NULL); + + if (p->egl_surface == EGL_NO_SURFACE) { + MP_FATAL(p, "Could not create EGL surface!\n"); + goto fail; + } + + EGLint context_attributes[] = { + EGL_CONTEXT_CLIENT_VERSION, 2, + EGL_NONE + }; + p->egl_context = eglCreateContext(p->egl_display, config, + EGL_NO_CONTEXT, context_attributes); + + if (p->egl_context 
== EGL_NO_CONTEXT) { + MP_FATAL(p, "Could not create EGL context!\n"); + goto fail; + } + + eglMakeCurrent(p->egl_display, p->egl_surface, p->egl_surface, + p->egl_context); + + p->gl = talloc_zero(NULL, struct GL); + + const char *exts = eglQueryString(p->egl_display, EGL_EXTENSIONS); + mpgl_load_functions(p->gl, get_proc_address, exts, p->log); + + if (!p->gl->version && !p->gl->es) + goto fail; + + p->ra = ra_create_gl(p->gl, p->log); + if (!p->ra) + goto fail; + + return 0; + +fail: + mp_egl_rpi_destroy(p); + return -1; +} + +// Make mpi point to buffer, assuming MMAL_ENCODING_I420. +// buffer can be NULL. +// Return the required buffer space. +static size_t layout_buffer(struct mp_image *mpi, MMAL_BUFFER_HEADER_T *buffer, + struct mp_image_params *params) +{ + assert(params->imgfmt == IMGFMT_420P); + mp_image_set_params(mpi, params); + int w = MP_ALIGN_UP(params->w, ALIGN_W); + int h = MP_ALIGN_UP(params->h, ALIGN_H); + uint8_t *cur = buffer ? buffer->data : NULL; + size_t size = 0; + for (int i = 0; i < 3; i++) { + int div = i ? 
2 : 1; + mpi->planes[i] = cur; + mpi->stride[i] = w / div; + size_t plane_size = h / div * mpi->stride[i]; + if (cur) + cur += plane_size; + size += plane_size; + } + return size; +} + +static void update_osd(struct vo *vo) +{ + struct priv *p = vo->priv; + if (!p->enable_osd) + return; + + if (!gl_video_check_osd_change(p->gl_video, &p->osd_res, p->osd_pts)) { + p->skip_osd = true; + return; + } + + MP_STATS(vo, "start rpi_osd"); + + struct vo_frame frame = {0}; + struct ra_fbo target = { + .tex = ra_create_wrapped_fb(p->egl.ra, 0, p->osd_res.w, p->osd_res.h), + .flip = true, + }; + gl_video_set_osd_pts(p->gl_video, p->osd_pts); + gl_video_render_frame(p->gl_video, &frame, target, RENDER_FRAME_DEF); + ra_tex_free(p->egl.ra, &target.tex); + + MP_STATS(vo, "stop rpi_osd"); +} + +static void resize(struct vo *vo) +{ + struct priv *p = vo->priv; + MMAL_PORT_T *input = p->renderer->input[0]; + + struct mp_rect src, dst; + + vo_get_src_dst_rects(vo, &src, &dst, &p->osd_res); + + int rotate[] = {MMAL_DISPLAY_ROT0, + MMAL_DISPLAY_ROT90, + MMAL_DISPLAY_ROT180, + MMAL_DISPLAY_ROT270}; + + + int src_w = src.x1 - src.x0, src_h = src.y1 - src.y0, + dst_w = dst.x1 - dst.x0, dst_h = dst.y1 - dst.y0; + int p_x, p_y; + av_reduce(&p_x, &p_y, dst_w * src_h, src_w * dst_h, 16000); + MMAL_DISPLAYREGION_T dr = { + .hdr = { .id = MMAL_PARAMETER_DISPLAYREGION, + .size = sizeof(MMAL_DISPLAYREGION_T), }, + .src_rect = { .x = src.x0, .y = src.y0, .width = src_w, .height = src_h }, + .dest_rect = { .x = dst.x0 + p->x, .y = dst.y0 + p->y, + .width = dst_w, .height = dst_h }, + .layer = p->video_layer, + .display_num = p->display_nr, + .pixel_x = p_x, + .pixel_y = p_y, + .transform = rotate[vo->params ? 
vo->params->rotate / 90 : 0], + .fullscreen = vo->opts->fullscreen, + .set = MMAL_DISPLAY_SET_SRC_RECT | MMAL_DISPLAY_SET_DEST_RECT | + MMAL_DISPLAY_SET_LAYER | MMAL_DISPLAY_SET_NUM | + MMAL_DISPLAY_SET_PIXEL | MMAL_DISPLAY_SET_TRANSFORM | + MMAL_DISPLAY_SET_FULLSCREEN, + }; + + if (vo->params && (vo->params->rotate % 180) == 90) { + MPSWAP(int, dr.src_rect.x, dr.src_rect.y); + MPSWAP(int, dr.src_rect.width, dr.src_rect.height); + } + + if (mmal_port_parameter_set(input, &dr.hdr)) + MP_WARN(vo, "could not set video rectangle\n"); + + if (p->gl_video) + gl_video_resize(p->gl_video, &src, &dst, &p->osd_res); +} + +static void destroy_overlays(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (p->window) + vc_dispmanx_element_remove(p->update, p->window); + p->window = 0; + + gl_video_uninit(p->gl_video); + p->gl_video = NULL; + ra_free(&p->egl.ra); + mp_egl_rpi_destroy(&p->egl); + + if (p->osd_overlay) + vc_dispmanx_element_remove(p->update, p->osd_overlay); + p->osd_overlay = 0; +} + +static int update_display_size(struct vo *vo) +{ + struct priv *p = vo->priv; + + uint32_t n_w = 0, n_h = 0; + if (graphics_get_display_size(0, &n_w, &n_h) < 0) { + MP_FATAL(vo, "Could not get display size.\n"); + return -1; + } + + if (p->w == n_w && p->h == n_h) + return 0; + + p->w = n_w; + p->h = n_h; + + MP_VERBOSE(vo, "Display size: %dx%d\n", p->w, p->h); + + return 0; +} + +static int create_overlays(struct vo *vo) +{ + struct priv *p = vo->priv; + destroy_overlays(vo); + + if (!p->display) + return -1; + + if (vo->opts->fullscreen && p->background) { + // Use the whole screen. 
+ VC_RECT_T dst = {.width = p->w, .height = p->h}; + VC_RECT_T src = {.width = 1 << 16, .height = 1 << 16}; + VC_DISPMANX_ALPHA_T alpha = { + .flags = DISPMANX_FLAGS_ALPHA_FIXED_ALL_PIXELS, + .opacity = 0xFF, + }; + + p->window = vc_dispmanx_element_add(p->update, p->display, + p->background_layer, + &dst, 0, &src, + DISPMANX_PROTECTION_NONE, + &alpha, 0, 0); + if (!p->window) { + MP_FATAL(vo, "Could not add DISPMANX element.\n"); + return -1; + } + } + + if (p->enable_osd) { + VC_RECT_T dst = {.x = p->x, .y = p->y, + .width = p->osd_res.w, .height = p->osd_res.h}; + VC_RECT_T src = {.width = p->osd_res.w << 16, .height = p->osd_res.h << 16}; + VC_DISPMANX_ALPHA_T alpha = { + .flags = DISPMANX_FLAGS_ALPHA_FROM_SOURCE, + .opacity = 0xFF, + }; + p->osd_overlay = vc_dispmanx_element_add(p->update, p->display, + p->osd_layer, + &dst, 0, &src, + DISPMANX_PROTECTION_NONE, + &alpha, 0, 0); + if (!p->osd_overlay) { + MP_FATAL(vo, "Could not add DISPMANX element.\n"); + return -1; + } + + if (mp_egl_rpi_init(&p->egl, p->osd_overlay, + p->osd_res.w, p->osd_res.h) < 0) + { + MP_FATAL(vo, "EGL/GLES initialization for OSD renderer failed.\n"); + return -1; + } + p->gl_video = gl_video_init(p->egl.ra, vo->log, vo->global); + gl_video_set_clear_color(p->gl_video, (struct m_color){.a = 0}); + gl_video_set_osd_source(p->gl_video, vo->osd); + } + + p->display_fps = 0; + TV_GET_STATE_RESP_T tvstate; + TV_DISPLAY_STATE_T tvstate_disp; + if (!vc_tv_get_state(&tvstate) && !vc_tv_get_display_state(&tvstate_disp)) { + if (tvstate_disp.state & (VC_HDMI_HDMI | VC_HDMI_DVI)) { + p->display_fps = tvstate_disp.display.hdmi.frame_rate; + + HDMI_PROPERTY_PARAM_T param = { + .property = HDMI_PROPERTY_PIXEL_CLOCK_TYPE, + }; + if (!vc_tv_hdmi_get_property(¶m) && + param.param1 == HDMI_PIXEL_CLOCK_TYPE_NTSC) + p->display_fps = p->display_fps / 1.001; + } else { + p->display_fps = tvstate_disp.display.sdtv.frame_rate; + } + } + + resize(vo); + + vo_event(vo, VO_EVENT_WIN_STATE); + + 
vc_dispmanx_update_submit_sync(p->update); + p->update = vc_dispmanx_update_start(10); + + return 0; +} + +static int set_geometry(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (vo->opts->fullscreen) { + vo->dwidth = p->w; + vo->dheight = p->h; + p->x = p->y = 0; + } else { + struct vo_win_geometry geo; + struct mp_rect screenrc = {0, 0, p->w, p->h}; + + vo_calc_window_geometry(vo, &screenrc, &geo); + vo_apply_window_geometry(vo, &geo); + + p->x = geo.win.x0; + p->y = geo.win.y0; + } + + resize(vo); + + if (create_overlays(vo) < 0) + return -1; + + return 0; +} + +static void wait_next_vsync(struct vo *vo) +{ + struct priv *p = vo->priv; + mp_mutex_lock(&p->display_mutex); + int64_t end = mp_time_ns() + MP_TIME_MS_TO_NS(50); + int64_t old = p->vsync_counter; + while (old == p->vsync_counter && !p->reload_display) { + if (mp_cond_timedwait_until(&p->display_cond, &p->display_mutex, end)) + break; + } + mp_mutex_unlock(&p->display_mutex); +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (!p->renderer_enabled) + return; + + struct mp_image *mpi = p->next_image; + p->next_image = NULL; + + // For OSD + if (!p->skip_osd && p->egl.gl) + eglSwapBuffers(p->egl.egl_display, p->egl.egl_surface); + p->skip_osd = false; + + if (mpi) { + MMAL_PORT_T *input = p->renderer->input[0]; + MMAL_BUFFER_HEADER_T *ref = (void *)mpi->planes[3]; + + // Assume this field is free for use by us. 
+ ref->user_data = mpi; + + if (mmal_port_send_buffer(input, ref)) { + MP_ERR(vo, "could not queue picture!\n"); + talloc_free(mpi); + } + } + + if (p->display_synced) + wait_next_vsync(vo); +} + +static void free_mmal_buffer(void *arg) +{ + MMAL_BUFFER_HEADER_T *buffer = arg; + mmal_buffer_header_release(buffer); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + + if (!p->renderer_enabled) + return; + + mp_image_t *mpi = NULL; + if (!frame->redraw && !frame->repeat) + mpi = mp_image_new_ref(frame->current); + + talloc_free(p->next_image); + p->next_image = NULL; + + if (mpi) + p->osd_pts = mpi->pts; + + // Redraw only if the OSD has meaningfully changed, which we assume it + // hasn't when a frame is merely repeated for display sync. + p->skip_osd = !frame->redraw && frame->repeat; + + if (!p->skip_osd && p->egl.gl) + update_osd(vo); + + p->display_synced = frame->display_synced; + + if (mpi && mpi->imgfmt != IMGFMT_MMAL) { + MMAL_BUFFER_HEADER_T *buffer = mmal_queue_wait(p->swpool->queue); + if (!buffer) { + talloc_free(mpi); + MP_ERR(vo, "Can't allocate buffer.\n"); + return; + } + mmal_buffer_header_reset(buffer); + + struct mp_image *new_ref = mp_image_new_custom_ref(NULL, buffer, + free_mmal_buffer); + if (!new_ref) { + mmal_buffer_header_release(buffer); + talloc_free(mpi); + MP_ERR(vo, "Out of memory.\n"); + return; + } + + mp_image_setfmt(new_ref, IMGFMT_MMAL); + new_ref->planes[3] = (void *)buffer; + + struct mp_image dmpi = {0}; + buffer->length = layout_buffer(&dmpi, buffer, vo->params); + mp_image_copy(&dmpi, mpi); + + talloc_free(mpi); + mpi = new_ref; + } + + p->next_image = mpi; +} + +static int query_format(struct vo *vo, int format) +{ + return format == IMGFMT_MMAL || format == IMGFMT_420P; +} + +static MMAL_FOURCC_T map_csp(enum mp_csp csp) +{ + switch (csp) { + case MP_CSP_BT_601: return MMAL_COLOR_SPACE_ITUR_BT601; + case MP_CSP_BT_709: return MMAL_COLOR_SPACE_ITUR_BT709; + case 
MP_CSP_SMPTE_240M: return MMAL_COLOR_SPACE_SMPTE240M; + default: return MMAL_COLOR_SPACE_UNKNOWN; + } +} + +static void control_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) +{ + mmal_buffer_header_release(buffer); +} + +static void input_port_cb(MMAL_PORT_T *port, MMAL_BUFFER_HEADER_T *buffer) +{ + struct mp_image *mpi = buffer->user_data; + talloc_free(mpi); +} + +static void disable_renderer(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (p->renderer_enabled) { + mmal_port_disable(p->renderer->control); + mmal_port_disable(p->renderer->input[0]); + + mmal_port_flush(p->renderer->control); + mmal_port_flush(p->renderer->input[0]); + + mmal_component_disable(p->renderer); + } + mmal_pool_destroy(p->swpool); + p->swpool = NULL; + p->renderer_enabled = false; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *p = vo->priv; + MMAL_PORT_T *input = p->renderer->input[0]; + bool opaque = params->imgfmt == IMGFMT_MMAL; + + if (!p->display) + return -1; + + disable_renderer(vo); + + input->format->encoding = opaque ? 
MMAL_ENCODING_OPAQUE : MMAL_ENCODING_I420; + input->format->es->video.width = MP_ALIGN_UP(params->w, ALIGN_W); + input->format->es->video.height = MP_ALIGN_UP(params->h, ALIGN_H); + input->format->es->video.crop = (MMAL_RECT_T){0, 0, params->w, params->h}; + input->format->es->video.par = (MMAL_RATIONAL_T){params->p_w, params->p_h}; + input->format->es->video.color_space = map_csp(params->color.space); + + if (mmal_port_format_commit(input)) + return -1; + + input->buffer_num = MPMAX(input->buffer_num_min, + input->buffer_num_recommended) + 3; + input->buffer_size = MPMAX(input->buffer_size_min, + input->buffer_size_recommended); + + if (!opaque) { + size_t size = layout_buffer(&(struct mp_image){0}, NULL, params); + if (input->buffer_size != size) { + MP_FATAL(vo, "We disagree with MMAL about buffer sizes.\n"); + return -1; + } + + p->swpool = mmal_pool_create(input->buffer_num, input->buffer_size); + if (!p->swpool) { + MP_FATAL(vo, "Could not allocate buffer pool.\n"); + return -1; + } + } + + if (set_geometry(vo) < 0) + return -1; + + p->renderer_enabled = true; + + if (mmal_port_enable(p->renderer->control, control_port_cb)) + return -1; + + if (mmal_port_enable(input, input_port_cb)) + return -1; + + if (mmal_component_enable(p->renderer)) { + MP_FATAL(vo, "Failed to enable video renderer.\n"); + return -1; + } + + resize(vo); + + return 0; +} + +static struct mp_image *take_screenshot(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (!p->display) + return NULL; + + struct mp_image *img = mp_image_alloc(IMGFMT_BGR0, p->w, p->h); + if (!img) + return NULL; + + DISPMANX_RESOURCE_HANDLE_T resource = + vc_dispmanx_resource_create(VC_IMAGE_ARGB8888, + img->w | ((img->w * 4) << 16), img->h, + &(int32_t){0}); + if (!resource) + goto fail; + + if (vc_dispmanx_snapshot(p->display, resource, 0)) + goto fail; + + VC_RECT_T rc = {.width = img->w, .height = img->h}; + if (vc_dispmanx_resource_read_data(resource, &rc, img->planes[0], img->stride[0])) + goto fail; + + 
vc_dispmanx_resource_delete(resource); + return img; + +fail: + vc_dispmanx_resource_delete(resource); + talloc_free(img); + return NULL; +} + +static void set_fullscreen(struct vo *vo) { + struct priv *p = vo->priv; + + if (p->renderer_enabled) + set_geometry(vo); + vo->want_redraw = true; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct priv *p = vo->priv; + + switch (request) { + case VOCTRL_VO_OPTS_CHANGED: { + void *opt; + while (m_config_cache_get_next_changed(p->opts_cache, &opt)) { + struct mp_vo_opts *opts = p->opts_cache->opts; + if (&opts->fullscreen == opt) + set_fullscreen(vo); + } + return VO_TRUE; + } + case VOCTRL_SET_PANSCAN: + if (p->renderer_enabled) + resize(vo); + vo->want_redraw = true; + return VO_TRUE; + case VOCTRL_REDRAW_FRAME: + update_osd(vo); + return VO_TRUE; + case VOCTRL_SCREENSHOT_WIN: + *(struct mp_image **)data = take_screenshot(vo); + return VO_TRUE; + case VOCTRL_CHECK_EVENTS: { + mp_mutex_lock(&p->display_mutex); + bool reload_required = p->reload_display; + p->reload_display = false; + mp_mutex_unlock(&p->display_mutex); + if (reload_required) + recreate_renderer(vo); + return VO_TRUE; + } + case VOCTRL_GET_DISPLAY_FPS: + *(double *)data = p->display_fps; + return VO_TRUE; + case VOCTRL_GET_DISPLAY_RES: + ((int *)data)[0] = p->w; + ((int *)data)[1] = p->h; + return VO_TRUE; + } + + return VO_NOTIMPL; +} + +static void tv_callback(void *callback_data, uint32_t reason, uint32_t param1, + uint32_t param2) +{ + struct vo *vo = callback_data; + struct priv *p = vo->priv; + mp_mutex_lock(&p->display_mutex); + p->reload_display = true; + mp_cond_signal(&p->display_cond); + mp_mutex_unlock(&p->display_mutex); + vo_wakeup(vo); +} + +static void vsync_callback(DISPMANX_UPDATE_HANDLE_T u, void *arg) +{ + struct vo *vo = arg; + struct priv *p = vo->priv; + mp_mutex_lock(&p->display_mutex); + p->vsync_counter += 1; + mp_cond_signal(&p->display_cond); + mp_mutex_unlock(&p->display_mutex); +} + +static void 
destroy_dispmanx(struct vo *vo) +{ + struct priv *p = vo->priv; + + disable_renderer(vo); + destroy_overlays(vo); + + if (p->update) + vc_dispmanx_update_submit_sync(p->update); + p->update = 0; + + if (p->display) { + vc_dispmanx_vsync_callback(p->display, NULL, NULL); + vc_dispmanx_display_close(p->display); + } + p->display = 0; +} + +static int recreate_dispmanx(struct vo *vo) +{ + struct priv *p = vo->priv; + + p->display = vc_dispmanx_display_open(p->display_nr); + p->update = vc_dispmanx_update_start(0); + if (!p->display || !p->update) { + MP_FATAL(vo, "Could not get DISPMANX objects.\n"); + if (p->display) + vc_dispmanx_display_close(p->display); + p->display = 0; + p->update = 0; + return -1; + } + + update_display_size(vo); + + vc_dispmanx_vsync_callback(p->display, vsync_callback, vo); + + return 0; +} + +static void recreate_renderer(struct vo *vo) +{ + MP_WARN(vo, "Recreating renderer after display change.\n"); + + destroy_dispmanx(vo); + recreate_dispmanx(vo); + + if (vo->params) { + if (reconfig(vo, vo->params) < 0) + MP_FATAL(vo, "Recreation failed.\n"); + } +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + + vc_tv_unregister_callback_full(tv_callback, vo); + + talloc_free(p->next_image); + + destroy_dispmanx(vo); + + if (p->renderer) + mmal_component_release(p->renderer); + + mmal_vc_deinit(); + + mp_cond_destroy(&p->display_cond); + mp_mutex_destroy(&p->display_mutex); +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + + p->background_layer = p->layer; + p->video_layer = p->layer + 1; + p->osd_layer = p->layer + 2; + + p->egl.log = vo->log; + + bcm_host_init(); + + if (mmal_vc_init()) { + MP_FATAL(vo, "Could not initialize MMAL.\n"); + return -1; + } + + mp_mutex_init(&p->display_mutex); + mp_cond_init(&p->display_cond); + + p->opts_cache = m_config_cache_alloc(p, vo->global, &vo_sub_opts); + + if (recreate_dispmanx(vo) < 0) + goto fail; + + if (update_display_size(vo) < 0) + goto fail; + + if 
(mmal_component_create(MMAL_COMPONENT_DEFAULT_VIDEO_RENDERER, &p->renderer)) + { + MP_FATAL(vo, "Could not create MMAL renderer.\n"); + goto fail; + } + + vc_tv_register_callback(tv_callback, vo); + + return 0; + +fail: + uninit(vo); + return -1; +} + +#define OPT_BASE_STRUCT struct priv +static const struct m_option options[] = { + {"display", OPT_INT(display_nr)}, + {"layer", OPT_INT(layer), OPTDEF_INT(-10)}, + {"background", OPT_BOOL(background)}, + {"osd", OPT_BOOL(enable_osd), OPTDEF_INT(1)}, + {0}, +}; + +const struct vo_driver video_out_rpi = { + .description = "Raspberry Pi (MMAL)", + .name = "rpi", + .caps = VO_CAP_ROTATE90, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct priv), + .options = options, + .options_prefix = "rpi", +}; diff --git a/video/out/vo_sdl.c b/video/out/vo_sdl.c new file mode 100644 index 0000000..5f4c027 --- /dev/null +++ b/video/out/vo_sdl.c @@ -0,0 +1,992 @@ +/* + * video output driver for SDL 2.0+ + * + * Copyright (C) 2012 Rudolf Polzer <divVerent@xonotic.org> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/stat.h> +#include <unistd.h> +#include <string.h> +#include <time.h> +#include <errno.h> +#include <assert.h> + +#include <SDL.h> + +#include "input/input.h" +#include "input/keycodes.h" +#include "input/input.h" +#include "common/msg.h" +#include "options/m_config.h" +#include "options/options.h" + +#include "osdep/timer.h" + +#include "sub/osd.h" + +#include "video/mp_image.h" + +#include "win_state.h" +#include "vo.h" + +struct formatmap_entry { + Uint32 sdl; + unsigned int mpv; + int is_rgba; +}; +const struct formatmap_entry formats[] = { + {SDL_PIXELFORMAT_YV12, IMGFMT_420P, 0}, + {SDL_PIXELFORMAT_IYUV, IMGFMT_420P, 0}, + {SDL_PIXELFORMAT_UYVY, IMGFMT_UYVY, 0}, + //{SDL_PIXELFORMAT_YVYU, IMGFMT_YVYU, 0}, +#if BYTE_ORDER == BIG_ENDIAN + {SDL_PIXELFORMAT_RGB888, IMGFMT_0RGB, 0}, // RGB888 means XRGB8888 + {SDL_PIXELFORMAT_RGBX8888, IMGFMT_RGB0, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_BGR888, IMGFMT_0BGR, 0}, // BGR888 means XBGR8888 + {SDL_PIXELFORMAT_BGRX8888, IMGFMT_BGR0, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_ARGB8888, IMGFMT_ARGB, 1}, // matches SUBBITMAP_BGRA + {SDL_PIXELFORMAT_RGBA8888, IMGFMT_RGBA, 1}, + {SDL_PIXELFORMAT_ABGR8888, IMGFMT_ABGR, 1}, + {SDL_PIXELFORMAT_BGRA8888, IMGFMT_BGRA, 1}, +#else + {SDL_PIXELFORMAT_RGB888, IMGFMT_BGR0, 0}, // RGB888 means XRGB8888 + {SDL_PIXELFORMAT_RGBX8888, IMGFMT_0BGR, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_BGR888, IMGFMT_RGB0, 0}, // BGR888 means XBGR8888 + {SDL_PIXELFORMAT_BGRX8888, IMGFMT_0RGB, 0}, // has no alpha -> bad for OSD + {SDL_PIXELFORMAT_ARGB8888, IMGFMT_BGRA, 1}, // matches SUBBITMAP_BGRA + {SDL_PIXELFORMAT_RGBA8888, IMGFMT_ABGR, 1}, + {SDL_PIXELFORMAT_ABGR8888, IMGFMT_RGBA, 1}, + {SDL_PIXELFORMAT_BGRA8888, IMGFMT_ARGB, 1}, +#endif + {SDL_PIXELFORMAT_RGB24, IMGFMT_RGB24, 0}, + {SDL_PIXELFORMAT_BGR24, IMGFMT_BGR24, 0}, + {SDL_PIXELFORMAT_RGB565, IMGFMT_RGB565, 0}, +}; + +struct keymap_entry { + 
SDL_Keycode sdl; + int mpv; +}; +const struct keymap_entry keys[] = { + {SDLK_RETURN, MP_KEY_ENTER}, + {SDLK_ESCAPE, MP_KEY_ESC}, + {SDLK_BACKSPACE, MP_KEY_BACKSPACE}, + {SDLK_TAB, MP_KEY_TAB}, + {SDLK_PRINTSCREEN, MP_KEY_PRINT}, + {SDLK_PAUSE, MP_KEY_PAUSE}, + {SDLK_INSERT, MP_KEY_INSERT}, + {SDLK_HOME, MP_KEY_HOME}, + {SDLK_PAGEUP, MP_KEY_PAGE_UP}, + {SDLK_DELETE, MP_KEY_DELETE}, + {SDLK_END, MP_KEY_END}, + {SDLK_PAGEDOWN, MP_KEY_PAGE_DOWN}, + {SDLK_RIGHT, MP_KEY_RIGHT}, + {SDLK_LEFT, MP_KEY_LEFT}, + {SDLK_DOWN, MP_KEY_DOWN}, + {SDLK_UP, MP_KEY_UP}, + {SDLK_KP_ENTER, MP_KEY_KPENTER}, + {SDLK_KP_1, MP_KEY_KP1}, + {SDLK_KP_2, MP_KEY_KP2}, + {SDLK_KP_3, MP_KEY_KP3}, + {SDLK_KP_4, MP_KEY_KP4}, + {SDLK_KP_5, MP_KEY_KP5}, + {SDLK_KP_6, MP_KEY_KP6}, + {SDLK_KP_7, MP_KEY_KP7}, + {SDLK_KP_8, MP_KEY_KP8}, + {SDLK_KP_9, MP_KEY_KP9}, + {SDLK_KP_0, MP_KEY_KP0}, + {SDLK_KP_PERIOD, MP_KEY_KPDEC}, + {SDLK_POWER, MP_KEY_POWER}, + {SDLK_MENU, MP_KEY_MENU}, + {SDLK_STOP, MP_KEY_STOP}, + {SDLK_MUTE, MP_KEY_MUTE}, + {SDLK_VOLUMEUP, MP_KEY_VOLUME_UP}, + {SDLK_VOLUMEDOWN, MP_KEY_VOLUME_DOWN}, + {SDLK_KP_COMMA, MP_KEY_KPDEC}, + {SDLK_AUDIONEXT, MP_KEY_NEXT}, + {SDLK_AUDIOPREV, MP_KEY_PREV}, + {SDLK_AUDIOSTOP, MP_KEY_STOP}, + {SDLK_AUDIOPLAY, MP_KEY_PLAY}, + {SDLK_AUDIOMUTE, MP_KEY_MUTE}, + {SDLK_F1, MP_KEY_F + 1}, + {SDLK_F2, MP_KEY_F + 2}, + {SDLK_F3, MP_KEY_F + 3}, + {SDLK_F4, MP_KEY_F + 4}, + {SDLK_F5, MP_KEY_F + 5}, + {SDLK_F6, MP_KEY_F + 6}, + {SDLK_F7, MP_KEY_F + 7}, + {SDLK_F8, MP_KEY_F + 8}, + {SDLK_F9, MP_KEY_F + 9}, + {SDLK_F10, MP_KEY_F + 10}, + {SDLK_F11, MP_KEY_F + 11}, + {SDLK_F12, MP_KEY_F + 12}, + {SDLK_F13, MP_KEY_F + 13}, + {SDLK_F14, MP_KEY_F + 14}, + {SDLK_F15, MP_KEY_F + 15}, + {SDLK_F16, MP_KEY_F + 16}, + {SDLK_F17, MP_KEY_F + 17}, + {SDLK_F18, MP_KEY_F + 18}, + {SDLK_F19, MP_KEY_F + 19}, + {SDLK_F20, MP_KEY_F + 20}, + {SDLK_F21, MP_KEY_F + 21}, + {SDLK_F22, MP_KEY_F + 22}, + {SDLK_F23, MP_KEY_F + 23}, + {SDLK_F24, MP_KEY_F + 24} +}; + +struct mousemap_entry { + 
Uint8 sdl; + int mpv; +}; +const struct mousemap_entry mousebtns[] = { + {SDL_BUTTON_LEFT, MP_MBTN_LEFT}, + {SDL_BUTTON_MIDDLE, MP_MBTN_MID}, + {SDL_BUTTON_RIGHT, MP_MBTN_RIGHT}, + {SDL_BUTTON_X1, MP_MBTN_BACK}, + {SDL_BUTTON_X2, MP_MBTN_FORWARD}, +}; + +struct priv { + SDL_Window *window; + SDL_Renderer *renderer; + int renderer_index; + SDL_RendererInfo renderer_info; + SDL_Texture *tex; + int tex_swapped; + struct mp_image_params params; + struct mp_rect src_rect; + struct mp_rect dst_rect; + struct mp_osd_res osd_res; + struct formatmap_entry osd_format; + struct osd_bitmap_surface { + int change_id; + struct osd_target { + SDL_Rect source; + SDL_Rect dest; + SDL_Texture *tex; + SDL_Texture *tex2; + } *targets; + int num_targets; + int targets_size; + } osd_surfaces[MAX_OSD_PARTS]; + double osd_pts; + Uint32 wakeup_event; + bool screensaver_enabled; + struct m_config_cache *opts_cache; + + // options + bool allow_sw; + bool switch_mode; + bool vsync; +}; + +static bool lock_texture(struct vo *vo, struct mp_image *texmpi) +{ + struct priv *vc = vo->priv; + *texmpi = (struct mp_image){0}; + mp_image_set_size(texmpi, vc->params.w, vc->params.h); + mp_image_setfmt(texmpi, vc->params.imgfmt); + switch (texmpi->num_planes) { + case 1: + case 3: + break; + default: + MP_ERR(vo, "Invalid plane count\n"); + return false; + } + void *pixels; + int pitch; + if (SDL_LockTexture(vc->tex, NULL, &pixels, &pitch)) { + MP_ERR(vo, "SDL_LockTexture failed\n"); + return false; + } + texmpi->planes[0] = pixels; + texmpi->stride[0] = pitch; + if (texmpi->num_planes == 3) { + if (vc->tex_swapped) { + texmpi->planes[2] = + ((Uint8 *) texmpi->planes[0] + texmpi->h * pitch); + texmpi->stride[2] = pitch / 2; + texmpi->planes[1] = + ((Uint8 *) texmpi->planes[2] + (texmpi->h * pitch) / 4); + texmpi->stride[1] = pitch / 2; + } else { + texmpi->planes[1] = + ((Uint8 *) texmpi->planes[0] + texmpi->h * pitch); + texmpi->stride[1] = pitch / 2; + texmpi->planes[2] = + ((Uint8 *) 
texmpi->planes[1] + (texmpi->h * pitch) / 4); + texmpi->stride[2] = pitch / 2; + } + } + return true; +} + +static bool is_good_renderer(SDL_RendererInfo *ri, + const char *driver_name_wanted, bool allow_sw, + struct formatmap_entry *osd_format) +{ + if (driver_name_wanted && driver_name_wanted[0]) + if (strcmp(driver_name_wanted, ri->name)) + return false; + + if (!allow_sw && + !(ri->flags & SDL_RENDERER_ACCELERATED)) + return false; + + int i, j; + for (i = 0; i < ri->num_texture_formats; ++i) + for (j = 0; j < sizeof(formats) / sizeof(formats[0]); ++j) + if (ri->texture_formats[i] == formats[j].sdl) + if (formats[j].is_rgba) { + if (osd_format) + *osd_format = formats[j]; + return true; + } + + return false; +} + +static void destroy_renderer(struct vo *vo) +{ + struct priv *vc = vo->priv; + + // free ALL the textures + if (vc->tex) { + SDL_DestroyTexture(vc->tex); + vc->tex = NULL; + } + + int i, j; + for (i = 0; i < MAX_OSD_PARTS; ++i) { + for (j = 0; j < vc->osd_surfaces[i].targets_size; ++j) { + if (vc->osd_surfaces[i].targets[j].tex) { + SDL_DestroyTexture(vc->osd_surfaces[i].targets[j].tex); + vc->osd_surfaces[i].targets[j].tex = NULL; + } + if (vc->osd_surfaces[i].targets[j].tex2) { + SDL_DestroyTexture(vc->osd_surfaces[i].targets[j].tex2); + vc->osd_surfaces[i].targets[j].tex2 = NULL; + } + } + } + + if (vc->renderer) { + SDL_DestroyRenderer(vc->renderer); + vc->renderer = NULL; + } +} + +static bool try_create_renderer(struct vo *vo, int i, const char *driver) +{ + struct priv *vc = vo->priv; + + // first probe + SDL_RendererInfo ri; + if (SDL_GetRenderDriverInfo(i, &ri)) + return false; + if (!is_good_renderer(&ri, driver, vc->allow_sw, NULL)) + return false; + + vc->renderer = SDL_CreateRenderer(vc->window, i, 0); + if (!vc->renderer) { + MP_ERR(vo, "SDL_CreateRenderer failed\n"); + return false; + } + + if (SDL_GetRendererInfo(vc->renderer, &vc->renderer_info)) { + MP_ERR(vo, "SDL_GetRendererInfo failed\n"); + destroy_renderer(vo); + return false; + 
} + + if (!is_good_renderer(&vc->renderer_info, NULL, vc->allow_sw, + &vc->osd_format)) { + MP_ERR(vo, "Renderer '%s' does not fulfill " + "requirements on this system\n", + vc->renderer_info.name); + destroy_renderer(vo); + return false; + } + + if (vc->renderer_index != i) { + MP_INFO(vo, "Using %s\n", vc->renderer_info.name); + vc->renderer_index = i; + } + + return true; +} + +static int init_renderer(struct vo *vo) +{ + struct priv *vc = vo->priv; + + int n = SDL_GetNumRenderDrivers(); + int i; + + if (vc->renderer_index >= 0) + if (try_create_renderer(vo, vc->renderer_index, NULL)) + return 0; + + for (i = 0; i < n; ++i) + if (try_create_renderer(vo, i, SDL_GetHint(SDL_HINT_RENDER_DRIVER))) + return 0; + + for (i = 0; i < n; ++i) + if (try_create_renderer(vo, i, NULL)) + return 0; + + MP_ERR(vo, "No supported renderer\n"); + return -1; +} + +static void resize(struct vo *vo, int w, int h) +{ + struct priv *vc = vo->priv; + vo->dwidth = w; + vo->dheight = h; + vo_get_src_dst_rects(vo, &vc->src_rect, &vc->dst_rect, + &vc->osd_res); + SDL_RenderSetLogicalSize(vc->renderer, w, h); + vo->want_redraw = true; + vo_wakeup(vo); +} + +static void force_resize(struct vo *vo) +{ + struct priv *vc = vo->priv; + int w, h; + SDL_GetWindowSize(vc->window, &w, &h); + resize(vo, w, h); +} + +static void check_resize(struct vo *vo) +{ + struct priv *vc = vo->priv; + int w, h; + SDL_GetWindowSize(vc->window, &w, &h); + if (vo->dwidth != w || vo->dheight != h) + resize(vo, w, h); +} + +static inline void set_screensaver(bool enabled) +{ + if (!!enabled == !!SDL_IsScreenSaverEnabled()) + return; + + if (enabled) + SDL_EnableScreenSaver(); + else + SDL_DisableScreenSaver(); +} + +static void set_fullscreen(struct vo *vo) +{ + struct priv *vc = vo->priv; + struct mp_vo_opts *opts = vc->opts_cache->opts; + int fs = opts->fullscreen; + SDL_bool prev_screensaver_state = SDL_IsScreenSaverEnabled(); + + Uint32 fs_flag; + if (vc->switch_mode) + fs_flag = SDL_WINDOW_FULLSCREEN; + else + 
fs_flag = SDL_WINDOW_FULLSCREEN_DESKTOP; + + Uint32 old_flags = SDL_GetWindowFlags(vc->window); + int prev_fs = !!(old_flags & fs_flag); + if (fs == prev_fs) + return; + + Uint32 flags = 0; + if (fs) + flags |= fs_flag; + + if (SDL_SetWindowFullscreen(vc->window, flags)) { + MP_ERR(vo, "SDL_SetWindowFullscreen failed\n"); + return; + } + + // toggling fullscreen might recreate the window, so better guard for this + set_screensaver(prev_screensaver_state); + + force_resize(vo); +} + +static void update_screeninfo(struct vo *vo, struct mp_rect *screenrc) +{ + struct priv *vc = vo->priv; + SDL_DisplayMode mode; + if (SDL_GetCurrentDisplayMode(SDL_GetWindowDisplayIndex(vc->window), + &mode)) { + MP_ERR(vo, "SDL_GetCurrentDisplayMode failed\n"); + return; + } + *screenrc = (struct mp_rect){0, 0, mode.w, mode.h}; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *vc = vo->priv; + + struct vo_win_geometry geo; + struct mp_rect screenrc; + + update_screeninfo(vo, &screenrc); + vo_calc_window_geometry(vo, &screenrc, &geo); + vo_apply_window_geometry(vo, &geo); + + int win_w = vo->dwidth; + int win_h = vo->dheight; + + SDL_SetWindowSize(vc->window, win_w, win_h); + if (geo.flags & VO_WIN_FORCE_POS) + SDL_SetWindowPosition(vc->window, geo.win.x0, geo.win.y0); + + if (vc->tex) + SDL_DestroyTexture(vc->tex); + Uint32 texfmt = SDL_PIXELFORMAT_UNKNOWN; + int i, j; + for (i = 0; i < vc->renderer_info.num_texture_formats; ++i) + for (j = 0; j < sizeof(formats) / sizeof(formats[0]); ++j) + if (vc->renderer_info.texture_formats[i] == formats[j].sdl) + if (params->imgfmt == formats[j].mpv) + texfmt = formats[j].sdl; + if (texfmt == SDL_PIXELFORMAT_UNKNOWN) { + MP_ERR(vo, "Invalid pixel format\n"); + return -1; + } + + vc->tex_swapped = texfmt == SDL_PIXELFORMAT_YV12; + vc->tex = SDL_CreateTexture(vc->renderer, texfmt, + SDL_TEXTUREACCESS_STREAMING, + params->w, params->h); + if (!vc->tex) { + MP_ERR(vo, "Could not create a texture\n"); + return 
-1; + } + + vc->params = *params; + + struct mp_image tmp; + if (!lock_texture(vo, &tmp)) { + SDL_DestroyTexture(vc->tex); + vc->tex = NULL; + return -1; + } + mp_image_clear(&tmp, 0, 0, tmp.w, tmp.h); + SDL_UnlockTexture(vc->tex); + + resize(vo, win_w, win_h); + + set_screensaver(vc->screensaver_enabled); + set_fullscreen(vo); + + SDL_ShowWindow(vc->window); + + check_resize(vo); + + return 0; +} + +static void flip_page(struct vo *vo) +{ + struct priv *vc = vo->priv; + SDL_RenderPresent(vc->renderer); +} + +static void wakeup(struct vo *vo) +{ + struct priv *vc = vo->priv; + SDL_Event event = {.type = vc->wakeup_event}; + // Note that there is no context - SDL is a singleton. + SDL_PushEvent(&event); +} + +static void wait_events(struct vo *vo, int64_t until_time_ns) +{ + int64_t wait_ns = until_time_ns - mp_time_ns(); + // Round-up to 1ms for short timeouts (100us, 1000us] + if (wait_ns > MP_TIME_US_TO_NS(100)) + wait_ns = MPMAX(wait_ns, MP_TIME_MS_TO_NS(1)); + int timeout_ms = MPCLAMP(wait_ns / MP_TIME_MS_TO_NS(1), 0, 10000); + SDL_Event ev; + + while (SDL_WaitEventTimeout(&ev, timeout_ms)) { + timeout_ms = 0; + switch (ev.type) { + case SDL_WINDOWEVENT: + switch (ev.window.event) { + case SDL_WINDOWEVENT_EXPOSED: + vo->want_redraw = true; + break; + case SDL_WINDOWEVENT_SIZE_CHANGED: + check_resize(vo); + vo_event(vo, VO_EVENT_RESIZE); + break; + case SDL_WINDOWEVENT_ENTER: + mp_input_put_key(vo->input_ctx, MP_KEY_MOUSE_ENTER); + break; + case SDL_WINDOWEVENT_LEAVE: + mp_input_put_key(vo->input_ctx, MP_KEY_MOUSE_LEAVE); + break; + } + break; + case SDL_QUIT: + mp_input_put_key(vo->input_ctx, MP_KEY_CLOSE_WIN); + break; + case SDL_TEXTINPUT: { + int sdl_mod = SDL_GetModState(); + int mpv_mod = 0; + // we ignore KMOD_LSHIFT, KMOD_RSHIFT and KMOD_RALT (if + // mp_input_use_alt_gr() is true) because these are already + // factored into ev.text.text + if (sdl_mod & (KMOD_LCTRL | KMOD_RCTRL)) + mpv_mod |= MP_KEY_MODIFIER_CTRL; + if ((sdl_mod & KMOD_LALT) || + 
((sdl_mod & KMOD_RALT) && !mp_input_use_alt_gr(vo->input_ctx))) + mpv_mod |= MP_KEY_MODIFIER_ALT; + if (sdl_mod & (KMOD_LGUI | KMOD_RGUI)) + mpv_mod |= MP_KEY_MODIFIER_META; + struct bstr t = { + ev.text.text, strlen(ev.text.text) + }; + mp_input_put_key_utf8(vo->input_ctx, mpv_mod, t); + break; + } + case SDL_KEYDOWN: { + // Issue: we don't know in advance whether this keydown event + // will ALSO cause a SDL_TEXTINPUT event + // So we're conservative, and only map non printable keycodes + // (e.g. function keys, arrow keys, etc.) + // However, this does lose some keypresses at least on X11 + // (e.g. Ctrl-A generates SDL_KEYDOWN only, but the key is + // 'a'... and 'a' is normally also handled by SDL_TEXTINPUT). + // The default config does not use Ctrl, so this is fine... + int keycode = 0; + int i; + for (i = 0; i < sizeof(keys) / sizeof(keys[0]); ++i) + if (keys[i].sdl == ev.key.keysym.sym) { + keycode = keys[i].mpv; + break; + } + if (keycode) { + if (ev.key.keysym.mod & (KMOD_LSHIFT | KMOD_RSHIFT)) + keycode |= MP_KEY_MODIFIER_SHIFT; + if (ev.key.keysym.mod & (KMOD_LCTRL | KMOD_RCTRL)) + keycode |= MP_KEY_MODIFIER_CTRL; + if (ev.key.keysym.mod & (KMOD_LALT | KMOD_RALT)) + keycode |= MP_KEY_MODIFIER_ALT; + if (ev.key.keysym.mod & (KMOD_LGUI | KMOD_RGUI)) + keycode |= MP_KEY_MODIFIER_META; + mp_input_put_key(vo->input_ctx, keycode); + } + break; + } + case SDL_MOUSEMOTION: + mp_input_set_mouse_pos(vo->input_ctx, ev.motion.x, ev.motion.y); + break; + case SDL_MOUSEBUTTONDOWN: { + int i; + for (i = 0; i < sizeof(mousebtns) / sizeof(mousebtns[0]); ++i) + if (mousebtns[i].sdl == ev.button.button) { + mp_input_put_key(vo->input_ctx, mousebtns[i].mpv | MP_KEY_STATE_DOWN); + break; + } + break; + } + case SDL_MOUSEBUTTONUP: { + int i; + for (i = 0; i < sizeof(mousebtns) / sizeof(mousebtns[0]); ++i) + if (mousebtns[i].sdl == ev.button.button) { + mp_input_put_key(vo->input_ctx, mousebtns[i].mpv | MP_KEY_STATE_UP); + break; + } + break; + } + case SDL_MOUSEWHEEL: { 
+#if SDL_VERSION_ATLEAST(2, 0, 4) + double multiplier = ev.wheel.direction == SDL_MOUSEWHEEL_FLIPPED ? -1 : 1; +#else + double multiplier = 1; +#endif + int y_code = ev.wheel.y > 0 ? MP_WHEEL_UP : MP_WHEEL_DOWN; + mp_input_put_wheel(vo->input_ctx, y_code, abs(ev.wheel.y) * multiplier); + int x_code = ev.wheel.x > 0 ? MP_WHEEL_RIGHT : MP_WHEEL_LEFT; + mp_input_put_wheel(vo->input_ctx, x_code, abs(ev.wheel.x) * multiplier); + break; + } + } + } +} + +static void uninit(struct vo *vo) +{ + struct priv *vc = vo->priv; + destroy_renderer(vo); + SDL_DestroyWindow(vc->window); + vc->window = NULL; + SDL_QuitSubSystem(SDL_INIT_VIDEO); + talloc_free(vc); +} + +static inline void upload_to_texture(struct vo *vo, SDL_Texture *tex, + int w, int h, void *bitmap, int stride) +{ + struct priv *vc = vo->priv; + + if (vc->osd_format.sdl == SDL_PIXELFORMAT_ARGB8888) { + // NOTE: this optimization is questionable, because SDL docs say + // that this way is slow. + // It did measure up faster, though... + SDL_UpdateTexture(tex, NULL, bitmap, stride); + return; + } + + void *pixels; + int pitch; + if (SDL_LockTexture(tex, NULL, &pixels, &pitch)) { + MP_ERR(vo, "Could not lock texture\n"); + } else { + SDL_ConvertPixels(w, h, SDL_PIXELFORMAT_ARGB8888, + bitmap, stride, + vc->osd_format.sdl, + pixels, pitch); + SDL_UnlockTexture(tex); + } +} + +static inline void subbitmap_to_texture(struct vo *vo, SDL_Texture *tex, + struct sub_bitmap *bmp, + uint32_t ormask) +{ + if (ormask == 0) { + upload_to_texture(vo, tex, bmp->w, bmp->h, + bmp->bitmap, bmp->stride); + } else { + uint32_t *temppixels; + temppixels = talloc_array(vo, uint32_t, bmp->w * bmp->h); + + int x, y; + for (y = 0; y < bmp->h; ++y) { + const uint32_t *src = + (const uint32_t *) ((const char *) bmp->bitmap + y * bmp->stride); + uint32_t *dst = temppixels + y * bmp->w; + for (x = 0; x < bmp->w; ++x) + dst[x] = src[x] | ormask; + } + + upload_to_texture(vo, tex, bmp->w, bmp->h, + temppixels, sizeof(uint32_t) * bmp->w); + + 
talloc_free(temppixels); + } +} + +static void generate_osd_part(struct vo *vo, struct sub_bitmaps *imgs) +{ + struct priv *vc = vo->priv; + struct osd_bitmap_surface *sfc = &vc->osd_surfaces[imgs->render_index]; + + if (imgs->format == SUBBITMAP_EMPTY || imgs->num_parts == 0) + return; + + if (imgs->change_id == sfc->change_id) + return; + + if (imgs->num_parts > sfc->targets_size) { + sfc->targets = talloc_realloc(vc, sfc->targets, + struct osd_target, imgs->num_parts); + memset(&sfc->targets[sfc->targets_size], 0, sizeof(struct osd_target) * + (imgs->num_parts - sfc->targets_size)); + sfc->targets_size = imgs->num_parts; + } + sfc->num_targets = imgs->num_parts; + + for (int i = 0; i < imgs->num_parts; i++) { + struct osd_target *target = sfc->targets + i; + struct sub_bitmap *bmp = imgs->parts + i; + + target->source = (SDL_Rect){ + 0, 0, bmp->w, bmp->h + }; + target->dest = (SDL_Rect){ + bmp->x, bmp->y, bmp->dw, bmp->dh + }; + + // tex: alpha blended texture + if (target->tex) { + SDL_DestroyTexture(target->tex); + target->tex = NULL; + } + if (!target->tex) + target->tex = SDL_CreateTexture(vc->renderer, + vc->osd_format.sdl, SDL_TEXTUREACCESS_STREAMING, + bmp->w, bmp->h); + if (!target->tex) { + MP_ERR(vo, "Could not create texture\n"); + } + if (target->tex) { + SDL_SetTextureBlendMode(target->tex, + SDL_BLENDMODE_BLEND); + SDL_SetTextureColorMod(target->tex, 0, 0, 0); + subbitmap_to_texture(vo, target->tex, bmp, 0); // RGBA -> 000A + } + + // tex2: added texture + if (target->tex2) { + SDL_DestroyTexture(target->tex2); + target->tex2 = NULL; + } + if (!target->tex2) + target->tex2 = SDL_CreateTexture(vc->renderer, + vc->osd_format.sdl, SDL_TEXTUREACCESS_STREAMING, + bmp->w, bmp->h); + if (!target->tex2) { + MP_ERR(vo, "Could not create texture\n"); + } + if (target->tex2) { + SDL_SetTextureBlendMode(target->tex2, + SDL_BLENDMODE_ADD); + subbitmap_to_texture(vo, target->tex2, bmp, + 0xFF000000); // RGBA -> RGB1 + } + } + + sfc->change_id = imgs->change_id; 
+} + +static void draw_osd_part(struct vo *vo, int index) +{ + struct priv *vc = vo->priv; + struct osd_bitmap_surface *sfc = &vc->osd_surfaces[index]; + int i; + + for (i = 0; i < sfc->num_targets; i++) { + struct osd_target *target = sfc->targets + i; + if (target->tex) + SDL_RenderCopy(vc->renderer, target->tex, + &target->source, &target->dest); + if (target->tex2) + SDL_RenderCopy(vc->renderer, target->tex2, + &target->source, &target->dest); + } +} + +static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs) +{ + struct vo *vo = ctx; + generate_osd_part(vo, imgs); + draw_osd_part(vo, imgs->render_index); +} + +static void draw_osd(struct vo *vo) +{ + struct priv *vc = vo->priv; + + static const bool osdformats[SUBBITMAP_COUNT] = { + [SUBBITMAP_BGRA] = true, + }; + + osd_draw(vo->osd, vc->osd_res, vc->osd_pts, 0, osdformats, draw_osd_cb, vo); +} + +static int preinit(struct vo *vo) +{ + struct priv *vc = vo->priv; + + if (SDL_WasInit(SDL_INIT_EVENTS)) { + MP_ERR(vo, "Another component is using SDL already.\n"); + return -1; + } + + vc->opts_cache = m_config_cache_alloc(vc, vo->global, &vo_sub_opts); + + // predefine SDL defaults (SDL env vars shall override) + SDL_SetHintWithPriority(SDL_HINT_RENDER_SCALE_QUALITY, "1", + SDL_HINT_DEFAULT); + SDL_SetHintWithPriority(SDL_HINT_VIDEO_MINIMIZE_ON_FOCUS_LOSS, "0", + SDL_HINT_DEFAULT); + + // predefine MPV options (SDL env vars shall be overridden) + SDL_SetHintWithPriority(SDL_HINT_RENDER_VSYNC, vc->vsync ? "1" : "0", + SDL_HINT_OVERRIDE); + + if (SDL_InitSubSystem(SDL_INIT_VIDEO)) { + MP_ERR(vo, "SDL_Init failed\n"); + return -1; + } + + // then actually try + vc->window = SDL_CreateWindow("MPV", SDL_WINDOWPOS_UNDEFINED, SDL_WINDOWPOS_UNDEFINED, + 640, 480, SDL_WINDOW_RESIZABLE | SDL_WINDOW_HIDDEN); + if (!vc->window) { + MP_ERR(vo, "SDL_CreateWindow failed\n"); + return -1; + } + + // try creating a renderer (this also gets the renderer_info data + // for query_format to use!) 
+ if (init_renderer(vo) != 0) { + SDL_DestroyWindow(vc->window); + vc->window = NULL; + return -1; + } + + vc->wakeup_event = SDL_RegisterEvents(1); + if (vc->wakeup_event == (Uint32)-1) + MP_ERR(vo, "SDL_RegisterEvents() failed.\n"); + + MP_WARN(vo, "Warning: this legacy VO has bad performance. Consider fixing " + "your graphics drivers, or not forcing the sdl VO.\n"); + + return 0; +} + +static int query_format(struct vo *vo, int format) +{ + struct priv *vc = vo->priv; + int i, j; + for (i = 0; i < vc->renderer_info.num_texture_formats; ++i) + for (j = 0; j < sizeof(formats) / sizeof(formats[0]); ++j) + if (vc->renderer_info.texture_formats[i] == formats[j].sdl) + if (format == formats[j].mpv) + return 1; + return 0; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *vc = vo->priv; + + // typically this runs in parallel with the following mp_image_copy call + SDL_SetRenderDrawColor(vc->renderer, 0, 0, 0, 255); + SDL_RenderClear(vc->renderer); + + SDL_SetTextureBlendMode(vc->tex, SDL_BLENDMODE_NONE); + + if (frame->current) { + vc->osd_pts = frame->current->pts; + + mp_image_t texmpi; + if (!lock_texture(vo, &texmpi)) + return; + + mp_image_copy(&texmpi, frame->current); + + SDL_UnlockTexture(vc->tex); + } + + SDL_Rect src, dst; + src.x = vc->src_rect.x0; + src.y = vc->src_rect.y0; + src.w = vc->src_rect.x1 - vc->src_rect.x0; + src.h = vc->src_rect.y1 - vc->src_rect.y0; + dst.x = vc->dst_rect.x0; + dst.y = vc->dst_rect.y0; + dst.w = vc->dst_rect.x1 - vc->dst_rect.x0; + dst.h = vc->dst_rect.y1 - vc->dst_rect.y0; + + SDL_RenderCopy(vc->renderer, vc->tex, &src, &dst); + + draw_osd(vo); +} + +static struct mp_image *get_window_screenshot(struct vo *vo) +{ + struct priv *vc = vo->priv; + struct mp_image *image = mp_image_alloc(vc->osd_format.mpv, vo->dwidth, + vo->dheight); + if (!image) + return NULL; + if (SDL_RenderReadPixels(vc->renderer, NULL, vc->osd_format.sdl, + image->planes[0], image->stride[0])) { + MP_ERR(vo, 
"SDL_RenderReadPixels failed\n"); + talloc_free(image); + return NULL; + } + return image; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct priv *vc = vo->priv; + + switch (request) { + case VOCTRL_VO_OPTS_CHANGED: { + void *opt; + while (m_config_cache_get_next_changed(vc->opts_cache, &opt)) { + struct mp_vo_opts *opts = vc->opts_cache->opts; + if (&opts->fullscreen == opt) + set_fullscreen(vo); + } + return 1; + } + case VOCTRL_SET_PANSCAN: + force_resize(vo); + return VO_TRUE; + case VOCTRL_SCREENSHOT_WIN: + *(struct mp_image **)data = get_window_screenshot(vo); + return true; + case VOCTRL_SET_CURSOR_VISIBILITY: + SDL_ShowCursor(*(bool *)data); + return true; + case VOCTRL_KILL_SCREENSAVER: + vc->screensaver_enabled = false; + set_screensaver(vc->screensaver_enabled); + return VO_TRUE; + case VOCTRL_RESTORE_SCREENSAVER: + vc->screensaver_enabled = true; + set_screensaver(vc->screensaver_enabled); + return VO_TRUE; + case VOCTRL_UPDATE_WINDOW_TITLE: + SDL_SetWindowTitle(vc->window, (char *)data); + return true; + } + return VO_NOTIMPL; +} + +#define OPT_BASE_STRUCT struct priv + +const struct vo_driver video_out_sdl = { + .description = "SDL 2.0 Renderer", + .name = "sdl", + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .renderer_index = -1, + .vsync = true, + }, + .options = (const struct m_option []){ + {"sw", OPT_BOOL(allow_sw)}, + {"switch-mode", OPT_BOOL(switch_mode)}, + {"vsync", OPT_BOOL(vsync)}, + {NULL} + }, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .uninit = uninit, + .flip_page = flip_page, + .wait_events = wait_events, + .wakeup = wakeup, + .options_prefix = "sdl", +}; diff --git a/video/out/vo_sixel.c b/video/out/vo_sixel.c new file mode 100644 index 0000000..e05c455 --- /dev/null +++ b/video/out/vo_sixel.c @@ -0,0 +1,627 @@ +/* + * Sixel mpv output device implementation based on ffmpeg libavdevice 
implementation + * by Hayaki Saito + * https://github.com/saitoha/FFmpeg-SIXEL/blob/sixel/libavdevice/sixel.c + * + * Copyright (c) 2014 Hayaki Saito + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> + +#include <libswscale/swscale.h> +#include <sixel.h> + +#include "config.h" +#include "options/m_config.h" +#include "osdep/terminal.h" +#include "sub/osd.h" +#include "vo.h" +#include "video/sws_utils.h" +#include "video/mp_image.h" + +#if HAVE_POSIX +#include <unistd.h> +#endif + +#define IMGFMT IMGFMT_RGB24 + +#define TERM_ESC_USE_GLOBAL_COLOR_REG "\033[?1070l" + +#define TERMINAL_FALLBACK_COLS 80 +#define TERMINAL_FALLBACK_ROWS 25 +#define TERMINAL_FALLBACK_PX_WIDTH 320 +#define TERMINAL_FALLBACK_PX_HEIGHT 240 + +struct vo_sixel_opts { + int diffuse; + int reqcolors; + bool fixedpal; + int threshold; + int width, height, top, left; + int pad_y, pad_x; + int rows, cols; + bool config_clear, alt_screen; + bool buffered; +}; + +struct priv { + // User specified options + struct vo_sixel_opts opts; + + // Internal data + sixel_output_t *output; + sixel_dither_t *dither; + sixel_dither_t *testdither; + uint8_t *buffer; + char *sixel_output_buf; + bool skip_frame_draw; + + int left, top; // image origin cell (1 based) + int width, height; // actual image px size - always reflects dst_rect. 
+ int num_cols, num_rows; // terminal size in cells + int canvas_ok; // whether canvas vo->dwidth and vo->dheight are positive + + int previous_histogram_colors; + + struct mp_rect src_rect; + struct mp_rect dst_rect; + struct mp_osd_res osd; + struct mp_image *frame; + struct mp_sws_context *sws; +}; + +static const unsigned int depth = 3; + +static int detect_scene_change(struct vo* vo) +{ + struct priv* priv = vo->priv; + int previous_histogram_colors = priv->previous_histogram_colors; + int histogram_colors = 0; + + // If threshold is set negative, then every frame must be a scene change + if (priv->dither == NULL || priv->opts.threshold < 0) + return 1; + + histogram_colors = sixel_dither_get_num_of_histogram_colors(priv->testdither); + + int color_difference_count = previous_histogram_colors - histogram_colors; + color_difference_count = (color_difference_count > 0) ? // abs value + color_difference_count : -color_difference_count; + + if (100 * color_difference_count > + priv->opts.threshold * previous_histogram_colors) + { + priv->previous_histogram_colors = histogram_colors; // update history + return 1; + } else { + return 0; + } + +} + +static void dealloc_dithers_and_buffers(struct vo* vo) +{ + struct priv* priv = vo->priv; + + if (priv->buffer) { + talloc_free(priv->buffer); + priv->buffer = NULL; + } + + if (priv->frame) { + talloc_free(priv->frame); + priv->frame = NULL; + } + + if (priv->dither) { + sixel_dither_unref(priv->dither); + priv->dither = NULL; + } + + if (priv->testdither) { + sixel_dither_unref(priv->testdither); + priv->testdither = NULL; + } +} + +static SIXELSTATUS prepare_static_palette(struct vo* vo) +{ + struct priv* priv = vo->priv; + + if (!priv->dither) { + priv->dither = sixel_dither_get(BUILTIN_XTERM256); + if (priv->dither == NULL) + return SIXEL_FALSE; + + sixel_dither_set_diffusion_type(priv->dither, priv->opts.diffuse); + } + + sixel_dither_set_body_only(priv->dither, 0); + return SIXEL_OK; +} + +static SIXELSTATUS 
prepare_dynamic_palette(struct vo *vo) +{ + SIXELSTATUS status = SIXEL_FALSE; + struct priv *priv = vo->priv; + + /* create histogram and construct color palette + * with median cut algorithm. */ + status = sixel_dither_initialize(priv->testdither, priv->buffer, + priv->width, priv->height, + SIXEL_PIXELFORMAT_RGB888, + LARGE_NORM, REP_CENTER_BOX, + QUALITY_LOW); + if (SIXEL_FAILED(status)) + return status; + + if (detect_scene_change(vo)) { + if (priv->dither) { + sixel_dither_unref(priv->dither); + priv->dither = NULL; + } + + priv->dither = priv->testdither; + status = sixel_dither_new(&priv->testdither, priv->opts.reqcolors, NULL); + + if (SIXEL_FAILED(status)) + return status; + + sixel_dither_set_diffusion_type(priv->dither, priv->opts.diffuse); + } else { + if (priv->dither == NULL) + return SIXEL_FALSE; + } + + sixel_dither_set_body_only(priv->dither, 0); + return status; +} + +static void update_canvas_dimensions(struct vo *vo) +{ + // this function sets the vo canvas size in pixels vo->dwidth, vo->dheight, + // and the number of rows and columns available in priv->num_rows/cols + struct priv *priv = vo->priv; + int num_rows = TERMINAL_FALLBACK_ROWS; + int num_cols = TERMINAL_FALLBACK_COLS; + int total_px_width = 0; + int total_px_height = 0; + + terminal_get_size2(&num_rows, &num_cols, &total_px_width, &total_px_height); + + // If the user has specified rows/cols use them for further calculations + num_rows = (priv->opts.rows > 0) ? priv->opts.rows : num_rows; + num_cols = (priv->opts.cols > 0) ? priv->opts.cols : num_cols; + + // If the pad value is set in between 0 and width/2 - 1, then we + // subtract from the detected width. Otherwise, we assume that the width + // output must be a integer multiple of num_cols and accordingly set + // total_width to be an integer multiple of num_cols. 
So in case the padding + // added by terminal is less than the number of cells in that axis, then rounding + // down will take care of correcting the detected width and remove padding. + if (priv->opts.width > 0) { + // option - set by the user, hard truth + total_px_width = priv->opts.width; + } else { + if (total_px_width <= 0) { + // ioctl failed to read terminal width + total_px_width = TERMINAL_FALLBACK_PX_WIDTH; + } else { + if (priv->opts.pad_x >= 0 && priv->opts.pad_x < total_px_width / 2) { + // explicit padding set by the user + total_px_width -= (2 * priv->opts.pad_x); + } else { + // rounded "auto padding" + total_px_width = total_px_width / num_cols * num_cols; + } + } + } + + if (priv->opts.height > 0) { + total_px_height = priv->opts.height; + } else { + if (total_px_height <= 0) { + total_px_height = TERMINAL_FALLBACK_PX_HEIGHT; + } else { + if (priv->opts.pad_y >= 0 && priv->opts.pad_y < total_px_height / 2) { + total_px_height -= (2 * priv->opts.pad_y); + } else { + total_px_height = total_px_height / num_rows * num_rows; + } + } + } + + // use n-1 rows for height + // The last row can't be used for encoding image, because after sixel encode + // the terminal moves the cursor to next line below the image, causing the + // last line to be empty instead of displaying image data. + // TODO: Confirm if the output height must be a multiple of 6, if not, remove + // the / 6 * 6 part which is setting the height to be a multiple of 6. + vo->dheight = total_px_height * (num_rows - 1) / num_rows / 6 * 6; + vo->dwidth = total_px_width; + + priv->num_rows = num_rows; + priv->num_cols = num_cols; + + priv->canvas_ok = vo->dwidth > 0 && vo->dheight > 0; +} + +static void set_sixel_output_parameters(struct vo *vo) +{ + // This function sets output scaled size in priv->width, priv->height + // and the scaling rectangles in pixels priv->src_rect, priv->dst_rect + // as well as image positioning in cells priv->top, priv->left. 
+ struct priv *priv = vo->priv; + + vo_get_src_dst_rects(vo, &priv->src_rect, &priv->dst_rect, &priv->osd); + + // priv->width and priv->height are the width and height of dst_rect + // and they are not changed anywhere else outside this function. + // It is the sixel image output dimension which is output by libsixel. + priv->width = priv->dst_rect.x1 - priv->dst_rect.x0; + priv->height = priv->dst_rect.y1 - priv->dst_rect.y0; + + // top/left values must be greater than 1. If it is set, then + // the image will be rendered from there and no further centering is done. + priv->top = (priv->opts.top > 0) ? priv->opts.top : + priv->num_rows * priv->dst_rect.y0 / vo->dheight + 1; + priv->left = (priv->opts.left > 0) ? priv->opts.left : + priv->num_cols * priv->dst_rect.x0 / vo->dwidth + 1; +} + +static int update_sixel_swscaler(struct vo *vo, struct mp_image_params *params) +{ + struct priv *priv = vo->priv; + + priv->sws->src = *params; + priv->sws->src.w = mp_rect_w(priv->src_rect); + priv->sws->src.h = mp_rect_h(priv->src_rect); + priv->sws->dst = (struct mp_image_params) { + .imgfmt = IMGFMT, + .w = priv->width, + .h = priv->height, + .p_w = 1, + .p_h = 1, + }; + + dealloc_dithers_and_buffers(vo); + + priv->frame = mp_image_alloc(IMGFMT, priv->width, priv->height); + if (!priv->frame) + return -1; + + if (mp_sws_reinit(priv->sws) < 0) + return -1; + + // create testdither only if dynamic palette mode is set + if (!priv->opts.fixedpal) { + SIXELSTATUS status = sixel_dither_new(&priv->testdither, + priv->opts.reqcolors, NULL); + if (SIXEL_FAILED(status)) { + MP_ERR(vo, "update_sixel_swscaler: Failed to create new dither: %s\n", + sixel_helper_format_error(status)); + return -1; + } + } + + priv->buffer = + talloc_array(NULL, uint8_t, depth * priv->width * priv->height); + + return 0; +} + +static inline int sixel_buffer(char *data, int size, void *priv) { + char **out = (char **)priv; + *out = talloc_strndup_append_buffer(*out, data, size); + return size; +} + 
+static inline int sixel_write(char *data, int size, void *priv) +{ + FILE *p = (FILE *)priv; + // On POSIX platforms, write() is the fastest method. It also is the only + // one that allows atomic writes so mpv’s output will not be interrupted + // by other processes or threads that write to stdout, which would cause + // screen corruption. POSIX does not guarantee atomicity for writes + // exceeding PIPE_BUF, but at least Linux does seem to implement it that + // way. +#if HAVE_POSIX + int remain = size; + + while (remain > 0) { + ssize_t written = write(fileno(p), data, remain); + if (written < 0) + return written; + remain -= written; + data += written; + } + + return size; +#else + int ret = fwrite(data, 1, size, p); + fflush(p); + return ret; +#endif +} + +static inline void sixel_strwrite(char *s) +{ + sixel_write(s, strlen(s), stdout); +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *priv = vo->priv; + int ret = 0; + update_canvas_dimensions(vo); + if (priv->canvas_ok) { // if too small - succeed but skip the rendering + set_sixel_output_parameters(vo); + ret = update_sixel_swscaler(vo, params); + } + + if (priv->opts.config_clear) + sixel_strwrite(TERM_ESC_CLEAR_SCREEN); + vo->want_redraw = true; + + return ret; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *priv = vo->priv; + SIXELSTATUS status; + struct mp_image *mpi = NULL; + + int prev_rows = priv->num_rows; + int prev_cols = priv->num_cols; + int prev_height = vo->dheight; + int prev_width = vo->dwidth; + bool resized = false; + update_canvas_dimensions(vo); + if (!priv->canvas_ok) + return; + + if (prev_rows != priv->num_rows || prev_cols != priv->num_cols || + prev_width != vo->dwidth || prev_height != vo->dheight) + { + set_sixel_output_parameters(vo); + // Not checking for vo->config_ok because draw_frame is never called + // with a failed reconfig. 
+ update_sixel_swscaler(vo, vo->params); + + if (priv->opts.config_clear) + sixel_strwrite(TERM_ESC_CLEAR_SCREEN); + resized = true; + } + + if (frame->repeat && !frame->redraw && !resized) { + // Frame is repeated, and no need to update OSD either + priv->skip_frame_draw = true; + return; + } else { + // Either frame is new, or OSD has to be redrawn + priv->skip_frame_draw = false; + } + + // Normal case where we have to draw the frame and the image is not NULL + if (frame->current) { + mpi = mp_image_new_ref(frame->current); + struct mp_rect src_rc = priv->src_rect; + src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, mpi->fmt.align_x); + src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, mpi->fmt.align_y); + mp_image_crop_rc(mpi, src_rc); + + // scale/pan to our dest rect + mp_sws_scale(priv->sws, priv->frame, mpi); + } else { + // Image is NULL, so need to clear image and draw OSD + mp_image_clear(priv->frame, 0, 0, priv->width, priv->height); + } + + struct mp_osd_res dim = { + .w = priv->width, + .h = priv->height + }; + osd_draw_on_image(vo->osd, dim, mpi ? mpi->pts : 0, 0, priv->frame); + + // Copy from mpv to RGB format as required by libsixel + memcpy_pic(priv->buffer, priv->frame->planes[0], priv->width * depth, + priv->height, priv->width * depth, priv->frame->stride[0]); + + // Even if either of these prepare palette functions fail, on re-running them + // they should try to re-initialize the dithers, so it shouldn't dereference + // any NULL pointers. flip_page also has a check to make sure dither is not + // NULL before drawing, so failure in these functions should still be okay. 
+ if (priv->opts.fixedpal) { + status = prepare_static_palette(vo); + } else { + status = prepare_dynamic_palette(vo); + } + + if (SIXEL_FAILED(status)) { + MP_WARN(vo, "draw_frame: prepare_palette returned error: %s\n", + sixel_helper_format_error(status)); + } + + if (mpi) + talloc_free(mpi); +} + +static void flip_page(struct vo *vo) +{ + struct priv* priv = vo->priv; + if (!priv->canvas_ok) + return; + + // If frame is repeated and no update required, then we skip encoding + if (priv->skip_frame_draw) + return; + + // Make sure that image and dither are valid before drawing + if (priv->buffer == NULL || priv->dither == NULL) + return; + + // Go to the offset row and column, then display the image + priv->sixel_output_buf = talloc_asprintf(NULL, TERM_ESC_GOTO_YX, + priv->top, priv->left); + if (!priv->opts.buffered) + sixel_strwrite(priv->sixel_output_buf); + + sixel_encode(priv->buffer, priv->width, priv->height, + depth, priv->dither, priv->output); + + if (priv->opts.buffered) + sixel_write(priv->sixel_output_buf, + ta_get_size(priv->sixel_output_buf), stdout); + + talloc_free(priv->sixel_output_buf); +} + +static int preinit(struct vo *vo) +{ + struct priv *priv = vo->priv; + SIXELSTATUS status = SIXEL_FALSE; + + // Parse opts set by CLI or conf + priv->sws = mp_sws_alloc(vo); + priv->sws->log = vo->log; + mp_sws_enable_cmdline_opts(priv->sws, vo->global); + + if (priv->opts.buffered) + status = sixel_output_new(&priv->output, sixel_buffer, + &priv->sixel_output_buf, NULL); + else + status = sixel_output_new(&priv->output, sixel_write, stdout, NULL); + if (SIXEL_FAILED(status)) { + MP_ERR(vo, "preinit: Failed to create output file: %s\n", + sixel_helper_format_error(status)); + return -1; + } + + sixel_output_set_encode_policy(priv->output, SIXEL_ENCODEPOLICY_FAST); + + if (priv->opts.alt_screen) + sixel_strwrite(TERM_ESC_ALT_SCREEN); + + sixel_strwrite(TERM_ESC_HIDE_CURSOR); + + /* don't use private color registers for each frame. 
*/ + sixel_strwrite(TERM_ESC_USE_GLOBAL_COLOR_REG); + + priv->dither = NULL; + + // create testdither only if dynamic palette mode is set + if (!priv->opts.fixedpal) { + status = sixel_dither_new(&priv->testdither, priv->opts.reqcolors, NULL); + if (SIXEL_FAILED(status)) { + MP_ERR(vo, "preinit: Failed to create new dither: %s\n", + sixel_helper_format_error(status)); + return -1; + } + } + + priv->previous_histogram_colors = 0; + + return 0; +} + +static int query_format(struct vo *vo, int format) +{ + return format == IMGFMT; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + if (request == VOCTRL_SET_PANSCAN) + return (vo->config_ok && !reconfig(vo, vo->params)) ? VO_TRUE : VO_FALSE; + return VO_NOTIMPL; +} + + +static void uninit(struct vo *vo) +{ + struct priv *priv = vo->priv; + + sixel_strwrite(TERM_ESC_RESTORE_CURSOR); + + if (priv->opts.alt_screen) + sixel_strwrite(TERM_ESC_NORMAL_SCREEN); + fflush(stdout); + + if (priv->output) { + sixel_output_unref(priv->output); + priv->output = NULL; + } + + dealloc_dithers_and_buffers(vo); +} + +#define OPT_BASE_STRUCT struct priv + +const struct vo_driver video_out_sixel = { + .name = "sixel", + .description = "terminal graphics using sixels", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .opts.diffuse = DIFFUSE_AUTO, + .opts.reqcolors = 256, + .opts.threshold = -1, + .opts.fixedpal = true, + .opts.pad_y = -1, + .opts.pad_x = -1, + .opts.config_clear = true, + .opts.alt_screen = true, + }, + .options = (const m_option_t[]) { + {"dither", OPT_CHOICE(opts.diffuse, + {"auto", DIFFUSE_AUTO}, + {"none", DIFFUSE_NONE}, + {"atkinson", DIFFUSE_ATKINSON}, + {"fs", DIFFUSE_FS}, + {"jajuni", DIFFUSE_JAJUNI}, + {"stucki", DIFFUSE_STUCKI}, + {"burkes", DIFFUSE_BURKES}, + {"arithmetic", 
DIFFUSE_A_DITHER}, + {"xor", DIFFUSE_X_DITHER})}, + {"width", OPT_INT(opts.width)}, + {"height", OPT_INT(opts.height)}, + {"reqcolors", OPT_INT(opts.reqcolors)}, + {"fixedpalette", OPT_BOOL(opts.fixedpal)}, + {"threshold", OPT_INT(opts.threshold)}, + {"top", OPT_INT(opts.top)}, + {"left", OPT_INT(opts.left)}, + {"pad-y", OPT_INT(opts.pad_y)}, + {"pad-x", OPT_INT(opts.pad_x)}, + {"rows", OPT_INT(opts.rows)}, + {"cols", OPT_INT(opts.cols)}, + {"config-clear", OPT_BOOL(opts.config_clear), }, + {"alt-screen", OPT_BOOL(opts.alt_screen), }, + {"buffered", OPT_BOOL(opts.buffered), }, + {"exit-clear", OPT_REPLACED("vo-sixel-alt-screen")}, + {0} + }, + .options_prefix = "vo-sixel", +}; diff --git a/video/out/vo_tct.c b/video/out/vo_tct.c new file mode 100644 index 0000000..8859095 --- /dev/null +++ b/video/out/vo_tct.c @@ -0,0 +1,347 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <unistd.h> +#include <config.h> + +#if HAVE_POSIX +#include <sys/ioctl.h> +#endif + +#include <libswscale/swscale.h> + +#include "options/m_config.h" +#include "config.h" +#include "osdep/terminal.h" +#include "osdep/io.h" +#include "vo.h" +#include "sub/osd.h" +#include "video/sws_utils.h" +#include "video/mp_image.h" + +#define IMGFMT IMGFMT_BGR24 + +#define ALGO_PLAIN 1 +#define ALGO_HALF_BLOCKS 2 + +#define TERM_ESC_CLEAR_COLORS "\033[0m" +#define TERM_ESC_COLOR256_BG "\033[48;5" +#define TERM_ESC_COLOR256_FG "\033[38;5" +#define TERM_ESC_COLOR24BIT_BG "\033[48;2" +#define TERM_ESC_COLOR24BIT_FG "\033[38;2" + +#define DEFAULT_WIDTH 80 +#define DEFAULT_HEIGHT 25 + +struct vo_tct_opts { + int algo; + int width; // 0 -> default + int height; // 0 -> default + bool term256; // 0 -> true color +}; + +struct lut_item { + char str[4]; + int width; +}; + +struct priv { + struct vo_tct_opts opts; + size_t buffer_size; + int swidth; + int sheight; + struct mp_image *frame; + struct mp_rect src; + struct mp_rect dst; + struct mp_sws_context *sws; + struct lut_item lut[256]; +}; + +// Convert RGB24 to xterm-256 8-bit value +// For simplicity, assume RGB space is perceptually uniform. +// There are 5 places where one of two outputs needs to be chosen when the +// input is the exact middle: +// - The r/g/b channels and the gray value: the higher value output is chosen. +// - If the gray and color have same distance from the input - color is chosen. +static int rgb_to_x256(uint8_t r, uint8_t g, uint8_t b) +{ + // Calculate the nearest 0-based color index at 16 .. 231 +# define v2ci(v) (v < 48 ? 0 : v < 115 ? 1 : (v - 35) / 40) + int ir = v2ci(r), ig = v2ci(g), ib = v2ci(b); // 0..5 each +# define color_index() (36 * ir + 6 * ig + ib) /* 0..215, lazy evaluation */ + + // Calculate the nearest 0-based gray index at 232 .. 255 + int average = (r + g + b) / 3; + int gray_index = average > 238 ? 
23 : (average - 3) / 10; // 0..23 + + // Calculate the represented colors back from the index + static const int i2cv[6] = {0, 0x5f, 0x87, 0xaf, 0xd7, 0xff}; + int cr = i2cv[ir], cg = i2cv[ig], cb = i2cv[ib]; // r/g/b, 0..255 each + int gv = 8 + 10 * gray_index; // same value for r/g/b, 0..255 + + // Return the one which is nearer to the original input rgb value +# define dist_square(A,B,C, a,b,c) ((A-a)*(A-a) + (B-b)*(B-b) + (C-c)*(C-c)) + int color_err = dist_square(cr, cg, cb, r, g, b); + int gray_err = dist_square(gv, gv, gv, r, g, b); + return color_err <= gray_err ? 16 + color_index() : 232 + gray_index; +} + +static void print_seq3(struct lut_item *lut, const char* prefix, + uint8_t r, uint8_t g, uint8_t b) +{ +// The fwrite implementation is about 25% faster than the printf code +// (even if we use *.s with the lut values), however, +// on windows we need to use printf in order to translate escape sequences and +// UTF8 output for the console. +#ifndef _WIN32 + fputs(prefix, stdout); + fwrite(lut[r].str, lut[r].width, 1, stdout); + fwrite(lut[g].str, lut[g].width, 1, stdout); + fwrite(lut[b].str, lut[b].width, 1, stdout); + fputc('m', stdout); +#else + printf("%s;%d;%d;%dm", prefix, (int)r, (int)g, (int)b); +#endif +} + +static void print_seq1(struct lut_item *lut, const char* prefix, uint8_t c) +{ +#ifndef _WIN32 + fputs(prefix, stdout); + fwrite(lut[c].str, lut[c].width, 1, stdout); + fputc('m', stdout); +#else + printf("%s;%dm", prefix, (int)c); +#endif +} + + +static void write_plain( + const int dwidth, const int dheight, + const int swidth, const int sheight, + const unsigned char *source, const int source_stride, + bool term256, struct lut_item *lut) +{ + assert(source); + const int tx = (dwidth - swidth) / 2; + const int ty = (dheight - sheight) / 2; + for (int y = 0; y < sheight; y++) { + const unsigned char *row = source + y * source_stride; + printf(TERM_ESC_GOTO_YX, ty + y, tx); + for (int x = 0; x < swidth; x++) { + unsigned char b = *row++; + 
unsigned char g = *row++; + unsigned char r = *row++; + if (term256) { + print_seq1(lut, TERM_ESC_COLOR256_BG, rgb_to_x256(r, g, b)); + } else { + print_seq3(lut, TERM_ESC_COLOR24BIT_BG, r, g, b); + } + printf(" "); + } + printf(TERM_ESC_CLEAR_COLORS); + } + printf("\n"); +} + +static void write_half_blocks( + const int dwidth, const int dheight, + const int swidth, const int sheight, + unsigned char *source, int source_stride, + bool term256, struct lut_item *lut) +{ + assert(source); + const int tx = (dwidth - swidth) / 2; + const int ty = (dheight - sheight) / 2; + for (int y = 0; y < sheight * 2; y += 2) { + const unsigned char *row_up = source + y * source_stride; + const unsigned char *row_down = source + (y + 1) * source_stride; + printf(TERM_ESC_GOTO_YX, ty + y / 2, tx); + for (int x = 0; x < swidth; x++) { + unsigned char b_up = *row_up++; + unsigned char g_up = *row_up++; + unsigned char r_up = *row_up++; + unsigned char b_down = *row_down++; + unsigned char g_down = *row_down++; + unsigned char r_down = *row_down++; + if (term256) { + print_seq1(lut, TERM_ESC_COLOR256_BG, rgb_to_x256(r_up, g_up, b_up)); + print_seq1(lut, TERM_ESC_COLOR256_FG, rgb_to_x256(r_down, g_down, b_down)); + } else { + print_seq3(lut, TERM_ESC_COLOR24BIT_BG, r_up, g_up, b_up); + print_seq3(lut, TERM_ESC_COLOR24BIT_FG, r_down, g_down, b_down); + } + printf("\xe2\x96\x84"); // UTF8 bytes of U+2584 (lower half block) + } + printf(TERM_ESC_CLEAR_COLORS); + } + printf("\n"); +} + +static void get_win_size(struct vo *vo, int *out_width, int *out_height) { + struct priv *p = vo->priv; + *out_width = DEFAULT_WIDTH; + *out_height = DEFAULT_HEIGHT; + + terminal_get_size(out_width, out_height); + + if (p->opts.width > 0) + *out_width = p->opts.width; + if (p->opts.height > 0) + *out_height = p->opts.height; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *p = vo->priv; + + get_win_size(vo, &vo->dwidth, &vo->dheight); + + struct mp_osd_res osd; + 
vo_get_src_dst_rects(vo, &p->src, &p->dst, &osd); + p->swidth = p->dst.x1 - p->dst.x0; + p->sheight = p->dst.y1 - p->dst.y0; + + p->sws->src = *params; + p->sws->dst = (struct mp_image_params) { + .imgfmt = IMGFMT, + .w = p->swidth, + .h = p->sheight, + .p_w = 1, + .p_h = 1, + }; + + const int mul = (p->opts.algo == ALGO_PLAIN ? 1 : 2); + if (p->frame) + talloc_free(p->frame); + p->frame = mp_image_alloc(IMGFMT, p->swidth, p->sheight * mul); + if (!p->frame) + return -1; + + if (mp_sws_reinit(p->sws) < 0) + return -1; + + printf(TERM_ESC_CLEAR_SCREEN); + + vo->want_redraw = true; + return 0; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + struct mp_image *src = frame->current; + if (!src) + return; + // XXX: pan, crop etc. + mp_sws_scale(p->sws, p->frame, src); +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + + int width, height; + get_win_size(vo, &width, &height); + + if (vo->dwidth != width || vo->dheight != height) + reconfig(vo, vo->params); + + if (p->opts.algo == ALGO_PLAIN) { + write_plain( + vo->dwidth, vo->dheight, p->swidth, p->sheight, + p->frame->planes[0], p->frame->stride[0], + p->opts.term256, p->lut); + } else { + write_half_blocks( + vo->dwidth, vo->dheight, p->swidth, p->sheight, + p->frame->planes[0], p->frame->stride[0], + p->opts.term256, p->lut); + } + fflush(stdout); +} + +static void uninit(struct vo *vo) +{ + printf(TERM_ESC_RESTORE_CURSOR); + printf(TERM_ESC_NORMAL_SCREEN); + struct priv *p = vo->priv; + if (p->frame) + talloc_free(p->frame); +} + +static int preinit(struct vo *vo) +{ + // most terminal characters aren't 1:1, so we default to 2:1. + // if user passes their own value of choice, it'll be scaled accordingly. 
+ vo->monitor_par = vo->opts->monitor_pixel_aspect * 2; + + struct priv *p = vo->priv; + p->sws = mp_sws_alloc(vo); + p->sws->log = vo->log; + mp_sws_enable_cmdline_opts(p->sws, vo->global); + + for (int i = 0; i < 256; ++i) { + char buff[8]; + p->lut[i].width = snprintf(buff, sizeof(buff), ";%d", i); + memcpy(p->lut[i].str, buff, 4); // some strings may not end on a null byte, but that's ok. + } + + printf(TERM_ESC_HIDE_CURSOR); + printf(TERM_ESC_ALT_SCREEN); + + return 0; +} + +static int query_format(struct vo *vo, int format) +{ + return format == IMGFMT; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + return VO_NOTIMPL; +} + +#define OPT_BASE_STRUCT struct priv + +const struct vo_driver video_out_tct = { + .name = "tct", + .description = "true-color terminals", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .uninit = uninit, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .opts.algo = ALGO_HALF_BLOCKS, + }, + .options = (const m_option_t[]) { + {"algo", OPT_CHOICE(opts.algo, + {"plain", ALGO_PLAIN}, + {"half-blocks", ALGO_HALF_BLOCKS})}, + {"width", OPT_INT(opts.width)}, + {"height", OPT_INT(opts.height)}, + {"256", OPT_BOOL(opts.term256)}, + {0} + }, + .options_prefix = "vo-tct", +}; diff --git a/video/out/vo_vaapi.c b/video/out/vo_vaapi.c new file mode 100644 index 0000000..12888fe --- /dev/null +++ b/video/out/vo_vaapi.c @@ -0,0 +1,877 @@ +/* + * VA API output module + * + * Copyright (C) 2008-2009 Splitted-Desktop Systems + * Gwenole Beauchesne <gbeauchesne@splitted-desktop.com> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <stdarg.h> +#include <limits.h> + +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <va/va_x11.h> + +#include "common/msg.h" +#include "video/out/vo.h" +#include "video/mp_image_pool.h" +#include "video/sws_utils.h" +#include "sub/draw_bmp.h" +#include "sub/img_convert.h" +#include "sub/osd.h" +#include "present_sync.h" +#include "x11_common.h" + +#include "video/mp_image.h" +#include "video/vaapi.h" +#include "video/hwdec.h" + +struct vaapi_osd_image { + int w, h; + VAImage image; + VASubpictureID subpic_id; + bool is_used; +}; + +struct vaapi_subpic { + VASubpictureID id; + int src_x, src_y, src_w, src_h; + int dst_x, dst_y, dst_w, dst_h; +}; + +struct vaapi_osd_part { + bool active; + int change_id; + struct vaapi_osd_image image; + struct vaapi_subpic subpic; +}; + +#define MAX_OUTPUT_SURFACES 2 + +struct priv { + struct mp_log *log; + struct vo *vo; + VADisplay display; + struct mp_vaapi_ctx *mpvaapi; + + struct mp_image_params image_params; + struct mp_rect src_rect; + struct mp_rect dst_rect; + struct mp_osd_res screen_osd_res; + + struct mp_image *output_surfaces[MAX_OUTPUT_SURFACES]; + struct mp_image *swdec_surfaces[MAX_OUTPUT_SURFACES]; + + int output_surface; + int visible_surface; + int scaling; + bool force_scaled_osd; + + VAImageFormat osd_format; // corresponds to OSD_VA_FORMAT + struct vaapi_osd_part osd_part; + bool osd_screen; + struct mp_draw_sub_cache *osd_cache; + + struct mp_image_pool *pool; + + struct mp_image *black_surface; + + VAImageFormat *va_subpic_formats; + unsigned int *va_subpic_flags; + int 
va_num_subpic_formats; + VADisplayAttribute *va_display_attrs; + int *mp_display_attr; + int va_num_display_attrs; + + struct va_image_formats *image_formats; +}; + +#define OSD_VA_FORMAT VA_FOURCC_BGRA + +static void draw_osd(struct vo *vo); + + +struct fmtentry { + uint32_t va; + enum mp_imgfmt mp; +}; + +static const struct fmtentry va_to_imgfmt[] = { + {VA_FOURCC_NV12, IMGFMT_NV12}, + {VA_FOURCC_YV12, IMGFMT_420P}, + {VA_FOURCC_IYUV, IMGFMT_420P}, + {VA_FOURCC_UYVY, IMGFMT_UYVY}, + // Note: not sure about endian issues (the mp formats are byte-addressed) + {VA_FOURCC_RGBA, IMGFMT_RGBA}, + {VA_FOURCC_RGBX, IMGFMT_RGBA}, + {VA_FOURCC_BGRA, IMGFMT_BGRA}, + {VA_FOURCC_BGRX, IMGFMT_BGRA}, + {0 , IMGFMT_NONE} +}; + +static enum mp_imgfmt va_fourcc_to_imgfmt(uint32_t fourcc) +{ + for (const struct fmtentry *entry = va_to_imgfmt; entry->va; ++entry) { + if (entry->va == fourcc) + return entry->mp; + } + return IMGFMT_NONE; +} + +static uint32_t va_fourcc_from_imgfmt(int imgfmt) +{ + for (const struct fmtentry *entry = va_to_imgfmt; entry->va; ++entry) { + if (entry->mp == imgfmt) + return entry->va; + } + return 0; +} + +struct va_image_formats { + VAImageFormat *entries; + int num; +}; + +static void va_get_formats(struct priv *ctx) +{ + struct va_image_formats *formats = talloc_ptrtype(ctx, formats); + formats->num = vaMaxNumImageFormats(ctx->display); + formats->entries = talloc_array(formats, VAImageFormat, formats->num); + VAStatus status = vaQueryImageFormats(ctx->display, formats->entries, + &formats->num); + if (!CHECK_VA_STATUS(ctx, "vaQueryImageFormats()")) + return; + MP_VERBOSE(ctx, "%d image formats available:\n", formats->num); + for (int i = 0; i < formats->num; i++) + MP_VERBOSE(ctx, " %s\n", mp_tag_str(formats->entries[i].fourcc)); + ctx->image_formats = formats; +} + +static VAImageFormat *va_image_format_from_imgfmt(struct priv *ctx, + int imgfmt) +{ + struct va_image_formats *formats = ctx->image_formats; + const int fourcc = 
va_fourcc_from_imgfmt(imgfmt); + if (!formats || !formats->num || !fourcc) + return NULL; + for (int i = 0; i < formats->num; i++) { + if (formats->entries[i].fourcc == fourcc) + return &formats->entries[i]; + } + return NULL; +} + +struct va_surface { + struct mp_vaapi_ctx *ctx; + VADisplay display; + + VASurfaceID id; + int rt_format; + + // The actually allocated surface size (needed for cropping). + // mp_images can have a smaller size than this, which means they are + // cropped down to a smaller size by removing right/bottom pixels. + int w, h; + + VAImage image; // used for software decoding case + bool is_derived; // is image derived by vaDeriveImage()? +}; + +static struct va_surface *va_surface_in_mp_image(struct mp_image *mpi) +{ + return mpi && mpi->imgfmt == IMGFMT_VAAPI ? + (struct va_surface*)mpi->planes[0] : NULL; +} + +static void release_va_surface(void *arg) +{ + struct va_surface *surface = arg; + + if (surface->id != VA_INVALID_ID) { + if (surface->image.image_id != VA_INVALID_ID) + vaDestroyImage(surface->display, surface->image.image_id); + vaDestroySurfaces(surface->display, &surface->id, 1); + } + + talloc_free(surface); +} + +static struct mp_image *alloc_surface(struct mp_vaapi_ctx *ctx, int rt_format, + int w, int h) +{ + VASurfaceID id = VA_INVALID_ID; + VAStatus status; + status = vaCreateSurfaces(ctx->display, rt_format, w, h, &id, 1, NULL, 0); + if (!CHECK_VA_STATUS(ctx, "vaCreateSurfaces()")) + return NULL; + + struct va_surface *surface = talloc_ptrtype(NULL, surface); + if (!surface) + return NULL; + + *surface = (struct va_surface){ + .ctx = ctx, + .id = id, + .rt_format = rt_format, + .w = w, + .h = h, + .display = ctx->display, + .image = { .image_id = VA_INVALID_ID, .buf = VA_INVALID_ID }, + }; + + struct mp_image img = {0}; + mp_image_setfmt(&img, IMGFMT_VAAPI); + mp_image_set_size(&img, w, h); + img.planes[0] = (uint8_t*)surface; + img.planes[3] = (uint8_t*)(uintptr_t)surface->id; + return mp_image_new_custom_ref(&img, 
surface, release_va_surface); +} + +static void va_surface_image_destroy(struct va_surface *surface) +{ + if (!surface || surface->image.image_id == VA_INVALID_ID) + return; + vaDestroyImage(surface->display, surface->image.image_id); + surface->image.image_id = VA_INVALID_ID; + surface->is_derived = false; +} + +static int va_surface_image_alloc(struct va_surface *p, VAImageFormat *format) +{ + VADisplay *display = p->display; + + if (p->image.image_id != VA_INVALID_ID && + p->image.format.fourcc == format->fourcc) + return 0; + + int r = 0; + + va_surface_image_destroy(p); + + VAStatus status = vaDeriveImage(display, p->id, &p->image); + if (status == VA_STATUS_SUCCESS) { + /* vaDeriveImage() is supported, check format */ + if (p->image.format.fourcc == format->fourcc && + p->image.width == p->w && p->image.height == p->h) + { + p->is_derived = true; + MP_TRACE(p->ctx, "Using vaDeriveImage()\n"); + } else { + vaDestroyImage(p->display, p->image.image_id); + status = VA_STATUS_ERROR_OPERATION_FAILED; + } + } + if (status != VA_STATUS_SUCCESS) { + p->image.image_id = VA_INVALID_ID; + status = vaCreateImage(p->display, format, p->w, p->h, &p->image); + if (!CHECK_VA_STATUS(p->ctx, "vaCreateImage()")) { + p->image.image_id = VA_INVALID_ID; + r = -1; + } + } + + return r; +} + +// img must be a VAAPI surface; make sure its internal VAImage is allocated +// to a format corresponding to imgfmt (or return an error). +static int va_surface_alloc_imgfmt(struct priv *priv, struct mp_image *img, + int imgfmt) +{ + struct va_surface *p = va_surface_in_mp_image(img); + if (!p) + return -1; + // Multiple FourCCs can refer to the same imgfmt, so check by doing the + // surjective conversion first. 
+ if (p->image.image_id != VA_INVALID_ID && + va_fourcc_to_imgfmt(p->image.format.fourcc) == imgfmt) + return 0; + VAImageFormat *format = va_image_format_from_imgfmt(priv, imgfmt); + if (!format) + return -1; + if (va_surface_image_alloc(p, format) < 0) + return -1; + return 0; +} + +static bool va_image_map(struct mp_vaapi_ctx *ctx, VAImage *image, + struct mp_image *mpi) +{ + int imgfmt = va_fourcc_to_imgfmt(image->format.fourcc); + if (imgfmt == IMGFMT_NONE) + return false; + void *data = NULL; + const VAStatus status = vaMapBuffer(ctx->display, image->buf, &data); + if (!CHECK_VA_STATUS(ctx, "vaMapBuffer()")) + return false; + + *mpi = (struct mp_image) {0}; + mp_image_setfmt(mpi, imgfmt); + mp_image_set_size(mpi, image->width, image->height); + + for (int p = 0; p < image->num_planes; p++) { + mpi->stride[p] = image->pitches[p]; + mpi->planes[p] = (uint8_t *)data + image->offsets[p]; + } + + if (image->format.fourcc == VA_FOURCC_YV12) { + MPSWAP(int, mpi->stride[1], mpi->stride[2]); + MPSWAP(uint8_t *, mpi->planes[1], mpi->planes[2]); + } + + return true; +} + +static bool va_image_unmap(struct mp_vaapi_ctx *ctx, VAImage *image) +{ + const VAStatus status = vaUnmapBuffer(ctx->display, image->buf); + return CHECK_VA_STATUS(ctx, "vaUnmapBuffer()"); +} + +// va_dst: copy destination, must be IMGFMT_VAAPI +// sw_src: copy source, must be a software pixel format +static int va_surface_upload(struct priv *priv, struct mp_image *va_dst, + struct mp_image *sw_src) +{ + struct va_surface *p = va_surface_in_mp_image(va_dst); + if (!p) + return -1; + + if (va_surface_alloc_imgfmt(priv, va_dst, sw_src->imgfmt) < 0) + return -1; + + struct mp_image img; + if (!va_image_map(p->ctx, &p->image, &img)) + return -1; + assert(sw_src->w <= img.w && sw_src->h <= img.h); + mp_image_set_size(&img, sw_src->w, sw_src->h); // copy only visible part + mp_image_copy(&img, sw_src); + va_image_unmap(p->ctx, &p->image); + + if (!p->is_derived) { + VAStatus status = vaPutImage(p->display, 
p->id, + p->image.image_id, + 0, 0, sw_src->w, sw_src->h, + 0, 0, sw_src->w, sw_src->h); + if (!CHECK_VA_STATUS(p->ctx, "vaPutImage()")) + return -1; + } + + if (p->is_derived) + va_surface_image_destroy(p); + return 0; +} + +struct pool_alloc_ctx { + struct mp_vaapi_ctx *vaapi; + int rt_format; +}; + +static struct mp_image *alloc_pool(void *pctx, int fmt, int w, int h) +{ + struct pool_alloc_ctx *alloc_ctx = pctx; + if (fmt != IMGFMT_VAAPI) + return NULL; + + return alloc_surface(alloc_ctx->vaapi, alloc_ctx->rt_format, w, h); +} + +// The allocator of the given image pool to allocate VAAPI surfaces, using +// the given rt_format. +static void va_pool_set_allocator(struct mp_image_pool *pool, + struct mp_vaapi_ctx *ctx, int rt_format) +{ + struct pool_alloc_ctx *alloc_ctx = talloc_ptrtype(pool, alloc_ctx); + *alloc_ctx = (struct pool_alloc_ctx){ + .vaapi = ctx, + .rt_format = rt_format, + }; + mp_image_pool_set_allocator(pool, alloc_pool, alloc_ctx); + mp_image_pool_set_lru(pool); +} + +static void flush_output_surfaces(struct priv *p) +{ + for (int n = 0; n < MAX_OUTPUT_SURFACES; n++) + mp_image_unrefp(&p->output_surfaces[n]); + p->output_surface = 0; + p->visible_surface = 0; +} + +// See flush_surfaces() remarks - the same applies. 
+static void free_video_specific(struct priv *p) +{ + flush_output_surfaces(p); + + mp_image_unrefp(&p->black_surface); + + for (int n = 0; n < MAX_OUTPUT_SURFACES; n++) + mp_image_unrefp(&p->swdec_surfaces[n]); + + if (p->pool) + mp_image_pool_clear(p->pool); +} + +static bool alloc_swdec_surfaces(struct priv *p, int w, int h, int imgfmt) +{ + free_video_specific(p); + for (int i = 0; i < MAX_OUTPUT_SURFACES; i++) { + p->swdec_surfaces[i] = mp_image_pool_get(p->pool, IMGFMT_VAAPI, w, h); + if (va_surface_alloc_imgfmt(p, p->swdec_surfaces[i], imgfmt) < 0) + return false; + } + return true; +} + +static void resize(struct priv *p) +{ + vo_get_src_dst_rects(p->vo, &p->src_rect, &p->dst_rect, &p->screen_osd_res); + + // It's not clear whether this is needed; maybe not. + //vo_x11_clearwindow(p->vo, p->vo->x11->window); + + p->vo->want_redraw = true; +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *p = vo->priv; + + free_video_specific(p); + + vo_x11_config_vo_window(vo); + + if (params->imgfmt != IMGFMT_VAAPI) { + if (!alloc_swdec_surfaces(p, params->w, params->h, params->imgfmt)) + return -1; + } + + p->image_params = *params; + resize(p); + return 0; +} + +static int query_format(struct vo *vo, int imgfmt) +{ + struct priv *p = vo->priv; + if (imgfmt == IMGFMT_VAAPI || va_image_format_from_imgfmt(p, imgfmt)) + return 1; + + return 0; +} + +static bool render_to_screen(struct priv *p, struct mp_image *mpi) +{ + VAStatus status; + + VASurfaceID surface = va_surface_id(mpi); + if (surface == VA_INVALID_ID) { + if (!p->black_surface) { + int w = p->image_params.w, h = p->image_params.h; + // 4:2:0 should work everywhere + int fmt = IMGFMT_420P; + p->black_surface = mp_image_pool_get(p->pool, IMGFMT_VAAPI, w, h); + if (p->black_surface) { + struct mp_image *img = mp_image_alloc(fmt, w, h); + if (img) { + mp_image_clear(img, 0, 0, w, h); + if (va_surface_upload(p, p->black_surface, img) < 0) + mp_image_unrefp(&p->black_surface); + 
talloc_free(img); + } + } + } + surface = va_surface_id(p->black_surface); + } + + if (surface == VA_INVALID_ID) + return false; + + struct vaapi_osd_part *part = &p->osd_part; + if (part->active) { + struct vaapi_subpic *sp = &part->subpic; + int flags = 0; + if (p->osd_screen) + flags |= VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD; + status = vaAssociateSubpicture(p->display, + sp->id, &surface, 1, + sp->src_x, sp->src_y, + sp->src_w, sp->src_h, + sp->dst_x, sp->dst_y, + sp->dst_w, sp->dst_h, + flags); + CHECK_VA_STATUS(p, "vaAssociateSubpicture()"); + } + + int flags = va_get_colorspace_flag(p->image_params.color.space) | + p->scaling | VA_FRAME_PICTURE; + status = vaPutSurface(p->display, + surface, + p->vo->x11->window, + p->src_rect.x0, + p->src_rect.y0, + p->src_rect.x1 - p->src_rect.x0, + p->src_rect.y1 - p->src_rect.y0, + p->dst_rect.x0, + p->dst_rect.y0, + p->dst_rect.x1 - p->dst_rect.x0, + p->dst_rect.y1 - p->dst_rect.y0, + NULL, 0, + flags); + CHECK_VA_STATUS(p, "vaPutSurface()"); + + if (part->active) { + struct vaapi_subpic *sp = &part->subpic; + status = vaDeassociateSubpicture(p->display, sp->id, + &surface, 1); + CHECK_VA_STATUS(p, "vaDeassociateSubpicture()"); + } + + return true; +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + + p->visible_surface = p->output_surface; + render_to_screen(p, p->output_surfaces[p->output_surface]); + p->output_surface = (p->output_surface + 1) % MAX_OUTPUT_SURFACES; + vo_x11_present(vo); + present_sync_swap(vo->x11->present); +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct vo_x11_state *x11 = vo->x11; + present_sync_get_info(x11->present, info); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + struct mp_image *mpi = frame->current; + + if (mpi && mpi->imgfmt != IMGFMT_VAAPI) { + struct mp_image *dst = p->swdec_surfaces[p->output_surface]; + if (!dst || va_surface_upload(p, dst, mpi) < 0) { + MP_WARN(vo, 
"Could not upload surface.\n"); + talloc_free(mpi); + return; + } + mp_image_copy_attributes(dst, mpi); + mpi = mp_image_new_ref(dst); + } + + talloc_free(p->output_surfaces[p->output_surface]); + p->output_surfaces[p->output_surface] = mpi; + + draw_osd(vo); +} + +static void free_subpicture(struct priv *p, struct vaapi_osd_image *img) +{ + if (img->image.image_id != VA_INVALID_ID) + vaDestroyImage(p->display, img->image.image_id); + if (img->subpic_id != VA_INVALID_ID) + vaDestroySubpicture(p->display, img->subpic_id); + img->image.image_id = VA_INVALID_ID; + img->subpic_id = VA_INVALID_ID; +} + +static int new_subpicture(struct priv *p, int w, int h, + struct vaapi_osd_image *out) +{ + VAStatus status; + + free_subpicture(p, out); + + struct vaapi_osd_image m = { + .image = {.image_id = VA_INVALID_ID, .buf = VA_INVALID_ID}, + .subpic_id = VA_INVALID_ID, + .w = w, + .h = h, + }; + + status = vaCreateImage(p->display, &p->osd_format, w, h, &m.image); + if (!CHECK_VA_STATUS(p, "vaCreateImage()")) + goto error; + status = vaCreateSubpicture(p->display, m.image.image_id, &m.subpic_id); + if (!CHECK_VA_STATUS(p, "vaCreateSubpicture()")) + goto error; + + *out = m; + return 0; + +error: + free_subpicture(p, &m); + MP_ERR(p, "failed to allocate OSD sub-picture of size %dx%d.\n", w, h); + return -1; +} + +static void draw_osd(struct vo *vo) +{ + struct priv *p = vo->priv; + + struct mp_image *cur = p->output_surfaces[p->output_surface]; + double pts = cur ? 
cur->pts : 0; + + if (!p->osd_format.fourcc) + return; + + struct mp_osd_res vid_res = osd_res_from_image_params(vo->params); + + struct mp_osd_res *res; + if (p->osd_screen) { + res = &p->screen_osd_res; + } else { + res = &vid_res; + } + + p->osd_part.active = false; + + if (!p->osd_cache) + p->osd_cache = mp_draw_sub_alloc(p, vo->global); + + struct sub_bitmap_list *sbs = osd_render(vo->osd, *res, pts, 0, + mp_draw_sub_formats); + + struct mp_rect act_rc[1], mod_rc[64]; + int num_act_rc = 0, num_mod_rc = 0; + + struct mp_image *osd = mp_draw_sub_overlay(p->osd_cache, sbs, + act_rc, MP_ARRAY_SIZE(act_rc), &num_act_rc, + mod_rc, MP_ARRAY_SIZE(mod_rc), &num_mod_rc); + + if (!osd) + goto error; + + struct vaapi_osd_part *part = &p->osd_part; + + part->active = false; + + int w = res->w; + int h = res->h; + if (part->image.w != w || part->image.h != h) { + if (new_subpicture(p, w, h, &part->image) < 0) + goto error; + } + + struct vaapi_osd_image *img = &part->image; + struct mp_image vaimg; + if (!va_image_map(p->mpvaapi, &img->image, &vaimg)) + goto error; + + for (int n = 0; n < num_mod_rc; n++) { + struct mp_rect *rc = &mod_rc[n]; + + int rw = mp_rect_w(*rc); + int rh = mp_rect_h(*rc); + + void *src = mp_image_pixel_ptr(osd, 0, rc->x0, rc->y0); + void *dst = vaimg.planes[0] + rc->y0 * vaimg.stride[0] + rc->x0 * 4; + + memcpy_pic(dst, src, rw * 4, rh, vaimg.stride[0], osd->stride[0]); + } + + if (!va_image_unmap(p->mpvaapi, &img->image)) + goto error; + + if (num_act_rc) { + struct mp_rect rc = act_rc[0]; + rc.x0 = rc.y0 = 0; // must be a Mesa bug + part->subpic = (struct vaapi_subpic) { + .id = img->subpic_id, + .src_x = rc.x0, .src_y = rc.y0, + .src_w = mp_rect_w(rc), .src_h = mp_rect_h(rc), + .dst_x = rc.x0, .dst_y = rc.y0, + .dst_w = mp_rect_w(rc), .dst_h = mp_rect_h(rc), + }; + part->active = true; + } + +error: + talloc_free(sbs); +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + struct priv *p = vo->priv; + + switch (request) { + 
case VOCTRL_SET_PANSCAN: + resize(p); + return VO_TRUE; + } + + int events = 0; + int r = vo_x11_control(vo, &events, request, data); + if (events & VO_EVENT_RESIZE) + resize(p); + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + vo_event(vo, events); + return r; +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + + free_video_specific(p); + talloc_free(p->pool); + + struct vaapi_osd_part *part = &p->osd_part; + free_subpicture(p, &part->image); + + if (vo->hwdec_devs) { + hwdec_devices_remove(vo->hwdec_devs, &p->mpvaapi->hwctx); + hwdec_devices_destroy(vo->hwdec_devs); + } + + va_destroy(p->mpvaapi); + + vo_x11_uninit(vo); +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + p->vo = vo; + p->log = vo->log; + + VAStatus status; + + if (!vo_x11_init(vo)) + goto fail; + + if (!vo_x11_create_vo_window(vo, NULL, "vaapi")) + goto fail; + + p->display = vaGetDisplay(vo->x11->display); + if (!p->display) + goto fail; + + p->mpvaapi = va_initialize(p->display, p->log, false); + if (!p->mpvaapi) { + vaTerminate(p->display); + p->display = NULL; + goto fail; + } + + if (va_guess_if_emulated(p->mpvaapi)) { + MP_WARN(vo, "VA-API is most likely emulated via VDPAU.\n" + "It's better to use VDPAU directly with: --vo=vdpau\n"); + } + + va_get_formats(p); + if (!p->image_formats) + goto fail; + + p->mpvaapi->hwctx.hw_imgfmt = IMGFMT_VAAPI; + p->pool = mp_image_pool_new(p); + va_pool_set_allocator(p->pool, p->mpvaapi, VA_RT_FORMAT_YUV420); + + int max_subpic_formats = vaMaxNumSubpictureFormats(p->display); + p->va_subpic_formats = talloc_array(vo, VAImageFormat, max_subpic_formats); + p->va_subpic_flags = talloc_array(vo, unsigned int, max_subpic_formats); + status = vaQuerySubpictureFormats(p->display, + p->va_subpic_formats, + p->va_subpic_flags, + &p->va_num_subpic_formats); + if (!CHECK_VA_STATUS(p, "vaQuerySubpictureFormats()")) + p->va_num_subpic_formats = 0; + MP_VERBOSE(vo, "%d subpicture formats available:\n", + 
p->va_num_subpic_formats); + + for (int i = 0; i < p->va_num_subpic_formats; i++) { + MP_VERBOSE(vo, " %s, flags 0x%x\n", + mp_tag_str(p->va_subpic_formats[i].fourcc), + p->va_subpic_flags[i]); + if (p->va_subpic_formats[i].fourcc == OSD_VA_FORMAT) { + p->osd_format = p->va_subpic_formats[i]; + if (!p->force_scaled_osd) { + p->osd_screen = + p->va_subpic_flags[i] & VA_SUBPICTURE_DESTINATION_IS_SCREEN_COORD; + } + } + } + + if (!p->osd_format.fourcc) + MP_ERR(vo, "OSD format not supported. Disabling OSD.\n"); + + struct vaapi_osd_part *part = &p->osd_part; + part->image.image.image_id = VA_INVALID_ID; + part->image.subpic_id = VA_INVALID_ID; + + int max_display_attrs = vaMaxNumDisplayAttributes(p->display); + p->va_display_attrs = talloc_array(vo, VADisplayAttribute, max_display_attrs); + if (p->va_display_attrs) { + status = vaQueryDisplayAttributes(p->display, p->va_display_attrs, + &p->va_num_display_attrs); + if (!CHECK_VA_STATUS(p, "vaQueryDisplayAttributes()")) + p->va_num_display_attrs = 0; + p->mp_display_attr = talloc_zero_array(vo, int, p->va_num_display_attrs); + } + + vo->hwdec_devs = hwdec_devices_create(); + hwdec_devices_add(vo->hwdec_devs, &p->mpvaapi->hwctx); + + MP_WARN(vo, "Warning: this compatibility VO is low quality and may " + "have issues with OSD, scaling, screenshots and more.\n" + "vo=gpu is the preferred choice in any case and " + "includes VA-API support via hwdec=vaapi or vaapi-copy.\n"); + + return 0; + +fail: + uninit(vo); + return -1; +} + +#define OPT_BASE_STRUCT struct priv + +const struct vo_driver video_out_vaapi = { + .description = "VA API with X11", + .name = "vaapi", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .get_vsync = get_vsync, + .wakeup = vo_x11_wakeup, + .wait_events = vo_x11_wait_events, + .uninit = uninit, + .priv_size = sizeof(struct priv), + .priv_defaults = &(const struct priv) { + .scaling = 
VA_FILTER_SCALING_DEFAULT, + }, + .options = (const struct m_option[]) { + {"scaling", OPT_CHOICE(scaling, + {"default", VA_FILTER_SCALING_DEFAULT}, + {"fast", VA_FILTER_SCALING_FAST}, + {"hq", VA_FILTER_SCALING_HQ}, + {"nla", VA_FILTER_SCALING_NL_ANAMORPHIC})}, + {"scaled-osd", OPT_BOOL(force_scaled_osd)}, + {0} + }, + .options_prefix = "vo-vaapi", +}; diff --git a/video/out/vo_vdpau.c b/video/out/vo_vdpau.c new file mode 100644 index 0000000..d6b261f --- /dev/null +++ b/video/out/vo_vdpau.c @@ -0,0 +1,1139 @@ +/* + * VDPAU video output driver + * + * Copyright (C) 2008 NVIDIA (Rajib Mahapatra <rmahapatra@nvidia.com>) + * Copyright (C) 2009 Uoti Urpala + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +/* + * Actual decoding is done in video/decode/vdpau.c + */ + +#include <stdio.h> +#include <stdlib.h> +#include <stdint.h> +#include <stdbool.h> +#include <limits.h> +#include <assert.h> + +#include "video/vdpau.h" +#include "video/vdpau_mixer.h" +#include "video/hwdec.h" +#include "common/msg.h" +#include "options/options.h" +#include "mpv_talloc.h" +#include "vo.h" +#include "x11_common.h" +#include "video/csputils.h" +#include "sub/osd.h" +#include "options/m_option.h" +#include "video/mp_image.h" +#include "osdep/timer.h" + +// Returns x + a, but wrapped around to the range [0, m) +// a must be within [-m, m], x within [0, m) +#define WRAP_ADD(x, a, m) ((a) < 0 \ + ? ((x)+(a)+(m) < (m) ? (x)+(a)+(m) : (x)+(a)) \ + : ((x)+(a) < (m) ? (x)+(a) : (x)+(a)-(m))) + + +/* number of video and output surfaces */ +#define MAX_OUTPUT_SURFACES 15 + +/* Pixelformat used for output surfaces */ +#define OUTPUT_RGBA_FORMAT VDP_RGBA_FORMAT_B8G8R8A8 + +/* + * Global variable declaration - VDPAU specific + */ + +struct vdpctx { + struct mp_vdpau_ctx *mpvdp; + struct vdp_functions *vdp; + VdpDevice vdp_device; + uint64_t preemption_counter; + + struct m_color colorkey; + + VdpPresentationQueueTarget flip_target; + VdpPresentationQueue flip_queue; + + VdpOutputSurface output_surfaces[MAX_OUTPUT_SURFACES]; + int num_output_surfaces; + VdpOutputSurface black_pixel; + VdpOutputSurface rotation_surface; + + struct mp_image *current_image; + int64_t current_pts; + int current_duration; + + int output_surface_w, output_surface_h; + int rotation; + + bool force_yuv; + struct mp_vdpau_mixer *video_mixer; + bool pullup; + float denoise; + float sharpen; + int hqscaling; + bool chroma_deint; + int flip_offset_window; + int flip_offset_fs; + int64_t flip_offset_us; + + VdpRect src_rect_vid; + VdpRect out_rect_vid; + struct mp_osd_res osd_rect; + VdpBool supports_a8; + + int surface_num; // indexes output_surfaces + int query_surface_num; + VdpTime recent_vsync_time; + float user_fps; 
+ bool composite_detect; + int vsync_interval; + uint64_t last_queue_time; + uint64_t queue_time[MAX_OUTPUT_SURFACES]; + uint64_t last_ideal_time; + bool dropped_frame; + uint64_t dropped_time; + uint32_t vid_width, vid_height; + uint32_t image_format; + VdpYCbCrFormat vdp_pixel_format; + bool rgb_mode; + + // OSD + struct osd_bitmap_surface { + VdpRGBAFormat format; + VdpBitmapSurface surface; + uint32_t surface_w, surface_h; + // List of surfaces to be rendered + struct osd_target { + VdpRect source; + VdpRect dest; + VdpColor color; + } *targets; + int targets_size; + int render_count; + int change_id; + } osd_surfaces[MAX_OSD_PARTS]; +}; + +static bool status_ok(struct vo *vo); + +static int video_to_output_surface(struct vo *vo, struct mp_image *mpi) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpTime dummy; + VdpStatus vdp_st; + + VdpOutputSurface output_surface = vc->output_surfaces[vc->surface_num]; + VdpRect *output_rect = &vc->out_rect_vid; + VdpRect *video_rect = &vc->src_rect_vid; + + vdp_st = vdp->presentation_queue_block_until_surface_idle(vc->flip_queue, + output_surface, + &dummy); + CHECK_VDP_WARNING(vo, "Error when calling " + "vdp_presentation_queue_block_until_surface_idle"); + + // Clear the borders between video and window (if there are any). + // For some reason, video_mixer_render doesn't need it for YUV. + // Also, if there is nothing to render, at least clear the screen. + if (vc->rgb_mode || !mpi || mpi->params.rotate != 0) { + int flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_0; + vdp_st = vdp->output_surface_render_output_surface(output_surface, + NULL, vc->black_pixel, + NULL, NULL, NULL, + flags); + CHECK_VDP_WARNING(vo, "Error clearing screen"); + } + + if (!mpi) + return -1; + + struct mp_vdpau_mixer_frame *frame = mp_vdpau_mixed_frame_get(mpi); + struct mp_vdpau_mixer_opts opts = {0}; + if (frame) + opts = frame->opts; + + // Apply custom vo_vdpau suboptions. 
+ opts.chroma_deint |= vc->chroma_deint; + opts.pullup |= vc->pullup; + opts.denoise = MPCLAMP(opts.denoise + vc->denoise, 0, 1); + opts.sharpen = MPCLAMP(opts.sharpen + vc->sharpen, -1, 1); + if (vc->hqscaling) + opts.hqscaling = vc->hqscaling; + + if (mpi->params.rotate != 0) { + int flags; + VdpRect r_rect; + switch (mpi->params.rotate) { + case 90: + r_rect.y0 = output_rect->x0; + r_rect.y1 = output_rect->x1; + r_rect.x0 = output_rect->y0; + r_rect.x1 = output_rect->y1; + flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_90; + break; + case 180: + r_rect.x0 = output_rect->x0; + r_rect.x1 = output_rect->x1; + r_rect.y0 = output_rect->y0; + r_rect.y1 = output_rect->y1; + flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_180; + break; + case 270: + r_rect.y0 = output_rect->x0; + r_rect.y1 = output_rect->x1; + r_rect.x0 = output_rect->y0; + r_rect.x1 = output_rect->y1; + flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_270; + break; + default: + MP_ERR(vo, "Unsupported rotation angle: %u\n", mpi->params.rotate); + return -1; + } + + mp_vdpau_mixer_render(vc->video_mixer, &opts, vc->rotation_surface, + &r_rect, mpi, video_rect); + vdp_st = vdp->output_surface_render_output_surface(output_surface, + output_rect, + vc->rotation_surface, + &r_rect, + NULL, + NULL, + flags); + CHECK_VDP_WARNING(vo, "Error rendering rotated frame"); + } else { + mp_vdpau_mixer_render(vc->video_mixer, &opts, output_surface, + output_rect, mpi, video_rect); + } + return 0; +} + +static void forget_frames(struct vo *vo, bool seek_reset) +{ + struct vdpctx *vc = vo->priv; + + if (!seek_reset) + mp_image_unrefp(&vc->current_image); + + vc->dropped_frame = false; +} + +static int s_size(int max, int s, int disp) +{ + disp = MPMAX(1, disp); + return MPMIN(max, MPMAX(s, disp)); +} + +static void resize(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + struct mp_rect src_rect; + struct mp_rect dst_rect; + vo_get_src_dst_rects(vo, &src_rect, &dst_rect, 
&vc->osd_rect); + vc->out_rect_vid.x0 = dst_rect.x0; + vc->out_rect_vid.x1 = dst_rect.x1; + vc->out_rect_vid.y0 = dst_rect.y0; + vc->out_rect_vid.y1 = dst_rect.y1; + if (vo->params->rotate == 90 || vo->params->rotate == 270) { + vc->src_rect_vid.y0 = src_rect.x0; + vc->src_rect_vid.y1 = src_rect.x1; + vc->src_rect_vid.x0 = src_rect.y0; + vc->src_rect_vid.x1 = src_rect.y1; + } else { + vc->src_rect_vid.x0 = src_rect.x0; + vc->src_rect_vid.x1 = src_rect.x1; + vc->src_rect_vid.y0 = src_rect.y0; + vc->src_rect_vid.y1 = src_rect.y1; + } + + VdpBool ok; + uint32_t max_w, max_h; + vdp_st = vdp->output_surface_query_capabilities(vc->vdp_device, + OUTPUT_RGBA_FORMAT, + &ok, &max_w, &max_h); + if (vdp_st != VDP_STATUS_OK || !ok) + return; + + vc->flip_offset_us = vo->opts->fullscreen ? + 1000LL * vc->flip_offset_fs : + 1000LL * vc->flip_offset_window; + vo_set_queue_params(vo, vc->flip_offset_us * 1000, 1); + + if (vc->output_surface_w < vo->dwidth || vc->output_surface_h < vo->dheight || + vc->rotation != vo->params->rotate) + { + vc->output_surface_w = s_size(max_w, vc->output_surface_w, vo->dwidth); + vc->output_surface_h = s_size(max_h, vc->output_surface_h, vo->dheight); + // Creation of output_surfaces + for (int i = 0; i < vc->num_output_surfaces; i++) + if (vc->output_surfaces[i] != VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_destroy(vc->output_surfaces[i]); + CHECK_VDP_WARNING(vo, "Error when calling " + "vdp_output_surface_destroy"); + } + for (int i = 0; i < vc->num_output_surfaces; i++) { + vdp_st = vdp->output_surface_create(vc->vdp_device, + OUTPUT_RGBA_FORMAT, + vc->output_surface_w, + vc->output_surface_h, + &vc->output_surfaces[i]); + CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_create"); + MP_DBG(vo, "vdpau out create: %u\n", + vc->output_surfaces[i]); + } + if (vc->rotation_surface != VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_destroy(vc->rotation_surface); + CHECK_VDP_WARNING(vo, "Error when calling " + 
"vdp_output_surface_destroy"); + vc->rotation_surface = VDP_INVALID_HANDLE; + } + if (vo->params->rotate == 90 || vo->params->rotate == 270) { + vdp_st = vdp->output_surface_create(vc->vdp_device, + OUTPUT_RGBA_FORMAT, + vc->output_surface_h, + vc->output_surface_w, + &vc->rotation_surface); + } else if (vo->params->rotate == 180) { + vdp_st = vdp->output_surface_create(vc->vdp_device, + OUTPUT_RGBA_FORMAT, + vc->output_surface_w, + vc->output_surface_h, + &vc->rotation_surface); + } + CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_create"); + MP_DBG(vo, "vdpau rotation surface create: %u\n", + vc->rotation_surface); + } + vc->rotation = vo->params->rotate; + vo->want_redraw = true; +} + +static int win_x11_init_vdpau_flip_queue(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + struct vo_x11_state *x11 = vo->x11; + VdpStatus vdp_st; + + if (vc->flip_target == VDP_INVALID_HANDLE) { + vdp_st = vdp->presentation_queue_target_create_x11(vc->vdp_device, + x11->window, + &vc->flip_target); + CHECK_VDP_ERROR(vo, "Error when calling " + "vdp_presentation_queue_target_create_x11"); + } + + /* Empirically this seems to be the first call which fails when we + * try to reinit after preemption while the user is still switched + * from X to a virtual terminal (creating the vdp_device initially + * succeeds, as does creating the flip_target above). This is + * probably not guaranteed behavior. 
+ */ + if (vc->flip_queue == VDP_INVALID_HANDLE) { + vdp_st = vdp->presentation_queue_create(vc->vdp_device, vc->flip_target, + &vc->flip_queue); + CHECK_VDP_ERROR(vo, "Error when calling vdp_presentation_queue_create"); + } + + if (vc->colorkey.a > 0) { + VdpColor color = { + .red = vc->colorkey.r / 255.0, + .green = vc->colorkey.g / 255.0, + .blue = vc->colorkey.b / 255.0, + .alpha = 0, + }; + vdp_st = vdp->presentation_queue_set_background_color(vc->flip_queue, + &color); + CHECK_VDP_WARNING(vo, "Error setting colorkey"); + } + + if (vc->composite_detect && vo_x11_screen_is_composited(vo)) { + MP_INFO(vo, "Compositing window manager detected. Assuming timing info " + "is inaccurate.\n"); + vc->user_fps = -1; + } + + return 0; +} + +// Free everything specific to a certain video file +static void free_video_specific(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + + forget_frames(vo, false); + + if (vc->black_pixel != VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_destroy(vc->black_pixel); + CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_destroy"); + } + vc->black_pixel = VDP_INVALID_HANDLE; +} + +static int initialize_vdpau_objects(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + + mp_vdpau_get_format(vc->image_format, NULL, &vc->vdp_pixel_format); + + vc->video_mixer->initialized = false; + + if (win_x11_init_vdpau_flip_queue(vo) < 0) + return -1; + + if (vc->black_pixel == VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_create(vc->vdp_device, OUTPUT_RGBA_FORMAT, + 1, 1, &vc->black_pixel); + CHECK_VDP_ERROR(vo, "Allocating clearing surface"); + const char data[4] = {0}; + vdp_st = vdp->output_surface_put_bits_native(vc->black_pixel, + (const void*[]){data}, + (uint32_t[]){4}, NULL); + CHECK_VDP_ERROR(vo, "Initializing clearing surface"); + } + + forget_frames(vo, false); + resize(vo); + return 0; +} + +static void 
mark_vdpau_objects_uninitialized(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + + forget_frames(vo, false); + vc->black_pixel = VDP_INVALID_HANDLE; + vc->flip_queue = VDP_INVALID_HANDLE; + vc->flip_target = VDP_INVALID_HANDLE; + for (int i = 0; i < MAX_OUTPUT_SURFACES; i++) + vc->output_surfaces[i] = VDP_INVALID_HANDLE; + vc->rotation_surface = VDP_INVALID_HANDLE; + vc->vdp_device = VDP_INVALID_HANDLE; + for (int i = 0; i < MAX_OSD_PARTS; i++) { + struct osd_bitmap_surface *sfc = &vc->osd_surfaces[i]; + sfc->change_id = 0; + *sfc = (struct osd_bitmap_surface){ + .surface = VDP_INVALID_HANDLE, + }; + } + vc->output_surface_w = vc->output_surface_h = -1; +} + +static bool check_preemption(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + + int r = mp_vdpau_handle_preemption(vc->mpvdp, &vc->preemption_counter); + if (r < 1) { + mark_vdpau_objects_uninitialized(vo); + if (r < 0) + return false; + vc->vdp_device = vc->mpvdp->vdp_device; + if (initialize_vdpau_objects(vo) < 0) + return false; + } + return true; +} + +static bool status_ok(struct vo *vo) +{ + return vo->config_ok && check_preemption(vo); +} + +/* + * connect to X server, create and map window, initialize all + * VDPAU objects, create different surfaces etc. + */ +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + + if (!check_preemption(vo)) + { + /* + * When prempted, leave the reconfig() immediately + * without reconfiguring the vo_window and without + * initializing the vdpau objects. When recovered + * from preemption, if there is a difference between + * the VD thread parameters and the VO thread parameters + * the reconfig() is triggered again. 
+ */ + return 0; + } + + VdpChromaType chroma_type = VDP_CHROMA_TYPE_420; + mp_vdpau_get_format(params->imgfmt, &chroma_type, NULL); + + VdpBool ok; + uint32_t max_w, max_h; + vdp_st = vdp->video_surface_query_capabilities(vc->vdp_device, chroma_type, + &ok, &max_w, &max_h); + CHECK_VDP_ERROR(vo, "Error when calling vdp_video_surface_query_capabilities"); + + if (!ok) + return -1; + if (params->w > max_w || params->h > max_h) { + if (ok) + MP_ERR(vo, "Video too large for vdpau.\n"); + return -1; + } + + vc->image_format = params->imgfmt; + vc->vid_width = params->w; + vc->vid_height = params->h; + + vc->rgb_mode = mp_vdpau_get_rgb_format(params->imgfmt, NULL); + + free_video_specific(vo); + + vo_x11_config_vo_window(vo); + + if (initialize_vdpau_objects(vo) < 0) + return -1; + + return 0; +} + +static void draw_osd_part(struct vo *vo, int index) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + struct osd_bitmap_surface *sfc = &vc->osd_surfaces[index]; + VdpOutputSurface output_surface = vc->output_surfaces[vc->surface_num]; + int i; + + VdpOutputSurfaceRenderBlendState blend_state = { + .struct_version = VDP_OUTPUT_SURFACE_RENDER_BLEND_STATE_VERSION, + .blend_factor_source_color = + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_SRC_ALPHA, + .blend_factor_source_alpha = + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ZERO, + .blend_factor_destination_color = + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, + .blend_factor_destination_alpha = + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ZERO, + .blend_equation_color = VDP_OUTPUT_SURFACE_RENDER_BLEND_EQUATION_ADD, + .blend_equation_alpha = VDP_OUTPUT_SURFACE_RENDER_BLEND_EQUATION_ADD, + }; + + VdpOutputSurfaceRenderBlendState blend_state_premultiplied = blend_state; + blend_state_premultiplied.blend_factor_source_color = + VDP_OUTPUT_SURFACE_RENDER_BLEND_FACTOR_ONE; + + for (i = 0; i < sfc->render_count; i++) { + VdpOutputSurfaceRenderBlendState *blend = &blend_state; + if 
(sfc->format == VDP_RGBA_FORMAT_B8G8R8A8) + blend = &blend_state_premultiplied; + vdp_st = vdp-> + output_surface_render_bitmap_surface(output_surface, + &sfc->targets[i].dest, + sfc->surface, + &sfc->targets[i].source, + &sfc->targets[i].color, + blend, + VDP_OUTPUT_SURFACE_RENDER_ROTATE_0); + CHECK_VDP_WARNING(vo, "OSD: Error when rendering"); + } +} + +static int next_pow2(int v) +{ + for (int x = 0; x < 30; x++) { + if ((1 << x) >= v) + return 1 << x; + } + return INT_MAX; +} + +static void generate_osd_part(struct vo *vo, struct sub_bitmaps *imgs) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + struct osd_bitmap_surface *sfc = &vc->osd_surfaces[imgs->render_index]; + + if (imgs->change_id == sfc->change_id) + return; // Nothing changed and we still have the old data + + sfc->change_id = imgs->change_id; + sfc->render_count = 0; + + if (imgs->format == SUBBITMAP_EMPTY || imgs->num_parts == 0) + return; + + VdpRGBAFormat format; + switch (imgs->format) { + case SUBBITMAP_LIBASS: + format = VDP_RGBA_FORMAT_A8; + break; + case SUBBITMAP_BGRA: + format = VDP_RGBA_FORMAT_B8G8R8A8; + break; + default: + MP_ASSERT_UNREACHABLE(); + }; + + assert(imgs->packed); + + int r_w = next_pow2(imgs->packed_w); + int r_h = next_pow2(imgs->packed_h); + + if (sfc->format != format || sfc->surface == VDP_INVALID_HANDLE || + sfc->surface_w < r_w || sfc->surface_h < r_h) + { + MP_VERBOSE(vo, "Allocating a %dx%d surface for OSD bitmaps.\n", r_w, r_h); + + uint32_t m_w = 0, m_h = 0; + vdp_st = vdp->bitmap_surface_query_capabilities(vc->vdp_device, format, + &(VdpBool){0}, &m_w, &m_h); + CHECK_VDP_WARNING(vo, "Query to get max OSD surface size failed"); + + if (r_w > m_w || r_h > m_h) { + MP_ERR(vo, "OSD bitmaps do not fit on a surface with the maximum " + "supported size\n"); + return; + } + + if (sfc->surface != VDP_INVALID_HANDLE) { + vdp_st = vdp->bitmap_surface_destroy(sfc->surface); + CHECK_VDP_WARNING(vo, "Error when calling 
vdp_bitmap_surface_destroy"); + } + + VdpBitmapSurface surface; + vdp_st = vdp->bitmap_surface_create(vc->vdp_device, format, + r_w, r_h, true, &surface); + CHECK_VDP_WARNING(vo, "OSD: error when creating surface"); + if (vdp_st != VDP_STATUS_OK) + return; + + sfc->surface = surface; + sfc->surface_w = r_w; + sfc->surface_h = r_h; + sfc->format = format; + } + + void *data = imgs->packed->planes[0]; + int stride = imgs->packed->stride[0]; + VdpRect rc = {0, 0, imgs->packed_w, imgs->packed_h}; + vdp_st = vdp->bitmap_surface_put_bits_native(sfc->surface, + &(const void *){data}, + &(uint32_t){stride}, + &rc); + CHECK_VDP_WARNING(vo, "OSD: putbits failed"); + + MP_TARRAY_GROW(vc, sfc->targets, imgs->num_parts); + sfc->render_count = imgs->num_parts; + + for (int i = 0; i < imgs->num_parts; i++) { + struct sub_bitmap *b = &imgs->parts[i]; + struct osd_target *target = &sfc->targets[i]; + target->source = (VdpRect){b->src_x, b->src_y, + b->src_x + b->w, b->src_y + b->h}; + target->dest = (VdpRect){b->x, b->y, b->x + b->dw, b->y + b->dh}; + target->color = (VdpColor){1, 1, 1, 1}; + if (imgs->format == SUBBITMAP_LIBASS) { + uint32_t color = b->libass.color; + target->color.alpha = 1.0 - ((color >> 0) & 0xff) / 255.0; + target->color.blue = ((color >> 8) & 0xff) / 255.0; + target->color.green = ((color >> 16) & 0xff) / 255.0; + target->color.red = ((color >> 24) & 0xff) / 255.0; + } + } +} + +static void draw_osd_cb(void *ctx, struct sub_bitmaps *imgs) +{ + struct vo *vo = ctx; + generate_osd_part(vo, imgs); + draw_osd_part(vo, imgs->render_index); +} + +static void draw_osd(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + + if (!status_ok(vo)) + return; + + bool formats[SUBBITMAP_COUNT] = { + [SUBBITMAP_LIBASS] = vc->supports_a8, + [SUBBITMAP_BGRA] = true, + }; + + double pts = vc->current_image ? 
vc->current_image->pts : 0; + osd_draw(vo->osd, vc->osd_rect, pts, 0, formats, draw_osd_cb, vo); +} + +static int update_presentation_queue_status(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + + while (vc->query_surface_num != vc->surface_num) { + VdpTime vtime; + VdpPresentationQueueStatus status; + VdpOutputSurface surface = vc->output_surfaces[vc->query_surface_num]; + vdp_st = vdp->presentation_queue_query_surface_status(vc->flip_queue, + surface, + &status, &vtime); + CHECK_VDP_WARNING(vo, "Error calling " + "presentation_queue_query_surface_status"); + if (mp_msg_test(vo->log, MSGL_TRACE)) { + VdpTime current; + vdp_st = vdp->presentation_queue_get_time(vc->flip_queue, ¤t); + CHECK_VDP_WARNING(vo, "Error when calling " + "vdp_presentation_queue_get_time"); + MP_TRACE(vo, "Vdpau time: %"PRIu64"\n", (uint64_t)current); + MP_TRACE(vo, "Surface %d status: %d time: %"PRIu64"\n", + (int)surface, (int)status, (uint64_t)vtime); + } + if (status == VDP_PRESENTATION_QUEUE_STATUS_QUEUED) + break; + if (vc->vsync_interval > 1) { + uint64_t qtime = vc->queue_time[vc->query_surface_num]; + int diff = ((int64_t)vtime - (int64_t)qtime) / 1e6; + MP_TRACE(vo, "Queue time difference: %d ms\n", diff); + if (vtime < qtime + vc->vsync_interval / 2) + MP_VERBOSE(vo, "Frame shown too early (%d ms)\n", diff); + if (vtime > qtime + vc->vsync_interval) + MP_VERBOSE(vo, "Frame shown late (%d ms)\n", diff); + } + vc->query_surface_num = WRAP_ADD(vc->query_surface_num, 1, + vc->num_output_surfaces); + vc->recent_vsync_time = vtime; + } + int num_queued = WRAP_ADD(vc->surface_num, -vc->query_surface_num, + vc->num_output_surfaces); + MP_DBG(vo, "Queued surface count (before add): %d\n", num_queued); + return num_queued; +} + +// Return the timestamp of the vsync that must have happened before ts. 
+static inline uint64_t prev_vsync(struct vdpctx *vc, uint64_t ts) +{ + int64_t diff = (int64_t)(ts - vc->recent_vsync_time); + int64_t offset = diff % vc->vsync_interval; + if (offset < 0) + offset += vc->vsync_interval; + return ts - offset; +} + +static void flip_page(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + VdpStatus vdp_st; + + int64_t pts_us = vc->current_pts; + int duration = vc->current_duration; + + vc->dropped_frame = true; // changed at end if false + + if (!check_preemption(vo)) + goto drop; + + vc->vsync_interval = 1; + if (vc->user_fps > 0) { + vc->vsync_interval = 1e9 / vc->user_fps; + } else if (vc->user_fps == 0) { + vc->vsync_interval = vo_get_vsync_interval(vo); + } + vc->vsync_interval = MPMAX(vc->vsync_interval, 1); + + if (duration > INT_MAX / 1000) + duration = -1; + else + duration *= 1000; + + if (vc->vsync_interval == 1) + duration = -1; // Make sure drop logic is disabled + + VdpTime vdp_time = 0; + vdp_st = vdp->presentation_queue_get_time(vc->flip_queue, &vdp_time); + CHECK_VDP_WARNING(vo, "Error when calling vdp_presentation_queue_get_time"); + + int64_t rel_pts_ns = (pts_us * 1000) - mp_time_ns(); + if (!pts_us || rel_pts_ns < 0) + rel_pts_ns = 0; + + uint64_t now = vdp_time; + uint64_t pts = now + rel_pts_ns; + uint64_t ideal_pts = pts; + uint64_t npts = duration >= 0 ? pts + duration : UINT64_MAX; + + /* This should normally never happen. + * - The last queued frame can't have a PTS that goes more than 50ms in the + * future. This is guaranteed by vo.c, which currently actually queues + * ahead by roughly the flip queue offset. Just to be sure + * give some additional room by doubling the time. + * - The last vsync can never be in the future. 
+ */ + int64_t max_pts_ahead = vc->flip_offset_us * 1000 * 2; + if (vc->last_queue_time > now + max_pts_ahead || + vc->recent_vsync_time > now) + { + vc->last_queue_time = 0; + vc->recent_vsync_time = 0; + MP_WARN(vo, "Inconsistent timing detected.\n"); + } + +#define PREV_VSYNC(ts) prev_vsync(vc, ts) + + /* We hope to be here at least one vsync before the frame should be shown. + * If we are running late then don't drop the frame unless there is + * already one queued for the next vsync; even if we _hope_ to show the + * next frame soon enough to mean this one should be dropped we might + * not make the target time in reality. Without this check we could drop + * every frame, freezing the display completely if video lags behind. + */ + if (now > PREV_VSYNC(MPMAX(pts, vc->last_queue_time + vc->vsync_interval))) + npts = UINT64_MAX; + + /* Allow flipping a frame at a vsync if its presentation time is a + * bit after that vsync and the change makes the flip time delta + * from previous frame better match the target timestamp delta. + * This avoids instability with frame timestamps falling near vsyncs. + * For example if the frame timestamps were (with vsyncs at + * integer values) 0.01, 1.99, 4.01, 5.99, 8.01, ... then + * straightforward timing at next vsync would flip the frames at + * 1, 2, 5, 6, 9; this changes it to 1, 2, 4, 6, 8 and so on with + * regular 2-vsync intervals. + * + * Also allow moving the frame forward if it looks like we dropped + * the previous frame incorrectly (now that we know better after + * having final exact timestamp information for this frame) and + * there would unnecessarily be a vsync without a frame change. 
+ */ + uint64_t vsync = PREV_VSYNC(pts); + if (pts < vsync + vc->vsync_interval / 4 + && (vsync - PREV_VSYNC(vc->last_queue_time) + > pts - vc->last_ideal_time + vc->vsync_interval / 2 + || (vc->dropped_frame && vsync > vc->dropped_time))) + pts -= vc->vsync_interval / 2; + + vc->dropped_time = ideal_pts; + + pts = MPMAX(pts, vc->last_queue_time + vc->vsync_interval); + pts = MPMAX(pts, now); + if (npts < PREV_VSYNC(pts) + vc->vsync_interval) + goto drop; + + int num_flips = update_presentation_queue_status(vo); + vsync = vc->recent_vsync_time + num_flips * vc->vsync_interval; + pts = MPMAX(pts, now); + pts = MPMAX(pts, vsync + (vc->vsync_interval >> 2)); + vsync = PREV_VSYNC(pts); + if (npts < vsync + vc->vsync_interval) + goto drop; + pts = vsync + (vc->vsync_interval >> 2); + VdpOutputSurface frame = vc->output_surfaces[vc->surface_num]; + vdp_st = vdp->presentation_queue_display(vc->flip_queue, frame, + vo->dwidth, vo->dheight, pts); + CHECK_VDP_WARNING(vo, "Error when calling vdp_presentation_queue_display"); + + MP_TRACE(vo, "Queue new surface %d: Vdpau time: %"PRIu64" " + "pts: %"PRIu64"\n", (int)frame, now, pts); + + vc->last_queue_time = pts; + vc->queue_time[vc->surface_num] = pts; + vc->last_ideal_time = ideal_pts; + vc->dropped_frame = false; + vc->surface_num = WRAP_ADD(vc->surface_num, 1, vc->num_output_surfaces); + return; + +drop: + vo_increment_drop_count(vo, 1); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct vdpctx *vc = vo->priv; + + check_preemption(vo); + + if (frame->current && !frame->redraw) { + struct mp_image *vdp_mpi = + mp_vdpau_upload_video_surface(vc->mpvdp, frame->current); + if (!vdp_mpi) + MP_ERR(vo, "Could not upload image.\n"); + + talloc_free(vc->current_image); + vc->current_image = vdp_mpi; + } + + vc->current_pts = frame->pts; + vc->current_duration = frame->duration; + + if (status_ok(vo)) { + video_to_output_surface(vo, vc->current_image); + draw_osd(vo); + } +} + +// warning: the size and 
pixel format of surface must match that of the +// surfaces in vc->output_surfaces +static struct mp_image *read_output_surface(struct vo *vo, + VdpOutputSurface surface) +{ + struct vdpctx *vc = vo->priv; + VdpStatus vdp_st; + struct vdp_functions *vdp = vc->vdp; + if (!vo->params) + return NULL; + + VdpRGBAFormat fmt; + uint32_t w, h; + vdp_st = vdp->output_surface_get_parameters(surface, &fmt, &w, &h); + if (vdp_st != VDP_STATUS_OK) + return NULL; + + assert(fmt == OUTPUT_RGBA_FORMAT); + + struct mp_image *image = mp_image_alloc(IMGFMT_BGR0, w, h); + if (!image) + return NULL; + + void *dst_planes[] = { image->planes[0] }; + uint32_t dst_pitches[] = { image->stride[0] }; + vdp_st = vdp->output_surface_get_bits_native(surface, NULL, dst_planes, + dst_pitches); + CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_get_bits_native"); + + return image; +} + +static struct mp_image *get_window_screenshot(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + int last_surface = WRAP_ADD(vc->surface_num, -1, vc->num_output_surfaces); + VdpOutputSurface screen = vc->output_surfaces[last_surface]; + struct mp_image *image = read_output_surface(vo, screen); + if (image && image->w >= vo->dwidth && image->h >= vo->dheight) + mp_image_set_size(image, vo->dwidth, vo->dheight); + return image; +} + +static int query_format(struct vo *vo, int format) +{ + struct vdpctx *vc = vo->priv; + + if (mp_vdpau_get_format(format, NULL, NULL)) + return 1; + if (!vc->force_yuv && mp_vdpau_get_rgb_format(format, NULL)) + return 1; + return 0; +} + +static void destroy_vdpau_objects(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + struct vdp_functions *vdp = vc->vdp; + + VdpStatus vdp_st; + + free_video_specific(vo); + + if (vc->flip_queue != VDP_INVALID_HANDLE) { + vdp_st = vdp->presentation_queue_destroy(vc->flip_queue); + CHECK_VDP_WARNING(vo, "Error when calling vdp_presentation_queue_destroy"); + } + + if (vc->flip_target != VDP_INVALID_HANDLE) { + vdp_st = 
vdp->presentation_queue_target_destroy(vc->flip_target); + CHECK_VDP_WARNING(vo, "Error when calling " + "vdp_presentation_queue_target_destroy"); + } + + for (int i = 0; i < vc->num_output_surfaces; i++) { + if (vc->output_surfaces[i] == VDP_INVALID_HANDLE) + continue; + vdp_st = vdp->output_surface_destroy(vc->output_surfaces[i]); + CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_destroy"); + } + if (vc->rotation_surface != VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_destroy(vc->rotation_surface); + CHECK_VDP_WARNING(vo, "Error when calling vdp_output_surface_destroy"); + } + + for (int i = 0; i < MAX_OSD_PARTS; i++) { + struct osd_bitmap_surface *sfc = &vc->osd_surfaces[i]; + if (sfc->surface != VDP_INVALID_HANDLE) { + vdp_st = vdp->bitmap_surface_destroy(sfc->surface); + CHECK_VDP_WARNING(vo, "Error when calling vdp_bitmap_surface_destroy"); + } + } + + mp_vdpau_destroy(vc->mpvdp); + vc->mpvdp = NULL; +} + +static void uninit(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + + hwdec_devices_remove(vo->hwdec_devs, &vc->mpvdp->hwctx); + hwdec_devices_destroy(vo->hwdec_devs); + + /* Destroy all vdpau objects */ + mp_vdpau_mixer_destroy(vc->video_mixer); + destroy_vdpau_objects(vo); + + vo_x11_uninit(vo); +} + +static int preinit(struct vo *vo) +{ + struct vdpctx *vc = vo->priv; + + if (!vo_x11_init(vo)) + return -1; + + if (!vo_x11_create_vo_window(vo, NULL, "vdpau")) { + vo_x11_uninit(vo); + return -1; + } + + vc->mpvdp = mp_vdpau_create_device_x11(vo->log, vo->x11->display, false); + if (!vc->mpvdp) { + vo_x11_uninit(vo); + return -1; + } + vc->mpvdp->hwctx.hw_imgfmt = IMGFMT_VDPAU; + + vo->hwdec_devs = hwdec_devices_create(); + hwdec_devices_add(vo->hwdec_devs, &vc->mpvdp->hwctx); + + vc->video_mixer = mp_vdpau_mixer_create(vc->mpvdp, vo->log); + vc->video_mixer->video_eq = mp_csp_equalizer_create(vo, vo->global); + + if (mp_vdpau_guess_if_emulated(vc->mpvdp)) { + MP_WARN(vo, "VDPAU is most likely emulated via VA-API.\n" + "This is 
inefficient. Use --vo=gpu instead.\n"); + } + + // Mark everything as invalid first so uninit() can tell what has been + // allocated + mark_vdpau_objects_uninitialized(vo); + + mp_vdpau_handle_preemption(vc->mpvdp, &vc->preemption_counter); + + vc->vdp_device = vc->mpvdp->vdp_device; + vc->vdp = &vc->mpvdp->vdp; + + vc->vdp->bitmap_surface_query_capabilities(vc->vdp_device, VDP_RGBA_FORMAT_A8, + &vc->supports_a8, &(uint32_t){0}, &(uint32_t){0}); + + MP_WARN(vo, "Warning: this compatibility VO is low quality and may " + "have issues with OSD, scaling, screenshots and more.\n" + "vo=gpu is the preferred choice in any case and " + "includes VDPAU support via hwdec=vdpau or vdpau-copy.\n"); + + return 0; +} + +static void checked_resize(struct vo *vo) +{ + if (!status_ok(vo)) + return; + resize(vo); +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + check_preemption(vo); + + switch (request) { + case VOCTRL_SET_PANSCAN: + checked_resize(vo); + return VO_TRUE; + case VOCTRL_SET_EQUALIZER: + vo->want_redraw = true; + return true; + case VOCTRL_RESET: + forget_frames(vo, true); + return true; + case VOCTRL_SCREENSHOT_WIN: + if (!status_ok(vo)) + return false; + *(struct mp_image **)data = get_window_screenshot(vo); + return true; + } + + int events = 0; + int r = vo_x11_control(vo, &events, request, data); + + if (events & VO_EVENT_RESIZE) { + checked_resize(vo); + } else if (events & VO_EVENT_EXPOSE) { + vo->want_redraw = true; + } + vo_event(vo, events); + + return r; +} + +#define OPT_BASE_STRUCT struct vdpctx + +const struct vo_driver video_out_vdpau = { + .description = "VDPAU with X11", + .name = "vdpau", + .caps = VO_CAP_FRAMEDROP | VO_CAP_ROTATE90, + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .wakeup = vo_x11_wakeup, + .wait_events = vo_x11_wait_events, + .uninit = uninit, + .priv_size = sizeof(struct vdpctx), + .options = (const 
struct m_option []){ + {"chroma-deint", OPT_BOOL(chroma_deint), OPTDEF_INT(1)}, + {"pullup", OPT_BOOL(pullup)}, + {"denoise", OPT_FLOAT(denoise), M_RANGE(0, 1)}, + {"sharpen", OPT_FLOAT(sharpen), M_RANGE(-1, 1)}, + {"hqscaling", OPT_INT(hqscaling), M_RANGE(0, 9)}, + {"fps", OPT_FLOAT(user_fps)}, + {"composite-detect", OPT_BOOL(composite_detect), OPTDEF_INT(1)}, + {"queuetime-windowed", OPT_INT(flip_offset_window), OPTDEF_INT(50)}, + {"queuetime-fs", OPT_INT(flip_offset_fs), OPTDEF_INT(50)}, + {"output-surfaces", OPT_INT(num_output_surfaces), + M_RANGE(2, MAX_OUTPUT_SURFACES), OPTDEF_INT(3)}, + {"colorkey", OPT_COLOR(colorkey), + .defval = &(const struct m_color){.r = 2, .g = 5, .b = 7, .a = 255}}, + {"force-yuv", OPT_BOOL(force_yuv)}, + {NULL}, + }, + .options_prefix = "vo-vdpau", +}; diff --git a/video/out/vo_wlshm.c b/video/out/vo_wlshm.c new file mode 100644 index 0000000..1e5e009 --- /dev/null +++ b/video/out/vo_wlshm.c @@ -0,0 +1,324 @@ +/* + * This file is part of mpv video player. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <errno.h> +#include <fcntl.h> +#include <sys/mman.h> +#include <time.h> +#include <unistd.h> + +#include <libswscale/swscale.h> + +#include "osdep/endian.h" +#include "present_sync.h" +#include "sub/osd.h" +#include "video/fmt-conversion.h" +#include "video/mp_image.h" +#include "video/sws_utils.h" +#include "vo.h" +#include "wayland_common.h" + +struct buffer { + struct vo *vo; + size_t size; + struct wl_shm_pool *pool; + struct wl_buffer *buffer; + struct mp_image mpi; + struct buffer *next; +}; + +struct priv { + struct mp_sws_context *sws; + struct buffer *free_buffers; + struct mp_rect src; + struct mp_rect dst; + struct mp_osd_res osd; +}; + +static void buffer_handle_release(void *data, struct wl_buffer *wl_buffer) +{ + struct buffer *buf = data; + struct vo *vo = buf->vo; + struct priv *p = vo->priv; + + if (buf->mpi.w == vo->dwidth && buf->mpi.h == vo->dheight) { + buf->next = p->free_buffers; + p->free_buffers = buf; + } else { + talloc_free(buf); + } +} + +static const struct wl_buffer_listener buffer_listener = { + buffer_handle_release, +}; + +static void buffer_destroy(void *p) +{ + struct buffer *buf = p; + wl_buffer_destroy(buf->buffer); + wl_shm_pool_destroy(buf->pool); + munmap(buf->mpi.planes[0], buf->size); +} + +static struct buffer *buffer_create(struct vo *vo, int width, int height) +{ + struct priv *p = vo->priv; + struct vo_wayland_state *wl = vo->wl; + int fd; + int stride; + size_t size; + uint8_t *data; + struct buffer *buf; + + stride = MP_ALIGN_UP(width * 4, 16); + size = height * stride; + fd = vo_wayland_allocate_memfd(vo, size); + if (fd < 0) + goto error0; + data = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0); + if (data == MAP_FAILED) + goto error1; + buf = talloc_zero(NULL, struct buffer); + if (!buf) + goto error2; + buf->vo = vo; + buf->size = size; + mp_image_set_params(&buf->mpi, &p->sws->dst); + mp_image_set_size(&buf->mpi, width, height); + buf->mpi.planes[0] = data; + buf->mpi.stride[0] = 
stride; + buf->pool = wl_shm_create_pool(wl->shm, fd, size); + if (!buf->pool) + goto error3; + buf->buffer = wl_shm_pool_create_buffer(buf->pool, 0, width, height, + stride, WL_SHM_FORMAT_XRGB8888); + if (!buf->buffer) + goto error4; + wl_buffer_add_listener(buf->buffer, &buffer_listener, buf); + + close(fd); + talloc_set_destructor(buf, buffer_destroy); + + return buf; + +error4: + wl_shm_pool_destroy(buf->pool); +error3: + talloc_free(buf); +error2: + munmap(data, size); +error1: + close(fd); +error0: + return NULL; +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + struct buffer *buf; + + while (p->free_buffers) { + buf = p->free_buffers; + p->free_buffers = buf->next; + talloc_free(buf); + } + vo_wayland_uninit(vo); +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + + if (!vo_wayland_init(vo)) + goto err; + if (!vo->wl->shm) { + MP_FATAL(vo->wl, "Compositor doesn't support the %s protocol!\n", + wl_shm_interface.name); + goto err; + } + p->sws = mp_sws_alloc(vo); + p->sws->log = vo->log; + mp_sws_enable_cmdline_opts(p->sws, vo->global); + + return 0; +err: + uninit(vo); + return -1; +} + +static int query_format(struct vo *vo, int format) +{ + return sws_isSupportedInput(imgfmt2pixfmt(format)); +} + +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct priv *p = vo->priv; + + if (!vo_wayland_reconfig(vo)) + return -1; + p->sws->src = *params; + + return 0; +} + +static int resize(struct vo *vo) +{ + struct priv *p = vo->priv; + struct vo_wayland_state *wl = vo->wl; + const int32_t width = mp_rect_w(wl->geometry); + const int32_t height = mp_rect_h(wl->geometry); + + if (width == 0 || height == 0) + return 1; + + struct buffer *buf; + + vo_wayland_set_opaque_region(wl, false); + vo->want_redraw = true; + vo->dwidth = width; + vo->dheight = height; + vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd); + p->sws->dst = (struct mp_image_params) { + .imgfmt = MP_SELECT_LE_BE(IMGFMT_BGR0, 
IMGFMT_0RGB), + .w = width, + .h = height, + .p_w = 1, + .p_h = 1, + }; + mp_image_params_guess_csp(&p->sws->dst); + while (p->free_buffers) { + buf = p->free_buffers; + p->free_buffers = buf->next; + talloc_free(buf); + } + + vo_wayland_handle_fractional_scale(wl); + + return mp_sws_reinit(p->sws); +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + switch (request) { + case VOCTRL_SET_PANSCAN: + resize(vo); + return VO_TRUE; + } + + int events = 0; + int ret = vo_wayland_control(vo, &events, request, data); + + if (events & VO_EVENT_RESIZE) + ret = resize(vo); + if (events & VO_EVENT_EXPOSE) + vo->want_redraw = true; + vo_event(vo, events); + return ret; +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + struct vo_wayland_state *wl = vo->wl; + struct mp_image *src = frame->current; + struct buffer *buf; + + bool render = vo_wayland_check_visible(vo); + if (!render) + return; + + buf = p->free_buffers; + if (buf) { + p->free_buffers = buf->next; + } else { + buf = buffer_create(vo, vo->dwidth, vo->dheight); + if (!buf) { + wl_surface_attach(wl->surface, NULL, 0, 0); + return; + } + } + if (src) { + struct mp_image dst = buf->mpi; + struct mp_rect src_rc; + struct mp_rect dst_rc; + src_rc.x0 = MP_ALIGN_DOWN(p->src.x0, MPMAX(src->fmt.align_x, 4)); + src_rc.y0 = MP_ALIGN_DOWN(p->src.y0, MPMAX(src->fmt.align_y, 4)); + src_rc.x1 = p->src.x1 - (p->src.x0 - src_rc.x0); + src_rc.y1 = p->src.y1 - (p->src.y0 - src_rc.y0); + dst_rc.x0 = MP_ALIGN_DOWN(p->dst.x0, MPMAX(dst.fmt.align_x, 4)); + dst_rc.y0 = MP_ALIGN_DOWN(p->dst.y0, MPMAX(dst.fmt.align_y, 4)); + dst_rc.x1 = p->dst.x1 - (p->dst.x0 - dst_rc.x0); + dst_rc.y1 = p->dst.y1 - (p->dst.y0 - dst_rc.y0); + mp_image_crop_rc(src, src_rc); + mp_image_crop_rc(&dst, dst_rc); + mp_sws_scale(p->sws, &dst, src); + if (dst_rc.y0 > 0) + mp_image_clear(&buf->mpi, 0, 0, buf->mpi.w, dst_rc.y0); + if (buf->mpi.h > dst_rc.y1) + mp_image_clear(&buf->mpi, 0, dst_rc.y1, 
buf->mpi.w, buf->mpi.h); + if (dst_rc.x0 > 0) + mp_image_clear(&buf->mpi, 0, dst_rc.y0, dst_rc.x0, dst_rc.y1); + if (buf->mpi.w > dst_rc.x1) + mp_image_clear(&buf->mpi, dst_rc.x1, dst_rc.y0, buf->mpi.w, dst_rc.y1); + osd_draw_on_image(vo->osd, p->osd, src->pts, 0, &buf->mpi); + } else { + mp_image_clear(&buf->mpi, 0, 0, buf->mpi.w, buf->mpi.h); + osd_draw_on_image(vo->osd, p->osd, 0, 0, &buf->mpi); + } + wl_surface_attach(wl->surface, buf->buffer, 0, 0); +} + +static void flip_page(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + + wl_surface_damage_buffer(wl->surface, 0, 0, vo->dwidth, + vo->dheight); + wl_surface_commit(wl->surface); + + if (!wl->opts->disable_vsync) + vo_wayland_wait_frame(wl); + + if (wl->use_present) + present_sync_swap(wl->present); +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct vo_wayland_state *wl = vo->wl; + if (wl->use_present) + present_sync_get_info(wl->present, info); +} + +const struct vo_driver video_out_wlshm = { + .description = "Wayland SHM video output (software scaling)", + .name = "wlshm", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .get_vsync = get_vsync, + .wakeup = vo_wayland_wakeup, + .wait_events = vo_wayland_wait_events, + .uninit = uninit, + .priv_size = sizeof(struct priv), +}; diff --git a/video/out/vo_x11.c b/video/out/vo_x11.c new file mode 100644 index 0000000..fa93157 --- /dev/null +++ b/video/out/vo_x11.c @@ -0,0 +1,447 @@ +/* + * Original author: Aaron Holtzman <aholtzma@ess.engr.uvic.ca> + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> + +#include <libswscale/swscale.h> + +#include "vo.h" +#include "video/csputils.h" +#include "video/mp_image.h" + +#include <X11/Xlib.h> +#include <X11/Xutil.h> + +#include <errno.h> + +#include "present_sync.h" +#include "x11_common.h" + +#include <sys/ipc.h> +#include <sys/shm.h> +#include <X11/extensions/XShm.h> + +#include "sub/osd.h" +#include "sub/draw_bmp.h" + +#include "video/sws_utils.h" +#include "video/fmt-conversion.h" + +#include "common/msg.h" +#include "input/input.h" +#include "options/options.h" +#include "osdep/timer.h" + +struct priv { + struct vo *vo; + + struct mp_image *original_image; + + XImage *myximage[2]; + struct mp_image mp_ximages[2]; + int depth; + GC gc; + + uint32_t image_width; + uint32_t image_height; + + struct mp_rect src; + struct mp_rect dst; + struct mp_osd_res osd; + + struct mp_sws_context *sws; + + XVisualInfo vinfo; + + int current_buf; + + int Shmem_Flag; + XShmSegmentInfo Shminfo[2]; + int Shm_Warned_Slow; +}; + +static bool resize(struct vo *vo); + +static bool getMyXImage(struct priv *p, int foo) +{ + struct vo *vo = p->vo; + if (vo->x11->display_is_local && XShmQueryExtension(vo->x11->display)) { + p->Shmem_Flag = 1; + vo->x11->ShmCompletionEvent = XShmGetEventBase(vo->x11->display) + + ShmCompletion; + } else { + p->Shmem_Flag = 0; + MP_WARN(vo, "Shared memory not supported\nReverting to normal Xlib\n"); + } + + if (p->Shmem_Flag) { + p->myximage[foo] = + XShmCreateImage(vo->x11->display, p->vinfo.visual, p->depth, + ZPixmap, NULL, &p->Shminfo[foo], 
p->image_width, + p->image_height); + if (p->myximage[foo] == NULL) { + MP_WARN(vo, "Shared memory error,disabling ( Ximage error )\n"); + goto shmemerror; + } + p->Shminfo[foo].shmid = shmget(IPC_PRIVATE, + p->myximage[foo]->bytes_per_line * + p->myximage[foo]->height, + IPC_CREAT | 0777); + if (p->Shminfo[foo].shmid < 0) { + XDestroyImage(p->myximage[foo]); + MP_WARN(vo, "Shared memory error,disabling ( seg id error )\n"); + goto shmemerror; + } + p->Shminfo[foo].shmaddr = (char *) shmat(p->Shminfo[foo].shmid, 0, 0); + + if (p->Shminfo[foo].shmaddr == ((char *) -1)) { + XDestroyImage(p->myximage[foo]); + MP_WARN(vo, "Shared memory error,disabling ( address error )\n"); + goto shmemerror; + } + p->myximage[foo]->data = p->Shminfo[foo].shmaddr; + p->Shminfo[foo].readOnly = False; + XShmAttach(vo->x11->display, &p->Shminfo[foo]); + + XSync(vo->x11->display, False); + + shmctl(p->Shminfo[foo].shmid, IPC_RMID, 0); + } else { +shmemerror: + p->Shmem_Flag = 0; + + MP_VERBOSE(vo, "Not using SHM.\n"); + p->myximage[foo] = + XCreateImage(vo->x11->display, p->vinfo.visual, p->depth, ZPixmap, + 0, NULL, p->image_width, p->image_height, 8, 0); + if (p->myximage[foo]) { + p->myximage[foo]->data = + calloc(1, p->myximage[foo]->bytes_per_line * p->image_height + 32); + } + if (!p->myximage[foo] || !p->myximage[foo]->data) { + MP_WARN(vo, "could not allocate image"); + return false; + } + } + return true; +} + +static void freeMyXImage(struct priv *p, int foo) +{ + struct vo *vo = p->vo; + if (p->Shmem_Flag) { + XShmDetach(vo->x11->display, &p->Shminfo[foo]); + XDestroyImage(p->myximage[foo]); + shmdt(p->Shminfo[foo].shmaddr); + } else { + if (p->myximage[foo]) { + // XDestroyImage() would free the data too since XFree() just calls + // free(), but do it ourselves for portability reasons + free(p->myximage[foo]->data); + p->myximage[foo]->data = NULL; + XDestroyImage(p->myximage[foo]); + } + } + p->myximage[foo] = NULL; +} + +#define MAKE_MASK(comp) (((1ul << (comp).size) - 1) << 
(comp).offset) + +static int reconfig(struct vo *vo, struct mp_image_params *fmt) +{ + vo_x11_config_vo_window(vo); + + if (!resize(vo)) + return -1; + + return 0; +} + +static bool resize(struct vo *vo) +{ + struct priv *p = vo->priv; + + // Attempt to align. We don't know the size in bytes yet (????), so just + // assume worst case (1 byte per pixel). + int nw = MPMAX(1, MP_ALIGN_UP(vo->dwidth, MP_IMAGE_BYTE_ALIGN)); + int nh = MPMAX(1, vo->dheight); + + if (nw > p->image_width || nh > p->image_height) { + for (int i = 0; i < 2; i++) + freeMyXImage(p, i); + + p->image_width = nw; + p->image_height = nh; + + for (int i = 0; i < 2; i++) { + if (!getMyXImage(p, i)) { + p->image_width = 0; + p->image_height = 0; + return false; + } + } + } + + int mpfmt = 0; + for (int fmt = IMGFMT_START; fmt < IMGFMT_END; fmt++) { + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(fmt); + if ((desc.flags & MP_IMGFLAG_HAS_COMPS) && desc.num_planes == 1 && + (desc.flags & MP_IMGFLAG_COLOR_MASK) == MP_IMGFLAG_COLOR_RGB && + (desc.flags & MP_IMGFLAG_TYPE_MASK) == MP_IMGFLAG_TYPE_UINT && + (desc.flags & MP_IMGFLAG_NE) && !(desc.flags & MP_IMGFLAG_ALPHA) && + desc.bpp[0] <= 8 * sizeof(unsigned long) && + p->myximage[0]->bits_per_pixel == desc.bpp[0] && + p->myximage[0]->byte_order == MP_SELECT_LE_BE(LSBFirst, MSBFirst)) + { + // desc.comps[] uses little endian bit offsets, so "swap" the + // offsets here. + if (MP_SELECT_LE_BE(0, 1)) { + // Except for formats that use byte swapping; for these, the + // offsets are in native endian. There is no way to distinguish + // which one a given format is (could even be both), and using + // mp_find_other_endian() is just a guess. 
+ if (!mp_find_other_endian(fmt)) { + for (int c = 0; c < 3; c++) { + desc.comps[c].offset = + desc.bpp[0] - desc.comps[c].size -desc.comps[c].offset; + } + } + } + if (p->myximage[0]->red_mask == MAKE_MASK(desc.comps[0]) && + p->myximage[0]->green_mask == MAKE_MASK(desc.comps[1]) && + p->myximage[0]->blue_mask == MAKE_MASK(desc.comps[2])) + { + mpfmt = fmt; + break; + } + } + } + + if (!mpfmt) { + MP_ERR(vo, "X server image format not supported, use another VO.\n"); + return false; + } + MP_VERBOSE(vo, "Using mp format: %s\n", mp_imgfmt_to_name(mpfmt)); + + for (int i = 0; i < 2; i++) { + struct mp_image *img = &p->mp_ximages[i]; + *img = (struct mp_image){0}; + mp_image_setfmt(img, mpfmt); + mp_image_set_size(img, p->image_width, p->image_height); + img->planes[0] = p->myximage[i]->data; + img->stride[0] = p->myximage[i]->bytes_per_line; + + mp_image_params_guess_csp(&img->params); + } + + vo_get_src_dst_rects(vo, &p->src, &p->dst, &p->osd); + + if (vo->params) { + p->sws->src = *vo->params; + p->sws->src.w = mp_rect_w(p->src); + p->sws->src.h = mp_rect_h(p->src); + + p->sws->dst = p->mp_ximages[0].params; + p->sws->dst.w = mp_rect_w(p->dst); + p->sws->dst.h = mp_rect_h(p->dst); + + if (mp_sws_reinit(p->sws) < 0) + return false; + } + + vo->want_redraw = true; + return true; +} + +static void Display_Image(struct priv *p, XImage *myximage) +{ + struct vo *vo = p->vo; + + XImage *x_image = p->myximage[p->current_buf]; + + if (p->Shmem_Flag) { + XShmPutImage(vo->x11->display, vo->x11->window, p->gc, x_image, + 0, 0, 0, 0, vo->dwidth, vo->dheight, True); + vo->x11->ShmCompletionWaitCount++; + } else { + XPutImage(vo->x11->display, vo->x11->window, p->gc, x_image, + 0, 0, 0, 0, vo->dwidth, vo->dheight); + } +} + +static void wait_for_completion(struct vo *vo, int max_outstanding) +{ + struct priv *ctx = vo->priv; + struct vo_x11_state *x11 = vo->x11; + if (ctx->Shmem_Flag) { + while (x11->ShmCompletionWaitCount > max_outstanding) { + if (!ctx->Shm_Warned_Slow) { + 
MP_WARN(vo, "can't keep up! Waiting" + " for XShm completion events...\n"); + ctx->Shm_Warned_Slow = 1; + } + mp_sleep_ns(MP_TIME_MS_TO_NS(1)); + vo_x11_check_events(vo); + } + } +} + +static void flip_page(struct vo *vo) +{ + struct priv *p = vo->priv; + Display_Image(p, p->myximage[p->current_buf]); + p->current_buf = (p->current_buf + 1) % 2; + if (vo->x11->use_present) { + vo_x11_present(vo); + present_sync_swap(vo->x11->present); + } +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct vo_x11_state *x11 = vo->x11; + if (x11->use_present) + present_sync_get_info(x11->present, info); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct priv *p = vo->priv; + + wait_for_completion(vo, 1); + bool render = vo_x11_check_visible(vo); + if (!render) + return; + + struct mp_image *img = &p->mp_ximages[p->current_buf]; + + if (frame->current) { + mp_image_clear_rc_inv(img, p->dst); + + struct mp_image *src = frame->current; + struct mp_rect src_rc = p->src; + src_rc.x0 = MP_ALIGN_DOWN(src_rc.x0, src->fmt.align_x); + src_rc.y0 = MP_ALIGN_DOWN(src_rc.y0, src->fmt.align_y); + mp_image_crop_rc(src, src_rc); + + struct mp_image dst = *img; + mp_image_crop_rc(&dst, p->dst); + + mp_sws_scale(p->sws, &dst, src); + } else { + mp_image_clear(img, 0, 0, img->w, img->h); + } + + osd_draw_on_image(vo->osd, p->osd, frame->current ? 
frame->current->pts : 0, 0, img); + + if (frame->current != p->original_image) + p->original_image = frame->current; +} + +static int query_format(struct vo *vo, int format) +{ + struct priv *p = vo->priv; + if (mp_sws_supports_formats(p->sws, IMGFMT_RGB0, format)) + return 1; + return 0; +} + +static void uninit(struct vo *vo) +{ + struct priv *p = vo->priv; + if (p->myximage[0]) + freeMyXImage(p, 0); + if (p->myximage[1]) + freeMyXImage(p, 1); + if (p->gc) + XFreeGC(vo->x11->display, p->gc); + + vo_x11_uninit(vo); +} + +static int preinit(struct vo *vo) +{ + struct priv *p = vo->priv; + p->vo = vo; + p->sws = mp_sws_alloc(vo); + p->sws->log = vo->log; + mp_sws_enable_cmdline_opts(p->sws, vo->global); + + if (!vo_x11_init(vo)) + goto error; + struct vo_x11_state *x11 = vo->x11; + + XWindowAttributes attribs; + XGetWindowAttributes(x11->display, x11->rootwin, &attribs); + p->depth = attribs.depth; + + if (!XMatchVisualInfo(x11->display, x11->screen, p->depth, + TrueColor, &p->vinfo)) + goto error; + + MP_VERBOSE(vo, "selected visual: %d\n", (int)p->vinfo.visualid); + + if (!vo_x11_create_vo_window(vo, &p->vinfo, "x11")) + goto error; + + p->gc = XCreateGC(x11->display, x11->window, 0, NULL); + MP_WARN(vo, "Warning: this legacy VO has bad performance. 
Consider fixing " + "your graphics drivers, or not forcing the x11 VO.\n"); + return 0; + +error: + uninit(vo); + return -1; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + switch (request) { + case VOCTRL_SET_PANSCAN: + if (vo->config_ok) + resize(vo); + return VO_TRUE; + } + + int events = 0; + int r = vo_x11_control(vo, &events, request, data); + if (vo->config_ok && (events & (VO_EVENT_EXPOSE | VO_EVENT_RESIZE))) + resize(vo); + vo_event(vo, events); + return r; +} + +const struct vo_driver video_out_x11 = { + .description = "X11 (software scaling)", + .name = "x11", + .priv_size = sizeof(struct priv), + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .get_vsync = get_vsync, + .wakeup = vo_x11_wakeup, + .wait_events = vo_x11_wait_events, + .uninit = uninit, +}; diff --git a/video/out/vo_xv.c b/video/out/vo_xv.c new file mode 100644 index 0000000..6c776c5 --- /dev/null +++ b/video/out/vo_xv.c @@ -0,0 +1,921 @@ +/* + * X11 Xv interface + * + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <float.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdint.h> +#include <stdbool.h> +#include <errno.h> +#include <X11/Xlib.h> +#include <X11/Xutil.h> + +#include <libavutil/common.h> + +#include <sys/types.h> +#include <sys/ipc.h> +#include <sys/shm.h> +#include <X11/extensions/XShm.h> + +// Note: depends on the inclusion of X11/extensions/XShm.h +#include <X11/extensions/Xv.h> +#include <X11/extensions/Xvlib.h> + +#include "options/options.h" +#include "mpv_talloc.h" +#include "common/msg.h" +#include "vo.h" +#include "video/mp_image.h" +#include "present_sync.h" +#include "x11_common.h" +#include "sub/osd.h" +#include "sub/draw_bmp.h" +#include "video/csputils.h" +#include "options/m_option.h" +#include "input/input.h" +#include "osdep/timer.h" + +#define CK_METHOD_NONE 0 // no colorkey drawing +#define CK_METHOD_BACKGROUND 1 // set colorkey as window background +#define CK_METHOD_AUTOPAINT 2 // let xv draw the colorkey +#define CK_METHOD_MANUALFILL 3 // manually draw the colorkey +#define CK_SRC_USE 0 // use specified / default colorkey +#define CK_SRC_SET 1 // use and set specified / default colorkey +#define CK_SRC_CUR 2 // use current colorkey (get it from xv) + +#define MAX_BUFFERS 10 + +struct xvctx { + struct xv_ck_info_s { + int method; // CK_METHOD_* constants + int source; // CK_SRC_* constants + } xv_ck_info; + int colorkey; + unsigned long xv_colorkey; + int xv_port; + int cfg_xv_adaptor; + int cfg_buffers; + XvAdaptorInfo *ai; + XvImageFormatValues *fo; + unsigned int formats, adaptors, xv_format; + int current_buf; + int current_ip_buf; + int num_buffers; + XvImage *xvimage[MAX_BUFFERS]; + struct mp_image *original_image; + uint32_t image_width; + uint32_t image_height; + uint32_t image_format; + int cached_csp; + struct mp_rect src_rect; + struct mp_rect dst_rect; + uint32_t max_width, max_height; // zero means: not set + GC f_gc; // used to paint background + GC vo_gc; // used to paint video + int 
Shmem_Flag; + XShmSegmentInfo Shminfo[MAX_BUFFERS]; + int Shm_Warned_Slow; +}; + +#define MP_FOURCC(a,b,c,d) ((a) | ((b)<<8) | ((c)<<16) | ((unsigned)(d)<<24)) + +#define MP_FOURCC_YV12 MP_FOURCC('Y', 'V', '1', '2') +#define MP_FOURCC_I420 MP_FOURCC('I', '4', '2', '0') +#define MP_FOURCC_IYUV MP_FOURCC('I', 'Y', 'U', 'V') +#define MP_FOURCC_UYVY MP_FOURCC('U', 'Y', 'V', 'Y') + +struct fmt_entry { + int imgfmt; + int fourcc; +}; +static const struct fmt_entry fmt_table[] = { + {IMGFMT_420P, MP_FOURCC_YV12}, + {IMGFMT_420P, MP_FOURCC_I420}, + {IMGFMT_UYVY, MP_FOURCC_UYVY}, + {0} +}; + +static bool allocate_xvimage(struct vo *, int); +static void deallocate_xvimage(struct vo *vo, int foo); +static struct mp_image get_xv_buffer(struct vo *vo, int buf_index); + +static int find_xv_format(int imgfmt) +{ + for (int n = 0; fmt_table[n].imgfmt; n++) { + if (fmt_table[n].imgfmt == imgfmt) + return fmt_table[n].fourcc; + } + return 0; +} + +static int xv_find_atom(struct vo *vo, uint32_t xv_port, const char *name, + bool get, int *min, int *max) +{ + Atom atom = None; + int howmany = 0; + XvAttribute *attributes = XvQueryPortAttributes(vo->x11->display, xv_port, + &howmany); + for (int i = 0; i < howmany && attributes; i++) { + int flag = get ? 
XvGettable : XvSettable; + if (attributes[i].flags & flag) { + atom = XInternAtom(vo->x11->display, attributes[i].name, True); + *min = attributes[i].min_value; + *max = attributes[i].max_value; +/* since we have SET_DEFAULTS first in our list, we can check if it's available + then trigger it if it's ok so that the other values are at default upon query */ + if (atom != None) { + if (!strcmp(attributes[i].name, "XV_BRIGHTNESS") && + (!strcmp(name, "brightness"))) + break; + else if (!strcmp(attributes[i].name, "XV_CONTRAST") && + (!strcmp(name, "contrast"))) + break; + else if (!strcmp(attributes[i].name, "XV_SATURATION") && + (!strcmp(name, "saturation"))) + break; + else if (!strcmp(attributes[i].name, "XV_HUE") && + (!strcmp(name, "hue"))) + break; + if (!strcmp(attributes[i].name, "XV_RED_INTENSITY") && + (!strcmp(name, "red_intensity"))) + break; + else if (!strcmp(attributes[i].name, "XV_GREEN_INTENSITY") + && (!strcmp(name, "green_intensity"))) + break; + else if (!strcmp(attributes[i].name, "XV_BLUE_INTENSITY") + && (!strcmp(name, "blue_intensity"))) + break; + else if ((!strcmp(attributes[i].name, "XV_ITURBT_709") //NVIDIA + || !strcmp(attributes[i].name, "XV_COLORSPACE")) //ATI + && (!strcmp(name, "bt_709"))) + break; + atom = None; + continue; + } + } + } + XFree(attributes); + return atom; +} + +static int xv_set_eq(struct vo *vo, uint32_t xv_port, const char *name, + int value) +{ + MP_VERBOSE(vo, "xv_set_eq called! 
(%s, %d)\n", name, value); + + int min, max; + int atom = xv_find_atom(vo, xv_port, name, false, &min, &max); + if (atom != None) { + // -100 -> min + // 0 -> (max+min)/2 + // +100 -> max + int port_value = (value + 100) * (max - min) / 200 + min; + XvSetPortAttribute(vo->x11->display, xv_port, atom, port_value); + return VO_TRUE; + } + return VO_FALSE; +} + +static int xv_get_eq(struct vo *vo, uint32_t xv_port, const char *name, + int *value) +{ + int min, max; + int atom = xv_find_atom(vo, xv_port, name, true, &min, &max); + if (atom != None) { + int port_value = 0; + XvGetPortAttribute(vo->x11->display, xv_port, atom, &port_value); + + *value = (port_value - min) * 200 / (max - min) - 100; + MP_VERBOSE(vo, "xv_get_eq called! (%s, %d)\n", name, *value); + return VO_TRUE; + } + return VO_FALSE; +} + +static Atom xv_intern_atom_if_exists(struct vo *vo, char const *atom_name) +{ + struct xvctx *ctx = vo->priv; + XvAttribute *attributes; + int attrib_count, i; + Atom xv_atom = None; + + attributes = XvQueryPortAttributes(vo->x11->display, ctx->xv_port, + &attrib_count); + if (attributes != NULL) { + for (i = 0; i < attrib_count; ++i) { + if (strcmp(attributes[i].name, atom_name) == 0) { + xv_atom = XInternAtom(vo->x11->display, atom_name, False); + break; + } + } + XFree(attributes); + } + + return xv_atom; +} + +// Try to enable vsync for xv. +// Returns -1 if not available, 0 on failure and 1 on success. +static int xv_enable_vsync(struct vo *vo) +{ + struct xvctx *ctx = vo->priv; + Atom xv_atom = xv_intern_atom_if_exists(vo, "XV_SYNC_TO_VBLANK"); + if (xv_atom == None) + return -1; + return XvSetPortAttribute(vo->x11->display, ctx->xv_port, xv_atom, 1) + == Success; +} + +// Get maximum supported source image dimensions. +// If querying the dimensions fails, don't change *width and *height. 
+static void xv_get_max_img_dim(struct vo *vo, uint32_t *width, uint32_t *height) +{ + struct xvctx *ctx = vo->priv; + XvEncodingInfo *encodings; + unsigned int num_encodings, idx; + + XvQueryEncodings(vo->x11->display, ctx->xv_port, &num_encodings, &encodings); + + if (encodings) { + for (idx = 0; idx < num_encodings; ++idx) { + if (strcmp(encodings[idx].name, "XV_IMAGE") == 0) { + *width = encodings[idx].width; + *height = encodings[idx].height; + break; + } + } + } + + MP_VERBOSE(vo, "Maximum source image dimensions: %ux%u\n", *width, *height); + + XvFreeEncodingInfo(encodings); +} + +static void xv_print_ck_info(struct vo *vo) +{ + struct xvctx *xv = vo->priv; + + switch (xv->xv_ck_info.method) { + case CK_METHOD_NONE: + MP_VERBOSE(vo, "Drawing no colorkey.\n"); + return; + case CK_METHOD_AUTOPAINT: + MP_VERBOSE(vo, "Colorkey is drawn by Xv.\n"); + break; + case CK_METHOD_MANUALFILL: + MP_VERBOSE(vo, "Drawing colorkey manually.\n"); + break; + case CK_METHOD_BACKGROUND: + MP_VERBOSE(vo, "Colorkey is drawn as window background.\n"); + break; + } + + switch (xv->xv_ck_info.source) { + case CK_SRC_CUR: + MP_VERBOSE(vo, "Using colorkey from Xv (0x%06lx).\n", xv->xv_colorkey); + break; + case CK_SRC_USE: + if (xv->xv_ck_info.method == CK_METHOD_AUTOPAINT) { + MP_VERBOSE(vo, "Ignoring colorkey from mpv (0x%06lx).\n", + xv->xv_colorkey); + } else { + MP_VERBOSE(vo, "Using colorkey from mpv (0x%06lx). Use -colorkey to change.\n", + xv->xv_colorkey); + } + break; + case CK_SRC_SET: + MP_VERBOSE(vo, "Setting and using colorkey from mpv (0x%06lx)." + " Use -colorkey to change.\n", xv->xv_colorkey); + break; + } +} + +/* NOTE: If vo.colorkey has bits set after the first 3 low order bytes + * we don't draw anything as this means it was forced to off. 
*/ +static int xv_init_colorkey(struct vo *vo) +{ + struct xvctx *ctx = vo->priv; + Display *display = vo->x11->display; + Atom xv_atom; + int rez; + + /* check if colorkeying is needed */ + xv_atom = xv_intern_atom_if_exists(vo, "XV_COLORKEY"); + if (xv_atom != None && ctx->xv_ck_info.method != CK_METHOD_NONE) { + if (ctx->xv_ck_info.source == CK_SRC_CUR) { + int colorkey_ret; + + rez = XvGetPortAttribute(display, ctx->xv_port, xv_atom, + &colorkey_ret); + if (rez == Success) + ctx->xv_colorkey = colorkey_ret; + else { + MP_FATAL(vo, "Couldn't get colorkey! " + "Maybe the selected Xv port has no overlay.\n"); + return 0; // error getting colorkey + } + } else { + ctx->xv_colorkey = ctx->colorkey; + + /* check if we have to set the colorkey too */ + if (ctx->xv_ck_info.source == CK_SRC_SET) { + xv_atom = XInternAtom(display, "XV_COLORKEY", False); + + rez = XvSetPortAttribute(display, ctx->xv_port, xv_atom, + ctx->colorkey); + if (rez != Success) { + MP_FATAL(vo, "Couldn't set colorkey!\n"); + return 0; // error setting colorkey + } + } + } + + xv_atom = xv_intern_atom_if_exists(vo, "XV_AUTOPAINT_COLORKEY"); + + /* should we draw the colorkey ourselves or activate autopainting? */ + if (ctx->xv_ck_info.method == CK_METHOD_AUTOPAINT) { + rez = !Success; + + if (xv_atom != None) // autopaint is supported + rez = XvSetPortAttribute(display, ctx->xv_port, xv_atom, 1); + + if (rez != Success) + ctx->xv_ck_info.method = CK_METHOD_MANUALFILL; + } else { + // disable colorkey autopainting if supported + if (xv_atom != None) + XvSetPortAttribute(display, ctx->xv_port, xv_atom, 0); + } + } else { // do no colorkey drawing at all + ctx->xv_ck_info.method = CK_METHOD_NONE; + ctx->colorkey = 0xFF000000; + } + + xv_print_ck_info(vo); + + return 1; +} + +/* Draw the colorkey on the video window. + * + * Draws the colorkey depending on the set method ( colorkey_handling ). 
+ * + * Also draws the black bars ( when the video doesn't fit the display in + * fullscreen ) separately, so they don't overlap with the video area. */ +static void xv_draw_colorkey(struct vo *vo, const struct mp_rect *rc) +{ + struct xvctx *ctx = vo->priv; + struct vo_x11_state *x11 = vo->x11; + if (ctx->xv_ck_info.method == CK_METHOD_MANUALFILL || + ctx->xv_ck_info.method == CK_METHOD_BACKGROUND) + { + if (!ctx->vo_gc) + return; + //less tearing than XClearWindow() + XSetForeground(x11->display, ctx->vo_gc, ctx->xv_colorkey); + XFillRectangle(x11->display, x11->window, ctx->vo_gc, rc->x0, rc->y0, + rc->x1 - rc->x0, rc->y1 - rc->y0); + } +} + +static void read_xv_csp(struct vo *vo) +{ + struct xvctx *ctx = vo->priv; + ctx->cached_csp = 0; + int bt709_enabled; + if (xv_get_eq(vo, ctx->xv_port, "bt_709", &bt709_enabled)) + ctx->cached_csp = bt709_enabled == 100 ? MP_CSP_BT_709 : MP_CSP_BT_601; +} + + +static void fill_rect(struct vo *vo, GC gc, int x0, int y0, int x1, int y1) +{ + struct vo_x11_state *x11 = vo->x11; + + x0 = MPMAX(x0, 0); + y0 = MPMAX(y0, 0); + x1 = MPMIN(x1, vo->dwidth); + y1 = MPMIN(y1, vo->dheight); + + if (x11->window && gc && x1 > x0 && y1 > y0) + XFillRectangle(x11->display, x11->window, gc, x0, y0, x1 - x0, y1 - y0); +} + +// Clear everything outside of rc with the background color +static void vo_x11_clear_background(struct vo *vo, const struct mp_rect *rc) +{ + struct vo_x11_state *x11 = vo->x11; + struct xvctx *ctx = vo->priv; + GC gc = ctx->f_gc; + + int w = vo->dwidth; + int h = vo->dheight; + + fill_rect(vo, gc, 0, 0, w, rc->y0); // top + fill_rect(vo, gc, 0, rc->y1, w, h); // bottom + fill_rect(vo, gc, 0, rc->y0, rc->x0, rc->y1); // left + fill_rect(vo, gc, rc->x1, rc->y0, w, rc->y1); // right + + XFlush(x11->display); +} + +static void resize(struct vo *vo) +{ + struct xvctx *ctx = vo->priv; + + // Can't be used, because the function calculates screen-space coordinates, + // while we need video-space. 
+ struct mp_osd_res unused; + + vo_get_src_dst_rects(vo, &ctx->src_rect, &ctx->dst_rect, &unused); + + vo_x11_clear_background(vo, &ctx->dst_rect); + xv_draw_colorkey(vo, &ctx->dst_rect); + read_xv_csp(vo); + + mp_input_set_mouse_transform(vo->input_ctx, &ctx->dst_rect, &ctx->src_rect); + + vo->want_redraw = true; +} + +/* + * create and map window, + * allocate colors and (shared) memory + */ +static int reconfig(struct vo *vo, struct mp_image_params *params) +{ + struct vo_x11_state *x11 = vo->x11; + struct xvctx *ctx = vo->priv; + int i; + + ctx->image_height = params->h; + ctx->image_width = params->w; + ctx->image_format = params->imgfmt; + + if ((ctx->max_width != 0 && ctx->max_height != 0) + && (ctx->image_width > ctx->max_width + || ctx->image_height > ctx->max_height)) { + MP_ERR(vo, "Source image dimensions are too high: %ux%u (maximum is %ux%u)\n", + ctx->image_width, ctx->image_height, ctx->max_width, + ctx->max_height); + return -1; + } + + /* check image formats */ + ctx->xv_format = 0; + for (i = 0; i < ctx->formats; i++) { + MP_VERBOSE(vo, "Xvideo image format: 0x%x (%4.4s) %s\n", + ctx->fo[i].id, (char *) &ctx->fo[i].id, + (ctx->fo[i].format == XvPacked) ? 
"packed" : "planar"); + if (ctx->fo[i].id == find_xv_format(ctx->image_format)) + ctx->xv_format = ctx->fo[i].id; + } + if (!ctx->xv_format) + return -1; + + vo_x11_config_vo_window(vo); + + if (!ctx->f_gc && !ctx->vo_gc) { + ctx->f_gc = XCreateGC(x11->display, x11->window, 0, 0); + ctx->vo_gc = XCreateGC(x11->display, x11->window, 0, NULL); + XSetForeground(x11->display, ctx->f_gc, 0); + } + + if (ctx->xv_ck_info.method == CK_METHOD_BACKGROUND) + XSetWindowBackground(x11->display, x11->window, ctx->xv_colorkey); + + MP_VERBOSE(vo, "using Xvideo port %d for hw scaling\n", ctx->xv_port); + + // In case config has been called before + for (i = 0; i < ctx->num_buffers; i++) + deallocate_xvimage(vo, i); + + ctx->num_buffers = ctx->cfg_buffers; + + for (i = 0; i < ctx->num_buffers; i++) { + if (!allocate_xvimage(vo, i)) { + MP_FATAL(vo, "could not allocate Xv image data\n"); + return -1; + } + } + + ctx->current_buf = 0; + ctx->current_ip_buf = 0; + + int is_709 = params->color.space == MP_CSP_BT_709; + xv_set_eq(vo, ctx->xv_port, "bt_709", is_709 * 200 - 100); + read_xv_csp(vo); + + resize(vo); + + return 0; +} + +static bool allocate_xvimage(struct vo *vo, int foo) +{ + struct xvctx *ctx = vo->priv; + struct vo_x11_state *x11 = vo->x11; + // align it for faster OSD rendering (draw_bmp.c swscale usage) + int aligned_w = FFALIGN(ctx->image_width, 32); + // round up the height to next chroma boundary too + int aligned_h = FFALIGN(ctx->image_height, 2); + if (x11->display_is_local && XShmQueryExtension(x11->display)) { + ctx->Shmem_Flag = 1; + x11->ShmCompletionEvent = XShmGetEventBase(x11->display) + + ShmCompletion; + } else { + ctx->Shmem_Flag = 0; + MP_INFO(vo, "Shared memory not supported\nReverting to normal Xv.\n"); + } + if (ctx->Shmem_Flag) { + ctx->xvimage[foo] = + (XvImage *) XvShmCreateImage(x11->display, ctx->xv_port, + ctx->xv_format, NULL, + aligned_w, aligned_h, + &ctx->Shminfo[foo]); + if (!ctx->xvimage[foo]) + return false; + + ctx->Shminfo[foo].shmid = 
shmget(IPC_PRIVATE, + ctx->xvimage[foo]->data_size, + IPC_CREAT | 0777); + ctx->Shminfo[foo].shmaddr = shmat(ctx->Shminfo[foo].shmid, 0, 0); + if (ctx->Shminfo[foo].shmaddr == (void *)-1) + return false; + ctx->Shminfo[foo].readOnly = False; + + ctx->xvimage[foo]->data = ctx->Shminfo[foo].shmaddr; + XShmAttach(x11->display, &ctx->Shminfo[foo]); + XSync(x11->display, False); + shmctl(ctx->Shminfo[foo].shmid, IPC_RMID, 0); + } else { + ctx->xvimage[foo] = + (XvImage *) XvCreateImage(x11->display, ctx->xv_port, + ctx->xv_format, NULL, aligned_w, + aligned_h); + if (!ctx->xvimage[foo]) + return false; + ctx->xvimage[foo]->data = av_malloc(ctx->xvimage[foo]->data_size); + if (!ctx->xvimage[foo]->data) + return false; + XSync(x11->display, False); + } + + if ((ctx->xvimage[foo]->width < aligned_w) || + (ctx->xvimage[foo]->height < aligned_h)) { + MP_ERR(vo, "Got XvImage with too small size: %ux%u (expected %ux%u)\n", + ctx->xvimage[foo]->width, ctx->xvimage[foo]->height, + aligned_w, ctx->image_height); + return false; + } + + struct mp_image img = get_xv_buffer(vo, foo); + mp_image_set_size(&img, aligned_w, aligned_h); + mp_image_clear(&img, 0, 0, img.w, img.h); + return true; +} + +static void deallocate_xvimage(struct vo *vo, int foo) +{ + struct xvctx *ctx = vo->priv; + if (ctx->Shmem_Flag) { + XShmDetach(vo->x11->display, &ctx->Shminfo[foo]); + shmdt(ctx->Shminfo[foo].shmaddr); + } else { + av_free(ctx->xvimage[foo]->data); + } + if (ctx->xvimage[foo]) + XFree(ctx->xvimage[foo]); + + ctx->xvimage[foo] = NULL; + ctx->Shminfo[foo] = (XShmSegmentInfo){0}; + + XSync(vo->x11->display, False); + return; +} + +static inline void put_xvimage(struct vo *vo, XvImage *xvi) +{ + struct xvctx *ctx = vo->priv; + struct vo_x11_state *x11 = vo->x11; + struct mp_rect *src = &ctx->src_rect; + struct mp_rect *dst = &ctx->dst_rect; + int dw = dst->x1 - dst->x0, dh = dst->y1 - dst->y0; + int sw = src->x1 - src->x0, sh = src->y1 - src->y0; + + if (ctx->Shmem_Flag) { + 
XvShmPutImage(x11->display, ctx->xv_port, x11->window, ctx->vo_gc, xvi, + src->x0, src->y0, sw, sh, + dst->x0, dst->y0, dw, dh, + True); + x11->ShmCompletionWaitCount++; + } else { + XvPutImage(x11->display, ctx->xv_port, x11->window, ctx->vo_gc, xvi, + src->x0, src->y0, sw, sh, + dst->x0, dst->y0, dw, dh); + } +} + +static struct mp_image get_xv_buffer(struct vo *vo, int buf_index) +{ + struct xvctx *ctx = vo->priv; + XvImage *xv_image = ctx->xvimage[buf_index]; + + struct mp_image img = {0}; + mp_image_set_size(&img, ctx->image_width, ctx->image_height); + mp_image_setfmt(&img, ctx->image_format); + + bool swapuv = ctx->xv_format == MP_FOURCC_YV12; + for (int n = 0; n < img.num_planes; n++) { + int sn = n > 0 && swapuv ? (n == 1 ? 2 : 1) : n; + img.planes[n] = xv_image->data + xv_image->offsets[sn]; + img.stride[n] = xv_image->pitches[sn]; + } + + if (vo->params) { + struct mp_image_params params = *vo->params; + if (ctx->cached_csp) + params.color.space = ctx->cached_csp; + mp_image_set_attributes(&img, ¶ms); + } + + return img; +} + +static void wait_for_completion(struct vo *vo, int max_outstanding) +{ + struct xvctx *ctx = vo->priv; + struct vo_x11_state *x11 = vo->x11; + if (ctx->Shmem_Flag) { + while (x11->ShmCompletionWaitCount > max_outstanding) { + if (!ctx->Shm_Warned_Slow) { + MP_WARN(vo, "X11 can't keep up! 
Waiting" + " for XShm completion events...\n"); + ctx->Shm_Warned_Slow = 1; + } + mp_sleep_ns(MP_TIME_MS_TO_NS(1)); + vo_x11_check_events(vo); + } + } +} + +static void flip_page(struct vo *vo) +{ + struct xvctx *ctx = vo->priv; + put_xvimage(vo, ctx->xvimage[ctx->current_buf]); + + /* remember the currently visible buffer */ + ctx->current_buf = (ctx->current_buf + 1) % ctx->num_buffers; + + if (!ctx->Shmem_Flag) + XSync(vo->x11->display, False); + + if (vo->x11->use_present) { + vo_x11_present(vo); + present_sync_swap(vo->x11->present); + } +} + +static void get_vsync(struct vo *vo, struct vo_vsync_info *info) +{ + struct vo_x11_state *x11 = vo->x11; + if (x11->use_present) + present_sync_get_info(x11->present, info); +} + +static void draw_frame(struct vo *vo, struct vo_frame *frame) +{ + struct xvctx *ctx = vo->priv; + + wait_for_completion(vo, ctx->num_buffers - 1); + bool render = vo_x11_check_visible(vo); + if (!render) + return; + + struct mp_image xv_buffer = get_xv_buffer(vo, ctx->current_buf); + if (frame->current) { + mp_image_copy(&xv_buffer, frame->current); + } else { + mp_image_clear(&xv_buffer, 0, 0, xv_buffer.w, xv_buffer.h); + } + + struct mp_osd_res res = osd_res_from_image_params(vo->params); + osd_draw_on_image(vo->osd, res, frame->current ? 
frame->current->pts : 0, 0, &xv_buffer); + + if (frame->current != ctx->original_image) + ctx->original_image = frame->current; +} + +static int query_format(struct vo *vo, int format) +{ + struct xvctx *ctx = vo->priv; + uint32_t i; + + int fourcc = find_xv_format(format); + if (fourcc) { + for (i = 0; i < ctx->formats; i++) { + if (ctx->fo[i].id == fourcc) + return 1; + } + } + return 0; +} + +static void uninit(struct vo *vo) +{ + struct xvctx *ctx = vo->priv; + int i; + + if (ctx->ai) + XvFreeAdaptorInfo(ctx->ai); + ctx->ai = NULL; + if (ctx->fo) { + XFree(ctx->fo); + ctx->fo = NULL; + } + for (i = 0; i < ctx->num_buffers; i++) + deallocate_xvimage(vo, i); + if (ctx->f_gc != None) + XFreeGC(vo->x11->display, ctx->f_gc); + if (ctx->vo_gc != None) + XFreeGC(vo->x11->display, ctx->vo_gc); + // uninit() shouldn't get called unless initialization went past vo_init() + vo_x11_uninit(vo); +} + +static int preinit(struct vo *vo) +{ + XvPortID xv_p; + int busy_ports = 0; + unsigned int i; + struct xvctx *ctx = vo->priv; + int xv_adaptor = ctx->cfg_xv_adaptor; + + if (!vo_x11_init(vo)) + return -1; + + if (!vo_x11_create_vo_window(vo, NULL, "xv")) + goto error; + + struct vo_x11_state *x11 = vo->x11; + + /* check for Xvideo extension */ + unsigned int ver, rel, req, ev, err; + if (Success != XvQueryExtension(x11->display, &ver, &rel, &req, &ev, &err)) { + MP_ERR(vo, "Xv not supported by this X11 version/driver\n"); + goto error; + } + + /* check for Xvideo support */ + if (Success != + XvQueryAdaptors(x11->display, DefaultRootWindow(x11->display), + &ctx->adaptors, &ctx->ai)) { + MP_ERR(vo, "XvQueryAdaptors failed.\n"); + goto error; + } + + /* check adaptors */ + if (ctx->xv_port) { + int port_found; + + for (port_found = 0, i = 0; !port_found && i < ctx->adaptors; i++) { + if ((ctx->ai[i].type & XvInputMask) + && (ctx->ai[i].type & XvImageMask)) { + for (xv_p = ctx->ai[i].base_id; + xv_p < ctx->ai[i].base_id + ctx->ai[i].num_ports; + ++xv_p) { + if (xv_p == 
ctx->xv_port) { + port_found = 1; + break; + } + } + } + } + if (port_found) { + if (XvGrabPort(x11->display, ctx->xv_port, CurrentTime)) + ctx->xv_port = 0; + } else { + MP_WARN(vo, "Invalid port parameter, overriding with port 0.\n"); + ctx->xv_port = 0; + } + } + + for (i = 0; i < ctx->adaptors && ctx->xv_port == 0; i++) { + /* check if adaptor number has been specified */ + if (xv_adaptor != -1 && xv_adaptor != i) + continue; + + if ((ctx->ai[i].type & XvInputMask) && (ctx->ai[i].type & XvImageMask)) { + for (xv_p = ctx->ai[i].base_id; + xv_p < ctx->ai[i].base_id + ctx->ai[i].num_ports; ++xv_p) + if (!XvGrabPort(x11->display, xv_p, CurrentTime)) { + ctx->xv_port = xv_p; + MP_VERBOSE(vo, "Using Xv Adapter #%d (%s)\n", + i, ctx->ai[i].name); + break; + } else { + MP_WARN(vo, "Could not grab port %i.\n", (int) xv_p); + ++busy_ports; + } + } + } + if (!ctx->xv_port) { + if (busy_ports) + MP_ERR(vo, "Xvideo ports busy.\n"); + else + MP_ERR(vo, "No Xvideo support found.\n"); + goto error; + } + + if (!xv_init_colorkey(vo)) { + goto error; // bail out, colorkey setup failed + } + xv_enable_vsync(vo); + xv_get_max_img_dim(vo, &ctx->max_width, &ctx->max_height); + + ctx->fo = XvListImageFormats(x11->display, ctx->xv_port, + (int *) &ctx->formats); + + MP_WARN(vo, "Warning: this legacy VO has bad quality and performance, " + "and will in particular result in blurry OSD and subtitles. 
" + "You should fix your graphics drivers, or not force the xv VO.\n"); + return 0; + + error: + uninit(vo); // free resources + return -1; +} + +static int control(struct vo *vo, uint32_t request, void *data) +{ + switch (request) { + case VOCTRL_SET_PANSCAN: + resize(vo); + return VO_TRUE; + } + int events = 0; + int r = vo_x11_control(vo, &events, request, data); + if (events & (VO_EVENT_EXPOSE | VO_EVENT_RESIZE)) + resize(vo); + vo_event(vo, events); + return r; +} + +#define OPT_BASE_STRUCT struct xvctx + +const struct vo_driver video_out_xv = { + .description = "X11/Xv", + .name = "xv", + .preinit = preinit, + .query_format = query_format, + .reconfig = reconfig, + .control = control, + .draw_frame = draw_frame, + .flip_page = flip_page, + .get_vsync = get_vsync, + .wakeup = vo_x11_wakeup, + .wait_events = vo_x11_wait_events, + .uninit = uninit, + .priv_size = sizeof(struct xvctx), + .priv_defaults = &(const struct xvctx) { + .cfg_xv_adaptor = -1, + .xv_ck_info = {CK_METHOD_MANUALFILL, CK_SRC_CUR}, + .colorkey = 0x0000ff00, // default colorkey is green + // (0xff000000 means that colorkey has been disabled) + .cfg_buffers = 2, + }, + .options = (const struct m_option[]) { + {"port", OPT_INT(xv_port), M_RANGE(0, DBL_MAX)}, + {"adaptor", OPT_INT(cfg_xv_adaptor), M_RANGE(-1, DBL_MAX)}, + {"ck", OPT_CHOICE(xv_ck_info.source, + {"use", CK_SRC_USE}, + {"set", CK_SRC_SET}, + {"cur", CK_SRC_CUR})}, + {"ck-method", OPT_CHOICE(xv_ck_info.method, + {"none", CK_METHOD_NONE}, + {"bg", CK_METHOD_BACKGROUND}, + {"man", CK_METHOD_MANUALFILL}, + {"auto", CK_METHOD_AUTOPAINT})}, + {"colorkey", OPT_INT(colorkey)}, + {"buffers", OPT_INT(cfg_buffers), M_RANGE(1, MAX_BUFFERS)}, + {0} + }, + .options_prefix = "xv", +}; diff --git a/video/out/vulkan/common.h b/video/out/vulkan/common.h new file mode 100644 index 0000000..d006942 --- /dev/null +++ b/video/out/vulkan/common.h @@ -0,0 +1,40 @@ +#pragma once + +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include 
<stdbool.h> +#include <assert.h> + +#include "config.h" + +#include "common/common.h" +#include "common/msg.h" + +// We need to define all platforms we want to support. Since we have +// our own mechanism for checking this, we re-define the right symbols +#if HAVE_WAYLAND +#define VK_USE_PLATFORM_WAYLAND_KHR +#endif +#if HAVE_X11 +#define VK_USE_PLATFORM_XLIB_KHR +#endif +#if HAVE_WIN32_DESKTOP +#define VK_USE_PLATFORM_WIN32_KHR +#endif +#if HAVE_COCOA +#define VK_USE_PLATFORM_MACOS_MVK +#define VK_USE_PLATFORM_METAL_EXT +#endif + +#include <libplacebo/vulkan.h> + +// Shared struct used to hold vulkan context information +struct mpvk_ctx { + pl_log pllog; + pl_vk_inst vkinst; + pl_vulkan vulkan; + pl_gpu gpu; // points to vulkan->gpu for convenience + pl_swapchain swapchain; + VkSurfaceKHR surface; +}; diff --git a/video/out/vulkan/context.c b/video/out/vulkan/context.c new file mode 100644 index 0000000..5087403 --- /dev/null +++ b/video/out/vulkan/context.c @@ -0,0 +1,372 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "config.h" + +#if HAVE_LAVU_UUID +#include <libavutil/uuid.h> +#else +#include "misc/uuid.h" +#endif + +#include "options/m_config.h" +#include "video/out/placebo/ra_pl.h" + +#include "context.h" +#include "utils.h" + +struct vulkan_opts { + char *device; // force a specific GPU + int swap_mode; + int queue_count; + bool async_transfer; + bool async_compute; +}; + +static int vk_validate_dev(struct mp_log *log, const struct m_option *opt, + struct bstr name, const char **value) +{ + struct bstr param = bstr0(*value); + int ret = M_OPT_INVALID; + VkResult res; + + // Create a dummy instance to validate/list the devices + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .pApplicationInfo = &(VkApplicationInfo) { + .sType = VK_STRUCTURE_TYPE_APPLICATION_INFO, + .apiVersion = VK_API_VERSION_1_1, + } + }; + + VkInstance inst; + VkPhysicalDevice *devices = NULL; + uint32_t num = 0; + + res = vkCreateInstance(&info, NULL, &inst); + if (res != VK_SUCCESS) + goto done; + + res = vkEnumeratePhysicalDevices(inst, &num, NULL); + if (res != VK_SUCCESS) + goto done; + + devices = talloc_array(NULL, VkPhysicalDevice, num); + res = vkEnumeratePhysicalDevices(inst, &num, devices); + if (res != VK_SUCCESS) + goto done; + + bool help = bstr_equals0(param, "help"); + if (help) { + mp_info(log, "Available vulkan devices:\n"); + ret = M_OPT_EXIT; + } + + AVUUID param_uuid; + bool is_uuid = av_uuid_parse(*value, param_uuid) == 0; + + for (int i = 0; i < num; i++) { + VkPhysicalDeviceIDPropertiesKHR id_prop = { 0 }; + id_prop.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES_KHR; + + VkPhysicalDeviceProperties2KHR prop2 = { 0 }; + prop2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR; + prop2.pNext = &id_prop; + + vkGetPhysicalDeviceProperties2(devices[i], &prop2); + + const VkPhysicalDeviceProperties *prop = &prop2.properties; + + if (help) { + char device_uuid[37]; + av_uuid_unparse(id_prop.deviceUUID, device_uuid); + 
mp_info(log, " '%s' (GPU %d, PCI ID %x:%x, UUID %s)\n", + prop->deviceName, i, (unsigned)prop->vendorID, + (unsigned)prop->deviceID, device_uuid); + } else if (bstr_equals0(param, prop->deviceName)) { + ret = 0; + goto done; + } else if (is_uuid && av_uuid_equal(param_uuid, id_prop.deviceUUID)) { + ret = 0; + goto done; + } + } + + if (!help) + mp_err(log, "No device with %s '%.*s'!\n", is_uuid ? "UUID" : "name", + BSTR_P(param)); + +done: + talloc_free(devices); + return ret; +} + +#define OPT_BASE_STRUCT struct vulkan_opts +const struct m_sub_options vulkan_conf = { + .opts = (const struct m_option[]) { + {"vulkan-device", OPT_STRING_VALIDATE(device, vk_validate_dev)}, + {"vulkan-swap-mode", OPT_CHOICE(swap_mode, + {"auto", -1}, + {"fifo", VK_PRESENT_MODE_FIFO_KHR}, + {"fifo-relaxed", VK_PRESENT_MODE_FIFO_RELAXED_KHR}, + {"mailbox", VK_PRESENT_MODE_MAILBOX_KHR}, + {"immediate", VK_PRESENT_MODE_IMMEDIATE_KHR})}, + {"vulkan-queue-count", OPT_INT(queue_count), M_RANGE(1, 8)}, + {"vulkan-async-transfer", OPT_BOOL(async_transfer)}, + {"vulkan-async-compute", OPT_BOOL(async_compute)}, + {"vulkan-disable-events", OPT_REMOVED("Unused")}, + {0} + }, + .size = sizeof(struct vulkan_opts), + .defaults = &(struct vulkan_opts) { + .swap_mode = -1, + .queue_count = 1, + .async_transfer = true, + .async_compute = true, + }, +}; + +struct priv { + struct mpvk_ctx *vk; + struct vulkan_opts *opts; + struct ra_vk_ctx_params params; + struct ra_tex proxy_tex; +}; + +static const struct ra_swapchain_fns vulkan_swapchain; + +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx) +{ + if (!ctx->swapchain || ctx->swapchain->fns != &vulkan_swapchain) + return NULL; + + struct priv *p = ctx->swapchain->priv; + return p->vk; +} + +void ra_vk_ctx_uninit(struct ra_ctx *ctx) +{ + if (!ctx->swapchain) + return; + + struct priv *p = ctx->swapchain->priv; + struct mpvk_ctx *vk = p->vk; + + if (ctx->ra) { + pl_gpu_finish(vk->gpu); + pl_swapchain_destroy(&vk->swapchain); + 
ctx->ra->fns->destroy(ctx->ra); + ctx->ra = NULL; + } + + vk->gpu = NULL; + pl_vulkan_destroy(&vk->vulkan); + TA_FREEP(&ctx->swapchain); +} + +bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, + struct ra_vk_ctx_params params, + VkPresentModeKHR preferred_mode) +{ + struct ra_swapchain *sw = ctx->swapchain = talloc_zero(NULL, struct ra_swapchain); + sw->ctx = ctx; + sw->fns = &vulkan_swapchain; + + struct priv *p = sw->priv = talloc_zero(sw, struct priv); + p->vk = vk; + p->params = params; + p->opts = mp_get_config_group(p, ctx->global, &vulkan_conf); + + VkPhysicalDeviceFeatures2 features = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2, + }; + +#if HAVE_VULKAN_INTEROP + /* + * Request the additional extensions and features required to make full use + * of the ffmpeg Vulkan hwcontext and video decoding capability. + */ + const char *opt_extensions[] = { + VK_EXT_DESCRIPTOR_BUFFER_EXTENSION_NAME, + VK_EXT_SHADER_ATOMIC_FLOAT_EXTENSION_NAME, + VK_KHR_VIDEO_DECODE_QUEUE_EXTENSION_NAME, + VK_KHR_VIDEO_DECODE_H264_EXTENSION_NAME, + VK_KHR_VIDEO_DECODE_H265_EXTENSION_NAME, + VK_KHR_VIDEO_QUEUE_EXTENSION_NAME, + // This is a literal string as it's not in the official headers yet. 
+ "VK_MESA_video_decode_av1", + }; + + VkPhysicalDeviceDescriptorBufferFeaturesEXT descriptor_buffer_feature = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_BUFFER_FEATURES_EXT, + .pNext = NULL, + .descriptorBuffer = true, + .descriptorBufferPushDescriptors = true, + }; + + VkPhysicalDeviceShaderAtomicFloatFeaturesEXT atomic_float_feature = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT, + .pNext = &descriptor_buffer_feature, + .shaderBufferFloat32Atomics = true, + .shaderBufferFloat32AtomicAdd = true, + }; + + features.pNext = &atomic_float_feature; +#endif + + AVUUID param_uuid = { 0 }; + bool is_uuid = p->opts->device && + av_uuid_parse(p->opts->device, param_uuid) == 0; + + assert(vk->pllog); + assert(vk->vkinst); + struct pl_vulkan_params device_params = { + .instance = vk->vkinst->instance, + .get_proc_addr = vk->vkinst->get_proc_addr, + .surface = vk->surface, + .async_transfer = p->opts->async_transfer, + .async_compute = p->opts->async_compute, + .queue_count = p->opts->queue_count, +#if HAVE_VULKAN_INTEROP + .extra_queues = VK_QUEUE_VIDEO_DECODE_BIT_KHR, + .opt_extensions = opt_extensions, + .num_opt_extensions = MP_ARRAY_SIZE(opt_extensions), +#endif + .features = &features, + .device_name = is_uuid ? 
NULL : p->opts->device, + }; + if (is_uuid) + av_uuid_copy(device_params.device_uuid, param_uuid); + + vk->vulkan = pl_vulkan_create(vk->pllog, &device_params); + if (!vk->vulkan) + goto error; + + vk->gpu = vk->vulkan->gpu; + ctx->ra = ra_create_pl(vk->gpu, ctx->log); + if (!ctx->ra) + goto error; + + // Create the swapchain + struct pl_vulkan_swapchain_params pl_params = { + .surface = vk->surface, + .present_mode = preferred_mode, + .swapchain_depth = ctx->vo->opts->swapchain_depth, + // mpv already handles resize events, so gracefully allow suboptimal + // swapchains to exist in order to make resizing even smoother + .allow_suboptimal = true, + }; + + if (p->opts->swap_mode >= 0) // user override + pl_params.present_mode = p->opts->swap_mode; + + vk->swapchain = pl_vulkan_create_swapchain(vk->vulkan, &pl_params); + if (!vk->swapchain) + goto error; + + return true; + +error: + ra_vk_ctx_uninit(ctx); + return false; +} + +bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height) +{ + struct priv *p = ctx->swapchain->priv; + + bool ok = pl_swapchain_resize(p->vk->swapchain, &width, &height); + ctx->vo->dwidth = width; + ctx->vo->dheight = height; + + return ok; +} + +char *ra_vk_ctx_get_device_name(struct ra_ctx *ctx) +{ + /* + * This implementation is a bit odd because it has to work even if the + * ctx hasn't been initialised yet. A context implementation may need access + * to the device name before it can fully initialise the ctx. + */ + struct vulkan_opts *opts = mp_get_config_group(NULL, ctx->global, + &vulkan_conf); + char *device_name = talloc_strdup(NULL, opts->device); + talloc_free(opts); + return device_name; +} + +static int color_depth(struct ra_swapchain *sw) +{ + return 0; // TODO: implement this somehow? 
+} + +static bool start_frame(struct ra_swapchain *sw, struct ra_fbo *out_fbo) +{ + struct priv *p = sw->priv; + struct pl_swapchain_frame frame; + + bool visible = true; + if (p->params.check_visible) + visible = p->params.check_visible(sw->ctx); + + // If out_fbo is NULL, this was called from vo_gpu_next. Bail out. + if (out_fbo == NULL || !visible) + return visible; + + if (!pl_swapchain_start_frame(p->vk->swapchain, &frame)) + return false; + if (!mppl_wrap_tex(sw->ctx->ra, frame.fbo, &p->proxy_tex)) + return false; + + *out_fbo = (struct ra_fbo) { + .tex = &p->proxy_tex, + .flip = frame.flipped, + }; + + return true; +} + +static bool submit_frame(struct ra_swapchain *sw, const struct vo_frame *frame) +{ + struct priv *p = sw->priv; + return pl_swapchain_submit_frame(p->vk->swapchain); +} + +static void swap_buffers(struct ra_swapchain *sw) +{ + struct priv *p = sw->priv; + pl_swapchain_swap_buffers(p->vk->swapchain); + if (p->params.swap_buffers) + p->params.swap_buffers(sw->ctx); +} + +static void get_vsync(struct ra_swapchain *sw, + struct vo_vsync_info *info) +{ + struct priv *p = sw->priv; + if (p->params.get_vsync) + p->params.get_vsync(sw->ctx, info); +} + +static const struct ra_swapchain_fns vulkan_swapchain = { + .color_depth = color_depth, + .start_frame = start_frame, + .submit_frame = submit_frame, + .swap_buffers = swap_buffers, + .get_vsync = get_vsync, +}; diff --git a/video/out/vulkan/context.h b/video/out/vulkan/context.h new file mode 100644 index 0000000..c846942 --- /dev/null +++ b/video/out/vulkan/context.h @@ -0,0 +1,31 @@ +#pragma once + +#include "video/out/gpu/context.h" +#include "common.h" + +struct ra_vk_ctx_params { + // See ra_swapchain_fns.get_vsync. + void (*get_vsync)(struct ra_ctx *ctx, struct vo_vsync_info *info); + + // For special contexts (i.e. wayland) that want to check visibility + // before drawing a frame. 
+ bool (*check_visible)(struct ra_ctx *ctx); + + // In case something special needs to be done on the buffer swap. + void (*swap_buffers)(struct ra_ctx *ctx); +}; + +// Helpers for ra_ctx based on ra_vk. These initialize ctx->ra and ctx->swchain. +void ra_vk_ctx_uninit(struct ra_ctx *ctx); +bool ra_vk_ctx_init(struct ra_ctx *ctx, struct mpvk_ctx *vk, + struct ra_vk_ctx_params params, + VkPresentModeKHR preferred_mode); + +// Handles a resize request, and updates ctx->vo->dwidth/dheight +bool ra_vk_ctx_resize(struct ra_ctx *ctx, int width, int height); + +// May be called on a ra_ctx of any type. +struct mpvk_ctx *ra_vk_ctx_get(struct ra_ctx *ctx); + +// Get the user requested Vulkan device name. +char *ra_vk_ctx_get_device_name(struct ra_ctx *ctx); diff --git a/video/out/vulkan/context_android.c b/video/out/vulkan/context_android.c new file mode 100644 index 0000000..ddab391 --- /dev/null +++ b/video/out/vulkan/context_android.c @@ -0,0 +1,96 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <vulkan/vulkan.h> +#include <vulkan/vulkan_android.h> + +#include "video/out/android_common.h" +#include "common.h" +#include "context.h" +#include "utils.h" + +struct priv { + struct mpvk_ctx vk; +}; + +static void android_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + + vo_android_uninit(ctx->vo); +} + +static bool android_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR; + + if (!vo_android_init(ctx->vo)) + goto fail; + + if (!mpvk_init(vk, ctx, VK_KHR_ANDROID_SURFACE_EXTENSION_NAME)) + goto fail; + + VkAndroidSurfaceCreateInfoKHR info = { + .sType = VK_STRUCTURE_TYPE_ANDROID_SURFACE_CREATE_INFO_KHR, + .window = vo_android_native_window(ctx->vo) + }; + + struct ra_vk_ctx_params params = {0}; + + VkInstance inst = vk->vkinst->instance; + VkResult res = vkCreateAndroidSurfaceKHR(inst, &info, NULL, &vk->surface); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Android surface\n"); + goto fail; + } + + if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR)) + goto fail; + + return true; +fail: + android_uninit(ctx); + return false; +} + +static bool android_reconfig(struct ra_ctx *ctx) +{ + int w, h; + if (!vo_android_surface_size(ctx->vo, &w, &h)) + return false; + + ra_vk_ctx_resize(ctx, w, h); + return true; +} + +static int android_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + return VO_NOTIMPL; +} + +const struct ra_ctx_fns ra_ctx_vulkan_android = { + .type = "vulkan", + .name = "androidvk", + .reconfig = android_reconfig, + .control = android_control, + .init = android_init, + .uninit = android_uninit, +}; diff --git a/video/out/vulkan/context_display.c b/video/out/vulkan/context_display.c new file mode 100644 index 0000000..84cef1e --- /dev/null +++ b/video/out/vulkan/context_display.c @@ -0,0 +1,491 @@ +/* + * 
This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "context.h" +#include "options/m_config.h" +#include "utils.h" + +#if HAVE_DRM +#include <errno.h> +#include <fcntl.h> +#include <unistd.h> + +#include "libmpv/render_gl.h" +#include "video/out/drm_common.h" +#endif + +struct vulkan_display_opts { + int display; + int mode; + int plane; +}; + +struct mode_selector { + // Indexes of selected display/mode/plane. + int display_idx; + int mode_idx; + int plane_idx; + + // Must be freed with talloc_free + VkDisplayModePropertiesKHR *out_mode_props; +}; + +/** + * If a selector is passed, verify that it is valid and return the matching + * mode properties. If null is passed, walk all modes and print them out. + */ +static bool walk_display_properties(struct mp_log *log, + int msgl_err, + VkPhysicalDevice device, + struct mode_selector *selector) { + bool ret = false; + VkResult res; + + int msgl_info = selector ? MSGL_TRACE : MSGL_INFO; + + // Use a dummy as parent for all other allocations. + void *tmp = talloc_new(NULL); + + VkPhysicalDeviceProperties prop; + vkGetPhysicalDeviceProperties(device, &prop); + mp_msg(log, msgl_info, " '%s' (GPU ID %x:%x)\n", prop.deviceName, + (unsigned)prop.vendorID, (unsigned)prop.deviceID); + + // Count displays. 
This must be done before enumerating planes with the + // Intel driver, or it will not enumerate any planes. WTF. + int num_displays = 0; + vkGetPhysicalDeviceDisplayPropertiesKHR(device, &num_displays, NULL); + if (!num_displays) { + mp_msg(log, msgl_info, " No available displays for device.\n"); + goto done; + } + if (selector && selector->display_idx + 1 > num_displays) { + mp_msg(log, msgl_err, "Selected display (%d) not present.\n", + selector->display_idx); + goto done; + } + + // Enumerate Planes + int num_planes = 0; + vkGetPhysicalDeviceDisplayPlanePropertiesKHR(device, &num_planes, NULL); + if (!num_planes) { + mp_msg(log, msgl_info, " No available planes for device.\n"); + goto done; + } + if (selector && selector->plane_idx + 1 > num_planes) { + mp_msg(log, msgl_err, "Selected plane (%d) not present.\n", + selector->plane_idx); + goto done; + } + + VkDisplayPlanePropertiesKHR *planes = + talloc_array(tmp, VkDisplayPlanePropertiesKHR, num_planes); + res = vkGetPhysicalDeviceDisplayPlanePropertiesKHR(device, &num_planes, + planes); + if (res != VK_SUCCESS) { + mp_msg(log, msgl_err, " Failed enumerating planes\n"); + goto done; + } + + // Allocate zeroed arrays so that planes with no displays have a null entry. 
+ VkDisplayKHR **planes_to_displays = + talloc_zero_array(tmp, VkDisplayKHR *, num_planes); + for (int j = 0; j < num_planes; j++) { + int num_displays_for_plane = 0; + vkGetDisplayPlaneSupportedDisplaysKHR(device, j, + &num_displays_for_plane, NULL); + if (!num_displays_for_plane) + continue; + + // Null terminated array + VkDisplayKHR *displays = + talloc_zero_array(planes_to_displays, VkDisplayKHR, + num_displays_for_plane + 1); + res = vkGetDisplayPlaneSupportedDisplaysKHR(device, j, + &num_displays_for_plane, + displays); + if (res != VK_SUCCESS) { + mp_msg(log, msgl_err, " Failed enumerating plane displays\n"); + continue; + } + planes_to_displays[j] = displays; + } + + // Enumerate Displays and Modes + VkDisplayPropertiesKHR *props = + talloc_array(tmp, VkDisplayPropertiesKHR, num_displays); + res = vkGetPhysicalDeviceDisplayPropertiesKHR(device, &num_displays, props); + if (res != VK_SUCCESS) { + mp_msg(log, msgl_err, " Failed enumerating display properties\n"); + goto done; + } + + for (int j = 0; j < num_displays; j++) { + if (selector && selector->display_idx != j) + continue; + + mp_msg(log, msgl_info, " Display %d: '%s' (%dx%d)\n", + j, + props[j].displayName, + props[j].physicalResolution.width, + props[j].physicalResolution.height); + + VkDisplayKHR display = props[j].display; + + mp_msg(log, msgl_info, " Modes:\n"); + + int num_modes = 0; + vkGetDisplayModePropertiesKHR(device, display, &num_modes, NULL); + if (!num_modes) { + mp_msg(log, msgl_info, " No available modes for display.\n"); + continue; + } + if (selector && selector->mode_idx + 1 > num_modes) { + mp_msg(log, msgl_err, "Selected mode (%d) not present.\n", + selector->mode_idx); + goto done; + } + + VkDisplayModePropertiesKHR *modes = + talloc_array(tmp, VkDisplayModePropertiesKHR, num_modes); + res = vkGetDisplayModePropertiesKHR(device, display, &num_modes, modes); + if (res != VK_SUCCESS) { + mp_msg(log, msgl_err, " Failed enumerating display modes\n"); + continue; + } + + for (int k 
= 0; k < num_modes; k++) { + if (selector && selector->mode_idx != k) + continue; + + mp_msg(log, msgl_info, " Mode %02d: %dx%d (%02d.%03d Hz)\n", k, + modes[k].parameters.visibleRegion.width, + modes[k].parameters.visibleRegion.height, + modes[k].parameters.refreshRate / 1000, + modes[k].parameters.refreshRate % 1000); + + if (selector) + selector->out_mode_props = talloc_dup(NULL, &modes[k]); + } + + int found_plane = -1; + mp_msg(log, msgl_info, " Planes:\n"); + for (int k = 0; k < num_planes; k++) { + VkDisplayKHR *displays = planes_to_displays[k]; + if (!displays) { + // This plane is not connected to any displays. + continue; + } + for (int d = 0; displays[d]; d++) { + if (displays[d] == display) { + if (selector && selector->plane_idx != k) + continue; + + mp_msg(log, msgl_info, " Plane: %d\n", k); + found_plane = k; + } + } + } + if (selector && selector->plane_idx != found_plane) { + mp_msg(log, msgl_err, + "Selected plane (%d) not available on selected display.\n", + selector->plane_idx); + goto done; + } + } + ret = true; +done: + talloc_free(tmp); + return ret; +} + +static int print_display_info(struct mp_log *log, const struct m_option *opt, + struct bstr name) { + VkResult res; + VkPhysicalDevice *devices = NULL; + + // Create a dummy instance to list the resources + VkInstanceCreateInfo info = { + .sType = VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO, + .enabledExtensionCount = 1, + .ppEnabledExtensionNames = (const char*[]) { + VK_KHR_DISPLAY_EXTENSION_NAME + }, + }; + + VkInstance inst = NULL; + res = vkCreateInstance(&info, NULL, &inst); + if (res != VK_SUCCESS) { + mp_warn(log, "Unable to create Vulkan instance.\n"); + goto done; + } + + uint32_t num_devices = 0; + vkEnumeratePhysicalDevices(inst, &num_devices, NULL); + if (!num_devices) { + mp_info(log, "No Vulkan devices detected.\n"); + goto done; + } + + devices = talloc_array(NULL, VkPhysicalDevice, num_devices); + vkEnumeratePhysicalDevices(inst, &num_devices, devices); + if (res != VK_SUCCESS) 
{ + mp_warn(log, "Failed enumerating physical devices.\n"); + goto done; + } + + mp_info(log, "Vulkan Devices:\n"); + for (int i = 0; i < num_devices; i++) { + walk_display_properties(log, MSGL_WARN, devices[i], NULL); + } + +done: + talloc_free(devices); + vkDestroyInstance(inst, NULL); + return M_OPT_EXIT; +} + +#define OPT_BASE_STRUCT struct vulkan_display_opts +const struct m_sub_options vulkan_display_conf = { + .opts = (const struct m_option[]) { + {"vulkan-display-display", OPT_INT(display), + .help = print_display_info, + }, + {"vulkan-display-mode", OPT_INT(mode), + .help = print_display_info, + }, + {"vulkan-display-plane", OPT_INT(plane), + .help = print_display_info, + }, + {0} + }, + .size = sizeof(struct vulkan_display_opts), + .defaults = &(struct vulkan_display_opts) {0}, +}; + +struct priv { + struct mpvk_ctx vk; + struct vulkan_display_opts *opts; + uint32_t width; + uint32_t height; + +#if HAVE_DRM + struct mpv_opengl_drm_params_v2 drm_params; +#endif +}; + +#if HAVE_DRM +static void open_render_fd(struct ra_ctx *ctx, const char *render_path) +{ + struct priv *p = ctx->priv; + p->drm_params.fd = -1; + p->drm_params.render_fd = open(render_path, O_RDWR | O_CLOEXEC); + if (p->drm_params.render_fd == -1) { + MP_WARN(ctx, "Failed to open render node: %s\n", + strerror(errno)); + } +} + +static bool drm_setup(struct ra_ctx *ctx, int display_idx, + VkPhysicalDevicePCIBusInfoPropertiesEXT *pci_props) +{ + drmDevice *devs[32] = {}; + int count = drmGetDevices2(0, devs, MP_ARRAY_SIZE(devs)); + for (int i = 0; i < count; i++) { + drmDevice *dev = devs[i]; + + if (dev->bustype != DRM_BUS_PCI || + dev->businfo.pci->domain != pci_props->pciDomain || + dev->businfo.pci->bus != pci_props->pciBus || + dev->businfo.pci->dev != pci_props->pciDevice || + dev->businfo.pci->func != pci_props->pciFunction) + { + continue; + } + + // Found our matching device. 
+ MP_DBG(ctx, "DRM device found for Vulkan device at %04X:%02X:%02X:%02X\n", + pci_props->pciDomain, pci_props->pciBus, + pci_props->pciDevice, pci_props->pciFunction); + + if (!(dev->available_nodes & 1 << DRM_NODE_RENDER)) { + MP_DBG(ctx, "Card does not have a render node.\n"); + continue; + } + + open_render_fd(ctx, dev->nodes[DRM_NODE_RENDER]); + + break; + } + drmFreeDevices(devs, MP_ARRAY_SIZE(devs)); + + struct priv *p = ctx->priv; + if (p->drm_params.render_fd == -1) { + MP_WARN(ctx, "Couldn't open DRM render node for Vulkan device " + "at: %04X:%02X:%02X:%02X\n", + pci_props->pciDomain, pci_props->pciBus, + pci_props->pciDevice, pci_props->pciFunction); + return false; + } + + return true; +} +#endif + +static void display_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + +#if HAVE_DRM + if (p->drm_params.render_fd != -1) { + close(p->drm_params.render_fd); + p->drm_params.render_fd = -1; + } +#endif +} + +static bool display_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; + VkResult res; + bool ret = false; + + VkDisplayModePropertiesKHR *mode = NULL; + + p->opts = mp_get_config_group(p, ctx->global, &vulkan_display_conf); + int display_idx = p->opts->display; + int mode_idx = p->opts->mode; + int plane_idx = p->opts->plane; + + if (!mpvk_init(vk, ctx, VK_KHR_DISPLAY_EXTENSION_NAME)) + goto error; + + char *device_name = ra_vk_ctx_get_device_name(ctx); + struct pl_vulkan_device_params vulkan_params = { + .instance = vk->vkinst->instance, + .device_name = device_name, + }; + VkPhysicalDevice device = pl_vulkan_choose_device(vk->pllog, &vulkan_params); + talloc_free(device_name); + if (!device) { + MP_MSG(ctx, msgl, "Failed to open physical device.\n"); + goto error; + } + +#if HAVE_DRM + VkPhysicalDevicePCIBusInfoPropertiesEXT pci_props = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT, + }; + VkPhysicalDeviceProperties2KHR props = { + .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2_KHR, + .pNext = &pci_props, + }; + vkGetPhysicalDeviceProperties2(device, &props); + + if (!drm_setup(ctx, display_idx, &pci_props)) + MP_WARN(ctx, "Failed to set up DRM.\n"); +#endif + + struct mode_selector selector = { + .display_idx = display_idx, + .mode_idx = mode_idx, + .plane_idx = plane_idx, + + }; + if (!walk_display_properties(ctx->log, msgl, device, &selector)) + goto error; + mode = selector.out_mode_props; + + VkDisplaySurfaceCreateInfoKHR xinfo = { + .sType = VK_STRUCTURE_TYPE_DISPLAY_SURFACE_CREATE_INFO_KHR, + .displayMode = mode->displayMode, + .imageExtent = mode->parameters.visibleRegion, + .planeIndex = plane_idx, + .transform = VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR, + .alphaMode = VK_DISPLAY_PLANE_ALPHA_OPAQUE_BIT_KHR, + }; + + res = vkCreateDisplayPlaneSurfaceKHR(vk->vkinst->instance, &xinfo, NULL, + &vk->surface); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Display surface\n"); + goto error; + } + + p->width = mode->parameters.visibleRegion.width; + p->height 
= mode->parameters.visibleRegion.height; + + struct ra_vk_ctx_params params = {0}; + if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR)) + goto error; + +#if HAVE_DRM + if (p->drm_params.render_fd > -1) { + ra_add_native_resource(ctx->ra, "drm_params_v2", &p->drm_params); + } else { + MP_WARN(ctx, + "No DRM render fd available. VAAPI hwaccel will not be usable.\n"); + } +#endif + + ret = true; + +done: + talloc_free(mode); + return ret; + +error: + display_uninit(ctx); + goto done; +} + +static bool display_reconfig(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + return ra_vk_ctx_resize(ctx, p->width, p->height); +} + +static int display_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + return VO_NOTIMPL; +} + +static void display_wakeup(struct ra_ctx *ctx) +{ + // TODO +} + +static void display_wait_events(struct ra_ctx *ctx, int64_t until_time_ns) +{ + // TODO +} + +const struct ra_ctx_fns ra_ctx_vulkan_display = { + .type = "vulkan", + .name = "displayvk", + .reconfig = display_reconfig, + .control = display_control, + .wakeup = display_wakeup, + .wait_events = display_wait_events, + .init = display_init, + .uninit = display_uninit, +}; diff --git a/video/out/vulkan/context_mac.m b/video/out/vulkan/context_mac.m new file mode 100644 index 0000000..8ac6e16 --- /dev/null +++ b/video/out/vulkan/context_mac.m @@ -0,0 +1,119 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
/*
 * Initialize the macOS Vulkan context: create the Vulkan instance with the
 * Metal surface extension, create the MacCommon backend, wrap its layer in
 * a VkSurfaceKHR and bring up the swapchain.
 *
 * Returns true on success. On failure everything set up so far is released
 * through mac_vk_uninit() and false is returned.
 */
static bool mac_vk_init(struct ra_ctx *ctx)
{
    struct priv *p = ctx->priv = talloc_zero(ctx, struct priv);
    struct mpvk_ctx *vk = &p->vk;
    int msgl = ctx->opts.probing ? MSGL_V : MSGL_ERR;

    if (!mpvk_init(vk, ctx, VK_EXT_METAL_SURFACE_EXTENSION_NAME))
        goto error;

    p->vo_mac = [[MacCommon alloc] init:ctx->vo];
    if (!p->vo_mac)
        goto error;

    VkMetalSurfaceCreateInfoEXT mac_info = {
        // The sType must identify the structure actually being passed.
        // The MVK macOS-surface value belongs to a different structure
        // (VkMacOSSurfaceCreateInfoMVK).
        .sType = VK_STRUCTURE_TYPE_METAL_SURFACE_CREATE_INFO_EXT,
        .pNext = NULL,
        .flags = 0,
        .pLayer = p->vo_mac.layer,
    };

    struct ra_vk_ctx_params params = {
        .swap_buffers = mac_vk_swap_buffers,
    };

    VkInstance inst = vk->vkinst->instance;
    VkResult res = vkCreateMetalSurfaceEXT(inst, &mac_info, NULL, &vk->surface);
    if (res != VK_SUCCESS) {
        MP_MSG(ctx, msgl, "Failed creating metal surface\n");
        goto error;
    }

    if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR))
        goto error;

    return true;
error:
    // Full teardown, as the other Vulkan backends do on a failed init.
    // This also releases the Vulkan instance, surface and log instead of
    // only the MacCommon object; messaging a nil vo_mac is a no-op.
    mac_vk_uninit(ctx);
    return false;
}
struct priv *p = ctx->priv; + int ret = [p->vo_mac control:ctx->vo events:events request:request data:arg]; + + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + + return ret; +} + +const struct ra_ctx_fns ra_ctx_vulkan_mac = { + .type = "vulkan", + .name = "macvk", + .reconfig = mac_vk_reconfig, + .control = mac_vk_control, + .init = mac_vk_init, + .uninit = mac_vk_uninit, +}; diff --git a/video/out/vulkan/context_wayland.c b/video/out/vulkan/context_wayland.c new file mode 100644 index 0000000..761ff5b --- /dev/null +++ b/video/out/vulkan/context_wayland.c @@ -0,0 +1,167 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include "video/out/gpu/context.h" +#include "video/out/present_sync.h" +#include "video/out/wayland_common.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +struct priv { + struct mpvk_ctx vk; +}; + +static bool wayland_vk_check_visible(struct ra_ctx *ctx) +{ + return vo_wayland_check_visible(ctx->vo); +} + +static void wayland_vk_swap_buffers(struct ra_ctx *ctx) +{ + struct vo_wayland_state *wl = ctx->vo->wl; + + if (!wl->opts->disable_vsync) + vo_wayland_wait_frame(wl); + + if (wl->use_present) + present_sync_swap(wl->present); +} + +static void wayland_vk_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info) +{ + struct vo_wayland_state *wl = ctx->vo->wl; + if (wl->use_present) + present_sync_get_info(wl->present, info); +} + +static void wayland_vk_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + vo_wayland_uninit(ctx->vo); +} + +static bool wayland_vk_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; + + if (!mpvk_init(vk, ctx, VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME)) + goto error; + + if (!vo_wayland_init(ctx->vo)) + goto error; + + VkWaylandSurfaceCreateInfoKHR wlinfo = { + .sType = VK_STRUCTURE_TYPE_WAYLAND_SURFACE_CREATE_INFO_KHR, + .display = ctx->vo->wl->display, + .surface = ctx->vo->wl->surface, + }; + + struct ra_vk_ctx_params params = { + .check_visible = wayland_vk_check_visible, + .swap_buffers = wayland_vk_swap_buffers, + .get_vsync = wayland_vk_get_vsync, + }; + + VkInstance inst = vk->vkinst->instance; + VkResult res = vkCreateWaylandSurfaceKHR(inst, &wlinfo, NULL, &vk->surface); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Wayland surface\n"); + goto error; + } + + /* Because in Wayland clients render whenever they receive a callback from + * the compositor, and the fact that the compositor usually stops sending + * callbacks once the surface is no longer visible, using FIFO here would + * mean the entire player would block on acquiring swapchain images. 
Hence, + * use MAILBOX to guarantee that there'll always be a swapchain image and + * the player won't block waiting on those */ + if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_MAILBOX_KHR)) + goto error; + + ra_add_native_resource(ctx->ra, "wl", ctx->vo->wl->display); + + return true; + +error: + wayland_vk_uninit(ctx); + return false; +} + +static bool resize(struct ra_ctx *ctx) +{ + struct vo_wayland_state *wl = ctx->vo->wl; + + MP_VERBOSE(wl, "Handling resize on the vk side\n"); + + const int32_t width = mp_rect_w(wl->geometry); + const int32_t height = mp_rect_h(wl->geometry); + + vo_wayland_set_opaque_region(wl, ctx->opts.want_alpha); + vo_wayland_handle_fractional_scale(wl); + return ra_vk_ctx_resize(ctx, width, height); +} + +static bool wayland_vk_reconfig(struct ra_ctx *ctx) +{ + return vo_wayland_reconfig(ctx->vo); +} + +static int wayland_vk_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_wayland_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +static void wayland_vk_wakeup(struct ra_ctx *ctx) +{ + vo_wayland_wakeup(ctx->vo); +} + +static void wayland_vk_wait_events(struct ra_ctx *ctx, int64_t until_time_ns) +{ + vo_wayland_wait_events(ctx->vo, until_time_ns); +} + +static void wayland_vk_update_render_opts(struct ra_ctx *ctx) +{ + struct vo_wayland_state *wl = ctx->vo->wl; + vo_wayland_set_opaque_region(wl, ctx->opts.want_alpha); + wl_surface_commit(wl->surface); +} + +const struct ra_ctx_fns ra_ctx_vulkan_wayland = { + .type = "vulkan", + .name = "waylandvk", + .reconfig = wayland_vk_reconfig, + .control = wayland_vk_control, + .wakeup = wayland_vk_wakeup, + .wait_events = wayland_vk_wait_events, + .update_render_opts = wayland_vk_update_render_opts, + .init = wayland_vk_init, + .uninit = wayland_vk_uninit, +}; diff --git a/video/out/vulkan/context_win.c b/video/out/vulkan/context_win.c new file mode 100644 index 
0000000..a89c644 --- /dev/null +++ b/video/out/vulkan/context_win.c @@ -0,0 +1,106 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "video/out/gpu/context.h" +#include "video/out/w32_common.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) + +struct priv { + struct mpvk_ctx vk; +}; + +static void win_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + vo_w32_uninit(ctx->vo); +} + +static bool win_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; + + if (!mpvk_init(vk, ctx, VK_KHR_WIN32_SURFACE_EXTENSION_NAME)) + goto error; + + if (!vo_w32_init(ctx->vo)) + goto error; + + VkWin32SurfaceCreateInfoKHR wininfo = { + .sType = VK_STRUCTURE_TYPE_WIN32_SURFACE_CREATE_INFO_KHR, + .hinstance = HINST_THISCOMPONENT, + .hwnd = vo_w32_hwnd(ctx->vo), + }; + + struct ra_vk_ctx_params params = {0}; + + VkInstance inst = vk->vkinst->instance; + VkResult res = vkCreateWin32SurfaceKHR(inst, &wininfo, NULL, &vk->surface); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Windows surface\n"); + goto error; + } + + if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR)) + goto error; + + return true; + +error: + win_uninit(ctx); + return false; +} + +static bool resize(struct ra_ctx *ctx) +{ + return ra_vk_ctx_resize(ctx, ctx->vo->dwidth, ctx->vo->dheight); +} + +static bool win_reconfig(struct ra_ctx *ctx) +{ + vo_w32_config(ctx->vo); + return resize(ctx); +} + +static int win_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_w32_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +const struct ra_ctx_fns ra_ctx_vulkan_win = { + .type = "vulkan", + .name = "winvk", + .reconfig = win_reconfig, + .control = win_control, + .init = win_init, + .uninit = win_uninit, +}; diff --git a/video/out/vulkan/context_xlib.c b/video/out/vulkan/context_xlib.c new file mode 100644 index 0000000..673dc31 --- /dev/null +++ b/video/out/vulkan/context_xlib.c @@ -0,0 +1,143 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "video/out/gpu/context.h" +#include "video/out/present_sync.h" +#include "video/out/x11_common.h" + +#include "common.h" +#include "context.h" +#include "utils.h" + +struct priv { + struct mpvk_ctx vk; +}; + +static bool xlib_check_visible(struct ra_ctx *ctx) +{ + return vo_x11_check_visible(ctx->vo); +} + +static void xlib_vk_swap_buffers(struct ra_ctx *ctx) +{ + if (ctx->vo->x11->use_present) + present_sync_swap(ctx->vo->x11->present); +} + +static void xlib_vk_get_vsync(struct ra_ctx *ctx, struct vo_vsync_info *info) +{ + struct vo_x11_state *x11 = ctx->vo->x11; + if (ctx->vo->x11->use_present) + present_sync_get_info(x11->present, info); +} + +static void xlib_uninit(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv; + + ra_vk_ctx_uninit(ctx); + mpvk_uninit(&p->vk); + vo_x11_uninit(ctx->vo); +} + +static bool xlib_init(struct ra_ctx *ctx) +{ + struct priv *p = ctx->priv = talloc_zero(ctx, struct priv); + struct mpvk_ctx *vk = &p->vk; + int msgl = ctx->opts.probing ? 
MSGL_V : MSGL_ERR; + + if (!mpvk_init(vk, ctx, VK_KHR_XLIB_SURFACE_EXTENSION_NAME)) + goto error; + + if (!vo_x11_init(ctx->vo)) + goto error; + + if (!vo_x11_create_vo_window(ctx->vo, NULL, "mpvk")) + goto error; + + VkXlibSurfaceCreateInfoKHR xinfo = { + .sType = VK_STRUCTURE_TYPE_XLIB_SURFACE_CREATE_INFO_KHR, + .dpy = ctx->vo->x11->display, + .window = ctx->vo->x11->window, + }; + + struct ra_vk_ctx_params params = { + .check_visible = xlib_check_visible, + .swap_buffers = xlib_vk_swap_buffers, + .get_vsync = xlib_vk_get_vsync, + }; + + VkInstance inst = vk->vkinst->instance; + VkResult res = vkCreateXlibSurfaceKHR(inst, &xinfo, NULL, &vk->surface); + if (res != VK_SUCCESS) { + MP_MSG(ctx, msgl, "Failed creating Xlib surface\n"); + goto error; + } + + if (!ra_vk_ctx_init(ctx, vk, params, VK_PRESENT_MODE_FIFO_KHR)) + goto error; + + ra_add_native_resource(ctx->ra, "x11", ctx->vo->x11->display); + + return true; + +error: + xlib_uninit(ctx); + return false; +} + +static bool resize(struct ra_ctx *ctx) +{ + return ra_vk_ctx_resize(ctx, ctx->vo->dwidth, ctx->vo->dheight); +} + +static bool xlib_reconfig(struct ra_ctx *ctx) +{ + vo_x11_config_vo_window(ctx->vo); + return resize(ctx); +} + +static int xlib_control(struct ra_ctx *ctx, int *events, int request, void *arg) +{ + int ret = vo_x11_control(ctx->vo, events, request, arg); + if (*events & VO_EVENT_RESIZE) { + if (!resize(ctx)) + return VO_ERROR; + } + return ret; +} + +static void xlib_wakeup(struct ra_ctx *ctx) +{ + vo_x11_wakeup(ctx->vo); +} + +static void xlib_wait_events(struct ra_ctx *ctx, int64_t until_time_ns) +{ + vo_x11_wait_events(ctx->vo, until_time_ns); +} + +const struct ra_ctx_fns ra_ctx_vulkan_xlib = { + .type = "vulkan", + .name = "x11vk", + .reconfig = xlib_reconfig, + .control = xlib_control, + .wakeup = xlib_wakeup, + .wait_events = xlib_wait_events, + .init = xlib_init, + .uninit = xlib_uninit, +}; diff --git a/video/out/vulkan/utils.c b/video/out/vulkan/utils.c new file mode 100644 index 
0000000..57a3664 --- /dev/null +++ b/video/out/vulkan/utils.c @@ -0,0 +1,42 @@ +#include "video/out/placebo/utils.h" +#include "utils.h" + +bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext) +{ + vk->pllog = mppl_log_create(ctx, ctx->vo->log); + if (!vk->pllog) + goto error; + + const char *exts[] = { + VK_KHR_SURFACE_EXTENSION_NAME, + surface_ext, + }; + + mppl_log_set_probing(vk->pllog, true); + vk->vkinst = pl_vk_inst_create(vk->pllog, &(struct pl_vk_inst_params) { + .debug = ctx->opts.debug, + .extensions = exts, + .num_extensions = MP_ARRAY_SIZE(exts), + }); + mppl_log_set_probing(vk->pllog, false); + if (!vk->vkinst) + goto error; + + return true; + +error: + mpvk_uninit(vk); + return false; +} + +void mpvk_uninit(struct mpvk_ctx *vk) +{ + if (vk->surface) { + assert(vk->vkinst); + vkDestroySurfaceKHR(vk->vkinst->instance, vk->surface, NULL); + vk->surface = VK_NULL_HANDLE; + } + + pl_vk_inst_destroy(&vk->vkinst); + pl_log_destroy(&vk->pllog); +} diff --git a/video/out/vulkan/utils.h b/video/out/vulkan/utils.h new file mode 100644 index 0000000..a98e147 --- /dev/null +++ b/video/out/vulkan/utils.h @@ -0,0 +1,6 @@ +#pragma once +#include "common.h" +#include "video/out/gpu/context.h" + +bool mpvk_init(struct mpvk_ctx *vk, struct ra_ctx *ctx, const char *surface_ext); +void mpvk_uninit(struct mpvk_ctx *vk); diff --git a/video/out/w32_common.c b/video/out/w32_common.c new file mode 100644 index 0000000..e6a4670 --- /dev/null +++ b/video/out/w32_common.c @@ -0,0 +1,2144 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <assert.h> +#include <limits.h> +#include <stdatomic.h> +#include <stdio.h> + +#include <windows.h> +#include <windowsx.h> +#include <dwmapi.h> +#include <ole2.h> +#include <shobjidl.h> +#include <avrt.h> + +#include "options/m_config.h" +#include "options/options.h" +#include "input/keycodes.h" +#include "input/input.h" +#include "input/event.h" +#include "stream/stream.h" +#include "common/msg.h" +#include "common/common.h" +#include "vo.h" +#include "win_state.h" +#include "w32_common.h" +#include "win32/displayconfig.h" +#include "win32/droptarget.h" +#include "osdep/io.h" +#include "osdep/threads.h" +#include "osdep/w32_keyboard.h" +#include "misc/dispatch.h" +#include "misc/rendezvous.h" +#include "mpv_talloc.h" + +EXTERN_C IMAGE_DOS_HEADER __ImageBase; +#define HINST_THISCOMPONENT ((HINSTANCE)&__ImageBase) + +#ifndef WM_DPICHANGED +#define WM_DPICHANGED (0x02E0) +#endif + +#ifndef DWMWA_USE_IMMERSIVE_DARK_MODE +#define DWMWA_USE_IMMERSIVE_DARK_MODE 20 +#endif + + +//Older MinGW compatibility +#define DWMWA_WINDOW_CORNER_PREFERENCE 33 +#define DWMWA_SYSTEMBACKDROP_TYPE 38 + +#ifndef DPI_ENUMS_DECLARED +typedef enum MONITOR_DPI_TYPE { + MDT_EFFECTIVE_DPI = 0, + MDT_ANGULAR_DPI = 1, + MDT_RAW_DPI = 2, + MDT_DEFAULT = MDT_EFFECTIVE_DPI +} MONITOR_DPI_TYPE; +#endif + +#define rect_w(r) ((r).right - (r).left) +#define rect_h(r) ((r).bottom - (r).top) + +struct w32_api { + HRESULT (WINAPI *pGetDpiForMonitor)(HMONITOR, MONITOR_DPI_TYPE, UINT*, UINT*); + BOOL (WINAPI *pImmDisableIME)(DWORD); + BOOL (WINAPI *pAdjustWindowRectExForDpi)(LPRECT lpRect, DWORD 
dwStyle, BOOL bMenu, DWORD dwExStyle, UINT dpi); + BOOLEAN (WINAPI *pShouldAppsUseDarkMode)(void); + DWORD (WINAPI *pSetPreferredAppMode)(DWORD mode); +}; + +struct vo_w32_state { + struct mp_log *log; + struct vo *vo; + struct mp_vo_opts *opts; + struct m_config_cache *opts_cache; + struct input_ctx *input_ctx; + + mp_thread thread; + bool terminate; + struct mp_dispatch_queue *dispatch; // used to run stuff on the GUI thread + bool in_dispatch; + + struct w32_api api; // stores functions from dynamically loaded DLLs + + HWND window; + HWND parent; // 0 normally, set in embedding mode + HHOOK parent_win_hook; + HWINEVENTHOOK parent_evt_hook; + + HMONITOR monitor; // Handle of the current screen + char *color_profile; // Path of the current screen's color profile + + // Has the window seen a WM_DESTROY? If so, don't call DestroyWindow again. + bool destroyed; + + bool focused; + + // whether the window position and size were initialized + bool window_bounds_initialized; + + bool current_fs; + bool toggle_fs; // whether the current fullscreen state needs to be switched + + // Note: maximized state doesn't involve nor modify windowrc + RECT windowrc; // currently known normal/fullscreen window client rect + RECT prev_windowrc; // saved normal window client rect while in fullscreen + + // video size + uint32_t o_dwidth; + uint32_t o_dheight; + + int dpi; + double dpi_scale; + + bool disable_screensaver; + bool cursor_visible; + atomic_uint event_flags; + + BOOL tracking; + TRACKMOUSEEVENT trackEvent; + + int mouse_x; + int mouse_y; + + // Should SetCursor be called when handling VOCTRL_SET_CURSOR_VISIBILITY? + bool can_set_cursor; + + // UTF-16 decoding state for WM_CHAR and VK_PACKET + int high_surrogate; + + // Fit the window to one monitor working area next time it's not fullscreen + // and not maximized. Used once after every new "untrusted" size comes from + // mpv, else we assume that the last known size is valid and don't fit. 
// Remaining fields of the window state struct (its definition starts above
// this span). The functions below reach them through `struct vo_w32_state *`.

    // FIXME: on a multi-monitor setup one bit is not enough, because the first
    // fit (autofit etc) should be to one monitor, but later size changes from
    // mpv like window-scale (VOCTRL_SET_UNFS_WINDOW_SIZE) should allow the
    // entire virtual desktop area - but we still limit to one monitor size.
    bool fit_on_screen;

    // When set (and not fullscreen), get_default_monitor() picks the monitor
    // from windowrc instead of from the window itself.
    bool win_force_pos;

    // taskbar_list is used to mark the window fullscreen, taskbar_list3 to
    // publish playback progress (see update_playback_state()).
    ITaskbarList2 *taskbar_list;
    ITaskbarList3 *taskbar_list3;
    UINT tbtnCreatedMsg;
    // update_playback_state() does nothing until this is true.
    bool tbtnCreated;

    // Playback state that update_playback_state() pushes to the taskbar.
    struct voctrl_playback_state current_pstate;

    // updates on move/resize/displaychange
    double display_fps;

    bool moving;

    // `snapped` aliases the four one-bit edge flags, so that all of them can
    // be tested or cleared with a single access.
    union {
        uint8_t snapped;
        struct {
            uint8_t snapped_left : 1;
            uint8_t snapped_right : 1;
            uint8_t snapped_top : 1;
            uint8_t snapped_bottom : 1;
        };
    };
    // Cursor offset from the window's top-left corner, recorded when the
    // window becomes snapped; used to let it unsnap again.
    int snap_dx;
    int snap_dy;

    HANDLE avrt_handle;

    bool cleared;
};

// Apply the Win32 "adjust window rect" computation to *rc, using the style
// bits currently set on hwnd. Does nothing when the border option is off.
// The DPI-aware pAdjustWindowRectExForDpi entry point is preferred when it was
// resolved; otherwise AdjustWindowRect() is used (which ignores the ex-style).
static void adjust_window_rect(struct vo_w32_state *w32, HWND hwnd, RECT *rc)
{
    if (!w32->opts->border)
        return;

    if (w32->api.pAdjustWindowRectExForDpi) {
        w32->api.pAdjustWindowRectExForDpi(rc,
            GetWindowLongPtrW(hwnd, GWL_STYLE), 0,
            GetWindowLongPtrW(hwnd, GWL_EXSTYLE), w32->dpi);
    } else {
        AdjustWindowRect(rc, GetWindowLongPtrW(hwnd, GWL_STYLE), 0);
    }
}

// Same as adjust_window_rect(), except that with border enabled, title bar
// disabled and not fullscreen, the top edge is restored to its input value.
static void add_window_borders(struct vo_w32_state *w32, HWND hwnd, RECT *rc)
{
    RECT win = *rc;
    adjust_window_rect(w32, hwnd, rc);
    // Adjust for title bar height that will be hidden in WM_NCCALCSIZE
    // (the expression below is equivalent to rc->top = win.top)
    if (w32->opts->border && !w32->opts->title_bar && !w32->current_fs)
        rc->top -= rc->top - win.top;
}

// basically a reverse AdjustWindowRect (win32 doesn't appear to have this)
// The border sizes are obtained by running add_window_borders() on an
// all-zero rectangle and are then subtracted from *rc edge by edge.
static void subtract_window_borders(struct vo_w32_state *w32, HWND hwnd, RECT *rc)
{
    RECT b = { 0, 0, 0, 0 };
    add_window_borders(w32, hwnd, &b);
    rc->left -= b.left;
    rc->top -= b.top;
    rc->right -= b.right;
    rc->bottom -= b.bottom;
}

// Hit-test helper used by the WM_NCHITTEST handler for windows without a
// border or title bar. Classifies (x, y) against the window rectangle shrunk
// by the sizing-frame metrics and returns the matching HT* code.
// x and y are compared with GetWindowRect() output, so they are presumably
// screen coordinates - confirm against the caller.
// Returns HTCLIENT for a maximized window and HTNOWHERE if the window
// rectangle cannot be queried.
static LRESULT borderless_nchittest(struct vo_w32_state *w32, int x, int y)
{
    if (IsMaximized(w32->window))
        return HTCLIENT;

    RECT rc;
    if (!GetWindowRect(w32->window, &rc))
        return HTNOWHERE;

    POINT frame = {GetSystemMetrics(SM_CXSIZEFRAME),
                   GetSystemMetrics(SM_CYSIZEFRAME)};
    if (w32->opts->border) {
        // The same padded-border metric is applied on both axes.
        frame.x += GetSystemMetrics(SM_CXPADDEDBORDER);
        frame.y += GetSystemMetrics(SM_CXPADDEDBORDER);
        if (!w32->opts->title_bar)
            rc.top -= GetSystemMetrics(SM_CXPADDEDBORDER);
    }
    InflateRect(&rc, -frame.x, -frame.y);

    // Hit-test top border
    if (y < rc.top) {
        if (x < rc.left)
            return HTTOPLEFT;
        if (x > rc.right)
            return HTTOPRIGHT;
        return HTTOP;
    }

    // Hit-test bottom border
    if (y > rc.bottom) {
        if (x < rc.left)
            return HTBOTTOMLEFT;
        if (x > rc.right)
            return HTBOTTOMRIGHT;
        return HTBOTTOM;
    }

    // Hit-test side borders
    if (x < rc.left)
        return HTLEFT;
    if (x > rc.right)
        return HTRIGHT;
    return HTCLIENT;
}

// turn a WMSZ_* input value in v into the border that should be resized
// take into consideration which borders are snapped to avoid detaching
// returns: 0=left, 1=top, 2=right, 3=bottom, -1=undefined
// Note that a horizontal drag (WMSZ_LEFT/RIGHT) yields a vertical border and
// vice versa: the result is the border the WM_SIZING handler moves to
// compensate, and it indexes that handler's {left, top, right, bottom} arrays.
static int get_resize_border(struct vo_w32_state *w32, int v)
{
    switch (v) {
    case WMSZ_LEFT:
    case WMSZ_RIGHT:
        return w32->snapped_bottom ? 1 : 3;
    case WMSZ_TOP:
    case WMSZ_BOTTOM:
        return w32->snapped_right ? 0 : 2;
    case WMSZ_TOPLEFT: return 1;
    case WMSZ_TOPRIGHT: return 1;
    case WMSZ_BOTTOMLEFT: return 3;
    case WMSZ_BOTTOMRIGHT: return 3;
    default: return -1;
    }
}

// True if bit 0x8000 of GetKeyState(vk) is set.
static bool key_state(int vk)
{
    return GetKeyState(vk) & 0x8000;
}

// Build the MP_KEY_MODIFIER_* mask from the current Ctrl/Shift/Alt key state.
// When AltGr handling is enabled, the LCONTROL+RMENU pair is not reported as
// Ctrl+Alt.
static int mod_state(struct vo_w32_state *w32)
{
    int res = 0;

    // AltGr is represented as LCONTROL+RMENU on Windows
    bool alt_gr = mp_input_use_alt_gr(w32->input_ctx) &&
        key_state(VK_RMENU) && key_state(VK_LCONTROL);

    if (key_state(VK_RCONTROL) || (key_state(VK_LCONTROL) && !alt_gr))
        res |= MP_KEY_MODIFIER_CTRL;
    if (key_state(VK_SHIFT))
        res |= MP_KEY_MODIFIER_SHIFT;
    if (key_state(VK_LMENU) || (key_state(VK_RMENU) && !alt_gr))
        res |= MP_KEY_MODIFIER_ALT;
    return res;
}

// Combine the low 10 bits of each surrogate into a code point >= 0x10000.
// The arguments are not validated here.
static int decode_surrogate_pair(wchar_t lead, wchar_t trail)
{
    return 0x10000 + (((lead & 0x3ff) << 10) | (trail & 0x3ff));
}

// Feed one UTF-16 code unit to the decoder. Returns the decoded code point,
// or 0 when the unit was a high surrogate (stored for the next call) or the
// sequence was invalid.
static int decode_utf16(struct vo_w32_state *w32, wchar_t c)
{
    // Decode UTF-16, keeping state in w32->high_surrogate
    if (IS_HIGH_SURROGATE(c)) {
        w32->high_surrogate = c;
        return 0;
    }
    if (IS_LOW_SURROGATE(c)) {
        if (!w32->high_surrogate) {
            MP_ERR(w32, "Invalid UTF-16 input\n");
            return 0;
        }
        int codepoint = decode_surrogate_pair(w32->high_surrogate, c);
        w32->high_surrogate = 0;
        return codepoint;
    }
    // A non-surrogate unit arrived while a high surrogate was pending:
    // both are dropped.
    if (w32->high_surrogate != 0) {
        w32->high_surrogate = 0;
        MP_ERR(w32, "Invalid UTF-16 input\n");
        return 0;
    }

    return c;
}

// Call ToUnicode() with VK_DECIMAL and an all-zero key state until it stops
// returning a negative value.
static void clear_keyboard_buffer(void)
{
    static const UINT vkey = VK_DECIMAL;
    static const BYTE keys[256] = { 0 };
    UINT scancode = MapVirtualKey(vkey, MAPVK_VK_TO_VSC);
    wchar_t buf[10];
    int ret = 0;

    // Use the method suggested by Michael Kaplan to clear any pending dead
    // keys from the current keyboard layout. See:
    // https://web.archive.org/web/20101004154432/http://blogs.msdn.com/b/michkap/archive/2006/04/06/569632.aspx
    // https://web.archive.org/web/20100820152419/http://blogs.msdn.com/b/michkap/archive/2007/10/27/5717859.aspx
    do {
        ret = ToUnicode(vkey, scancode, keys, buf, MP_ARRAY_SIZE(buf), 0);
    } while (ret < 0);
}

// Translate a virtual key plus key state to a single code point.
// Returns 0 if ToUnicode() produced no characters.
static int to_unicode(UINT vkey, UINT scancode, const BYTE keys[256])
{
    // This wraps ToUnicode to be stateless and to return only one character

    // Make the buffer 10 code units long to be safe, same as here:
    // https://web.archive.org/web/20101013215215/http://blogs.msdn.com/b/michkap/archive/2006/03/24/559169.aspx
    wchar_t buf[10] = { 0 };

    // Dead keys aren't useful for key shortcuts, so clear the keyboard state
    clear_keyboard_buffer();

    int len = ToUnicode(vkey, scancode, keys, buf, MP_ARRAY_SIZE(buf), 0);

    // Return the last complete UTF-16 code point. A negative return value
    // indicates a dead key, however there should still be a non-combining
    // version of the key in the buffer.
    if (len < 0)
        len = -len;
    if (len >= 2 && IS_SURROGATE_PAIR(buf[len - 2], buf[len - 1]))
        return decode_surrogate_pair(buf[len - 2], buf[len - 1]);
    if (len >= 1)
        return buf[len - 1];

    return 0;
}

// Translate a key press into a printable code point using the current
// keyboard state. Returns 0 if only a control character (< 0x20) or nothing
// could be produced.
static int decode_key(struct vo_w32_state *w32, UINT vkey, UINT scancode)
{
    BYTE keys[256];
    GetKeyboardState(keys);

    // If mp_input_use_alt_gr is false, detect and remove AltGr so normal
    // characters are generated. Note that AltGr is represented as
    // LCONTROL+RMENU on Windows.
    if ((keys[VK_RMENU] & 0x80) && (keys[VK_LCONTROL] & 0x80) &&
        !mp_input_use_alt_gr(w32->input_ctx))
    {
        keys[VK_RMENU] = keys[VK_LCONTROL] = 0;
        keys[VK_MENU] = keys[VK_LMENU];
        keys[VK_CONTROL] = keys[VK_RCONTROL];
    }

    int c = to_unicode(vkey, scancode, keys);

    // Some shift states prevent ToUnicode from working or cause it to produce
    // control characters. If this is detected, remove modifiers until it
    // starts producing normal characters.
    if (c < 0x20 && (keys[VK_MENU] & 0x80)) {
        keys[VK_LMENU] = keys[VK_RMENU] = keys[VK_MENU] = 0;
        c = to_unicode(vkey, scancode, keys);
    }
    if (c < 0x20 && (keys[VK_CONTROL] & 0x80)) {
        keys[VK_LCONTROL] = keys[VK_RCONTROL] = keys[VK_CONTROL] = 0;
        c = to_unicode(vkey, scancode, keys);
    }
    if (c < 0x20)
        return 0;

    // Decode lone UTF-16 surrogates (VK_PACKET can generate these)
    if (c < 0x10000)
        return decode_utf16(w32, c);
    return c;
}

// Forward a WM_APPCOMMAND command as an mpv key. Returns true if the command
// was mapped and queued, false if media keys are disabled or cmd is unmapped.
static bool handle_appcommand(struct vo_w32_state *w32, UINT cmd)
{
    if (!mp_input_use_media_keys(w32->input_ctx))
        return false;
    int mpkey = mp_w32_appcmd_to_mpkey(cmd);
    if (!mpkey)
        return false;
    mp_input_put_key(w32->input_ctx, mpkey | mod_state(w32));
    return true;
}

// Queue a key-down event. `scancode` is the high word of the key message's
// lParam (the callers pass HIWORD(lParam)), so it carries the KF_* flags.
static void handle_key_down(struct vo_w32_state *w32, UINT vkey, UINT scancode)
{
    // Ignore key repeat
    if (scancode & KF_REPEAT)
        return;

    // Try the fixed vkey table first, then fall back to layout translation.
    int mpkey = mp_w32_vkey_to_mpkey(vkey, scancode & KF_EXTENDED);
    if (!mpkey) {
        mpkey = decode_key(w32, vkey, scancode & (0xff | KF_EXTENDED));
        if (!mpkey)
            return;
    }

    mp_input_put_key(w32->input_ctx, mpkey | mod_state(w32) | MP_KEY_STATE_DOWN);
}

// Handle a key-up event. Releases of Alt, Ctrl and Shift are ignored; any
// other key releases all keys. `scancode` is unused.
static void handle_key_up(struct vo_w32_state *w32, UINT vkey, UINT scancode)
{
    switch (vkey) {
    case VK_MENU:
    case VK_CONTROL:
    case VK_SHIFT:
        break;
    default:
        // Releasing all keys on key-up is simpler and ensures no keys can
        // get "stuck." This matches the behaviour of other VOs.
        mp_input_put_key(w32->input_ctx, MP_INPUT_RELEASE_ALL);
    }
}

// Handle one UTF-16 unit from WM_CHAR/WM_SYSCHAR. Returns true if the message
// was consumed (including the "nothing to emit yet" case), false for control
// characters below 0x20.
static bool handle_char(struct vo_w32_state *w32, wchar_t wc)
{
    int c = decode_utf16(w32, wc);

    if (c == 0)
        return true;
    if (c < 0x20)
        return false;

    mp_input_put_key(w32->input_ctx, c | mod_state(w32));
    return true;
}

// Queue a mouse-button-down event. An unmodified left click outside a
// draggable input region (and not in fullscreen) starts a window drag instead
// and returns true; otherwise the mouse is captured and false is returned.
static bool handle_mouse_down(struct vo_w32_state *w32, int btn, int x, int y)
{
    btn |= mod_state(w32);
    mp_input_put_key(w32->input_ctx, btn | MP_KEY_STATE_DOWN);

    // btn now includes modifier bits, so this only matches a plain left click
    if (btn == MP_MBTN_LEFT && !w32->current_fs &&
        !mp_input_test_dragging(w32->input_ctx, x, y))
    {
        // Window dragging hack
        ReleaseCapture();
        SendMessage(w32->window, WM_NCLBUTTONDOWN, HTCAPTION, 0);
        mp_input_put_key(w32->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_UP);

        // Indicate the message was handled, so DefWindowProc won't be called
        return true;
    }

    SetCapture(w32->window);
    return false;
}

// Queue a mouse-button-up event and release the mouse capture.
static void handle_mouse_up(struct vo_w32_state *w32, int btn)
{
    btn |= mod_state(w32);
    mp_input_put_key(w32->input_ctx, btn | MP_KEY_STATE_UP);

    ReleaseCapture();
}

// Queue a wheel event. The sign of val selects the direction, and the
// magnitude is passed on in units of 120.
static void handle_mouse_wheel(struct vo_w32_state *w32, bool horiz, int val)
{
    int code;
    if (horiz)
        code = val > 0 ? MP_WHEEL_RIGHT : MP_WHEEL_LEFT;
    else
        code = val > 0 ? MP_WHEEL_UP : MP_WHEEL_DOWN;
    mp_input_put_wheel(w32->input_ctx, code | mod_state(w32), abs(val) / 120.);
}

// Atomically OR `events` into w32->event_flags and wake up the VO.
static void signal_events(struct vo_w32_state *w32, int events)
{
    atomic_fetch_or(&w32->event_flags, events);
    vo_wakeup(w32->vo);
}

// Wakeup callback; ctx is the struct vo_w32_state. Sends WM_NULL to the
// window so that the window procedure runs.
static void wakeup_gui_thread(void *ctx)
{
    struct vo_w32_state *w32 = ctx;
    // Wake up the window procedure (which processes the dispatch queue)
    if (GetWindowThreadProcessId(w32->window, NULL) == GetCurrentThreadId()) {
        PostMessageW(w32->window, WM_NULL, 0, 0);
    } else {
        // Use a sent message when cross-thread, since the queue of sent
        // messages is processed in some cases when posted messages are blocked
        SendNotifyMessageW(w32->window, WM_NULL, 0, 0);
    }
}

// Query the current refresh rate of `device` through EnumDisplaySettingsW().
// Returns 0.0 if the query fails or the driver reports 1.
static double get_refresh_rate_from_gdi(const wchar_t *device)
{
    DEVMODEW dm = { .dmSize = sizeof dm };
    if (!EnumDisplaySettingsW(device, ENUM_CURRENT_SETTINGS, &dm))
        return 0.0;

    // May return 0 or 1 which "represent the display hardware's default refresh rate"
    // https://msdn.microsoft.com/en-us/library/windows/desktop/dd183565%28v=vs.85%29.aspx
    // mpv validates this value with a threshold of 1, so don't return exactly 1
    if (dm.dmDisplayFrequency == 1)
        return 0.0;

    // dm.dmDisplayFrequency is an integer which is rounded down, so it's
    // highly likely that 23 represents 24/1.001, 59 represents 60/1.001, etc.
    // A caller can always reproduce the original value by using floor.
    double rv = dm.dmDisplayFrequency;
    switch (dm.dmDisplayFrequency) {
    case 23:
    case 29:
    case 47:
    case 59:
    case 71:
    case 89:
    case 95:
    case 119:
    case 143:
    case 164:
    case 239:
    case 359:
    case 479:
        rv = (rv + 1) / 1.001;
    }

    return rv;
}

// Return the ICM color profile path of `device` as a UTF-8 string produced by
// mp_to_utf8(ctx, ...), or NULL on failure. The caller in this file releases
// the result with talloc_free().
static char *get_color_profile(void *ctx, const wchar_t *device)
{
    char *name = NULL;

    HDC ic = CreateICW(device, NULL, NULL, NULL);
    if (!ic)
        goto done;
    wchar_t wname[MAX_PATH + 1];
    if (!GetICMProfileW(ic, &(DWORD){ MAX_PATH }, wname))
        goto done;

    name = mp_to_utf8(ctx, wname);
done:
    if (ic)
        DeleteDC(ic);
    return name;
}

// Refresh w32->dpi and w32->dpi_scale for w32->monitor and signal
// VO_EVENT_DPI. Falls back to the screen DC and finally to 96.
static void update_dpi(struct vo_w32_state *w32)
{
    UINT dpiX, dpiY;
    HDC hdc = NULL;
    int dpi = 0;

    if (w32->api.pGetDpiForMonitor && w32->api.pGetDpiForMonitor(w32->monitor,
                                     MDT_EFFECTIVE_DPI, &dpiX, &dpiY) == S_OK) {
        dpi = (int)dpiX;
        MP_VERBOSE(w32, "DPI detected from the new API: %d\n", dpi);
    } else if ((hdc = GetDC(NULL))) {
        dpi = GetDeviceCaps(hdc, LOGPIXELSX);
        ReleaseDC(NULL, hdc);
        MP_VERBOSE(w32, "DPI detected from the old API: %d\n", dpi);
    }

    if (dpi <= 0) {
        dpi = 96;
        MP_VERBOSE(w32, "Couldn't determine DPI, falling back to %d\n", dpi);
    }

    w32->dpi = dpi;
    w32->dpi_scale = w32->opts->hidpi_window_scale ? w32->dpi / 96.0 : 1.0;
    signal_events(w32, VO_EVENT_DPI);
}

// Re-read DPI, refresh rate and color profile if the window is now on a
// different monitor than the cached w32->monitor; otherwise do nothing.
static void update_display_info(struct vo_w32_state *w32)
{
    HMONITOR monitor = MonitorFromWindow(w32->window, MONITOR_DEFAULTTOPRIMARY);
    if (w32->monitor == monitor)
        return;
    w32->monitor = monitor;

    update_dpi(w32);

    MONITORINFOEXW mi = { .cbSize = sizeof mi };
    GetMonitorInfoW(monitor, (MONITORINFO*)&mi);

    // Try to get the monitor refresh rate.
    double freq = 0.0;

    if (freq == 0.0)
        freq = mp_w32_displayconfig_get_refresh_rate(mi.szDevice);
    if (freq == 0.0)
        freq = get_refresh_rate_from_gdi(mi.szDevice);

    if (freq != w32->display_fps) {
        MP_VERBOSE(w32, "display-fps: %f\n", freq);
        if (freq == 0.0)
            MP_WARN(w32, "Couldn't determine monitor refresh rate\n");
        w32->display_fps = freq;
        signal_events(w32, VO_EVENT_WIN_STATE);
    }

    // Replace the stored profile only if it changed (one side NULL, or the
    // strings differ). strcmp() is reached only when both are non-NULL.
    char *color_profile = get_color_profile(w32, mi.szDevice);
    if ((color_profile == NULL) != (w32->color_profile == NULL) ||
        (color_profile && strcmp(color_profile, w32->color_profile)))
    {
        if (color_profile)
            MP_VERBOSE(w32, "color-profile: %s\n", color_profile);
        talloc_free(w32->color_profile);
        w32->color_profile = color_profile;
        color_profile = NULL; // ownership moved to w32
        signal_events(w32, VO_EVENT_ICC_PROFILE_CHANGED);
    }

    talloc_free(color_profile);
}

// Invalidate the cached monitor so that update_display_info() re-reads
// everything.
static void force_update_display_info(struct vo_w32_state *w32)
{
    w32->monitor = 0;
    update_display_info(w32);
}

// Push w32->current_pstate to the taskbar progress indicator. Does nothing
// until the taskbar interface and button exist.
static void update_playback_state(struct vo_w32_state *w32)
{
    struct voctrl_playback_state *pstate = &w32->current_pstate;

    if (!w32->taskbar_list3 || !w32->tbtnCreated)
        return;

    if (!pstate->playing || !pstate->taskbar_progress) {
        ITaskbarList3_SetProgressState(w32->taskbar_list3, w32->window,
                                       TBPF_NOPROGRESS);
        return;
    }

    ITaskbarList3_SetProgressValue(w32->taskbar_list3, w32->window,
                                   pstate->percent_pos, 100);
    ITaskbarList3_SetProgressState(w32->taskbar_list3, w32->window,
                                   pstate->paused ? TBPF_PAUSED :
                                                    TBPF_NORMAL);
}

// State shared between get_monitor() and its enumeration callback.
struct get_monitor_data {
    int i;        // index of the monitor currently being visited
    int target;   // index being searched for
    HMONITOR mon; // result; stays NULL if `target` was not reached
};

// EnumDisplayMonitors() callback: stop at the monitor whose enumeration index
// equals data->target and store its handle.
static BOOL CALLBACK get_monitor_proc(HMONITOR mon, HDC dc, LPRECT r, LPARAM p)
{
    struct get_monitor_data *data = (struct get_monitor_data*)p;

    if (data->i == data->target) {
        data->mon = mon;
        return FALSE;
    }
    data->i++;
    return TRUE;
}

// Return the monitor with enumeration index `id`, or NULL if there is none.
static HMONITOR get_monitor(int id)
{
    struct get_monitor_data data = { .target = id };
    EnumDisplayMonitors(NULL, NULL, get_monitor_proc, (LPARAM)&data);
    return data.mon;
}

// Pick the monitor selected by the screen_id option (fsscreen_id while in
// fullscreen). Negative ids and unknown ids fall back as described below.
static HMONITOR get_default_monitor(struct vo_w32_state *w32)
{
    const int id = w32->current_fs ? w32->opts->fsscreen_id :
                                     w32->opts->screen_id;

    // Handle --fs-screen=<all|default> and --screen=default
    if (id < 0) {
        if (w32->win_force_pos && !w32->current_fs) {
            // Get window from forced position
            return MonitorFromRect(&w32->windowrc, MONITOR_DEFAULTTOPRIMARY);
        } else {
            // Let compositor decide
            return MonitorFromWindow(w32->window, MONITOR_DEFAULTTOPRIMARY);
        }
    }

    HMONITOR mon = get_monitor(id);
    if (mon)
        return mon;
    MP_VERBOSE(w32, "Screen %d does not exist, falling back to primary\n", id);
    return MonitorFromPoint((POINT){0, 0}, MONITOR_DEFAULTTOPRIMARY);
}

// Return MONITORINFO for the monitor the window is (or will be) on. The
// GetMonitorInfoW() result is not checked; on failure only cbSize is set.
static MONITORINFO get_monitor_info(struct vo_w32_state *w32)
{
    HMONITOR mon;
    if (IsWindowVisible(w32->window) && !w32->current_fs) {
        mon = MonitorFromWindow(w32->window, MONITOR_DEFAULTTOPRIMARY);
    } else {
        // The window is not visible during initialization, so get the
        // monitor by --screen or --fs-screen id, or fallback to primary.
        mon = get_default_monitor(w32);
    }
    MONITORINFO mi = { .cbSize = sizeof(mi) };
    GetMonitorInfoW(mon, &mi);
    return mi;
}

// Full rectangle of the target monitor, or of the whole virtual screen when
// fullscreen with fsscreen_id == -2.
static RECT get_screen_area(struct vo_w32_state *w32)
{
    // Handle --fs-screen=all
    if (w32->current_fs && w32->opts->fsscreen_id == -2) {
        const int x = GetSystemMetrics(SM_XVIRTUALSCREEN);
        const int y = GetSystemMetrics(SM_YVIRTUALSCREEN);
        return (RECT) { x, y, x + GetSystemMetrics(SM_CXVIRTUALSCREEN),
                        y + GetSystemMetrics(SM_CYVIRTUALSCREEN) };
    }
    return get_monitor_info(w32).rcMonitor;
}

// Area available to the window: the screen area in fullscreen, otherwise the
// monitor's work area (rcWork).
static RECT get_working_area(struct vo_w32_state *w32)
{
    return w32->current_fs ? get_screen_area(w32) :
                             get_monitor_info(w32).rcWork;
}

// Adjust working area boundaries to compensate for invisible borders.
// Each edge of *wa_rect is moved outwards by the difference between *wnd_rect
// and the DWM extended frame bounds of wnd. *wa_rect is left untouched if the
// DWM query fails.
static void adjust_working_area_for_extended_frame(RECT *wa_rect, RECT *wnd_rect, HWND wnd)
{
    RECT frame = {0};

    if (DwmGetWindowAttribute(wnd, DWMWA_EXTENDED_FRAME_BOUNDS,
                              &frame, sizeof(RECT)) == S_OK) {
        wa_rect->left -= frame.left - wnd_rect->left;
        wa_rect->top -= frame.top - wnd_rect->top;
        wa_rect->right += wnd_rect->right - frame.right;
        wa_rect->bottom += wnd_rect->bottom - frame.bottom;
    }
}

// WM_MOVING helper: snap the proposed window rectangle *rc to the edges of the
// working area. Returns true if *rc was (re)written and the caller should
// report the message as handled, false if snapping does not apply.
static bool snap_to_screen_edges(struct vo_w32_state *w32, RECT *rc)
{
    if (w32->parent || w32->current_fs || IsMaximized(w32->window))
        return false;

    if (!w32->opts->snap_window) {
        w32->snapped = 0;
        return false;
    }

    RECT rect;
    POINT cursor;
    if (!GetWindowRect(w32->window, &rect) || !GetCursorPos(&cursor))
        return false;
    // Check if window is going to be aero-snapped
    if (rect_w(*rc) != rect_w(rect) || rect_h(*rc) != rect_h(rect))
        return false;

    // Check if window has already been aero-snapped
    WINDOWPLACEMENT wp = {0};
    wp.length = sizeof(wp);
    if (!GetWindowPlacement(w32->window, &wp))
        return false;
    RECT wr = wp.rcNormalPosition;
    if (rect_w(*rc) != rect_w(wr) || rect_h(*rc) != rect_h(wr))
        return false;

    // Get the work area to let the window snap to taskbar
    wr = get_working_area(w32);

    adjust_working_area_for_extended_frame(&wr, &rect, w32->window);

    // Let the window to unsnap by changing its position,
    // otherwise it will stick to the screen edges forever
    rect = *rc;
    if (w32->snapped) {
        OffsetRect(&rect, cursor.x - rect.left - w32->snap_dx,
                          cursor.y - rect.top - w32->snap_dy);
    }

    // Snap distance: 16 units at 96 DPI, scaled with the current DPI
    int threshold = (w32->dpi * 16) / 96;
    bool was_snapped = !!w32->snapped;
    w32->snapped = 0;
    // Adjust X position
    // snapped_left & snapped_right are mutually exclusive
    if (abs(rect.left - wr.left) < threshold) {
        w32->snapped_left = 1;
        OffsetRect(&rect, wr.left - rect.left, 0);
    } else if (abs(rect.right - wr.right) < threshold) {
        w32->snapped_right = 1;
        OffsetRect(&rect, wr.right - rect.right, 0);
    }
    // Adjust Y position
    // snapped_top & snapped_bottom are mutually exclusive
    if (abs(rect.top - wr.top) < threshold) {
        w32->snapped_top = 1;
        OffsetRect(&rect, 0, wr.top - rect.top);
    } else if (abs(rect.bottom - wr.bottom) < threshold) {
        w32->snapped_bottom = 1;
        OffsetRect(&rect, 0, wr.bottom - rect.bottom);
    }

    // Remember where the cursor grabbed the window when snapping starts
    if (!was_snapped && w32->snapped != 0) {
        w32->snap_dx = cursor.x - rc->left;
        w32->snap_dy = cursor.y - rc->top;
    }

    *rc = rect;
    return true;
}

// Return `style` with the frame-related bits replaced according to the
// current fullscreen state and border option. WS_SYSMENU is always set.
static DWORD update_style(struct vo_w32_state *w32, DWORD style)
{
    const DWORD NO_FRAME = WS_OVERLAPPED | WS_MINIMIZEBOX | WS_THICKFRAME;
    const DWORD FRAME = WS_OVERLAPPEDWINDOW;
    const DWORD FULLSCREEN = NO_FRAME & ~WS_THICKFRAME;
    style &= ~(NO_FRAME | FRAME | FULLSCREEN);
    style |= WS_SYSMENU;
    if (w32->current_fs) {
        style |= FULLSCREEN;
    } else {
        style |= w32->opts->border ? FRAME : NO_FRAME;
    }
    return style;
}

// Top border size reported by adjust_window_rect() for an all-zero rectangle
// (0 when the border option is off).
static LONG get_title_bar_height(struct vo_w32_state *w32)
{
    RECT rc = {0};
    adjust_window_rect(w32, w32->window, &rc);
    return -rc.top;
}

// Apply update_style() to the window's GWL_STYLE. No-op when w32->parent is
// set.
static void update_window_style(struct vo_w32_state *w32)
{
    if (w32->parent)
        return;

    // SetWindowLongPtr can trigger a WM_SIZE event, so window rect
    // has to be saved now and restored after setting the new style.
    const RECT wr = w32->windowrc;
    const DWORD style = GetWindowLongPtrW(w32->window, GWL_STYLE);
    SetWindowLongPtrW(w32->window, GWL_STYLE, update_style(w32, style));
    w32->windowrc = wr;
}

// Resize window rect to width = w and height = h. If window is snapped,
// don't let it detach from snapped borders. Otherwise resize around the center.
static void resize_and_move_rect(struct vo_w32_state *w32, RECT *rc, int w, int h)
{
    int x, y;

    if (w32->snapped_left)
        x = rc->left;
    else if (w32->snapped_right)
        x = rc->right - w;
    else
        x = rc->left + rect_w(*rc) / 2 - w / 2;

    if (w32->snapped_top)
        y = rc->top;
    else if (w32->snapped_bottom)
        y = rc->bottom - h;
    else
        y = rc->top + rect_h(*rc) / 2 - h / 2;

    SetRect(rc, x, y, x + w, y + h);
}

// If rc is wider/taller than n_w/n_h, shrink rc size while keeping the center.
// returns true if the rectangle was modified.
// The aspect ratio of rc is preserved; repositioning is delegated to
// resize_and_move_rect(), so snapped edges stay in place.
static bool fit_rect_size(struct vo_w32_state *w32, RECT *rc, long n_w, long n_h)
{
    // nothing to do if we already fit.
    int o_w = rect_w(*rc), o_h = rect_h(*rc);
    if (o_w <= n_w && o_h <= n_h)
        return false;

    // Apply letterboxing
    // (MPMAX(..., 1) guards the divisions against a zero height)
    const float o_asp = o_w / (float)MPMAX(o_h, 1);
    const float n_asp = n_w / (float)MPMAX(n_h, 1);
    if (o_asp > n_asp) {
        n_h = n_w / o_asp;
    } else {
        n_w = n_h * o_asp;
    }

    resize_and_move_rect(w32, rc, n_w, n_h);

    return true;
}

// If the window is bigger than the desktop, shrink to fit with same center.
// Also, if the top edge is above the working area, move down to align.
+static void fit_window_on_screen(struct vo_w32_state *w32) +{ + RECT screen = get_working_area(w32); + if (w32->opts->border) + subtract_window_borders(w32, w32->window, &screen); + + RECT window_rect; + if (GetWindowRect(w32->window, &window_rect)) + adjust_working_area_for_extended_frame(&screen, &window_rect, w32->window); + + bool adjusted = fit_rect_size(w32, &w32->windowrc, rect_w(screen), rect_h(screen)); + + if (w32->windowrc.top < screen.top) { + // if the top-edge of client area is above the target area (mainly + // because the client-area is centered but the title bar is taller + // than the bottom border), then move it down to align the edges. + // Windows itself applies the same constraint during manual move. + w32->windowrc.bottom += screen.top - w32->windowrc.top; + w32->windowrc.top = screen.top; + adjusted = true; + } + + if (adjusted) { + MP_VERBOSE(w32, "adjusted window bounds: %d:%d:%d:%d\n", + (int)w32->windowrc.left, (int)w32->windowrc.top, + (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc)); + } +} + +// Calculate new fullscreen state and change window size and position. +static void update_fullscreen_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return; + + bool new_fs = w32->opts->fullscreen; + if (w32->toggle_fs) { + new_fs = !w32->current_fs; + w32->toggle_fs = false; + } + + bool toggle_fs = w32->current_fs != new_fs; + w32->opts->fullscreen = w32->current_fs = new_fs; + m_config_cache_write_opt(w32->opts_cache, + &w32->opts->fullscreen); + + if (toggle_fs) { + if (w32->current_fs) { + // Save window rect when switching to fullscreen. + w32->prev_windowrc = w32->windowrc; + MP_VERBOSE(w32, "save window bounds: %d:%d:%d:%d\n", + (int)w32->windowrc.left, (int)w32->windowrc.top, + (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc)); + } else { + // Restore window rect when switching from fullscreen. 
+ w32->windowrc = w32->prev_windowrc; + } + } + + if (w32->current_fs) + w32->windowrc = get_screen_area(w32); + + MP_VERBOSE(w32, "reset window bounds: %d:%d:%d:%d\n", + (int)w32->windowrc.left, (int)w32->windowrc.top, + (int)rect_w(w32->windowrc), (int)rect_h(w32->windowrc)); +} + +static void update_minimized_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return; + + if (!!IsMinimized(w32->window) != w32->opts->window_minimized) { + if (w32->opts->window_minimized) { + ShowWindow(w32->window, SW_SHOWMINNOACTIVE); + } else { + ShowWindow(w32->window, SW_RESTORE); + } + } +} + +static void update_maximized_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return; + + // Don't change the maximized state in fullscreen for now. In future, this + // should be made to apply the maximized state on leaving fullscreen. + if (w32->current_fs) + return; + + WINDOWPLACEMENT wp = { .length = sizeof wp }; + GetWindowPlacement(w32->window, &wp); + + if (wp.showCmd == SW_SHOWMINIMIZED) { + // When the window is minimized, setting this property just changes + // whether it will be maximized when it's restored + if (w32->opts->window_maximized) { + wp.flags |= WPF_RESTORETOMAXIMIZED; + } else { + wp.flags &= ~WPF_RESTORETOMAXIMIZED; + } + SetWindowPlacement(w32->window, &wp); + } else if ((wp.showCmd == SW_SHOWMAXIMIZED) != w32->opts->window_maximized) { + if (w32->opts->window_maximized) { + ShowWindow(w32->window, SW_SHOWMAXIMIZED); + } else { + ShowWindow(w32->window, SW_SHOWNOACTIVATE); + } + } +} + +static bool is_visible(HWND window) +{ + // Unlike IsWindowVisible, this doesn't check the window's parents + return GetWindowLongPtrW(window, GWL_STYLE) & WS_VISIBLE; +} + +//Set the mpv window's affinity. +//This will affect how it's displayed on the desktop and in system-level operations like taking screenshots. 
+static void update_affinity(struct vo_w32_state *w32) +{ + if (!w32 || w32->parent) { + return; + } + SetWindowDisplayAffinity(w32->window, w32->opts->window_affinity); +} + +static void update_window_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return; + + RECT wr = w32->windowrc; + add_window_borders(w32, w32->window, &wr); + + SetWindowPos(w32->window, w32->opts->ontop ? HWND_TOPMOST : HWND_NOTOPMOST, + wr.left, wr.top, rect_w(wr), rect_h(wr), + SWP_FRAMECHANGED | SWP_NOACTIVATE | SWP_NOOWNERZORDER); + + // Show the window if it's not yet visible + if (!is_visible(w32->window)) { + if (w32->opts->window_minimized) { + ShowWindow(w32->window, SW_SHOWMINNOACTIVE); + update_maximized_state(w32); // Set the WPF_RESTORETOMAXIMIZED flag + } else if (w32->opts->window_maximized) { + ShowWindow(w32->window, SW_SHOWMAXIMIZED); + } else { + ShowWindow(w32->window, SW_SHOW); + } + } + + // Notify the taskbar about the fullscreen state only after the window + // is visible, to make sure the taskbar item has already been created + if (w32->taskbar_list) { + ITaskbarList2_MarkFullscreenWindow(w32->taskbar_list, + w32->window, w32->current_fs); + } + + // Update snapping status if needed + if (w32->opts->snap_window && !w32->parent && + !w32->current_fs && !IsMaximized(w32->window)) { + RECT wa = get_working_area(w32); + + adjust_working_area_for_extended_frame(&wa, &wr, w32->window); + + // snapped_left & snapped_right are mutually exclusive + if (wa.left == wr.left && wa.right == wr.right) { + // Leave as is. + } else if (wa.left == wr.left) { + w32->snapped_left = 1; + w32->snapped_right = 0; + } else if (wa.right == wr.right) { + w32->snapped_right = 1; + w32->snapped_left = 0; + } else { + w32->snapped_left = w32->snapped_right = 0; + } + + // snapped_top & snapped_bottom are mutually exclusive + if (wa.top == wr.top && wa.bottom == wr.bottom) { + // Leave as is. 
+ } else if (wa.top == wr.top) { + w32->snapped_top = 1; + w32->snapped_bottom = 0; + } else if (wa.bottom == wr.bottom) { + w32->snapped_bottom = 1; + w32->snapped_top = 0; + } else { + w32->snapped_top = w32->snapped_bottom = 0; + } + } + + signal_events(w32, VO_EVENT_RESIZE); +} + +static void update_corners_pref(const struct vo_w32_state *w32) { + if (w32->parent) + return; + + int pref = w32->current_fs ? 0 : w32->opts->window_corners; + DwmSetWindowAttribute(w32->window, DWMWA_WINDOW_CORNER_PREFERENCE, + &pref, sizeof(pref)); +} + +static void reinit_window_state(struct vo_w32_state *w32) +{ + if (w32->parent) + return; + + // The order matters: fs state should be updated prior to changing styles + update_fullscreen_state(w32); + update_corners_pref(w32); + update_window_style(w32); + + // fit_on_screen is applied at most once when/if applicable (normal win). + if (w32->fit_on_screen && !w32->current_fs && !IsMaximized(w32->window)) { + fit_window_on_screen(w32); + w32->fit_on_screen = false; + } + + // Show and activate the window after all window state parameters were set + update_window_state(w32); +} + +// Follow Windows settings and update dark mode state +// Microsoft documented how to enable dark mode for title bar: +// https://learn.microsoft.com/windows/apps/desktop/modernize/apply-windows-themes +// https://learn.microsoft.com/windows/win32/api/dwmapi/ne-dwmapi-dwmwindowattribute +// Documentation says to set the DWMWA_USE_IMMERSIVE_DARK_MODE attribute to +// TRUE to honor dark mode for the window, FALSE to always use light mode. While +// in fact setting it to TRUE causes dark mode to be always enabled, regardless +// of the settings. Since it is quite unlikely that it will be fixed, just use +// UxTheme API to check if dark mode should be applied and while at it enable it +// fully. Ideally this function should only call the DwmSetWindowAttribute(), +// but it just doesn't work as documented. 
// Dark mode is used unless high contrast is active or the (optional)
// pShouldAppsUseDarkMode entry point says otherwise. The result is applied
// through both the window theme and the DWM attribute.
static void update_dark_mode(const struct vo_w32_state *w32)
{
    if (w32->api.pSetPreferredAppMode)
        w32->api.pSetPreferredAppMode(1); // allow dark mode

    // The return value is not checked; hc.dwFlags stays 0 if the call does
    // not fill the struct.
    HIGHCONTRAST hc = {sizeof(hc)};
    SystemParametersInfo(SPI_GETHIGHCONTRAST, sizeof(hc), &hc, 0);
    bool high_contrast = hc.dwFlags & HCF_HIGHCONTRASTON;

    // if pShouldAppsUseDarkMode is not available, just assume it to be true
    const BOOL use_dark_mode = !high_contrast && (!w32->api.pShouldAppsUseDarkMode ||
                                                  w32->api.pShouldAppsUseDarkMode());

    SetWindowTheme(w32->window, use_dark_mode ? L"DarkMode_Explorer" : L"", NULL);

    DwmSetWindowAttribute(w32->window, DWMWA_USE_IMMERSIVE_DARK_MODE,
                          &use_dark_mode, sizeof(use_dark_mode));
}

// Pass the backdrop_type option to DWM. No-op when w32->parent is set.
static void update_backdrop(const struct vo_w32_state *w32)
{
    if (w32->parent)
        return;

    int backdropType = w32->opts->backdrop_type;
    DwmSetWindowAttribute(w32->window, DWMWA_SYSTEMBACKDROP_TYPE,
                          &backdropType, sizeof(backdropType));
}

static LRESULT CALLBACK WndProc(HWND hWnd, UINT message, WPARAM wParam,
                                LPARAM lParam)
{
    struct vo_w32_state *w32 = (void*)GetWindowLongPtrW(hWnd, GWLP_USERDATA);
    if (!w32) {
        // WM_NCCREATE is supposed to be the first message that a window
        // receives. It allows struct vo_w32_state to be passed from
        // CreateWindow's lpParam to the window procedure. However, as a
        // longstanding Windows bug, overlapped top-level windows will get a
        // WM_GETMINMAXINFO before WM_NCCREATE. This can be ignored.
+ if (message != WM_NCCREATE) + return DefWindowProcW(hWnd, message, wParam, lParam); + + CREATESTRUCTW *cs = (CREATESTRUCTW *)lParam; + w32 = cs->lpCreateParams; + w32->window = hWnd; + SetWindowLongPtrW(hWnd, GWLP_USERDATA, (LONG_PTR)w32); + } + + // The dispatch queue should be processed as soon as possible to prevent + // playback glitches, since it is likely blocking the VO thread + if (!w32->in_dispatch) { + w32->in_dispatch = true; + mp_dispatch_queue_process(w32->dispatch, 0); + w32->in_dispatch = false; + } + + switch (message) { + case WM_ERASEBKGND: + if (w32->cleared || !w32->opts->border || w32->current_fs) + return TRUE; + break; + case WM_PAINT: + w32->cleared = true; + signal_events(w32, VO_EVENT_EXPOSE); + break; + case WM_MOVE: { + w32->moving = false; + const int x = GET_X_LPARAM(lParam), y = GET_Y_LPARAM(lParam); + OffsetRect(&w32->windowrc, x - w32->windowrc.left, + y - w32->windowrc.top); + + // Window may intersect with new monitors (see VOCTRL_GET_DISPLAY_NAMES) + signal_events(w32, VO_EVENT_WIN_STATE); + + update_display_info(w32); // if we moved between monitors + break; + } + case WM_MOVING: { + w32->moving = true; + RECT *rc = (RECT*)lParam; + if (snap_to_screen_edges(w32, rc)) + return TRUE; + break; + } + case WM_ENTERSIZEMOVE: + w32->moving = true; + if (w32->snapped != 0) { + // Save the cursor offset from the window borders, + // so the player window can be unsnapped later + RECT rc; + POINT cursor; + if (GetWindowRect(w32->window, &rc) && GetCursorPos(&cursor)) { + w32->snap_dx = cursor.x - rc.left; + w32->snap_dy = cursor.y - rc.top; + } + } + break; + case WM_EXITSIZEMOVE: + w32->moving = false; + break; + case WM_SIZE: { + const int w = LOWORD(lParam), h = HIWORD(lParam); + if (w > 0 && h > 0) { + w32->windowrc.right = w32->windowrc.left + w; + w32->windowrc.bottom = w32->windowrc.top + h; + signal_events(w32, VO_EVENT_RESIZE); + MP_VERBOSE(w32, "resize window: %d:%d\n", w, h); + } + + // Window may have been minimized, 
maximized or restored + if (is_visible(w32->window)) { + WINDOWPLACEMENT wp = { .length = sizeof wp }; + GetWindowPlacement(w32->window, &wp); + + bool is_minimized = wp.showCmd == SW_SHOWMINIMIZED; + if (w32->opts->window_minimized != is_minimized) { + w32->opts->window_minimized = is_minimized; + m_config_cache_write_opt(w32->opts_cache, + &w32->opts->window_minimized); + } + + bool is_maximized = wp.showCmd == SW_SHOWMAXIMIZED || + (wp.showCmd == SW_SHOWMINIMIZED && + (wp.flags & WPF_RESTORETOMAXIMIZED)); + if (w32->opts->window_maximized != is_maximized) { + w32->opts->window_maximized = is_maximized; + m_config_cache_write_opt(w32->opts_cache, + &w32->opts->window_maximized); + } + } + + signal_events(w32, VO_EVENT_WIN_STATE); + + update_display_info(w32); + break; + } + case WM_SIZING: + if (w32->opts->keepaspect && w32->opts->keepaspect_window && + !w32->current_fs && !w32->parent) + { + RECT *rc = (RECT*)lParam; + // get client area of the windows if it had the rect rc + // (subtracting the window borders) + RECT r = *rc; + subtract_window_borders(w32, w32->window, &r); + int c_w = rect_w(r), c_h = rect_h(r); + float aspect = w32->o_dwidth / (float) MPMAX(w32->o_dheight, 1); + int d_w = c_h * aspect - c_w; + int d_h = c_w / aspect - c_h; + int d_corners[4] = { d_w, d_h, -d_w, -d_h }; + int corners[4] = { rc->left, rc->top, rc->right, rc->bottom }; + int corner = get_resize_border(w32, wParam); + if (corner >= 0) + corners[corner] -= d_corners[corner]; + *rc = (RECT) { corners[0], corners[1], corners[2], corners[3] }; + return TRUE; + } + break; + case WM_DPICHANGED: + update_display_info(w32); + + RECT *rc = (RECT*)lParam; + w32->windowrc = *rc; + subtract_window_borders(w32, w32->window, &w32->windowrc); + update_window_state(w32); + break; + case WM_CLOSE: + // Don't destroy the window yet to not lose wakeup events. 
+ mp_input_put_key(w32->input_ctx, MP_KEY_CLOSE_WIN); + return 0; + case WM_NCDESTROY: // Sometimes only WM_NCDESTROY is received in --wid mode + case WM_DESTROY: + if (w32->destroyed) + break; + // If terminate is not set, something else destroyed the window. This + // can also happen in --wid mode when the parent window is destroyed. + if (!w32->terminate) + mp_input_put_key(w32->input_ctx, MP_KEY_CLOSE_WIN); + RevokeDragDrop(w32->window); + w32->destroyed = true; + w32->window = NULL; + PostQuitMessage(0); + break; + case WM_SYSCOMMAND: + switch (wParam & 0xFFF0) { + case SC_SCREENSAVE: + case SC_MONITORPOWER: + if (w32->disable_screensaver) { + MP_VERBOSE(w32, "killing screensaver\n"); + return 0; + } + break; + case SC_RESTORE: + if (IsMaximized(w32->window) && w32->current_fs) { + w32->toggle_fs = true; + reinit_window_state(w32); + + return 0; + } + break; + } + break; + case WM_NCACTIVATE: + // Cosmetic to remove blinking window border when initializing window + if (!w32->opts->border) + lParam = -1; + break; + case WM_NCHITTEST: + // Provide sizing handles for borderless windows + if ((!w32->opts->border || !w32->opts->title_bar) && !w32->current_fs) { + return borderless_nchittest(w32, GET_X_LPARAM(lParam), + GET_Y_LPARAM(lParam)); + } + break; + case WM_APPCOMMAND: + if (handle_appcommand(w32, GET_APPCOMMAND_LPARAM(lParam))) + return TRUE; + break; + case WM_SYSKEYDOWN: + // Open the window menu on Alt+Space. Normally DefWindowProc opens the + // window menu in response to WM_SYSCHAR, but since mpv translates its + // own keyboard input, WM_SYSCHAR isn't generated, so the window menu + // must be opened manually. 
+ if (wParam == VK_SPACE) { + SendMessage(w32->window, WM_SYSCOMMAND, SC_KEYMENU, ' '); + return 0; + } + + handle_key_down(w32, wParam, HIWORD(lParam)); + if (wParam == VK_F10) + return 0; + break; + case WM_KEYDOWN: + handle_key_down(w32, wParam, HIWORD(lParam)); + break; + case WM_SYSKEYUP: + case WM_KEYUP: + handle_key_up(w32, wParam, HIWORD(lParam)); + if (wParam == VK_F10) + return 0; + break; + case WM_CHAR: + case WM_SYSCHAR: + if (handle_char(w32, wParam)) + return 0; + break; + case WM_KILLFOCUS: + mp_input_put_key(w32->input_ctx, MP_INPUT_RELEASE_ALL); + w32->focused = false; + signal_events(w32, VO_EVENT_FOCUS); + return 0; + case WM_SETFOCUS: + w32->focused = true; + signal_events(w32, VO_EVENT_FOCUS); + return 0; + case WM_SETCURSOR: + // The cursor should only be hidden if the mouse is in the client area + // and if the window isn't in menu mode (HIWORD(lParam) is non-zero) + w32->can_set_cursor = LOWORD(lParam) == HTCLIENT && HIWORD(lParam); + if (w32->can_set_cursor && !w32->cursor_visible) { + SetCursor(NULL); + return TRUE; + } + break; + case WM_MOUSELEAVE: + w32->tracking = FALSE; + mp_input_put_key(w32->input_ctx, MP_KEY_MOUSE_LEAVE); + break; + case WM_MOUSEMOVE: { + if (!w32->tracking) { + w32->tracking = TrackMouseEvent(&w32->trackEvent); + mp_input_put_key(w32->input_ctx, MP_KEY_MOUSE_ENTER); + } + // Windows can send spurious mouse events, which would make the mpv + // core unhide the mouse cursor on completely unrelated events. 
See: + // https://blogs.msdn.com/b/oldnewthing/archive/2003/10/01/55108.aspx + int x = GET_X_LPARAM(lParam); + int y = GET_Y_LPARAM(lParam); + if (x != w32->mouse_x || y != w32->mouse_y) { + w32->mouse_x = x; + w32->mouse_y = y; + mp_input_set_mouse_pos(w32->input_ctx, x, y); + } + break; + } + case WM_LBUTTONDOWN: + if (handle_mouse_down(w32, MP_MBTN_LEFT, GET_X_LPARAM(lParam), + GET_Y_LPARAM(lParam))) + return 0; + break; + case WM_LBUTTONUP: + handle_mouse_up(w32, MP_MBTN_LEFT); + break; + case WM_MBUTTONDOWN: + handle_mouse_down(w32, MP_MBTN_MID, GET_X_LPARAM(lParam), + GET_Y_LPARAM(lParam)); + break; + case WM_MBUTTONUP: + handle_mouse_up(w32, MP_MBTN_MID); + break; + case WM_RBUTTONDOWN: + handle_mouse_down(w32, MP_MBTN_RIGHT, GET_X_LPARAM(lParam), + GET_Y_LPARAM(lParam)); + break; + case WM_RBUTTONUP: + handle_mouse_up(w32, MP_MBTN_RIGHT); + break; + case WM_MOUSEWHEEL: + handle_mouse_wheel(w32, false, GET_WHEEL_DELTA_WPARAM(wParam)); + return 0; + case WM_MOUSEHWHEEL: + handle_mouse_wheel(w32, true, GET_WHEEL_DELTA_WPARAM(wParam)); + // Some buggy mouse drivers (SetPoint) stop delivering WM_MOUSEHWHEEL + // events when the message loop doesn't return TRUE (even on Windows 7) + return TRUE; + case WM_XBUTTONDOWN: + handle_mouse_down(w32, + HIWORD(wParam) == 1 ? MP_MBTN_BACK : MP_MBTN_FORWARD, + GET_X_LPARAM(lParam), GET_Y_LPARAM(lParam)); + break; + case WM_XBUTTONUP: + handle_mouse_up(w32, + HIWORD(wParam) == 1 ? MP_MBTN_BACK : MP_MBTN_FORWARD); + break; + case WM_DISPLAYCHANGE: + force_update_display_info(w32); + break; + case WM_SETTINGCHANGE: + update_dark_mode(w32); + break; + case WM_NCCALCSIZE: + if (!w32->opts->border) + return 0; + // Apparently removing WS_CAPTION disables some window animation, instead + // just reduce non-client size to remove title bar. 
+ if (wParam && lParam && w32->opts->border && !w32->opts->title_bar && + !w32->current_fs && !w32->parent) + { + ((LPNCCALCSIZE_PARAMS) lParam)->rgrc[0].top -= get_title_bar_height(w32); + } + break; + } + + if (message == w32->tbtnCreatedMsg) { + w32->tbtnCreated = true; + update_playback_state(w32); + return 0; + } + + return DefWindowProcW(hWnd, message, wParam, lParam); +} + +static mp_once window_class_init_once = MP_STATIC_ONCE_INITIALIZER; +static ATOM window_class; +static void register_window_class(void) +{ + window_class = RegisterClassExW(&(WNDCLASSEXW) { + .cbSize = sizeof(WNDCLASSEXW), + .style = CS_HREDRAW | CS_VREDRAW, + .lpfnWndProc = WndProc, + .hInstance = HINST_THISCOMPONENT, + .hIcon = LoadIconW(HINST_THISCOMPONENT, L"IDI_ICON1"), + .hCursor = LoadCursor(NULL, IDC_ARROW), + .hbrBackground = (HBRUSH) GetStockObject(BLACK_BRUSH), + .lpszClassName = L"mpv", + }); +} + +static ATOM get_window_class(void) +{ + mp_exec_once(&window_class_init_once, register_window_class); + return window_class; +} + +static void resize_child_win(HWND parent) +{ + // Check if an mpv window is a child of this window. This will not + // necessarily be the case because the hook functions will run for all + // windows on the parent window's thread. 
+ ATOM cls = get_window_class(); + HWND child = FindWindowExW(parent, NULL, (LPWSTR)MAKEINTATOM(cls), NULL); + if (!child) + return; + // Make sure the window was created by this instance + if (GetWindowLongPtrW(child, GWLP_HINSTANCE) != (LONG_PTR)HINST_THISCOMPONENT) + return; + + // Resize the mpv window to match its parent window's size + RECT rm, rp; + if (!GetClientRect(child, &rm)) + return; + if (!GetClientRect(parent, &rp)) + return; + if (EqualRect(&rm, &rp)) + return; + SetWindowPos(child, NULL, 0, 0, rp.right, rp.bottom, SWP_ASYNCWINDOWPOS | + SWP_NOACTIVATE | SWP_NOZORDER | SWP_NOOWNERZORDER | SWP_NOSENDCHANGING); +} + +static LRESULT CALLBACK parent_win_hook(int nCode, WPARAM wParam, LPARAM lParam) +{ + if (nCode != HC_ACTION) + goto done; + CWPSTRUCT *cwp = (CWPSTRUCT*)lParam; + if (cwp->message != WM_WINDOWPOSCHANGED) + goto done; + resize_child_win(cwp->hwnd); +done: + return CallNextHookEx(NULL, nCode, wParam, lParam); +} + +static void CALLBACK parent_evt_hook(HWINEVENTHOOK hWinEventHook, DWORD event, + HWND hwnd, LONG idObject, LONG idChild, DWORD dwEventThread, + DWORD dwmsEventTime) +{ + if (event != EVENT_OBJECT_LOCATIONCHANGE) + return; + if (!hwnd || idObject != OBJID_WINDOW || idChild != CHILDID_SELF) + return; + resize_child_win(hwnd); +} + +static void install_parent_hook(struct vo_w32_state *w32) +{ + DWORD pid; + DWORD tid = GetWindowThreadProcessId(w32->parent, &pid); + + // If the parent lives inside the current process, install a Windows hook + if (pid == GetCurrentProcessId()) { + w32->parent_win_hook = SetWindowsHookExW(WH_CALLWNDPROC, + parent_win_hook, NULL, tid); + } else { + // Otherwise, use a WinEvent hook. These don't seem to be as smooth as + // Windows hooks, but they can be delivered across process boundaries. 
+ w32->parent_evt_hook = SetWinEventHook( + EVENT_OBJECT_LOCATIONCHANGE, EVENT_OBJECT_LOCATIONCHANGE, + NULL, parent_evt_hook, pid, tid, WINEVENT_OUTOFCONTEXT); + } +} + +static void remove_parent_hook(struct vo_w32_state *w32) +{ + if (w32->parent_win_hook) + UnhookWindowsHookEx(w32->parent_win_hook); + if (w32->parent_evt_hook) + UnhookWinEvent(w32->parent_evt_hook); +} + +// Dispatch incoming window events and handle them. +// This returns only when the thread is asked to terminate. +static void run_message_loop(struct vo_w32_state *w32) +{ + MSG msg; + while (GetMessageW(&msg, 0, 0, 0) > 0) + DispatchMessageW(&msg); + + // Even if the message loop somehow exits, we still have to respond to + // external requests until termination is requested. + while (!w32->terminate) + mp_dispatch_queue_process(w32->dispatch, 1000); +} + +static void gui_thread_reconfig(void *ptr) +{ + struct vo_w32_state *w32 = ptr; + struct vo *vo = w32->vo; + + RECT r = get_working_area(w32); + // for normal window which is auto-positioned (centered), center the window + // rather than the content (by subtracting the borders from the work area) + if (!w32->current_fs && !IsMaximized(w32->window) && w32->opts->border && + !w32->opts->geometry.xy_valid /* specific position not requested */) + { + subtract_window_borders(w32, w32->window, &r); + } + struct mp_rect screen = { r.left, r.top, r.right, r.bottom }; + struct vo_win_geometry geo; + + RECT monrc = get_monitor_info(w32).rcMonitor; + struct mp_rect mon = { monrc.left, monrc.top, monrc.right, monrc.bottom }; + + if (w32->dpi_scale == 0) + force_update_display_info(w32); + + vo_calc_window_geometry3(vo, &screen, &mon, w32->dpi_scale, &geo); + vo_apply_window_geometry(vo, &geo); + + bool reset_size = (w32->o_dwidth != vo->dwidth || + w32->o_dheight != vo->dheight) && + w32->opts->auto_window_resize; + + w32->o_dwidth = vo->dwidth; + w32->o_dheight = vo->dheight; + + if (!w32->parent && !w32->window_bounds_initialized) { + 
SetRect(&w32->windowrc, geo.win.x0, geo.win.y0, + geo.win.x0 + vo->dwidth, geo.win.y0 + vo->dheight); + w32->prev_windowrc = w32->windowrc; + w32->window_bounds_initialized = true; + w32->win_force_pos = geo.flags & VO_WIN_FORCE_POS; + w32->fit_on_screen = !w32->win_force_pos; + goto finish; + } + + // The rect which size is going to be modified. + RECT *rc = &w32->windowrc; + + // The desired size always matches the window size in wid mode. + if (!reset_size || w32->parent) { + GetClientRect(w32->window, &r); + // Restore vo_dwidth and vo_dheight, which were reset in vo_config() + vo->dwidth = r.right; + vo->dheight = r.bottom; + } else { + if (w32->current_fs) + rc = &w32->prev_windowrc; + w32->fit_on_screen = true; + } + + resize_and_move_rect(w32, rc, vo->dwidth, vo->dheight); + +finish: + reinit_window_state(w32); +} + +// Resize the window. On the first call, it's also made visible. +void vo_w32_config(struct vo *vo) +{ + struct vo_w32_state *w32 = vo->w32; + mp_dispatch_run(w32->dispatch, gui_thread_reconfig, w32); +} + +static void w32_api_load(struct vo_w32_state *w32) +{ + HMODULE shcore_dll = LoadLibraryW(L"shcore.dll"); + // Available since Win8.1 + w32->api.pGetDpiForMonitor = !shcore_dll ? NULL : + (void *)GetProcAddress(shcore_dll, "GetDpiForMonitor"); + + HMODULE user32_dll = LoadLibraryW(L"user32.dll"); + // Available since Win10 + w32->api.pAdjustWindowRectExForDpi = !user32_dll ? NULL : + (void *)GetProcAddress(user32_dll, "AdjustWindowRectExForDpi"); + + // imm32.dll must be loaded dynamically + // to account for machines without East Asian language support + HMODULE imm32_dll = LoadLibraryW(L"imm32.dll"); + w32->api.pImmDisableIME = !imm32_dll ? NULL : + (void *)GetProcAddress(imm32_dll, "ImmDisableIME"); + + // Dark mode related functions, available since the 1809 Windows 10 update + // Check the Windows build version as on previous versions used ordinals + // may point to unexpected code/data. 
Alternatively could check uxtheme.dll + // version directly, but it is little bit more boilerplate code, and build + // number is good enough check. + void (WINAPI *pRtlGetNtVersionNumbers)(LPDWORD, LPDWORD, LPDWORD) = + (void *)GetProcAddress(GetModuleHandleW(L"ntdll.dll"), "RtlGetNtVersionNumbers"); + + DWORD major, build; + pRtlGetNtVersionNumbers(&major, NULL, &build); + build &= ~0xF0000000; + + HMODULE uxtheme_dll = (major < 10 || build < 17763) ? NULL : + GetModuleHandle(L"uxtheme.dll"); + w32->api.pShouldAppsUseDarkMode = !uxtheme_dll ? NULL : + (void *)GetProcAddress(uxtheme_dll, MAKEINTRESOURCEA(132)); + w32->api.pSetPreferredAppMode = !uxtheme_dll ? NULL : + (void *)GetProcAddress(uxtheme_dll, MAKEINTRESOURCEA(135)); +} + +static MP_THREAD_VOID gui_thread(void *ptr) +{ + struct vo_w32_state *w32 = ptr; + bool ole_ok = false; + int res = 0; + + mp_thread_set_name("window"); + + w32_api_load(w32); + + // Disables the IME for windows on this thread + if (w32->api.pImmDisableIME) + w32->api.pImmDisableIME(0); + + if (w32->opts->WinID >= 0) + w32->parent = (HWND)(intptr_t)(w32->opts->WinID); + + ATOM cls = get_window_class(); + if (w32->parent) { + RECT r; + GetClientRect(w32->parent, &r); + CreateWindowExW(WS_EX_NOPARENTNOTIFY, (LPWSTR)MAKEINTATOM(cls), L"mpv", + WS_CHILD | WS_VISIBLE, 0, 0, r.right, r.bottom, + w32->parent, 0, HINST_THISCOMPONENT, w32); + + // Install a hook to get notifications when the parent changes size + if (w32->window) + install_parent_hook(w32); + } else { + CreateWindowExW(0, (LPWSTR)MAKEINTATOM(cls), L"mpv", + update_style(w32, 0), CW_USEDEFAULT, SW_HIDE, 100, 100, + 0, 0, HINST_THISCOMPONENT, w32); + } + + if (!w32->window) { + MP_ERR(w32, "unable to create window!\n"); + goto done; + } + + update_dark_mode(w32); + update_corners_pref(w32); + if (w32->opts->window_affinity) + update_affinity(w32); + if (w32->opts->backdrop_type) + update_backdrop(w32); + + if (SUCCEEDED(OleInitialize(NULL))) { + ole_ok = true; + + IDropTarget *dt 
= mp_w32_droptarget_create(w32->log, w32->opts, w32->input_ctx); + RegisterDragDrop(w32->window, dt); + + // ITaskbarList2 has the MarkFullscreenWindow method, which is used to + // make sure the taskbar is hidden when mpv goes fullscreen + if (SUCCEEDED(CoCreateInstance(&CLSID_TaskbarList, NULL, + CLSCTX_INPROC_SERVER, &IID_ITaskbarList2, + (void**)&w32->taskbar_list))) + { + if (FAILED(ITaskbarList2_HrInit(w32->taskbar_list))) { + ITaskbarList2_Release(w32->taskbar_list); + w32->taskbar_list = NULL; + } + } + + // ITaskbarList3 has methods for status indication on taskbar buttons, + // however that interface is only available on Win7/2008 R2 or newer + if (SUCCEEDED(CoCreateInstance(&CLSID_TaskbarList, NULL, + CLSCTX_INPROC_SERVER, &IID_ITaskbarList3, + (void**)&w32->taskbar_list3))) + { + if (FAILED(ITaskbarList3_HrInit(w32->taskbar_list3))) { + ITaskbarList3_Release(w32->taskbar_list3); + w32->taskbar_list3 = NULL; + } else { + w32->tbtnCreatedMsg = RegisterWindowMessage(L"TaskbarButtonCreated"); + } + } + } else { + MP_ERR(w32, "Failed to initialize OLE/COM\n"); + } + + w32->tracking = FALSE; + w32->trackEvent = (TRACKMOUSEEVENT){ + .cbSize = sizeof(TRACKMOUSEEVENT), + .dwFlags = TME_LEAVE, + .hwndTrack = w32->window, + }; + + if (w32->parent) + EnableWindow(w32->window, 0); + + w32->cursor_visible = true; + w32->moving = false; + w32->snapped = 0; + w32->snap_dx = w32->snap_dy = 0; + + mp_dispatch_set_wakeup_fn(w32->dispatch, wakeup_gui_thread, w32); + + res = 1; +done: + + mp_rendezvous(w32, res); // init barrier + + // This blocks until the GUI thread is to be exited. 
+ if (res) + run_message_loop(w32); + + MP_VERBOSE(w32, "uninit\n"); + + remove_parent_hook(w32); + if (w32->window && !w32->destroyed) + DestroyWindow(w32->window); + if (w32->taskbar_list) + ITaskbarList2_Release(w32->taskbar_list); + if (w32->taskbar_list3) + ITaskbarList3_Release(w32->taskbar_list3); + if (ole_ok) + OleUninitialize(); + SetThreadExecutionState(ES_CONTINUOUS); + MP_THREAD_RETURN(); +} + +bool vo_w32_init(struct vo *vo) +{ + assert(!vo->w32); + + struct vo_w32_state *w32 = talloc_ptrtype(vo, w32); + *w32 = (struct vo_w32_state){ + .log = mp_log_new(w32, vo->log, "win32"), + .vo = vo, + .opts_cache = m_config_cache_alloc(w32, vo->global, &vo_sub_opts), + .input_ctx = vo->input_ctx, + .dispatch = mp_dispatch_create(w32), + }; + w32->opts = w32->opts_cache->opts; + vo->w32 = w32; + + if (mp_thread_create(&w32->thread, gui_thread, w32)) + goto fail; + + if (!mp_rendezvous(w32, 0)) { // init barrier + mp_thread_join(w32->thread); + goto fail; + } + + // While the UI runs in its own thread, the thread in which this function + // runs in will be the renderer thread. Apply magic MMCSS cargo-cult, + // which might stop Windows from throttling clock rate and so on. 
+ if (vo->opts->mmcss_profile[0]) { + wchar_t *profile = mp_from_utf8(NULL, vo->opts->mmcss_profile); + w32->avrt_handle = AvSetMmThreadCharacteristicsW(profile, &(DWORD){0}); + talloc_free(profile); + } + + return true; +fail: + talloc_free(w32); + vo->w32 = NULL; + return false; +} + +struct disp_names_data { + HMONITOR assoc; + int count; + char **names; +}; + +static BOOL CALLBACK disp_names_proc(HMONITOR mon, HDC dc, LPRECT r, LPARAM p) +{ + struct disp_names_data *data = (struct disp_names_data*)p; + + // get_disp_names() adds data->assoc to the list, so skip it here + if (mon == data->assoc) + return TRUE; + + MONITORINFOEXW mi = { .cbSize = sizeof mi }; + if (GetMonitorInfoW(mon, (MONITORINFO*)&mi)) { + MP_TARRAY_APPEND(NULL, data->names, data->count, + mp_to_utf8(NULL, mi.szDevice)); + } + return TRUE; +} + +static char **get_disp_names(struct vo_w32_state *w32) +{ + // Get the client area of the window in screen space + RECT rect = { 0 }; + GetClientRect(w32->window, &rect); + MapWindowPoints(w32->window, NULL, (POINT*)&rect, 2); + + struct disp_names_data data = { .assoc = w32->monitor }; + + // Make sure the monitor that Windows considers to be associated with the + // window is first in the list + MONITORINFOEXW mi = { .cbSize = sizeof mi }; + if (GetMonitorInfoW(data.assoc, (MONITORINFO*)&mi)) { + MP_TARRAY_APPEND(NULL, data.names, data.count, + mp_to_utf8(NULL, mi.szDevice)); + } + + // Get the names of the other monitors that intersect the client rect + EnumDisplayMonitors(NULL, &rect, disp_names_proc, (LPARAM)&data); + MP_TARRAY_APPEND(NULL, data.names, data.count, NULL); + return data.names; +} + +static int gui_thread_control(struct vo_w32_state *w32, int request, void *arg) +{ + switch (request) { + case VOCTRL_VO_OPTS_CHANGED: { + void *changed_option; + + while (m_config_cache_get_next_changed(w32->opts_cache, + &changed_option)) + { + struct mp_vo_opts *vo_opts = w32->opts_cache->opts; + + if (changed_option == &vo_opts->fullscreen) { + 
reinit_window_state(w32); + } else if (changed_option == &vo_opts->window_affinity) { + update_affinity(w32); + } else if (changed_option == &vo_opts->ontop) { + update_window_state(w32); + } else if (changed_option == &vo_opts->backdrop_type) { + update_backdrop(w32); + } else if (changed_option == &vo_opts->border || + changed_option == &vo_opts->title_bar) + { + update_window_style(w32); + update_window_state(w32); + } else if (changed_option == &vo_opts->window_minimized) { + update_minimized_state(w32); + } else if (changed_option == &vo_opts->window_maximized) { + update_maximized_state(w32); + } else if (changed_option == &vo_opts->window_corners) { + update_corners_pref(w32); + } + } + + return VO_TRUE; + } + case VOCTRL_GET_WINDOW_ID: { + if (!w32->window) + return VO_NOTAVAIL; + *(int64_t *)arg = (intptr_t)w32->window; + return VO_TRUE; + } + case VOCTRL_GET_HIDPI_SCALE: { + *(double *)arg = w32->dpi_scale; + return VO_TRUE; + } + case VOCTRL_GET_UNFS_WINDOW_SIZE: { + int *s = arg; + + if (!w32->window_bounds_initialized) + return VO_FALSE; + + RECT *rc = w32->current_fs ? &w32->prev_windowrc : &w32->windowrc; + s[0] = rect_w(*rc) / w32->dpi_scale; + s[1] = rect_h(*rc) / w32->dpi_scale; + return VO_TRUE; + } + case VOCTRL_SET_UNFS_WINDOW_SIZE: { + int *s = arg; + + if (!w32->window_bounds_initialized) + return VO_FALSE; + + s[0] *= w32->dpi_scale; + s[1] *= w32->dpi_scale; + + RECT *rc = w32->current_fs ? 
&w32->prev_windowrc : &w32->windowrc; + resize_and_move_rect(w32, rc, s[0], s[1]); + + w32->fit_on_screen = true; + reinit_window_state(w32); + return VO_TRUE; + } + case VOCTRL_SET_CURSOR_VISIBILITY: + w32->cursor_visible = *(bool *)arg; + + if (w32->can_set_cursor && w32->tracking) { + if (w32->cursor_visible) + SetCursor(LoadCursor(NULL, IDC_ARROW)); + else + SetCursor(NULL); + } + return VO_TRUE; + case VOCTRL_KILL_SCREENSAVER: + w32->disable_screensaver = true; + SetThreadExecutionState(ES_CONTINUOUS | ES_SYSTEM_REQUIRED | + ES_DISPLAY_REQUIRED); + return VO_TRUE; + case VOCTRL_RESTORE_SCREENSAVER: + w32->disable_screensaver = false; + SetThreadExecutionState(ES_CONTINUOUS); + return VO_TRUE; + case VOCTRL_UPDATE_WINDOW_TITLE: { + wchar_t *title = mp_from_utf8(NULL, (char *)arg); + SetWindowTextW(w32->window, title); + talloc_free(title); + return VO_TRUE; + } + case VOCTRL_UPDATE_PLAYBACK_STATE: { + w32->current_pstate = *(struct voctrl_playback_state *)arg; + + update_playback_state(w32); + return VO_TRUE; + } + case VOCTRL_GET_DISPLAY_FPS: + update_display_info(w32); + *(double*) arg = w32->display_fps; + return VO_TRUE; + case VOCTRL_GET_DISPLAY_RES: ; + RECT monrc = get_monitor_info(w32).rcMonitor; + ((int *)arg)[0] = monrc.right - monrc.left; + ((int *)arg)[1] = monrc.bottom - monrc.top; + return VO_TRUE; + case VOCTRL_GET_DISPLAY_NAMES: + *(char ***)arg = get_disp_names(w32); + return VO_TRUE; + case VOCTRL_GET_ICC_PROFILE: + update_display_info(w32); + if (w32->color_profile) { + bstr *p = arg; + *p = stream_read_file(w32->color_profile, NULL, + w32->vo->global, 100000000); // 100 MB + return p->len ? 
VO_TRUE : VO_FALSE; + } + return VO_FALSE; + case VOCTRL_GET_FOCUSED: + *(bool *)arg = w32->focused; + return VO_TRUE; + } + return VO_NOTIMPL; +} + +static void do_control(void *ptr) +{ + void **p = ptr; + struct vo_w32_state *w32 = p[0]; + int *events = p[1]; + int request = *(int *)p[2]; + void *arg = p[3]; + int *ret = p[4]; + *ret = gui_thread_control(w32, request, arg); + *events |= atomic_fetch_and(&w32->event_flags, 0); + // Safe access, since caller (owner of vo) is blocked. + if (*events & VO_EVENT_RESIZE) { + w32->vo->dwidth = rect_w(w32->windowrc); + w32->vo->dheight = rect_h(w32->windowrc); + } +} + +int vo_w32_control(struct vo *vo, int *events, int request, void *arg) +{ + struct vo_w32_state *w32 = vo->w32; + if (request == VOCTRL_CHECK_EVENTS) { + *events |= atomic_fetch_and(&w32->event_flags, 0); + if (*events & VO_EVENT_RESIZE) { + mp_dispatch_lock(w32->dispatch); + vo->dwidth = rect_w(w32->windowrc); + vo->dheight = rect_h(w32->windowrc); + mp_dispatch_unlock(w32->dispatch); + } + return VO_TRUE; + } else { + int r; + void *p[] = {w32, events, &request, arg, &r}; + mp_dispatch_run(w32->dispatch, do_control, p); + return r; + } +} + +static void do_terminate(void *ptr) +{ + struct vo_w32_state *w32 = ptr; + w32->terminate = true; + + if (!w32->destroyed) + DestroyWindow(w32->window); + + mp_dispatch_interrupt(w32->dispatch); +} + +void vo_w32_uninit(struct vo *vo) +{ + struct vo_w32_state *w32 = vo->w32; + if (!w32) + return; + + mp_dispatch_run(w32->dispatch, do_terminate, w32); + mp_thread_join(w32->thread); + + AvRevertMmThreadCharacteristics(w32->avrt_handle); + + talloc_free(w32); + vo->w32 = NULL; +} + +HWND vo_w32_hwnd(struct vo *vo) +{ + struct vo_w32_state *w32 = vo->w32; + return w32->window; // immutable, so no synchronization needed +} + +void vo_w32_run_on_thread(struct vo *vo, void (*cb)(void *ctx), void *ctx) +{ + struct vo_w32_state *w32 = vo->w32; + mp_dispatch_run(w32->dispatch, cb, ctx); +} diff --git a/video/out/w32_common.h 
b/video/out/w32_common.h new file mode 100644 index 0000000..528b216 --- /dev/null +++ b/video/out/w32_common.h @@ -0,0 +1,36 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPLAYER_W32_COMMON_H +#define MPLAYER_W32_COMMON_H + +#include <stdint.h> +#include <stdbool.h> +#include <windows.h> + +#include "common/common.h" + +struct vo; + +bool vo_w32_init(struct vo *vo); +void vo_w32_uninit(struct vo *vo); +int vo_w32_control(struct vo *vo, int *events, int request, void *arg); +void vo_w32_config(struct vo *vo); +HWND vo_w32_hwnd(struct vo *vo); +void vo_w32_run_on_thread(struct vo *vo, void (*cb)(void *ctx), void *ctx); + +#endif /* MPLAYER_W32_COMMON_H */ diff --git a/video/out/wayland_common.c b/video/out/wayland_common.c new file mode 100644 index 0000000..589135f --- /dev/null +++ b/video/out/wayland_common.c @@ -0,0 +1,2629 @@ +/* + * This file is part of mpv video player. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include <errno.h> +#include <limits.h> +#include <linux/input-event-codes.h> +#include <poll.h> +#include <time.h> +#include <unistd.h> +#include <wayland-cursor.h> +#include <xkbcommon/xkbcommon.h> + +#include "common/msg.h" +#include "input/input.h" +#include "input/keycodes.h" +#include "options/m_config.h" +#include "osdep/io.h" +#include "osdep/poll_wrapper.h" +#include "osdep/timer.h" +#include "present_sync.h" +#include "wayland_common.h" +#include "win_state.h" + +// Generated from wayland-protocols +#include "idle-inhibit-unstable-v1.h" +#include "linux-dmabuf-unstable-v1.h" +#include "presentation-time.h" +#include "xdg-decoration-unstable-v1.h" +#include "xdg-shell.h" +#include "viewporter.h" + +#if HAVE_WAYLAND_PROTOCOLS_1_27 +#include "content-type-v1.h" +#include "single-pixel-buffer-v1.h" +#endif + +#if HAVE_WAYLAND_PROTOCOLS_1_31 +#include "fractional-scale-v1.h" +#endif + +#if HAVE_WAYLAND_PROTOCOLS_1_32 +#include "cursor-shape-v1.h" +#endif + +#if WAYLAND_VERSION_MAJOR > 1 || WAYLAND_VERSION_MINOR >= 22 +#define HAVE_WAYLAND_1_22 +#endif + +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif + +#ifndef XDG_TOPLEVEL_STATE_SUSPENDED +#define XDG_TOPLEVEL_STATE_SUSPENDED 9 +#endif + + +static const struct mp_keymap keymap[] = { + /* Special keys */ + {XKB_KEY_Pause, MP_KEY_PAUSE}, {XKB_KEY_Escape, MP_KEY_ESC}, + {XKB_KEY_BackSpace, MP_KEY_BS}, {XKB_KEY_Tab, MP_KEY_TAB}, + {XKB_KEY_Return, MP_KEY_ENTER}, {XKB_KEY_Menu, MP_KEY_MENU}, + {XKB_KEY_Print, MP_KEY_PRINT}, {XKB_KEY_ISO_Left_Tab, MP_KEY_TAB}, + + /* Cursor keys */ + {XKB_KEY_Left, MP_KEY_LEFT}, {XKB_KEY_Right, MP_KEY_RIGHT}, + {XKB_KEY_Up, MP_KEY_UP}, {XKB_KEY_Down, MP_KEY_DOWN}, + + /* Navigation keys */ + {XKB_KEY_Insert, MP_KEY_INSERT}, {XKB_KEY_Delete, 
MP_KEY_DELETE}, + {XKB_KEY_Home, MP_KEY_HOME}, {XKB_KEY_End, MP_KEY_END}, + {XKB_KEY_Page_Up, MP_KEY_PAGE_UP}, {XKB_KEY_Page_Down, MP_KEY_PAGE_DOWN}, + + /* F-keys */ + {XKB_KEY_F1, MP_KEY_F + 1}, {XKB_KEY_F2, MP_KEY_F + 2}, + {XKB_KEY_F3, MP_KEY_F + 3}, {XKB_KEY_F4, MP_KEY_F + 4}, + {XKB_KEY_F5, MP_KEY_F + 5}, {XKB_KEY_F6, MP_KEY_F + 6}, + {XKB_KEY_F7, MP_KEY_F + 7}, {XKB_KEY_F8, MP_KEY_F + 8}, + {XKB_KEY_F9, MP_KEY_F + 9}, {XKB_KEY_F10, MP_KEY_F +10}, + {XKB_KEY_F11, MP_KEY_F +11}, {XKB_KEY_F12, MP_KEY_F +12}, + {XKB_KEY_F13, MP_KEY_F +13}, {XKB_KEY_F14, MP_KEY_F +14}, + {XKB_KEY_F15, MP_KEY_F +15}, {XKB_KEY_F16, MP_KEY_F +16}, + {XKB_KEY_F17, MP_KEY_F +17}, {XKB_KEY_F18, MP_KEY_F +18}, + {XKB_KEY_F19, MP_KEY_F +19}, {XKB_KEY_F20, MP_KEY_F +20}, + {XKB_KEY_F21, MP_KEY_F +21}, {XKB_KEY_F22, MP_KEY_F +22}, + {XKB_KEY_F23, MP_KEY_F +23}, {XKB_KEY_F24, MP_KEY_F +24}, + + /* Numpad independent of numlock */ + {XKB_KEY_KP_Subtract, '-'}, {XKB_KEY_KP_Add, '+'}, + {XKB_KEY_KP_Multiply, '*'}, {XKB_KEY_KP_Divide, '/'}, + {XKB_KEY_KP_Enter, MP_KEY_KPENTER}, + + /* Numpad with numlock */ + {XKB_KEY_KP_0, MP_KEY_KP0}, {XKB_KEY_KP_1, MP_KEY_KP1}, + {XKB_KEY_KP_2, MP_KEY_KP2}, {XKB_KEY_KP_3, MP_KEY_KP3}, + {XKB_KEY_KP_4, MP_KEY_KP4}, {XKB_KEY_KP_5, MP_KEY_KP5}, + {XKB_KEY_KP_6, MP_KEY_KP6}, {XKB_KEY_KP_7, MP_KEY_KP7}, + {XKB_KEY_KP_8, MP_KEY_KP8}, {XKB_KEY_KP_9, MP_KEY_KP9}, + {XKB_KEY_KP_Decimal, MP_KEY_KPDEC}, {XKB_KEY_KP_Separator, MP_KEY_KPDEC}, + + /* Numpad without numlock */ + {XKB_KEY_KP_Insert, MP_KEY_KPINS}, {XKB_KEY_KP_End, MP_KEY_KPEND}, + {XKB_KEY_KP_Down, MP_KEY_KPDOWN}, {XKB_KEY_KP_Page_Down, MP_KEY_KPPGDOWN}, + {XKB_KEY_KP_Left, MP_KEY_KPLEFT}, {XKB_KEY_KP_Begin, MP_KEY_KP5}, + {XKB_KEY_KP_Right, MP_KEY_KPRIGHT}, {XKB_KEY_KP_Home, MP_KEY_KPHOME}, + {XKB_KEY_KP_Up, MP_KEY_KPUP}, {XKB_KEY_KP_Page_Up, MP_KEY_KPPGUP}, + {XKB_KEY_KP_Delete, MP_KEY_KPDEL}, + + /* Multimedia keys */ + {XKB_KEY_XF86MenuKB, MP_KEY_MENU}, + {XKB_KEY_XF86AudioPlay, MP_KEY_PLAY}, 
{XKB_KEY_XF86AudioPause, MP_KEY_PAUSE}, + {XKB_KEY_XF86AudioStop, MP_KEY_STOP}, + {XKB_KEY_XF86AudioPrev, MP_KEY_PREV}, {XKB_KEY_XF86AudioNext, MP_KEY_NEXT}, + {XKB_KEY_XF86AudioRewind, MP_KEY_REWIND}, + {XKB_KEY_XF86AudioForward, MP_KEY_FORWARD}, + {XKB_KEY_XF86AudioMute, MP_KEY_MUTE}, + {XKB_KEY_XF86AudioLowerVolume, MP_KEY_VOLUME_DOWN}, + {XKB_KEY_XF86AudioRaiseVolume, MP_KEY_VOLUME_UP}, + {XKB_KEY_XF86HomePage, MP_KEY_HOMEPAGE}, {XKB_KEY_XF86WWW, MP_KEY_WWW}, + {XKB_KEY_XF86Mail, MP_KEY_MAIL}, {XKB_KEY_XF86Favorites, MP_KEY_FAVORITES}, + {XKB_KEY_XF86Search, MP_KEY_SEARCH}, {XKB_KEY_XF86Sleep, MP_KEY_SLEEP}, + {XKB_KEY_XF86Back, MP_KEY_BACK}, {XKB_KEY_XF86Tools, MP_KEY_TOOLS}, + {XKB_KEY_XF86ZoomIn, MP_KEY_ZOOMIN}, {XKB_KEY_XF86ZoomOut, MP_KEY_ZOOMOUT}, + + {0, 0} +}; + +#define OPT_BASE_STRUCT struct wayland_opts +const struct m_sub_options wayland_conf = { + .opts = (const struct m_option[]) { + {"wayland-configure-bounds", OPT_CHOICE(configure_bounds, + {"auto", -1}, {"no", 0}, {"yes", 1})}, + {"wayland-disable-vsync", OPT_BOOL(disable_vsync)}, + {"wayland-edge-pixels-pointer", OPT_INT(edge_pixels_pointer), + M_RANGE(0, INT_MAX)}, + {"wayland-edge-pixels-touch", OPT_INT(edge_pixels_touch), + M_RANGE(0, INT_MAX)}, + {0}, + }, + .size = sizeof(struct wayland_opts), + .defaults = &(struct wayland_opts) { + .configure_bounds = -1, + .edge_pixels_pointer = 16, + .edge_pixels_touch = 32, + }, +}; + +struct vo_wayland_feedback_pool { + struct wp_presentation_feedback **fback; + struct vo_wayland_state *wl; + int len; +}; + +struct vo_wayland_output { + struct vo_wayland_state *wl; + struct wl_output *output; + struct mp_rect geometry; + bool has_surface; + uint32_t id; + uint32_t flags; + int phys_width; + int phys_height; + int scale; + double refresh_rate; + char *make; + char *model; + char *name; + struct wl_list link; +}; + +static int check_for_resize(struct vo_wayland_state *wl, int edge_pixels, + enum xdg_toplevel_resize_edge *edge); +static int 
get_mods(struct vo_wayland_state *wl); +static int lookupkey(int key); +static int set_cursor_visibility(struct vo_wayland_state *wl, bool on); +static int spawn_cursor(struct vo_wayland_state *wl); + +static void add_feedback(struct vo_wayland_feedback_pool *fback_pool, + struct wp_presentation_feedback *fback); +static void get_shape_device(struct vo_wayland_state *wl); +static int greatest_common_divisor(int a, int b); +static void guess_focus(struct vo_wayland_state *wl); +static void prepare_resize(struct vo_wayland_state *wl, int width, int height); +static void remove_feedback(struct vo_wayland_feedback_pool *fback_pool, + struct wp_presentation_feedback *fback); +static void remove_output(struct vo_wayland_output *out); +static void request_decoration_mode(struct vo_wayland_state *wl, uint32_t mode); +static void rescale_geometry(struct vo_wayland_state *wl, double old_scale); +static void set_geometry(struct vo_wayland_state *wl, bool resize); +static void set_surface_scaling(struct vo_wayland_state *wl); +static void window_move(struct vo_wayland_state *wl, uint32_t serial); + +/* Wayland listener boilerplate */ +static void pointer_handle_enter(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t sx, wl_fixed_t sy) +{ + struct vo_wayland_state *wl = data; + + wl->pointer = pointer; + wl->pointer_id = serial; + + set_cursor_visibility(wl, wl->cursor_visible); + mp_input_put_key(wl->vo->input_ctx, MP_KEY_MOUSE_ENTER); +} + +static void pointer_handle_leave(void *data, struct wl_pointer *pointer, + uint32_t serial, struct wl_surface *surface) +{ + struct vo_wayland_state *wl = data; + mp_input_put_key(wl->vo->input_ctx, MP_KEY_MOUSE_LEAVE); +} + +static void pointer_handle_motion(void *data, struct wl_pointer *pointer, + uint32_t time, wl_fixed_t sx, wl_fixed_t sy) +{ + struct vo_wayland_state *wl = data; + + wl->mouse_x = wl_fixed_to_int(sx) * wl->scaling; + wl->mouse_y = wl_fixed_to_int(sy) * wl->scaling; + + 
if (!wl->toplevel_configured) + mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y); + wl->toplevel_configured = false; +} + +static void pointer_handle_button(void *data, struct wl_pointer *wl_pointer, + uint32_t serial, uint32_t time, uint32_t button, + uint32_t state) +{ + struct vo_wayland_state *wl = data; + state = state == WL_POINTER_BUTTON_STATE_PRESSED ? MP_KEY_STATE_DOWN + : MP_KEY_STATE_UP; + + if (button >= BTN_MOUSE && button < BTN_JOYSTICK) { + switch (button) { + case BTN_LEFT: + button = MP_MBTN_LEFT; + break; + case BTN_MIDDLE: + button = MP_MBTN_MID; + break; + case BTN_RIGHT: + button = MP_MBTN_RIGHT; + break; + case BTN_SIDE: + button = MP_MBTN_BACK; + break; + case BTN_EXTRA: + button = MP_MBTN_FORWARD; + break; + default: + button += MP_MBTN9 - BTN_FORWARD; + break; + } + } else { + button = 0; + } + + if (button) + mp_input_put_key(wl->vo->input_ctx, button | state | wl->mpmod); + + if (!mp_input_test_dragging(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y) && + !wl->locked_size && (button == MP_MBTN_LEFT) && (state == MP_KEY_STATE_DOWN)) + { + uint32_t edges; + // Implement an edge resize zone if there are no decorations + if (!wl->vo_opts->border && check_for_resize(wl, wl->opts->edge_pixels_pointer, &edges)) { + xdg_toplevel_resize(wl->xdg_toplevel, wl->seat, serial, edges); + } else { + window_move(wl, serial); + } + // Explicitly send an UP event after the client finishes a move/resize + mp_input_put_key(wl->vo->input_ctx, button | MP_KEY_STATE_UP); + } +} + +static void pointer_handle_axis(void *data, struct wl_pointer *wl_pointer, + uint32_t time, uint32_t axis, wl_fixed_t value) +{ + struct vo_wayland_state *wl = data; + + double val = wl_fixed_to_double(value) < 0 ? 
-1 : 1; + switch (axis) { + case WL_POINTER_AXIS_VERTICAL_SCROLL: + if (value > 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_DOWN | wl->mpmod, +val); + if (value < 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_UP | wl->mpmod, -val); + break; + case WL_POINTER_AXIS_HORIZONTAL_SCROLL: + if (value > 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_RIGHT | wl->mpmod, +val); + if (value < 0) + mp_input_put_wheel(wl->vo->input_ctx, MP_WHEEL_LEFT | wl->mpmod, -val); + break; + } +} + +static const struct wl_pointer_listener pointer_listener = { + pointer_handle_enter, + pointer_handle_leave, + pointer_handle_motion, + pointer_handle_button, + pointer_handle_axis, +}; + +static void touch_handle_down(void *data, struct wl_touch *wl_touch, + uint32_t serial, uint32_t time, struct wl_surface *surface, + int32_t id, wl_fixed_t x_w, wl_fixed_t y_w) +{ + struct vo_wayland_state *wl = data; + wl->mouse_x = wl_fixed_to_int(x_w) * wl->scaling; + wl->mouse_y = wl_fixed_to_int(y_w) * wl->scaling; + + enum xdg_toplevel_resize_edge edge; + if (!mp_input_test_dragging(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y)) { + if (check_for_resize(wl, wl->opts->edge_pixels_touch, &edge)) { + xdg_toplevel_resize(wl->xdg_toplevel, wl->seat, serial, edge); + } else { + xdg_toplevel_move(wl->xdg_toplevel, wl->seat, serial); + } + } + + mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y); + mp_input_put_key(wl->vo->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_DOWN); +} + +static void touch_handle_up(void *data, struct wl_touch *wl_touch, + uint32_t serial, uint32_t time, int32_t id) +{ + struct vo_wayland_state *wl = data; + mp_input_put_key(wl->vo->input_ctx, MP_MBTN_LEFT | MP_KEY_STATE_UP); +} + +static void touch_handle_motion(void *data, struct wl_touch *wl_touch, + uint32_t time, int32_t id, wl_fixed_t x_w, wl_fixed_t y_w) +{ + struct vo_wayland_state *wl = data; + + wl->mouse_x = wl_fixed_to_int(x_w) * wl->scaling; + wl->mouse_y = wl_fixed_to_int(y_w) * wl->scaling; + 
+ mp_input_set_mouse_pos(wl->vo->input_ctx, wl->mouse_x, wl->mouse_y); +} + +static void touch_handle_frame(void *data, struct wl_touch *wl_touch) +{ +} + +static void touch_handle_cancel(void *data, struct wl_touch *wl_touch) +{ +} + +static const struct wl_touch_listener touch_listener = { + touch_handle_down, + touch_handle_up, + touch_handle_motion, + touch_handle_frame, + touch_handle_cancel, +}; + +static void keyboard_handle_keymap(void *data, struct wl_keyboard *wl_keyboard, + uint32_t format, int32_t fd, uint32_t size) +{ + struct vo_wayland_state *wl = data; + char *map_str; + + if (format != WL_KEYBOARD_KEYMAP_FORMAT_XKB_V1) { + close(fd); + return; + } + + map_str = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + if (map_str == MAP_FAILED) { + close(fd); + return; + } + + wl->xkb_keymap = xkb_keymap_new_from_buffer(wl->xkb_context, map_str, + strnlen(map_str, size), + XKB_KEYMAP_FORMAT_TEXT_V1, 0); + + munmap(map_str, size); + close(fd); + + if (!wl->xkb_keymap) { + MP_ERR(wl, "failed to compile keymap\n"); + return; + } + + wl->xkb_state = xkb_state_new(wl->xkb_keymap); + if (!wl->xkb_state) { + MP_ERR(wl, "failed to create XKB state\n"); + xkb_keymap_unref(wl->xkb_keymap); + wl->xkb_keymap = NULL; + return; + } +} + +static void keyboard_handle_enter(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, struct wl_surface *surface, + struct wl_array *keys) +{ + struct vo_wayland_state *wl = data; + wl->has_keyboard_input = true; + guess_focus(wl); +} + +static void keyboard_handle_leave(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, struct wl_surface *surface) +{ + struct vo_wayland_state *wl = data; + wl->has_keyboard_input = false; + wl->keyboard_code = 0; + wl->mpkey = 0; + wl->mpmod = 0; + mp_input_put_key(wl->vo->input_ctx, MP_INPUT_RELEASE_ALL); + guess_focus(wl); +} + +static void keyboard_handle_key(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, uint32_t time, uint32_t key, + uint32_t state) +{ + 
struct vo_wayland_state *wl = data; + + wl->keyboard_code = key + 8; + xkb_keysym_t sym = xkb_state_key_get_one_sym(wl->xkb_state, wl->keyboard_code); + int mpkey = lookupkey(sym); + + state = state == WL_KEYBOARD_KEY_STATE_PRESSED ? MP_KEY_STATE_DOWN + : MP_KEY_STATE_UP; + + if (mpkey) { + mp_input_put_key(wl->vo->input_ctx, mpkey | state | wl->mpmod); + } else { + char s[128]; + if (xkb_keysym_to_utf8(sym, s, sizeof(s)) > 0) { + mp_input_put_key_utf8(wl->vo->input_ctx, state | wl->mpmod, bstr0(s)); + } else { + // Assume a modifier was pressed and handle it in the mod event instead. + return; + } + } + if (state == MP_KEY_STATE_DOWN) + wl->mpkey = mpkey; + if (mpkey && state == MP_KEY_STATE_UP) + wl->mpkey = 0; +} + +static void keyboard_handle_modifiers(void *data, struct wl_keyboard *wl_keyboard, + uint32_t serial, uint32_t mods_depressed, + uint32_t mods_latched, uint32_t mods_locked, + uint32_t group) +{ + struct vo_wayland_state *wl = data; + + if (wl->xkb_state) { + xkb_state_update_mask(wl->xkb_state, mods_depressed, mods_latched, + mods_locked, 0, 0, group); + wl->mpmod = get_mods(wl); + if (wl->mpkey) + mp_input_put_key(wl->vo->input_ctx, wl->mpkey | MP_KEY_STATE_DOWN | wl->mpmod); + } +} + +static void keyboard_handle_repeat_info(void *data, struct wl_keyboard *wl_keyboard, + int32_t rate, int32_t delay) +{ + struct vo_wayland_state *wl = data; + if (wl->vo_opts->native_keyrepeat) + mp_input_set_repeat_info(wl->vo->input_ctx, rate, delay); +} + +static const struct wl_keyboard_listener keyboard_listener = { + keyboard_handle_keymap, + keyboard_handle_enter, + keyboard_handle_leave, + keyboard_handle_key, + keyboard_handle_modifiers, + keyboard_handle_repeat_info, +}; + +static void seat_handle_caps(void *data, struct wl_seat *seat, + enum wl_seat_capability caps) +{ + struct vo_wayland_state *wl = data; + + if ((caps & WL_SEAT_CAPABILITY_POINTER) && !wl->pointer) { + wl->pointer = wl_seat_get_pointer(seat); + get_shape_device(wl); + 
wl_pointer_add_listener(wl->pointer, &pointer_listener, wl); + } else if (!(caps & WL_SEAT_CAPABILITY_POINTER) && wl->pointer) { + wl_pointer_destroy(wl->pointer); + wl->pointer = NULL; + } + + if ((caps & WL_SEAT_CAPABILITY_KEYBOARD) && !wl->keyboard) { + wl->keyboard = wl_seat_get_keyboard(seat); + wl_keyboard_add_listener(wl->keyboard, &keyboard_listener, wl); + } else if (!(caps & WL_SEAT_CAPABILITY_KEYBOARD) && wl->keyboard) { + wl_keyboard_destroy(wl->keyboard); + wl->keyboard = NULL; + } + + if ((caps & WL_SEAT_CAPABILITY_TOUCH) && !wl->touch) { + wl->touch = wl_seat_get_touch(seat); + wl_touch_set_user_data(wl->touch, wl); + wl_touch_add_listener(wl->touch, &touch_listener, wl); + } else if (!(caps & WL_SEAT_CAPABILITY_TOUCH) && wl->touch) { + wl_touch_destroy(wl->touch); + wl->touch = NULL; + } +} + +static const struct wl_seat_listener seat_listener = { + seat_handle_caps, +}; + +static void data_offer_handle_offer(void *data, struct wl_data_offer *offer, + const char *mime_type) +{ + struct vo_wayland_state *wl = data; + int score = mp_event_get_mime_type_score(wl->vo->input_ctx, mime_type); + if (score > wl->dnd_mime_score && wl->vo_opts->drag_and_drop != -2) { + wl->dnd_mime_score = score; + if (wl->dnd_mime_type) + talloc_free(wl->dnd_mime_type); + wl->dnd_mime_type = talloc_strdup(wl, mime_type); + MP_VERBOSE(wl, "Given DND offer with mime type %s\n", wl->dnd_mime_type); + } +} + +static void data_offer_source_actions(void *data, struct wl_data_offer *offer, uint32_t source_actions) +{ +} + +static void data_offer_action(void *data, struct wl_data_offer *wl_data_offer, uint32_t dnd_action) +{ + struct vo_wayland_state *wl = data; + if (dnd_action && wl->vo_opts->drag_and_drop != -2) { + if (wl->vo_opts->drag_and_drop >= 0) { + wl->dnd_action = wl->vo_opts->drag_and_drop; + } else { + wl->dnd_action = dnd_action & WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY ? 
+ DND_REPLACE : DND_APPEND; + } + MP_VERBOSE(wl, "DND action is %s\n", + wl->dnd_action == DND_REPLACE ? "DND_REPLACE" : "DND_APPEND"); + } +} + +static const struct wl_data_offer_listener data_offer_listener = { + data_offer_handle_offer, + data_offer_source_actions, + data_offer_action, +}; + +static void data_device_handle_data_offer(void *data, struct wl_data_device *wl_ddev, + struct wl_data_offer *id) +{ + struct vo_wayland_state *wl = data; + if (wl->dnd_offer) + wl_data_offer_destroy(wl->dnd_offer); + + wl->dnd_offer = id; + wl_data_offer_add_listener(id, &data_offer_listener, wl); +} + +static void data_device_handle_enter(void *data, struct wl_data_device *wl_ddev, + uint32_t serial, struct wl_surface *surface, + wl_fixed_t x, wl_fixed_t y, + struct wl_data_offer *id) +{ + struct vo_wayland_state *wl = data; + if (wl->dnd_offer != id) { + MP_FATAL(wl, "DND offer ID mismatch!\n"); + return; + } + + if (wl->vo_opts->drag_and_drop != -2) { + wl_data_offer_set_actions(id, WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY | + WL_DATA_DEVICE_MANAGER_DND_ACTION_MOVE, + WL_DATA_DEVICE_MANAGER_DND_ACTION_COPY); + wl_data_offer_accept(id, serial, wl->dnd_mime_type); + MP_VERBOSE(wl, "Accepting DND offer with mime type %s\n", wl->dnd_mime_type); + } + +} + +static void data_device_handle_leave(void *data, struct wl_data_device *wl_ddev) +{ + struct vo_wayland_state *wl = data; + + if (wl->dnd_offer) { + if (wl->dnd_fd != -1) + return; + wl_data_offer_destroy(wl->dnd_offer); + wl->dnd_offer = NULL; + } + + if (wl->vo_opts->drag_and_drop != -2) { + MP_VERBOSE(wl, "Releasing DND offer with mime type %s\n", wl->dnd_mime_type); + if (wl->dnd_mime_type) + TA_FREEP(&wl->dnd_mime_type); + wl->dnd_mime_score = 0; + } +} + +static void data_device_handle_motion(void *data, struct wl_data_device *wl_ddev, + uint32_t time, wl_fixed_t x, wl_fixed_t y) +{ + struct vo_wayland_state *wl = data; + wl_data_offer_accept(wl->dnd_offer, time, wl->dnd_mime_type); +} + +static void 
data_device_handle_drop(void *data, struct wl_data_device *wl_ddev) +{ + struct vo_wayland_state *wl = data; + + int pipefd[2]; + + if (pipe2(pipefd, O_CLOEXEC) == -1) { + MP_ERR(wl, "Failed to create dnd pipe!\n"); + return; + } + + if (wl->vo_opts->drag_and_drop != -2) { + MP_VERBOSE(wl, "Receiving DND offer with mime %s\n", wl->dnd_mime_type); + wl_data_offer_receive(wl->dnd_offer, wl->dnd_mime_type, pipefd[1]); + } + + close(pipefd[1]); + wl->dnd_fd = pipefd[0]; +} + +static void data_device_handle_selection(void *data, struct wl_data_device *wl_ddev, + struct wl_data_offer *id) +{ + struct vo_wayland_state *wl = data; + + if (wl->dnd_offer) { + wl_data_offer_destroy(wl->dnd_offer); + wl->dnd_offer = NULL; + MP_VERBOSE(wl, "Received a new DND offer. Releasing the previous offer.\n"); + } + +} + +static const struct wl_data_device_listener data_device_listener = { + data_device_handle_data_offer, + data_device_handle_enter, + data_device_handle_leave, + data_device_handle_motion, + data_device_handle_drop, + data_device_handle_selection, +}; + +static void output_handle_geometry(void *data, struct wl_output *wl_output, + int32_t x, int32_t y, int32_t phys_width, + int32_t phys_height, int32_t subpixel, + const char *make, const char *model, + int32_t transform) +{ + struct vo_wayland_output *output = data; + output->make = talloc_strdup(output->wl, make); + output->model = talloc_strdup(output->wl, model); + output->geometry.x0 = x; + output->geometry.y0 = y; + output->phys_width = phys_width; + output->phys_height = phys_height; +} + +static void output_handle_mode(void *data, struct wl_output *wl_output, + uint32_t flags, int32_t width, + int32_t height, int32_t refresh) +{ + struct vo_wayland_output *output = data; + + /* Only save current mode */ + if (!(flags & WL_OUTPUT_MODE_CURRENT)) + return; + + output->geometry.x1 = width; + output->geometry.y1 = height; + output->flags = flags; + output->refresh_rate = (double)refresh * 0.001; +} + +static void 
output_handle_done(void *data, struct wl_output *wl_output) +{ + struct vo_wayland_output *o = data; + struct vo_wayland_state *wl = o->wl; + + o->geometry.x1 += o->geometry.x0; + o->geometry.y1 += o->geometry.y0; + + MP_VERBOSE(o->wl, "Registered output %s %s (0x%x):\n" + "\tx: %dpx, y: %dpx\n" + "\tw: %dpx (%dmm), h: %dpx (%dmm)\n" + "\tscale: %d\n" + "\tHz: %f\n", o->make, o->model, o->id, o->geometry.x0, + o->geometry.y0, mp_rect_w(o->geometry), o->phys_width, + mp_rect_h(o->geometry), o->phys_height, o->scale, o->refresh_rate); + + /* If we satisfy this conditional, something about the current + * output must have changed (resolution, scale, etc). All window + * geometry and scaling should be recalculated. */ + if (wl->current_output && wl->current_output->output == wl_output) { + set_surface_scaling(wl); + spawn_cursor(wl); + set_geometry(wl, false); + prepare_resize(wl, 0, 0); + wl->pending_vo_events |= VO_EVENT_DPI; + } + + wl->pending_vo_events |= VO_EVENT_WIN_STATE; +} + +static void output_handle_scale(void *data, struct wl_output *wl_output, + int32_t factor) +{ + struct vo_wayland_output *output = data; + if (!factor) { + MP_ERR(output->wl, "Invalid output scale given by the compositor!\n"); + return; + } + output->scale = factor; +} + +static void output_handle_name(void *data, struct wl_output *wl_output, + const char *name) +{ + struct vo_wayland_output *output = data; + output->name = talloc_strdup(output->wl, name); +} + +static void output_handle_description(void *data, struct wl_output *wl_output, + const char *description) +{ +} + +static const struct wl_output_listener output_listener = { + output_handle_geometry, + output_handle_mode, + output_handle_done, + output_handle_scale, + output_handle_name, + output_handle_description, +}; + +static void surface_handle_enter(void *data, struct wl_surface *wl_surface, + struct wl_output *output) +{ + struct vo_wayland_state *wl = data; + if (!wl->current_output) + return; + + struct mp_rect 
old_output_geometry = wl->current_output->geometry; + struct mp_rect old_geometry = wl->geometry; + wl->current_output = NULL; + + struct vo_wayland_output *o; + wl_list_for_each(o, &wl->output_list, link) { + if (o->output == output) { + wl->current_output = o; + break; + } + } + + wl->current_output->has_surface = true; + bool force_resize = false; + + if (!wl->fractional_scale_manager && wl_surface_get_version(wl_surface) < 6 && + wl->scaling != wl->current_output->scale) + { + set_surface_scaling(wl); + spawn_cursor(wl); + force_resize = true; + wl->pending_vo_events |= VO_EVENT_DPI; + } + + if (!mp_rect_equals(&old_output_geometry, &wl->current_output->geometry)) { + set_geometry(wl, false); + force_resize = true; + } + + if (!mp_rect_equals(&old_geometry, &wl->geometry) || force_resize) + prepare_resize(wl, 0, 0); + + MP_VERBOSE(wl, "Surface entered output %s %s (0x%x), scale = %f, refresh rate = %f Hz\n", + o->make, o->model, o->id, wl->scaling, o->refresh_rate); + + wl->pending_vo_events |= VO_EVENT_WIN_STATE; +} + +static void surface_handle_leave(void *data, struct wl_surface *wl_surface, + struct wl_output *output) +{ + struct vo_wayland_state *wl = data; + + struct vo_wayland_output *o; + wl_list_for_each(o, &wl->output_list, link) { + if (o->output == output) { + o->has_surface = false; + wl->pending_vo_events |= VO_EVENT_WIN_STATE; + return; + } + } +} + +#ifdef HAVE_WAYLAND_1_22 +static void surface_handle_preferred_buffer_scale(void *data, + struct wl_surface *wl_surface, + int32_t scale) +{ + struct vo_wayland_state *wl = data; + double old_scale = wl->scaling; + + if (wl->fractional_scale_manager) + return; + + // dmabuf_wayland is always wl->scaling = 1 + wl->scaling = !wl->using_dmabuf_wayland ? 
scale : 1; + MP_VERBOSE(wl, "Obtained preferred scale, %f, from the compositor.\n", + wl->scaling); + wl->pending_vo_events |= VO_EVENT_DPI; + if (wl->current_output) { + rescale_geometry(wl, old_scale); + set_geometry(wl, false); + prepare_resize(wl, 0, 0); + } +} + +static void surface_handle_preferred_buffer_transform(void *data, + struct wl_surface *wl_surface, + uint32_t transform) +{ +} +#endif + +static const struct wl_surface_listener surface_listener = { + surface_handle_enter, + surface_handle_leave, +#ifdef HAVE_WAYLAND_1_22 + surface_handle_preferred_buffer_scale, + surface_handle_preferred_buffer_transform, +#endif +}; + +static void xdg_wm_base_ping(void *data, struct xdg_wm_base *wm_base, uint32_t serial) +{ + xdg_wm_base_pong(wm_base, serial); +} + +static const struct xdg_wm_base_listener xdg_wm_base_listener = { + xdg_wm_base_ping, +}; + +static void handle_surface_config(void *data, struct xdg_surface *surface, + uint32_t serial) +{ + xdg_surface_ack_configure(surface, serial); +} + +static const struct xdg_surface_listener xdg_surface_listener = { + handle_surface_config, +}; + +static void handle_toplevel_config(void *data, struct xdg_toplevel *toplevel, + int32_t width, int32_t height, struct wl_array *states) +{ + struct vo_wayland_state *wl = data; + struct mp_vo_opts *vo_opts = wl->vo_opts; + struct mp_rect old_geometry = wl->geometry; + + int old_toplevel_width = wl->toplevel_width; + int old_toplevel_height = wl->toplevel_height; + wl->toplevel_width = width; + wl->toplevel_height = height; + + if (!wl->configured) { + /* Save initial window size if the compositor gives us a hint here. 
*/ + bool autofit_or_geometry = vo_opts->geometry.wh_valid || vo_opts->autofit.wh_valid || + vo_opts->autofit_larger.wh_valid || vo_opts->autofit_smaller.wh_valid; + if (width && height && !autofit_or_geometry) { + wl->initial_size_hint = true; + wl->window_size = (struct mp_rect){0, 0, width, height}; + wl->geometry = wl->window_size; + } + return; + } + + bool is_maximized = false; + bool is_fullscreen = false; + bool is_activated = false; + bool is_suspended = false; + bool is_tiled = false; + enum xdg_toplevel_state *state; + wl_array_for_each(state, states) { + switch (*state) { + case XDG_TOPLEVEL_STATE_FULLSCREEN: + is_fullscreen = true; + break; + case XDG_TOPLEVEL_STATE_RESIZING: + break; + case XDG_TOPLEVEL_STATE_ACTIVATED: + is_activated = true; + /* + * If we get an ACTIVATED state, we know it cannot be + * minimized, but it may not have been minimized + * previously, so we can't detect the exact state. + */ + vo_opts->window_minimized = false; + m_config_cache_write_opt(wl->vo_opts_cache, + &vo_opts->window_minimized); + break; + case XDG_TOPLEVEL_STATE_TILED_TOP: + case XDG_TOPLEVEL_STATE_TILED_LEFT: + case XDG_TOPLEVEL_STATE_TILED_RIGHT: + case XDG_TOPLEVEL_STATE_TILED_BOTTOM: + is_tiled = true; + break; + case XDG_TOPLEVEL_STATE_MAXIMIZED: + is_maximized = true; + break; + case XDG_TOPLEVEL_STATE_SUSPENDED: + is_suspended = true; + break; + } + } + + if (wl->hidden != is_suspended) + wl->hidden = is_suspended; + + if (vo_opts->fullscreen != is_fullscreen) { + wl->state_change = true; + vo_opts->fullscreen = is_fullscreen; + m_config_cache_write_opt(wl->vo_opts_cache, &vo_opts->fullscreen); + } + + if (vo_opts->window_maximized != is_maximized) { + wl->state_change = true; + vo_opts->window_maximized = is_maximized; + m_config_cache_write_opt(wl->vo_opts_cache, &vo_opts->window_maximized); + } + + wl->tiled = is_tiled; + + wl->locked_size = is_fullscreen || is_maximized || is_tiled; + + if (wl->requested_decoration) + request_decoration_mode(wl, 
wl->requested_decoration); + + if (wl->activated != is_activated) { + wl->activated = is_activated; + guess_focus(wl); + /* Just force a redraw to be on the safe side. */ + if (wl->activated) { + wl->hidden = false; + wl->pending_vo_events |= VO_EVENT_EXPOSE; + } + } + + if (wl->state_change) { + if (!wl->locked_size) { + wl->geometry = wl->window_size; + wl->state_change = false; + goto resize; + } + } + + /* Reuse old size if either of these are 0. */ + if (width == 0 || height == 0) { + if (!wl->locked_size) { + wl->geometry = wl->window_size; + } + goto resize; + } + + if (old_toplevel_width == wl->toplevel_width && + old_toplevel_height == wl->toplevel_height) + return; + + if (!wl->locked_size) { + if (vo_opts->keepaspect) { + double scale_factor = (double)width / wl->reduced_width; + width = ceil(wl->reduced_width * scale_factor); + if (vo_opts->keepaspect_window) + height = ceil(wl->reduced_height * scale_factor); + } + wl->window_size.x0 = 0; + wl->window_size.y0 = 0; + wl->window_size.x1 = round(width * wl->scaling); + wl->window_size.y1 = round(height * wl->scaling); + } + wl->geometry.x0 = 0; + wl->geometry.y0 = 0; + wl->geometry.x1 = round(width * wl->scaling); + wl->geometry.y1 = round(height * wl->scaling); + + if (mp_rect_equals(&old_geometry, &wl->geometry)) + return; + +resize: + MP_VERBOSE(wl, "Resizing due to xdg from %ix%i to %ix%i\n", + mp_rect_w(old_geometry), mp_rect_h(old_geometry), + mp_rect_w(wl->geometry), mp_rect_h(wl->geometry)); + + prepare_resize(wl, width, height); + wl->toplevel_configured = true; +} + +static void handle_toplevel_close(void *data, struct xdg_toplevel *xdg_toplevel) +{ + struct vo_wayland_state *wl = data; + mp_input_put_key(wl->vo->input_ctx, MP_KEY_CLOSE_WIN); +} + +static void handle_configure_bounds(void *data, struct xdg_toplevel *xdg_toplevel, + int32_t width, int32_t height) +{ + struct vo_wayland_state *wl = data; + wl->bounded_width = width * wl->scaling; + wl->bounded_height = height * wl->scaling; +} + 
+#ifdef XDG_TOPLEVEL_WM_CAPABILITIES_SINCE_VERSION +static void handle_wm_capabilities(void *data, struct xdg_toplevel *xdg_toplevel, + struct wl_array *capabilities) +{ +} +#endif + +static const struct xdg_toplevel_listener xdg_toplevel_listener = { + handle_toplevel_config, + handle_toplevel_close, + handle_configure_bounds, +#ifdef XDG_TOPLEVEL_WM_CAPABILITIES_SINCE_VERSION + handle_wm_capabilities, +#endif +}; + +#if HAVE_WAYLAND_PROTOCOLS_1_31 +static void preferred_scale(void *data, + struct wp_fractional_scale_v1 *fractional_scale, + uint32_t scale) +{ + struct vo_wayland_state *wl = data; + double old_scale = wl->scaling; + + // dmabuf_wayland is always wl->scaling = 1 + wl->scaling = !wl->using_dmabuf_wayland ? (double)scale / 120 : 1; + MP_VERBOSE(wl, "Obtained preferred scale, %f, from the compositor.\n", + wl->scaling); + wl->pending_vo_events |= VO_EVENT_DPI; + if (wl->current_output) { + rescale_geometry(wl, old_scale); + set_geometry(wl, false); + prepare_resize(wl, 0, 0); + } +} + +static const struct wp_fractional_scale_v1_listener fractional_scale_listener = { + preferred_scale, +}; +#endif + +static const char *zxdg_decoration_mode_to_str(const uint32_t mode) +{ + switch (mode) { + case ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE: + return "server-side"; + case ZXDG_TOPLEVEL_DECORATION_V1_MODE_CLIENT_SIDE: + return "client-side"; + default: + return "<unknown>"; + } +} + +static void configure_decorations(void *data, + struct zxdg_toplevel_decoration_v1 *xdg_toplevel_decoration, + uint32_t mode) +{ + struct vo_wayland_state *wl = data; + struct mp_vo_opts *opts = wl->vo_opts; + + if (wl->requested_decoration && mode != wl->requested_decoration) { + MP_DBG(wl, + "Requested %s decorations but compositor responded with %s. 
" + "It is likely that compositor wants us to stay in a given mode.\n", + zxdg_decoration_mode_to_str(wl->requested_decoration), + zxdg_decoration_mode_to_str(mode)); + } + + wl->requested_decoration = 0; + + if (mode == ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE) { + MP_VERBOSE(wl, "Enabling server decorations\n"); + } else { + MP_VERBOSE(wl, "Disabling server decorations\n"); + } + opts->border = mode == ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE; + m_config_cache_write_opt(wl->vo_opts_cache, &opts->border); +} + +static const struct zxdg_toplevel_decoration_v1_listener decoration_listener = { + configure_decorations, +}; + +static void pres_set_clockid(void *data, struct wp_presentation *pres, + uint32_t clockid) +{ + struct vo_wayland_state *wl = data; + + if (clockid == CLOCK_MONOTONIC || clockid == CLOCK_MONOTONIC_RAW) + wl->use_present = true; +} + +static const struct wp_presentation_listener pres_listener = { + pres_set_clockid, +}; + +static void feedback_sync_output(void *data, struct wp_presentation_feedback *fback, + struct wl_output *output) +{ +} + +static void feedback_presented(void *data, struct wp_presentation_feedback *fback, + uint32_t tv_sec_hi, uint32_t tv_sec_lo, + uint32_t tv_nsec, uint32_t refresh_nsec, + uint32_t seq_hi, uint32_t seq_lo, + uint32_t flags) +{ + struct vo_wayland_feedback_pool *fback_pool = data; + struct vo_wayland_state *wl = fback_pool->wl; + + if (fback) + remove_feedback(fback_pool, fback); + + wl->refresh_interval = (int64_t)refresh_nsec; + + // Very similar to oml_sync_control, in this case we assume that every + // time the compositor receives feedback, a buffer swap has been already + // been performed. + // + // Notes: + // - tv_sec_lo + tv_sec_hi is the equivalent of oml's ust + // - seq_lo + seq_hi is the equivalent of oml's msc + // - these values are updated every time the compositor receives feedback. 
+ + int64_t sec = (uint64_t) tv_sec_lo + ((uint64_t) tv_sec_hi << 32); + int64_t ust = MP_TIME_S_TO_NS(sec) + (uint64_t) tv_nsec; + int64_t msc = (uint64_t) seq_lo + ((uint64_t) seq_hi << 32); + present_sync_update_values(wl->present, ust, msc); +} + +static void feedback_discarded(void *data, struct wp_presentation_feedback *fback) +{ + struct vo_wayland_feedback_pool *fback_pool = data; + if (fback) + remove_feedback(fback_pool, fback); +} + +static const struct wp_presentation_feedback_listener feedback_listener = { + feedback_sync_output, + feedback_presented, + feedback_discarded, +}; + +static const struct wl_callback_listener frame_listener; + +static void frame_callback(void *data, struct wl_callback *callback, uint32_t time) +{ + struct vo_wayland_state *wl = data; + + if (callback) + wl_callback_destroy(callback); + + wl->frame_callback = wl_surface_frame(wl->callback_surface); + wl_callback_add_listener(wl->frame_callback, &frame_listener, wl); + + if (wl->use_present) { + struct wp_presentation_feedback *fback = wp_presentation_feedback(wl->presentation, wl->callback_surface); + add_feedback(wl->fback_pool, fback); + wp_presentation_feedback_add_listener(fback, &feedback_listener, wl->fback_pool); + } + + wl->frame_wait = false; + wl->hidden = false; +} + +static const struct wl_callback_listener frame_listener = { + frame_callback, +}; + +static void done(void *data, + struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1) +{ +} + +static void format_table(void *data, + struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1, + int32_t fd, + uint32_t size) +{ + struct vo_wayland_state *wl = data; + + void *map = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); + close(fd); + + if (map != MAP_FAILED) { + wl->format_map = map; + wl->format_size = size; + } +} + +static void main_device(void *data, + struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1, + struct wl_array *device) +{ +} + +static void tranche_done(void 
*data, + struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1) +{ +} + +static void tranche_target_device(void *data, + struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1, + struct wl_array *device) +{ +} + +static void tranche_formats(void *data, + struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1, + struct wl_array *indices) +{ +} + +static void tranche_flags(void *data, + struct zwp_linux_dmabuf_feedback_v1 *zwp_linux_dmabuf_feedback_v1, + uint32_t flags) +{ +} + +static const struct zwp_linux_dmabuf_feedback_v1_listener dmabuf_feedback_listener = { + done, + format_table, + main_device, + tranche_done, + tranche_target_device, + tranche_formats, + tranche_flags, +}; + +static void registry_handle_add(void *data, struct wl_registry *reg, uint32_t id, + const char *interface, uint32_t ver) +{ + int found = 1; + struct vo_wayland_state *wl = data; + + if (!strcmp(interface, wl_compositor_interface.name) && (ver >= 4) && found++) { +#ifdef HAVE_WAYLAND_1_22 + ver = MPMIN(ver, 6); /* Cap at 6 in case new events are added later. 
*/ +#else + ver = 4; +#endif + wl->compositor = wl_registry_bind(reg, id, &wl_compositor_interface, ver); + wl->surface = wl_compositor_create_surface(wl->compositor); + wl->video_surface = wl_compositor_create_surface(wl->compositor); + wl->osd_surface = wl_compositor_create_surface(wl->compositor); + + /* never accept input events on anything besides the main surface */ + struct wl_region *region = wl_compositor_create_region(wl->compositor); + wl_surface_set_input_region(wl->osd_surface, region); + wl_surface_set_input_region(wl->video_surface, region); + wl_region_destroy(region); + + wl->cursor_surface = wl_compositor_create_surface(wl->compositor); + wl_surface_add_listener(wl->surface, &surface_listener, wl); + } + + if (!strcmp(interface, wl_subcompositor_interface.name) && (ver >= 1) && found++) { + wl->subcompositor = wl_registry_bind(reg, id, &wl_subcompositor_interface, 1); + } + + if (!strcmp (interface, zwp_linux_dmabuf_v1_interface.name) && (ver >= 4) && found++) { + wl->dmabuf = wl_registry_bind(reg, id, &zwp_linux_dmabuf_v1_interface, 4); + wl->dmabuf_feedback = zwp_linux_dmabuf_v1_get_default_feedback(wl->dmabuf); + zwp_linux_dmabuf_feedback_v1_add_listener(wl->dmabuf_feedback, &dmabuf_feedback_listener, wl); + } + + if (!strcmp (interface, wp_viewporter_interface.name) && (ver >= 1) && found++) { + wl->viewporter = wl_registry_bind (reg, id, &wp_viewporter_interface, 1); + } + + if (!strcmp(interface, wl_data_device_manager_interface.name) && (ver >= 3) && found++) { + wl->dnd_devman = wl_registry_bind(reg, id, &wl_data_device_manager_interface, 3); + } + + if (!strcmp(interface, wl_output_interface.name) && (ver >= 2) && found++) { + struct vo_wayland_output *output = talloc_zero(wl, struct vo_wayland_output); + + output->wl = wl; + output->id = id; + output->scale = 1; + output->name = ""; + + ver = MPMIN(ver, 4); /* Cap at 4 in case new events are added later. 
*/ + output->output = wl_registry_bind(reg, id, &wl_output_interface, ver); + wl_output_add_listener(output->output, &output_listener, output); + wl_list_insert(&wl->output_list, &output->link); + } + + if (!strcmp(interface, wl_seat_interface.name) && found++) { + wl->seat = wl_registry_bind(reg, id, &wl_seat_interface, 1); + wl_seat_add_listener(wl->seat, &seat_listener, wl); + } + + if (!strcmp(interface, wl_shm_interface.name) && found++) { + wl->shm = wl_registry_bind(reg, id, &wl_shm_interface, 1); + } + +#if HAVE_WAYLAND_PROTOCOLS_1_27 + if (!strcmp(interface, wp_content_type_manager_v1_interface.name) && found++) { + wl->content_type_manager = wl_registry_bind(reg, id, &wp_content_type_manager_v1_interface, 1); + } + + if (!strcmp(interface, wp_single_pixel_buffer_manager_v1_interface.name) && found++) { + wl->single_pixel_manager = wl_registry_bind(reg, id, &wp_single_pixel_buffer_manager_v1_interface, 1); + } +#endif + +#if HAVE_WAYLAND_PROTOCOLS_1_31 + if (!strcmp(interface, wp_fractional_scale_manager_v1_interface.name) && found++) { + wl->fractional_scale_manager = wl_registry_bind(reg, id, &wp_fractional_scale_manager_v1_interface, 1); + } +#endif + +#if HAVE_WAYLAND_PROTOCOLS_1_32 + if (!strcmp(interface, wp_cursor_shape_manager_v1_interface.name) && found++) { + wl->cursor_shape_manager = wl_registry_bind(reg, id, &wp_cursor_shape_manager_v1_interface, 1); + } +#endif + + if (!strcmp(interface, wp_presentation_interface.name) && found++) { + wl->presentation = wl_registry_bind(reg, id, &wp_presentation_interface, 1); + wp_presentation_add_listener(wl->presentation, &pres_listener, wl); + } + + if (!strcmp(interface, xdg_wm_base_interface.name) && found++) { + ver = MPMIN(ver, 6); /* Cap at 6 in case new events are added later. 
*/ + wl->wm_base = wl_registry_bind(reg, id, &xdg_wm_base_interface, ver); + xdg_wm_base_add_listener(wl->wm_base, &xdg_wm_base_listener, wl); + } + + if (!strcmp(interface, zxdg_decoration_manager_v1_interface.name) && found++) { + wl->xdg_decoration_manager = wl_registry_bind(reg, id, &zxdg_decoration_manager_v1_interface, 1); + } + + if (!strcmp(interface, zwp_idle_inhibit_manager_v1_interface.name) && found++) { + wl->idle_inhibit_manager = wl_registry_bind(reg, id, &zwp_idle_inhibit_manager_v1_interface, 1); + } + + if (found > 1) + MP_VERBOSE(wl, "Registered for protocol %s\n", interface); +} + +static void registry_handle_remove(void *data, struct wl_registry *reg, uint32_t id) +{ + struct vo_wayland_state *wl = data; + struct vo_wayland_output *output, *tmp; + wl_list_for_each_safe(output, tmp, &wl->output_list, link) { + if (output->id == id) { + remove_output(output); + return; + } + } +} + +static const struct wl_registry_listener registry_listener = { + registry_handle_add, + registry_handle_remove, +}; + +/* Static functions */ +static void check_dnd_fd(struct vo_wayland_state *wl) +{ + if (wl->dnd_fd == -1) + return; + + struct pollfd fdp = { wl->dnd_fd, POLLIN | POLLHUP, 0 }; + if (poll(&fdp, 1, 0) <= 0) + return; + + if (fdp.revents & POLLIN) { + ptrdiff_t offset = 0; + size_t data_read = 0; + const size_t chunk_size = 1; + uint8_t *buffer = ta_zalloc_size(wl, chunk_size); + if (!buffer) + goto end; + + while ((data_read = read(wl->dnd_fd, buffer + offset, chunk_size)) > 0) { + offset += data_read; + buffer = ta_realloc_size(wl, buffer, offset + chunk_size); + memset(buffer + offset, 0, chunk_size); + if (!buffer) + goto end; + } + + MP_VERBOSE(wl, "Read %td bytes from the DND fd\n", offset); + + struct bstr file_list = bstr0(buffer); + mp_event_drop_mime_data(wl->vo->input_ctx, wl->dnd_mime_type, + file_list, wl->dnd_action); + talloc_free(buffer); +end: + if (wl->dnd_mime_type) + talloc_free(wl->dnd_mime_type); + + if (wl->dnd_action >= 0 && 
wl->dnd_offer) + wl_data_offer_finish(wl->dnd_offer); + + wl->dnd_action = -1; + wl->dnd_mime_type = NULL; + wl->dnd_mime_score = 0; + } + + if (fdp.revents & (POLLIN | POLLERR | POLLHUP)) { + close(wl->dnd_fd); + wl->dnd_fd = -1; + } +} + +static int check_for_resize(struct vo_wayland_state *wl, int edge_pixels, + enum xdg_toplevel_resize_edge *edge) +{ + if (wl->vo_opts->fullscreen || wl->vo_opts->window_maximized) + return 0; + + int pos[2] = { wl->mouse_x, wl->mouse_y }; + int left_edge = pos[0] < edge_pixels; + int top_edge = pos[1] < edge_pixels; + int right_edge = pos[0] > (mp_rect_w(wl->geometry) - edge_pixels); + int bottom_edge = pos[1] > (mp_rect_h(wl->geometry) - edge_pixels); + + if (left_edge) { + *edge = XDG_TOPLEVEL_RESIZE_EDGE_LEFT; + if (top_edge) + *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP_LEFT; + else if (bottom_edge) + *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM_LEFT; + } else if (right_edge) { + *edge = XDG_TOPLEVEL_RESIZE_EDGE_RIGHT; + if (top_edge) + *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP_RIGHT; + else if (bottom_edge) + *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM_RIGHT; + } else if (top_edge) { + *edge = XDG_TOPLEVEL_RESIZE_EDGE_TOP; + } else if (bottom_edge) { + *edge = XDG_TOPLEVEL_RESIZE_EDGE_BOTTOM; + } else { + *edge = 0; + return 0; + } + + return 1; +} + +static bool create_input(struct vo_wayland_state *wl) +{ + wl->xkb_context = xkb_context_new(XKB_CONTEXT_NO_FLAGS); + + if (!wl->xkb_context) { + MP_ERR(wl, "failed to initialize input: check xkbcommon\n"); + return 1; + } + + return 0; +} + +static int create_viewports(struct vo_wayland_state *wl) +{ + if (wl->viewporter) { + wl->viewport = wp_viewporter_get_viewport(wl->viewporter, wl->surface); + wl->osd_viewport = wp_viewporter_get_viewport(wl->viewporter, wl->osd_surface); + wl->video_viewport = wp_viewporter_get_viewport(wl->viewporter, wl->video_surface); + } + + if (wl->viewporter && (!wl->viewport || !wl->osd_viewport || !wl->video_viewport)) { + MP_ERR(wl, "failed to create viewport 
interfaces!\n"); + return 1; + } + return 0; +} + +static int create_xdg_surface(struct vo_wayland_state *wl) +{ + wl->xdg_surface = xdg_wm_base_get_xdg_surface(wl->wm_base, wl->surface); + xdg_surface_add_listener(wl->xdg_surface, &xdg_surface_listener, wl); + + wl->xdg_toplevel = xdg_surface_get_toplevel(wl->xdg_surface); + xdg_toplevel_add_listener(wl->xdg_toplevel, &xdg_toplevel_listener, wl); + + if (!wl->xdg_surface || !wl->xdg_toplevel) { + MP_ERR(wl, "failed to create xdg_surface and xdg_toplevel!\n"); + return 1; + } + return 0; +} + +static void add_feedback(struct vo_wayland_feedback_pool *fback_pool, + struct wp_presentation_feedback *fback) +{ + for (int i = 0; i < fback_pool->len; ++i) { + if (!fback_pool->fback[i]) { + fback_pool->fback[i] = fback; + break; + } else if (i == fback_pool->len - 1) { + // Shouldn't happen in practice. + wp_presentation_feedback_destroy(fback_pool->fback[i]); + fback_pool->fback[i] = fback; + } + } +} + +static void do_minimize(struct vo_wayland_state *wl) +{ + if (!wl->xdg_toplevel) + return; + if (wl->vo_opts->window_minimized) + xdg_toplevel_set_minimized(wl->xdg_toplevel); +} + +static char **get_displays_spanned(struct vo_wayland_state *wl) +{ + char **names = NULL; + int displays_spanned = 0; + struct vo_wayland_output *output; + wl_list_for_each(output, &wl->output_list, link) { + if (output->has_surface) { + char *name = output->name ? 
output->name : output->model; + MP_TARRAY_APPEND(NULL, names, displays_spanned, + talloc_strdup(NULL, name)); + } + } + MP_TARRAY_APPEND(NULL, names, displays_spanned, NULL); + return names; +} + +static int get_mods(struct vo_wayland_state *wl) +{ + static char* const mod_names[] = { + XKB_MOD_NAME_SHIFT, + XKB_MOD_NAME_CTRL, + XKB_MOD_NAME_ALT, + XKB_MOD_NAME_LOGO, + }; + + static const int mods[] = { + MP_KEY_MODIFIER_SHIFT, + MP_KEY_MODIFIER_CTRL, + MP_KEY_MODIFIER_ALT, + MP_KEY_MODIFIER_META, + }; + + int modifiers = 0; + + for (int n = 0; n < MP_ARRAY_SIZE(mods); n++) { + xkb_mod_index_t index = xkb_keymap_mod_get_index(wl->xkb_keymap, mod_names[n]); + if (!xkb_state_mod_index_is_consumed(wl->xkb_state, wl->keyboard_code, index) + && xkb_state_mod_index_is_active(wl->xkb_state, index, + XKB_STATE_MODS_DEPRESSED)) + modifiers |= mods[n]; + } + return modifiers; +} + +static void get_shape_device(struct vo_wayland_state *wl) +{ +#if HAVE_WAYLAND_PROTOCOLS_1_32 + if (!wl->cursor_shape_device && wl->cursor_shape_manager) { + wl->cursor_shape_device = wp_cursor_shape_manager_v1_get_pointer(wl->cursor_shape_manager, + wl->pointer); + } +#endif +} + +static int greatest_common_divisor(int a, int b) +{ + int rem = a % b; + if (rem == 0) + return b; + return greatest_common_divisor(b, rem); +} + +static void guess_focus(struct vo_wayland_state *wl) +{ + // We can't actually know if the window is focused or not in wayland, + // so just guess it with some common sense. Obviously won't work if + // the user has no keyboard. + if ((!wl->focused && wl->activated && wl->has_keyboard_input) || + (wl->focused && !wl->activated)) + { + wl->focused = !wl->focused; + wl->pending_vo_events |= VO_EVENT_FOCUS; + } +} + +static struct vo_wayland_output *find_output(struct vo_wayland_state *wl) +{ + int index = 0; + struct mp_vo_opts *opts = wl->vo_opts; + int screen_id = opts->fullscreen ? opts->fsscreen_id : opts->screen_id; + char *screen_name = opts->fullscreen ? 
opts->fsscreen_name : opts->screen_name; + struct vo_wayland_output *output = NULL; + struct vo_wayland_output *fallback_output = NULL; + wl_list_for_each(output, &wl->output_list, link) { + if (index == 0) + fallback_output = output; + if (screen_id == -1 && !screen_name) + return output; + if (screen_id == -1 && screen_name && !strcmp(screen_name, output->name)) + return output; + if (screen_id == -1 && screen_name && !strcmp(screen_name, output->model)) + return output; + if (screen_id == index++) + return output; + } + if (!fallback_output) { + MP_ERR(wl, "No screens could be found!\n"); + return NULL; + } else if (screen_id >= 0) { + MP_WARN(wl, "Screen index %i not found/unavailable! Falling back to screen 0!\n", screen_id); + } else if (screen_name && screen_name[0]) { + MP_WARN(wl, "Screen name %s not found/unavailable! Falling back to screen 0!\n", screen_name); + } + return fallback_output; +} + +static int lookupkey(int key) +{ + const char *passthrough_keys = " -+*/<>`~!@#$%^&()_{}:;\"\',.?\\|=[]"; + + int mpkey = 0; + if ((key >= 'a' && key <= 'z') || (key >= 'A' && key <= 'Z') || + (key >= '0' && key <= '9') || + (key > 0 && key < 256 && strchr(passthrough_keys, key))) + mpkey = key; + + if (!mpkey) + mpkey = lookup_keymap_table(keymap, key); + + return mpkey; +} + +static void prepare_resize(struct vo_wayland_state *wl, int width, int height) +{ + if (!width) + width = mp_rect_w(wl->geometry) / wl->scaling; + if (!height) + height = mp_rect_h(wl->geometry) / wl->scaling; + xdg_surface_set_window_geometry(wl->xdg_surface, 0, 0, width, height); + wl->pending_vo_events |= VO_EVENT_RESIZE; +} + +static void request_decoration_mode(struct vo_wayland_state *wl, uint32_t mode) +{ + wl->requested_decoration = mode; + zxdg_toplevel_decoration_v1_set_mode(wl->xdg_toplevel_decoration, mode); +} + +static void rescale_geometry(struct vo_wayland_state *wl, double old_scale) +{ + double factor = old_scale / wl->scaling; + wl->window_size.x1 /= factor; + 
wl->window_size.y1 /= factor; + wl->geometry.x1 /= factor; + wl->geometry.y1 /= factor; +} + +static void clean_feedback_pool(struct vo_wayland_feedback_pool *fback_pool) +{ + for (int i = 0; i < fback_pool->len; ++i) { + if (fback_pool->fback[i]) { + wp_presentation_feedback_destroy(fback_pool->fback[i]); + fback_pool->fback[i] = NULL; + } + } +} + +static void remove_feedback(struct vo_wayland_feedback_pool *fback_pool, + struct wp_presentation_feedback *fback) +{ + for (int i = 0; i < fback_pool->len; ++i) { + if (fback_pool->fback[i] == fback) { + wp_presentation_feedback_destroy(fback); + fback_pool->fback[i] = NULL; + break; + } + } +} + +static void remove_output(struct vo_wayland_output *out) +{ + if (!out) + return; + + MP_VERBOSE(out->wl, "Deregistering output %s %s (0x%x)\n", out->make, + out->model, out->id); + wl_list_remove(&out->link); + wl_output_destroy(out->output); + talloc_free(out->make); + talloc_free(out->model); + talloc_free(out); + return; +} + +static void set_content_type(struct vo_wayland_state *wl) +{ + if (!wl->content_type_manager) + return; +#if HAVE_WAYLAND_PROTOCOLS_1_27 + // handle auto; + if (wl->vo_opts->content_type == -1) { + wp_content_type_v1_set_content_type(wl->content_type, wl->current_content_type); + } else { + wp_content_type_v1_set_content_type(wl->content_type, wl->vo_opts->content_type); + } +#endif +} + +static void set_cursor_shape(struct vo_wayland_state *wl) +{ +#if HAVE_WAYLAND_PROTOCOLS_1_32 + wp_cursor_shape_device_v1_set_shape(wl->cursor_shape_device, wl->pointer_id, + WP_CURSOR_SHAPE_DEVICE_V1_SHAPE_DEFAULT); +#endif +} + +static int set_cursor_visibility(struct vo_wayland_state *wl, bool on) +{ + wl->cursor_visible = on; + if (on) { + if (wl->cursor_shape_device) { + set_cursor_shape(wl); + } else { + if (spawn_cursor(wl)) + return VO_FALSE; + struct wl_cursor_image *img = wl->default_cursor->images[0]; + struct wl_buffer *buffer = wl_cursor_image_get_buffer(img); + if (!buffer) + return VO_FALSE; + int 
scale = MPMAX(wl->scaling, 1); + wl_pointer_set_cursor(wl->pointer, wl->pointer_id, wl->cursor_surface, + img->hotspot_x / scale, img->hotspot_y / scale); + wl_surface_set_buffer_scale(wl->cursor_surface, scale); + wl_surface_attach(wl->cursor_surface, buffer, 0, 0); + wl_surface_damage_buffer(wl->cursor_surface, 0, 0, img->width, img->height); + } + wl_surface_commit(wl->cursor_surface); + } else { + wl_pointer_set_cursor(wl->pointer, wl->pointer_id, NULL, 0, 0); + } + return VO_TRUE; +} + +static void set_geometry(struct vo_wayland_state *wl, bool resize) +{ + struct vo *vo = wl->vo; + if (!wl->current_output) + return; + + struct vo_win_geometry geo; + struct mp_rect screenrc = wl->current_output->geometry; + vo_calc_window_geometry2(vo, &screenrc, wl->scaling, &geo); + vo_apply_window_geometry(vo, &geo); + + int gcd = greatest_common_divisor(vo->dwidth, vo->dheight); + wl->reduced_width = vo->dwidth / gcd; + wl->reduced_height = vo->dheight / gcd; + + if (!wl->initial_size_hint) + wl->window_size = (struct mp_rect){0, 0, vo->dwidth, vo->dheight}; + wl->initial_size_hint = false; + + if (resize) { + if (!wl->locked_size) + wl->geometry = wl->window_size; + prepare_resize(wl, 0, 0); + } +} + +static void set_input_region(struct vo_wayland_state *wl, bool passthrough) +{ + if (passthrough) { + struct wl_region *region = wl_compositor_create_region(wl->compositor); + wl_surface_set_input_region(wl->surface, region); + wl_region_destroy(region); + } else { + wl_surface_set_input_region(wl->surface, NULL); + } +} + +static int set_screensaver_inhibitor(struct vo_wayland_state *wl, int state) +{ + if (!wl->idle_inhibit_manager) + return VO_NOTIMPL; + if (state == (!!wl->idle_inhibitor)) + return VO_TRUE; + if (state) { + MP_VERBOSE(wl, "Enabling idle inhibitor\n"); + struct zwp_idle_inhibit_manager_v1 *mgr = wl->idle_inhibit_manager; + wl->idle_inhibitor = zwp_idle_inhibit_manager_v1_create_inhibitor(mgr, wl->surface); + } else { + MP_VERBOSE(wl, "Disabling the idle 
inhibitor\n"); + zwp_idle_inhibitor_v1_destroy(wl->idle_inhibitor); + wl->idle_inhibitor = NULL; + } + return VO_TRUE; +} + +static void set_surface_scaling(struct vo_wayland_state *wl) +{ + if (wl->fractional_scale_manager) + return; + + // dmabuf_wayland is always wl->scaling = 1 + double old_scale = wl->scaling; + wl->scaling = !wl->using_dmabuf_wayland ? wl->current_output->scale : 1; + + rescale_geometry(wl, old_scale); + wl_surface_set_buffer_scale(wl->surface, wl->scaling); +} + +static void set_window_bounds(struct vo_wayland_state *wl) +{ + // If the user has set geometry/autofit and the option is auto, + // don't use these. + if (wl->opts->configure_bounds == -1 && (wl->vo_opts->geometry.wh_valid || + wl->vo_opts->autofit.wh_valid || wl->vo_opts->autofit_larger.wh_valid || + wl->vo_opts->autofit_smaller.wh_valid)) + { + return; + } + + if (wl->bounded_width && wl->bounded_width < wl->window_size.x1) + wl->window_size.x1 = wl->bounded_width; + if (wl->bounded_height && wl->bounded_height < wl->window_size.y1) + wl->window_size.y1 = wl->bounded_height; +} + +static int spawn_cursor(struct vo_wayland_state *wl) +{ + /* Don't use this if we have cursor-shape. 
*/ + if (wl->cursor_shape_device) + return 0; + /* Reuse if size is identical */ + if (!wl->pointer || wl->allocated_cursor_scale == wl->scaling) + return 0; + else if (wl->cursor_theme) + wl_cursor_theme_destroy(wl->cursor_theme); + + const char *xcursor_theme = getenv("XCURSOR_THEME"); + const char *size_str = getenv("XCURSOR_SIZE"); + int size = 24; + if (size_str != NULL) { + errno = 0; + char *end; + long size_long = strtol(size_str, &end, 10); + if (!*end && !errno && size_long > 0 && size_long <= INT_MAX) + size = (int)size_long; + } + + wl->cursor_theme = wl_cursor_theme_load(xcursor_theme, size*wl->scaling, wl->shm); + if (!wl->cursor_theme) { + MP_ERR(wl, "Unable to load cursor theme!\n"); + return 1; + } + + wl->default_cursor = wl_cursor_theme_get_cursor(wl->cursor_theme, "left_ptr"); + if (!wl->default_cursor) { + MP_ERR(wl, "Unable to load cursor theme!\n"); + return 1; + } + + wl->allocated_cursor_scale = wl->scaling; + + return 0; +} + +static void toggle_fullscreen(struct vo_wayland_state *wl) +{ + if (!wl->xdg_toplevel) + return; + wl->state_change = true; + bool specific_screen = wl->vo_opts->fsscreen_id >= 0 || wl->vo_opts->fsscreen_name; + if (wl->vo_opts->fullscreen && !specific_screen) { + xdg_toplevel_set_fullscreen(wl->xdg_toplevel, NULL); + } else if (wl->vo_opts->fullscreen && specific_screen) { + struct vo_wayland_output *output = find_output(wl); + xdg_toplevel_set_fullscreen(wl->xdg_toplevel, output->output); + } else { + xdg_toplevel_unset_fullscreen(wl->xdg_toplevel); + } +} + +static void toggle_maximized(struct vo_wayland_state *wl) +{ + if (!wl->xdg_toplevel) + return; + wl->state_change = true; + if (wl->vo_opts->window_maximized) { + xdg_toplevel_set_maximized(wl->xdg_toplevel); + } else { + xdg_toplevel_unset_maximized(wl->xdg_toplevel); + } +} + +static void update_app_id(struct vo_wayland_state *wl) +{ + if (!wl->xdg_toplevel) + return; + xdg_toplevel_set_app_id(wl->xdg_toplevel, wl->vo_opts->appid); +} + +static int 
update_window_title(struct vo_wayland_state *wl, const char *title) +{ + if (!wl->xdg_toplevel) + return VO_NOTAVAIL; + /* The xdg-shell protocol requires that the title is UTF-8. */ + void *tmp = talloc_new(NULL); + struct bstr b_title = bstr_sanitize_utf8_latin1(tmp, bstr0(title)); + xdg_toplevel_set_title(wl->xdg_toplevel, bstrto0(tmp, b_title)); + talloc_free(tmp); + return VO_TRUE; +} + +static void window_move(struct vo_wayland_state *wl, uint32_t serial) +{ + if (wl->xdg_toplevel) + xdg_toplevel_move(wl->xdg_toplevel, wl->seat, serial); +} + +static void wayland_dispatch_events(struct vo_wayland_state *wl, int nfds, int64_t timeout_ns) +{ + if (wl->display_fd == -1) + return; + + struct pollfd fds[2] = { + {.fd = wl->display_fd, .events = POLLIN }, + {.fd = wl->wakeup_pipe[0], .events = POLLIN }, + }; + + while (wl_display_prepare_read(wl->display) != 0) + wl_display_dispatch_pending(wl->display); + wl_display_flush(wl->display); + + mp_poll(fds, nfds, timeout_ns); + + if (fds[0].revents & POLLIN) { + wl_display_read_events(wl->display); + } else { + wl_display_cancel_read(wl->display); + } + + if (fds[0].revents & (POLLERR | POLLHUP | POLLNVAL)) { + MP_FATAL(wl, "Error occurred on the display fd\n"); + wl->display_fd = -1; + mp_input_put_key(wl->vo->input_ctx, MP_KEY_CLOSE_WIN); + } + + if (fds[1].revents & POLLIN) + mp_flush_wakeup_pipe(wl->wakeup_pipe[0]); + + wl_display_dispatch_pending(wl->display); +} + +/* Non-static */ +int vo_wayland_allocate_memfd(struct vo *vo, size_t size) +{ +#if !HAVE_MEMFD_CREATE + return VO_ERROR; +#else + int fd = memfd_create("mpv", MFD_CLOEXEC | MFD_ALLOW_SEALING); + if (fd < 0) { + MP_ERR(vo, "Failed to allocate memfd: %s\n", mp_strerror(errno)); + return VO_ERROR; + } + + fcntl(fd, F_ADD_SEALS, F_SEAL_SHRINK | F_SEAL_SEAL); + + if (posix_fallocate(fd, 0, size) == 0) + return fd; + + close(fd); + MP_ERR(vo, "Failed to allocate memfd: %s\n", mp_strerror(errno)); + + return VO_ERROR; +#endif +} + +bool 
vo_wayland_check_visible(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + bool render = !wl->hidden || wl->vo_opts->force_render; + wl->frame_wait = true; + return render; +} + +int vo_wayland_control(struct vo *vo, int *events, int request, void *arg) +{ + struct vo_wayland_state *wl = vo->wl; + struct mp_vo_opts *opts = wl->vo_opts; + wl_display_dispatch_pending(wl->display); + + switch (request) { + case VOCTRL_CHECK_EVENTS: { + check_dnd_fd(wl); + *events |= wl->pending_vo_events; + if (*events & VO_EVENT_RESIZE) { + *events |= VO_EVENT_EXPOSE; + wl->frame_wait = false; + wl->timeout_count = 0; + wl->hidden = false; + } + wl->pending_vo_events = 0; + return VO_TRUE; + } + case VOCTRL_VO_OPTS_CHANGED: { + void *opt; + while (m_config_cache_get_next_changed(wl->vo_opts_cache, &opt)) { + if (opt == &opts->appid) + update_app_id(wl); + if (opt == &opts->border) + { + // This is stupid but the value of border shouldn't be written + // unless we get a configure event. Change it back to its old + // value and let configure_decorations handle it after the request. + if (wl->xdg_toplevel_decoration) { + int requested_border_mode = opts->border; + opts->border = !opts->border; + m_config_cache_write_opt(wl->vo_opts_cache, + &opts->border); + request_decoration_mode( + wl, requested_border_mode ? 
+ ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE : + ZXDG_TOPLEVEL_DECORATION_V1_MODE_CLIENT_SIDE); + } else { + opts->border = false; + m_config_cache_write_opt(wl->vo_opts_cache, + &wl->vo_opts->border); + } + } + if (opt == &opts->content_type) + set_content_type(wl); + if (opt == &opts->cursor_passthrough) + set_input_region(wl, opts->cursor_passthrough); + if (opt == &opts->fullscreen) + toggle_fullscreen(wl); + if (opt == &opts->hidpi_window_scale) + set_geometry(wl, true); + if (opt == &opts->window_maximized) + toggle_maximized(wl); + if (opt == &opts->window_minimized) + do_minimize(wl); + if (opt == &opts->geometry || opt == &opts->autofit || + opt == &opts->autofit_smaller || opt == &opts->autofit_larger) + { + set_geometry(wl, true); + } + } + return VO_TRUE; + } + case VOCTRL_CONTENT_TYPE: { +#if HAVE_WAYLAND_PROTOCOLS_1_27 + wl->current_content_type = *(enum mp_content_type *)arg; + set_content_type(wl); +#endif + return VO_TRUE; + } + case VOCTRL_GET_FOCUSED: { + *(bool *)arg = wl->focused; + return VO_TRUE; + } + case VOCTRL_GET_DISPLAY_NAMES: { + *(char ***)arg = get_displays_spanned(wl); + return VO_TRUE; + } + case VOCTRL_GET_UNFS_WINDOW_SIZE: { + int *s = arg; + if (wl->vo_opts->window_maximized || wl->tiled) { + s[0] = mp_rect_w(wl->geometry); + s[1] = mp_rect_h(wl->geometry); + } else { + s[0] = mp_rect_w(wl->window_size); + s[1] = mp_rect_h(wl->window_size); + } + return VO_TRUE; + } + case VOCTRL_SET_UNFS_WINDOW_SIZE: { + int *s = arg; + wl->window_size.x0 = 0; + wl->window_size.y0 = 0; + wl->window_size.x1 = s[0]; + wl->window_size.y1 = s[1]; + if (!wl->vo_opts->fullscreen && !wl->tiled) { + if (wl->vo_opts->window_maximized) { + xdg_toplevel_unset_maximized(wl->xdg_toplevel); + wl_display_dispatch_pending(wl->display); + /* Make sure the compositor let us unmaximize */ + if (wl->vo_opts->window_maximized) + return VO_TRUE; + } + wl->geometry = wl->window_size; + prepare_resize(wl, 0, 0); + } + return VO_TRUE; + } + case 
VOCTRL_GET_DISPLAY_FPS: { + struct vo_wayland_output *out; + if (wl->current_output) { + out = wl->current_output; + } else { + out = find_output(wl); + } + if (!out) + return VO_NOTAVAIL; + *(double *)arg = out->refresh_rate; + return VO_TRUE; + } + case VOCTRL_GET_DISPLAY_RES: { + struct vo_wayland_output *out; + if (wl->current_output) { + out = wl->current_output; + } else { + out = find_output(wl); + } + if (!out) + return VO_NOTAVAIL; + ((int *)arg)[0] = out->geometry.x1; + ((int *)arg)[1] = out->geometry.y1; + return VO_TRUE; + } + case VOCTRL_GET_HIDPI_SCALE: { + if (!wl->scaling) + return VO_NOTAVAIL; + *(double *)arg = wl->scaling; + return VO_TRUE; + } + case VOCTRL_UPDATE_WINDOW_TITLE: + return update_window_title(wl, (const char *)arg); + case VOCTRL_SET_CURSOR_VISIBILITY: + if (!wl->pointer) + return VO_NOTAVAIL; + return set_cursor_visibility(wl, *(bool *)arg); + case VOCTRL_KILL_SCREENSAVER: + return set_screensaver_inhibitor(wl, true); + case VOCTRL_RESTORE_SCREENSAVER: + return set_screensaver_inhibitor(wl, false); + } + + return VO_NOTIMPL; +} + +void vo_wayland_handle_fractional_scale(struct vo_wayland_state *wl) +{ + if (wl->fractional_scale_manager && wl->viewport) + wp_viewport_set_destination(wl->viewport, + round(mp_rect_w(wl->geometry) / wl->scaling), + round(mp_rect_h(wl->geometry) / wl->scaling)); +} + +bool vo_wayland_init(struct vo *vo) +{ + vo->wl = talloc_zero(NULL, struct vo_wayland_state); + struct vo_wayland_state *wl = vo->wl; + + *wl = (struct vo_wayland_state) { + .display = wl_display_connect(NULL), + .vo = vo, + .log = mp_log_new(wl, vo->log, "wayland"), + .bounded_width = 0, + .bounded_height = 0, + .refresh_interval = 0, + .scaling = 1, + .wakeup_pipe = {-1, -1}, + .display_fd = -1, + .dnd_fd = -1, + .cursor_visible = true, + .vo_opts_cache = m_config_cache_alloc(wl, vo->global, &vo_sub_opts), + }; + wl->vo_opts = wl->vo_opts_cache->opts; + wl->using_dmabuf_wayland = !strcmp(wl->vo->driver->name, "dmabuf-wayland"); + + 
wl_list_init(&wl->output_list); + + if (!wl->display) + goto err; + + if (create_input(wl)) + goto err; + + wl->registry = wl_display_get_registry(wl->display); + wl_registry_add_listener(wl->registry, ®istry_listener, wl); + + /* Do a roundtrip to run the registry */ + wl_display_roundtrip(wl->display); + + if (!wl->surface) { + MP_FATAL(wl, "Compositor doesn't support %s (ver. 4)\n", + wl_compositor_interface.name); + goto err; + } + + if (!wl->wm_base) { + MP_FATAL(wl, "Compositor doesn't support the required %s protocol!\n", + xdg_wm_base_interface.name); + goto err; + } + + if (!wl_list_length(&wl->output_list)) { + MP_FATAL(wl, "No outputs found or compositor doesn't support %s (ver. 2)\n", + wl_output_interface.name); + goto err; + } + + /* Can't be initialized during registry due to multi-protocol dependence */ + if (create_viewports(wl)) + goto err; + + if (create_xdg_surface(wl)) + goto err; + + if (wl->subcompositor) { + wl->osd_subsurface = wl_subcompositor_get_subsurface(wl->subcompositor, wl->osd_surface, wl->video_surface); + wl->video_subsurface = wl_subcompositor_get_subsurface(wl->subcompositor, wl->video_surface, wl->surface); + } + +#if HAVE_WAYLAND_PROTOCOLS_1_27 + if (wl->content_type_manager) { + wl->content_type = wp_content_type_manager_v1_get_surface_content_type(wl->content_type_manager, wl->surface); + } else { + MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n", + wp_content_type_manager_v1_interface.name); + } + + if (!wl->single_pixel_manager) { + MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n", + wp_single_pixel_buffer_manager_v1_interface.name); + } +#endif + +#if HAVE_WAYLAND_PROTOCOLS_1_31 + if (wl->fractional_scale_manager) { + wl->fractional_scale = wp_fractional_scale_manager_v1_get_fractional_scale(wl->fractional_scale_manager, wl->surface); + wp_fractional_scale_v1_add_listener(wl->fractional_scale, &fractional_scale_listener, wl); + } else { + MP_VERBOSE(wl, "Compositor doesn't support the %s 
protocol!\n", + wp_fractional_scale_manager_v1_interface.name); + } +#endif + + if (wl->dnd_devman && wl->seat) { + wl->dnd_ddev = wl_data_device_manager_get_data_device(wl->dnd_devman, wl->seat); + wl_data_device_add_listener(wl->dnd_ddev, &data_device_listener, wl); + } else if (!wl->dnd_devman) { + MP_VERBOSE(wl, "Compositor doesn't support the %s (ver. 3) protocol!\n", + wl_data_device_manager_interface.name); + } + + if (wl->presentation) { + wl->fback_pool = talloc_zero(wl, struct vo_wayland_feedback_pool); + wl->fback_pool->wl = wl; + wl->fback_pool->len = VO_MAX_SWAPCHAIN_DEPTH; + wl->fback_pool->fback = talloc_zero_array(wl->fback_pool, struct wp_presentation_feedback *, + wl->fback_pool->len); + wl->present = mp_present_initialize(wl, wl->vo_opts, VO_MAX_SWAPCHAIN_DEPTH); + } else { + MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n", + wp_presentation_interface.name); + } + + if (wl->xdg_decoration_manager) { + wl->xdg_toplevel_decoration = zxdg_decoration_manager_v1_get_toplevel_decoration(wl->xdg_decoration_manager, wl->xdg_toplevel); + zxdg_toplevel_decoration_v1_add_listener(wl->xdg_toplevel_decoration, &decoration_listener, wl); + request_decoration_mode( + wl, wl->vo_opts->border ? + ZXDG_TOPLEVEL_DECORATION_V1_MODE_SERVER_SIDE : + ZXDG_TOPLEVEL_DECORATION_V1_MODE_CLIENT_SIDE); + } else { + wl->vo_opts->border = false; + m_config_cache_write_opt(wl->vo_opts_cache, + &wl->vo_opts->border); + MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n", + zxdg_decoration_manager_v1_interface.name); + } + + if (!wl->idle_inhibit_manager) { + MP_VERBOSE(wl, "Compositor doesn't support the %s protocol!\n", + zwp_idle_inhibit_manager_v1_interface.name); + } + + wl->opts = mp_get_config_group(wl, wl->vo->global, &wayland_conf); + wl->display_fd = wl_display_get_fd(wl->display); + + update_app_id(wl); + mp_make_wakeup_pipe(wl->wakeup_pipe); + + wl->callback_surface = wl->using_dmabuf_wayland ? 
wl->video_surface : wl->surface; + wl->frame_callback = wl_surface_frame(wl->callback_surface); + wl_callback_add_listener(wl->frame_callback, &frame_listener, wl); + wl_surface_commit(wl->surface); + + /* Do another roundtrip to ensure all of the above is initialized + * before mpv does anything else. */ + wl_display_roundtrip(wl->display); + + return true; + +err: + vo_wayland_uninit(vo); + return false; +} + +bool vo_wayland_reconfig(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + + MP_VERBOSE(wl, "Reconfiguring!\n"); + + if (!wl->current_output) { + wl->current_output = find_output(wl); + if (!wl->current_output) + return false; + set_surface_scaling(wl); + wl->pending_vo_events |= VO_EVENT_DPI; + } + + if (wl->vo_opts->auto_window_resize || !wl->configured) + set_geometry(wl, false); + + if (wl->opts->configure_bounds) + set_window_bounds(wl); + + if (!wl->configured || !wl->locked_size) { + wl->geometry = wl->window_size; + wl->configured = true; + } + + if (wl->vo_opts->cursor_passthrough) + set_input_region(wl, true); + + if (wl->vo_opts->fullscreen) + toggle_fullscreen(wl); + + if (wl->vo_opts->window_maximized) + toggle_maximized(wl); + + if (wl->vo_opts->window_minimized) + do_minimize(wl); + + prepare_resize(wl, 0, 0); + + return true; +} + +void vo_wayland_set_opaque_region(struct vo_wayland_state *wl, bool alpha) +{ + const int32_t width = mp_rect_w(wl->geometry); + const int32_t height = mp_rect_h(wl->geometry); + if (!alpha) { + struct wl_region *region = wl_compositor_create_region(wl->compositor); + wl_region_add(region, 0, 0, width, height); + wl_surface_set_opaque_region(wl->surface, region); + wl_region_destroy(region); + } else { + wl_surface_set_opaque_region(wl->surface, NULL); + } +} + +void vo_wayland_uninit(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + if (!wl) + return; + + mp_input_put_key(wl->vo->input_ctx, MP_INPUT_RELEASE_ALL); + + if (wl->compositor) + wl_compositor_destroy(wl->compositor); + + if 
(wl->subcompositor) + wl_subcompositor_destroy(wl->subcompositor); + +#if HAVE_WAYLAND_PROTOCOLS_1_32 + if (wl->cursor_shape_device) + wp_cursor_shape_device_v1_destroy(wl->cursor_shape_device); + + if (wl->cursor_shape_manager) + wp_cursor_shape_manager_v1_destroy(wl->cursor_shape_manager); +#endif + + if (wl->cursor_surface) + wl_surface_destroy(wl->cursor_surface); + + if (wl->cursor_theme) + wl_cursor_theme_destroy(wl->cursor_theme); + +#if HAVE_WAYLAND_PROTOCOLS_1_27 + if (wl->content_type) + wp_content_type_v1_destroy(wl->content_type); + + if (wl->content_type_manager) + wp_content_type_manager_v1_destroy(wl->content_type_manager); +#endif + + if (wl->dnd_ddev) + wl_data_device_destroy(wl->dnd_ddev); + + if (wl->dnd_devman) + wl_data_device_manager_destroy(wl->dnd_devman); + + if (wl->dnd_offer) + wl_data_offer_destroy(wl->dnd_offer); + + if (wl->fback_pool) + clean_feedback_pool(wl->fback_pool); + +#if HAVE_WAYLAND_PROTOCOLS_1_31 + if (wl->fractional_scale) + wp_fractional_scale_v1_destroy(wl->fractional_scale); + + if (wl->fractional_scale_manager) + wp_fractional_scale_manager_v1_destroy(wl->fractional_scale_manager); +#endif + + if (wl->frame_callback) + wl_callback_destroy(wl->frame_callback); + + if (wl->idle_inhibitor) + zwp_idle_inhibitor_v1_destroy(wl->idle_inhibitor); + + if (wl->idle_inhibit_manager) + zwp_idle_inhibit_manager_v1_destroy(wl->idle_inhibit_manager); + + if (wl->keyboard) + wl_keyboard_destroy(wl->keyboard); + + if (wl->pointer) + wl_pointer_destroy(wl->pointer); + + if (wl->presentation) + wp_presentation_destroy(wl->presentation); + + if (wl->registry) + wl_registry_destroy(wl->registry); + + if (wl->viewporter) + wp_viewporter_destroy(wl->viewporter); + + if (wl->viewport) + wp_viewport_destroy(wl->viewport); + + if (wl->osd_viewport) + wp_viewport_destroy(wl->osd_viewport); + + if (wl->video_viewport) + wp_viewport_destroy(wl->video_viewport); + + if (wl->dmabuf) + zwp_linux_dmabuf_v1_destroy(wl->dmabuf); + + if 
(wl->dmabuf_feedback) + zwp_linux_dmabuf_feedback_v1_destroy(wl->dmabuf_feedback); + + if (wl->seat) + wl_seat_destroy(wl->seat); + + if (wl->shm) + wl_shm_destroy(wl->shm); + +#if HAVE_WAYLAND_PROTOCOLS_1_27 + if (wl->single_pixel_manager) + wp_single_pixel_buffer_manager_v1_destroy(wl->single_pixel_manager); +#endif + + if (wl->surface) + wl_surface_destroy(wl->surface); + + if (wl->osd_surface) + wl_surface_destroy(wl->osd_surface); + + if (wl->osd_subsurface) + wl_subsurface_destroy(wl->osd_subsurface); + + if (wl->video_surface) + wl_surface_destroy(wl->video_surface); + + if (wl->video_subsurface) + wl_subsurface_destroy(wl->video_subsurface); + + if (wl->wm_base) + xdg_wm_base_destroy(wl->wm_base); + + if (wl->xdg_decoration_manager) + zxdg_decoration_manager_v1_destroy(wl->xdg_decoration_manager); + + if (wl->xdg_toplevel) + xdg_toplevel_destroy(wl->xdg_toplevel); + + if (wl->xdg_toplevel_decoration) + zxdg_toplevel_decoration_v1_destroy(wl->xdg_toplevel_decoration); + + if (wl->xdg_surface) + xdg_surface_destroy(wl->xdg_surface); + + if (wl->xkb_context) + xkb_context_unref(wl->xkb_context); + + if (wl->xkb_keymap) + xkb_keymap_unref(wl->xkb_keymap); + + if (wl->xkb_state) + xkb_state_unref(wl->xkb_state); + + struct vo_wayland_output *output, *tmp; + wl_list_for_each_safe(output, tmp, &wl->output_list, link) + remove_output(output); + + if (wl->display) + wl_display_disconnect(wl->display); + + munmap(wl->format_map, wl->format_size); + + for (int n = 0; n < 2; n++) + close(wl->wakeup_pipe[n]); + talloc_free(wl); + vo->wl = NULL; +} + +void vo_wayland_wait_frame(struct vo_wayland_state *wl) +{ + int64_t vblank_time = 0; + /* We need some vblank interval to use for the timeout in + * this function. The order of preference of values to use is: + * 1. vsync duration from presentation time + * 2. refresh interval reported by presentation time + * 3. refresh rate of the output reported by the compositor + * 4. 
make up crap if vblank_time is still <= 0 (better than nothing) */ + + if (wl->use_present && wl->present->head) + vblank_time = wl->present->head->vsync_duration; + + if (vblank_time <= 0 && wl->refresh_interval > 0) + vblank_time = wl->refresh_interval; + + if (vblank_time <= 0 && wl->current_output->refresh_rate > 0) + vblank_time = 1e9 / wl->current_output->refresh_rate; + + // Ideally you should never reach this point. + if (vblank_time <= 0) + vblank_time = 1e9 / 60; + + // Completely arbitrary amount of additional time to wait. + vblank_time += 0.05 * vblank_time; + int64_t finish_time = mp_time_ns() + vblank_time; + + while (wl->frame_wait && finish_time > mp_time_ns()) { + int64_t poll_time = finish_time - mp_time_ns(); + if (poll_time < 0) { + poll_time = 0; + } + wayland_dispatch_events(wl, 1, poll_time); + } + + /* If the compositor does not have presentation time, we cannot be sure + * that this wait is accurate. Do a hacky block with wl_display_roundtrip. */ + if (!wl->use_present && !wl_display_get_error(wl->display)) + wl_display_roundtrip(wl->display); + + /* Only use this heuristic if the compositor doesn't support the suspended state. */ + if (wl->frame_wait && xdg_toplevel_get_version(wl->xdg_toplevel) < 6) { + // Only consider consecutive missed callbacks. 
+ if (wl->timeout_count > 1) { + wl->hidden = true; + return; + } else { + wl->timeout_count += 1; + return; + } + } + + wl->timeout_count = 0; +} + +void vo_wayland_wait_events(struct vo *vo, int64_t until_time_ns) +{ + struct vo_wayland_state *wl = vo->wl; + + int64_t wait_ns = until_time_ns - mp_time_ns(); + int64_t timeout_ns = MPCLAMP(wait_ns, 0, MP_TIME_S_TO_NS(10)); + + wayland_dispatch_events(wl, 2, timeout_ns); +} + +void vo_wayland_wakeup(struct vo *vo) +{ + struct vo_wayland_state *wl = vo->wl; + (void)write(wl->wakeup_pipe[1], &(char){0}, 1); +} diff --git a/video/out/wayland_common.h b/video/out/wayland_common.h new file mode 100644 index 0000000..adbcca6 --- /dev/null +++ b/video/out/wayland_common.h @@ -0,0 +1,189 @@ +/* + * This file is part of mpv video player. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPLAYER_WAYLAND_COMMON_H +#define MPLAYER_WAYLAND_COMMON_H + +#include <wayland-client.h> +#include "input/event.h" +#include "vo.h" + +typedef struct { + uint32_t format; + uint32_t padding; + uint64_t modifier; +} wayland_format; + +struct wayland_opts { + int configure_bounds; + int content_type; + bool disable_vsync; + int edge_pixels_pointer; + int edge_pixels_touch; +}; + +struct vo_wayland_state { + struct m_config_cache *vo_opts_cache; + struct mp_log *log; + struct mp_vo_opts *vo_opts; + struct vo *vo; + struct wayland_opts *opts; + struct wl_callback *frame_callback; + struct wl_compositor *compositor; + struct wl_subcompositor *subcompositor; + struct wl_display *display; + struct wl_registry *registry; + struct wl_shm *shm; + struct wl_surface *surface; + struct wl_surface *osd_surface; + struct wl_subsurface *osd_subsurface; + struct wl_surface *video_surface; + struct wl_surface *callback_surface; + struct wl_subsurface *video_subsurface; + + /* Geometry */ + struct mp_rect geometry; + struct mp_rect window_size; + struct wl_list output_list; + struct vo_wayland_output *current_output; + int bounded_height; + int bounded_width; + int reduced_height; + int reduced_width; + int toplevel_width; + int toplevel_height; + + /* State */ + bool activated; + bool configured; + bool focused; + bool frame_wait; + bool has_keyboard_input; + bool hidden; + bool initial_size_hint; + bool locked_size; + bool state_change; + bool tiled; + bool toplevel_configured; + int display_fd; + int mouse_x; + int mouse_y; + int pending_vo_events; + double scaling; + int timeout_count; + int wakeup_pipe[2]; + + /* content-type */ + /* TODO: unvoid these if required wayland protocols is bumped to 1.27+ */ + void *content_type_manager; + void *content_type; + int current_content_type; + + /* cursor-shape */ + /* TODO: unvoid these if required wayland protocols is bumped to 1.32+ */ + void *cursor_shape_manager; + void *cursor_shape_device; + + /* fractional-scale 
*/ + /* TODO: unvoid these if required wayland protocols is bumped to 1.31+ */ + void *fractional_scale_manager; + void *fractional_scale; + + /* idle-inhibit */ + struct zwp_idle_inhibit_manager_v1 *idle_inhibit_manager; + struct zwp_idle_inhibitor_v1 *idle_inhibitor; + + /* linux-dmabuf */ + struct zwp_linux_dmabuf_v1 *dmabuf; + struct zwp_linux_dmabuf_feedback_v1 *dmabuf_feedback; + wayland_format *format_map; + uint32_t format_size; + bool using_dmabuf_wayland; + + /* presentation-time */ + struct wp_presentation *presentation; + struct vo_wayland_feedback_pool *fback_pool; + struct mp_present *present; + int64_t refresh_interval; + bool use_present; + + /* single-pixel-buffer */ + /* TODO: unvoid this if required wayland-protocols is bumped to 1.27+ */ + void *single_pixel_manager; + + /* xdg-decoration */ + struct zxdg_decoration_manager_v1 *xdg_decoration_manager; + struct zxdg_toplevel_decoration_v1 *xdg_toplevel_decoration; + int requested_decoration; + + /* xdg-shell */ + struct xdg_wm_base *wm_base; + struct xdg_surface *xdg_surface; + struct xdg_toplevel *xdg_toplevel; + + /* viewporter */ + struct wp_viewporter *viewporter; + struct wp_viewport *viewport; + struct wp_viewport *osd_viewport; + struct wp_viewport *video_viewport; + + /* Input */ + struct wl_keyboard *keyboard; + struct wl_pointer *pointer; + struct wl_seat *seat; + struct wl_touch *touch; + struct xkb_context *xkb_context; + struct xkb_keymap *xkb_keymap; + struct xkb_state *xkb_state; + uint32_t keyboard_code; + int mpkey; + int mpmod; + + /* DND */ + struct wl_data_device *dnd_ddev; + struct wl_data_device_manager *dnd_devman; + struct wl_data_offer *dnd_offer; + enum mp_dnd_action dnd_action; + char *dnd_mime_type; + int dnd_fd; + int dnd_mime_score; + + /* Cursor */ + struct wl_cursor_theme *cursor_theme; + struct wl_cursor *default_cursor; + struct wl_surface *cursor_surface; + bool cursor_visible; + int allocated_cursor_scale; + uint32_t pointer_id; +}; + +bool 
vo_wayland_check_visible(struct vo *vo); +bool vo_wayland_init(struct vo *vo); +bool vo_wayland_reconfig(struct vo *vo); + +int vo_wayland_allocate_memfd(struct vo *vo, size_t size); +int vo_wayland_control(struct vo *vo, int *events, int request, void *arg); + +void vo_wayland_handle_fractional_scale(struct vo_wayland_state *wl); +void vo_wayland_set_opaque_region(struct vo_wayland_state *wl, bool alpha); +void vo_wayland_sync_swap(struct vo_wayland_state *wl); +void vo_wayland_uninit(struct vo *vo); +void vo_wayland_wait_events(struct vo *vo, int64_t until_time_ns); +void vo_wayland_wait_frame(struct vo_wayland_state *wl); +void vo_wayland_wakeup(struct vo *vo); + +#endif /* MPLAYER_WAYLAND_COMMON_H */ diff --git a/video/out/win32/displayconfig.c b/video/out/win32/displayconfig.c new file mode 100644 index 0000000..9844afd --- /dev/null +++ b/video/out/win32/displayconfig.c @@ -0,0 +1,140 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <windows.h> +#include <stdbool.h> +#include <string.h> + +#include "displayconfig.h" + +#include "mpv_talloc.h" + +static bool is_valid_refresh_rate(DISPLAYCONFIG_RATIONAL rr) +{ + // DisplayConfig sometimes reports a rate of 1 when the rate is not known + return rr.Denominator != 0 && rr.Numerator / rr.Denominator > 1; +} + +static int get_config(void *ctx, + UINT32 *num_paths, DISPLAYCONFIG_PATH_INFO** paths, + UINT32 *num_modes, DISPLAYCONFIG_MODE_INFO** modes) +{ + LONG res; + *paths = NULL; + *modes = NULL; + + // The display configuration could change between the call to + // GetDisplayConfigBufferSizes and the call to QueryDisplayConfig, so call + // them in a loop until the correct buffer size is chosen + do { + res = GetDisplayConfigBufferSizes(QDC_ONLY_ACTIVE_PATHS, num_paths, + num_modes); + if (res != ERROR_SUCCESS) + goto fail; + + // Free old buffers if they exist and allocate new ones + talloc_free(*paths); + talloc_free(*modes); + *paths = talloc_array(ctx, DISPLAYCONFIG_PATH_INFO, *num_paths); + *modes = talloc_array(ctx, DISPLAYCONFIG_MODE_INFO, *num_modes); + + res = QueryDisplayConfig(QDC_ONLY_ACTIVE_PATHS, num_paths, *paths, + num_modes, *modes, NULL); + } while (res == ERROR_INSUFFICIENT_BUFFER); + if (res != ERROR_SUCCESS) + goto fail; + + return 0; +fail: + talloc_free(*paths); + talloc_free(*modes); + return -1; +} + +static DISPLAYCONFIG_PATH_INFO *get_path(UINT32 num_paths, + DISPLAYCONFIG_PATH_INFO* paths, + const wchar_t *device) +{ + // Search for a path with a matching device name + for (UINT32 i = 0; i < num_paths; i++) { + // Send a GET_SOURCE_NAME request + DISPLAYCONFIG_SOURCE_DEVICE_NAME source = { + .header = { + .size = sizeof source, + .type = DISPLAYCONFIG_DEVICE_INFO_GET_SOURCE_NAME, + .adapterId = paths[i].sourceInfo.adapterId, + .id = paths[i].sourceInfo.id, + } + }; + if (DisplayConfigGetDeviceInfo(&source.header) != ERROR_SUCCESS) + return NULL; + + // Check if the device name matches + if 
(!wcscmp(device, source.viewGdiDeviceName)) + return &paths[i]; + } + + return NULL; +} + +static double get_refresh_rate_from_mode(DISPLAYCONFIG_MODE_INFO *mode) +{ + if (mode->infoType != DISPLAYCONFIG_MODE_INFO_TYPE_TARGET) + return 0.0; + + DISPLAYCONFIG_VIDEO_SIGNAL_INFO *info = + &mode->targetMode.targetVideoSignalInfo; + if (!is_valid_refresh_rate(info->vSyncFreq)) + return 0.0; + + return ((double)info->vSyncFreq.Numerator) / + ((double)info->vSyncFreq.Denominator); +} + +double mp_w32_displayconfig_get_refresh_rate(const wchar_t *device) +{ + void *ctx = talloc_new(NULL); + double freq = 0.0; + + // Get the current display configuration + UINT32 num_paths; + DISPLAYCONFIG_PATH_INFO* paths; + UINT32 num_modes; + DISPLAYCONFIG_MODE_INFO* modes; + if (get_config(ctx, &num_paths, &paths, &num_modes, &modes)) + goto end; + + // Get the path for the specified monitor + DISPLAYCONFIG_PATH_INFO* path; + if (!(path = get_path(num_paths, paths, device))) + goto end; + + // Try getting the refresh rate from the mode first. The value in the mode + // overrides the value in the path. + if (path->targetInfo.modeInfoIdx != DISPLAYCONFIG_PATH_MODE_IDX_INVALID) + freq = get_refresh_rate_from_mode(&modes[path->targetInfo.modeInfoIdx]); + + // If the mode didn't contain a valid refresh rate, try the path + if (freq == 0.0 && is_valid_refresh_rate(path->targetInfo.refreshRate)) { + freq = ((double)path->targetInfo.refreshRate.Numerator) / + ((double)path->targetInfo.refreshRate.Denominator); + } + +end: + talloc_free(ctx); + return freq; +} diff --git a/video/out/win32/displayconfig.h b/video/out/win32/displayconfig.h new file mode 100644 index 0000000..ee6cd03 --- /dev/null +++ b/video/out/win32/displayconfig.h @@ -0,0 +1,27 @@ +/* + * This file is part of mpv. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_WIN32_DISPLAYCONFIG_H_ +#define MP_WIN32_DISPLAYCONFIG_H_ + +#include <wchar.h> + +// Given a GDI monitor device name, get the precise refresh rate using the +// Windows 7 DisplayConfig API. Returns 0.0 on failure. +double mp_w32_displayconfig_get_refresh_rate(const wchar_t *device); + +#endif diff --git a/video/out/win32/droptarget.c b/video/out/win32/droptarget.c new file mode 100644 index 0000000..8a33522 --- /dev/null +++ b/video/out/win32/droptarget.c @@ -0,0 +1,227 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ +#include <stdatomic.h> + +#include <windows.h> +#include <ole2.h> +#include <shobjidl.h> + +#include "common/msg.h" +#include "common/common.h" +#include "input/input.h" +#include "input/event.h" +#include "osdep/io.h" +#include "osdep/windows_utils.h" +#include "mpv_talloc.h" + +#include "droptarget.h" + +struct droptarget { + IDropTarget iface; + atomic_int ref_cnt; + struct mp_log *log; + struct input_ctx *input_ctx; + struct mp_vo_opts *opts; + DWORD last_effect; + IDataObject *data_obj; +}; + +static FORMATETC fmtetc_file = { + .cfFormat = CF_HDROP, + .dwAspect = DVASPECT_CONTENT, + .lindex = -1, + .tymed = TYMED_HGLOBAL, +}; + +static FORMATETC fmtetc_url = { + .dwAspect = DVASPECT_CONTENT, + .lindex = -1, + .tymed = TYMED_HGLOBAL, +}; + +static void DropTarget_Destroy(struct droptarget *t) +{ + SAFE_RELEASE(t->data_obj); + talloc_free(t); +} + +static STDMETHODIMP DropTarget_QueryInterface(IDropTarget *self, REFIID riid, + void **ppvObject) +{ + if (IsEqualIID(riid, &IID_IUnknown) || IsEqualIID(riid, &IID_IDropTarget)) { + *ppvObject = self; + IDropTarget_AddRef(self); + return S_OK; + } + + *ppvObject = NULL; + return E_NOINTERFACE; +} + +static STDMETHODIMP_(ULONG) DropTarget_AddRef(IDropTarget *self) +{ + struct droptarget *t = (struct droptarget *)self; + return atomic_fetch_add(&t->ref_cnt, 1) + 1; +} + +static STDMETHODIMP_(ULONG) DropTarget_Release(IDropTarget *self) +{ + struct droptarget *t = (struct droptarget *)self; + + ULONG ref_cnt = atomic_fetch_add(&t->ref_cnt, -1) - 1; + if (ref_cnt == 0) + DropTarget_Destroy(t); + return ref_cnt; +} + +static STDMETHODIMP DropTarget_DragEnter(IDropTarget *self, + IDataObject *pDataObj, + DWORD grfKeyState, POINTL pt, + DWORD *pdwEffect) +{ + struct droptarget *t = (struct droptarget *)self; + + IDataObject_AddRef(pDataObj); + if (FAILED(IDataObject_QueryGetData(pDataObj, &fmtetc_file)) && + FAILED(IDataObject_QueryGetData(pDataObj, &fmtetc_url))) + { + *pdwEffect = DROPEFFECT_NONE; + } + + 
SAFE_RELEASE(t->data_obj); + t->data_obj = pDataObj; + t->last_effect = *pdwEffect; + return S_OK; +} + +static STDMETHODIMP DropTarget_DragOver(IDropTarget *self, DWORD grfKeyState, + POINTL pt, DWORD *pdwEffect) +{ + struct droptarget *t = (struct droptarget *)self; + + *pdwEffect = t->last_effect; + return S_OK; +} + +static STDMETHODIMP DropTarget_DragLeave(IDropTarget *self) +{ + struct droptarget *t = (struct droptarget *)self; + + SAFE_RELEASE(t->data_obj); + return S_OK; +} + +static STDMETHODIMP DropTarget_Drop(IDropTarget *self, IDataObject *pDataObj, + DWORD grfKeyState, POINTL pt, + DWORD *pdwEffect) +{ + struct droptarget *t = (struct droptarget *)self; + + enum mp_dnd_action action; + if (t->opts->drag_and_drop >= 0) { + action = t->opts->drag_and_drop; + } else { + action = (grfKeyState & MK_SHIFT) ? DND_APPEND : DND_REPLACE; + } + + SAFE_RELEASE(t->data_obj); + + STGMEDIUM medium; + if (t->opts->drag_and_drop == -2) { + t->last_effect = DROPEFFECT_NONE; + } else if (SUCCEEDED(IDataObject_GetData(pDataObj, &fmtetc_file, &medium))) { + if (GlobalLock(medium.hGlobal)) { + HDROP drop = medium.hGlobal; + + UINT files_num = DragQueryFileW(drop, 0xFFFFFFFF, NULL, 0); + char **files = talloc_zero_array(NULL, char*, files_num); + + UINT recvd_files = 0; + for (UINT i = 0; i < files_num; i++) { + UINT len = DragQueryFileW(drop, i, NULL, 0); + wchar_t *buf = talloc_array(NULL, wchar_t, len + 1); + + if (DragQueryFileW(drop, i, buf, len + 1) == len) { + char *fname = mp_to_utf8(files, buf); + files[recvd_files++] = fname; + + MP_VERBOSE(t, "received dropped file: %s\n", fname); + } else { + MP_ERR(t, "error getting dropped file name\n"); + } + + talloc_free(buf); + } + + GlobalUnlock(medium.hGlobal); + mp_event_drop_files(t->input_ctx, recvd_files, files, action); + talloc_free(files); + } + + ReleaseStgMedium(&medium); + } else if (SUCCEEDED(IDataObject_GetData(pDataObj, &fmtetc_url, &medium))) { + wchar_t *wurl = GlobalLock(medium.hGlobal); + if (wurl) { + 
char *url = mp_to_utf8(NULL, wurl); + if (mp_event_drop_mime_data(t->input_ctx, "text/uri-list", + bstr0(url), action) > 0) + { + MP_VERBOSE(t, "received dropped URL: %s\n", url); + } else { + MP_ERR(t, "error getting dropped URL\n"); + } + + talloc_free(url); + GlobalUnlock(medium.hGlobal); + } + + ReleaseStgMedium(&medium); + } else { + t->last_effect = DROPEFFECT_NONE; + } + + *pdwEffect = t->last_effect; + return S_OK; +} + +static IDropTargetVtbl idroptarget_vtbl = { + .QueryInterface = DropTarget_QueryInterface, + .AddRef = DropTarget_AddRef, + .Release = DropTarget_Release, + .DragEnter = DropTarget_DragEnter, + .DragOver = DropTarget_DragOver, + .DragLeave = DropTarget_DragLeave, + .Drop = DropTarget_Drop, +}; + +IDropTarget *mp_w32_droptarget_create(struct mp_log *log, + struct mp_vo_opts *opts, + struct input_ctx *input_ctx) +{ + fmtetc_url.cfFormat = RegisterClipboardFormatW(L"UniformResourceLocatorW"); + + struct droptarget *dt = talloc(NULL, struct droptarget); + dt->iface.lpVtbl = &idroptarget_vtbl; + atomic_store(&dt->ref_cnt, 0); + dt->last_effect = 0; + dt->data_obj = NULL; + dt->log = mp_log_new(dt, log, "droptarget"); + dt->opts = opts; + dt->input_ctx = input_ctx; + + return &dt->iface; +} diff --git a/video/out/win32/droptarget.h b/video/out/win32/droptarget.h new file mode 100644 index 0000000..1c18c06 --- /dev/null +++ b/video/out/win32/droptarget.h @@ -0,0 +1,35 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MP_WIN32_DROPTARGET_H_ +#define MP_WIN32_DROPTARGET_H_ + +#include <windows.h> +#include <ole2.h> +#include <shobjidl.h> + +#include "input/input.h" +#include "common/msg.h" +#include "common/common.h" +#include "options/options.h" + +// Create a IDropTarget implementation that sends dropped files to input_ctx +IDropTarget *mp_w32_droptarget_create(struct mp_log *log, + struct mp_vo_opts *opts, + struct input_ctx *input_ctx); + +#endif diff --git a/video/out/win_state.c b/video/out/win_state.c new file mode 100644 index 0000000..b4bc9fd --- /dev/null +++ b/video/out/win_state.c @@ -0,0 +1,155 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "win_state.h" +#include "vo.h" + +#include "video/mp_image.h" + +static void calc_monitor_aspect(struct mp_vo_opts *opts, int scr_w, int scr_h, + double *pixelaspect, int *w, int *h) +{ + *pixelaspect = 1.0 / opts->monitor_pixel_aspect; + + if (scr_w > 0 && scr_h > 0 && opts->force_monitor_aspect) + *pixelaspect = 1.0 / (opts->force_monitor_aspect * scr_h / scr_w); + + if (*pixelaspect < 1) { + *h /= *pixelaspect; + } else { + *w *= *pixelaspect; + } +} + +// Fit *w/*h into the size specified by geo. 
+static void apply_autofit(int *w, int *h, int scr_w, int scr_h, + struct m_geometry *geo, bool allow_up, bool allow_down) +{ + if (!geo->wh_valid) + return; + + int dummy = 0; + int n_w = *w, n_h = *h; + m_geometry_apply(&dummy, &dummy, &n_w, &n_h, scr_w, scr_h, geo); + + if (!allow_up && *w <= n_w && *h <= n_h) + return; + if (!allow_down && *w >= n_w && *h >= n_h) + return; + + // If aspect mismatches, always make the window smaller than the fit box + // (Or larger, if allow_down==false.) + double asp = (double)*w / *h; + double n_asp = (double)n_w / n_h; + if ((n_asp <= asp) == allow_down) { + *w = n_w; + *h = n_w / asp; + } else { + *w = n_h * asp; + *h = n_h; + } +} + +// Compute the "suggested" window size and position and return it in *out_geo. +// screen is the bounding box of the current screen within the virtual desktop. +// Does not change *vo. +// screen: position of the area on virtual desktop on which the video-content +// should be placed (maybe after excluding decorations, taskbars, etc) +// monitor: position of the monitor on virtual desktop (used for pixelaspect). +// dpi_scale: the DPI multiplier to get from virtual to real coordinates +// (>1 for "hidpi") +// Use vo_apply_window_geometry() to copy the result into the vo. +// NOTE: currently, all windowing backends do their own handling of window +// geometry additional to this code. This is to deal with initial window +// placement, fullscreen handling, avoiding resize on reconfig() with no +// size change, multi-monitor stuff, and possibly more. +void vo_calc_window_geometry3(struct vo *vo, const struct mp_rect *screen, + const struct mp_rect *monitor, + double dpi_scale, struct vo_win_geometry *out_geo) +{ + struct mp_vo_opts *opts = vo->opts; + + *out_geo = (struct vo_win_geometry){0}; + + // The case of calling this function even though no video was configured + // yet (i.e. vo->params==NULL) happens when vo_gpu creates a hidden window + // in order to create a rendering context. 
+ struct mp_image_params params = { .w = 320, .h = 200 }; + if (vo->params) + params = *vo->params; + + if (!opts->hidpi_window_scale) + dpi_scale = 1; + + int d_w, d_h; + mp_image_params_get_dsize(¶ms, &d_w, &d_h); + if ((vo->driver->caps & VO_CAP_ROTATE90) && params.rotate % 180 == 90) + MPSWAP(int, d_w, d_h); + d_w = MPCLAMP(d_w * opts->window_scale * dpi_scale, 1, 16000); + d_h = MPCLAMP(d_h * opts->window_scale * dpi_scale, 1, 16000); + + int scr_w = screen->x1 - screen->x0; + int scr_h = screen->y1 - screen->y0; + + int mon_w = monitor->x1 - monitor->x0; + int mon_h = monitor->y1 - monitor->y0; + + MP_DBG(vo, "max content size: %dx%d\n", scr_w, scr_h); + MP_DBG(vo, "monitor size: %dx%d\n", mon_w, mon_h); + + calc_monitor_aspect(opts, mon_w, mon_h, &out_geo->monitor_par, &d_w, &d_h); + + apply_autofit(&d_w, &d_h, scr_w, scr_h, &opts->autofit, true, true); + apply_autofit(&d_w, &d_h, scr_w, scr_h, &opts->autofit_smaller, true, false); + apply_autofit(&d_w, &d_h, scr_w, scr_h, &opts->autofit_larger, false, true); + + out_geo->win.x0 = (int)(scr_w - d_w) / 2; + out_geo->win.y0 = (int)(scr_h - d_h) / 2; + m_geometry_apply(&out_geo->win.x0, &out_geo->win.y0, &d_w, &d_h, + scr_w, scr_h, &opts->geometry); + + out_geo->win.x0 += screen->x0; + out_geo->win.y0 += screen->y0; + out_geo->win.x1 = out_geo->win.x0 + d_w; + out_geo->win.y1 = out_geo->win.y0 + d_h; + + if (opts->geometry.xy_valid || opts->force_window_position) + out_geo->flags |= VO_WIN_FORCE_POS; +} + +// same as vo_calc_window_geometry3 with monitor assumed same as screen +void vo_calc_window_geometry2(struct vo *vo, const struct mp_rect *screen, + double dpi_scale, struct vo_win_geometry *out_geo) +{ + vo_calc_window_geometry3(vo, screen, screen, dpi_scale, out_geo); +} + +void vo_calc_window_geometry(struct vo *vo, const struct mp_rect *screen, + struct vo_win_geometry *out_geo) +{ + vo_calc_window_geometry2(vo, screen, 1.0, out_geo); +} + +// Copy the parameters in *geo to the vo fields. 
+// (Doesn't do anything else - windowing backends should trigger VO_EVENT_RESIZE +// to ensure that the VO reinitializes rendering properly.) +void vo_apply_window_geometry(struct vo *vo, const struct vo_win_geometry *geo) +{ + vo->dwidth = geo->win.x1 - geo->win.x0; + vo->dheight = geo->win.y1 - geo->win.y0; + vo->monitor_par = geo->monitor_par; +} diff --git a/video/out/win_state.h b/video/out/win_state.h new file mode 100644 index 0000000..a253efa --- /dev/null +++ b/video/out/win_state.h @@ -0,0 +1,35 @@ +#ifndef MP_WIN_STATE_H_ +#define MP_WIN_STATE_H_ + +#include "common/common.h" + +struct vo; + +enum { + // By user settings, the window manager's chosen window position should + // be overridden. + VO_WIN_FORCE_POS = (1 << 0), +}; + +struct vo_win_geometry { + // Bitfield of VO_WIN_* flags + int flags; + // Position & size of the window. In xinerama coordinates, i.e. they're + // relative to the virtual desktop encompassing all screens, not the + // current screen. + struct mp_rect win; + // Aspect ratio of the current monitor. + // (calculated from screen size and options.) + double monitor_par; +}; + +void vo_calc_window_geometry(struct vo *vo, const struct mp_rect *screen, + struct vo_win_geometry *out_geo); +void vo_calc_window_geometry2(struct vo *vo, const struct mp_rect *screen, + double dpi_scale, struct vo_win_geometry *out_geo); +void vo_calc_window_geometry3(struct vo *vo, const struct mp_rect *screen, + const struct mp_rect *monitor, + double dpi_scale, struct vo_win_geometry *out_geo); +void vo_apply_window_geometry(struct vo *vo, const struct vo_win_geometry *geo); + +#endif diff --git a/video/out/wldmabuf/context_wldmabuf.c b/video/out/wldmabuf/context_wldmabuf.c new file mode 100644 index 0000000..c494575 --- /dev/null +++ b/video/out/wldmabuf/context_wldmabuf.c @@ -0,0 +1,43 @@ +/* + * This file is part of mpv video player. 
+ * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "video/out/wayland_common.h" +#include "video/out/opengl/context.h" +#include "ra_wldmabuf.h" + +static void uninit(struct ra_ctx *ctx) +{ + ra_free(&ctx->ra); + vo_wayland_uninit(ctx->vo); +} + +static bool init(struct ra_ctx *ctx) +{ + if (!vo_wayland_init(ctx->vo)) + return false; + ctx->ra = ra_create_wayland(ctx->log, ctx->vo); + + return true; +} + +const struct ra_ctx_fns ra_ctx_wldmabuf = { + .type = "none", + .name = "wldmabuf", + .hidden = true, + .init = init, + .uninit = uninit, +}; diff --git a/video/out/wldmabuf/ra_wldmabuf.c b/video/out/wldmabuf/ra_wldmabuf.c new file mode 100644 index 0000000..3f27314 --- /dev/null +++ b/video/out/wldmabuf/ra_wldmabuf.c @@ -0,0 +1,66 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#include "video/out/wayland_common.h" +#include "video/out/gpu/ra.h" +#include "ra_wldmabuf.h" + +struct priv { + struct vo *vo; +}; + +static void destroy(struct ra *ra) +{ + talloc_free(ra->priv); +} + +bool ra_compatible_format(struct ra* ra, uint32_t drm_format, uint64_t modifier) +{ + struct priv* p = ra->priv; + struct vo_wayland_state *wl = p->vo->wl; + const wayland_format *formats = wl->format_map; + + for (int i = 0; i < wl->format_size / sizeof(wayland_format); i++) { + if (drm_format == formats[i].format && modifier == formats[i].modifier) + return true; + } + + return false; +} + +static struct ra_fns ra_fns_wldmabuf = { + .destroy = destroy, +}; + +struct ra *ra_create_wayland(struct mp_log *log, struct vo* vo) +{ + struct ra *ra = talloc_zero(NULL, struct ra); + + ra->fns = &ra_fns_wldmabuf; + ra->log = log; + ra_add_native_resource(ra, "wl", vo->wl->display); + ra->priv = talloc_zero(NULL, struct priv); + struct priv *p = ra->priv; + p->vo = vo; + + return ra; +} + +bool ra_is_wldmabuf(struct ra *ra) +{ + return (ra->fns == &ra_fns_wldmabuf); +} diff --git a/video/out/wldmabuf/ra_wldmabuf.h b/video/out/wldmabuf/ra_wldmabuf.h new file mode 100644 index 0000000..8e20173 --- /dev/null +++ b/video/out/wldmabuf/ra_wldmabuf.h @@ -0,0 +1,23 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#pragma once +#include "video/out/wayland_common.h" + +struct ra *ra_create_wayland(struct mp_log *log, struct vo *vo); +bool ra_compatible_format(struct ra* ra, uint32_t drm_format, uint64_t modifier); +bool ra_is_wldmabuf(struct ra *ra); diff --git a/video/out/x11_common.c b/video/out/x11_common.c new file mode 100644 index 0000000..b4605bf --- /dev/null +++ b/video/out/x11_common.c @@ -0,0 +1,2291 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <math.h> +#include <inttypes.h> +#include <limits.h> +#include <unistd.h> +#include <poll.h> +#include <string.h> +#include <assert.h> + +#include <X11/Xmd.h> +#include <X11/Xlib.h> +#include <X11/Xutil.h> +#include <X11/Xatom.h> +#include <X11/keysym.h> +#include <X11/XKBlib.h> +#include <X11/XF86keysym.h> + +#include <X11/extensions/scrnsaver.h> +#include <X11/extensions/dpms.h> +#include <X11/extensions/shape.h> +#include <X11/extensions/Xpresent.h> +#include <X11/extensions/Xrandr.h> + +#include "misc/bstr.h" +#include "options/options.h" +#include "options/m_config.h" +#include "common/common.h" +#include "common/msg.h" +#include "input/input.h" +#include "input/event.h" +#include "video/image_loader.h" +#include "video/mp_image.h" +#include "present_sync.h" +#include "x11_common.h" +#include "mpv_talloc.h" + +#include "vo.h" +#include "win_state.h" +#include "osdep/io.h" +#include "osdep/poll_wrapper.h" +#include "osdep/timer.h" +#include "osdep/subprocess.h" + +#include "input/input.h" +#include "input/keycodes.h" + +#define vo_wm_LAYER 1 +#define vo_wm_FULLSCREEN 2 +#define vo_wm_STAYS_ON_TOP 4 +#define vo_wm_ABOVE 8 +#define vo_wm_BELOW 16 +#define vo_wm_STICKY 32 + +/* EWMH state actions, see + http://freedesktop.org/Standards/wm-spec/index.html#id2768769 */ +#define NET_WM_STATE_REMOVE 0 /* remove/unset property */ +#define NET_WM_STATE_ADD 1 /* add/set property */ +#define NET_WM_STATE_TOGGLE 2 /* toggle property */ + +#define WIN_LAYER_ONBOTTOM 2 +#define WIN_LAYER_NORMAL 4 +#define WIN_LAYER_ONTOP 6 +#define WIN_LAYER_ABOVE_DOCK 10 + +#define DND_VERSION 5 + +#define XEMBED_VERSION 0 +#define XEMBED_MAPPED (1 << 0) +#define XEMBED_EMBEDDED_NOTIFY 0 +#define XEMBED_REQUEST_FOCUS 3 + +// ----- Motif header: ------- + +#define MWM_HINTS_FUNCTIONS (1L << 0) +#define MWM_HINTS_DECORATIONS (1L << 1) + +#define MWM_FUNC_RESIZE (1L << 1) +#define MWM_FUNC_MOVE (1L << 2) +#define MWM_FUNC_MINIMIZE (1L 
<< 3) +#define MWM_FUNC_MAXIMIZE (1L << 4) +#define MWM_FUNC_CLOSE (1L << 5) + +#define MWM_DECOR_ALL (1L << 0) + +typedef struct +{ + long flags; + long functions; + long decorations; + long input_mode; + long state; +} MotifWmHints; + +static const char x11_icon_16[] = +#include "etc/mpv-icon-8bit-16x16.png.inc" +; + +static const char x11_icon_32[] = +#include "etc/mpv-icon-8bit-32x32.png.inc" +; + +static const char x11_icon_64[] = +#include "etc/mpv-icon-8bit-64x64.png.inc" +; + +static const char x11_icon_128[] = +#include "etc/mpv-icon-8bit-128x128.png.inc" +; + +#define ICON_ENTRY(var) { (char *)var, sizeof(var) } +static const struct bstr x11_icons[] = { + ICON_ENTRY(x11_icon_16), + ICON_ENTRY(x11_icon_32), + ICON_ENTRY(x11_icon_64), + ICON_ENTRY(x11_icon_128), + {0} +}; + +static struct mp_log *x11_error_output; +static atomic_int x11_error_silence; + +static bool rc_overlaps(struct mp_rect rc1, struct mp_rect rc2); +static void vo_x11_update_geometry(struct vo *vo); +static void vo_x11_fullscreen(struct vo *vo); +static void xscreensaver_heartbeat(struct vo_x11_state *x11); +static void set_screensaver(struct vo_x11_state *x11, bool enabled); +static void vo_x11_selectinput_witherr(struct vo *vo, Display *display, + Window w, long event_mask); +static void vo_x11_setlayer(struct vo *vo, bool ontop); +static void vo_x11_xembed_handle_message(struct vo *vo, XClientMessageEvent *ce); +static void vo_x11_xembed_send_message(struct vo_x11_state *x11, long m[4]); +static void vo_x11_move_resize(struct vo *vo, bool move, bool resize, + struct mp_rect rc); +static void vo_x11_maximize(struct vo *vo); +static void vo_x11_minimize(struct vo *vo); +static void vo_x11_set_input_region(struct vo *vo, bool passthrough); +static void vo_x11_sticky(struct vo *vo, bool sticky); + +#define XA(x11, s) (XInternAtom((x11)->display, # s, False)) +#define XAs(x11, s) XInternAtom((x11)->display, s, False) + +#define RC_W(rc) ((rc).x1 - (rc).x0) +#define RC_H(rc) ((rc).y1 - 
(rc).y0) + +static char *x11_atom_name_buf(struct vo_x11_state *x11, Atom atom, + char *buf, size_t buf_size) +{ + buf[0] = '\0'; + + char *new_name = XGetAtomName(x11->display, atom); + if (new_name) { + snprintf(buf, buf_size, "%s", new_name); + XFree(new_name); + } + + return buf; +} + +#define x11_atom_name(x11, atom) x11_atom_name_buf(x11, atom, (char[80]){0}, 80) + +// format = 8 (unsigned char), 16 (short), 32 (long, even on LP64 systems) +// *out_nitems = returned number of items of requested format +static void *x11_get_property(struct vo_x11_state *x11, Window w, Atom property, + Atom type, int format, int *out_nitems) +{ + assert(format == 8 || format == 16 || format == 32); + *out_nitems = 0; + if (!w) + return NULL; + long max_len = 128 * 1024 * 1024; // static maximum limit + Atom ret_type = 0; + int ret_format = 0; + unsigned long ret_nitems = 0; + unsigned long ret_bytesleft = 0; + unsigned char *ret_prop = NULL; + if (XGetWindowProperty(x11->display, w, property, 0, max_len, False, type, + &ret_type, &ret_format, &ret_nitems, &ret_bytesleft, + &ret_prop) != Success) + return NULL; + if (ret_format != format || ret_nitems < 1 || ret_bytesleft) { + XFree(ret_prop); + ret_prop = NULL; + ret_nitems = 0; + } + *out_nitems = ret_nitems; + return ret_prop; +} + +static bool x11_get_property_copy(struct vo_x11_state *x11, Window w, + Atom property, Atom type, int format, + void *dst, size_t dst_size) +{ + bool ret = false; + int len; + void *ptr = x11_get_property(x11, w, property, type, format, &len); + if (ptr) { + size_t ib = format == 32 ? 
sizeof(long) : format / 8; + if (dst_size <= len * ib) { + memcpy(dst, ptr, dst_size); + ret = true; + } + XFree(ptr); + } + return ret; +} + +static void x11_send_ewmh_msg(struct vo_x11_state *x11, char *message_type, + long params[5]) +{ + if (!x11->window) + return; + + XEvent xev = { + .xclient = { + .type = ClientMessage, + .send_event = True, + .message_type = XInternAtom(x11->display, message_type, False), + .window = x11->window, + .format = 32, + }, + }; + for (int n = 0; n < 5; n++) + xev.xclient.data.l[n] = params[n]; + + if (!XSendEvent(x11->display, x11->rootwin, False, + SubstructureRedirectMask | SubstructureNotifyMask, + &xev)) + MP_ERR(x11, "Couldn't send EWMH %s message!\n", message_type); +} + +// change the _NET_WM_STATE hint. Remove or add the state according to "set". +static void x11_set_ewmh_state(struct vo_x11_state *x11, char *state, bool set) +{ + long params[5] = { + set ? NET_WM_STATE_ADD : NET_WM_STATE_REMOVE, + XInternAtom(x11->display, state, False), + 0, // No second state + 1, // source indication: normal + }; + x11_send_ewmh_msg(x11, "_NET_WM_STATE", params); +} + +static void vo_update_cursor(struct vo *vo) +{ + Cursor no_ptr; + Pixmap bm_no; + XColor black, dummy; + Colormap colormap; + const char bm_no_data[] = {0, 0, 0, 0, 0, 0, 0, 0}; + struct vo_x11_state *x11 = vo->x11; + Display *disp = x11->display; + Window win = x11->window; + bool should_hide = x11->has_focus && !x11->mouse_cursor_visible; + + if (should_hide == x11->mouse_cursor_set) + return; + + x11->mouse_cursor_set = should_hide; + + if (x11->parent == x11->rootwin || !win) + return; // do not hide if playing on the root window + + if (x11->mouse_cursor_set) { + colormap = DefaultColormap(disp, DefaultScreen(disp)); + if (!XAllocNamedColor(disp, colormap, "black", &black, &dummy)) + return; // color alloc failed, give up + bm_no = XCreateBitmapFromData(disp, win, bm_no_data, 8, 8); + no_ptr = XCreatePixmapCursor(disp, bm_no, bm_no, &black, &black, 0, 0); + 
XDefineCursor(disp, win, no_ptr); + XFreeCursor(disp, no_ptr); + if (bm_no != None) + XFreePixmap(disp, bm_no); + XFreeColors(disp, colormap, &black.pixel, 1, 0); + } else { + XDefineCursor(x11->display, x11->window, 0); + } +} + +static int x11_errorhandler(Display *display, XErrorEvent *event) +{ + struct mp_log *log = x11_error_output; + if (!log) + return 0; + + char msg[60]; + XGetErrorText(display, event->error_code, (char *) &msg, sizeof(msg)); + + int lev = atomic_load(&x11_error_silence) ? MSGL_V : MSGL_ERR; + mp_msg(log, lev, "X11 error: %s\n", msg); + mp_msg(log, lev, "Type: %x, display: %p, resourceid: %lx, serial: %lx\n", + event->type, event->display, event->resourceid, event->serial); + mp_msg(log, lev, "Error code: %x, request code: %x, minor code: %x\n", + event->error_code, event->request_code, event->minor_code); + + return 0; +} + +void vo_x11_silence_xlib(int dir) +{ + atomic_fetch_add(&x11_error_silence, dir); +} + +static int net_wm_support_state_test(struct vo_x11_state *x11, Atom atom) +{ +#define NET_WM_STATE_TEST(x) { \ + if (atom == XA(x11, _NET_WM_STATE_##x)) { \ + MP_DBG(x11, "Detected wm supports " #x " state.\n" ); \ + return vo_wm_##x; \ + } \ +} + + NET_WM_STATE_TEST(FULLSCREEN); + NET_WM_STATE_TEST(ABOVE); + NET_WM_STATE_TEST(STAYS_ON_TOP); + NET_WM_STATE_TEST(BELOW); + NET_WM_STATE_TEST(STICKY); + return 0; +} + +static int vo_wm_detect(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + int i; + int wm = 0; + int nitems; + Atom *args = NULL; + Window win = x11->rootwin; + + if (x11->parent) + return 0; + +// -- supports layers + args = x11_get_property(x11, win, XA(x11, _WIN_PROTOCOLS), XA_ATOM, 32, + &nitems); + if (args) { + for (i = 0; i < nitems; i++) { + if (args[i] == XA(x11, _WIN_LAYER)) { + MP_DBG(x11, "Detected wm supports layers.\n"); + wm |= vo_wm_LAYER; + } + } + XFree(args); + } +// --- netwm + args = x11_get_property(x11, win, XA(x11, _NET_SUPPORTED), XA_ATOM, 32, + &nitems); + if (args) { + MP_DBG(x11, 
"Detected wm supports NetWM.\n"); + if (x11->opts->x11_netwm >= 0) { + for (i = 0; i < nitems; i++) + wm |= net_wm_support_state_test(vo->x11, args[i]); + } else { + MP_DBG(x11, "NetWM usage disabled by user.\n"); + } + XFree(args); + } + + if (wm == 0) + MP_DBG(x11, "Unknown wm type...\n"); + if (x11->opts->x11_netwm > 0 && !(wm & vo_wm_FULLSCREEN)) { + MP_WARN(x11, "Forcing NetWM FULLSCREEN support.\n"); + wm |= vo_wm_FULLSCREEN; + } + return wm; +} + +static void xpresent_set(struct vo_x11_state *x11) +{ + int present = x11->opts->x11_present; + x11->use_present = x11->present_code && + ((x11->has_mesa && !x11->has_nvidia && present) || + present == 2); + if (x11->use_present) { + MP_VERBOSE(x11, "XPresent enabled.\n"); + } else { + MP_VERBOSE(x11, "XPresent disabled.\n"); + } +} + +static void xrandr_read(struct vo_x11_state *x11) +{ + for(int i = 0; i < x11->num_displays; i++) + talloc_free(x11->displays[i].name); + + x11->num_displays = 0; + + if (x11->xrandr_event < 0) { + int event_base, error_base; + if (!XRRQueryExtension(x11->display, &event_base, &error_base)) { + MP_VERBOSE(x11, "Couldn't init Xrandr.\n"); + return; + } + x11->xrandr_event = event_base + RRNotify; + XRRSelectInput(x11->display, x11->rootwin, RRScreenChangeNotifyMask | + RRCrtcChangeNotifyMask | RROutputChangeNotifyMask); + } + + XRRScreenResources *r = XRRGetScreenResourcesCurrent(x11->display, x11->rootwin); + if (!r) { + MP_VERBOSE(x11, "Xrandr doesn't work.\n"); + return; + } + + /* Look at the available providers on the current screen and try to determine + * the driver. If amd/intel/radeon, assume this is mesa. If nvidia is found, + * assume nvidia. Because the same screen can have multiple providers (e.g. + * a laptop with switchable graphics), we need to know both of these things. + * In practice, this is used for determining whether or not to use XPresent + * (i.e. needs to be Mesa and not Nvidia). Requires Randr 1.4. 
*/ + XRRProviderResources *pr = XRRGetProviderResources(x11->display, x11->rootwin); + for (int i = 0; i < pr->nproviders; i++) { + XRRProviderInfo *info = XRRGetProviderInfo(x11->display, r, pr->providers[i]); + struct bstr provider_name = bstrdup(x11, bstr0(info->name)); + bstr_lower(provider_name); + int amd = bstr_find0(provider_name, "amd"); + int intel = bstr_find0(provider_name, "intel"); + int modesetting = bstr_find0(provider_name, "modesetting"); + int nouveau = bstr_find0(provider_name, "nouveau"); + int nvidia = bstr_find0(provider_name, "nvidia"); + int radeon = bstr_find0(provider_name, "radeon"); + x11->has_mesa = x11->has_mesa || amd >= 0 || intel >= 0 || + modesetting >= 0 || nouveau >= 0 || radeon >= 0; + x11->has_nvidia = x11->has_nvidia || nvidia >= 0; + XRRFreeProviderInfo(info); + } + if (x11->present_code) + xpresent_set(x11); + XRRFreeProviderResources(pr); + + int primary_id = -1; + RROutput primary = XRRGetOutputPrimary(x11->display, x11->rootwin); + for (int o = 0; o < r->noutput; o++) { + RROutput output = r->outputs[o]; + XRRCrtcInfo *crtc = NULL; + XRROutputInfo *out = XRRGetOutputInfo(x11->display, r, output); + if (!out || !out->crtc) + goto next; + crtc = XRRGetCrtcInfo(x11->display, r, out->crtc); + if (!crtc) + goto next; + for (int om = 0; om < out->nmode; om++) { + RRMode xm = out->modes[om]; + for (int n = 0; n < r->nmode; n++) { + XRRModeInfo m = r->modes[n]; + if (m.id != xm || crtc->mode != xm) + continue; + if (x11->num_displays >= MAX_DISPLAYS) + continue; + double vTotal = m.vTotal; + if (m.modeFlags & RR_DoubleScan) + vTotal *= 2; + if (m.modeFlags & RR_Interlace) + vTotal /= 2; + struct xrandr_display d = { + .rc = { crtc->x, crtc->y, + crtc->x + crtc->width, crtc->y + crtc->height }, + .fps = m.dotClock / (m.hTotal * vTotal), + .name = talloc_strdup(x11, out->name), + }; + int num = x11->num_displays++; + MP_VERBOSE(x11, "Display %d (%s): [%d, %d, %d, %d] @ %f FPS\n", + num, d.name, d.rc.x0, d.rc.y0, d.rc.x1, d.rc.y1, 
d.fps); + x11->displays[num] = d; + if (output == primary) + primary_id = num; + } + } + next: + if (crtc) + XRRFreeCrtcInfo(crtc); + if (out) + XRRFreeOutputInfo(out); + } + + for (int i = 0; i < x11->num_displays; i++) { + struct xrandr_display *d = &(x11->displays[i]); + d->screen = i; + + if (i == primary_id) { + d->atom_id = 0; + continue; + } + if (primary_id > 0 && i < primary_id) { + d->atom_id = i+1; + continue; + } + d->atom_id = i; + } + + XRRFreeScreenResources(r); +} + +static int vo_x11_select_screen(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + int screen = -2; // all displays + if (!opts->fullscreen || opts->fsscreen_id != -2) { + screen = opts->fullscreen ? opts->fsscreen_id : opts->screen_id; + if (opts->fullscreen && opts->fsscreen_id == -1) + screen = opts->screen_id; + + if (screen == -1 && (opts->fsscreen_name || opts->screen_name)) { + char *screen_name = opts->fullscreen ? opts->fsscreen_name : opts->screen_name; + if (screen_name) { + bool screen_found = false; + for (int n = 0; n < x11->num_displays; n++) { + char *display_name = x11->displays[n].name; + if (!strcmp(display_name, screen_name)) { + screen = n; + screen_found = true; + break; + } + } + if (!screen_found) + MP_WARN(x11, "Screen name %s not found!\n", screen_name); + } + } + + if (screen >= x11->num_displays) + screen = x11->num_displays - 1; + } + return screen; +} + +static void vo_x11_update_screeninfo(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + x11->screenrc = (struct mp_rect){.x1 = x11->ws_width, .y1 = x11->ws_height}; + int screen = vo_x11_select_screen(vo); + if (screen >= -1) { + if (screen == -1) { + int x = x11->winrc.x0 + RC_W(x11->winrc) / 2; + int y = x11->winrc.y0 + RC_H(x11->winrc) / 2; + for (screen = x11->num_displays - 1; screen > 0; screen--) { + struct xrandr_display *disp = &x11->displays[screen]; + int left = disp->rc.x0; + int right = disp->rc.x1; + int top = disp->rc.y0; + int bottom = 
disp->rc.y1; + if (left <= x && x <= right && top <= y && y <= bottom) + break; + } + } + + if (screen < 0) + screen = 0; + x11->screenrc = (struct mp_rect){ + .x0 = x11->displays[screen].rc.x0, + .y0 = x11->displays[screen].rc.y0, + .x1 = x11->displays[screen].rc.x1, + .y1 = x11->displays[screen].rc.y1, + }; + } +} + +static struct xrandr_display *get_current_display(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + struct xrandr_display *selected_disp = NULL; + for (int n = 0; n < x11->num_displays; n++) { + struct xrandr_display *disp = &x11->displays[n]; + disp->overlaps = rc_overlaps(disp->rc, x11->winrc); + if (disp->overlaps && (!selected_disp || disp->fps < selected_disp->fps)) + selected_disp = disp; + } + return selected_disp; +} + +// Get the monitors for the 4 edges of the rectangle spanning all screens. +static void vo_x11_get_bounding_monitors(struct vo_x11_state *x11, long b[4]) +{ + //top bottom left right + b[0] = b[1] = b[2] = b[3] = 0; + for (int n = 0; n < x11->num_displays; n++) { + struct xrandr_display *d = &x11->displays[n]; + if (d->rc.y0 < x11->displays[b[0]].rc.y0) + b[0] = n; + if (d->rc.y1 < x11->displays[b[1]].rc.y1) + b[1] = n; + if (d->rc.x0 < x11->displays[b[2]].rc.x0) + b[2] = n; + if (d->rc.x1 < x11->displays[b[3]].rc.x1) + b[3] = n; + } +} + +bool vo_x11_init(struct vo *vo) +{ + char *dispName; + + assert(!vo->x11); + + XInitThreads(); + + struct vo_x11_state *x11 = talloc_ptrtype(NULL, x11); + *x11 = (struct vo_x11_state){ + .log = mp_log_new(x11, vo->log, "x11"), + .input_ctx = vo->input_ctx, + .screensaver_enabled = true, + .xrandr_event = -1, + .wakeup_pipe = {-1, -1}, + .dpi_scale = 1, + .opts_cache = m_config_cache_alloc(x11, vo->global, &vo_sub_opts), + }; + x11->opts = x11->opts_cache->opts; + vo->x11 = x11; + + x11_error_output = x11->log; + XSetErrorHandler(x11_errorhandler); + x11->present = mp_present_initialize(x11, x11->opts, VO_MAX_SWAPCHAIN_DEPTH); + + dispName = XDisplayName(NULL); + + MP_VERBOSE(x11, 
"X11 opening display: %s\n", dispName); + + x11->display = XOpenDisplay(dispName); + if (!x11->display) { + MP_MSG(x11, vo->probing ? MSGL_V : MSGL_ERR, + "couldn't open the X11 display (%s)!\n", dispName); + goto error; + } + x11->screen = DefaultScreen(x11->display); // screen ID + x11->rootwin = RootWindow(x11->display, x11->screen); // root window ID + + if (x11->opts->WinID >= 0) + x11->parent = x11->opts->WinID ? x11->opts->WinID : x11->rootwin; + + if (!x11->opts->native_keyrepeat) { + Bool ok = False; + XkbSetDetectableAutoRepeat(x11->display, True, &ok); + x11->no_autorepeat = ok; + } + + x11->xim = XOpenIM(x11->display, NULL, NULL, NULL); + if (!x11->xim) + MP_WARN(x11, "XOpenIM() failed. Unicode input will not work.\n"); + + x11->ws_width = DisplayWidth(x11->display, x11->screen); + x11->ws_height = DisplayHeight(x11->display, x11->screen); + + if (strncmp(dispName, "unix:", 5) == 0) + dispName += 4; + else if (strncmp(dispName, "localhost:", 10) == 0) + dispName += 9; + x11->display_is_local = dispName[0] == ':' && + strtoul(dispName + 1, NULL, 10) < 10; + MP_DBG(x11, "X11 running at %dx%d (\"%s\" => %s display)\n", + x11->ws_width, x11->ws_height, dispName, + x11->display_is_local ? "local" : "remote"); + + int w_mm = DisplayWidthMM(x11->display, x11->screen); + int h_mm = DisplayHeightMM(x11->display, x11->screen); + double dpi_x = x11->ws_width * 25.4 / w_mm; + double dpi_y = x11->ws_height * 25.4 / h_mm; + double base_dpi = 96; + if (isfinite(dpi_x) && isfinite(dpi_y) && x11->opts->hidpi_window_scale) { + int s_x = lrint(MPCLAMP(dpi_x / base_dpi, 0, 10)); + int s_y = lrint(MPCLAMP(dpi_y / base_dpi, 0, 10)); + if (s_x == s_y && s_x > 1 && s_x < 10) { + x11->dpi_scale = s_x; + MP_VERBOSE(x11, "Assuming DPI scale %d for prescaling. 
This can " + "be disabled with --hidpi-window-scale=no.\n", + x11->dpi_scale); + } + } + + x11->wm_type = vo_wm_detect(vo); + + x11->event_fd = ConnectionNumber(x11->display); + mp_make_wakeup_pipe(x11->wakeup_pipe); + + xrandr_read(x11); + + vo_x11_update_geometry(vo); + + return true; + +error: + vo_x11_uninit(vo); + return false; +} + +static const struct mp_keymap keymap[] = { + // special keys + {XK_Pause, MP_KEY_PAUSE}, {XK_Escape, MP_KEY_ESC}, + {XK_BackSpace, MP_KEY_BS}, {XK_Tab, MP_KEY_TAB}, {XK_Return, MP_KEY_ENTER}, + {XK_Menu, MP_KEY_MENU}, {XK_Print, MP_KEY_PRINT}, + {XK_Cancel, MP_KEY_CANCEL}, {XK_ISO_Left_Tab, MP_KEY_TAB}, + + // cursor keys + {XK_Left, MP_KEY_LEFT}, {XK_Right, MP_KEY_RIGHT}, {XK_Up, MP_KEY_UP}, + {XK_Down, MP_KEY_DOWN}, + + // navigation block + {XK_Insert, MP_KEY_INSERT}, {XK_Delete, MP_KEY_DELETE}, + {XK_Home, MP_KEY_HOME}, {XK_End, MP_KEY_END}, {XK_Page_Up, MP_KEY_PAGE_UP}, + {XK_Page_Down, MP_KEY_PAGE_DOWN}, + + // F-keys + {XK_F1, MP_KEY_F+1}, {XK_F2, MP_KEY_F+2}, {XK_F3, MP_KEY_F+3}, + {XK_F4, MP_KEY_F+4}, {XK_F5, MP_KEY_F+5}, {XK_F6, MP_KEY_F+6}, + {XK_F7, MP_KEY_F+7}, {XK_F8, MP_KEY_F+8}, {XK_F9, MP_KEY_F+9}, + {XK_F10, MP_KEY_F+10}, {XK_F11, MP_KEY_F+11}, {XK_F12, MP_KEY_F+12}, + {XK_F13, MP_KEY_F+13}, {XK_F14, MP_KEY_F+14}, {XK_F15, MP_KEY_F+15}, + {XK_F16, MP_KEY_F+16}, {XK_F17, MP_KEY_F+17}, {XK_F18, MP_KEY_F+18}, + {XK_F19, MP_KEY_F+19}, {XK_F20, MP_KEY_F+20}, {XK_F21, MP_KEY_F+21}, + {XK_F22, MP_KEY_F+22}, {XK_F23, MP_KEY_F+23}, {XK_F24, MP_KEY_F+24}, + + // numpad independent of numlock + {XK_KP_Subtract, '-'}, {XK_KP_Add, '+'}, {XK_KP_Multiply, '*'}, + {XK_KP_Divide, '/'}, {XK_KP_Enter, MP_KEY_KPENTER}, + + // numpad with numlock + {XK_KP_0, MP_KEY_KP0}, {XK_KP_1, MP_KEY_KP1}, {XK_KP_2, MP_KEY_KP2}, + {XK_KP_3, MP_KEY_KP3}, {XK_KP_4, MP_KEY_KP4}, {XK_KP_5, MP_KEY_KP5}, + {XK_KP_6, MP_KEY_KP6}, {XK_KP_7, MP_KEY_KP7}, {XK_KP_8, MP_KEY_KP8}, + {XK_KP_9, MP_KEY_KP9}, {XK_KP_Decimal, MP_KEY_KPDEC}, + {XK_KP_Separator, 
MP_KEY_KPDEC}, + + // numpad without numlock + {XK_KP_Insert, MP_KEY_KPINS}, {XK_KP_End, MP_KEY_KPEND}, + {XK_KP_Down, MP_KEY_KPDOWN}, {XK_KP_Page_Down, MP_KEY_KPPGDOWN}, + {XK_KP_Left, MP_KEY_KPLEFT}, {XK_KP_Begin, MP_KEY_KP5}, + {XK_KP_Right, MP_KEY_KPRIGHT}, {XK_KP_Home, MP_KEY_KPHOME}, {XK_KP_Up, MP_KEY_KPUP}, + {XK_KP_Page_Up, MP_KEY_KPPGUP}, {XK_KP_Delete, MP_KEY_KPDEL}, + + {XF86XK_MenuKB, MP_KEY_MENU}, + {XF86XK_AudioPlay, MP_KEY_PLAY}, {XF86XK_AudioPause, MP_KEY_PAUSE}, + {XF86XK_AudioStop, MP_KEY_STOP}, + {XF86XK_AudioPrev, MP_KEY_PREV}, {XF86XK_AudioNext, MP_KEY_NEXT}, + {XF86XK_AudioRewind, MP_KEY_REWIND}, {XF86XK_AudioForward, MP_KEY_FORWARD}, + {XF86XK_AudioMute, MP_KEY_MUTE}, + {XF86XK_AudioLowerVolume, MP_KEY_VOLUME_DOWN}, + {XF86XK_AudioRaiseVolume, MP_KEY_VOLUME_UP}, + {XF86XK_HomePage, MP_KEY_HOMEPAGE}, {XF86XK_WWW, MP_KEY_WWW}, + {XF86XK_Mail, MP_KEY_MAIL}, {XF86XK_Favorites, MP_KEY_FAVORITES}, + {XF86XK_Search, MP_KEY_SEARCH}, {XF86XK_Sleep, MP_KEY_SLEEP}, + {XF86XK_Back, MP_KEY_BACK}, {XF86XK_Tools, MP_KEY_TOOLS}, + {XF86XK_ZoomIn, MP_KEY_ZOOMIN}, {XF86XK_ZoomOut, MP_KEY_ZOOMOUT}, + + {0, 0} +}; + +static int vo_x11_lookupkey(int key) +{ + const char *passthrough_keys = " -+*/<>`~!@#$%^&()_{}:;\"\',.?\\|=[]"; + int mpkey = 0; + if ((key >= 'a' && key <= 'z') || + (key >= 'A' && key <= 'Z') || + (key >= '0' && key <= '9') || + (key > 0 && key < 256 && strchr(passthrough_keys, key))) + mpkey = key; + + if (!mpkey) + mpkey = lookup_keymap_table(keymap, key); + + // XFree86 keysym range; typically contains obscure "extra" keys + if (!mpkey && key >= 0x10080001 && key <= 0x1008FFFF) { + mpkey = MP_KEY_UNKNOWN_RESERVED_START + (key - 0x10080000); + if (mpkey > MP_KEY_UNKNOWN_RESERVED_LAST) + mpkey = 0; + } + + return mpkey; +} + +static void vo_x11_decoration(struct vo *vo, bool d) +{ + struct vo_x11_state *x11 = vo->x11; + + if (x11->parent || !x11->window) + return; + + Atom motif_hints = XA(x11, _MOTIF_WM_HINTS); + MotifWmHints mhints = {0}; + 
bool got = x11_get_property_copy(x11, x11->window, motif_hints, + motif_hints, 32, &mhints, sizeof(mhints)); + // hints weren't set, and decorations requested -> assume WM displays them + if (!got && d) + return; + if (!got) { + mhints.flags = MWM_HINTS_FUNCTIONS; + mhints.functions = MWM_FUNC_MOVE | MWM_FUNC_CLOSE | MWM_FUNC_MINIMIZE | + MWM_FUNC_MAXIMIZE | MWM_FUNC_RESIZE; + } + mhints.flags |= MWM_HINTS_DECORATIONS; + mhints.decorations = d ? MWM_DECOR_ALL : 0; + XChangeProperty(x11->display, x11->window, motif_hints, motif_hints, 32, + PropModeReplace, (unsigned char *) &mhints, 5); +} + +static void vo_x11_wm_hints(struct vo *vo, Window window) +{ + struct vo_x11_state *x11 = vo->x11; + XWMHints hints = {0}; + hints.flags = InputHint | StateHint; + hints.input = 1; + hints.initial_state = NormalState; + XSetWMHints(x11->display, window, &hints); +} + +static void vo_x11_classhint(struct vo *vo, Window window, const char *name) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + XClassHint wmClass; + long pid = getpid(); + + wmClass.res_name = opts->winname ? 
opts->winname : (char *)name; + wmClass.res_class = "mpv"; + XSetClassHint(x11->display, window, &wmClass); + XChangeProperty(x11->display, window, XA(x11, _NET_WM_PID), XA_CARDINAL, + 32, PropModeReplace, (unsigned char *) &pid, 1); +} + +void vo_x11_uninit(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + if (!x11) + return; + + mp_input_put_key(x11->input_ctx, MP_INPUT_RELEASE_ALL); + + set_screensaver(x11, true); + + if (x11->window != None && x11->window != x11->rootwin) + XDestroyWindow(x11->display, x11->window); + if (x11->xic) + XDestroyIC(x11->xic); + if (x11->colormap != None) + XFreeColormap(vo->x11->display, x11->colormap); + + MP_DBG(x11, "uninit ...\n"); + if (x11->xim) + XCloseIM(x11->xim); + if (x11->display) { + XSetErrorHandler(NULL); + x11_error_output = NULL; + XCloseDisplay(x11->display); + } + + if (x11->wakeup_pipe[0] >= 0) { + close(x11->wakeup_pipe[0]); + close(x11->wakeup_pipe[1]); + } + + talloc_free(x11); + vo->x11 = NULL; +} + +#define DND_PROPERTY "mpv_dnd_selection" + +static void vo_x11_dnd_init_window(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + Atom version = DND_VERSION; + XChangeProperty(x11->display, x11->window, XA(x11, XdndAware), XA_ATOM, + 32, PropModeReplace, (unsigned char *)&version, 1); +} + +// The Atom does not always map to a mime type, but often. 
+static char *x11_dnd_mime_type_buf(struct vo_x11_state *x11, Atom atom, + char *buf, size_t buf_size) +{ + if (atom == XInternAtom(x11->display, "UTF8_STRING", False)) + return "text"; + return x11_atom_name_buf(x11, atom, buf, buf_size); +} + +#define x11_dnd_mime_type(x11, atom) \ + x11_dnd_mime_type_buf(x11, atom, (char[80]){0}, 80) + +static bool dnd_format_is_better(struct vo_x11_state *x11, Atom cur, Atom new) +{ + int new_score = mp_event_get_mime_type_score(x11->input_ctx, + x11_dnd_mime_type(x11, new)); + int cur_score = -1; + if (cur) { + cur_score = mp_event_get_mime_type_score(x11->input_ctx, + x11_dnd_mime_type(x11, cur)); + } + return new_score >= 0 && new_score > cur_score; +} + +static void dnd_select_format(struct vo_x11_state *x11, Atom *args, int items) +{ + x11->dnd_requested_format = 0; + + for (int n = 0; n < items; n++) { + MP_VERBOSE(x11, "DnD type: '%s'\n", x11_atom_name(x11, args[n])); + // There are other types; possibly not worth supporting. + if (dnd_format_is_better(x11, x11->dnd_requested_format, args[n])) + x11->dnd_requested_format = args[n]; + } + + MP_VERBOSE(x11, "Selected DnD type: %s\n", x11->dnd_requested_format ? 
+ x11_atom_name(x11, x11->dnd_requested_format) : "(none)"); +} + +static void dnd_reset(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + x11->dnd_src_window = 0; + x11->dnd_requested_format = 0; +} + +static void vo_x11_dnd_handle_message(struct vo *vo, XClientMessageEvent *ce) +{ + struct vo_x11_state *x11 = vo->x11; + + if (!x11->window) + return; + + if (ce->message_type == XA(x11, XdndEnter)) { + x11->dnd_requested_format = 0; + + Window src = ce->data.l[0]; + if (ce->data.l[1] & 1) { + int nitems; + Atom *args = x11_get_property(x11, src, XA(x11, XdndTypeList), + XA_ATOM, 32, &nitems); + if (args) { + dnd_select_format(x11, args, nitems); + XFree(args); + } + } else { + Atom args[3]; + for (int n = 2; n <= 4; n++) + args[n - 2] = ce->data.l[n]; + dnd_select_format(x11, args, 3); + } + } else if (ce->message_type == XA(x11, XdndPosition)) { + x11->dnd_requested_action = ce->data.l[4]; + + Window src = ce->data.l[0]; + XEvent xev; + + xev.xclient.type = ClientMessage; + xev.xclient.serial = 0; + xev.xclient.send_event = True; + xev.xclient.message_type = XA(x11, XdndStatus); + xev.xclient.window = src; + xev.xclient.format = 32; + xev.xclient.data.l[0] = x11->window; + xev.xclient.data.l[1] = x11->dnd_requested_format ? 
1 : 0; + xev.xclient.data.l[2] = 0; + xev.xclient.data.l[3] = 0; + xev.xclient.data.l[4] = XA(x11, XdndActionCopy); + + XSendEvent(x11->display, src, False, 0, &xev); + } else if (ce->message_type == XA(x11, XdndDrop)) { + x11->dnd_src_window = ce->data.l[0]; + XConvertSelection(x11->display, XA(x11, XdndSelection), + x11->dnd_requested_format, XAs(x11, DND_PROPERTY), + x11->window, ce->data.l[2]); + } else if (ce->message_type == XA(x11, XdndLeave)) { + dnd_reset(vo); + } +} + +static void vo_x11_dnd_handle_selection(struct vo *vo, XSelectionEvent *se) +{ + struct vo_x11_state *x11 = vo->x11; + + if (!x11->window || !x11->dnd_src_window) + return; + + bool success = false; + + if (se->selection == XA(x11, XdndSelection) && + se->property == XAs(x11, DND_PROPERTY) && + se->target == x11->dnd_requested_format && + x11->opts->drag_and_drop != -2) + { + int nitems; + void *prop = x11_get_property(x11, x11->window, XAs(x11, DND_PROPERTY), + x11->dnd_requested_format, 8, &nitems); + if (prop) { + enum mp_dnd_action action; + if (x11->opts->drag_and_drop >= 0) { + action = x11->opts->drag_and_drop; + } else { + action = x11->dnd_requested_action == XA(x11, XdndActionCopy) ? + DND_REPLACE : DND_APPEND; + } + + char *mime_type = x11_dnd_mime_type(x11, x11->dnd_requested_format); + MP_VERBOSE(x11, "Dropping type: %s (%s)\n", + x11_atom_name(x11, x11->dnd_requested_format), mime_type); + + // No idea if this is guaranteed to be \0-padded, so use bstr. + success = mp_event_drop_mime_data(x11->input_ctx, mime_type, + (bstr){prop, nitems}, action) > 0; + XFree(prop); + } + } + + XEvent xev; + + xev.xclient.type = ClientMessage; + xev.xclient.serial = 0; + xev.xclient.send_event = True; + xev.xclient.message_type = XA(x11, XdndFinished); + xev.xclient.window = x11->dnd_src_window; + xev.xclient.format = 32; + xev.xclient.data.l[0] = x11->window; + xev.xclient.data.l[1] = success ? 1 : 0; + xev.xclient.data.l[2] = success ? 
XA(x11, XdndActionCopy) : 0; + xev.xclient.data.l[3] = 0; + xev.xclient.data.l[4] = 0; + + XSendEvent(x11->display, x11->dnd_src_window, False, 0, &xev); + + dnd_reset(vo); +} + +static void update_vo_size(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + if (RC_W(x11->winrc) != vo->dwidth || RC_H(x11->winrc) != vo->dheight) { + vo->dwidth = RC_W(x11->winrc); + vo->dheight = RC_H(x11->winrc); + x11->pending_vo_events |= VO_EVENT_RESIZE; + } +} + +static int get_mods(unsigned int state) +{ + int modifiers = 0; + if (state & ShiftMask) + modifiers |= MP_KEY_MODIFIER_SHIFT; + if (state & ControlMask) + modifiers |= MP_KEY_MODIFIER_CTRL; + if (state & Mod1Mask) + modifiers |= MP_KEY_MODIFIER_ALT; + if (state & Mod4Mask) + modifiers |= MP_KEY_MODIFIER_META; + return modifiers; +} + +static void vo_x11_update_composition_hint(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + long hint = 0; + switch (x11->opts->x11_bypass_compositor) { + case 0: hint = 0; break; // leave default + case 1: hint = 1; break; // always bypass + case 2: hint = x11->fs ? 
1 : 0; break; // bypass in FS + case 3: hint = 2; break; // always enable + } + + XChangeProperty(x11->display, x11->window, XA(x11,_NET_WM_BYPASS_COMPOSITOR), + XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&hint, 1); +} + +static void vo_x11_check_net_wm_state_change(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + + if (x11->parent) + return; + + if (x11->wm_type & vo_wm_FULLSCREEN) { + int num_elems; + long *elems = x11_get_property(x11, x11->window, XA(x11, _NET_WM_STATE), + XA_ATOM, 32, &num_elems); + int is_fullscreen = 0, is_minimized = 0, is_maximized = 0; + if (elems) { + Atom fullscreen_prop = XA(x11, _NET_WM_STATE_FULLSCREEN); + Atom hidden = XA(x11, _NET_WM_STATE_HIDDEN); + Atom max_vert = XA(x11, _NET_WM_STATE_MAXIMIZED_VERT); + Atom max_horiz = XA(x11, _NET_WM_STATE_MAXIMIZED_HORZ); + for (int n = 0; n < num_elems; n++) { + if (elems[n] == fullscreen_prop) + is_fullscreen = 1; + if (elems[n] == hidden) + is_minimized = 1; + if (elems[n] == max_vert || elems[n] == max_horiz) + is_maximized = 1; + } + XFree(elems); + } + + if (opts->window_maximized && !is_maximized && x11->geometry_change) { + x11->geometry_change = false; + vo_x11_config_vo_window(vo); + } + + opts->window_minimized = is_minimized; + x11->hidden = is_minimized; + m_config_cache_write_opt(x11->opts_cache, &opts->window_minimized); + opts->window_maximized = is_maximized; + m_config_cache_write_opt(x11->opts_cache, &opts->window_maximized); + + if ((x11->opts->fullscreen && !is_fullscreen) || + (!x11->opts->fullscreen && is_fullscreen)) + { + x11->opts->fullscreen = is_fullscreen; + x11->fs = is_fullscreen; + m_config_cache_write_opt(x11->opts_cache, &x11->opts->fullscreen); + + if (!is_fullscreen && (x11->pos_changed_during_fs || + x11->size_changed_during_fs)) + { + vo_x11_move_resize(vo, x11->pos_changed_during_fs, + x11->size_changed_during_fs, + x11->nofsrc); + } + + x11->size_changed_during_fs = false; + 
x11->pos_changed_during_fs = false; + + vo_x11_update_composition_hint(vo); + } + } +} + +static void vo_x11_check_net_wm_desktop_change(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + if (x11->parent) + return; + + long params[1] = {0}; + if (x11_get_property_copy(x11, x11->window, XA(x11, _NET_WM_DESKTOP), + XA_CARDINAL, 32, params, sizeof(params))) + { + x11->opts->all_workspaces = params[0] == -1; // (gets sign-extended?) + m_config_cache_write_opt(x11->opts_cache, &x11->opts->all_workspaces); + } +} + +// Releasing all keys on key-up or defocus is simpler and ensures no keys can +// get "stuck". +static void release_all_keys(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + if (x11->no_autorepeat) + mp_input_put_key(x11->input_ctx, MP_INPUT_RELEASE_ALL); + x11->win_drag_button1_down = false; +} + +void vo_x11_check_events(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + Display *display = vo->x11->display; + XEvent Event; + + xscreensaver_heartbeat(vo->x11); + + while (XPending(display)) { + XNextEvent(display, &Event); + MP_TRACE(x11, "XEvent: %d\n", Event.type); + switch (Event.type) { + case Expose: + x11->pending_vo_events |= VO_EVENT_EXPOSE; + break; + case ConfigureNotify: + if (x11->window == None) + break; + vo_x11_update_geometry(vo); + if (x11->parent && Event.xconfigure.window == x11->parent) { + MP_TRACE(x11, "adjusting embedded window position\n"); + XMoveResizeWindow(x11->display, x11->window, + 0, 0, RC_W(x11->winrc), RC_H(x11->winrc)); + } + break; + case KeyPress: { + char buf[100]; + KeySym keySym = 0; + int modifiers = get_mods(Event.xkey.state); + if (x11->no_autorepeat) + modifiers |= MP_KEY_STATE_DOWN; + if (x11->xic) { + Status status; + int len = Xutf8LookupString(x11->xic, &Event.xkey, buf, + sizeof(buf), &keySym, &status); + int mpkey = vo_x11_lookupkey(keySym); + if (mpkey) { + mp_input_put_key(x11->input_ctx, mpkey | modifiers); + } else if (status == XLookupChars || status == XLookupBoth) { + struct 
bstr t = { buf, len }; + mp_input_put_key_utf8(x11->input_ctx, modifiers, t); + } + } else { + XLookupString(&Event.xkey, buf, sizeof(buf), &keySym, + &x11->compose_status); + int mpkey = vo_x11_lookupkey(keySym); + if (mpkey) + mp_input_put_key(x11->input_ctx, mpkey | modifiers); + } + break; + } + case FocusIn: + x11->has_focus = true; + vo_update_cursor(vo); + x11->pending_vo_events |= VO_EVENT_FOCUS; + break; + case FocusOut: + release_all_keys(vo); + x11->has_focus = false; + vo_update_cursor(vo); + x11->pending_vo_events |= VO_EVENT_FOCUS; + break; + case KeyRelease: + release_all_keys(vo); + break; + case MotionNotify: + if (x11->win_drag_button1_down && !x11->fs && + !mp_input_test_dragging(x11->input_ctx, Event.xmotion.x, + Event.xmotion.y)) + { + mp_input_put_key(x11->input_ctx, MP_INPUT_RELEASE_ALL); + XUngrabPointer(x11->display, CurrentTime); + + long params[5] = { + Event.xmotion.x_root, Event.xmotion.y_root, + 8, // _NET_WM_MOVERESIZE_MOVE + 1, // button 1 + 1, // source indication: normal + }; + x11_send_ewmh_msg(x11, "_NET_WM_MOVERESIZE", params); + } else { + mp_input_set_mouse_pos(x11->input_ctx, Event.xmotion.x, + Event.xmotion.y); + } + x11->win_drag_button1_down = false; + break; + case LeaveNotify: + if (Event.xcrossing.mode != NotifyNormal) + break; + x11->win_drag_button1_down = false; + mp_input_put_key(x11->input_ctx, MP_KEY_MOUSE_LEAVE); + break; + case EnterNotify: + if (Event.xcrossing.mode != NotifyNormal) + break; + mp_input_put_key(x11->input_ctx, MP_KEY_MOUSE_ENTER); + break; + case ButtonPress: + if (Event.xbutton.button - 1 >= MP_KEY_MOUSE_BTN_COUNT) + break; + if (Event.xbutton.button == 1) + x11->win_drag_button1_down = true; + mp_input_put_key(x11->input_ctx, + (MP_MBTN_BASE + Event.xbutton.button - 1) | + get_mods(Event.xbutton.state) | MP_KEY_STATE_DOWN); + long msg[4] = {XEMBED_REQUEST_FOCUS}; + vo_x11_xembed_send_message(x11, msg); + break; + case ButtonRelease: + if (Event.xbutton.button - 1 >= MP_KEY_MOUSE_BTN_COUNT) + 
break; + if (Event.xbutton.button == 1) + x11->win_drag_button1_down = false; + mp_input_put_key(x11->input_ctx, + (MP_MBTN_BASE + Event.xbutton.button - 1) | + get_mods(Event.xbutton.state) | MP_KEY_STATE_UP); + break; + case MapNotify: + x11->window_hidden = false; + x11->pseudo_mapped = true; + x11->current_screen = -1; + vo_x11_update_geometry(vo); + break; + case DestroyNotify: + MP_WARN(x11, "Our window was destroyed, exiting\n"); + mp_input_put_key(x11->input_ctx, MP_KEY_CLOSE_WIN); + x11->window = 0; + break; + case ClientMessage: + if (Event.xclient.message_type == XA(x11, WM_PROTOCOLS) && + Event.xclient.data.l[0] == XA(x11, WM_DELETE_WINDOW)) + mp_input_put_key(x11->input_ctx, MP_KEY_CLOSE_WIN); + vo_x11_dnd_handle_message(vo, &Event.xclient); + vo_x11_xembed_handle_message(vo, &Event.xclient); + break; + case SelectionNotify: + vo_x11_dnd_handle_selection(vo, &Event.xselection); + break; + case PropertyNotify: + if (Event.xproperty.atom == XA(x11, _NET_FRAME_EXTENTS) || + Event.xproperty.atom == XA(x11, WM_STATE)) + { + if (!x11->pseudo_mapped && !x11->parent) { + MP_VERBOSE(x11, "not waiting for MapNotify\n"); + x11->pseudo_mapped = true; + } + } else if (Event.xproperty.atom == XA(x11, _NET_WM_STATE)) { + vo_x11_check_net_wm_state_change(vo); + } else if (Event.xproperty.atom == XA(x11, _NET_WM_DESKTOP)) { + vo_x11_check_net_wm_desktop_change(vo); + } else if (Event.xproperty.atom == x11->icc_profile_property) { + x11->pending_vo_events |= VO_EVENT_ICC_PROFILE_CHANGED; + } + break; + case GenericEvent: { + XGenericEventCookie *cookie = (XGenericEventCookie *)&Event.xcookie; + if (cookie->extension == x11->present_code && x11->use_present) + { + XGetEventData(x11->display, cookie); + if (cookie->evtype == PresentCompleteNotify) { + XPresentCompleteNotifyEvent *present_event; + present_event = (XPresentCompleteNotifyEvent *)cookie->data; + present_sync_update_values(x11->present, + present_event->ust * 1000, + present_event->msc); + } + } + 
XFreeEventData(x11->display, cookie); + break; + } + default: + if (Event.type == x11->ShmCompletionEvent) { + if (x11->ShmCompletionWaitCount > 0) + x11->ShmCompletionWaitCount--; + } + if (Event.type == x11->xrandr_event) { + xrandr_read(x11); + vo_x11_update_geometry(vo); + } + break; + } + } + + update_vo_size(vo); +} + +static void vo_x11_sizehint(struct vo *vo, struct mp_rect rc, bool override_pos) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + + if (!x11->window || x11->parent) + return; + + bool screen = opts->screen_id >= 0 || (opts->screen_name && + opts->screen_name[0]); + bool fsscreen = opts->fsscreen_id >= 0 || (opts->fsscreen_name && + opts->fsscreen_name[0]); + bool force_pos = opts->geometry.xy_valid || // explicitly forced by user + opts->force_window_position || // resize -> reset position + screen || fsscreen || // force onto screen area + opts->screen_name || // also force onto screen area + x11->parent || // force to fill parent + override_pos; // for fullscreen and such + + XSizeHints *hint = XAllocSizeHints(); + if (!hint) + return; // OOM + + hint->flags |= PSize | (force_pos ? PPosition : 0); + hint->x = rc.x0; + hint->y = rc.y0; + hint->width = RC_W(rc); + hint->height = RC_H(rc); + hint->max_width = 0; + hint->max_height = 0; + + if (opts->keepaspect && opts->keepaspect_window) { + hint->flags |= PAspect; + hint->min_aspect.x = hint->width; + hint->min_aspect.y = hint->height; + hint->max_aspect.x = hint->width; + hint->max_aspect.y = hint->height; + } + + // Set minimum height/width to 4 to avoid off-by-one errors. 
+ hint->flags |= PMinSize; + hint->min_width = hint->min_height = 4; + + hint->flags |= PWinGravity; + hint->win_gravity = StaticGravity; + + XSetWMNormalHints(x11->display, x11->window, hint); + XFree(hint); +} + +static void vo_x11_move_resize(struct vo *vo, bool move, bool resize, + struct mp_rect rc) +{ + if (!vo->x11->window) + return; + int w = RC_W(rc), h = RC_H(rc); + XWindowChanges req = {.x = rc.x0, .y = rc.y0, .width = w, .height = h}; + unsigned mask = (move ? CWX | CWY : 0) | (resize ? CWWidth | CWHeight : 0); + if (mask) + XConfigureWindow(vo->x11->display, vo->x11->window, mask, &req); + vo_x11_sizehint(vo, rc, false); +} + +// set a X text property that expects a UTF8_STRING type +static void vo_x11_set_property_utf8(struct vo *vo, Atom name, const char *t) +{ + struct vo_x11_state *x11 = vo->x11; + + XChangeProperty(x11->display, x11->window, name, XA(x11, UTF8_STRING), 8, + PropModeReplace, t, strlen(t)); +} + +// set a X text property that expects a STRING or COMPOUND_TEXT type +static void vo_x11_set_property_string(struct vo *vo, Atom name, const char *t) +{ + struct vo_x11_state *x11 = vo->x11; + XTextProperty prop = {0}; + + if (Xutf8TextListToTextProperty(x11->display, (char **)&t, 1, + XStdICCTextStyle, &prop) == Success) + { + XSetTextProperty(x11->display, x11->window, &prop, name); + } else { + // Strictly speaking this violates the ICCCM, but there's no way we + // can do this correctly. + vo_x11_set_property_utf8(vo, name, t); + } + XFree(prop.value); +} + +static void vo_x11_update_window_title(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + if (!x11->window || !x11->window_title) + return; + + vo_x11_set_property_string(vo, XA_WM_NAME, x11->window_title); + vo_x11_set_property_string(vo, XA_WM_ICON_NAME, x11->window_title); + + /* _NET_WM_NAME and _NET_WM_ICON_NAME must be sanitized to UTF-8. 
*/ + void *tmp = talloc_new(NULL); + struct bstr b_title = bstr_sanitize_utf8_latin1(tmp, bstr0(x11->window_title)); + vo_x11_set_property_utf8(vo, XA(x11, _NET_WM_NAME), bstrto0(tmp, b_title)); + vo_x11_set_property_utf8(vo, XA(x11, _NET_WM_ICON_NAME), bstrto0(tmp, b_title)); + talloc_free(tmp); +} + +static void vo_x11_xembed_update(struct vo_x11_state *x11, int flags) +{ + if (!x11->window || !x11->parent) + return; + + long xembed_info[] = {XEMBED_VERSION, flags}; + Atom name = XA(x11, _XEMBED_INFO); + XChangeProperty(x11->display, x11->window, name, name, 32, + PropModeReplace, (char *)xembed_info, 2); +} + +static void vo_x11_xembed_handle_message(struct vo *vo, XClientMessageEvent *ce) +{ + struct vo_x11_state *x11 = vo->x11; + if (!x11->window || !x11->parent || ce->message_type != XA(x11, _XEMBED)) + return; + + long msg = ce->data.l[1]; + if (msg == XEMBED_EMBEDDED_NOTIFY) + MP_VERBOSE(x11, "Parent windows supports XEmbed.\n"); +} + +static void vo_x11_xembed_send_message(struct vo_x11_state *x11, long m[4]) +{ + if (!x11->window || !x11->parent) + return; + XEvent ev = {.xclient = { + .type = ClientMessage, + .window = x11->parent, + .message_type = XA(x11, _XEMBED), + .format = 32, + .data = {.l = { CurrentTime, m[0], m[1], m[2], m[3] }}, + } }; + XSendEvent(x11->display, x11->parent, False, NoEventMask, &ev); +} + +static void vo_x11_set_wm_icon(struct vo_x11_state *x11) +{ + int icon_size = 0; + long *icon = talloc_array(NULL, long, 0); + + for (int n = 0; x11_icons[n].start; n++) { + struct mp_image *img = + load_image_png_buf(x11_icons[n].start, x11_icons[n].len, IMGFMT_RGBA); + if (!img) + continue; + int new_size = 2 + img->w * img->h; + MP_RESIZE_ARRAY(NULL, icon, icon_size + new_size); + long *cur = icon + icon_size; + icon_size += new_size; + *cur++ = img->w; + *cur++ = img->h; + for (int y = 0; y < img->h; y++) { + uint8_t *s = (uint8_t *)img->planes[0] + img->stride[0] * y; + for (int x = 0; x < img->w; x++) { + *cur++ = s[x * 4 + 0] | (s[x * 
4 + 1] << 8) | + (s[x * 4 + 2] << 16) | ((unsigned)s[x * 4 + 3] << 24); + } + } + talloc_free(img); + } + + XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_ICON), + XA_CARDINAL, 32, PropModeReplace, + (unsigned char *)icon, icon_size); + talloc_free(icon); +} + +static void vo_x11_create_window(struct vo *vo, XVisualInfo *vis, + struct mp_rect rc) +{ + struct vo_x11_state *x11 = vo->x11; + + assert(x11->window == None); + assert(!x11->xic); + + XVisualInfo vinfo_storage; + if (!vis) { + vis = &vinfo_storage; + XWindowAttributes att; + XGetWindowAttributes(x11->display, x11->rootwin, &att); + XMatchVisualInfo(x11->display, x11->screen, att.depth, TrueColor, vis); + } + + if (x11->colormap == None) { + x11->colormap = XCreateColormap(x11->display, x11->rootwin, + vis->visual, AllocNone); + } + + unsigned long xswamask = CWBorderPixel | CWColormap; + XSetWindowAttributes xswa = { + .border_pixel = 0, + .colormap = x11->colormap, + }; + + Window parent = x11->parent; + if (!parent) + parent = x11->rootwin; + + x11->window = + XCreateWindow(x11->display, parent, rc.x0, rc.y0, RC_W(rc), RC_H(rc), 0, + vis->depth, CopyFromParent, vis->visual, xswamask, &xswa); + Atom protos[1] = {XA(x11, WM_DELETE_WINDOW)}; + XSetWMProtocols(x11->display, x11->window, protos, 1); + + if (!XPresentQueryExtension(x11->display, &x11->present_code, NULL, NULL)) { + MP_VERBOSE(x11, "The XPresent extension is not supported.\n"); + } else { + MP_VERBOSE(x11, "The XPresent extension was found.\n"); + XPresentSelectInput(x11->display, x11->window, PresentCompleteNotifyMask); + } + xpresent_set(x11); + + x11->mouse_cursor_set = false; + x11->mouse_cursor_visible = true; + vo_update_cursor(vo); + + if (x11->xim) { + x11->xic = XCreateIC(x11->xim, + XNInputStyle, XIMPreeditNone | XIMStatusNone, + XNClientWindow, x11->window, + XNFocusWindow, x11->window, + NULL); + } + + if (!x11->parent) { + vo_x11_update_composition_hint(vo); + vo_x11_set_wm_icon(x11); + vo_x11_dnd_init_window(vo); + 
vo_x11_set_property_utf8(vo, XA(x11, _GTK_THEME_VARIANT), "dark"); + } + if (!x11->parent || x11->opts->x11_wid_title) + vo_x11_update_window_title(vo); + vo_x11_xembed_update(x11, 0); +} + +static void vo_x11_map_window(struct vo *vo, struct mp_rect rc) +{ + struct vo_x11_state *x11 = vo->x11; + + vo_x11_move_resize(vo, true, true, rc); + vo_x11_decoration(vo, x11->opts->border); + + if (x11->opts->fullscreen && (x11->wm_type & vo_wm_FULLSCREEN)) { + Atom state = XA(x11, _NET_WM_STATE_FULLSCREEN); + XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_STATE), XA_ATOM, + 32, PropModeAppend, (unsigned char *)&state, 1); + x11->fs = 1; + // The "saved" positions are bogus, so reset them when leaving FS again. + x11->size_changed_during_fs = true; + x11->pos_changed_during_fs = true; + } + + if (x11->opts->fsscreen_id != -1) { + long params[5] = {0}; + if (x11->opts->fsscreen_id >= 0) { + for (int n = 0; n < 4; n++) + params[n] = x11->opts->fsscreen_id; + } else { + vo_x11_get_bounding_monitors(x11, ¶ms[0]); + } + params[4] = 1; // source indication: normal + x11_send_ewmh_msg(x11, "_NET_WM_FULLSCREEN_MONITORS", params); + } + + if (x11->opts->all_workspaces) { + if (x11->wm_type & vo_wm_STICKY) { + Atom state = XA(x11, _NET_WM_STATE_STICKY); + XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_STATE), XA_ATOM, + 32, PropModeReplace, (unsigned char *)&state, 1); + } else { + long v = 0xFFFFFFFF; + XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_DESKTOP), + XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&v, 1); + } + } else if (x11->opts->geometry.ws > 0) { + long v = x11->opts->geometry.ws - 1; + XChangeProperty(x11->display, x11->window, XA(x11, _NET_WM_DESKTOP), + XA_CARDINAL, 32, PropModeReplace, (unsigned char *)&v, 1); + } + + vo_x11_update_composition_hint(vo); + + // map window + int events = StructureNotifyMask | ExposureMask | PropertyChangeMask | + LeaveWindowMask | EnterWindowMask | FocusChangeMask; + if 
(mp_input_mouse_enabled(x11->input_ctx)) + events |= PointerMotionMask | ButtonPressMask | ButtonReleaseMask; + if (mp_input_vo_keyboard_enabled(x11->input_ctx)) + events |= KeyPressMask | KeyReleaseMask; + vo_x11_selectinput_witherr(vo, x11->display, x11->window, events); + XMapWindow(x11->display, x11->window); + + if (x11->opts->cursor_passthrough) + vo_x11_set_input_region(vo, true); + + if (x11->opts->window_maximized) // don't override WM default on "no" + vo_x11_maximize(vo); + if (x11->opts->window_minimized) // don't override WM default on "no" + vo_x11_minimize(vo); + + if (x11->opts->fullscreen && (x11->wm_type & vo_wm_FULLSCREEN)) + x11_set_ewmh_state(x11, "_NET_WM_STATE_FULLSCREEN", 1); + + vo_x11_xembed_update(x11, XEMBED_MAPPED); +} + +static void vo_x11_highlevel_resize(struct vo *vo, struct mp_rect rc) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + + bool reset_pos = opts->force_window_position; + if (reset_pos) { + x11->nofsrc = rc; + } else { + x11->nofsrc.x1 = x11->nofsrc.x0 + RC_W(rc); + x11->nofsrc.y1 = x11->nofsrc.y0 + RC_H(rc); + } + + if (opts->fullscreen) { + x11->size_changed_during_fs = true; + x11->pos_changed_during_fs = reset_pos; + vo_x11_sizehint(vo, rc, false); + } else { + vo_x11_move_resize(vo, reset_pos, true, rc); + } +} + +static void wait_until_mapped(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + if (!x11->pseudo_mapped) + x11_send_ewmh_msg(x11, "_NET_REQUEST_FRAME_EXTENTS", (long[5]){0}); + while (!x11->pseudo_mapped && x11->window) { + XWindowAttributes att; + XGetWindowAttributes(x11->display, x11->window, &att); + if (att.map_state != IsUnmapped) { + x11->pseudo_mapped = true; + break; + } + XEvent unused; + XPeekEvent(x11->display, &unused); + vo_x11_check_events(vo); + } +} + +// Create the X11 window. There is only 1, and it must be created before +// vo_x11_config_vo_window() is called. vis can be NULL for default. 
+bool vo_x11_create_vo_window(struct vo *vo, XVisualInfo *vis, + const char *classname) +{ + struct vo_x11_state *x11 = vo->x11; + assert(!x11->window); + + if (x11->parent) { + if (x11->parent == x11->rootwin) { + x11->window = x11->rootwin; + x11->pseudo_mapped = true; + XSelectInput(x11->display, x11->window, StructureNotifyMask); + } else { + XSelectInput(x11->display, x11->parent, StructureNotifyMask); + } + } + if (x11->window == None) { + vo_x11_create_window(vo, vis, (struct mp_rect){.x1 = 320, .y1 = 200 }); + vo_x11_classhint(vo, x11->window, classname); + vo_x11_wm_hints(vo, x11->window); + x11->window_hidden = true; + } + + return !!x11->window; +} + +// Resize the window (e.g. new file, or video resolution change) +void vo_x11_config_vo_window(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + + assert(x11->window); + + // Don't attempt to change autofit/geometry on maximized windows. + if (x11->geometry_change && opts->window_maximized) + return; + + vo_x11_update_screeninfo(vo); + + struct vo_win_geometry geo; + vo_calc_window_geometry2(vo, &x11->screenrc, x11->dpi_scale, &geo); + vo_apply_window_geometry(vo, &geo); + + struct mp_rect rc = geo.win; + + if (x11->parent) { + vo_x11_update_geometry(vo); + rc = (struct mp_rect){0, 0, RC_W(x11->winrc), RC_H(x11->winrc)}; + } + + bool reset_size = (x11->old_dw != RC_W(rc) || x11->old_dh != RC_H(rc)) && + (opts->auto_window_resize || x11->geometry_change); + + x11->old_dw = RC_W(rc); + x11->old_dh = RC_H(rc); + + if (x11->window_hidden) { + x11->nofsrc = rc; + vo_x11_map_window(vo, rc); + } else if (reset_size) { + vo_x11_highlevel_resize(vo, rc); + } + + x11->geometry_change = false; + + if (opts->ontop) + vo_x11_setlayer(vo, opts->ontop); + + vo_x11_fullscreen(vo); + + wait_until_mapped(vo); + vo_x11_update_geometry(vo); + update_vo_size(vo); + x11->pending_vo_events &= ~VO_EVENT_RESIZE; // implicitly done by the VO +} + +static void vo_x11_sticky(struct vo 
*vo, bool sticky) +{ + struct vo_x11_state *x11 = vo->x11; + if (x11->wm_type & vo_wm_STICKY) { + x11_set_ewmh_state(x11, "_NET_WM_STATE_STICKY", sticky); + } else { + long params[5] = {0xFFFFFFFF, 1}; + if (!sticky) { + x11_get_property_copy(x11, x11->rootwin, + XA(x11, _NET_CURRENT_DESKTOP), + XA_CARDINAL, 32, ¶ms[0], + sizeof(params[0])); + } + x11_send_ewmh_msg(x11, "_NET_WM_DESKTOP", params); + } +} + +static void vo_x11_setlayer(struct vo *vo, bool ontop) +{ + struct vo_x11_state *x11 = vo->x11; + if (x11->parent || !x11->window) + return; + + if (x11->wm_type & (vo_wm_STAYS_ON_TOP | vo_wm_ABOVE)) { + char *state = "_NET_WM_STATE_ABOVE"; + + // Not in EWMH - but the old code preferred this (maybe it is "better") + if (x11->wm_type & vo_wm_STAYS_ON_TOP) + state = "_NET_WM_STATE_STAYS_ON_TOP"; + + x11_set_ewmh_state(x11, state, ontop); + + MP_VERBOSE(x11, "NET style stay on top (%d). Using state %s.\n", + ontop, state); + } else if (x11->wm_type & vo_wm_LAYER) { + if (!x11->orig_layer) { + x11->orig_layer = WIN_LAYER_NORMAL; + x11_get_property_copy(x11, x11->window, XA(x11, _WIN_LAYER), + XA_CARDINAL, 32, &x11->orig_layer, sizeof(long)); + MP_VERBOSE(x11, "original window layer is %ld.\n", x11->orig_layer); + } + + long params[5] = {0}; + // if not fullscreen, stay on default layer + params[0] = ontop ? WIN_LAYER_ABOVE_DOCK : x11->orig_layer; + params[1] = CurrentTime; + MP_VERBOSE(x11, "Layered style stay on top (layer %ld).\n", params[0]); + x11_send_ewmh_msg(x11, "_WIN_LAYER", params); + } +} + +static bool rc_overlaps(struct mp_rect rc1, struct mp_rect rc2) +{ + return mp_rect_intersection(&rc1, &rc2); // changes the first argument +} + +// update x11->winrc with current boundaries of vo->x11->window +static void vo_x11_update_geometry(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + int x = 0, y = 0; + unsigned w, h, dummy_uint; + int dummy_int; + Window dummy_win; + Window win = x11->parent ? 
x11->parent : x11->window; + x11->winrc = (struct mp_rect){0, 0, 0, 0}; + if (win) { + XGetGeometry(x11->display, win, &dummy_win, &dummy_int, &dummy_int, + &w, &h, &dummy_int, &dummy_uint); + if (w > INT_MAX || h > INT_MAX) + w = h = 0; + XTranslateCoordinates(x11->display, win, x11->rootwin, 0, 0, + &x, &y, &dummy_win); + x11->winrc = (struct mp_rect){x, y, x + w, y + h}; + } + struct xrandr_display *disp = get_current_display(vo); + // Try to fallback to something reasonable if we have no disp yet + if (!disp) { + int screen = vo_x11_select_screen(vo); + if (screen > -1) { + disp = &x11->displays[screen]; + } else if (x11->current_screen > - 1) { + disp = &x11->displays[x11->current_screen]; + } + } + double fps = disp ? disp->fps : 0; + if (fps != x11->current_display_fps) + MP_VERBOSE(x11, "Current display FPS: %f\n", fps); + x11->current_display_fps = fps; + if (disp && x11->current_screen != disp->screen) { + x11->current_screen = disp->screen; + x11->pending_vo_events |= VO_EVENT_ICC_PROFILE_CHANGED; + } + x11->pending_vo_events |= VO_EVENT_WIN_STATE; +} + +static void vo_x11_fullscreen(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + + if (opts->fullscreen == x11->fs) + return; + x11->fs = opts->fullscreen; // x11->fs now contains the new state + if (x11->parent || !x11->window) + return; + + // Save old state before entering fullscreen + if (x11->fs) { + vo_x11_update_geometry(vo); + x11->nofsrc = x11->winrc; + } + + struct mp_rect rc = x11->nofsrc; + + if (x11->wm_type & vo_wm_FULLSCREEN) { + x11_set_ewmh_state(x11, "_NET_WM_STATE_FULLSCREEN", x11->fs); + if (!x11->fs && (x11->pos_changed_during_fs || + x11->size_changed_during_fs)) + { + if (x11->screenrc.x0 == rc.x0 && x11->screenrc.x1 == rc.x1 && + x11->screenrc.y0 == rc.y0 && x11->screenrc.y1 == rc.y1) + { + // Workaround for some WMs switching back to FS in this case. 
+ MP_VERBOSE(x11, "avoiding triggering old-style fullscreen\n"); + rc.x1 -= 1; + rc.y1 -= 1; + } + vo_x11_move_resize(vo, x11->pos_changed_during_fs, + x11->size_changed_during_fs, rc); + } + } else { + if (x11->fs) { + vo_x11_update_screeninfo(vo); + rc = x11->screenrc; + } + + vo_x11_decoration(vo, opts->border && !x11->fs); + vo_x11_sizehint(vo, rc, true); + + XMoveResizeWindow(x11->display, x11->window, rc.x0, rc.y0, + RC_W(rc), RC_H(rc)); + + vo_x11_setlayer(vo, x11->fs || opts->ontop); + + XRaiseWindow(x11->display, x11->window); + XFlush(x11->display); + } + + x11->size_changed_during_fs = false; + x11->pos_changed_during_fs = false; + + vo_x11_update_composition_hint(vo); +} + +static void vo_x11_maximize(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + long params[5] = { + x11->opts->window_maximized ? NET_WM_STATE_ADD : NET_WM_STATE_REMOVE, + XA(x11, _NET_WM_STATE_MAXIMIZED_VERT), + XA(x11, _NET_WM_STATE_MAXIMIZED_HORZ), + 1, // source indication: normal + }; + x11_send_ewmh_msg(x11, "_NET_WM_STATE", params); +} + +static void vo_x11_minimize(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + if (x11->opts->window_minimized) { + XIconifyWindow(x11->display, x11->window, x11->screen); + } else { + long params[5] = {0}; + x11_send_ewmh_msg(x11, "_NET_ACTIVE_WINDOW", params); + } +} + +static void vo_x11_set_geometry(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + if (!x11->window) + return; + + x11->geometry_change = true; + vo_x11_config_vo_window(vo); +} + +bool vo_x11_check_visible(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + + bool render = !x11->hidden || opts->force_render || + VS_IS_DISP(opts->video_sync); + return render; +} + +static void vo_x11_set_input_region(struct vo *vo, bool passthrough) +{ + struct vo_x11_state *x11 = vo->x11; + + if (passthrough) { + XRectangle rect = {0, 0, 0, 0}; + Region region = XCreateRegion(); + XUnionRectWithRegion(&rect, region, 
region); + XShapeCombineRegion(x11->display, x11->window, ShapeInput, 0, 0, + region, ShapeSet); + XDestroyRegion(region); + } else { + XShapeCombineMask(x11->display, x11->window, ShapeInput, 0, 0, + 0, ShapeSet); + } +} + +int vo_x11_control(struct vo *vo, int *events, int request, void *arg) +{ + struct vo_x11_state *x11 = vo->x11; + struct mp_vo_opts *opts = x11->opts; + switch (request) { + case VOCTRL_CHECK_EVENTS: + vo_x11_check_events(vo); + *events |= x11->pending_vo_events; + x11->pending_vo_events = 0; + return VO_TRUE; + case VOCTRL_VO_OPTS_CHANGED: { + void *opt; + while (m_config_cache_get_next_changed(x11->opts_cache, &opt)) { + if (opt == &opts->fullscreen) + vo_x11_fullscreen(vo); + if (opt == &opts->ontop) + vo_x11_setlayer(vo, opts->ontop); + if (opt == &opts->border) + vo_x11_decoration(vo, opts->border); + if (opt == &opts->all_workspaces) + vo_x11_sticky(vo, opts->all_workspaces); + if (opt == &opts->window_minimized) + vo_x11_minimize(vo); + if (opt == &opts->window_maximized) + vo_x11_maximize(vo); + if (opt == &opts->cursor_passthrough) + vo_x11_set_input_region(vo, opts->cursor_passthrough); + if (opt == &opts->x11_present) + xpresent_set(x11); + if (opt == &opts->geometry || opt == &opts->autofit || + opt == &opts->autofit_smaller || opt == &opts->autofit_larger) + { + vo_x11_set_geometry(vo); + } + } + return VO_TRUE; + } + case VOCTRL_GET_UNFS_WINDOW_SIZE: { + int *s = arg; + if (!x11->window || x11->parent) + return VO_FALSE; + s[0] = (x11->fs ? RC_W(x11->nofsrc) : RC_W(x11->winrc)) / x11->dpi_scale; + s[1] = (x11->fs ? 
RC_H(x11->nofsrc) : RC_H(x11->winrc)) / x11->dpi_scale; + return VO_TRUE; + } + case VOCTRL_SET_UNFS_WINDOW_SIZE: { + int *s = arg; + if (!x11->window || x11->parent) + return VO_FALSE; + int w = s[0] * x11->dpi_scale; + int h = s[1] * x11->dpi_scale; + struct mp_rect rc = x11->winrc; + rc.x1 = rc.x0 + w; + rc.y1 = rc.y0 + h; + if (x11->opts->window_maximized) { + x11->opts->window_maximized = false; + m_config_cache_write_opt(x11->opts_cache, + &x11->opts->window_maximized); + vo_x11_maximize(vo); + } + vo_x11_highlevel_resize(vo, rc); + if (!x11->fs) { // guess new window size, instead of waiting for X + x11->winrc.x1 = x11->winrc.x0 + w; + x11->winrc.y1 = x11->winrc.y0 + h; + } + return VO_TRUE; + } + case VOCTRL_GET_FOCUSED: { + *(bool *)arg = x11->has_focus; + return VO_TRUE; + } + case VOCTRL_GET_DISPLAY_NAMES: { + if (!x11->pseudo_mapped) + return VO_FALSE; + char **names = NULL; + int displays_spanned = 0; + for (int n = 0; n < x11->num_displays; n++) { + if (rc_overlaps(x11->displays[n].rc, x11->winrc)) + MP_TARRAY_APPEND(NULL, names, displays_spanned, + talloc_strdup(NULL, x11->displays[n].name)); + } + MP_TARRAY_APPEND(NULL, names, displays_spanned, NULL); + *(char ***)arg = names; + return VO_TRUE; + } + case VOCTRL_GET_ICC_PROFILE: { + if (!x11->pseudo_mapped) + return VO_NOTAVAIL; + int atom_id = x11->displays[x11->current_screen].atom_id; + char prop[80]; + snprintf(prop, sizeof(prop), "_ICC_PROFILE"); + if (atom_id > 0) + mp_snprintf_cat(prop, sizeof(prop), "_%d", atom_id); + x11->icc_profile_property = XAs(x11, prop); + int len; + MP_VERBOSE(x11, "Retrieving ICC profile for display: %d\n", x11->current_screen); + void *icc = x11_get_property(x11, x11->rootwin, x11->icc_profile_property, + XA_CARDINAL, 8, &len); + if (!icc) + return VO_FALSE; + *(bstr *)arg = bstrdup(NULL, (bstr){icc, len}); + XFree(icc); + // Watch x11->icc_profile_property + XSelectInput(x11->display, x11->rootwin, PropertyChangeMask); + return VO_TRUE; + } + case 
VOCTRL_SET_CURSOR_VISIBILITY: + x11->mouse_cursor_visible = *(bool *)arg; + vo_update_cursor(vo); + return VO_TRUE; + case VOCTRL_KILL_SCREENSAVER: + set_screensaver(x11, false); + return VO_TRUE; + case VOCTRL_RESTORE_SCREENSAVER: + set_screensaver(x11, true); + return VO_TRUE; + case VOCTRL_UPDATE_WINDOW_TITLE: + talloc_free(x11->window_title); + x11->window_title = talloc_strdup(x11, (char *)arg); + if (!x11->parent || x11->opts->x11_wid_title) + vo_x11_update_window_title(vo); + return VO_TRUE; + case VOCTRL_GET_DISPLAY_FPS: { + double fps = x11->current_display_fps; + if (fps <= 0) + break; + *(double *)arg = fps; + return VO_TRUE; + } + case VOCTRL_GET_DISPLAY_RES: { + struct xrandr_display *disp = NULL; + if (x11->current_screen > -1) + disp = &x11->displays[x11->current_screen]; + if (!x11->window || x11->parent || !disp) + return VO_NOTAVAIL; + ((int *)arg)[0] = mp_rect_w(disp->rc); + ((int *)arg)[1] = mp_rect_h(disp->rc); + return VO_TRUE; + } + case VOCTRL_GET_WINDOW_ID: { + if (!x11->window) + return VO_NOTAVAIL; + *(int64_t *)arg = x11->window; + return VO_TRUE; + } + case VOCTRL_GET_HIDPI_SCALE: + *(double *)arg = x11->dpi_scale; + return VO_TRUE; + } + return VO_NOTIMPL; +} + +void vo_x11_present(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + XPresentNotifyMSC(x11->display, x11->window, + 0, 0, 1, 0); +} + +void vo_x11_wakeup(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + + (void)write(x11->wakeup_pipe[1], &(char){0}, 1); +} + +void vo_x11_wait_events(struct vo *vo, int64_t until_time_ns) +{ + struct vo_x11_state *x11 = vo->x11; + + struct pollfd fds[2] = { + { .fd = x11->event_fd, .events = POLLIN }, + { .fd = x11->wakeup_pipe[0], .events = POLLIN }, + }; + int64_t wait_ns = until_time_ns - mp_time_ns(); + int64_t timeout_ns = MPCLAMP(wait_ns, 0, MP_TIME_S_TO_NS(10)); + + mp_poll(fds, 2, timeout_ns); + + if (fds[1].revents & POLLIN) + mp_flush_wakeup_pipe(x11->wakeup_pipe[0]); +} + +static void xscreensaver_heartbeat(struct 
vo_x11_state *x11) +{ + double time = mp_time_sec(); + + if (x11->display && !x11->screensaver_enabled && + (time - x11->screensaver_time_last) >= 10) + { + x11->screensaver_time_last = time; + XResetScreenSaver(x11->display); + } +} + +static int xss_suspend(Display *mDisplay, Bool suspend) +{ + int event, error, major, minor; + if (XScreenSaverQueryExtension(mDisplay, &event, &error) != True || + XScreenSaverQueryVersion(mDisplay, &major, &minor) != True) + return 0; + if (major < 1 || (major == 1 && minor < 1)) + return 0; + XScreenSaverSuspend(mDisplay, suspend); + return 1; +} + +static void set_screensaver(struct vo_x11_state *x11, bool enabled) +{ + Display *mDisplay = x11->display; + if (!mDisplay || x11->screensaver_enabled == enabled) + return; + MP_VERBOSE(x11, "%s screensaver.\n", enabled ? "Enabling" : "Disabling"); + x11->screensaver_enabled = enabled; + if (xss_suspend(mDisplay, !enabled)) + return; + int nothing; + if (DPMSQueryExtension(mDisplay, ¬hing, ¬hing)) { + BOOL onoff = 0; + CARD16 state; + DPMSInfo(mDisplay, &state, &onoff); + if (!x11->dpms_touched && enabled) + return; // enable DPMS only we we disabled it before + if (enabled != !!onoff) { + MP_VERBOSE(x11, "Setting DMPS: %s.\n", enabled ? "on" : "off"); + if (enabled) { + DPMSEnable(mDisplay); + } else { + DPMSDisable(mDisplay); + x11->dpms_touched = true; + } + DPMSInfo(mDisplay, &state, &onoff); + if (enabled != !!onoff) + MP_WARN(x11, "DPMS state could not be set.\n"); + } + } +} + +static void vo_x11_selectinput_witherr(struct vo *vo, + Display *display, + Window w, + long event_mask) +{ + XSelectInput(display, w, NoEventMask); + + // NOTE: this can raise BadAccess, which should be ignored by the X error + // handler; also see below + XSelectInput(display, w, event_mask); + + // Test whether setting the event mask failed (with a BadAccess X error, + // although we don't know whether this really happened). 
+ // This is needed for obscure situations like using --rootwin with a window + // manager active. + XWindowAttributes a; + if (XGetWindowAttributes(display, w, &a)) { + long bad = ButtonPressMask | ButtonReleaseMask | PointerMotionMask; + if ((event_mask & bad) && (a.all_event_masks & bad) && + ((a.your_event_mask & bad) != (event_mask & bad))) + { + MP_ERR(vo->x11, "X11 error: error during XSelectInput " + "call, trying without mouse events\n"); + XSelectInput(display, w, event_mask & ~bad); + } + } +} + +bool vo_x11_screen_is_composited(struct vo *vo) +{ + struct vo_x11_state *x11 = vo->x11; + char buf[50]; + snprintf(buf, sizeof(buf), "_NET_WM_CM_S%d", x11->screen); + Atom NET_WM_CM = XInternAtom(x11->display, buf, False); + return XGetSelectionOwner(x11->display, NET_WM_CM) != None; +} + +// Return whether the given visual has alpha (when compositing is used). +bool vo_x11_is_rgba_visual(XVisualInfo *v) +{ + // This is a heuristic at best. Note that normal 8 bit Visuals use + // a depth of 24, even if the pixels are padded to 32 bit. If the + // depth is higher than 24, the remaining bits must be alpha. + // Note: vinfo->bits_per_rgb appears to be useless (is always 8). + unsigned long mask = v->depth == sizeof(unsigned long) * 8 ? + (unsigned long)-1 : (1UL << v->depth) - 1; + return mask & ~(v->red_mask | v->green_mask | v->blue_mask); +} diff --git a/video/out/x11_common.h b/video/out/x11_common.h new file mode 100644 index 0000000..62a96d7 --- /dev/null +++ b/video/out/x11_common.h @@ -0,0 +1,164 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef MPLAYER_X11_COMMON_H +#define MPLAYER_X11_COMMON_H + +#include <stdatomic.h> +#include <stdbool.h> +#include <stdint.h> + +#include <X11/Xlib.h> +#include <X11/Xutil.h> + +#include "common/common.h" + +#include "config.h" +#if !HAVE_GPL +#error GPL only +#endif + +struct vo; +struct mp_log; + +#define MAX_DISPLAYS 32 // ought to be enough for everyone + +struct xrandr_display { + struct mp_rect rc; + double fps; + char *name; + bool overlaps; + int atom_id; // offset by location of primary + int screen; +}; + +struct vo_x11_state { + struct mp_log *log; + struct input_ctx *input_ctx; + struct m_config_cache *opts_cache; + struct mp_vo_opts *opts; + Display *display; + int event_fd; + int wakeup_pipe[2]; + Window window; + Window rootwin; + Window parent; // embedded in this foreign window + int screen; + int display_is_local; + int ws_width; + int ws_height; + int dpi_scale; + struct mp_rect screenrc; + char *window_title; + + struct xrandr_display displays[MAX_DISPLAYS]; + int num_displays; + int current_screen; + + int xrandr_event; + bool has_mesa; + bool has_nvidia; + + bool screensaver_enabled; + bool dpms_touched; + double screensaver_time_last; + + struct mp_present *present; + bool use_present; + int present_code; + + XIM xim; + XIC xic; + bool no_autorepeat; + + Colormap colormap; + + int wm_type; + bool hidden; // _NET_WM_STATE_HIDDEN + bool window_hidden; // the window was mapped at least once + bool pseudo_mapped; // not necessarily mapped, but known window size + int fs; // whether we assume the window is in fullscreen mode + + bool mouse_cursor_visible; // 
whether we want the cursor to be visible (only + // takes effect when the window is focused) + bool mouse_cursor_set; // whether the cursor is *currently* *hidden* + bool has_focus; + long orig_layer; + + // Current actual window position (updated on window move/resize events). + struct mp_rect winrc; + double current_display_fps; + + int pending_vo_events; + + // last non-fullscreen extends (updated on fullscreen or reinitialization) + struct mp_rect nofsrc; + + /* Keep track of original video width/height to determine when to + * resize window when reconfiguring. Resize window when video size + * changes, but don't force window size changes as long as video size + * stays the same (even if that size is different from the current + * window size after the user modified the latter). */ + int old_dw, old_dh; + /* Video size changed during fullscreen when we couldn't tell the new + * size to the window manager. Must set window size when turning + * fullscreen off. */ + bool size_changed_during_fs; + bool pos_changed_during_fs; + + /* One of the autofit/geometry options changed at runtime. 
*/ + bool geometry_change; + + XComposeStatus compose_status; + + /* XShm stuff */ + int ShmCompletionEvent; + /* Number of outstanding XShmPutImage requests */ + /* Decremented when ShmCompletionEvent is received */ + /* Increment it before XShmPutImage */ + int ShmCompletionWaitCount; + + /* drag and drop */ + Atom dnd_requested_format; + Atom dnd_requested_action; + Window dnd_src_window; + + /* dragging the window */ + bool win_drag_button1_down; + + Atom icc_profile_property; +}; + +bool vo_x11_init(struct vo *vo); +void vo_x11_uninit(struct vo *vo); +void vo_x11_check_events(struct vo *vo); +bool vo_x11_screen_is_composited(struct vo *vo); +bool vo_x11_create_vo_window(struct vo *vo, XVisualInfo *vis, + const char *classname); +void vo_x11_config_vo_window(struct vo *vo); +bool vo_x11_check_visible(struct vo *vo); +int vo_x11_control(struct vo *vo, int *events, int request, void *arg); +void vo_x11_present(struct vo *vo); +void vo_x11_sync_swap(struct vo *vo); +void vo_x11_wakeup(struct vo *vo); +void vo_x11_wait_events(struct vo *vo, int64_t until_time_ns); + +void vo_x11_silence_xlib(int dir); + +bool vo_x11_is_rgba_visual(XVisualInfo *v); + +#endif /* MPLAYER_X11_COMMON_H */ diff --git a/video/repack.c b/video/repack.c new file mode 100644 index 0000000..ce3703a --- /dev/null +++ b/video/repack.c @@ -0,0 +1,1203 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. 
If not, see <http://www.gnu.org/licenses/>. + */ + +#include <math.h> + +#include <libavutil/bswap.h> +#include <libavutil/pixfmt.h> + +#include "common/common.h" +#include "repack.h" +#include "video/csputils.h" +#include "video/fmt-conversion.h" +#include "video/img_format.h" +#include "video/mp_image.h" + +enum repack_step_type { + REPACK_STEP_FLOAT, + REPACK_STEP_REPACK, + REPACK_STEP_ENDIAN, +}; + +struct repack_step { + enum repack_step_type type; + // 0=input, 1=output + struct mp_image *buf[2]; + bool user_buf[2]; // user_buf[n]==true if buf[n] = user src/dst buffer + struct mp_imgfmt_desc fmt[2]; + struct mp_image *tmp; // output buffer, if needed +}; + +struct mp_repack { + bool pack; // if false, this is for unpacking + int flags; + int imgfmt_user; // original mp format (unchanged endian) + int imgfmt_a; // original mp format (possibly packed format, + // swapped endian) + int imgfmt_b; // equivalent unpacked/planar format + struct mp_imgfmt_desc fmt_a;// ==imgfmt_a + struct mp_imgfmt_desc fmt_b;// ==imgfmt_b + + void (*repack)(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w); + + bool passthrough_y; // possible luma plane optimization for e.g. nv12 + int endian_size; // endian swap; 0=none, 2/4=swap word size + + // For packed_repack. + int components[4]; // b[n] = mp_image.planes[components[n]] + // pack: a is dst, b is src + // unpack: a is src, b is dst + void (*packed_repack_scanline)(void *a, void *b[], int w); + + // Fringe RGB/YUV. + uint8_t comp_size; + uint8_t comp_map[6]; + uint8_t comp_shifts[3]; + uint8_t *comp_lut; + void (*repack_fringe_yuv)(void *dst, void *src[], int w, uint8_t *c); + + // F32 repacking. 
+ int f32_comp_size; + float f32_m[4], f32_o[4]; + uint32_t f32_pmax[4]; + enum mp_csp f32_csp_space; + enum mp_csp_levels f32_csp_levels; + + // REPACK_STEP_REPACK: if true, need to copy this plane + bool copy_buf[4]; + + struct repack_step steps[4]; + int num_steps; + + bool configured; +}; + +// depth = number of LSB in use +static int find_gbrp_format(int depth, int num_planes) +{ + if (num_planes != 3 && num_planes != 4) + return 0; + struct mp_regular_imgfmt desc = { + .component_type = MP_COMPONENT_TYPE_UINT, + .forced_csp = MP_CSP_RGB, + .component_size = depth > 8 ? 2 : 1, + .component_pad = depth - (depth > 8 ? 16 : 8), + .num_planes = num_planes, + .planes = { {1, {2}}, {1, {3}}, {1, {1}}, {1, {4}} }, + }; + return mp_find_regular_imgfmt(&desc); +} + +// depth = number of LSB in use +static int find_yuv_format(int depth, int num_planes) +{ + if (num_planes < 1 || num_planes > 4) + return 0; + struct mp_regular_imgfmt desc = { + .component_type = MP_COMPONENT_TYPE_UINT, + .component_size = depth > 8 ? 2 : 1, + .component_pad = depth - (depth > 8 ? 16 : 8), + .num_planes = num_planes, + .planes = { {1, {1}}, {1, {2}}, {1, {3}}, {1, {4}} }, + }; + if (num_planes == 2) + desc.planes[1].components[0] = 4; + return mp_find_regular_imgfmt(&desc); +} + +// Copy one line on the plane p. +static void copy_plane(struct mp_image *dst, int dst_x, int dst_y, + struct mp_image *src, int src_x, int src_y, + int w, int p) +{ + // Number of lines on this plane. + int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1; + size_t size = mp_image_plane_bytes(dst, p, dst_x, w); + + assert(dst->fmt.bpp[p] == src->fmt.bpp[p]); + + for (int y = 0; y < h; y++) { + void *pd = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y); + void *ps = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y); + memcpy(pd, ps, size); + } +} + +// Swap endian for one line. 
+static void swap_endian(struct mp_image *dst, int dst_x, int dst_y, + struct mp_image *src, int src_x, int src_y, + int w, int endian_size) +{ + assert(src->fmt.num_planes == dst->fmt.num_planes); + + for (int p = 0; p < dst->fmt.num_planes; p++) { + int xs = dst->fmt.xs[p]; + int bpp = dst->fmt.bpp[p] / 8; + int words_per_pixel = bpp / endian_size; + int num_words = ((w + (1 << xs) - 1) >> xs) * words_per_pixel; + // Number of lines on this plane. + int h = (1 << dst->fmt.chroma_ys) - (1 << dst->fmt.ys[p]) + 1; + + assert(src->fmt.bpp[p] == bpp * 8); + + for (int y = 0; y < h; y++) { + void *s = mp_image_pixel_ptr_ny(src, p, src_x, src_y + y); + void *d = mp_image_pixel_ptr_ny(dst, p, dst_x, dst_y + y); + switch (endian_size) { + case 2: + for (int x = 0; x < num_words; x++) + ((uint16_t *)d)[x] = av_bswap16(((uint16_t *)s)[x]); + break; + case 4: + for (int x = 0; x < num_words; x++) + ((uint32_t *)d)[x] = av_bswap32(((uint32_t *)s)[x]); + break; + default: + MP_ASSERT_UNREACHABLE(); + } + } + } +} + +// PA = PAck, copy planar input to single packed array +// UN = UNpack, copy packed input to planar output +// Naming convention: +// pa_/un_ prefix to identify conversion direction. +// Left (LSB, lowest byte address) -> Right (MSB, highest byte address). +// (This is unusual; MSB to LSB is more commonly used to describe formats, +// but our convention makes more sense for byte access in little endian.) +// "c" identifies a color component. +// "z" identifies known zero padding. +// "x" identifies uninitialized padding. +// A component is followed by its size in bits. +// Size can be omitted for multiple uniform components (c8c8c8 == ccc8). +// Unpackers will often use "x" for padding, because they ignore it, while +// packers will use "z" because they write zero. 

// Generates a packer: combines 4 planar samples into one packed_t word per
// pixel, each sample shifted left by its sh_cN.
#define PA_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3)      \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        plane_t *in0 = src[0];                                              \
        plane_t *in1 = src[1];                                              \
        plane_t *in2 = src[2];                                              \
        plane_t *in3 = src[3];                                              \
        for (int i = 0; i < w; i++) {                                       \
            out[i] = ((packed_t)in0[i] << (sh_c0)) |                        \
                     ((packed_t)in1[i] << (sh_c1)) |                        \
                     ((packed_t)in2[i] << (sh_c2)) |                        \
                     ((packed_t)in3[i] << (sh_c3));                         \
        }                                                                   \
    }

// Generates an unpacker: splits one packed_t word per pixel into 4 planar
// samples, each taken as (word >> sh_cN) & mask.
#define UN_WORD_4(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, sh_c3, mask)\
    static void name(void *src, void *dst[], int w) {                       \
        packed_t *in = src;                                                 \
        plane_t *out0 = dst[0];                                             \
        plane_t *out1 = dst[1];                                             \
        plane_t *out2 = dst[2];                                             \
        plane_t *out3 = dst[3];                                             \
        for (int i = 0; i < w; i++) {                                       \
            packed_t word = in[i];                                          \
            out0[i] = (word >> (sh_c0)) & (mask);                           \
            out1[i] = (word >> (sh_c1)) & (mask);                           \
            out2[i] = (word >> (sh_c2)) & (mask);                           \
            out3[i] = (word >> (sh_c3)) & (mask);                           \
        }                                                                   \
    }


// Generates a packer for 3 planar samples; the bits given by pad are OR-ed
// into every output word.
#define PA_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, pad)        \
    static void name(void *dst, void *src[], int w) {                       \
        packed_t *out = dst;                                                \
        plane_t *in0 = src[0];                                              \
        plane_t *in1 = src[1];                                              \
        plane_t *in2 = src[2];                                              \
        for (int i = 0; i < w; i++) {                                       \
            out[i] = (pad) |                                                \
                     ((packed_t)in0[i] << (sh_c0)) |                        \
                     ((packed_t)in1[i] << (sh_c1)) |                        \
                     ((packed_t)in2[i] << (sh_c2));                         \
        }                                                                   \
    }

UN_WORD_4(un_cccc8, uint32_t, uint8_t, 0, 8, 16, 24, 0xFFu)
PA_WORD_4(pa_cccc8, uint32_t, uint8_t, 0, 8, 16, 24)
// Not sure if this is a good idea; there may be no alignment guarantee.
+UN_WORD_4(un_cccc16, uint64_t, uint16_t, 0, 16, 32, 48, 0xFFFFu) +PA_WORD_4(pa_cccc16, uint64_t, uint16_t, 0, 16, 32, 48) + +#define UN_WORD_3(name, packed_t, plane_t, sh_c0, sh_c1, sh_c2, mask) \ + static void name(void *src, void *dst[], int w) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ + ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ + ((plane_t *)dst[2])[x] = (c >> (sh_c2)) & (mask); \ + } \ + } + +UN_WORD_3(un_ccc8x8, uint32_t, uint8_t, 0, 8, 16, 0xFFu) +PA_WORD_3(pa_ccc8z8, uint32_t, uint8_t, 0, 8, 16, 0) +UN_WORD_3(un_x8ccc8, uint32_t, uint8_t, 8, 16, 24, 0xFFu) +PA_WORD_3(pa_z8ccc8, uint32_t, uint8_t, 8, 16, 24, 0) +UN_WORD_3(un_ccc10x2, uint32_t, uint16_t, 0, 10, 20, 0x3FFu) +PA_WORD_3(pa_ccc10z2, uint32_t, uint16_t, 0, 10, 20, 0) +UN_WORD_3(un_ccc16x16, uint64_t, uint16_t, 0, 16, 32, 0xFFFFu) +PA_WORD_3(pa_ccc16z16, uint64_t, uint16_t, 0, 16, 32, 0) + +#define PA_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, pad) \ + static void name(void *dst, void *src[], int w) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = (pad) | \ + ((packed_t)((plane_t *)src[0])[x] << (sh_c0)) | \ + ((packed_t)((plane_t *)src[1])[x] << (sh_c1)); \ + } \ + } + +#define UN_WORD_2(name, packed_t, plane_t, sh_c0, sh_c1, mask) \ + static void name(void *src, void *dst[], int w) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((plane_t *)dst[0])[x] = (c >> (sh_c0)) & (mask); \ + ((plane_t *)dst[1])[x] = (c >> (sh_c1)) & (mask); \ + } \ + } + +UN_WORD_2(un_cc8, uint16_t, uint8_t, 0, 8, 0xFFu) +PA_WORD_2(pa_cc8, uint16_t, uint8_t, 0, 8, 0) +UN_WORD_2(un_cc16, uint32_t, uint16_t, 0, 16, 0xFFFFu) +PA_WORD_2(pa_cc16, uint32_t, uint16_t, 0, 16, 0) + +#define PA_SEQ_3(name, comp_t) \ + static void name(void *dst, void *src[], int w) { \ + comp_t *r = dst; \ + for (int x = 0; x < w; x++) { \ + *r++ = ((comp_t *)src[0])[x]; \ + *r++ = ((comp_t *)src[1])[x]; 
\ + *r++ = ((comp_t *)src[2])[x]; \ + } \ + } + +#define UN_SEQ_3(name, comp_t) \ + static void name(void *src, void *dst[], int w) { \ + comp_t *r = src; \ + for (int x = 0; x < w; x++) { \ + ((comp_t *)dst[0])[x] = *r++; \ + ((comp_t *)dst[1])[x] = *r++; \ + ((comp_t *)dst[2])[x] = *r++; \ + } \ + } + +UN_SEQ_3(un_ccc8, uint8_t) +PA_SEQ_3(pa_ccc8, uint8_t) +UN_SEQ_3(un_ccc16, uint16_t) +PA_SEQ_3(pa_ccc16, uint16_t) + +// "regular": single packed plane, all components have same width (except padding) +struct regular_repacker { + int packed_width; // number of bits of the packed pixel + int component_width; // number of bits for a single component + int prepadding; // number of bits of LSB padding + int num_components; // number of components that can be accessed + void (*pa_scanline)(void *a, void *b[], int w); + void (*un_scanline)(void *a, void *b[], int w); +}; + +static const struct regular_repacker regular_repackers[] = { + {32, 8, 0, 3, pa_ccc8z8, un_ccc8x8}, + {32, 8, 8, 3, pa_z8ccc8, un_x8ccc8}, + {32, 8, 0, 4, pa_cccc8, un_cccc8}, + {64, 16, 0, 4, pa_cccc16, un_cccc16}, + {64, 16, 0, 3, pa_ccc16z16, un_ccc16x16}, + {24, 8, 0, 3, pa_ccc8, un_ccc8}, + {48, 16, 0, 3, pa_ccc16, un_ccc16}, + {16, 8, 0, 2, pa_cc8, un_cc8}, + {32, 16, 0, 2, pa_cc16, un_cc16}, + {32, 10, 0, 3, pa_ccc10z2, un_ccc10x2}, +}; + +static void packed_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + uint32_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + + void *pb[4] = {0}; + for (int p = 0; p < b->num_planes; p++) { + int s = rp->components[p]; + pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y); + } + + rp->packed_repack_scanline(pa, pb, w); +} + +// Tries to set a packer/unpacker for component-wise byte aligned formats. 
+static void setup_packed_packer(struct mp_repack *rp) +{ + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a); + if (!(desc.flags & MP_IMGFLAG_HAS_COMPS) || + !(desc.flags & MP_IMGFLAG_TYPE_UINT) || + !(desc.flags & MP_IMGFLAG_NE) || + desc.num_planes != 1) + return; + + int num_real_components = 0; + int components[4] = {0}; + for (int n = 0; n < MP_NUM_COMPONENTS; n++) { + if (!desc.comps[n].size) + continue; + if (desc.comps[n].size != desc.comps[0].size || + desc.comps[n].pad != desc.comps[0].pad || + desc.comps[n].offset % desc.comps[0].size) + return; + int item = desc.comps[n].offset / desc.comps[0].size; + if (item >= 4) + return; + components[item] = n + 1; + num_real_components++; + } + + int depth = desc.comps[0].size + MPMIN(0, desc.comps[0].pad); + + static const int reorder_gbrp[] = {0, 3, 1, 2, 4}; + static const int reorder_yuv[] = {0, 1, 2, 3, 4}; + int planar_fmt = 0; + const int *reorder = NULL; + if (desc.flags & MP_IMGFLAG_COLOR_YUV) { + planar_fmt = find_yuv_format(depth, num_real_components); + reorder = reorder_yuv; + } else { + planar_fmt = find_gbrp_format(depth, num_real_components); + reorder = reorder_gbrp; + } + if (!planar_fmt) + return; + + for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { + const struct regular_repacker *pa = &regular_repackers[i]; + + // The following may assume little endian (because some repack backends + // use word access, while the metadata here uses byte access). + + int prepad = components[0] ? 0 : 8; + int first_comp = components[0] ? 0 : 1; + void (*repack_cb)(void *pa, void *pb[], int w) = + rp->pack ? 
pa->pa_scanline : pa->un_scanline; + + if (pa->packed_width != desc.bpp[0] || + pa->component_width != depth || + pa->num_components != num_real_components || + pa->prepadding != prepad || + !repack_cb) + continue; + + rp->repack = packed_repack; + rp->packed_repack_scanline = repack_cb; + rp->imgfmt_b = planar_fmt; + for (int n = 0; n < num_real_components; n++) { + // Determine permutation that maps component order between the two + // formats, with has_alpha special case (see above). + int c = reorder[components[first_comp + n]]; + rp->components[n] = c == 4 ? num_real_components - 1 : c - 1; + } + return; + } +} + +#define PA_SHIFT_LUT8(name, packed_t) \ + static void name(void *dst, void *src[], int w, uint8_t *lut, \ + uint8_t s0, uint8_t s1, uint8_t s2) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = \ + (lut[((uint8_t *)src[0])[x] + 256 * 0] << s0) | \ + (lut[((uint8_t *)src[1])[x] + 256 * 1] << s1) | \ + (lut[((uint8_t *)src[2])[x] + 256 * 2] << s2); \ + } \ + } + + +#define UN_SHIFT_LUT8(name, packed_t) \ + static void name(void *src, void *dst[], int w, uint8_t *lut, \ + uint8_t s0, uint8_t s1, uint8_t s2) { \ + for (int x = 0; x < w; x++) { \ + packed_t c = ((packed_t *)src)[x]; \ + ((uint8_t *)dst[0])[x] = lut[((c >> s0) & 0xFF) + 256 * 0]; \ + ((uint8_t *)dst[1])[x] = lut[((c >> s1) & 0xFF) + 256 * 1]; \ + ((uint8_t *)dst[2])[x] = lut[((c >> s2) & 0xFF) + 256 * 2]; \ + } \ + } + +PA_SHIFT_LUT8(pa_shift_lut8_8, uint8_t) +PA_SHIFT_LUT8(pa_shift_lut8_16, uint16_t) +UN_SHIFT_LUT8(un_shift_lut8_8, uint8_t) +UN_SHIFT_LUT8(un_shift_lut8_16, uint16_t) + +static void fringe_rgb_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + + void *pb[4] = {0}; + for (int p = 0; p < b->num_planes; p++) { + int s = rp->components[p]; + pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y); + } + + assert(rp->comp_size == 1 || rp->comp_size == 2); + + 
void (*repack)(void *pa, void *pb[], int w, uint8_t *lut, + uint8_t s0, uint8_t s1, uint8_t s2) = NULL; + if (rp->pack) { + repack = rp->comp_size == 1 ? pa_shift_lut8_8 : pa_shift_lut8_16; + } else { + repack = rp->comp_size == 1 ? un_shift_lut8_8 : un_shift_lut8_16; + } + repack(pa, pb, w, rp->comp_lut, + rp->comp_shifts[0], rp->comp_shifts[1], rp->comp_shifts[2]); +} + +static void setup_fringe_rgb_packer(struct mp_repack *rp) +{ + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a); + if (!(desc.flags & MP_IMGFLAG_HAS_COMPS)) + return; + + if (desc.bpp[0] > 16 || (desc.bpp[0] % 8u) || + mp_imgfmt_get_forced_csp(rp->imgfmt_a) != MP_CSP_RGB || + desc.num_planes != 1 || desc.comps[3].size) + return; + + int depth = desc.comps[0].size; + for (int n = 0; n < 3; n++) { + struct mp_imgfmt_comp_desc *c = &desc.comps[n]; + + if (c->size < 1 || c->size > 8 || c->pad) + return; + + if (rp->flags & REPACK_CREATE_ROUND_DOWN) { + depth = MPMIN(depth, c->size); + } else { + depth = MPMAX(depth, c->size); + } + } + if (rp->flags & REPACK_CREATE_EXPAND_8BIT) + depth = 8; + + rp->imgfmt_b = find_gbrp_format(depth, 3); + if (!rp->imgfmt_b) + return; + rp->comp_lut = talloc_array(rp, uint8_t, 256 * 3); + rp->repack = fringe_rgb_repack; + for (int n = 0; n < 3; n++) + rp->components[n] = ((int[]){3, 1, 2})[n] - 1; + + for (int n = 0; n < 3; n++) { + int bits = desc.comps[n].size; + rp->comp_shifts[n] = desc.comps[n].offset; + if (rp->comp_lut) { + uint8_t *lut = rp->comp_lut + 256 * n; + uint8_t zmax = (1 << depth) - 1; + uint8_t cmax = (1 << bits) - 1; + for (int v = 0; v < 256; v++) { + if (rp->pack) { + lut[v] = (v * cmax + zmax / 2) / zmax; + } else { + lut[v] = (v & cmax) * zmax / cmax; + } + } + } + } + + rp->comp_size = (desc.bpp[0] + 7) / 8; + assert(rp->comp_size == 1 || rp->comp_size == 2); + + if (desc.endian_shift) { + assert(rp->comp_size == 2 && (1 << desc.endian_shift) == 2); + rp->endian_size = 2; + } +} + +static void unpack_pal(struct mp_repack *rp, + 
struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + uint8_t *src = mp_image_pixel_ptr(a, 0, a_x, a_y); + uint32_t *pal = (void *)a->planes[1]; + + uint8_t *dst[4] = {0}; + for (int p = 0; p < b->num_planes; p++) + dst[p] = mp_image_pixel_ptr(b, p, b_x, b_y); + + for (int x = 0; x < w; x++) { + uint32_t c = pal[src[x]]; + dst[0][x] = (c >> 8) & 0xFF; // G + dst[1][x] = (c >> 0) & 0xFF; // B + dst[2][x] = (c >> 16) & 0xFF; // R + dst[3][x] = (c >> 24) & 0xFF; // A + } +} + +static void bitmap_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + uint8_t *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + uint8_t *pb = mp_image_pixel_ptr(b, 0, b_x, b_y); + + if (rp->pack) { + for (unsigned x = 0; x < w; x += 8) { + uint8_t d = 0; + int max_b = MPMIN(8, w - x); + for (int bp = 0; bp < max_b; bp++) + d |= (rp->comp_lut[pb[x + bp]]) << (7 - bp); + pa[x / 8] = d; + } + } else { + for (unsigned x = 0; x < w; x += 8) { + uint8_t d = pa[x / 8]; + int max_b = MPMIN(8, w - x); + for (int bp = 0; bp < max_b; bp++) + pb[x + bp] = rp->comp_lut[d & (1 << (7 - bp))]; + } + } +} + +static void setup_misc_packer(struct mp_repack *rp) +{ + if (rp->imgfmt_a == IMGFMT_PAL8 && !rp->pack) { + int grap_fmt = find_gbrp_format(8, 4); + if (!grap_fmt) + return; + rp->imgfmt_b = grap_fmt; + rp->repack = unpack_pal; + } else { + enum AVPixelFormat avfmt = imgfmt2pixfmt(rp->imgfmt_a); + if (avfmt == AV_PIX_FMT_MONOWHITE || avfmt == AV_PIX_FMT_MONOBLACK) { + rp->comp_lut = talloc_array(rp, uint8_t, 256); + rp->imgfmt_b = IMGFMT_Y1; + int max = 1; + if (rp->flags & REPACK_CREATE_EXPAND_8BIT) { + rp->imgfmt_b = IMGFMT_Y8; + max = 255; + } + bool inv = avfmt == AV_PIX_FMT_MONOWHITE; + for (int n = 0; n < 256; n++) { + rp->comp_lut[n] = rp->pack ? (inv ^ (n >= (max + 1) / 2)) + : ((inv ^ !!n) ? 
max : 0); + } + rp->repack = bitmap_repack; + return; + } + } +} + +#define PA_P422(name, comp_t) \ + static void name(void *dst, void *src[], int w, uint8_t *c) { \ + for (int x = 0; x < w; x += 2) { \ + ((comp_t *)dst)[x * 2 + c[0]] = ((comp_t *)src[0])[x + 0]; \ + ((comp_t *)dst)[x * 2 + c[1]] = ((comp_t *)src[0])[x + 1]; \ + ((comp_t *)dst)[x * 2 + c[4]] = ((comp_t *)src[1])[x >> 1]; \ + ((comp_t *)dst)[x * 2 + c[5]] = ((comp_t *)src[2])[x >> 1]; \ + } \ + } + + +#define UN_P422(name, comp_t) \ + static void name(void *src, void *dst[], int w, uint8_t *c) { \ + for (int x = 0; x < w; x += 2) { \ + ((comp_t *)dst[0])[x + 0] = ((comp_t *)src)[x * 2 + c[0]]; \ + ((comp_t *)dst[0])[x + 1] = ((comp_t *)src)[x * 2 + c[1]]; \ + ((comp_t *)dst[1])[x >> 1] = ((comp_t *)src)[x * 2 + c[4]]; \ + ((comp_t *)dst[2])[x >> 1] = ((comp_t *)src)[x * 2 + c[5]]; \ + } \ + } + +PA_P422(pa_p422_8, uint8_t) +PA_P422(pa_p422_16, uint16_t) +UN_P422(un_p422_8, uint8_t) +UN_P422(un_p422_16, uint16_t) + +static void pa_p411_8(void *dst, void *src[], int w, uint8_t *c) +{ + for (int x = 0; x < w; x += 4) { + ((uint8_t *)dst)[x / 4 * 6 + c[0]] = ((uint8_t *)src[0])[x + 0]; + ((uint8_t *)dst)[x / 4 * 6 + c[1]] = ((uint8_t *)src[0])[x + 1]; + ((uint8_t *)dst)[x / 4 * 6 + c[2]] = ((uint8_t *)src[0])[x + 2]; + ((uint8_t *)dst)[x / 4 * 6 + c[3]] = ((uint8_t *)src[0])[x + 3]; + ((uint8_t *)dst)[x / 4 * 6 + c[4]] = ((uint8_t *)src[1])[x >> 2]; + ((uint8_t *)dst)[x / 4 * 6 + c[5]] = ((uint8_t *)src[2])[x >> 2]; + } +} + + +static void un_p411_8(void *src, void *dst[], int w, uint8_t *c) +{ + for (int x = 0; x < w; x += 4) { + ((uint8_t *)dst[0])[x + 0] = ((uint8_t *)src)[x / 4 * 6 + c[0]]; + ((uint8_t *)dst[0])[x + 1] = ((uint8_t *)src)[x / 4 * 6 + c[1]]; + ((uint8_t *)dst[0])[x + 2] = ((uint8_t *)src)[x / 4 * 6 + c[2]]; + ((uint8_t *)dst[0])[x + 3] = ((uint8_t *)src)[x / 4 * 6 + c[3]]; + ((uint8_t *)dst[1])[x >> 2] = ((uint8_t *)src)[x / 4 * 6 + c[4]]; + ((uint8_t *)dst[2])[x >> 2] = ((uint8_t 
*)src)[x / 4 * 6 + c[5]]; + } +} + +static void fringe_yuv_repack(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + void *pa = mp_image_pixel_ptr(a, 0, a_x, a_y); + + void *pb[4] = {0}; + for (int p = 0; p < b->num_planes; p++) + pb[p] = mp_image_pixel_ptr(b, p, b_x, b_y); + + rp->repack_fringe_yuv(pa, pb, w, rp->comp_map); +} + +static void setup_fringe_yuv_packer(struct mp_repack *rp) +{ + struct mp_imgfmt_desc desc = mp_imgfmt_get_desc(rp->imgfmt_a); + if (!(desc.flags & MP_IMGFLAG_PACKED_SS_YUV) || + mp_imgfmt_desc_get_num_comps(&desc) != 3 || + desc.align_x > 4) + return; + + uint8_t y_loc[4]; + if (!mp_imgfmt_get_packed_yuv_locations(desc.id, y_loc)) + return; + + for (int n = 0; n < MP_NUM_COMPONENTS; n++) { + if (!desc.comps[n].size) + continue; + if (desc.comps[n].size != desc.comps[0].size || + desc.comps[n].pad < 0 || + desc.comps[n].offset % desc.comps[0].size) + return; + if (n == 1 || n == 2) { + rp->comp_map[4 + (n - 1)] = + desc.comps[n].offset / desc.comps[0].size; + } + } + for (int n = 0; n < desc.align_x; n++) { + if (y_loc[n] % desc.comps[0].size) + return; + rp->comp_map[n] = y_loc[n] / desc.comps[0].size; + } + + if (desc.comps[0].size == 8 && desc.align_x == 2) { + rp->repack_fringe_yuv = rp->pack ? pa_p422_8 : un_p422_8; + } else if (desc.comps[0].size == 16 && desc.align_x == 2) { + rp->repack_fringe_yuv = rp->pack ? pa_p422_16 : un_p422_16; + } else if (desc.comps[0].size == 8 && desc.align_x == 4) { + rp->repack_fringe_yuv = rp->pack ? pa_p411_8 : un_p411_8; + } + + if (!rp->repack_fringe_yuv) + return; + + struct mp_regular_imgfmt yuvfmt = { + .component_type = MP_COMPONENT_TYPE_UINT, + // NB: same problem with P010 and not clearing padding. 
+ .component_size = desc.comps[0].size / 8u, + .num_planes = 3, + .planes = { {1, {1}}, {1, {2}}, {1, {3}} }, + .chroma_xs = desc.chroma_xs, + .chroma_ys = 0, + }; + rp->imgfmt_b = mp_find_regular_imgfmt(&yuvfmt); + rp->repack = fringe_yuv_repack; + + if (desc.endian_shift) { + rp->endian_size = 1 << desc.endian_shift; + assert(rp->endian_size == 2); + } +} + +static void repack_nv(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + int xs = a->fmt.chroma_xs; + + uint32_t *pa = mp_image_pixel_ptr(a, 1, a_x, a_y); + + void *pb[2]; + for (int p = 0; p < 2; p++) { + int s = rp->components[p]; + pb[p] = mp_image_pixel_ptr(b, s, b_x, b_y); + } + + rp->packed_repack_scanline(pa, pb, (w + (1 << xs) - 1) >> xs); +} + +static void setup_nv_packer(struct mp_repack *rp) +{ + struct mp_regular_imgfmt desc; + if (!mp_get_regular_imgfmt(&desc, rp->imgfmt_a)) + return; + + // Check for NV. + if (desc.num_planes != 2) + return; + if (desc.planes[0].num_components != 1 || desc.planes[0].components[0] != 1) + return; + if (desc.planes[1].num_components != 2) + return; + int cr0 = desc.planes[1].components[0]; + int cr1 = desc.planes[1].components[1]; + if (cr0 > cr1) + MPSWAP(int, cr0, cr1); + if (cr0 != 2 || cr1 != 3) + return; + + // Construct equivalent planar format. + struct mp_regular_imgfmt desc2 = desc; + desc2.num_planes = 3; + desc2.planes[1].num_components = 1; + desc2.planes[1].components[0] = 2; + desc2.planes[2].num_components = 1; + desc2.planes[2].components[0] = 3; + // For P010. Strangely this concept exists only for the NV format. + if (desc2.component_pad > 0) + desc2.component_pad = 0; + + int planar_fmt = mp_find_regular_imgfmt(&desc2); + if (!planar_fmt) + return; + + for (int i = 0; i < MP_ARRAY_SIZE(regular_repackers); i++) { + const struct regular_repacker *pa = &regular_repackers[i]; + + void (*repack_cb)(void *pa, void *pb[], int w) = + rp->pack ? 
pa->pa_scanline : pa->un_scanline; + + if (pa->packed_width != desc.component_size * 2 * 8 || + pa->component_width != desc.component_size * 8 || + pa->num_components != 2 || + pa->prepadding != 0 || + !repack_cb) + continue; + + rp->repack = repack_nv; + rp->passthrough_y = true; + rp->packed_repack_scanline = repack_cb; + rp->imgfmt_b = planar_fmt; + rp->components[0] = desc.planes[1].components[0] - 1; + rp->components[1] = desc.planes[1].components[1] - 1; + return; + } +} + +#define PA_F32(name, packed_t) \ + static void name(void *dst, float *src, int w, float m, float o, \ + uint32_t p_max) { \ + for (int x = 0; x < w; x++) { \ + ((packed_t *)dst)[x] = \ + MPCLAMP(lrint((src[x] + o) * m), 0, (packed_t)p_max); \ + } \ + } + +#define UN_F32(name, packed_t) \ + static void name(void *src, float *dst, int w, float m, float o, \ + uint32_t unused) { \ + for (int x = 0; x < w; x++) \ + dst[x] = ((packed_t *)src)[x] * m + o; \ + } + +PA_F32(pa_f32_8, uint8_t) +UN_F32(un_f32_8, uint8_t) +PA_F32(pa_f32_16, uint16_t) +UN_F32(un_f32_16, uint16_t) + +// In all this, float counts as "unpacked". +static void repack_float(struct mp_repack *rp, + struct mp_image *a, int a_x, int a_y, + struct mp_image *b, int b_x, int b_y, int w) +{ + assert(rp->f32_comp_size == 1 || rp->f32_comp_size == 2); + + void (*packer)(void *a, float *b, int w, float fm, float fb, uint32_t max) + = rp->pack ? (rp->f32_comp_size == 1 ? pa_f32_8 : pa_f32_16) + : (rp->f32_comp_size == 1 ? un_f32_8 : un_f32_16); + + for (int p = 0; p < b->num_planes; p++) { + int h = (1 << b->fmt.chroma_ys) - (1 << b->fmt.ys[p]) + 1; + for (int y = 0; y < h; y++) { + void *pa = mp_image_pixel_ptr_ny(a, p, a_x, a_y + y); + void *pb = mp_image_pixel_ptr_ny(b, p, b_x, b_y + y); + + packer(pa, pb, w >> b->fmt.xs[p], rp->f32_m[p], rp->f32_o[p], + rp->f32_pmax[p]); + } + } +} + +static void update_repack_float(struct mp_repack *rp) +{ + if (!rp->f32_comp_size) + return; + + // Image in input format. 
// Run all configured repack steps for one line (or one chroma row worth of
// lines) from the source buffer to the destination buffer that were set with
// repack_config_buffers().
//  dst_x, dst_y: position in the destination image
//  src_x, src_y: position in the source image
//  w:            number of pixels to process
// All alignment and bounds requirements are checked with assert() below.
void repack_line(struct mp_repack *rp, int dst_x, int dst_y,
                 int src_x, int src_y, int w)
{
    assert(rp->configured);

    // The first step reads from the user's src, the last one writes to dst.
    struct repack_step *first = &rp->steps[0];
    struct repack_step *last = &rp->steps[rp->num_steps - 1];

    // Coordinates must be non-negative, inside the (alignment-padded) images,
    // and aligned to the pixel alignment of the respective format. The width
    // only has to be a multiple of the horizontal chroma subsampling.
    assert(dst_x >= 0 && dst_y >= 0 && src_x >= 0 && src_y >= 0 && w >= 0);
    assert(dst_x + w <= MP_ALIGN_UP(last->buf[1]->w, last->fmt[1].align_x));
    assert(src_x + w <= MP_ALIGN_UP(first->buf[0]->w, first->fmt[0].align_x));
    assert(dst_y < last->buf[1]->h);
    assert(src_y < first->buf[0]->h);
    assert(!(dst_x & (last->fmt[1].align_x - 1)));
    assert(!(src_x & (first->fmt[0].align_x - 1)));
    assert(!(w & ((1 << first->fmt[0].chroma_xs) - 1)));
    assert(!(dst_y & (last->fmt[1].align_y - 1)));
    assert(!(src_y & (first->fmt[0].align_y - 1)));

    for (int n = 0; n < rp->num_steps; n++) {
        struct repack_step *rs = &rp->steps[n];

        // When writing to temporary buffers, always write to the start (maybe
        // helps with locality).
        int sx = rs->user_buf[0] ? src_x : 0;
        int sy = rs->user_buf[0] ? src_y : 0;
        int dx = rs->user_buf[1] ? dst_x : 0;
        int dy = rs->user_buf[1] ? dst_y : 0;

        // The repack callbacks use a fixed a/b argument order regardless of
        // direction, so map this step's input/output (buf[0]/buf[1]) onto
        // a/b: when packing, a is the output; when unpacking, a is the input.
        struct mp_image *buf_a = rs->buf[rp->pack];
        struct mp_image *buf_b = rs->buf[!rp->pack];
        int a_x = rp->pack ? dx : sx;
        int a_y = rp->pack ? dy : sy;
        int b_x = rp->pack ? sx : dx;
        int b_y = rp->pack ? sy : dy;

        switch (rs->type) {
        case REPACK_STEP_REPACK: {
            // rp->repack may be NULL, in which case only plane copies happen.
            if (rp->repack)
                rp->repack(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);

            // Copy the planes flagged in copy_buf[] by
            // repack_config_buffers().
            for (int p = 0; p < rs->fmt[0].num_planes; p++) {
                if (rp->copy_buf[p])
                    copy_plane(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w, p);
            }
            break;
        }
        case REPACK_STEP_ENDIAN:
            swap_endian(rs->buf[1], dx, dy, rs->buf[0], sx, sy, w,
                        rp->endian_size);
            break;
        case REPACK_STEP_FLOAT:
            repack_float(rp, buf_a, a_x, a_y, buf_b, b_x, b_y, w);
            break;
        }
    }
}
+ for (int n = 0; n < desc.num_planes; n++) { + if (desc.planes[n].num_components != 1) + return false; + int c = desc.planes[n].components[0]; + if (c < 1 || c > 4) + return false; + } + + rp->fmt_a = mp_imgfmt_get_desc(rp->imgfmt_a); + rp->fmt_b = mp_imgfmt_get_desc(rp->imgfmt_b); + + // This is if we did a pack step. + + if (rp->flags & REPACK_CREATE_PLANAR_F32) { + // imgfmt_b with float32 component type. + struct mp_regular_imgfmt fdesc = desc; + fdesc.component_type = MP_COMPONENT_TYPE_FLOAT; + fdesc.component_size = 4; + fdesc.component_pad = 0; + int ffmt = mp_find_regular_imgfmt(&fdesc); + if (!ffmt) + return false; + if (ffmt != rp->imgfmt_b) { + if (desc.component_type != MP_COMPONENT_TYPE_UINT || + (desc.component_size != 1 && desc.component_size != 2)) + return false; + rp->f32_comp_size = desc.component_size; + rp->f32_csp_space = MP_CSP_COUNT; + rp->f32_csp_levels = MP_CSP_LEVELS_COUNT; + rp->steps[rp->num_steps++] = (struct repack_step) { + .type = REPACK_STEP_FLOAT, + .fmt = { + mp_imgfmt_get_desc(ffmt), + rp->fmt_b, + }, + }; + } + } + + rp->steps[rp->num_steps++] = (struct repack_step) { + .type = REPACK_STEP_REPACK, + .fmt = { rp->fmt_b, rp->fmt_a }, + }; + + if (rp->endian_size) { + rp->steps[rp->num_steps++] = (struct repack_step) { + .type = REPACK_STEP_ENDIAN, + .fmt = { + rp->fmt_a, + mp_imgfmt_get_desc(rp->imgfmt_user), + }, + }; + } + + // Reverse if unpack (to reflect actual data flow) + if (!rp->pack) { + for (int n = 0; n < rp->num_steps / 2; n++) { + MPSWAP(struct repack_step, rp->steps[n], + rp->steps[rp->num_steps - 1 - n]); + } + for (int n = 0; n < rp->num_steps; n++) { + struct repack_step *rs = &rp->steps[n]; + MPSWAP(struct mp_imgfmt_desc, rs->fmt[0], rs->fmt[1]); + } + } + + for (int n = 0; n < rp->num_steps - 1; n++) + assert(rp->steps[n].fmt[1].id == rp->steps[n + 1].fmt[0].id); + + return true; +} + +static void reset_params(struct mp_repack *rp) +{ + rp->num_steps = 0; + rp->imgfmt_b = 0; + rp->repack = NULL; + 
rp->passthrough_y = false; + rp->endian_size = 0; + rp->packed_repack_scanline = NULL; + rp->comp_size = 0; + talloc_free(rp->comp_lut); + rp->comp_lut = NULL; +} + +static bool setup_format(struct mp_repack *rp) +{ + reset_params(rp); + rp->imgfmt_a = rp->imgfmt_user; + if (setup_format_ne(rp)) + return true; + // Try reverse endian. + reset_params(rp); + rp->imgfmt_a = mp_find_other_endian(rp->imgfmt_user); + return rp->imgfmt_a && setup_format_ne(rp); +} + +struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags) +{ + struct mp_repack *rp = talloc_zero(NULL, struct mp_repack); + rp->imgfmt_user = imgfmt; + rp->pack = pack; + rp->flags = flags; + + if (!setup_format(rp)) { + talloc_free(rp); + return NULL; + } + + return rp; +} + +int mp_repack_get_format_src(struct mp_repack *rp) +{ + return rp->steps[0].fmt[0].id; +} + +int mp_repack_get_format_dst(struct mp_repack *rp) +{ + return rp->steps[rp->num_steps - 1].fmt[1].id; +} + +int mp_repack_get_align_x(struct mp_repack *rp) +{ + // We really want the LCM between those, but since only one of them is + // packed (or they're the same format), and the chroma subsampling is the + // same for both, only the packed one matters. 
+ return rp->fmt_a.align_x; +} + +int mp_repack_get_align_y(struct mp_repack *rp) +{ + return rp->fmt_a.align_y; // should be the same for packed/planar formats +} + +static void image_realloc(struct mp_image **img, int fmt, int w, int h) +{ + if (*img && (*img)->imgfmt == fmt && (*img)->w == w && (*img)->h == h) + return; + talloc_free(*img); + *img = mp_image_alloc(fmt, w, h); +} + +bool repack_config_buffers(struct mp_repack *rp, + int dst_flags, struct mp_image *dst, + int src_flags, struct mp_image *src, + bool *enable_passthrough) +{ + struct repack_step *rs_first = &rp->steps[0]; + struct repack_step *rs_last = &rp->steps[rp->num_steps - 1]; + + rp->configured = false; + + assert(dst && src); + + int buf_w = MPMAX(dst->w, src->w); + + assert(dst->imgfmt == rs_last->fmt[1].id); + assert(src->imgfmt == rs_first->fmt[0].id); + + // Chain/allocate buffers. + + for (int n = 0; n < rp->num_steps; n++) + rp->steps[n].buf[0] = rp->steps[n].buf[1] = NULL; + + rs_first->buf[0] = src; + rs_last->buf[1] = dst; + + for (int n = 0; n < rp->num_steps; n++) { + struct repack_step *rs = &rp->steps[n]; + + if (!rs->buf[0]) { + assert(n > 0); + rs->buf[0] = rp->steps[n - 1].buf[1]; + } + + if (rs->buf[1]) + continue; + + // Note: since repack_line() can have different src/dst offsets, we + // can't do true in-place in general. 
#pragma once

#include <stdbool.h>

enum {
    // This controls behavior with different bit widths per component (like
    // RGB565). If ROUND_DOWN is specified, the planar format will use the min.
    // bit width of all components, otherwise the transformation is lossless.
    REPACK_CREATE_ROUND_DOWN = (1 << 0),

    // Expand some (not all) low bit depth fringe formats to 8 bit on unpack.
    REPACK_CREATE_EXPAND_8BIT = (1 << 1),

    // For mp_repack_create_planar(). If specified, the planar format uses a
    // float 32 bit sample format. No range expansion is done.
    REPACK_CREATE_PLANAR_F32 = (1 << 2),
};

struct mp_repack;
struct mp_image;

// Create a repacker between any format (imgfmt parameter) and an equivalent
// planar format (that is native endian). If pack==true, imgfmt is the output,
// otherwise it is the input. The respective other input/output is the planar
// format. The planar format can be queried with mp_repack_get_format_*().
// Note that some formats may change the "implied" colorspace (for example,
// packed xyz unpacks as rgb).
// If imgfmt is already planar, a passthrough repacker may be created.
//  imgfmt: src or dst format (usually packed, non-planar, etc.)
//  pack: true if imgfmt is dst, false if imgfmt is src
//  flags: any of REPACK_CREATE_* flags
//  returns: NULL on failure, otherwise free with talloc_free().
struct mp_repack *mp_repack_create_planar(int imgfmt, bool pack, int flags);

// Return input and output formats for which rp was created.
int mp_repack_get_format_src(struct mp_repack *rp);
int mp_repack_get_format_dst(struct mp_repack *rp);

// Return pixel alignment. For x, this is the lowest pixel count at which
// there is a byte boundary and a full chroma pixel (horizontal subsampling)
// on src/dst.
// For y, this is the pixel height of the vertical subsampling.
// Always returns a power of 2.
int mp_repack_get_align_x(struct mp_repack *rp);
int mp_repack_get_align_y(struct mp_repack *rp);

// Repack a single line from src to dst, as set in repack_config_buffers().
// For subsampled chroma formats, this copies as many luma/alpha rows as needed
// for a complete line (e.g. 2 luma lines, 1 chroma line for 4:2:0).
// dst_x, src_x, y must be aligned to the pixel alignment. w may be unaligned
// if at the right crop-border of the image, but must be always aligned to
// horiz. sub-sampling. y is subject to hslice.
void repack_line(struct mp_repack *rp, int dst_x, int dst_y,
                 int src_x, int src_y, int w);

// Configure with a source and target buffer. The rp instance will keep the
// mp_image pointers and access them on repack_line() calls. Refcounting is
// not respected - the caller needs to make sure dst is always writable.
// The images can have different sizes (as repack_line() lets you use different
// target coordinates for dst/src).
// This also allocates potentially required temporary buffers.
//  dst_flags: REPACK_BUF_* flags for dst
//  dst: where repack_line() writes to
//  src_flags: REPACK_BUF_* flags for src
//  src: where repack_line() reads from
//  enable_passthrough: if non-NULL, a bool array of size MP_MAX_PLANES indexed
//                      by plane; a true entry requests disabling copying the
//                      plane data to the dst plane. The function will write to
//                      this array whether the plane can really be passed through
//                      (i.e. will set array entries from true to false if pass-
//                      through is not possible). It writes to all MP_MAX_PLANES
//                      entries. If NULL, all entries are implicitly false.
//  returns: success (fails on OOM)
bool repack_config_buffers(struct mp_repack *rp,
                           int dst_flags, struct mp_image *dst,
                           int src_flags, struct mp_image *src,
                           bool *enable_passthrough);
// Global libswscale options from the command line (see sws_conf for the
// option names and value ranges).
struct sws_opts {
    int scaler;         // SWS_* scaler flag; ORed into mp_sws_context.flags
    // The following six values are passed to sws_getDefaultFilter() to build
    // the source filter.
    float lum_gblur;    // "lgb"
    float chr_gblur;    // "cgb"
    int chr_vshift;     // "cvs"
    int chr_hshift;     // "chs"
    float chr_sharpen;  // "cs"
    float lum_sharpen;  // "ls"
    bool fast;          // if false, mp_sws_hq_flags are added to the flags
    bool bitexact;      // adds SWS_BITEXACT to the flags
    bool zimg;          // "allow-zimg"; copied to mp_sws_context.allow_zimg
};
{"bitexact", OPT_BOOL(bitexact)}, + {"allow-zimg", OPT_BOOL(zimg)}, + {0} + }, + .size = sizeof(struct sws_opts), + .defaults = &(const struct sws_opts){ + .scaler = SWS_LANCZOS, + .zimg = true, + }, +}; + +// Highest quality, but also slowest. +static const int mp_sws_hq_flags = SWS_FULL_CHR_H_INT | SWS_FULL_CHR_H_INP | + SWS_ACCURATE_RND; + +// Fast, lossy. +const int mp_sws_fast_flags = SWS_BILINEAR; + +// Set ctx parameters to global command line flags. +static void mp_sws_update_from_cmdline(struct mp_sws_context *ctx) +{ + m_config_cache_update(ctx->opts_cache); + struct sws_opts *opts = ctx->opts_cache->opts; + + sws_freeFilter(ctx->src_filter); + ctx->src_filter = sws_getDefaultFilter(opts->lum_gblur, opts->chr_gblur, + opts->lum_sharpen, opts->chr_sharpen, + opts->chr_hshift, opts->chr_vshift, 0); + ctx->force_reload = true; + + ctx->flags = SWS_PRINT_INFO; + ctx->flags |= opts->scaler; + if (!opts->fast) + ctx->flags |= mp_sws_hq_flags; + if (opts->bitexact) + ctx->flags |= SWS_BITEXACT; + + ctx->allow_zimg = opts->zimg; +} + +bool mp_sws_supported_format(int imgfmt) +{ + enum AVPixelFormat av_format = imgfmt2pixfmt(imgfmt); + + return av_format != AV_PIX_FMT_NONE && sws_isSupportedInput(av_format) + && sws_isSupportedOutput(av_format); +} + +#if HAVE_ZIMG +static bool allow_zimg(struct mp_sws_context *ctx) +{ + return ctx->force_scaler == MP_SWS_ZIMG || + (ctx->force_scaler == MP_SWS_AUTO && ctx->allow_zimg); +} +#endif + +static bool allow_sws(struct mp_sws_context *ctx) +{ + return ctx->force_scaler == MP_SWS_SWS || ctx->force_scaler == MP_SWS_AUTO; +} + +bool mp_sws_supports_formats(struct mp_sws_context *ctx, + int imgfmt_out, int imgfmt_in) +{ +#if HAVE_ZIMG + if (allow_zimg(ctx)) { + if (mp_zimg_supports_in_format(imgfmt_in) && + mp_zimg_supports_out_format(imgfmt_out)) + return true; + } +#endif + + return allow_sws(ctx) && + sws_isSupportedInput(imgfmt2pixfmt(imgfmt_in)) && + sws_isSupportedOutput(imgfmt2pixfmt(imgfmt_out)); +} + +static int 
mp_csp_to_sws_colorspace(enum mp_csp csp) +{ + // The SWS_CS_* macros are just convenience redefinitions of the + // AVCOL_SPC_* macros, inside swscale.h. + return mp_csp_to_avcol_spc(csp); +} + +static bool cache_valid(struct mp_sws_context *ctx) +{ + struct mp_sws_context *old = ctx->cached; + if (ctx->force_reload) + return false; + return mp_image_params_equal(&ctx->src, &old->src) && + mp_image_params_equal(&ctx->dst, &old->dst) && + ctx->flags == old->flags && + ctx->allow_zimg == old->allow_zimg && + ctx->force_scaler == old->force_scaler && + (!ctx->opts_cache || !m_config_cache_update(ctx->opts_cache)); +} + +static void free_mp_sws(void *p) +{ + struct mp_sws_context *ctx = p; + sws_freeContext(ctx->sws); + sws_freeFilter(ctx->src_filter); + sws_freeFilter(ctx->dst_filter); + TA_FREEP(&ctx->aligned_src); + TA_FREEP(&ctx->aligned_dst); +} + +// You're supposed to set your scaling parameters on the returned context. +// Free the context with talloc_free(). +struct mp_sws_context *mp_sws_alloc(void *talloc_ctx) +{ + struct mp_sws_context *ctx = talloc_ptrtype(talloc_ctx, ctx); + *ctx = (struct mp_sws_context) { + .log = mp_null_log, + .flags = SWS_BILINEAR, + .force_reload = true, + .params = {SWS_PARAM_DEFAULT, SWS_PARAM_DEFAULT}, + .cached = talloc_zero(ctx, struct mp_sws_context), + }; + talloc_set_destructor(ctx, free_mp_sws); + +#if HAVE_ZIMG + ctx->zimg = mp_zimg_alloc(); + talloc_steal(ctx, ctx->zimg); +#endif + + return ctx; +} + +// Enable auto-update of parameters from command line. Don't try to set custom +// options (other than possibly .src/.dst), because they might be overwritten +// if the user changes any options. +void mp_sws_enable_cmdline_opts(struct mp_sws_context *ctx, struct mpv_global *g) +{ + // Should only ever be NULL for tests. 
+ if (!g) + return; + if (ctx->opts_cache) + return; + + ctx->opts_cache = m_config_cache_alloc(ctx, g, &sws_conf); + ctx->force_reload = true; + mp_sws_update_from_cmdline(ctx); + +#if HAVE_ZIMG + mp_zimg_enable_cmdline_opts(ctx->zimg, g); +#endif +} + +// Reinitialize (if needed) - return error code. +// Optional, but possibly useful to avoid having to handle mp_sws_scale errors. +int mp_sws_reinit(struct mp_sws_context *ctx) +{ + struct mp_image_params src = ctx->src; + struct mp_image_params dst = ctx->dst; + + if (cache_valid(ctx)) + return 0; + + if (ctx->opts_cache) + mp_sws_update_from_cmdline(ctx); + + sws_freeContext(ctx->sws); + ctx->sws = NULL; + ctx->zimg_ok = false; + TA_FREEP(&ctx->aligned_src); + TA_FREEP(&ctx->aligned_dst); + +#if HAVE_ZIMG + if (allow_zimg(ctx)) { + ctx->zimg->log = ctx->log; + ctx->zimg->src = src; + ctx->zimg->dst = dst; + if (ctx->zimg_opts) + ctx->zimg->opts = *ctx->zimg_opts; + if (mp_zimg_config(ctx->zimg)) { + ctx->zimg_ok = true; + MP_VERBOSE(ctx, "Using zimg.\n"); + goto success; + } + MP_WARN(ctx, "Not using zimg, falling back to swscale.\n"); + } +#endif + + if (!allow_sws(ctx)) { + MP_ERR(ctx, "No scaler.\n"); + return -1; + } + + ctx->sws = sws_alloc_context(); + if (!ctx->sws) + return -1; + + mp_image_params_guess_csp(&src); // sanitize colorspace/colorlevels + mp_image_params_guess_csp(&dst); + + enum AVPixelFormat s_fmt = imgfmt2pixfmt(src.imgfmt); + if (s_fmt == AV_PIX_FMT_NONE || sws_isSupportedInput(s_fmt) < 1) { + MP_ERR(ctx, "Input image format %s not supported by libswscale.\n", + mp_imgfmt_to_name(src.imgfmt)); + return -1; + } + + enum AVPixelFormat d_fmt = imgfmt2pixfmt(dst.imgfmt); + if (d_fmt == AV_PIX_FMT_NONE || sws_isSupportedOutput(d_fmt) < 1) { + MP_ERR(ctx, "Output image format %s not supported by libswscale.\n", + mp_imgfmt_to_name(dst.imgfmt)); + return -1; + } + + int s_csp = mp_csp_to_sws_colorspace(src.color.space); + int s_range = src.color.levels == MP_CSP_LEVELS_PC; + + int d_csp = 
mp_csp_to_sws_colorspace(dst.color.space); + int d_range = dst.color.levels == MP_CSP_LEVELS_PC; + + av_opt_set_int(ctx->sws, "sws_flags", ctx->flags, 0); + + av_opt_set_int(ctx->sws, "srcw", src.w, 0); + av_opt_set_int(ctx->sws, "srch", src.h, 0); + av_opt_set_int(ctx->sws, "src_format", s_fmt, 0); + + av_opt_set_int(ctx->sws, "dstw", dst.w, 0); + av_opt_set_int(ctx->sws, "dsth", dst.h, 0); + av_opt_set_int(ctx->sws, "dst_format", d_fmt, 0); + + av_opt_set_double(ctx->sws, "param0", ctx->params[0], 0); + av_opt_set_double(ctx->sws, "param1", ctx->params[1], 0); + + int cr_src = mp_chroma_location_to_av(src.chroma_location); + int cr_dst = mp_chroma_location_to_av(dst.chroma_location); + int cr_xpos, cr_ypos; +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 37, 100) + if (av_chroma_location_enum_to_pos(&cr_xpos, &cr_ypos, cr_src) >= 0) { + av_opt_set_int(ctx->sws, "src_h_chr_pos", cr_xpos, 0); + av_opt_set_int(ctx->sws, "src_v_chr_pos", cr_ypos, 0); + } + if (av_chroma_location_enum_to_pos(&cr_xpos, &cr_ypos, cr_dst) >= 0) { + av_opt_set_int(ctx->sws, "dst_h_chr_pos", cr_xpos, 0); + av_opt_set_int(ctx->sws, "dst_v_chr_pos", cr_ypos, 0); + } +#else + if (avcodec_enum_to_chroma_pos(&cr_xpos, &cr_ypos, cr_src) >= 0) { + av_opt_set_int(ctx->sws, "src_h_chr_pos", cr_xpos, 0); + av_opt_set_int(ctx->sws, "src_v_chr_pos", cr_ypos, 0); + } + if (avcodec_enum_to_chroma_pos(&cr_xpos, &cr_ypos, cr_dst) >= 0) { + av_opt_set_int(ctx->sws, "dst_h_chr_pos", cr_xpos, 0); + av_opt_set_int(ctx->sws, "dst_v_chr_pos", cr_ypos, 0); + } +#endif + + // This can fail even with normal operation, e.g. if a conversion path + // simply does not support these settings. 
+ int r = + sws_setColorspaceDetails(ctx->sws, sws_getCoefficients(s_csp), s_range, + sws_getCoefficients(d_csp), d_range, + 0, 1 << 16, 1 << 16); + ctx->supports_csp = r >= 0; + + if (sws_init_context(ctx->sws, ctx->src_filter, ctx->dst_filter) < 0) + return -1; + +#if HAVE_ZIMG +success: +#endif + + ctx->force_reload = false; + *ctx->cached = *ctx; + return 1; +} + +static struct mp_image *check_alignment(struct mp_log *log, + struct mp_image **alloc, + struct mp_image *img) +{ + // It's completely unclear which alignment libswscale wants (for performance) + // or requires (for avoiding crashes and memory corruption). + // Is it av_cpu_max_align()? Is it the hardcoded AVFrame "default" of 32 + // in get_video_buffer()? Is it whatever avcodec_align_dimensions2() + // determines? It's like you can't win if you try to prevent libswscale from + // corrupting memory... + // So use 32, a value that has been experimentally determined to be safe, + // and which in most cases is not larger than decoder output. It is smaller + // or equal to what most image allocators in mpv/ffmpeg use. + size_t align = 32; + assert(align <= MP_IMAGE_BYTE_ALIGN); // or mp_image_alloc will not cut it + + bool is_aligned = true; + for (int p = 0; p < img->num_planes; p++) { + is_aligned &= MP_IS_ALIGNED((uintptr_t)img->planes[p], align); + is_aligned &= MP_IS_ALIGNED(labs(img->stride[p]), align); + } + + if (is_aligned) + return img; + + if (!*alloc) { + mp_verbose(log, "unaligned libswscale parameter; using slow copy.\n"); + *alloc = mp_image_alloc(img->imgfmt, img->w, img->h); + if (!*alloc) + return NULL; + } + + mp_image_copy_attributes(*alloc, img); + return *alloc; +} + +// Scale from src to dst - if src/dst have different parameters from previous +// calls, the context is reinitialized. Return error code. (It can fail if +// reinitialization was necessary, and swscale returned an error.) 
// Convert/scale src into dst, reinitializing the scaler first if the image
// parameters (or other settings) changed since the previous call.
//  returns: 0 on success, a negative value on error
int mp_sws_scale(struct mp_sws_context *ctx, struct mp_image *dst,
                 struct mp_image *src)
{
    // The image parameters always come from the actual images.
    ctx->src = src->params;
    ctx->dst = dst->params;

    int r = mp_sws_reinit(ctx);
    if (r < 0) {
        MP_ERR(ctx, "libswscale initialization failed.\n");
        return r;
    }

#if HAVE_ZIMG
    // If the last reinit configured zimg, libswscale is not involved at all.
    if (ctx->zimg_ok)
        return mp_zimg_convert(ctx->zimg, dst, src) ? 0 : -1;
#endif

    if (src->params.color.space == MP_CSP_XYZ && dst->params.color.space != MP_CSP_XYZ) {
        // swscale has hardcoded gamma 2.2 internally and 2.6 for XYZ
        dst->params.color.gamma = MP_CSP_TRC_GAMMA22;
        // and sRGB primaries...
        dst->params.color.primaries = MP_CSP_PRIM_BT_709;
        // it doesn't adjust white point though, but it is not worth to support
        // this case. It would require custom prim with equal energy white point
        // and sRGB primaries.
    }

    // Images whose plane pointers or strides are not sufficiently aligned are
    // replaced by aligned copies owned by ctx (see check_alignment()).
    struct mp_image *a_src = check_alignment(ctx->log, &ctx->aligned_src, src);
    struct mp_image *a_dst = check_alignment(ctx->log, &ctx->aligned_dst, dst);
    if (!a_src || !a_dst) {
        MP_ERR(ctx, "image allocation failed.\n");
        return -1;
    }

    // Fill the aligned source copy, if one is used.
    if (a_src != src)
        mp_image_copy(a_src, src);

    // NOTE(review): the return value of sws_scale() is not checked here.
    sws_scale(ctx->sws, (const uint8_t *const *) a_src->planes, a_src->stride,
              0, a_src->h, a_dst->planes, a_dst->stride);

    // Copy the result back, if scaling went to an aligned temporary.
    if (a_dst != dst)
        mp_image_copy(dst, a_dst);

    return 0;
}
LITTLE_ENDIAN +#if defined(AV_PIX_FMT_YA16) && defined(AV_PIX_FMT_RGBA64) + {AV_PIX_FMT_YA16BE, AV_PIX_FMT_YA16LE}, + {AV_PIX_FMT_RGBA64BE, AV_PIX_FMT_RGBA64LE}, + {AV_PIX_FMT_GRAY16BE, AV_PIX_FMT_GRAY16LE}, + {AV_PIX_FMT_RGB48BE, AV_PIX_FMT_RGB48LE}, +#endif +#endif + {AV_PIX_FMT_NONE, AV_PIX_FMT_NONE} +}; + +// Swap _some_ non-native endian formats to native. We do this specifically +// for pixel formats used by PNG, to avoid going through libswscale, which +// might reduce the effective bit depth in some cases. +struct mp_image *mp_img_swap_to_native(struct mp_image *img) +{ + int avfmt = imgfmt2pixfmt(img->imgfmt); + int to = AV_PIX_FMT_NONE; + for (int n = 0; endian_swaps[n][0] != AV_PIX_FMT_NONE; n++) { + if (endian_swaps[n][0] == avfmt) + to = endian_swaps[n][1]; + } + if (to == AV_PIX_FMT_NONE || !mp_image_make_writeable(img)) + return img; + int elems = img->fmt.bpp[0] / 8 / 2 * img->w; + for (int y = 0; y < img->h; y++) { + uint16_t *p = (uint16_t *)(img->planes[0] + y * img->stride[0]); + for (int i = 0; i < elems; i++) + p[i] = av_be2ne16(p[i]); + } + mp_image_setfmt(img, pixfmt2imgfmt(to)); + return img; +} + +// vim: ts=4 sw=4 et tw=80 diff --git a/video/sws_utils.h b/video/sws_utils.h new file mode 100644 index 0000000..24bec07 --- /dev/null +++ b/video/sws_utils.h @@ -0,0 +1,82 @@ +#ifndef MPLAYER_SWS_UTILS_H +#define MPLAYER_SWS_UTILS_H + +#include <stdbool.h> + +#include "mp_image.h" + +struct mp_image; +struct mpv_global; + +// libswscale currently requires 16 bytes alignment for row pointers and +// strides. Otherwise, it will print warnings and use slow codepaths. +// Guaranteed to be a power of 2 and > 1. 
// State for converting/scaling images with libswscale or zimg.
// Allocate with mp_sws_alloc().
struct mp_sws_context {
    // Can be set for verbose error printing.
    struct mp_log *log;
    // User configuration. These can be changed freely, at any time.
    // mp_sws_scale() will handle the changes transparently.
    int flags;
    bool allow_zimg; // use zimg if available (ignores filters and all)
    // If set, the next mp_sws_reinit() always reinitializes the scaler. It is
    // cleared again once reinitialization succeeded.
    bool force_reload;
    // These are also implicitly set by mp_sws_scale(), and thus optional.
    // Setting them before that call makes sense when using mp_sws_reinit().
    struct mp_image_params src, dst;

    // This is unfortunately a hack: bypass command line choice
    enum mp_sws_scaler force_scaler;

    // If zimg is used. Need to manually invalidate cache (set force_reload).
    // Conflicts with enabling command line opts.
    struct zimg_opts *zimg_opts;

    // Changing these requires setting force_reload=true.
    // By default, they are NULL.
    // Freeing the mp_sws_context will deallocate these if set.
    struct SwsFilter *src_filter, *dst_filter;
    // Passed to libswscale as the "param0"/"param1" options.
    double params[2];

    // Cached context (if any)
    struct SwsContext *sws;
    // Whether sws_setColorspaceDetails() succeeded during the last
    // libswscale initialization.
    bool supports_csp;

    // Private.
    // Set by mp_sws_enable_cmdline_opts(); NULL if command line options are
    // not used.
    struct m_config_cache *opts_cache;
    struct mp_sws_context *cached; // contains parameters for which sws is valid
    struct mp_zimg_context *zimg;
    // True if the last reinit configured zimg successfully; mp_sws_scale()
    // then converts with zimg instead of libswscale.
    bool zimg_ok;
    // Temporary images used by mp_sws_scale() when the caller's images are
    // not sufficiently aligned for libswscale; allocated on demand.
    struct mp_image *aligned_src, *aligned_dst;
};
+ */ + +#include <assert.h> + +#include "config.h" + +#include "vaapi.h" +#include "common/common.h" +#include "common/msg.h" +#include "osdep/threads.h" +#include "mp_image.h" +#include "img_format.h" +#include "mp_image_pool.h" +#include "options/m_config.h" + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_vaapi.h> + +struct vaapi_opts { + char *path; +}; + +#define OPT_BASE_STRUCT struct vaapi_opts +const struct m_sub_options vaapi_conf = { + .opts = (const struct m_option[]) { + {"device", OPT_STRING(path)}, + {0}, + }, + .defaults = &(const struct vaapi_opts) { + .path = "/dev/dri/renderD128", + }, + .size = sizeof(struct vaapi_opts), +}; + +int va_get_colorspace_flag(enum mp_csp csp) +{ + switch (csp) { + case MP_CSP_BT_601: return VA_SRC_BT601; + case MP_CSP_BT_709: return VA_SRC_BT709; + case MP_CSP_SMPTE_240M: return VA_SRC_SMPTE_240; + } + return 0; +} + +static void va_message_callback(void *context, const char *msg, int mp_level) +{ + struct mp_vaapi_ctx *res = context; + mp_msg(res->log, mp_level, "libva: %s", msg); +} + +static void va_error_callback(void *context, const char *msg) +{ + va_message_callback(context, msg, MSGL_ERR); +} + +static void va_info_callback(void *context, const char *msg) +{ + va_message_callback(context, msg, MSGL_DEBUG); +} + +static void free_device_ref(struct AVHWDeviceContext *hwctx) +{ + struct mp_vaapi_ctx *ctx = hwctx->user_opaque; + + if (ctx->display) + vaTerminate(ctx->display); + + if (ctx->destroy_native_ctx) + ctx->destroy_native_ctx(ctx->native_ctx); + + talloc_free(ctx); +} + +struct mp_vaapi_ctx *va_initialize(VADisplay *display, struct mp_log *plog, + bool probing) +{ + AVBufferRef *avref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VAAPI); + if (!avref) + return NULL; + + AVHWDeviceContext *hwctx = (void *)avref->data; + AVVAAPIDeviceContext *vactx = hwctx->hwctx; + + struct mp_vaapi_ctx *res = talloc_ptrtype(NULL, res); + *res = (struct mp_vaapi_ctx) { + .log = mp_log_new(res, plog, "/vaapi"), 
+ .display = display, + .av_device_ref = avref, + .hwctx = { + .av_device_ref = avref, + }, + }; + + hwctx->free = free_device_ref; + hwctx->user_opaque = res; + + vaSetErrorCallback(display, va_error_callback, res); + vaSetInfoCallback(display, va_info_callback, res); + + int major, minor; + int status = vaInitialize(display, &major, &minor); + if (status != VA_STATUS_SUCCESS) { + if (!probing) + MP_ERR(res, "Failed to initialize VAAPI: %s\n", vaErrorStr(status)); + goto error; + } + MP_VERBOSE(res, "Initialized VAAPI: version %d.%d\n", major, minor); + + vactx->display = res->display; + + if (av_hwdevice_ctx_init(res->av_device_ref) < 0) + goto error; + + return res; + +error: + res->display = NULL; // do not vaTerminate this + va_destroy(res); + return NULL; +} + +// Undo va_initialize, and close the VADisplay. +void va_destroy(struct mp_vaapi_ctx *ctx) +{ + if (!ctx) + return; + + AVBufferRef *ref = ctx->av_device_ref; + av_buffer_unref(&ref); // frees ctx as well +} + +VASurfaceID va_surface_id(struct mp_image *mpi) +{ + return mpi && mpi->imgfmt == IMGFMT_VAAPI ? 
+ (VASurfaceID)(uintptr_t)mpi->planes[3] : VA_INVALID_ID; +} + +static bool is_emulated(struct AVBufferRef *hw_device_ctx) +{ + AVHWDeviceContext *hwctx = (void *)hw_device_ctx->data; + AVVAAPIDeviceContext *vactx = hwctx->hwctx; + + const char *s = vaQueryVendorString(vactx->display); + return s && strstr(s, "VDPAU backend"); +} + + +bool va_guess_if_emulated(struct mp_vaapi_ctx *ctx) +{ + return is_emulated(ctx->av_device_ref); +} + +struct va_native_display { + void (*create)(VADisplay **out_display, void **out_native_ctx, + const char *path); + void (*destroy)(void *native_ctx); +}; + +#if HAVE_VAAPI_X11 +#include <X11/Xlib.h> +#include <va/va_x11.h> + +static void x11_destroy(void *native_ctx) +{ + XCloseDisplay(native_ctx); +} + +static void x11_create(VADisplay **out_display, void **out_native_ctx, + const char *path) +{ + void *native_display = XOpenDisplay(NULL); + if (!native_display) + return; + *out_display = vaGetDisplay(native_display); + if (*out_display) { + *out_native_ctx = native_display; + } else { + XCloseDisplay(native_display); + } +} + +static const struct va_native_display disp_x11 = { + .create = x11_create, + .destroy = x11_destroy, +}; +#endif + +#if HAVE_VAAPI_DRM +#include <unistd.h> +#include <fcntl.h> +#include <va/va_drm.h> + +struct va_native_display_drm { + int drm_fd; +}; + +static void drm_destroy(void *native_ctx) +{ + struct va_native_display_drm *ctx = native_ctx; + close(ctx->drm_fd); + talloc_free(ctx); +} + +static void drm_create(VADisplay **out_display, void **out_native_ctx, + const char *path) +{ + int drm_fd = open(path, O_RDWR); + if (drm_fd < 0) + return; + + struct va_native_display_drm *ctx = talloc_ptrtype(NULL, ctx); + ctx->drm_fd = drm_fd; + *out_display = vaGetDisplayDRM(drm_fd); + if (*out_display) { + *out_native_ctx = ctx; + return; + } + + close(drm_fd); + talloc_free(ctx); +} + +static const struct va_native_display disp_drm = { + .create = drm_create, + .destroy = drm_destroy, +}; +#endif + +static const 
struct va_native_display *const native_displays[] = { +#if HAVE_VAAPI_DRM + &disp_drm, +#endif +#if HAVE_VAAPI_X11 + &disp_x11, +#endif + NULL +}; + +static struct AVBufferRef *va_create_standalone(struct mpv_global *global, + struct mp_log *log, struct hwcontext_create_dev_params *params) +{ + struct AVBufferRef *ret = NULL; + struct vaapi_opts *opts = mp_get_config_group(NULL, global, &vaapi_conf); + + for (int n = 0; native_displays[n]; n++) { + VADisplay *display = NULL; + void *native_ctx = NULL; + native_displays[n]->create(&display, &native_ctx, opts->path); + if (display) { + struct mp_vaapi_ctx *ctx = + va_initialize(display, log, params->probing); + if (!ctx) { + vaTerminate(display); + native_displays[n]->destroy(native_ctx); + goto end; + } + ctx->native_ctx = native_ctx; + ctx->destroy_native_ctx = native_displays[n]->destroy; + ret = ctx->hwctx.av_device_ref; + goto end; + } + } + +end: + talloc_free(opts); + return ret; +} + +const struct hwcontext_fns hwcontext_fns_vaapi = { + .av_hwdevice_type = AV_HWDEVICE_TYPE_VAAPI, + .create_dev = va_create_standalone, + .is_emulated = is_emulated, +}; diff --git a/video/vaapi.h b/video/vaapi.h new file mode 100644 index 0000000..56235bc --- /dev/null +++ b/video/vaapi.h @@ -0,0 +1,54 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef MPV_VAAPI_H +#define MPV_VAAPI_H + +#include <stdbool.h> +#include <inttypes.h> +#include <va/va.h> + +#include "mp_image.h" +#include "hwdec.h" + +struct mp_vaapi_ctx { + struct mp_hwdec_ctx hwctx; + struct mp_log *log; + VADisplay display; + struct AVBufferRef *av_device_ref; // AVVAAPIDeviceContext* + // Internal, for va_create_standalone() + void *native_ctx; + void (*destroy_native_ctx)(void *native_ctx); +}; + +#define CHECK_VA_STATUS_LEVEL(ctx, msg, level) \ + (status == VA_STATUS_SUCCESS ? true \ + : (MP_MSG(ctx, level, "%s failed (%s)\n", msg, vaErrorStr(status)), false)) + +#define CHECK_VA_STATUS(ctx, msg) \ + CHECK_VA_STATUS_LEVEL(ctx, msg, MSGL_ERR) + +int va_get_colorspace_flag(enum mp_csp csp); + +struct mp_vaapi_ctx * va_initialize(VADisplay *display, struct mp_log *plog, bool probing); +void va_destroy(struct mp_vaapi_ctx *ctx); + +VASurfaceID va_surface_id(struct mp_image *mpi); + +bool va_guess_if_emulated(struct mp_vaapi_ctx *ctx); + +#endif diff --git a/video/vdpau.c b/video/vdpau.c new file mode 100644 index 0000000..15985d6 --- /dev/null +++ b/video/vdpau.c @@ -0,0 +1,574 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_vdpau.h> + +#include "vdpau.h" + +#include "osdep/threads.h" +#include "osdep/timer.h" + +#include "video/out/x11_common.h" +#include "img_format.h" +#include "mp_image.h" +#include "mp_image_pool.h" +#include "vdpau_mixer.h" + +static void mark_vdpau_objects_uninitialized(struct mp_vdpau_ctx *ctx) +{ + for (int i = 0; i < MAX_VIDEO_SURFACES; i++) { + ctx->video_surfaces[i].surface = VDP_INVALID_HANDLE; + ctx->video_surfaces[i].osurface = VDP_INVALID_HANDLE; + ctx->video_surfaces[i].allocated = false; + } + ctx->vdp_device = VDP_INVALID_HANDLE; + ctx->preemption_obj = VDP_INVALID_HANDLE; +} + +static void preemption_callback(VdpDevice device, void *context) +{ + struct mp_vdpau_ctx *ctx = context; + + mp_mutex_lock(&ctx->preempt_lock); + ctx->is_preempted = true; + mp_mutex_unlock(&ctx->preempt_lock); +} + +static int win_x11_init_vdpau_procs(struct mp_vdpau_ctx *ctx, bool probing) +{ + Display *x11 = ctx->x11; + VdpStatus vdp_st; + + // Don't operate on ctx->vdp directly, so that even if init fails, ctx->vdp + // will have the function pointers from the previous successful init, and + // won't randomly make other code crash on calling NULL pointers. + struct vdp_functions vdp = {0}; + + if (!x11) + return -1; + + struct vdp_function { + const int id; + int offset; + }; + + static const struct vdp_function vdp_func[] = { +#define VDP_FUNCTION(_, macro_name, mp_name) {macro_name, offsetof(struct vdp_functions, mp_name)}, +#include "video/vdpau_functions.inc" +#undef VDP_FUNCTION + {0, -1} + }; + + VdpGetProcAddress *get_proc_address; + vdp_st = vdp_device_create_x11(x11, DefaultScreen(x11), &ctx->vdp_device, + &get_proc_address); + if (vdp_st != VDP_STATUS_OK) { + if (ctx->is_preempted) { + MP_DBG(ctx, "Error calling vdp_device_create_x11 while preempted: %d\n", + vdp_st); + } else { + int lev = probing ? 
MSGL_V : MSGL_ERR; + mp_msg(ctx->log, lev, "Error when calling vdp_device_create_x11: %d\n", + vdp_st); + } + return -1; + } + + for (const struct vdp_function *dsc = vdp_func; dsc->offset >= 0; dsc++) { + vdp_st = get_proc_address(ctx->vdp_device, dsc->id, + (void **)((char *)&vdp + dsc->offset)); + if (vdp_st != VDP_STATUS_OK) { + MP_ERR(ctx, "Error when calling vdp_get_proc_address(function " + "id %d): %s\n", dsc->id, + vdp.get_error_string ? vdp.get_error_string(vdp_st) : "?"); + return -1; + } + } + + ctx->vdp = vdp; + ctx->get_proc_address = get_proc_address; + + if (ctx->av_device_ref) { + AVHWDeviceContext *hwctx = (void *)ctx->av_device_ref->data; + AVVDPAUDeviceContext *vdctx = hwctx->hwctx; + + vdctx->device = ctx->vdp_device; + vdctx->get_proc_address = ctx->get_proc_address; + } + + vdp_st = vdp.output_surface_create(ctx->vdp_device, VDP_RGBA_FORMAT_B8G8R8A8, + 1, 1, &ctx->preemption_obj); + if (vdp_st != VDP_STATUS_OK) { + MP_ERR(ctx, "Could not create dummy object: %s", + vdp.get_error_string(vdp_st)); + return -1; + } + + vdp.preemption_callback_register(ctx->vdp_device, preemption_callback, ctx); + return 0; +} + +static int handle_preemption(struct mp_vdpau_ctx *ctx) +{ + if (!ctx->is_preempted) + return 0; + mark_vdpau_objects_uninitialized(ctx); + if (!ctx->preemption_user_notified) { + MP_ERR(ctx, "Got display preemption notice! Will attempt to recover.\n"); + ctx->preemption_user_notified = true; + } + /* Trying to initialize seems to be quite slow, so only try once a + * second to avoid using 100% CPU. 
*/ + if (ctx->last_preemption_retry_fail && + mp_time_sec() - ctx->last_preemption_retry_fail < 1.0) + return -1; + if (win_x11_init_vdpau_procs(ctx, false) < 0) { + ctx->last_preemption_retry_fail = mp_time_sec(); + return -1; + } + ctx->preemption_user_notified = false; + ctx->last_preemption_retry_fail = 0; + ctx->is_preempted = false; + ctx->preemption_counter++; + MP_INFO(ctx, "Recovered from display preemption.\n"); + return 1; +} + +// Check whether vdpau display preemption happened. The caller provides a +// preemption counter, which contains the logical timestamp of the last +// preemption handled by the caller. The counter can be 0 for init. +// If counter is NULL, only ever return -1 or 1. +// Return values: +// -1: the display is currently preempted, and vdpau can't be used +// 0: a preemption event happened, and the caller must recover +// (*counter is updated, and a second call will report status ok) +// 1: everything is fine, no preemption happened +int mp_vdpau_handle_preemption(struct mp_vdpau_ctx *ctx, uint64_t *counter) +{ + int r = 1; + mp_mutex_lock(&ctx->preempt_lock); + + const void *p[4] = {&(uint32_t){0}}; + uint32_t stride[4] = {4}; + VdpRect rc = {0}; + ctx->vdp.output_surface_put_bits_native(ctx->preemption_obj, p, stride, &rc); + + // First time init + if (counter && !*counter) + *counter = ctx->preemption_counter; + + if (handle_preemption(ctx) < 0) + r = -1; + + if (counter && r > 0 && *counter < ctx->preemption_counter) { + *counter = ctx->preemption_counter; + r = 0; // signal recovery after preemption + } + + mp_mutex_unlock(&ctx->preempt_lock); + return r; +} + +struct surface_ref { + struct mp_vdpau_ctx *ctx; + int index; +}; + +static void release_decoder_surface(void *ptr) +{ + struct surface_ref *r = ptr; + struct mp_vdpau_ctx *ctx = r->ctx; + + mp_mutex_lock(&ctx->pool_lock); + assert(ctx->video_surfaces[r->index].in_use); + ctx->video_surfaces[r->index].in_use = false; + mp_mutex_unlock(&ctx->pool_lock); + + talloc_free(r); 
+} + +static struct mp_image *create_ref(struct mp_vdpau_ctx *ctx, int index) +{ + struct surface_entry *e = &ctx->video_surfaces[index]; + assert(!e->in_use); + e->in_use = true; + e->age = ctx->age_counter++; + struct surface_ref *ref = talloc_ptrtype(NULL, ref); + *ref = (struct surface_ref){ctx, index}; + struct mp_image *res = + mp_image_new_custom_ref(NULL, ref, release_decoder_surface); + if (res) { + mp_image_setfmt(res, e->rgb ? IMGFMT_VDPAU_OUTPUT : IMGFMT_VDPAU); + mp_image_set_size(res, e->w, e->h); + res->planes[0] = (void *)"dummy"; // must be non-NULL, otherwise arbitrary + res->planes[3] = (void *)(intptr_t)(e->rgb ? e->osurface : e->surface); + } + return res; +} + +static struct mp_image *mp_vdpau_get_surface(struct mp_vdpau_ctx *ctx, + VdpChromaType chroma, + VdpRGBAFormat rgb_format, + bool rgb, int w, int h) +{ + struct vdp_functions *vdp = &ctx->vdp; + int surface_index = -1; + VdpStatus vdp_st; + + if (rgb) { + chroma = (VdpChromaType)-1; + } else { + rgb_format = (VdpChromaType)-1; + } + + mp_mutex_lock(&ctx->pool_lock); + + // Destroy all unused surfaces that don't have matching parameters + for (int n = 0; n < MAX_VIDEO_SURFACES; n++) { + struct surface_entry *e = &ctx->video_surfaces[n]; + if (!e->in_use && e->allocated) { + if (e->w != w || e->h != h || e->rgb != rgb || + e->chroma != chroma || e->rgb_format != rgb_format) + { + if (e->rgb) { + vdp_st = vdp->output_surface_destroy(e->osurface); + } else { + vdp_st = vdp->video_surface_destroy(e->surface); + } + CHECK_VDP_WARNING(ctx, "Error when destroying surface"); + e->surface = e->osurface = VDP_INVALID_HANDLE; + e->allocated = false; + } + } + } + + // Try to find an existing unused surface + for (int n = 0; n < MAX_VIDEO_SURFACES; n++) { + struct surface_entry *e = &ctx->video_surfaces[n]; + if (!e->in_use && e->allocated) { + assert(e->w == w && e->h == h); + assert(e->chroma == chroma); + assert(e->rgb_format == rgb_format); + assert(e->rgb == rgb); + if (surface_index >= 0) { + 
struct surface_entry *other = &ctx->video_surfaces[surface_index]; + if (other->age < e->age) + continue; + } + surface_index = n; + } + } + + if (surface_index >= 0) + goto done; + + // Allocate new surface + for (int n = 0; n < MAX_VIDEO_SURFACES; n++) { + struct surface_entry *e = &ctx->video_surfaces[n]; + if (!e->in_use) { + assert(e->surface == VDP_INVALID_HANDLE); + assert(e->osurface == VDP_INVALID_HANDLE); + assert(!e->allocated); + e->chroma = chroma; + e->rgb_format = rgb_format; + e->rgb = rgb; + e->w = w; + e->h = h; + if (mp_vdpau_handle_preemption(ctx, NULL) >= 0) { + if (rgb) { + vdp_st = vdp->output_surface_create(ctx->vdp_device, rgb_format, + w, h, &e->osurface); + e->allocated = e->osurface != VDP_INVALID_HANDLE; + } else { + vdp_st = vdp->video_surface_create(ctx->vdp_device, chroma, + w, h, &e->surface); + e->allocated = e->surface != VDP_INVALID_HANDLE; + } + CHECK_VDP_WARNING(ctx, "Error when allocating surface"); + } else { + e->allocated = false; + e->osurface = VDP_INVALID_HANDLE; + e->surface = VDP_INVALID_HANDLE; + } + surface_index = n; + goto done; + } + } + +done: ; + struct mp_image *mpi = NULL; + if (surface_index >= 0) + mpi = create_ref(ctx, surface_index); + + mp_mutex_unlock(&ctx->pool_lock); + + if (!mpi) + MP_ERR(ctx, "no surfaces available in mp_vdpau_get_video_surface\n"); + return mpi; +} + +struct mp_image *mp_vdpau_get_video_surface(struct mp_vdpau_ctx *ctx, + VdpChromaType chroma, int w, int h) +{ + return mp_vdpau_get_surface(ctx, chroma, 0, false, w, h); +} + +static void free_device_ref(struct AVHWDeviceContext *hwctx) +{ + struct mp_vdpau_ctx *ctx = hwctx->user_opaque; + + struct vdp_functions *vdp = &ctx->vdp; + VdpStatus vdp_st; + + for (int i = 0; i < MAX_VIDEO_SURFACES; i++) { + // can't hold references past context lifetime + assert(!ctx->video_surfaces[i].in_use); + if (ctx->video_surfaces[i].surface != VDP_INVALID_HANDLE) { + vdp_st = vdp->video_surface_destroy(ctx->video_surfaces[i].surface); + 
CHECK_VDP_WARNING(ctx, "Error when calling vdp_video_surface_destroy"); + } + if (ctx->video_surfaces[i].osurface != VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_destroy(ctx->video_surfaces[i].osurface); + CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy"); + } + } + + if (ctx->preemption_obj != VDP_INVALID_HANDLE) { + vdp_st = vdp->output_surface_destroy(ctx->preemption_obj); + CHECK_VDP_WARNING(ctx, "Error when calling vdp_output_surface_destroy"); + } + + if (vdp->device_destroy && ctx->vdp_device != VDP_INVALID_HANDLE) { + vdp_st = vdp->device_destroy(ctx->vdp_device); + CHECK_VDP_WARNING(ctx, "Error when calling vdp_device_destroy"); + } + + if (ctx->close_display) + XCloseDisplay(ctx->x11); + + mp_mutex_destroy(&ctx->pool_lock); + mp_mutex_destroy(&ctx->preempt_lock); + talloc_free(ctx); +} + +struct mp_vdpau_ctx *mp_vdpau_create_device_x11(struct mp_log *log, Display *x11, + bool probing) +{ + AVBufferRef *avref = av_hwdevice_ctx_alloc(AV_HWDEVICE_TYPE_VDPAU); + if (!avref) + return NULL; + + AVHWDeviceContext *hwctx = (void *)avref->data; + AVVDPAUDeviceContext *vdctx = hwctx->hwctx; + + struct mp_vdpau_ctx *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct mp_vdpau_ctx) { + .log = log, + .x11 = x11, + .preemption_counter = 1, + .av_device_ref = avref, + .hwctx = { + .av_device_ref = avref, + }, + }; + mp_mutex_init_type(&ctx->preempt_lock, MP_MUTEX_RECURSIVE); + mp_mutex_init(&ctx->pool_lock); + + hwctx->free = free_device_ref; + hwctx->user_opaque = ctx; + + mark_vdpau_objects_uninitialized(ctx); + + if (win_x11_init_vdpau_procs(ctx, probing) < 0) { + mp_vdpau_destroy(ctx); + return NULL; + } + + vdctx->device = ctx->vdp_device; + vdctx->get_proc_address = ctx->get_proc_address; + + if (av_hwdevice_ctx_init(ctx->av_device_ref) < 0) { + mp_vdpau_destroy(ctx); + return NULL; + } + + return ctx; +} + +void mp_vdpau_destroy(struct mp_vdpau_ctx *ctx) +{ + if (!ctx) + return; + + AVBufferRef *ref = ctx->av_device_ref; + 
av_buffer_unref(&ref); // frees ctx as well +} + +bool mp_vdpau_get_format(int imgfmt, VdpChromaType *out_chroma_type, + VdpYCbCrFormat *out_pixel_format) +{ + VdpChromaType chroma = VDP_CHROMA_TYPE_420; + VdpYCbCrFormat ycbcr = (VdpYCbCrFormat)-1; + + switch (imgfmt) { + case IMGFMT_420P: + ycbcr = VDP_YCBCR_FORMAT_YV12; + break; + case IMGFMT_NV12: + ycbcr = VDP_YCBCR_FORMAT_NV12; + break; + case IMGFMT_UYVY: + ycbcr = VDP_YCBCR_FORMAT_UYVY; + chroma = VDP_CHROMA_TYPE_422; + break; + case IMGFMT_VDPAU: + break; + default: + return false; + } + + if (out_chroma_type) + *out_chroma_type = chroma; + if (out_pixel_format) + *out_pixel_format = ycbcr; + return true; +} + +bool mp_vdpau_get_rgb_format(int imgfmt, VdpRGBAFormat *out_rgba_format) +{ + VdpRGBAFormat format = (VdpRGBAFormat)-1; + + switch (imgfmt) { + case IMGFMT_BGRA: + format = VDP_RGBA_FORMAT_B8G8R8A8; break; + default: + return false; + } + + if (out_rgba_format) + *out_rgba_format = format; + return true; +} + +// Use mp_vdpau_get_video_surface, and upload mpi to it. Return NULL on failure. +// If the image is already a vdpau video surface, just return a reference. 
+struct mp_image *mp_vdpau_upload_video_surface(struct mp_vdpau_ctx *ctx, + struct mp_image *mpi) +{ + struct vdp_functions *vdp = &ctx->vdp; + VdpStatus vdp_st; + + if (mpi->imgfmt == IMGFMT_VDPAU || mpi->imgfmt == IMGFMT_VDPAU_OUTPUT) + return mp_image_new_ref(mpi); + + VdpChromaType chroma = (VdpChromaType)-1; + VdpYCbCrFormat ycbcr = (VdpYCbCrFormat)-1; + VdpRGBAFormat rgbafmt = (VdpRGBAFormat)-1; + bool rgb = !mp_vdpau_get_format(mpi->imgfmt, &chroma, &ycbcr); + if (rgb && !mp_vdpau_get_rgb_format(mpi->imgfmt, &rgbafmt)) + return NULL; + + struct mp_image *hwmpi = + mp_vdpau_get_surface(ctx, chroma, rgbafmt, rgb, mpi->w, mpi->h); + if (!hwmpi) + return NULL; + + struct mp_image *src = mpi; + if (mpi->stride[0] < 0) + src = mp_image_new_copy(mpi); // unflips it when copying + + if (hwmpi->imgfmt == IMGFMT_VDPAU) { + VdpVideoSurface surface = (intptr_t)hwmpi->planes[3]; + const void *destdata[3] = {src->planes[0], src->planes[2], src->planes[1]}; + if (src->imgfmt == IMGFMT_NV12) + destdata[1] = destdata[2]; + vdp_st = vdp->video_surface_put_bits_y_cb_cr(surface, + ycbcr, destdata, src->stride); + } else { + VdpOutputSurface rgb_surface = (intptr_t)hwmpi->planes[3]; + vdp_st = vdp->output_surface_put_bits_native(rgb_surface, + &(const void *){src->planes[0]}, + &(uint32_t){src->stride[0]}, + NULL); + } + CHECK_VDP_WARNING(ctx, "Error when uploading surface"); + + if (src != mpi) + talloc_free(src); + + mp_image_copy_attributes(hwmpi, mpi); + return hwmpi; +} + +bool mp_vdpau_guess_if_emulated(struct mp_vdpau_ctx *ctx) +{ + struct vdp_functions *vdp = &ctx->vdp; + VdpStatus vdp_st; + char const* info = NULL; + vdp_st = vdp->get_information_string(&info); + CHECK_VDP_WARNING(ctx, "Error when calling vdp_get_information_string"); + return vdp_st == VDP_STATUS_OK && info && strstr(info, "VAAPI"); +} + +// (This clearly works only for contexts wrapped by our code.) 
+struct mp_vdpau_ctx *mp_vdpau_get_ctx_from_av(AVBufferRef *hw_device_ctx) +{ + AVHWDeviceContext *hwctx = (void *)hw_device_ctx->data; + + if (hwctx->free != free_device_ref) + return NULL; // not ours + + return hwctx->user_opaque; +} + +static bool is_emulated(struct AVBufferRef *hw_device_ctx) +{ + struct mp_vdpau_ctx *ctx = mp_vdpau_get_ctx_from_av(hw_device_ctx); + if (!ctx) + return false; + + return mp_vdpau_guess_if_emulated(ctx); +} + +static struct AVBufferRef *vdpau_create_standalone(struct mpv_global *global, + struct mp_log *log, struct hwcontext_create_dev_params *params) +{ + XInitThreads(); + + Display *display = XOpenDisplay(NULL); + if (!display) + return NULL; + + struct mp_vdpau_ctx *vdp = + mp_vdpau_create_device_x11(log, display, params->probing); + if (!vdp) { + XCloseDisplay(display); + return NULL; + } + + vdp->close_display = true; + return vdp->hwctx.av_device_ref; +} + +const struct hwcontext_fns hwcontext_fns_vdpau = { + .av_hwdevice_type = AV_HWDEVICE_TYPE_VDPAU, + .create_dev = vdpau_create_standalone, + .is_emulated = is_emulated, +}; diff --git a/video/vdpau.h b/video/vdpau.h new file mode 100644 index 0000000..a30f478 --- /dev/null +++ b/video/vdpau.h @@ -0,0 +1,109 @@ +#ifndef MPV_VDPAU_H +#define MPV_VDPAU_H + +#include <stdbool.h> +#include <inttypes.h> + +#include <vdpau/vdpau.h> +#include <vdpau/vdpau_x11.h> + +#include "common/msg.h" +#include "hwdec.h" +#include "osdep/threads.h" + +#include "config.h" +#if !HAVE_GPL +#error GPL only +#endif + +#define CHECK_VDP_ERROR_ST(ctx, message, statement) \ + do { \ + if (vdp_st != VDP_STATUS_OK) { \ + MP_ERR(ctx, "%s: %s\n", message, vdp->get_error_string(vdp_st)); \ + statement \ + } \ + } while (0) + +#define CHECK_VDP_ERROR(ctx, message) \ + CHECK_VDP_ERROR_ST(ctx, message, return -1;) + +#define CHECK_VDP_ERROR_NORETURN(ctx, message) \ + CHECK_VDP_ERROR_ST(ctx, message, ;) + +#define CHECK_VDP_WARNING(ctx, message) \ + do { \ + if (vdp_st != VDP_STATUS_OK) \ + MP_WARN(ctx, "%s: 
%s\n", message, vdp->get_error_string(vdp_st)); \ + } while (0) + +struct vdp_functions { +#define VDP_FUNCTION(vdp_type, _, mp_name) vdp_type *mp_name; +#include "video/vdpau_functions.inc" +#undef VDP_FUNCTION +}; + + +#define MAX_VIDEO_SURFACES 50 + +// Shared state. Objects created from different VdpDevices are often (always?) +// incompatible to each other, so all code must use a shared VdpDevice. +struct mp_vdpau_ctx { + struct mp_log *log; + Display *x11; + bool close_display; + + struct mp_hwdec_ctx hwctx; + struct AVBufferRef *av_device_ref; + + // These are mostly immutable, except on preemption. We don't really care + // to synchronize the preemption case fully correctly, because it's an + // extremely obscure corner case, and basically a vdpau API design bug. + // What we do will sort-of work anyway (no memory errors are possible). + struct vdp_functions vdp; + VdpGetProcAddress *get_proc_address; + VdpDevice vdp_device; + + mp_mutex preempt_lock; + bool is_preempted; // set to true during unavailability + uint64_t preemption_counter; // incremented after _restoring_ + bool preemption_user_notified; + double last_preemption_retry_fail; + VdpOutputSurface preemption_obj; // dummy for reliable preempt. 
check + + // Surface pool + mp_mutex pool_lock; + int64_t age_counter; + struct surface_entry { + VdpVideoSurface surface; + VdpOutputSurface osurface; + bool allocated; + int w, h; + VdpRGBAFormat rgb_format; + VdpChromaType chroma; + bool rgb; + bool in_use; + int64_t age; + } video_surfaces[MAX_VIDEO_SURFACES]; +}; + +struct mp_vdpau_ctx *mp_vdpau_create_device_x11(struct mp_log *log, Display *x11, + bool probing); +void mp_vdpau_destroy(struct mp_vdpau_ctx *ctx); + +int mp_vdpau_handle_preemption(struct mp_vdpau_ctx *ctx, uint64_t *counter); + +struct mp_image *mp_vdpau_get_video_surface(struct mp_vdpau_ctx *ctx, + VdpChromaType chroma, int w, int h); + +bool mp_vdpau_get_format(int imgfmt, VdpChromaType *out_chroma_type, + VdpYCbCrFormat *out_pixel_format); +bool mp_vdpau_get_rgb_format(int imgfmt, VdpRGBAFormat *out_rgba_format); + +struct mp_image *mp_vdpau_upload_video_surface(struct mp_vdpau_ctx *ctx, + struct mp_image *mpi); + +struct mp_vdpau_ctx *mp_vdpau_get_ctx_from_av(struct AVBufferRef *hw_device_ctx); + +bool mp_vdpau_guess_if_emulated(struct mp_vdpau_ctx *ctx); + +#endif diff --git a/video/vdpau_functions.inc b/video/vdpau_functions.inc new file mode 100644 index 0000000..22c612c --- /dev/null +++ b/video/vdpau_functions.inc @@ -0,0 +1,50 @@ +/* Lists the VDPAU functions used by MPV. + * First argument on each line is the VDPAU function type name, + * second is the macro name needed to get the function address, + * third is the name MPV uses for the function. 
+ */ + +VDP_FUNCTION(VdpGetErrorString, VDP_FUNC_ID_GET_ERROR_STRING, get_error_string) +VDP_FUNCTION(VdpBitmapSurfaceCreate, VDP_FUNC_ID_BITMAP_SURFACE_CREATE, bitmap_surface_create) +VDP_FUNCTION(VdpBitmapSurfaceDestroy, VDP_FUNC_ID_BITMAP_SURFACE_DESTROY, bitmap_surface_destroy) +VDP_FUNCTION(VdpBitmapSurfacePutBitsNative, VDP_FUNC_ID_BITMAP_SURFACE_PUT_BITS_NATIVE, bitmap_surface_put_bits_native) +VDP_FUNCTION(VdpBitmapSurfaceQueryCapabilities, VDP_FUNC_ID_BITMAP_SURFACE_QUERY_CAPABILITIES, bitmap_surface_query_capabilities) +VDP_FUNCTION(VdpDecoderCreate, VDP_FUNC_ID_DECODER_CREATE, decoder_create) +VDP_FUNCTION(VdpDecoderDestroy, VDP_FUNC_ID_DECODER_DESTROY, decoder_destroy) +VDP_FUNCTION(VdpDecoderRender, VDP_FUNC_ID_DECODER_RENDER, decoder_render) +VDP_FUNCTION(VdpDecoderQueryCapabilities, VDP_FUNC_ID_DECODER_QUERY_CAPABILITIES, decoder_query_capabilities) +VDP_FUNCTION(VdpDeviceDestroy, VDP_FUNC_ID_DEVICE_DESTROY, device_destroy) +VDP_FUNCTION(VdpGetInformationString, VDP_FUNC_ID_GET_INFORMATION_STRING, get_information_string) +VDP_FUNCTION(VdpGenerateCSCMatrix, VDP_FUNC_ID_GENERATE_CSC_MATRIX, generate_csc_matrix) +VDP_FUNCTION(VdpOutputSurfaceCreate, VDP_FUNC_ID_OUTPUT_SURFACE_CREATE, output_surface_create) +VDP_FUNCTION(VdpOutputSurfaceDestroy, VDP_FUNC_ID_OUTPUT_SURFACE_DESTROY, output_surface_destroy) +VDP_FUNCTION(VdpOutputSurfaceGetBitsNative, VDP_FUNC_ID_OUTPUT_SURFACE_GET_BITS_NATIVE, output_surface_get_bits_native) +VDP_FUNCTION(VdpOutputSurfacePutBitsIndexed, VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_INDEXED, output_surface_put_bits_indexed) +VDP_FUNCTION(VdpOutputSurfacePutBitsNative, VDP_FUNC_ID_OUTPUT_SURFACE_PUT_BITS_NATIVE, output_surface_put_bits_native) +VDP_FUNCTION(VdpOutputSurfaceRenderBitmapSurface, VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_BITMAP_SURFACE, output_surface_render_bitmap_surface) +VDP_FUNCTION(VdpOutputSurfaceRenderOutputSurface, VDP_FUNC_ID_OUTPUT_SURFACE_RENDER_OUTPUT_SURFACE, output_surface_render_output_surface) 
+VDP_FUNCTION(VdpPreemptionCallbackRegister, VDP_FUNC_ID_PREEMPTION_CALLBACK_REGISTER, preemption_callback_register) +VDP_FUNCTION(VdpPresentationQueueBlockUntilSurfaceIdle, VDP_FUNC_ID_PRESENTATION_QUEUE_BLOCK_UNTIL_SURFACE_IDLE, presentation_queue_block_until_surface_idle) +VDP_FUNCTION(VdpPresentationQueueCreate, VDP_FUNC_ID_PRESENTATION_QUEUE_CREATE, presentation_queue_create) +VDP_FUNCTION(VdpPresentationQueueDestroy, VDP_FUNC_ID_PRESENTATION_QUEUE_DESTROY, presentation_queue_destroy) +VDP_FUNCTION(VdpPresentationQueueDisplay, VDP_FUNC_ID_PRESENTATION_QUEUE_DISPLAY, presentation_queue_display) +VDP_FUNCTION(VdpPresentationQueueGetTime, VDP_FUNC_ID_PRESENTATION_QUEUE_GET_TIME, presentation_queue_get_time) +VDP_FUNCTION(VdpPresentationQueueQuerySurfaceStatus, VDP_FUNC_ID_PRESENTATION_QUEUE_QUERY_SURFACE_STATUS, presentation_queue_query_surface_status) +VDP_FUNCTION(VdpPresentationQueueSetBackgroundColor, VDP_FUNC_ID_PRESENTATION_QUEUE_SET_BACKGROUND_COLOR, presentation_queue_set_background_color) +VDP_FUNCTION(VdpPresentationQueueGetBackgroundColor, VDP_FUNC_ID_PRESENTATION_QUEUE_GET_BACKGROUND_COLOR, presentation_queue_get_background_color) +VDP_FUNCTION(VdpPresentationQueueTargetCreateX11, VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_CREATE_X11, presentation_queue_target_create_x11) +VDP_FUNCTION(VdpPresentationQueueTargetDestroy, VDP_FUNC_ID_PRESENTATION_QUEUE_TARGET_DESTROY, presentation_queue_target_destroy) +VDP_FUNCTION(VdpVideoMixerCreate, VDP_FUNC_ID_VIDEO_MIXER_CREATE, video_mixer_create) +VDP_FUNCTION(VdpVideoMixerDestroy, VDP_FUNC_ID_VIDEO_MIXER_DESTROY, video_mixer_destroy) +VDP_FUNCTION(VdpVideoMixerQueryFeatureSupport, VDP_FUNC_ID_VIDEO_MIXER_QUERY_FEATURE_SUPPORT, video_mixer_query_feature_support) +VDP_FUNCTION(VdpVideoMixerRender, VDP_FUNC_ID_VIDEO_MIXER_RENDER, video_mixer_render) +VDP_FUNCTION(VdpVideoMixerSetAttributeValues, VDP_FUNC_ID_VIDEO_MIXER_SET_ATTRIBUTE_VALUES, video_mixer_set_attribute_values) +VDP_FUNCTION(VdpVideoMixerSetFeatureEnables, 
VDP_FUNC_ID_VIDEO_MIXER_SET_FEATURE_ENABLES, video_mixer_set_feature_enables) +VDP_FUNCTION(VdpVideoSurfaceCreate, VDP_FUNC_ID_VIDEO_SURFACE_CREATE, video_surface_create) +VDP_FUNCTION(VdpVideoSurfaceDestroy, VDP_FUNC_ID_VIDEO_SURFACE_DESTROY, video_surface_destroy) +VDP_FUNCTION(VdpVideoSurfacePutBitsYCbCr, VDP_FUNC_ID_VIDEO_SURFACE_PUT_BITS_Y_CB_CR, video_surface_put_bits_y_cb_cr) +VDP_FUNCTION(VdpVideoSurfaceGetBitsYCbCr, VDP_FUNC_ID_VIDEO_SURFACE_GET_BITS_Y_CB_CR, video_surface_get_bits_y_cb_cr) +VDP_FUNCTION(VdpVideoSurfaceGetParameters, VDP_FUNC_ID_VIDEO_SURFACE_GET_PARAMETERS, video_surface_get_parameters) +VDP_FUNCTION(VdpVideoSurfaceQueryCapabilities, VDP_FUNC_ID_VIDEO_SURFACE_QUERY_CAPABILITIES, video_surface_query_capabilities) +VDP_FUNCTION(VdpOutputSurfaceQueryCapabilities, VDP_FUNC_ID_OUTPUT_SURFACE_QUERY_CAPABILITIES, output_surface_query_capabilities) +VDP_FUNCTION(VdpOutputSurfaceGetParameters, VDP_FUNC_ID_OUTPUT_SURFACE_GET_PARAMETERS, output_surface_get_parameters) diff --git a/video/vdpau_mixer.c b/video/vdpau_mixer.c new file mode 100644 index 0000000..b1aed70 --- /dev/null +++ b/video/vdpau_mixer.c @@ -0,0 +1,306 @@ +/* + * This file is part of mpv. + * + * Parts of video mixer creation code: + * Copyright (C) 2008 NVIDIA (Rajib Mahapatra <rmahapatra@nvidia.com>) + * Copyright (C) 2009 Uoti Urpala + * + * mpv is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <assert.h> + +#include "vdpau_mixer.h" + +static void free_mixed_frame(void *arg) +{ + struct mp_vdpau_mixer_frame *frame = arg; + talloc_free(frame); +} + +// This creates an image of format IMGFMT_VDPAU with a mp_vdpau_mixer_frame +// struct. Use mp_vdpau_mixed_frame_get() to retrieve the struct and to +// initialize it. +// "base" is used only to set parameters, no image data is referenced. +struct mp_image *mp_vdpau_mixed_frame_create(struct mp_image *base) +{ + assert(base->imgfmt == IMGFMT_VDPAU); + + struct mp_vdpau_mixer_frame *frame = + talloc_zero(NULL, struct mp_vdpau_mixer_frame); + for (int n = 0; n < MP_VDP_HISTORY_FRAMES; n++) + frame->past[n] = frame->future[n] = VDP_INVALID_HANDLE; + frame->current = VDP_INVALID_HANDLE; + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME; + + struct mp_image *mpi = mp_image_new_custom_ref(base, frame, free_mixed_frame); + if (mpi) { + mpi->planes[2] = (void *)frame; + mpi->planes[3] = (void *)(uintptr_t)VDP_INVALID_HANDLE; + } + return mpi; +} + +struct mp_vdpau_mixer_frame *mp_vdpau_mixed_frame_get(struct mp_image *mpi) +{ + if (mpi->imgfmt != IMGFMT_VDPAU) + return NULL; + return (void *)mpi->planes[2]; +} + +struct mp_vdpau_mixer *mp_vdpau_mixer_create(struct mp_vdpau_ctx *vdp_ctx, + struct mp_log *log) +{ + struct mp_vdpau_mixer *mixer = talloc_ptrtype(NULL, mixer); + *mixer = (struct mp_vdpau_mixer){ + .ctx = vdp_ctx, + .log = log, + .video_mixer = VDP_INVALID_HANDLE, + }; + mp_vdpau_handle_preemption(mixer->ctx, &mixer->preemption_counter); + return mixer; +} + +void mp_vdpau_mixer_destroy(struct mp_vdpau_mixer *mixer) +{ + struct vdp_functions *vdp = &mixer->ctx->vdp; + VdpStatus vdp_st; + if (mixer->video_mixer != VDP_INVALID_HANDLE) { + vdp_st = vdp->video_mixer_destroy(mixer->video_mixer); + CHECK_VDP_WARNING(mixer, "Error when calling vdp_video_mixer_destroy"); + } + talloc_free(mixer); +} + +static bool opts_equal(const struct mp_vdpau_mixer_opts *a, + const struct 
mp_vdpau_mixer_opts *b) +{ + return a->deint == b->deint && a->chroma_deint == b->chroma_deint && + a->pullup == b->pullup && a->hqscaling == b->hqscaling && + a->sharpen == b->sharpen && a->denoise == b->denoise; +} + +static int set_video_attribute(struct mp_vdpau_mixer *mixer, + VdpVideoMixerAttribute attr, + const void *value, char *attr_name) +{ + struct vdp_functions *vdp = &mixer->ctx->vdp; + VdpStatus vdp_st; + + vdp_st = vdp->video_mixer_set_attribute_values(mixer->video_mixer, 1, + &attr, &value); + if (vdp_st != VDP_STATUS_OK) { + MP_ERR(mixer, "Error setting video mixer attribute %s: %s\n", attr_name, + vdp->get_error_string(vdp_st)); + return -1; + } + return 0; +} + +#define SET_VIDEO_ATTR(attr_name, attr_type, value) set_video_attribute(mixer, \ + VDP_VIDEO_MIXER_ATTRIBUTE_ ## attr_name, &(attr_type){value},\ + # attr_name) +static int create_vdp_mixer(struct mp_vdpau_mixer *mixer, + VdpChromaType chroma_type, uint32_t w, uint32_t h) +{ + struct vdp_functions *vdp = &mixer->ctx->vdp; + VdpDevice vdp_device = mixer->ctx->vdp_device; + struct mp_vdpau_mixer_opts *opts = &mixer->opts; +#define VDP_NUM_MIXER_PARAMETER 3 +#define MAX_NUM_FEATURES 6 + int i; + VdpStatus vdp_st; + + MP_VERBOSE(mixer, "Recreating vdpau video mixer.\n"); + + int feature_count = 0; + VdpVideoMixerFeature features[MAX_NUM_FEATURES]; + VdpBool feature_enables[MAX_NUM_FEATURES]; + static const VdpVideoMixerParameter parameters[VDP_NUM_MIXER_PARAMETER] = { + VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_WIDTH, + VDP_VIDEO_MIXER_PARAMETER_VIDEO_SURFACE_HEIGHT, + VDP_VIDEO_MIXER_PARAMETER_CHROMA_TYPE, + }; + const void *const parameter_values[VDP_NUM_MIXER_PARAMETER] = { + &(uint32_t){w}, + &(uint32_t){h}, + &(VdpChromaType){chroma_type}, + }; + if (opts->deint >= 3) + features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL; + if (opts->deint == 4) + features[feature_count++] = + VDP_VIDEO_MIXER_FEATURE_DEINTERLACE_TEMPORAL_SPATIAL; + if (opts->pullup) + 
features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_INVERSE_TELECINE; + if (opts->denoise) + features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_NOISE_REDUCTION; + if (opts->sharpen) + features[feature_count++] = VDP_VIDEO_MIXER_FEATURE_SHARPNESS; + if (opts->hqscaling) { + VdpVideoMixerFeature hqscaling_feature = + VDP_VIDEO_MIXER_FEATURE_HIGH_QUALITY_SCALING_L1 + opts->hqscaling - 1; + VdpBool hqscaling_available; + vdp_st = vdp->video_mixer_query_feature_support(vdp_device, + hqscaling_feature, + &hqscaling_available); + CHECK_VDP_ERROR(mixer, "Error when calling video_mixer_query_feature_support"); + if (hqscaling_available) { + features[feature_count++] = hqscaling_feature; + } else { + MP_ERR(mixer, "Your hardware or VDPAU library does not support " + "requested hqscaling.\n"); + } + } + + vdp_st = vdp->video_mixer_create(vdp_device, feature_count, features, + VDP_NUM_MIXER_PARAMETER, + parameters, parameter_values, + &mixer->video_mixer); + if (vdp_st != VDP_STATUS_OK) + mixer->video_mixer = VDP_INVALID_HANDLE; + + CHECK_VDP_ERROR(mixer, "Error when calling vdp_video_mixer_create"); + + mixer->initialized = true; + mixer->current_chroma_type = chroma_type; + mixer->current_w = w; + mixer->current_h = h; + + for (i = 0; i < feature_count; i++) + feature_enables[i] = VDP_TRUE; + if (feature_count) { + vdp_st = vdp->video_mixer_set_feature_enables(mixer->video_mixer, + feature_count, features, + feature_enables); + CHECK_VDP_WARNING(mixer, "Error calling vdp_video_mixer_set_feature_enables"); + } + if (opts->denoise) + SET_VIDEO_ATTR(NOISE_REDUCTION_LEVEL, float, opts->denoise); + if (opts->sharpen) + SET_VIDEO_ATTR(SHARPNESS_LEVEL, float, opts->sharpen); + if (!opts->chroma_deint) + SET_VIDEO_ATTR(SKIP_CHROMA_DEINTERLACE, uint8_t, 1); + + struct mp_cmat yuv2rgb; + VdpCSCMatrix matrix; + + struct mp_csp_params cparams = MP_CSP_PARAMS_DEFAULTS; + mp_csp_set_image_params(&cparams, &mixer->image_params); + if (mixer->video_eq) + 
mp_csp_equalizer_state_get(mixer->video_eq, &cparams); + mp_get_csp_matrix(&cparams, &yuv2rgb); + + for (int r = 0; r < 3; r++) { + for (int c = 0; c < 3; c++) + matrix[r][c] = yuv2rgb.m[r][c]; + matrix[r][3] = yuv2rgb.c[r]; + } + + set_video_attribute(mixer, VDP_VIDEO_MIXER_ATTRIBUTE_CSC_MATRIX, + &matrix, "CSC matrix"); + + return 0; +} + +// If opts is NULL, use the opts as implied by the video image. +int mp_vdpau_mixer_render(struct mp_vdpau_mixer *mixer, + struct mp_vdpau_mixer_opts *opts, + VdpOutputSurface output, VdpRect *output_rect, + struct mp_image *video, VdpRect *video_rect) +{ + struct vdp_functions *vdp = &mixer->ctx->vdp; + VdpStatus vdp_st; + VdpRect fallback_rect = {0, 0, video->w, video->h}; + + if (!video_rect) + video_rect = &fallback_rect; + + int pe = mp_vdpau_handle_preemption(mixer->ctx, &mixer->preemption_counter); + if (pe < 1) { + mixer->video_mixer = VDP_INVALID_HANDLE; + if (pe < 0) + return -1; + } + + if (video->imgfmt == IMGFMT_VDPAU_OUTPUT) { + VdpOutputSurface surface = (uintptr_t)video->planes[3]; + int flags = VDP_OUTPUT_SURFACE_RENDER_ROTATE_0; + vdp_st = vdp->output_surface_render_output_surface(output, + output_rect, + surface, + video_rect, + NULL, NULL, flags); + CHECK_VDP_WARNING(mixer, "Error when calling " + "vdp_output_surface_render_output_surface"); + return 0; + } + + if (video->imgfmt != IMGFMT_VDPAU) + return -1; + + struct mp_vdpau_mixer_frame *frame = mp_vdpau_mixed_frame_get(video); + struct mp_vdpau_mixer_frame fallback = {{0}}; + if (!frame) { + frame = &fallback; + frame->current = (uintptr_t)video->planes[3]; + for (int n = 0; n < MP_VDP_HISTORY_FRAMES; n++) + frame->past[n] = frame->future[n] = VDP_INVALID_HANDLE; + frame->field = VDP_VIDEO_MIXER_PICTURE_STRUCTURE_FRAME; + } + + if (!opts) + opts = &frame->opts; + + if (mixer->video_mixer == VDP_INVALID_HANDLE) + mixer->initialized = false; + + if (mixer->video_eq && mp_csp_equalizer_state_changed(mixer->video_eq)) + mixer->initialized = false; + + 
VdpChromaType s_chroma_type; + uint32_t s_w, s_h; + + vdp_st = vdp->video_surface_get_parameters(frame->current, &s_chroma_type, + &s_w, &s_h); + CHECK_VDP_ERROR(mixer, "Error when calling vdp_video_surface_get_parameters"); + + if (!mixer->initialized || !opts_equal(opts, &mixer->opts) || + !mp_image_params_equal(&video->params, &mixer->image_params) || + mixer->current_w != s_w || mixer->current_h != s_h || + mixer->current_chroma_type != s_chroma_type) + { + mixer->opts = *opts; + mixer->image_params = video->params; + if (mixer->video_mixer != VDP_INVALID_HANDLE) { + vdp_st = vdp->video_mixer_destroy(mixer->video_mixer); + CHECK_VDP_WARNING(mixer, "Error when calling vdp_video_mixer_destroy"); + } + mixer->video_mixer = VDP_INVALID_HANDLE; + mixer->initialized = false; + if (create_vdp_mixer(mixer, s_chroma_type, s_w, s_h) < 0) + return -1; + } + + vdp_st = vdp->video_mixer_render(mixer->video_mixer, VDP_INVALID_HANDLE, + 0, frame->field, + MP_VDP_HISTORY_FRAMES, frame->past, + frame->current, + MP_VDP_HISTORY_FRAMES, frame->future, + video_rect, + output, NULL, output_rect, + 0, NULL); + CHECK_VDP_WARNING(mixer, "Error when calling vdp_video_mixer_render"); + return 0; +} diff --git a/video/vdpau_mixer.h b/video/vdpau_mixer.h new file mode 100644 index 0000000..4abe87e --- /dev/null +++ b/video/vdpau_mixer.h @@ -0,0 +1,61 @@ +#ifndef MP_VDPAU_MIXER_H_ +#define MP_VDPAU_MIXER_H_ + +#include <stdbool.h> + +#include "csputils.h" +#include "mp_image.h" +#include "vdpau.h" + +struct mp_vdpau_mixer_opts { + int deint; + bool chroma_deint; + bool pullup; + float denoise; + float sharpen; + int hqscaling; +}; + +#define MP_VDP_HISTORY_FRAMES 2 + +struct mp_vdpau_mixer_frame { + // settings + struct mp_vdpau_mixer_opts opts; + // video data + VdpVideoMixerPictureStructure field; + VdpVideoSurface past[MP_VDP_HISTORY_FRAMES]; + VdpVideoSurface current; + VdpVideoSurface future[MP_VDP_HISTORY_FRAMES]; +}; + +struct mp_vdpau_mixer { + struct mp_log *log; + struct 
mp_vdpau_ctx *ctx; + uint64_t preemption_counter; + bool initialized; + + struct mp_image_params image_params; + struct mp_vdpau_mixer_opts opts; + + VdpChromaType current_chroma_type; + int current_w, current_h; + + struct mp_csp_equalizer_state *video_eq; + + VdpVideoMixer video_mixer; +}; + +struct mp_image *mp_vdpau_mixed_frame_create(struct mp_image *base); + +struct mp_vdpau_mixer_frame *mp_vdpau_mixed_frame_get(struct mp_image *mpi); + +struct mp_vdpau_mixer *mp_vdpau_mixer_create(struct mp_vdpau_ctx *vdp_ctx, + struct mp_log *log); +void mp_vdpau_mixer_destroy(struct mp_vdpau_mixer *mixer); + +int mp_vdpau_mixer_render(struct mp_vdpau_mixer *mixer, + struct mp_vdpau_mixer_opts *opts, + VdpOutputSurface output, VdpRect *output_rect, + struct mp_image *video, VdpRect *video_rect); + +#endif diff --git a/video/zimg.c b/video/zimg.c new file mode 100644 index 0000000..5ff300c --- /dev/null +++ b/video/zimg.c @@ -0,0 +1,730 @@ +/* + * This file is part of mpv. + * + * mpv is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * mpv is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with mpv. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#include <math.h> + +#include <libavutil/cpu.h> + +#include "common/common.h" +#include "common/msg.h" +#include "csputils.h" +#include "misc/thread_pool.h" +#include "misc/thread_tools.h" +#include "options/m_config.h" +#include "options/m_option.h" +#include "repack.h" +#include "video/fmt-conversion.h" +#include "video/img_format.h" +#include "zimg.h" +#include "config.h" + +static_assert(MP_IMAGE_BYTE_ALIGN >= ZIMG_ALIGN, ""); + +#define HAVE_ZIMG_ALPHA (ZIMG_API_VERSION >= ZIMG_MAKE_API_VERSION(2, 4)) + +static const struct m_opt_choice_alternatives mp_zimg_scalers[] = { + {"point", ZIMG_RESIZE_POINT}, + {"bilinear", ZIMG_RESIZE_BILINEAR}, + {"bicubic", ZIMG_RESIZE_BICUBIC}, + {"spline16", ZIMG_RESIZE_SPLINE16}, + {"spline36", ZIMG_RESIZE_SPLINE36}, + {"lanczos", ZIMG_RESIZE_LANCZOS}, + {0} +}; + +const struct zimg_opts zimg_opts_defaults = { + .scaler = ZIMG_RESIZE_LANCZOS, + .scaler_params = {NAN, NAN}, + .scaler_chroma_params = {NAN, NAN}, + .scaler_chroma = ZIMG_RESIZE_BILINEAR, + .dither = ZIMG_DITHER_RANDOM, + .fast = true, +}; + +#define OPT_PARAM(var) OPT_DOUBLE(var), .flags = M_OPT_DEFAULT_NAN + +#define OPT_BASE_STRUCT struct zimg_opts +const struct m_sub_options zimg_conf = { + .opts = (struct m_option[]) { + {"scaler", OPT_CHOICE_C(scaler, mp_zimg_scalers)}, + {"scaler-param-a", OPT_PARAM(scaler_params[0])}, + {"scaler-param-b", OPT_PARAM(scaler_params[1])}, + {"scaler-chroma", OPT_CHOICE_C(scaler_chroma, mp_zimg_scalers)}, + {"scaler-chroma-param-a", OPT_PARAM(scaler_chroma_params[0])}, + {"scaler-chroma-param-b", OPT_PARAM(scaler_chroma_params[1])}, + {"dither", OPT_CHOICE(dither, + {"no", ZIMG_DITHER_NONE}, + {"ordered", ZIMG_DITHER_ORDERED}, + {"random", ZIMG_DITHER_RANDOM}, + {"error-diffusion", ZIMG_DITHER_ERROR_DIFFUSION})}, + {"fast", OPT_BOOL(fast)}, + {"threads", OPT_CHOICE(threads, {"auto", 0}), M_RANGE(1, 64)}, + {0} + }, + .size = sizeof(struct zimg_opts), + .defaults = &zimg_opts_defaults, +}; + +struct mp_zimg_state { + 
zimg_filter_graph *graph; + void *tmp; + void *tmp_alloc; + struct mp_zimg_repack *src; + struct mp_zimg_repack *dst; + int slice_y, slice_h; // y start position, height of target slice + double scale_y; + struct mp_waiter thread_waiter; +}; + +struct mp_zimg_repack { + bool pack; // if false, this is for unpacking + struct mp_image_params fmt; // original mp format (possibly packed format, + // swapped endian) + int zimgfmt; // zimg equivalent unpacked format + int num_planes; // number of planes involved + unsigned zmask[4]; // zmask[mp_index] = zimg mask (using mp index!) + int z_planes[4]; // z_planes[zimg_index] = mp_index (or -1) + + struct mp_repack *repack; // converting to/from planar + + // Temporary memory for slice-wise repacking. This may be set even if repack + // is not set (then it may be used to avoid alignment issues). This has + // about one slice worth of data. + struct mp_image *tmp; + + // Temporary memory for zimg buffer. + zimg_image_buffer zbuf; + struct mp_image cropped_tmp; + + int real_w, real_h; // aligned size +}; + +static void mp_zimg_update_from_cmdline(struct mp_zimg_context *ctx) +{ + m_config_cache_update(ctx->opts_cache); + + struct zimg_opts *opts = ctx->opts_cache->opts; + ctx->opts = *opts; +} + +static zimg_chroma_location_e mp_to_z_chroma(enum mp_chroma_location cl) +{ + switch (cl) { + case MP_CHROMA_TOPLEFT: return ZIMG_CHROMA_TOP_LEFT; + case MP_CHROMA_LEFT: return ZIMG_CHROMA_LEFT; + case MP_CHROMA_CENTER: return ZIMG_CHROMA_CENTER; + default: return ZIMG_CHROMA_LEFT; + } +} + +static zimg_matrix_coefficients_e mp_to_z_matrix(enum mp_csp csp) +{ + switch (csp) { + case MP_CSP_BT_601: return ZIMG_MATRIX_BT470_BG; + case MP_CSP_BT_709: return ZIMG_MATRIX_BT709; + case MP_CSP_SMPTE_240M: return ZIMG_MATRIX_ST240_M; + case MP_CSP_BT_2020_NC: return ZIMG_MATRIX_BT2020_NCL; + case MP_CSP_BT_2020_C: return ZIMG_MATRIX_BT2020_CL; + case MP_CSP_RGB: return ZIMG_MATRIX_RGB; + case MP_CSP_XYZ: return ZIMG_MATRIX_RGB; + case 
MP_CSP_YCGCO: return ZIMG_MATRIX_YCGCO; + default: return ZIMG_MATRIX_BT709; + } +} + +static zimg_transfer_characteristics_e mp_to_z_trc(enum mp_csp_trc trc) +{ + switch (trc) { + case MP_CSP_TRC_BT_1886: return ZIMG_TRANSFER_BT709; + case MP_CSP_TRC_SRGB: return ZIMG_TRANSFER_IEC_61966_2_1; + case MP_CSP_TRC_LINEAR: return ZIMG_TRANSFER_LINEAR; + case MP_CSP_TRC_GAMMA22: return ZIMG_TRANSFER_BT470_M; + case MP_CSP_TRC_GAMMA28: return ZIMG_TRANSFER_BT470_BG; + case MP_CSP_TRC_PQ: return ZIMG_TRANSFER_ST2084; + case MP_CSP_TRC_HLG: return ZIMG_TRANSFER_ARIB_B67; +#if HAVE_ZIMG_ST428 + case MP_CSP_TRC_ST428: return ZIMG_TRANSFER_ST428; +#endif + case MP_CSP_TRC_GAMMA18: // ? + case MP_CSP_TRC_GAMMA20: + case MP_CSP_TRC_GAMMA24: + case MP_CSP_TRC_GAMMA26: + case MP_CSP_TRC_PRO_PHOTO: + case MP_CSP_TRC_V_LOG: + case MP_CSP_TRC_S_LOG1: + case MP_CSP_TRC_S_LOG2: // ? + default: return ZIMG_TRANSFER_BT709; + } +} + +static zimg_color_primaries_e mp_to_z_prim(enum mp_csp_prim prim) +{ + switch (prim) { + case MP_CSP_PRIM_BT_601_525:return ZIMG_PRIMARIES_ST170_M; + case MP_CSP_PRIM_BT_601_625:return ZIMG_PRIMARIES_BT470_BG; + case MP_CSP_PRIM_BT_709: return ZIMG_PRIMARIES_BT709; + case MP_CSP_PRIM_BT_2020: return ZIMG_PRIMARIES_BT2020; + case MP_CSP_PRIM_BT_470M: return ZIMG_PRIMARIES_BT470_M; + case MP_CSP_PRIM_DCI_P3: return ZIMG_PRIMARIES_ST431_2; + case MP_CSP_PRIM_DISPLAY_P3:return ZIMG_PRIMARIES_ST432_1; + case MP_CSP_PRIM_EBU_3213: return ZIMG_PRIMARIES_EBU3213_E; + case MP_CSP_PRIM_FILM_C: return ZIMG_PRIMARIES_FILM; + case MP_CSP_PRIM_CIE_1931: + case MP_CSP_PRIM_APPLE: // ? + case MP_CSP_PRIM_ADOBE: + case MP_CSP_PRIM_PRO_PHOTO: + case MP_CSP_PRIM_V_GAMUT: + case MP_CSP_PRIM_S_GAMUT: // ? 
+ case MP_CSP_PRIM_ACES_AP0: + case MP_CSP_PRIM_ACES_AP1: + default: return ZIMG_PRIMARIES_BT709; + } +} + +static void destroy_zimg(struct mp_zimg_context *ctx) +{ + for (int n = 0; n < ctx->num_states; n++) { + struct mp_zimg_state *st = ctx->states[n]; + talloc_free(st->tmp_alloc); + zimg_filter_graph_free(st->graph); + TA_FREEP(&st->src); + TA_FREEP(&st->dst); + talloc_free(st); + } + ctx->num_states = 0; +} + +static void free_mp_zimg(void *p) +{ + struct mp_zimg_context *ctx = p; + + destroy_zimg(ctx); + TA_FREEP(&ctx->tp); +} + +struct mp_zimg_context *mp_zimg_alloc(void) +{ + struct mp_zimg_context *ctx = talloc_ptrtype(NULL, ctx); + *ctx = (struct mp_zimg_context) { + .log = mp_null_log, + }; + ctx->opts = *(struct zimg_opts *)zimg_conf.defaults; + talloc_set_destructor(ctx, free_mp_zimg); + return ctx; +} + +void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx, + struct mpv_global *g) +{ + if (ctx->opts_cache) + return; + + ctx->opts_cache = m_config_cache_alloc(ctx, g, &zimg_conf); + destroy_zimg(ctx); // force update + mp_zimg_update_from_cmdline(ctx); // first update +} + +static int repack_entrypoint(void *user, unsigned i, unsigned x0, unsigned x1) +{ + struct mp_zimg_repack *r = user; + + // If reading is not aligned, just read slightly more data. + if (!r->pack) + x0 &= ~(unsigned)(mp_repack_get_align_x(r->repack) - 1); + + // mp_repack requirements and zimg guarantees. + assert(!(i & (mp_repack_get_align_y(r->repack) - 1))); + assert(!(x0 & (mp_repack_get_align_x(r->repack) - 1))); + + unsigned i_src = i & (r->pack ? r->zmask[0] : ZIMG_BUFFER_MAX); + unsigned i_dst = i & (r->pack ? 
ZIMG_BUFFER_MAX : r->zmask[0]); + + repack_line(r->repack, x0, i_dst, x0, i_src, x1 - x0); + + return 0; +} + +static bool wrap_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r, + struct mp_image *a_mpi) +{ + zimg_image_buffer *buf = &r->zbuf; + *buf = (zimg_image_buffer){ZIMG_API_VERSION}; + + struct mp_image *mpi = a_mpi; + if (r->pack) { + mpi = &r->cropped_tmp; + *mpi = *a_mpi; + int y1 = st->slice_y + st->slice_h; + // Due to subsampling we may assume the image to be bigger than it + // actually is (see real_h in setup_format). + if (mpi->h < y1) { + assert(y1 - mpi->h < 4); + mp_image_set_size(mpi, mpi->w, y1); + } + mp_image_crop(mpi, 0, st->slice_y, mpi->w, y1); + } + + bool direct[MP_MAX_PLANES] = {0}; + + for (int p = 0; p < mpi->num_planes; p++) { + // If alignment is good, try to avoid copy. + direct[p] = !((uintptr_t)mpi->planes[p] % ZIMG_ALIGN) && + !(mpi->stride[p] % ZIMG_ALIGN); + } + + if (!repack_config_buffers(r->repack, 0, r->pack ? mpi : r->tmp, + 0, r->pack ? r->tmp : mpi, direct)) + return false; + + for (int n = 0; n < MP_ARRAY_SIZE(buf->plane); n++) { + // Note: this is really the only place we have to care about plane + // permutation (zimg_image_buffer may have a different plane order + // than the shadow mpi like r->tmp). We never use the zimg indexes + // in other places. + int mplane = r->z_planes[n]; + if (mplane < 0) + continue; + + struct mp_image *tmpi = direct[mplane] ? mpi : r->tmp; + buf->plane[n].data = tmpi->planes[mplane]; + buf->plane[n].stride = tmpi->stride[mplane]; + buf->plane[n].mask = direct[mplane] ? ZIMG_BUFFER_MAX : r->zmask[mplane]; + } + + return true; +} + +// (ctx and st can be NULL for probing.) 
+static bool setup_format(zimg_image_format *zfmt, struct mp_zimg_repack *r, + bool pack, struct mp_image_params *user_fmt, + struct mp_zimg_context *ctx, + struct mp_zimg_state *st) +{ + r->fmt = *user_fmt; + r->pack = pack; + + zimg_image_format_default(zfmt, ZIMG_API_VERSION); + + int rp_flags = 0; + + // For e.g. RGB565, go to lowest depth on pack for less weird dithering. + if (r->pack) { + rp_flags |= REPACK_CREATE_ROUND_DOWN; + } else { + rp_flags |= REPACK_CREATE_EXPAND_8BIT; + } + + r->repack = mp_repack_create_planar(r->fmt.imgfmt, r->pack, rp_flags); + if (!r->repack) + return false; + + int align_x = mp_repack_get_align_x(r->repack); + + r->zimgfmt = r->pack ? mp_repack_get_format_src(r->repack) + : mp_repack_get_format_dst(r->repack); + + if (ctx) { + talloc_steal(r, r->repack); + } else { + TA_FREEP(&r->repack); + } + + struct mp_image_params fmt = r->fmt; + mp_image_params_guess_csp(&fmt); + + struct mp_regular_imgfmt desc; + if (!mp_get_regular_imgfmt(&desc, r->zimgfmt)) + return false; + + // Relies on zimg callbacks reading on 64 byte alignment. + if (!MP_IS_POWER_OF_2(align_x) || align_x > 64 / desc.component_size) + return false; + + // no weird stuff + if (desc.num_planes > 4) + return false; + + for (int n = 0; n < 4; n++) + r->z_planes[n] = -1; + + for (int n = 0; n < desc.num_planes; n++) { + if (desc.planes[n].num_components != 1) + return false; + int c = desc.planes[n].components[0]; + if (c < 1 || c > 4) + return false; + if (c < 4) { + // Unfortunately, ffmpeg prefers GBR order for planar RGB, while zimg + // is sane. This makes it necessary to determine and fix the order. + r->z_planes[c - 1] = n; + } else { + r->z_planes[3] = n; // alpha, always plane 4 in zimg + +#if HAVE_ZIMG_ALPHA + zfmt->alpha = fmt.alpha == MP_ALPHA_PREMUL + ? ZIMG_ALPHA_PREMULTIPLIED : ZIMG_ALPHA_STRAIGHT; +#else + return false; +#endif + } + } + + r->num_planes = desc.num_planes; + + // Take care of input/output size, including slicing. 
+ // Note: formats with subsampled chroma may have odd width or height in + // mpv and FFmpeg. This is because the width/height is actually a cropping + // rectangle. Reconstruct the image allocation size and set the cropping. + zfmt->width = r->real_w = MP_ALIGN_UP(fmt.w, 1 << desc.chroma_xs); + zfmt->height = r->real_h = MP_ALIGN_UP(fmt.h, 1 << desc.chroma_ys); + if (st) { + if (r->pack) { + zfmt->height = r->real_h = st->slice_h = + MPMIN(st->slice_y + st->slice_h, r->real_h) - st->slice_y; + + assert(MP_IS_ALIGNED(r->real_h, 1 << desc.chroma_ys)); + } else { + // Relies on st->dst being initialized first. + struct mp_zimg_repack *dst = st->dst; + + zfmt->active_region.width = dst->real_w * (double)fmt.w / dst->fmt.w; + zfmt->active_region.height = dst->real_h * st->scale_y; + + zfmt->active_region.top = st->slice_y * st->scale_y; + } + } + + zfmt->subsample_w = desc.chroma_xs; + zfmt->subsample_h = desc.chroma_ys; + + zfmt->color_family = ZIMG_COLOR_YUV; + if (desc.num_planes <= 2) { + zfmt->color_family = ZIMG_COLOR_GREY; + } else if (fmt.color.space == MP_CSP_RGB || fmt.color.space == MP_CSP_XYZ) { + zfmt->color_family = ZIMG_COLOR_RGB; + } + + if (desc.component_type == MP_COMPONENT_TYPE_UINT && + desc.component_size == 1) + { + zfmt->pixel_type = ZIMG_PIXEL_BYTE; + } else if (desc.component_type == MP_COMPONENT_TYPE_UINT && + desc.component_size == 2) + { + zfmt->pixel_type = ZIMG_PIXEL_WORD; + } else if (desc.component_type == MP_COMPONENT_TYPE_FLOAT && + desc.component_size == 2) + { + zfmt->pixel_type = ZIMG_PIXEL_HALF; + } else if (desc.component_type == MP_COMPONENT_TYPE_FLOAT && + desc.component_size == 4) + { + zfmt->pixel_type = ZIMG_PIXEL_FLOAT; + } else { + return false; + } + + // (Formats like P010 are basically reported as P016.) + zfmt->depth = desc.component_size * 8 + MPMIN(0, desc.component_pad); + + zfmt->pixel_range = fmt.color.levels == MP_CSP_LEVELS_PC ? 
+ ZIMG_RANGE_FULL : ZIMG_RANGE_LIMITED; + + zfmt->matrix_coefficients = mp_to_z_matrix(fmt.color.space); + zfmt->transfer_characteristics = mp_to_z_trc(fmt.color.gamma); + // For MP_CSP_XYZ only valid primaries are defined in ST 428-1 + zfmt->color_primaries = fmt.color.space == MP_CSP_XYZ + ? ZIMG_PRIMARIES_ST428 + : mp_to_z_prim(fmt.color.primaries); + zfmt->chroma_location = mp_to_z_chroma(fmt.chroma_location); + + if (ctx && ctx->opts.fast) { + // mpv's default for RGB output slows down zimg significantly. + if (zfmt->transfer_characteristics == ZIMG_TRANSFER_IEC_61966_2_1 && + zfmt->color_family == ZIMG_COLOR_RGB) + zfmt->transfer_characteristics = ZIMG_TRANSFER_BT709; + } + + // mpv treats _some_ gray formats as RGB; zimg doesn't like this. + if (zfmt->color_family == ZIMG_COLOR_GREY && + zfmt->matrix_coefficients == ZIMG_MATRIX_RGB) + zfmt->matrix_coefficients = ZIMG_MATRIX_BT470_BG; + + return true; +} + +static bool allocate_buffer(struct mp_zimg_state *st, struct mp_zimg_repack *r) +{ + unsigned lines = 0; + int err; + if (r->pack) { + err = zimg_filter_graph_get_output_buffering(st->graph, &lines); + } else { + err = zimg_filter_graph_get_input_buffering(st->graph, &lines); + } + + if (err) + return false; + + r->zmask[0] = zimg_select_buffer_mask(lines); + + // Either ZIMG_BUFFER_MAX, or a power-of-2 slice buffer. + assert(r->zmask[0] == ZIMG_BUFFER_MAX || MP_IS_POWER_OF_2(r->zmask[0] + 1)); + + int h = r->zmask[0] == ZIMG_BUFFER_MAX ? r->real_h : r->zmask[0] + 1; + if (h >= r->real_h) { + h = r->real_h; + r->zmask[0] = ZIMG_BUFFER_MAX; + } + + r->tmp = mp_image_alloc(r->zimgfmt, r->real_w, h); + talloc_steal(r, r->tmp); + + if (!r->tmp) + return false; + + // Note: although zimg doesn't require that the chroma plane's zmask is + // divided by the full size zmask, the repack callback requires it, + // since mp_repack can handle only proper slices. 
+ for (int n = 1; n < r->tmp->fmt.num_planes; n++) { + r->zmask[n] = r->zmask[0]; + if (r->zmask[0] != ZIMG_BUFFER_MAX) + r->zmask[n] = r->zmask[n] >> r->tmp->fmt.ys[n]; + } + + return true; +} + +// Prepare the conversion state "st" for one slice of the target image, starting +// at row slice_y and spanning slice_h rows: allocate the src/dst repack structs, +// set up the dst format first and then the src format, build the zimg filter +// graph from ctx->opts, and allocate the ZIMG_ALIGN-aligned scratch memory and +// the repack buffers. Returns false on failure. +static bool mp_zimg_state_init(struct mp_zimg_context *ctx, + struct mp_zimg_state *st, + int slice_y, int slice_h) +{ + struct zimg_opts *opts = &ctx->opts; + + st->src = talloc_zero(NULL, struct mp_zimg_repack); + st->dst = talloc_zero(NULL, struct mp_zimg_repack); + + st->scale_y = ctx->src.h / (double)ctx->dst.h; + st->slice_y = slice_y; + st->slice_h = slice_h; + + zimg_image_format src_fmt, dst_fmt; + + // Note: do dst first, because src uses fields from dst. + if (!setup_format(&dst_fmt, st->dst, true, &ctx->dst, ctx, st) || + !setup_format(&src_fmt, st->src, false, &ctx->src, ctx, st)) + return false; + + zimg_graph_builder_params params; + zimg_graph_builder_params_default(&params, ZIMG_API_VERSION); + + params.resample_filter = opts->scaler; + params.filter_param_a = opts->scaler_params[0]; + params.filter_param_b = opts->scaler_params[1]; + + params.resample_filter_uv = opts->scaler_chroma; + params.filter_param_a_uv = opts->scaler_chroma_params[0]; + params.filter_param_b_uv = opts->scaler_chroma_params[1]; + + params.dither_type = opts->dither; + + params.cpu_type = ZIMG_CPU_AUTO_64B; + + if (opts->fast) + params.allow_approximate_gamma = 1; + + // leave at default for SDR, which means 100 cd/m^2 for zimg + if (ctx->dst.color.hdr.max_luma > 0 && mp_trc_is_hdr(ctx->dst.color.gamma)) + params.nominal_peak_luminance = ctx->dst.color.hdr.max_luma; + + st->graph = zimg_filter_graph_build(&src_fmt, &dst_fmt, &params); + if (!st->graph) { + char err[128] = {0}; + zimg_get_last_error(err, sizeof(err) - 1); + MP_ERR(ctx, "zimg_filter_graph_build: %s \n", err); + return false; + } + + size_t tmp_size; + if (!zimg_filter_graph_get_tmp_size(st->graph, &tmp_size)) { + tmp_size = MP_ALIGN_UP(tmp_size, ZIMG_ALIGN) + ZIMG_ALIGN; + st->tmp_alloc = ta_alloc_size(NULL, tmp_size); + if
(st->tmp_alloc) + st->tmp = (void *)MP_ALIGN_UP((uintptr_t)st->tmp_alloc, ZIMG_ALIGN); + } + + if (!st->tmp_alloc) + return false; + + if (!allocate_buffer(st, st->src) || !allocate_buffer(st, st->dst)) + return false; + + return true; +} + +bool mp_zimg_config(struct mp_zimg_context *ctx) +{ + destroy_zimg(ctx); + + if (ctx->opts_cache) + mp_zimg_update_from_cmdline(ctx); + + int slices = ctx->opts.threads; + if (slices < 1) + slices = av_cpu_count(); + slices = MPCLAMP(slices, 1, 64); + + struct mp_imgfmt_desc dstfmt = mp_imgfmt_get_desc(ctx->dst.imgfmt); + if (!dstfmt.align_y) + goto fail; + int full_h = MP_ALIGN_UP(ctx->dst.h, dstfmt.align_y); + int slice_h = (full_h + slices - 1) / slices; + slice_h = MP_ALIGN_UP(slice_h, dstfmt.align_y); + slice_h = MP_ALIGN_UP(slice_h, 64); // for dithering and minimum slice size + slices = (full_h + slice_h - 1) / slice_h; + + int threads = slices - 1; + if (threads != ctx->current_thread_count) { + // Just destroy and recreate all - dumb and costly, but rarely happens. + TA_FREEP(&ctx->tp); + ctx->current_thread_count = 0; + if (threads) { + MP_VERBOSE(ctx, "using %d threads for scaling\n", threads); + ctx->tp = mp_thread_pool_create(NULL, threads, threads, threads); + if (!ctx->tp) + goto fail; + ctx->current_thread_count = threads; + } + } + + for (int n = 0; n < slices; n++) { + struct mp_zimg_state *st = talloc_zero(NULL, struct mp_zimg_state); + MP_TARRAY_APPEND(ctx, ctx->states, ctx->num_states, st); + + if (!mp_zimg_state_init(ctx, st, n * slice_h, slice_h)) + goto fail; + } + + assert(ctx->num_states == slices); + + return true; + +fail: + destroy_zimg(ctx); + return false; +} + +bool mp_zimg_config_image_params(struct mp_zimg_context *ctx) +{ + if (ctx->num_states) { + // All states are the same, so checking only one of them is sufficient. 
+ struct mp_zimg_state *st = ctx->states[0]; + if (st->src && mp_image_params_equal(&ctx->src, &st->src->fmt) && + st->dst && mp_image_params_equal(&ctx->dst, &st->dst->fmt) && + (!ctx->opts_cache || !m_config_cache_update(ctx->opts_cache)) && + st->graph) + return true; + } + return mp_zimg_config(ctx); +} + +static void do_convert(struct mp_zimg_state *st) +{ + assert(st->graph); + + // An annoyance. + zimg_image_buffer *zsrc = &st->src->zbuf; + zimg_image_buffer_const zsrc_c = {ZIMG_API_VERSION}; + for (int n = 0; n < MP_ARRAY_SIZE(zsrc_c.plane); n++) { + zsrc_c.plane[n].data = zsrc->plane[n].data; + zsrc_c.plane[n].stride = zsrc->plane[n].stride; + zsrc_c.plane[n].mask = zsrc->plane[n].mask; + } + + // (The API promises to succeed if no user callbacks fail, so no need + // to check the return value.) + zimg_filter_graph_process(st->graph, &zsrc_c, &st->dst->zbuf, st->tmp, + repack_entrypoint, st->src, + repack_entrypoint, st->dst); +} + +static void do_convert_thread(void *ptr) +{ + struct mp_zimg_state *st = ptr; + + do_convert(st); + mp_waiter_wakeup(&st->thread_waiter, 0); +} + +bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst, + struct mp_image *src) +{ + ctx->src = src->params; + ctx->dst = dst->params; + + if (!mp_zimg_config_image_params(ctx)) { + MP_ERR(ctx, "zimg initialization failed.\n"); + return false; + } + + for (int n = 0; n < ctx->num_states; n++) { + struct mp_zimg_state *st = ctx->states[n]; + + if (!wrap_buffer(st, st->src, src) || !wrap_buffer(st, st->dst, dst)) { + MP_ERR(ctx, "zimg repacker initialization failed.\n"); + return false; + } + } + + for (int n = 1; n < ctx->num_states; n++) { + struct mp_zimg_state *st = ctx->states[n]; + + st->thread_waiter = (struct mp_waiter)MP_WAITER_INITIALIZER; + + bool r = mp_thread_pool_run(ctx->tp, do_convert_thread, st); + // This is guaranteed by the API; and unrolling would be inconvenient. 
+ assert(r); + } + + do_convert(ctx->states[0]); + + for (int n = 1; n < ctx->num_states; n++) { + struct mp_zimg_state *st = ctx->states[n]; + + mp_waiter_wait(&st->thread_waiter); + } + + return true; +} + +static bool supports_format(int imgfmt, bool out) +{ + struct mp_image_params fmt = {.imgfmt = imgfmt}; + struct mp_zimg_repack t; + zimg_image_format zfmt; + return setup_format(&zfmt, &t, out, &fmt, NULL, NULL); +} + +bool mp_zimg_supports_in_format(int imgfmt) +{ + return supports_format(imgfmt, false); +} + +bool mp_zimg_supports_out_format(int imgfmt) +{ + return supports_format(imgfmt, true); +} diff --git a/video/zimg.h b/video/zimg.h new file mode 100644 index 0000000..be018ca --- /dev/null +++ b/video/zimg.h @@ -0,0 +1,73 @@ +#pragma once + +#include <stdbool.h> + +#include <zimg.h> + +#include "mp_image.h" + +#define ZIMG_ALIGN 64 + +struct mpv_global; + +bool mp_zimg_supports_in_format(int imgfmt); +bool mp_zimg_supports_out_format(int imgfmt); + +struct zimg_opts { + int scaler; + double scaler_params[2]; + int scaler_chroma; + double scaler_chroma_params[2]; + int dither; + bool fast; + int threads; +}; + +extern const struct zimg_opts zimg_opts_defaults; + +struct mp_zimg_context { + // Can be set for verbose error printing. + struct mp_log *log; + + // User configuration. Note: changing these requires calling mp_zimg_config() + // to update the filter graph. The first mp_zimg_convert() call (or if the + // image format changes) will do this automatically. + struct zimg_opts opts; + + // Input/output parameters. Note: if these mismatch with the + // mp_zimg_convert() parameters, mp_zimg_config() will be called + // automatically. + struct mp_image_params src, dst; + + // Cached zimg state (if any). Private, do not touch. + struct m_config_cache *opts_cache; + struct mp_zimg_state **states; + int num_states; + struct mp_thread_pool *tp; + int current_thread_count; +}; + +// Allocate a zimg context. Always succeeds. 
Returns a talloc pointer (use +// talloc_free() to release it). +struct mp_zimg_context *mp_zimg_alloc(void); + +// Enable auto-update of parameters from command line. Don't try to set custom +// options (other than possibly .src/.dst), because they might be overwritten +// if the user changes any options. +void mp_zimg_enable_cmdline_opts(struct mp_zimg_context *ctx, + struct mpv_global *g); + +// Try to build the conversion chain using the parameters currently set in ctx. +// If this succeeds, mp_zimg_convert() will always succeed (probably), as long +// as the input has the same parameters. +// Returns false on error. +bool mp_zimg_config(struct mp_zimg_context *ctx); + +// Similar to mp_zimg_config(), but assume none of the user parameters changed, +// except possibly .src and .dst. This essentially checks whether src/dst +// changed, and if so, calls mp_zimg_config(). +bool mp_zimg_config_image_params(struct mp_zimg_context *ctx); + +// Convert/scale src to dst. On failure, the data in dst is not touched. +bool mp_zimg_convert(struct mp_zimg_context *ctx, struct mp_image *dst, + struct mp_image *src); |