From ff6e3c025658a5fa1affd094f220b623e7e1b24b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:38:23 +0200 Subject: Adding upstream version 6.338.2. Signed-off-by: Daniel Baumann --- src/opengl/common.h | 66 +++ src/opengl/context.c | 332 +++++++++++ src/opengl/formats.c | 485 ++++++++++++++++ src/opengl/formats.h | 32 ++ src/opengl/gpu.c | 645 +++++++++++++++++++++ src/opengl/gpu.h | 141 +++++ src/opengl/gpu_pass.c | 707 +++++++++++++++++++++++ src/opengl/gpu_tex.c | 1078 +++++++++++++++++++++++++++++++++++ src/opengl/include/glad/meson.build | 29 + src/opengl/loader_egl.c | 2 + src/opengl/loader_gl.c | 2 + src/opengl/meson.build | 76 +++ src/opengl/stubs.c | 63 ++ src/opengl/swapchain.c | 278 +++++++++ src/opengl/utils.c | 158 +++++ src/opengl/utils.h | 57 ++ 16 files changed, 4151 insertions(+) create mode 100644 src/opengl/common.h create mode 100644 src/opengl/context.c create mode 100644 src/opengl/formats.c create mode 100644 src/opengl/formats.h create mode 100644 src/opengl/gpu.c create mode 100644 src/opengl/gpu.h create mode 100644 src/opengl/gpu_pass.c create mode 100644 src/opengl/gpu_tex.c create mode 100644 src/opengl/include/glad/meson.build create mode 100644 src/opengl/loader_egl.c create mode 100644 src/opengl/loader_gl.c create mode 100644 src/opengl/meson.build create mode 100644 src/opengl/stubs.c create mode 100644 src/opengl/swapchain.c create mode 100644 src/opengl/utils.c create mode 100644 src/opengl/utils.h (limited to 'src/opengl') diff --git a/src/opengl/common.h b/src/opengl/common.h new file mode 100644 index 0000000..c84c69f --- /dev/null +++ b/src/opengl/common.h @@ -0,0 +1,66 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#pragma once + +#include "../common.h" +#include "../log.h" +#include "../gpu.h" +#include "pl_thread.h" + +#include + +// Collision with llvm-mingw +#undef MemoryBarrier + +#define GLAD_GL +#define GLAD_GLES2 +#include +#include + +typedef GladGLContext gl_funcs; + +// PL_PRIV(pl_opengl) +struct gl_ctx { + pl_log log; + struct pl_opengl_params params; + bool is_debug; + bool is_debug_egl; + bool is_gles; + + // For context locking + pl_mutex lock; + int count; + + // Dispatch table + gl_funcs func; +}; + +struct gl_cb { + void (*callback)(void *priv); + void *priv; + GLsync sync; +}; + +struct fbo_format { + pl_fmt fmt; + const struct gl_format *glfmt; +}; + +// For locking/unlocking +bool gl_make_current(pl_opengl gl); +void gl_release_current(pl_opengl gl); diff --git a/src/opengl/context.c b/src/opengl/context.c new file mode 100644 index 0000000..6ca14b8 --- /dev/null +++ b/src/opengl/context.c @@ -0,0 +1,332 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include + +#include "common.h" +#include "utils.h" +#include "gpu.h" + +const struct pl_opengl_params pl_opengl_default_params = {0}; + +static void GLAPIENTRY debug_cb(GLenum source, GLenum type, GLuint id, + GLenum severity, GLsizei length, + const GLchar *message, const void *userParam) +{ + pl_log log = (void *) userParam; + enum pl_log_level level = PL_LOG_ERR; + + switch (severity) { + case GL_DEBUG_SEVERITY_NOTIFICATION:level = PL_LOG_DEBUG; break; + case GL_DEBUG_SEVERITY_LOW: level = PL_LOG_INFO; break; + case GL_DEBUG_SEVERITY_MEDIUM: level = PL_LOG_WARN; break; + case GL_DEBUG_SEVERITY_HIGH: level = PL_LOG_ERR; break; + } + + pl_msg(log, level, "GL: %s", message); + + if (level <= PL_LOG_ERR) + pl_log_stack_trace(log, level); +} + +static void GLAPIENTRY debug_cb_egl(EGLenum error, const char *command, + EGLint messageType, EGLLabelKHR threadLabel, + EGLLabelKHR objectLabel, const char *message) +{ + pl_log log = threadLabel; + enum pl_log_level level = PL_LOG_ERR; + + switch (messageType) { + case EGL_DEBUG_MSG_CRITICAL_KHR: level = PL_LOG_FATAL; break; + case EGL_DEBUG_MSG_ERROR_KHR: level = PL_LOG_ERR; break; + case EGL_DEBUG_MSG_WARN_KHR: level = PL_LOG_WARN; break; + case EGL_DEBUG_MSG_INFO_KHR: level = PL_LOG_DEBUG; break; + } + + pl_msg(log, level, "EGL: %s: %s %s", command, egl_err_str(error), + message); + + if (level <= PL_LOG_ERR) + pl_log_stack_trace(log, level); +} + +// Guards access to the (thread-unsafe) glad global EGL state +static pl_static_mutex glad_egl_mutex = PL_STATIC_MUTEX_INITIALIZER; + +void pl_opengl_destroy(pl_opengl *ptr) +{ + pl_opengl pl_gl = *ptr; + if (!pl_gl) + return; + + struct gl_ctx *p = PL_PRIV(pl_gl); + gl_funcs *gl = &p->func; + if (!gl_make_current(pl_gl)) { + PL_WARN(p, "Failed uninitializing OpenGL context, leaking resources!"); + return; + } + + if (p->is_debug) + gl->DebugMessageCallback(NULL, NULL); + + if (p->is_debug_egl) + eglDebugMessageControlKHR(NULL, NULL); + + pl_gpu_destroy(pl_gl->gpu); + +#ifdef PL_HAVE_GL_PROC_ADDR + if (p->is_gles) { + gladLoaderUnloadGLES2Context(gl); + } else { + gladLoaderUnloadGLContext(gl); + } + + bool used_loader = !p->params.get_proc_addr && !p->params.get_proc_addr_ex; + if (p->params.egl_display && used_loader) { + pl_static_mutex_lock(&glad_egl_mutex); + gladLoaderUnloadEGL(); + pl_static_mutex_unlock(&glad_egl_mutex); + } +#endif + + gl_release_current(pl_gl); + pl_mutex_destroy(&p->lock); + pl_free_ptr((void **) ptr); + +} + +typedef PL_ARRAY(const char *) ext_arr_t; +static void add_exts_str(void *alloc, ext_arr_t *arr, const char *extstr) +{ + pl_str rest = pl_str_strip(pl_str0(pl_strdup0(alloc, pl_str0(extstr)))); + while (rest.len) { + pl_str ext = pl_str_split_char(rest, ' ', &rest); + ext.buf[ext.len] = '\0'; // re-use separator for terminator + PL_ARRAY_APPEND(alloc, *arr, (char *) ext.buf); + } +} + +pl_opengl pl_opengl_create(pl_log log, 
const struct pl_opengl_params *params) +{ + params = PL_DEF(params, &pl_opengl_default_params); + struct pl_opengl_t *pl_gl = pl_zalloc_obj(NULL, pl_gl, struct gl_ctx); + struct gl_ctx *p = PL_PRIV(pl_gl); + gl_funcs *gl = &p->func; + p->params = *params; + p->log = log; + + pl_mutex_init_type(&p->lock, PL_MUTEX_RECURSIVE); + if (!gl_make_current(pl_gl)) { + pl_free(pl_gl); + return NULL; + } + + bool ok; + if (params->get_proc_addr_ex) { + ok = gladLoadGLContextUserPtr(gl, params->get_proc_addr_ex, params->proc_ctx); + } else if (params->get_proc_addr) { + ok = gladLoadGLContext(gl, params->get_proc_addr); + } else { +#ifdef PL_HAVE_GL_PROC_ADDR + ok = gladLoaderLoadGLContext(gl); +#else + PL_FATAL(p, "No `glGetProcAddress` function provided, and libplacebo " + "built without its built-in OpenGL loader!"); + goto error; +#endif + } + + if (!ok) { + PL_INFO(p, "Failed loading core GL, retrying as GLES..."); + } else if (gl_is_gles(pl_gl)) { + PL_INFO(p, "GL context seems to be OpenGL ES, reloading as GLES..."); + ok = false; + } + + if (!ok) { + memset(gl, 0, sizeof(*gl)); + if (params->get_proc_addr_ex) { + ok = gladLoadGLES2ContextUserPtr(gl, params->get_proc_addr_ex, params->proc_ctx); + } else if (params->get_proc_addr) { + ok = gladLoadGLES2Context(gl, params->get_proc_addr); + } else { +#ifdef PL_HAVE_GL_PROC_ADDR + ok = gladLoaderLoadGLES2Context(gl); +#else + pl_unreachable(); +#endif + } + p->is_gles = ok; + } + + if (!ok) { + PL_FATAL(p, "Failed to initialize OpenGL context - make sure a valid " + "OpenGL context is bound to the current thread!"); + goto error; + } + + const char *version = (const char *) gl->GetString(GL_VERSION); + if (version) { + const char *ver = version; + while (!isdigit(*ver) && *ver != '\0') + ver++; + if (sscanf(ver, "%d.%d", &pl_gl->major, &pl_gl->minor) != 2) { + PL_FATAL(p, "Invalid GL_VERSION string: %s\n", version); + goto error; + } + } + + if (!pl_gl->major) { + PL_FATAL(p, "No OpenGL version detected - make sure an OpenGL context " + "is bound to the current thread!"); + goto error; + } + + static const int gl_ver_req = 3; + if (pl_gl->major < gl_ver_req) { + PL_FATAL(p, "OpenGL version too old (%d < %d), please use a newer " + "OpenGL implementation or downgrade libplacebo!", + pl_gl->major, gl_ver_req); + goto error; + } + + PL_INFO(p, "Detected OpenGL version strings:"); + PL_INFO(p, " GL_VERSION: %s", version); + PL_INFO(p, " GL_VENDOR: %s", (char *) gl->GetString(GL_VENDOR)); + PL_INFO(p, " GL_RENDERER: %s", (char *) gl->GetString(GL_RENDERER)); + + ext_arr_t exts = {0}; + if (pl_gl->major >= 3) { + gl->GetIntegerv(GL_NUM_EXTENSIONS, &exts.num); + PL_ARRAY_RESIZE(pl_gl, exts, exts.num); + for (int i = 0; i < exts.num; i++) + exts.elem[i] = (const char *) gl->GetStringi(GL_EXTENSIONS, i); + } else { + add_exts_str(pl_gl, &exts, (const char *) gl->GetString(GL_EXTENSIONS)); + } + + if (pl_msg_test(log, PL_LOG_DEBUG)) { + PL_DEBUG(p, " GL_EXTENSIONS:"); + for (int i = 0; i < exts.num; i++) + PL_DEBUG(p, " %s", exts.elem[i]); + } + + if (params->egl_display) { + pl_static_mutex_lock(&glad_egl_mutex); + if (params->get_proc_addr_ex) { + ok = gladLoadEGLUserPtr(params->egl_display, params->get_proc_addr_ex, + params->proc_ctx); + } else if (params->get_proc_addr) { + ok = gladLoadEGL(params->egl_display, params->get_proc_addr); + } else { +#ifdef PL_HAVE_GL_PROC_ADDR + ok = gladLoaderLoadEGL(params->egl_display); +#else + pl_unreachable(); +#endif + } + pl_static_mutex_unlock(&glad_egl_mutex); + + if (!ok) { + PL_FATAL(p, "Failed loading EGL 
functions - double check EGLDisplay?"); + goto error; + } + + int start = exts.num; + add_exts_str(pl_gl, &exts, eglQueryString(params->egl_display, + EGL_EXTENSIONS)); + if (exts.num > start) { + PL_DEBUG(p, " EGL_EXTENSIONS:"); + for (int i = start; i < exts.num; i++) + PL_DEBUG(p, " %s", exts.elem[i]); + } + } + + pl_gl->extensions = exts.elem; + pl_gl->num_extensions = exts.num; + + if (!params->allow_software && gl_is_software(pl_gl)) { + PL_FATAL(p, "OpenGL context is suspected to be a software rasterizer, " + "but `allow_software` is false."); + goto error; + } + + if (params->debug) { + if (pl_opengl_has_ext(pl_gl, "GL_KHR_debug")) { + gl->DebugMessageCallback(debug_cb, log); + gl->Enable(GL_DEBUG_OUTPUT); + p->is_debug = true; + } else { + PL_WARN(p, "OpenGL debugging requested, but GL_KHR_debug is not " + "available... ignoring!"); + } + + if (params->egl_display && pl_opengl_has_ext(pl_gl, "EGL_KHR_debug")) { + static const EGLAttrib attribs[] = { + // Enable everything under the sun, because the `pl_ctx` log + // level may change at runtime. + EGL_DEBUG_MSG_CRITICAL_KHR, EGL_TRUE, + EGL_DEBUG_MSG_ERROR_KHR, EGL_TRUE, + EGL_DEBUG_MSG_WARN_KHR, EGL_TRUE, + EGL_DEBUG_MSG_INFO_KHR, EGL_TRUE, + EGL_NONE, + }; + + eglDebugMessageControlKHR(debug_cb_egl, attribs); + eglLabelObjectKHR(NULL, EGL_OBJECT_THREAD_KHR, NULL, (void *) log); + p->is_debug_egl = true; + } + } + + pl_gl->gpu = pl_gpu_create_gl(log, pl_gl, params); + if (!pl_gl->gpu) + goto error; + + gl_release_current(pl_gl); + return pl_gl; + +error: + PL_FATAL(p, "Failed initializing opengl context!"); + gl_release_current(pl_gl); + pl_opengl_destroy((pl_opengl *) &pl_gl); + return NULL; +} + +bool gl_make_current(pl_opengl pl_gl) +{ + struct gl_ctx *p = PL_PRIV(pl_gl); + pl_mutex_lock(&p->lock); + if (!p->count && p->params.make_current) { + if (!p->params.make_current(p->params.priv)) { + PL_ERR(p, "Failed making OpenGL context current on calling thread!"); + pl_mutex_unlock(&p->lock); + return false; + } + } + + p->count++; + return true; +} + +void gl_release_current(pl_opengl pl_gl) +{ + struct gl_ctx *p = PL_PRIV(pl_gl); + p->count--; + if (!p->count && p->params.release_current) + p->params.release_current(p->params.priv); + pl_mutex_unlock(&p->lock); +} diff --git a/src/opengl/formats.c b/src/opengl/formats.c new file mode 100644 index 0000000..6604835 --- /dev/null +++ b/src/opengl/formats.c @@ -0,0 +1,485 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . 
+ */ + +#include "gpu.h" +#include "common.h" +#include "formats.h" +#include "utils.h" + +#ifdef PL_HAVE_UNIX +static bool supported_fourcc(struct pl_gl *p, EGLint fourcc) +{ + for (int i = 0; i < p->egl_formats.num; ++i) + if (fourcc == p->egl_formats.elem[i]) + return true; + return false; +} +#endif + +#define FMT(_name, bits, ftype, _caps) \ + (struct pl_fmt_t) { \ + .name = _name, \ + .type = PL_FMT_##ftype, \ + .caps = (enum pl_fmt_caps) (_caps), \ + .sample_order = {0, 1, 2, 3}, \ + .component_depth = {bits, bits, bits, bits}, \ + } + +// Convenience to make the names simpler +enum { + // Type aliases + U8 = GL_UNSIGNED_BYTE, + U16 = GL_UNSIGNED_SHORT, + U32 = GL_UNSIGNED_INT, + I8 = GL_BYTE, + I16 = GL_SHORT, + I32 = GL_INT, + FLT = GL_FLOAT, + + // Component aliases + R = GL_RED, + RG = GL_RG, + RGB = GL_RGB, + RGBA = GL_RGBA, + BGRA = GL_BGRA, + RI = GL_RED_INTEGER, + RGI = GL_RG_INTEGER, + RGBI = GL_RGB_INTEGER, + RGBAI = GL_RGBA_INTEGER, + + // Capability aliases + S = PL_FMT_CAP_SAMPLEABLE, + L = PL_FMT_CAP_LINEAR, + F = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE, // FBO support + V = PL_FMT_CAP_VERTEX, +}; + +// Basic 8-bit formats +const struct gl_format formats_norm8[] = { + {GL_R8, R, U8, FMT("r8", 8, UNORM, S|L|F|V)}, + {GL_RG8, RG, U8, FMT("rg8", 8, UNORM, S|L|F|V)}, + {GL_RGB8, RGB, U8, FMT("rgb8", 8, UNORM, S|L|F|V)}, + {GL_RGBA8, RGBA, U8, FMT("rgba8", 8, UNORM, S|L|F|V)}, +}; + +// Signed variants +/* TODO: these are broken in mesa +const struct gl_format formats_snorm8[] = { + {GL_R8_SNORM, R, I8, FMT("r8s", 8, SNORM, S|L|F|V)}, + {GL_RG8_SNORM, RG, I8, FMT("rg8s", 8, SNORM, S|L|F|V)}, + {GL_RGB8_SNORM, RGB, I8, FMT("rgb8s", 8, SNORM, S|L|F|V)}, + {GL_RGBA8_SNORM, RGBA, I8, FMT("rgba8s", 8, SNORM, S|L|F|V)}, +}; +*/ + +// BGRA 8-bit +const struct gl_format formats_bgra8[] = { + {GL_RGBA8, BGRA, U8, { + .name = "bgra8", + .type = PL_FMT_UNORM, + .caps = S|L|F|V, + .sample_order = {2, 1, 0, 3}, + .component_depth = {8, 8, 8, 8}, + }}, +}; + +// Basic 16-bit formats, excluding rgb16 (special cased below) +const struct gl_format formats_norm16[] = { + {GL_R16, R, U16, FMT("r16", 16, UNORM, S|L|F|V)}, + {GL_RG16, RG, U16, FMT("rg16", 16, UNORM, S|L|F|V)}, + {GL_RGBA16, RGBA, U16, FMT("rgba16", 16, UNORM, S|L|F|V)}, +}; + +// Renderable version of rgb16 +const struct gl_format formats_rgb16_fbo[] = { + {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|F|V)}, +}; + +// Non-renderable version of rgb16 +const struct gl_format formats_rgb16_fallback[] = { + {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|V)}, +}; + +// Signed 16-bit variants +/* TODO: these are broken in mesa and nvidia +const struct gl_format formats_snorm16[] = { + {GL_R16_SNORM, R, I16, FMT("r16s", 16, SNORM, S|L|F|V)}, + {GL_RG16_SNORM, RG, I16, FMT("rg16s", 16, SNORM, S|L|F|V)}, + {GL_RGB16_SNORM, RGB, I16, FMT("rgb16s", 16, SNORM, S|L|F|V)}, + {GL_RGBA16_SNORM, RGBA, I16, FMT("rgba16s", 16, SNORM, S|L|F|V)}, +}; +*/ + +// Floating point texture formats +const struct gl_format formats_float[] = { + {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L|F)}, + {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L|F)}, + {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L|F)}, + {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L|F)}, + {GL_R32F, R, FLT, FMT("r32f", 32, FLOAT, S|L|F|V)}, + {GL_RG32F, RG, FLT, FMT("rg32f", 32, FLOAT, S|L|F|V)}, + {GL_RGB32F, RGB, FLT, FMT("rgb32f", 32, FLOAT, S|L|F|V)}, + {GL_RGBA32F, RGBA, FLT, FMT("rgba32f", 32, FLOAT, S|L|F|V)}, +}; + +// Renderable 16-bit float formats (excluding 
rgb16f) +const struct gl_format formats_float16_fbo[] = { + {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L|F)}, + {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L|F)}, + {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L)}, + {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L|F)}, +}; + +// Non-renderable 16-bit float formats +const struct gl_format formats_float16_fallback[] = { + {GL_R16F, R, FLT, FMT("r16f", 16, FLOAT, S|L)}, + {GL_RG16F, RG, FLT, FMT("rg16f", 16, FLOAT, S|L)}, + {GL_RGB16F, RGB, FLT, FMT("rgb16f", 16, FLOAT, S|L)}, + {GL_RGBA16F, RGBA, FLT, FMT("rgba16f", 16, FLOAT, S|L)}, +}; + +// (Unsigned) integer formats +const struct gl_format formats_uint[] = { + {GL_R8UI, RI, U8, FMT("r8u", 8, UINT, S|F|V)}, + {GL_RG8UI, RGI, U8, FMT("rg8u", 8, UINT, S|F|V)}, + {GL_RGB8UI, RGBI, U8, FMT("rgb8u", 8, UINT, S|V)}, + {GL_RGBA8UI, RGBAI, U8, FMT("rgba8u", 8, UINT, S|F|V)}, + {GL_R16UI, RI, U16, FMT("r16u", 16, UINT, S|F|V)}, + {GL_RG16UI, RGI, U16, FMT("rg16u", 16, UINT, S|F|V)}, + {GL_RGB16UI, RGBI, U16, FMT("rgb16u", 16, UINT, S|V)}, + {GL_RGBA16UI, RGBAI, U16, FMT("rgba16u", 16, UINT, S|F|V)}, +}; + +/* TODO + {GL_R32UI, RI, U32, FMT("r32u", 32, UINT)}, + {GL_RG32UI, RGI, U32, FMT("rg32u", 32, UINT)}, + {GL_RGB32UI, RGBI, U32, FMT("rgb32u", 32, UINT)}, + {GL_RGBA32UI, RGBAI, U32, FMT("rgba32u", 32, UINT)}, + + {GL_R8I, RI, I8, FMT("r8i", 8, SINT)}, + {GL_RG8I, RGI, I8, FMT("rg8i", 8, SINT)}, + {GL_RGB8I, RGBI, I8, FMT("rgb8i", 8, SINT)}, + {GL_RGBA8I, RGBAI, I8, FMT("rgba8i", 8, SINT)}, + {GL_R16I, RI, I16, FMT("r16i", 16, SINT)}, + {GL_RG16I, RGI, I16, FMT("rg16i", 16, SINT)}, + {GL_RGB16I, RGBI, I16, FMT("rgb16i", 16, SINT)}, + {GL_RGBA16I, RGBAI, I16, FMT("rgba16i", 16, SINT)}, + {GL_R32I, RI, I32, FMT("r32i", 32, SINT)}, + {GL_RG32I, RGI, I32, FMT("rg32i", 32, SINT)}, + {GL_RGB32I, RGBI, I32, FMT("rgb32i", 32, SINT)}, + {GL_RGBA32I, RGBAI, I32, FMT("rgba32i", 32, SINT)}, +*/ + +// GL2 legacy formats +const struct gl_format formats_legacy_gl2[] = { + {GL_RGB8, RGB, U8, FMT("rgb8", 8, UNORM, S|L|V)}, + {GL_RGBA8, RGBA, U8, FMT("rgba8", 8, UNORM, S|L|V)}, + {GL_RGB16, RGB, U16, FMT("rgb16", 16, UNORM, S|L|V)}, + {GL_RGBA16, RGBA, U16, FMT("rgba16", 16, UNORM, S|L|V)}, +}; + +// GLES2 legacy formats +const struct gl_format formats_legacy_gles2[] = { + {GL_RGB, RGB, U8, FMT("rgb", 8, UNORM, S|L)}, + {GL_RGBA, RGBA, U8, FMT("rgba", 8, UNORM, S|L)}, +}; + +// GLES BGRA +const struct gl_format formats_bgra_gles[] = { + {GL_BGRA, BGRA, U8, { + .name = "bgra8", + .type = PL_FMT_UNORM, + .caps = S|L|F|V, + .sample_order = {2, 1, 0, 3}, + .component_depth = {8, 8, 8, 8}, + }}, +}; + +// Fallback for vertex-only formats, as a last resort +const struct gl_format formats_basic_vertex[] = { + {GL_R32F, R, FLT, FMT("r32f", 32, FLOAT, V)}, + {GL_RG32F, RG, FLT, FMT("rg32f", 32, FLOAT, V)}, + {GL_RGB32F, RGB, FLT, FMT("rgb32f", 32, FLOAT, V)}, + {GL_RGBA32F, RGBA, FLT, FMT("rgba32f", 32, FLOAT, V)}, +}; + +static void add_format(pl_gpu pgpu, const struct gl_format *gl_fmt) +{ + struct pl_gpu_t *gpu = (struct pl_gpu_t *) pgpu; + struct pl_gl *p = PL_PRIV(gpu); + + struct pl_fmt_t *fmt = pl_alloc_obj(gpu, fmt, gl_fmt); + const struct gl_format **fmtp = PL_PRIV(fmt); + *fmt = gl_fmt->tmpl; + *fmtp = gl_fmt; + + // Calculate the host size and number of components + switch (gl_fmt->fmt) { + case GL_RED: + case GL_RED_INTEGER: + fmt->num_components = 1; + break; + case GL_RG: + case GL_RG_INTEGER: + fmt->num_components = 2; + break; + case GL_RGB: + case GL_RGB_INTEGER: + fmt->num_components = 3; + 
break; + case GL_RGBA: + case GL_RGBA_INTEGER: + case GL_BGRA: + fmt->num_components = 4; + break; + default: + pl_unreachable(); + } + + int size; + switch (gl_fmt->type) { + case GL_BYTE: + case GL_UNSIGNED_BYTE: + size = 1; + break; + case GL_SHORT: + case GL_UNSIGNED_SHORT: + size = 2; + break; + case GL_INT: + case GL_UNSIGNED_INT: + case GL_FLOAT: + size = 4; + break; + default: + pl_unreachable(); + } + + // Host visible representation + fmt->texel_size = fmt->num_components * size; + fmt->texel_align = 1; + for (int i = 0; i < fmt->num_components; i++) + fmt->host_bits[i] = size * 8; + + // Compute internal size by summing up the depth + int ibits = 0; + for (int i = 0; i < fmt->num_components; i++) + ibits += fmt->component_depth[i]; + fmt->internal_size = (ibits + 7) / 8; + + // We're not the ones actually emulating these texture format - the + // driver is - but we might as well set the hint. + fmt->emulated = fmt->texel_size != fmt->internal_size; + + // 3-component formats are almost surely also emulated + if (fmt->num_components == 3) + fmt->emulated = true; + + // Older OpenGL most likely emulates 32-bit float formats as well + if (p->gl_ver < 30 && fmt->component_depth[0] >= 32) + fmt->emulated = true; + + // For sanity, clear the superfluous fields + for (int i = fmt->num_components; i < 4; i++) { + fmt->component_depth[i] = 0; + fmt->sample_order[i] = 0; + fmt->host_bits[i] = 0; + } + + fmt->glsl_type = pl_var_glsl_type_name(pl_var_from_fmt(fmt, "")); + fmt->glsl_format = pl_fmt_glsl_format(fmt, fmt->num_components); + fmt->fourcc = pl_fmt_fourcc(fmt); + pl_assert(fmt->glsl_type); + +#ifdef PL_HAVE_UNIX + if (p->has_modifiers && fmt->fourcc && supported_fourcc(p, fmt->fourcc)) { + int num_mods = 0; + bool ok = eglQueryDmaBufModifiersEXT(p->egl_dpy, fmt->fourcc, + 0, NULL, NULL, &num_mods); + if (ok && num_mods) { + // On my system eglQueryDmaBufModifiersEXT seems to never return + // MOD_INVALID even though eglExportDMABUFImageQueryMESA happily + // returns such modifiers. Since we handle INVALID by not + // requiring modifiers at all, always add this value to the + // list of supported modifiers. May result in duplicates, but + // whatever. 
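+            // (eglQueryDmaBufModifiersEXT follows the usual two-call
+            // pattern: the call above passed max_modifiers = 0 and only
+            // fetched the count; the call below fills the actual array,
+            // starting at index 1 so that slot 0 keeps the manually
+            // added DRM_FORMAT_MOD_INVALID.)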
+ uint64_t *mods = pl_calloc(fmt, num_mods + 1, sizeof(uint64_t)); + mods[0] = DRM_FORMAT_MOD_INVALID; + ok = eglQueryDmaBufModifiersEXT(p->egl_dpy, fmt->fourcc, num_mods, + &mods[1], NULL, &num_mods); + + if (ok) { + fmt->modifiers = mods; + fmt->num_modifiers = num_mods + 1; + } else { + pl_free(mods); + } + } + + eglGetError(); // ignore probing errors + } + + if (!fmt->num_modifiers) { + // Hacky fallback for older drivers that don't support properly + // querying modifiers + static const uint64_t static_mods[] = { + DRM_FORMAT_MOD_INVALID, + DRM_FORMAT_MOD_LINEAR, + }; + + fmt->num_modifiers = PL_ARRAY_SIZE(static_mods); + fmt->modifiers = static_mods; + } +#endif + + // Gathering requires checking the format type (and extension presence) + if (fmt->caps & PL_FMT_CAP_SAMPLEABLE) + fmt->gatherable = p->gather_comps >= fmt->num_components; + + // Reading from textures on GLES requires FBO support for this fmt + if (p->has_readback && (p->gl_ver || (fmt->caps & PL_FMT_CAP_RENDERABLE))) + fmt->caps |= PL_FMT_CAP_HOST_READABLE; + + if (gpu->glsl.compute && fmt->glsl_format && p->has_storage) + fmt->caps |= PL_FMT_CAP_STORABLE | PL_FMT_CAP_READWRITE; + + // Only float-type formats are considered blendable in OpenGL + switch (fmt->type) { + case PL_FMT_UNKNOWN: + case PL_FMT_UINT: + case PL_FMT_SINT: + break; + case PL_FMT_FLOAT: + case PL_FMT_UNORM: + case PL_FMT_SNORM: + if (fmt->caps & PL_FMT_CAP_RENDERABLE) + fmt->caps |= PL_FMT_CAP_BLENDABLE; + break; + case PL_FMT_TYPE_COUNT: + pl_unreachable(); + } + + // TODO: Texel buffers + + PL_ARRAY_APPEND_RAW(gpu, gpu->formats, gpu->num_formats, fmt); +} + +#define DO_FORMATS(formats) \ + do { \ + for (int i = 0; i < PL_ARRAY_SIZE(formats); i++) \ + add_format(gpu, &formats[i]); \ + } while (0) + +bool gl_setup_formats(struct pl_gpu_t *gpu) +{ + struct pl_gl *p = PL_PRIV(gpu); + +#ifdef PL_HAVE_UNIX + if (p->has_modifiers) { + EGLint num_formats = 0; + bool ok = eglQueryDmaBufFormatsEXT(p->egl_dpy, 0, NULL, + &num_formats); + if (ok && num_formats) { + p->egl_formats.elem = pl_calloc(gpu, num_formats, sizeof(EGLint)); + p->egl_formats.num = num_formats; + ok = eglQueryDmaBufFormatsEXT(p->egl_dpy, num_formats, + p->egl_formats.elem, &num_formats); + pl_assert(ok); + + PL_DEBUG(gpu, "EGL formats supported:"); + for (int i = 0; i < num_formats; ++i) { + PL_DEBUG(gpu, " 0x%08x(%.4s)", p->egl_formats.elem[i], + PRINT_FOURCC(p->egl_formats.elem[i])); + } + } + } +#endif + + if (p->gl_ver >= 30) { + // Desktop GL3+ has everything + DO_FORMATS(formats_norm8); + DO_FORMATS(formats_bgra8); + DO_FORMATS(formats_norm16); + DO_FORMATS(formats_rgb16_fbo); + DO_FORMATS(formats_float); + DO_FORMATS(formats_uint); + goto done; + } + + if (p->gl_ver >= 21) { + // If we have a reasonable set of extensions, we can enable most + // things. 
Otherwise, pick simple fallback formats + if (pl_opengl_has_ext(p->gl, "GL_ARB_texture_float") && + pl_opengl_has_ext(p->gl, "GL_ARB_texture_rg") && + pl_opengl_has_ext(p->gl, "GL_ARB_framebuffer_object")) + { + DO_FORMATS(formats_norm8); + DO_FORMATS(formats_bgra8); + DO_FORMATS(formats_norm16); + DO_FORMATS(formats_rgb16_fbo); + DO_FORMATS(formats_float); + } else { + // Fallback for GL2 + DO_FORMATS(formats_legacy_gl2); + DO_FORMATS(formats_basic_vertex); + } + goto done; + } + + if (p->gles_ver >= 30) { + // GLES 3.0 has some basic formats, with framebuffers for float16 + // depending on GL_EXT_color_buffer_(half_)float support + DO_FORMATS(formats_norm8); + if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_norm16")) { + DO_FORMATS(formats_norm16); + DO_FORMATS(formats_rgb16_fallback); + } + if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_format_BGRA8888")) + DO_FORMATS(formats_bgra_gles); + if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_integer")) + DO_FORMATS(formats_uint); + DO_FORMATS(formats_basic_vertex); + if (p->gles_ver >= 32 || pl_opengl_has_ext(p->gl, "GL_EXT_color_buffer_half_float") + || pl_opengl_has_ext(p->gl, "GL_EXT_color_buffer_float")) { + DO_FORMATS(formats_float16_fbo); + } else { + DO_FORMATS(formats_float16_fallback); + } + goto done; + } + + if (p->gles_ver >= 20) { + // GLES 2.0 only has some legacy fallback formats, with support for + // float16 depending on GL_EXT_texture_norm16 being present + DO_FORMATS(formats_legacy_gles2); + DO_FORMATS(formats_basic_vertex); + if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_rg")) { + DO_FORMATS(formats_norm8); + } + if (pl_opengl_has_ext(p->gl, "GL_EXT_texture_format_BGRA8888")) { + DO_FORMATS(formats_bgra_gles); + } + goto done; + } + + // Last resort fallback. Probably not very useful + DO_FORMATS(formats_basic_vertex); + goto done; + +done: + return gl_check_err(gpu, "gl_setup_formats"); +} diff --git a/src/opengl/formats.h b/src/opengl/formats.h new file mode 100644 index 0000000..b98c872 --- /dev/null +++ b/src/opengl/formats.h @@ -0,0 +1,32 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#pragma once + +#include "common.h" + +struct gl_format { + GLint ifmt; // sized internal format (e.g. GL_RGBA16F) + GLenum fmt; // base internal format (e.g. GL_RGBA) + GLenum type; // host-visible type (e.g. GL_FLOAT) + struct pl_fmt_t tmpl; // pl_fmt template +}; + +typedef void (gl_format_cb)(pl_gpu gpu, const struct gl_format *glfmt); + +// Add all supported formats to the `pl_gpu` format list. +bool gl_setup_formats(struct pl_gpu_t *gpu); diff --git a/src/opengl/gpu.c b/src/opengl/gpu.c new file mode 100644 index 0000000..b711ac5 --- /dev/null +++ b/src/opengl/gpu.c @@ -0,0 +1,645 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include "gpu.h" +#include "common.h" +#include "formats.h" +#include "utils.h" + +#ifdef PL_HAVE_UNIX +#include +#endif + +#ifdef PL_HAVE_WIN32 +#include +#include +#endif + +static const struct pl_gpu_fns pl_fns_gl; + +static void gl_gpu_destroy(pl_gpu gpu) +{ + struct pl_gl *p = PL_PRIV(gpu); + + pl_gpu_finish(gpu); + while (p->callbacks.num > 0) + gl_poll_callbacks(gpu); + + pl_free((void *) gpu); +} + +pl_opengl pl_opengl_get(pl_gpu gpu) +{ + const struct pl_gpu_fns *impl = PL_PRIV(gpu); + if (impl->destroy == gl_gpu_destroy) { + struct pl_gl *p = (struct pl_gl *) impl; + return p->gl; + } + + return NULL; +} + +static pl_handle_caps tex_handle_caps(pl_gpu gpu, bool import) +{ + pl_handle_caps caps = 0; + struct pl_gl *p = PL_PRIV(gpu); + + if (!p->egl_dpy || (!p->has_egl_storage && !p->has_egl_import)) + return 0; + + if (import) { + if (pl_opengl_has_ext(p->gl, "EGL_EXT_image_dma_buf_import")) + caps |= PL_HANDLE_DMA_BUF; + } else if (!import && p->egl_ctx) { + if (pl_opengl_has_ext(p->gl, "EGL_MESA_image_dma_buf_export")) + caps |= PL_HANDLE_DMA_BUF; + } + + return caps; +} + +static inline size_t get_page_size(void) +{ + +#ifdef PL_HAVE_UNIX + return sysconf(_SC_PAGESIZE); +#endif + +#ifdef PL_HAVE_WIN32 + SYSTEM_INFO sysInfo; + GetSystemInfo(&sysInfo); + return sysInfo.dwAllocationGranularity; +#endif + + pl_assert(!"Unsupported platform!"); +} + +#define get(pname, field) \ + do { \ + GLint tmp = 0; \ + gl->GetIntegerv((pname), &tmp); \ + *(field) = tmp; \ + } while (0) + +#define geti(pname, i, field) \ + do { \ + GLint tmp = 0; \ + gl->GetIntegeri_v((pname), i, &tmp);\ + *(field) = tmp; \ + } while (0) + +pl_gpu pl_gpu_create_gl(pl_log log, pl_opengl pl_gl, const struct pl_opengl_params *params) +{ + struct pl_gpu_t *gpu = pl_zalloc_obj(NULL, gpu, struct pl_gl); + gpu->log = log; + + struct pl_gl *p = PL_PRIV(gpu); + p->impl = pl_fns_gl; + p->gl = pl_gl; + + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_glsl_version *glsl = &gpu->glsl; + glsl->gles = gl_is_gles(pl_gl); + int ver = pl_gl->major * 10 + pl_gl->minor; + p->gl_ver = glsl->gles ? 0 : ver; + p->gles_ver = glsl->gles ? ver : 0; + + // If possible, query the GLSL version from the implementation + const char *glslver = (char *) gl->GetString(GL_SHADING_LANGUAGE_VERSION); + if (glslver) { + PL_INFO(gpu, " GL_SHADING_LANGUAGE_VERSION: %s", glslver); + int major = 0, minor = 0; + if (sscanf(glslver, "%d.%d", &major, &minor) == 2) + glsl->version = major * 100 + minor; + } + + if (!glsl->version) { + // Otherwise, use the fixed magic versions 100 and 300 for GLES. 
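+        // (GLSL ES 1.00 is what ships with GLES 2.0, and GLSL ES 3.00
+        // with GLES 3.0, so the fallback follows directly from the
+        // context version whenever GL_SHADING_LANGUAGE_VERSION is
+        // missing or unparseable.)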
+ if (p->gles_ver >= 30) { + glsl->version = 300; + } else if (p->gles_ver >= 20) { + glsl->version = 100; + } else { + goto error; + } + } + + static const int glsl_ver_req = 130; + if (glsl->version < glsl_ver_req) { + PL_FATAL(gpu, "GLSL version too old (%d < %d), please use a newer " + "OpenGL implementation or downgrade libplacebo!", + glsl->version, glsl_ver_req); + goto error; + } + + if (params->max_glsl_version && params->max_glsl_version >= glsl_ver_req) { + glsl->version = PL_MIN(glsl->version, params->max_glsl_version); + PL_INFO(gpu, "Restricting GLSL version to %d... new version is %d", + params->max_glsl_version, glsl->version); + } + + if (gl_test_ext(gpu, "GL_ARB_compute_shader", 43, 0) && glsl->version >= 420) { + glsl->compute = true; + get(GL_MAX_COMPUTE_SHARED_MEMORY_SIZE, &glsl->max_shmem_size); + get(GL_MAX_COMPUTE_WORK_GROUP_INVOCATIONS, &glsl->max_group_threads); + for (int i = 0; i < 3; i++) + geti(GL_MAX_COMPUTE_WORK_GROUP_SIZE, i, &glsl->max_group_size[i]); + } + + if (gl_test_ext(gpu, "GL_ARB_texture_gather", 40, 0)) { + get(GL_MAX_PROGRAM_TEXTURE_GATHER_COMPONENTS_ARB, &p->gather_comps); + get(GL_MIN_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->min_gather_offset); + get(GL_MAX_PROGRAM_TEXTURE_GATHER_OFFSET_ARB, &glsl->max_gather_offset); + } + + // Query all device limits + struct pl_gpu_limits *limits = &gpu->limits; + limits->thread_safe = params->make_current; + limits->callbacks = gl_test_ext(gpu, "GL_ARB_sync", 32, 30); + limits->align_vertex_stride = 1; + if (gl_test_ext(gpu, "GL_ARB_pixel_buffer_object", 31, 0)) { + limits->max_buf_size = SIZE_MAX; // no restriction imposed by GL + if (gl_test_ext(gpu, "GL_ARB_uniform_buffer_object", 31, 0)) + get(GL_MAX_UNIFORM_BLOCK_SIZE, &limits->max_ubo_size); + if (gl_test_ext(gpu, "GL_ARB_shader_storage_buffer_object", 43, 0) && + gpu->glsl.version >= 140) + { + get(GL_MAX_SHADER_STORAGE_BLOCK_SIZE, &limits->max_ssbo_size); + } + limits->max_vbo_size = limits->max_buf_size; // No additional restrictions + if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0)) { + const char *vendor = (char *) gl->GetString(GL_VENDOR); + limits->max_mapped_size = limits->max_buf_size; + limits->host_cached = strcmp(vendor, "AMD") == 0 || + strcmp(vendor, "NVIDIA Corporation") == 0; + } + } + + get(GL_MAX_TEXTURE_SIZE, &limits->max_tex_2d_dim); + if (gl_test_ext(gpu, "GL_EXT_texture3D", 21, 30)) + get(GL_MAX_3D_TEXTURE_SIZE, &limits->max_tex_3d_dim); + // There's no equivalent limit for 1D textures for whatever reason, so + // just set it to the same as the 2D limit + if (p->gl_ver >= 21) + limits->max_tex_1d_dim = limits->max_tex_2d_dim; + limits->buf_transfer = true; + + if (p->gl_ver || p->gles_ver >= 30) { + get(GL_MAX_FRAGMENT_UNIFORM_COMPONENTS, &limits->max_variable_comps); + } else { + // fallback for GLES 2.0, which doesn't have max_comps + get(GL_MAX_FRAGMENT_UNIFORM_VECTORS, &limits->max_variable_comps); + limits->max_variable_comps *= 4; + } + + if (glsl->compute) { + for (int i = 0; i < 3; i++) + geti(GL_MAX_COMPUTE_WORK_GROUP_COUNT, i, &limits->max_dispatch[i]); + } + + // Query import/export support + p->egl_dpy = params->egl_display; + p->egl_ctx = params->egl_context; + p->has_egl_storage = pl_opengl_has_ext(p->gl, "GL_EXT_EGL_image_storage"); + p->has_egl_import = pl_opengl_has_ext(p->gl, "GL_OES_EGL_image_external"); + gpu->export_caps.tex = tex_handle_caps(gpu, false); + gpu->import_caps.tex = tex_handle_caps(gpu, true); + + if (p->egl_dpy) { + p->has_modifiers = pl_opengl_has_ext(p->gl, + 
"EGL_EXT_image_dma_buf_import_modifiers"); + } + + if (pl_opengl_has_ext(pl_gl, "GL_AMD_pinned_memory")) { + gpu->import_caps.buf |= PL_HANDLE_HOST_PTR; + gpu->limits.align_host_ptr = get_page_size(); + } + + // Cache some internal capability checks + p->has_vao = gl_test_ext(gpu, "GL_ARB_vertex_array_object", 30, 0); + p->has_invalidate_fb = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 30); + p->has_invalidate_tex = gl_test_ext(gpu, "GL_ARB_invalidate_subdata", 43, 0); + p->has_queries = gl_test_ext(gpu, "GL_ARB_timer_query", 33, 0); + p->has_storage = gl_test_ext(gpu, "GL_ARB_shader_image_load_store", 42, 0); + p->has_readback = true; + + if (p->has_readback && p->gles_ver) { + GLuint fbo = 0, tex = 0; + GLint read_type = 0, read_fmt = 0; + gl->GenTextures(1, &tex); + gl->BindTexture(GL_TEXTURE_2D, tex); + gl->GenFramebuffers(1, &fbo); + gl->TexImage2D(GL_TEXTURE_2D, 0, GL_R8, 64, 64, 0, GL_RED, + GL_UNSIGNED_BYTE, NULL); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + gl->FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, tex, 0); + gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); + gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); + if (read_type != GL_UNSIGNED_BYTE || read_fmt != GL_RED) { + PL_INFO(gpu, "GPU does not seem to support lossless texture " + "readback, restricting readback capabilities! This is a " + "GLES/driver limitation, there is little we can do to " + "work around it."); + p->has_readback = false; + } + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + gl->BindTexture(GL_TEXTURE_2D, 0); + gl->DeleteFramebuffers(1, &fbo); + gl->DeleteTextures(1, &tex); + } + + // We simply don't know, so make up some values + limits->align_tex_xfer_offset = 32; + limits->align_tex_xfer_pitch = 4; + limits->fragment_queues = 1; + limits->compute_queues = glsl->compute ? 1 : 0; + + if (!gl_check_err(gpu, "pl_gpu_create_gl")) { + PL_WARN(gpu, "Encountered errors while detecting GPU capabilities... 
" + "ignoring, but expect limitations/issues"); + p->failed = false; + } + + // Filter out error messages during format probing + pl_log_level_cap(gpu->log, PL_LOG_INFO); + bool formats_ok = gl_setup_formats(gpu); + pl_log_level_cap(gpu->log, PL_LOG_NONE); + if (!formats_ok) + goto error; + + return pl_gpu_finalize(gpu); + +error: + gl_gpu_destroy(gpu); + return NULL; +} + +void gl_buf_destroy(pl_gpu gpu, pl_buf buf) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) { + PL_ERR(gpu, "Failed uninitializing buffer, leaking resources!"); + return; + } + + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + if (buf_gl->fence) + gl->DeleteSync(buf_gl->fence); + + if (buf_gl->mapped) { + gl->BindBuffer(GL_COPY_WRITE_BUFFER, buf_gl->buffer); + gl->UnmapBuffer(GL_COPY_WRITE_BUFFER); + gl->BindBuffer(GL_COPY_WRITE_BUFFER, 0); + } + + gl->DeleteBuffers(1, &buf_gl->buffer); + gl_check_err(gpu, "gl_buf_destroy"); + RELEASE_CURRENT(); + pl_free((void *) buf); +} + +pl_buf gl_buf_create(pl_gpu gpu, const struct pl_buf_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return NULL; + + struct pl_buf_t *buf = pl_zalloc_obj(NULL, buf, struct pl_buf_gl); + buf->params = *params; + buf->params.initial_data = NULL; + + struct pl_gl *p = PL_PRIV(gpu); + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + buf_gl->id = ++p->buf_id; + + // Just use this since the generic GL_BUFFER doesn't work + GLenum target = GL_ARRAY_BUFFER; + const void *data = params->initial_data; + size_t total_size = params->size; + bool import = false; + + if (params->import_handle == PL_HANDLE_HOST_PTR) { + const struct pl_shared_mem *shmem = ¶ms->shared_mem; + target = GL_EXTERNAL_VIRTUAL_MEMORY_BUFFER_AMD; + + data = shmem->handle.ptr; + buf_gl->offset = shmem->offset; + total_size = shmem->size; + import = true; + + if (params->host_mapped) + buf->data = (uint8_t *) data + buf_gl->offset; + + if (buf_gl->offset > 0 && params->drawable) { + PL_ERR(gpu, "Cannot combine non-aligned host pointer imports with " + "drawable (vertex) buffers! 
This is a design limitation, " + "open an issue if you absolutely need this."); + goto error; + } + } + + gl->GenBuffers(1, &buf_gl->buffer); + gl->BindBuffer(target, buf_gl->buffer); + + if (gl_test_ext(gpu, "GL_ARB_buffer_storage", 44, 0) && !import) { + + GLbitfield mapflags = 0, storflags = 0; + if (params->host_writable) + storflags |= GL_DYNAMIC_STORAGE_BIT; + if (params->host_mapped) { + mapflags |= GL_MAP_READ_BIT | GL_MAP_WRITE_BIT | + GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT; + } + if (params->memory_type == PL_BUF_MEM_HOST) + storflags |= GL_CLIENT_STORAGE_BIT; // hopefully this works + + gl->BufferStorage(target, total_size, data, storflags | mapflags); + + if (params->host_mapped) { + buf_gl->mapped = true; + buf->data = gl->MapBufferRange(target, buf_gl->offset, params->size, + mapflags); + if (!buf->data) { + gl->BindBuffer(target, 0); + if (!gl_check_err(gpu, "gl_buf_create: map")) + PL_ERR(gpu, "Failed mapping buffer: unknown reason"); + goto error; + } + } + + } else { + + // Make a random guess based on arbitrary criteria we can't know + GLenum hint = GL_STREAM_DRAW; + if (params->initial_data && !params->host_writable && !params->host_mapped) + hint = GL_STATIC_DRAW; + if (params->host_readable && !params->host_writable && !params->host_mapped) + hint = GL_STREAM_READ; + if (params->storable) + hint = GL_DYNAMIC_COPY; + + gl->BufferData(target, total_size, data, hint); + + if (import && gl->GetError() == GL_INVALID_OPERATION) { + PL_ERR(gpu, "Failed importing host pointer!"); + goto error; + } + + } + + gl->BindBuffer(target, 0); + if (!gl_check_err(gpu, "gl_buf_create")) + goto error; + + if (params->storable) { + buf_gl->barrier = GL_BUFFER_UPDATE_BARRIER_BIT | // for buf_copy etc. + GL_PIXEL_BUFFER_BARRIER_BIT | // for tex_upload + GL_SHADER_STORAGE_BARRIER_BIT; + + if (params->host_mapped) + buf_gl->barrier |= GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT; + if (params->uniform) + buf_gl->barrier |= GL_UNIFORM_BARRIER_BIT; + if (params->drawable) + buf_gl->barrier |= GL_VERTEX_ATTRIB_ARRAY_BARRIER_BIT; + } + + RELEASE_CURRENT(); + return buf; + +error: + gl_buf_destroy(gpu, buf); + RELEASE_CURRENT(); + return NULL; +} + +bool gl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + + // Non-persistently mapped buffers are always implicitly reusable in OpenGL, + // the implementation will create more buffers under the hood if needed. + if (!buf->data) + return false; + + if (!MAKE_CURRENT()) + return true; // conservative guess + + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + if (buf_gl->fence) { + GLenum res = gl->ClientWaitSync(buf_gl->fence, + timeout ? 
GL_SYNC_FLUSH_COMMANDS_BIT : 0, + timeout); + if (res == GL_ALREADY_SIGNALED || res == GL_CONDITION_SATISFIED) { + gl->DeleteSync(buf_gl->fence); + buf_gl->fence = NULL; + } + } + + gl_poll_callbacks(gpu); + RELEASE_CURRENT(); + return !!buf_gl->fence; +} + +void gl_buf_write(pl_gpu gpu, pl_buf buf, size_t offset, + const void *data, size_t size) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer); + gl->BufferSubData(GL_ARRAY_BUFFER, buf_gl->offset + offset, size, data); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + gl_check_err(gpu, "gl_buf_write"); + RELEASE_CURRENT(); +} + +bool gl_buf_read(pl_gpu gpu, pl_buf buf, size_t offset, + void *dest, size_t size) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return false; + + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBuffer(GL_ARRAY_BUFFER, buf_gl->buffer); + gl->GetBufferSubData(GL_ARRAY_BUFFER, buf_gl->offset + offset, size, dest); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + bool ok = gl_check_err(gpu, "gl_buf_read"); + RELEASE_CURRENT(); + return ok; +} + +void gl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, + pl_buf src, size_t src_offset, size_t size) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + struct pl_buf_gl *src_gl = PL_PRIV(src); + struct pl_buf_gl *dst_gl = PL_PRIV(dst); + gl->BindBuffer(GL_COPY_READ_BUFFER, src_gl->buffer); + gl->BindBuffer(GL_COPY_WRITE_BUFFER, dst_gl->buffer); + gl->CopyBufferSubData(GL_COPY_READ_BUFFER, GL_COPY_WRITE_BUFFER, + src_gl->offset + src_offset, + dst_gl->offset + dst_offset, size); + gl_check_err(gpu, "gl_buf_copy"); + RELEASE_CURRENT(); +} + +#define QUERY_OBJECT_NUM 8 + +struct pl_timer_t { + GLuint query[QUERY_OBJECT_NUM]; + int index_write; // next index to write to + int index_read; // next index to read from +}; + +static pl_timer gl_timer_create(pl_gpu gpu) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_gl *p = PL_PRIV(gpu); + if (!p->has_queries || !MAKE_CURRENT()) + return NULL; + + pl_timer timer = pl_zalloc_ptr(NULL, timer); + gl->GenQueries(QUERY_OBJECT_NUM, timer->query); + RELEASE_CURRENT(); + return timer; +} + +static void gl_timer_destroy(pl_gpu gpu, pl_timer timer) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) { + PL_ERR(gpu, "Failed uninitializing timer, leaking resources!"); + return; + } + + gl->DeleteQueries(QUERY_OBJECT_NUM, timer->query); + gl_check_err(gpu, "gl_timer_destroy"); + RELEASE_CURRENT(); + pl_free(timer); +} + +static uint64_t gl_timer_query(pl_gpu gpu, pl_timer timer) +{ + if (timer->index_read == timer->index_write) + return 0; // no more unprocessed results + + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return 0; + + uint64_t res = 0; + GLuint query = timer->query[timer->index_read]; + int avail = 0; + gl->GetQueryObjectiv(query, GL_QUERY_RESULT_AVAILABLE, &avail); + if (!avail) + goto done; + gl->GetQueryObjectui64v(query, GL_QUERY_RESULT, &res); + + timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM; + // fall through + +done: + RELEASE_CURRENT(); + return res; +} + +void gl_timer_begin(pl_gpu gpu, pl_timer timer) +{ + if (!timer) + return; + + const gl_funcs *gl = gl_funcs_get(gpu); + gl->BeginQuery(GL_TIME_ELAPSED, timer->query[timer->index_write]); +} + +void gl_timer_end(pl_gpu gpu, pl_timer timer) +{ + if (!timer) + return; + + const gl_funcs *gl = gl_funcs_get(gpu); + gl->EndQuery(GL_TIME_ELAPSED); + + 
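+    // Advance the ring of query objects; results are harvested lazily and
+    // in order by gl_timer_query, once GL_QUERY_RESULT_AVAILABLE says so.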
timer->index_write = (timer->index_write + 1) % QUERY_OBJECT_NUM; + if (timer->index_write == timer->index_read) { + // forcibly drop the least recent result to make space + timer->index_read = (timer->index_read + 1) % QUERY_OBJECT_NUM; + } +} + +static void gl_gpu_flush(pl_gpu gpu) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + gl->Flush(); + gl_check_err(gpu, "gl_gpu_flush"); + RELEASE_CURRENT(); +} + +static void gl_gpu_finish(pl_gpu gpu) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + gl->Finish(); + gl_check_err(gpu, "gl_gpu_finish"); + RELEASE_CURRENT(); +} + +static bool gl_gpu_is_failed(pl_gpu gpu) +{ + struct pl_gl *gl = PL_PRIV(gpu); + return gl->failed; +} + +static const struct pl_gpu_fns pl_fns_gl = { + .destroy = gl_gpu_destroy, + .tex_create = gl_tex_create, + .tex_destroy = gl_tex_destroy, + .tex_invalidate = gl_tex_invalidate, + .tex_clear_ex = gl_tex_clear_ex, + .tex_blit = gl_tex_blit, + .tex_upload = gl_tex_upload, + .tex_download = gl_tex_download, + .buf_create = gl_buf_create, + .buf_destroy = gl_buf_destroy, + .buf_write = gl_buf_write, + .buf_read = gl_buf_read, + .buf_copy = gl_buf_copy, + .buf_poll = gl_buf_poll, + .desc_namespace = gl_desc_namespace, + .pass_create = gl_pass_create, + .pass_destroy = gl_pass_destroy, + .pass_run = gl_pass_run, + .timer_create = gl_timer_create, + .timer_destroy = gl_timer_destroy, + .timer_query = gl_timer_query, + .gpu_flush = gl_gpu_flush, + .gpu_finish = gl_gpu_finish, + .gpu_is_failed = gl_gpu_is_failed, +}; diff --git a/src/opengl/gpu.h b/src/opengl/gpu.h new file mode 100644 index 0000000..50741d0 --- /dev/null +++ b/src/opengl/gpu.h @@ -0,0 +1,141 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . 
+ */ + +#pragma once + +#include "../gpu.h" +#include "common.h" + +// Thread safety: Unsafe, same as pl_gpu_destroy +pl_gpu pl_gpu_create_gl(pl_log log, pl_opengl gl, const struct pl_opengl_params *params); + +// --- pl_gpu internal structs and functions + +struct pl_gl { + struct pl_gpu_fns impl; + pl_opengl gl; + bool failed; + + // For import/export + EGLDisplay egl_dpy; + EGLContext egl_ctx; + bool egl_storage; +#ifdef PL_HAVE_UNIX + // List of formats supported by EGL_EXT_image_dma_buf_import + PL_ARRAY(EGLint) egl_formats; +#endif + + // Sync objects and associated callbacks + PL_ARRAY(struct gl_cb) callbacks; + + + // Incrementing counters to keep track of object uniqueness + int buf_id; + + // Cached capabilities + int gl_ver; + int gles_ver; + bool has_storage; + bool has_invalidate_fb; + bool has_invalidate_tex; + bool has_vao; + bool has_queries; + bool has_modifiers; + bool has_readback; + bool has_egl_storage; + bool has_egl_import; + int gather_comps; +}; + +static inline const gl_funcs *gl_funcs_get(pl_gpu gpu) +{ + struct pl_gl *p = PL_PRIV(gpu); + struct gl_ctx *glctx = PL_PRIV(p->gl); + return &glctx->func; +} + +void gl_timer_begin(pl_gpu gpu, pl_timer timer); +void gl_timer_end(pl_gpu gpu, pl_timer timer); + +static inline bool _make_current(pl_gpu gpu) +{ + struct pl_gl *p = PL_PRIV(gpu); + if (!gl_make_current(p->gl)) { + p->failed = true; + return false; + } + + return true; +} + +static inline void _release_current(pl_gpu gpu) +{ + struct pl_gl *p = PL_PRIV(gpu); + gl_release_current(p->gl); +} + +#define MAKE_CURRENT() _make_current(gpu) +#define RELEASE_CURRENT() _release_current(gpu) + +struct pl_tex_gl { + GLenum target; + GLuint texture; + bool wrapped_tex; + GLuint fbo; // or 0 + bool wrapped_fb; + GLbitfield barrier; + + // GL format fields + GLenum format; + GLint iformat; + GLenum type; + + // For imported/exported textures + EGLImageKHR image; + int fd; +}; + +pl_tex gl_tex_create(pl_gpu, const struct pl_tex_params *); +void gl_tex_destroy(pl_gpu, pl_tex); +void gl_tex_invalidate(pl_gpu, pl_tex); +void gl_tex_clear_ex(pl_gpu, pl_tex, const union pl_clear_color); +void gl_tex_blit(pl_gpu, const struct pl_tex_blit_params *); +bool gl_tex_upload(pl_gpu, const struct pl_tex_transfer_params *); +bool gl_tex_download(pl_gpu, const struct pl_tex_transfer_params *); + +struct pl_buf_gl { + uint64_t id; // unique per buffer + GLuint buffer; + size_t offset; + GLsync fence; + GLbitfield barrier; + bool mapped; +}; + +pl_buf gl_buf_create(pl_gpu, const struct pl_buf_params *); +void gl_buf_destroy(pl_gpu, pl_buf); +void gl_buf_write(pl_gpu, pl_buf, size_t offset, const void *src, size_t size); +bool gl_buf_read(pl_gpu, pl_buf, size_t offset, void *dst, size_t size); +void gl_buf_copy(pl_gpu, pl_buf dst, size_t dst_offset, + pl_buf src, size_t src_offset, size_t size); +bool gl_buf_poll(pl_gpu, pl_buf, uint64_t timeout); + +struct pl_pass_gl; +int gl_desc_namespace(pl_gpu, enum pl_desc_type type); +pl_pass gl_pass_create(pl_gpu, const struct pl_pass_params *); +void gl_pass_destroy(pl_gpu, pl_pass); +void gl_pass_run(pl_gpu, const struct pl_pass_run_params *); diff --git a/src/opengl/gpu_pass.c b/src/opengl/gpu_pass.c new file mode 100644 index 0000000..58e69a5 --- /dev/null +++ b/src/opengl/gpu_pass.c @@ -0,0 +1,707 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include "gpu.h" +#include "cache.h" +#include "formats.h" +#include "utils.h" + +int gl_desc_namespace(pl_gpu gpu, enum pl_desc_type type) +{ + return (int) type; +} + +struct gl_cache_header { + GLenum format; +}; + +static GLuint load_cached_program(pl_gpu gpu, pl_cache cache, pl_cache_obj *obj) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) + return 0; + + if (!pl_cache_get(cache, obj)) + return 0; + + if (obj->size < sizeof(struct gl_cache_header)) + return 0; + + GLuint prog = gl->CreateProgram(); + if (!gl_check_err(gpu, "load_cached_program: glCreateProgram")) + return 0; + + struct gl_cache_header *header = (struct gl_cache_header *) obj->data; + pl_str rest = (pl_str) { obj->data, obj->size }; + rest = pl_str_drop(rest, sizeof(*header)); + gl->ProgramBinary(prog, header->format, rest.buf, rest.len); + gl->GetError(); // discard potential useless error + + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + if (status) + return prog; + + gl->DeleteProgram(prog); + gl_check_err(gpu, "load_cached_program: glProgramBinary"); + return 0; +} + +static enum pl_log_level gl_log_level(GLint status, GLint log_length) +{ + if (!status) { + return PL_LOG_ERR; + } else if (log_length > 0) { + return PL_LOG_INFO; + } else { + return PL_LOG_DEBUG; + } +} + +static bool gl_attach_shader(pl_gpu gpu, GLuint program, GLenum type, const char *src) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + GLuint shader = gl->CreateShader(type); + gl->ShaderSource(shader, 1, &src, NULL); + gl->CompileShader(shader); + + GLint status = 0; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); + GLint log_length = 0; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + + enum pl_log_level level = gl_log_level(status, log_length); + if (pl_msg_test(gpu->log, level)) { + GLchar *logstr = pl_zalloc(NULL, log_length + 1); + gl->GetShaderInfoLog(shader, log_length, NULL, logstr); + PL_MSG(gpu, level, "shader compile log (status=%d): %s", status, logstr); + pl_free(logstr); + } + + if (!status || !gl_check_err(gpu, "gl_attach_shader")) + goto error; + + gl->AttachShader(program, shader); + gl->DeleteShader(shader); + return true; + +error: + gl->DeleteShader(shader); + return false; +} + +static GLuint gl_compile_program(pl_gpu gpu, const struct pl_pass_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + GLuint prog = gl->CreateProgram(); + bool ok = true; + + switch (params->type) { + case PL_PASS_COMPUTE: + ok &= gl_attach_shader(gpu, prog, GL_COMPUTE_SHADER, params->glsl_shader); + break; + case PL_PASS_RASTER: + ok &= gl_attach_shader(gpu, prog, GL_VERTEX_SHADER, params->vertex_shader); + ok &= gl_attach_shader(gpu, prog, GL_FRAGMENT_SHADER, params->glsl_shader); + for (int i = 0; i < params->num_vertex_attribs; i++) + gl->BindAttribLocation(prog, i, 
params->vertex_attribs[i].name); + break; + case PL_PASS_INVALID: + case PL_PASS_TYPE_COUNT: + pl_unreachable(); + } + + if (!ok || !gl_check_err(gpu, "gl_compile_program: attach shader")) + goto error; + + gl->LinkProgram(prog); + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + GLint log_length = 0; + gl->GetProgramiv(prog, GL_INFO_LOG_LENGTH, &log_length); + + enum pl_log_level level = gl_log_level(status, log_length); + if (pl_msg_test(gpu->log, level)) { + GLchar *logstr = pl_zalloc(NULL, log_length + 1); + gl->GetProgramInfoLog(prog, log_length, NULL, logstr); + PL_MSG(gpu, level, "shader link log (status=%d): %s", status, logstr); + pl_free(logstr); + } + + if (!gl_check_err(gpu, "gl_compile_program: link program")) + goto error; + + return prog; + +error: + gl->DeleteProgram(prog); + PL_ERR(gpu, "Failed compiling/linking GLSL program"); + return 0; +} + +// For pl_pass.priv +struct pl_pass_gl { + GLuint program; + GLuint vao; // the VAO object + uint64_t vao_id; // buf_gl.id of VAO + size_t vao_offset; // VBO offset of VAO + GLuint buffer; // VBO for raw vertex pointers + GLuint index_buffer; + GLint *var_locs; +}; + +void gl_pass_destroy(pl_gpu gpu, pl_pass pass) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) { + PL_ERR(gpu, "Failed uninitializing pass, leaking resources!"); + return; + } + + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + if (pass_gl->vao) + gl->DeleteVertexArrays(1, &pass_gl->vao); + gl->DeleteBuffers(1, &pass_gl->index_buffer); + gl->DeleteBuffers(1, &pass_gl->buffer); + gl->DeleteProgram(pass_gl->program); + + gl_check_err(gpu, "gl_pass_destroy"); + RELEASE_CURRENT(); + pl_free((void *) pass); +} + +static void gl_update_va(pl_gpu gpu, pl_pass pass, size_t vbo_offset) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + for (int i = 0; i < pass->params.num_vertex_attribs; i++) { + const struct pl_vertex_attrib *va = &pass->params.vertex_attribs[i]; + const struct gl_format **glfmtp = PL_PRIV(va->fmt); + const struct gl_format *glfmt = *glfmtp; + + bool norm = false; + switch (va->fmt->type) { + case PL_FMT_UNORM: + case PL_FMT_SNORM: + norm = true; + break; + + case PL_FMT_UNKNOWN: + case PL_FMT_FLOAT: + case PL_FMT_UINT: + case PL_FMT_SINT: + break; + case PL_FMT_TYPE_COUNT: + pl_unreachable(); + } + + gl->EnableVertexAttribArray(i); + gl->VertexAttribPointer(i, va->fmt->num_components, glfmt->type, norm, + pass->params.vertex_stride, + (void *) (va->offset + vbo_offset)); + } +} + +pl_pass gl_pass_create(pl_gpu gpu, const struct pl_pass_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return NULL; + + struct pl_gl *p = PL_PRIV(gpu); + struct pl_pass_t *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_gl); + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + pl_cache cache = pl_gpu_cache(gpu); + pass->params = pl_pass_params_copy(pass, params); + + pl_cache_obj obj = { .key = CACHE_KEY_GL_PROG }; + if (cache) { + pl_hash_merge(&obj.key, pl_str0_hash(params->glsl_shader)); + if (params->type == PL_PASS_RASTER) + pl_hash_merge(&obj.key, pl_str0_hash(params->vertex_shader)); + } + + // Load/Compile program + if ((pass_gl->program = load_cached_program(gpu, cache, &obj))) { + PL_DEBUG(gpu, "Using cached GL program"); + } else { + pl_clock_t start = pl_clock_now(); + pass_gl->program = gl_compile_program(gpu, params); + pl_log_cpu_time(gpu->log, start, pl_clock_now(), "compiling shader"); + } + + if (!pass_gl->program) + goto error; + + // Update program cache if possible + if (cache && 
gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) { + GLint buf_size = 0; + gl->GetProgramiv(pass_gl->program, GL_PROGRAM_BINARY_LENGTH, &buf_size); + if (buf_size > 0) { + buf_size += sizeof(struct gl_cache_header); + pl_cache_obj_resize(NULL, &obj, buf_size); + struct gl_cache_header *header = obj.data; + void *buffer = &header[1]; + GLsizei binary_size = 0; + gl->GetProgramBinary(pass_gl->program, buf_size, &binary_size, + &header->format, buffer); + bool ok = gl_check_err(gpu, "gl_pass_create: get program binary"); + if (ok) { + obj.size = sizeof(*header) + binary_size; + pl_assert(obj.size <= buf_size); + pl_cache_set(cache, &obj); + } + } + } + + gl->UseProgram(pass_gl->program); + pass_gl->var_locs = pl_calloc(pass, params->num_variables, sizeof(GLint)); + + for (int i = 0; i < params->num_variables; i++) { + pass_gl->var_locs[i] = gl->GetUniformLocation(pass_gl->program, + params->variables[i].name); + + // Due to OpenGL API restrictions, we need to ensure that this is a + // variable type we can actually *update*. Fortunately, this is easily + // checked by virtue of the fact that all legal combinations of + // parameters will have a valid GLSL type name + if (!pl_var_glsl_type_name(params->variables[i])) { + gl->UseProgram(0); + PL_ERR(gpu, "Input variable '%s' does not match any known type!", + params->variables[i].name); + goto error; + } + } + + for (int i = 0; i < params->num_descriptors; i++) { + const struct pl_desc *desc = ¶ms->descriptors[i]; + switch (desc->type) { + case PL_DESC_SAMPLED_TEX: + case PL_DESC_STORAGE_IMG: { + // For compatibility with older OpenGL, we need to explicitly + // update the texture/image unit bindings after creating the shader + // program, since specifying it directly requires GLSL 4.20+ + GLint loc = gl->GetUniformLocation(pass_gl->program, desc->name); + gl->Uniform1i(loc, desc->binding); + break; + } + case PL_DESC_BUF_UNIFORM: { + GLuint idx = gl->GetUniformBlockIndex(pass_gl->program, desc->name); + gl->UniformBlockBinding(pass_gl->program, idx, desc->binding); + break; + } + case PL_DESC_BUF_STORAGE: { + GLuint idx = gl->GetProgramResourceIndex(pass_gl->program, + GL_SHADER_STORAGE_BLOCK, + desc->name); + gl->ShaderStorageBlockBinding(pass_gl->program, idx, desc->binding); + break; + } + case PL_DESC_BUF_TEXEL_UNIFORM: + case PL_DESC_BUF_TEXEL_STORAGE: + assert(!"unimplemented"); // TODO + case PL_DESC_INVALID: + case PL_DESC_TYPE_COUNT: + pl_unreachable(); + } + } + + gl->UseProgram(0); + + // Initialize the VAO and single vertex buffer + gl->GenBuffers(1, &pass_gl->buffer); + if (p->has_vao) { + gl->GenVertexArrays(1, &pass_gl->vao); + gl->BindBuffer(GL_ARRAY_BUFFER, pass_gl->buffer); + gl->BindVertexArray(pass_gl->vao); + gl_update_va(gpu, pass, 0); + gl->BindVertexArray(0); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } + + if (!gl_check_err(gpu, "gl_pass_create")) + goto error; + + pl_cache_obj_free(&obj); + RELEASE_CURRENT(); + return pass; + +error: + PL_ERR(gpu, "Failed creating pass"); + pl_cache_obj_free(&obj); + gl_pass_destroy(gpu, pass); + RELEASE_CURRENT(); + return NULL; +} + +static void update_var(pl_gpu gpu, pl_pass pass, + const struct pl_var_update *vu) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + const struct pl_var *var = &pass->params.variables[vu->index]; + GLint loc = pass_gl->var_locs[vu->index]; + + switch (var->type) { + case PL_VAR_SINT: { + const int *i = vu->data; + pl_assert(var->dim_m == 1); + switch (var->dim_v) { + case 1: gl->Uniform1iv(loc, var->dim_a, 
i); break; + case 2: gl->Uniform2iv(loc, var->dim_a, i); break; + case 3: gl->Uniform3iv(loc, var->dim_a, i); break; + case 4: gl->Uniform4iv(loc, var->dim_a, i); break; + default: pl_unreachable(); + } + return; + } + case PL_VAR_UINT: { + const unsigned int *u = vu->data; + pl_assert(var->dim_m == 1); + switch (var->dim_v) { + case 1: gl->Uniform1uiv(loc, var->dim_a, u); break; + case 2: gl->Uniform2uiv(loc, var->dim_a, u); break; + case 3: gl->Uniform3uiv(loc, var->dim_a, u); break; + case 4: gl->Uniform4uiv(loc, var->dim_a, u); break; + default: pl_unreachable(); + } + return; + } + case PL_VAR_FLOAT: { + const float *f = vu->data; + if (var->dim_m == 1) { + switch (var->dim_v) { + case 1: gl->Uniform1fv(loc, var->dim_a, f); break; + case 2: gl->Uniform2fv(loc, var->dim_a, f); break; + case 3: gl->Uniform3fv(loc, var->dim_a, f); break; + case 4: gl->Uniform4fv(loc, var->dim_a, f); break; + default: pl_unreachable(); + } + } else if (var->dim_m == 2 && var->dim_v == 2) { + gl->UniformMatrix2fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 3 && var->dim_v == 3) { + gl->UniformMatrix3fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 4 && var->dim_v == 4) { + gl->UniformMatrix4fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 2 && var->dim_v == 3) { + gl->UniformMatrix2x3fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 3 && var->dim_v == 2) { + gl->UniformMatrix3x2fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 2 && var->dim_v == 4) { + gl->UniformMatrix2x4fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 4 && var->dim_v == 2) { + gl->UniformMatrix4x2fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 3 && var->dim_v == 4) { + gl->UniformMatrix3x4fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 4 && var->dim_v == 3) { + gl->UniformMatrix4x3fv(loc, var->dim_a, GL_FALSE, f); + } else { + pl_unreachable(); + } + return; + } + + case PL_VAR_INVALID: + case PL_VAR_TYPE_COUNT: + break; + } + + pl_unreachable(); +} + +static void update_desc(pl_gpu gpu, pl_pass pass, int index, + const struct pl_desc_binding *db) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + const struct pl_desc *desc = &pass->params.descriptors[index]; + + static const GLenum access[] = { + [PL_DESC_ACCESS_READWRITE] = GL_READ_WRITE, + [PL_DESC_ACCESS_READONLY] = GL_READ_ONLY, + [PL_DESC_ACCESS_WRITEONLY] = GL_WRITE_ONLY, + }; + + static const GLint wraps[PL_TEX_ADDRESS_MODE_COUNT] = { + [PL_TEX_ADDRESS_CLAMP] = GL_CLAMP_TO_EDGE, + [PL_TEX_ADDRESS_REPEAT] = GL_REPEAT, + [PL_TEX_ADDRESS_MIRROR] = GL_MIRRORED_REPEAT, + }; + + static const GLint filters[PL_TEX_SAMPLE_MODE_COUNT] = { + [PL_TEX_SAMPLE_NEAREST] = GL_NEAREST, + [PL_TEX_SAMPLE_LINEAR] = GL_LINEAR, + }; + + switch (desc->type) { + case PL_DESC_SAMPLED_TEX: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->ActiveTexture(GL_TEXTURE0 + desc->binding); + gl->BindTexture(tex_gl->target, tex_gl->texture); + + GLint filter = filters[db->sample_mode]; + GLint wrap = wraps[db->address_mode]; + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter); + switch (pl_tex_params_dimension(tex->params)) { + case 3: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap); // fall through + case 2: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap); // fall through + case 1: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap); break; + } + return; + } + case 
PL_DESC_STORAGE_IMG: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->BindImageTexture(desc->binding, tex_gl->texture, 0, GL_FALSE, 0, + access[desc->access], tex_gl->iformat); + return; + } + case PL_DESC_BUF_UNIFORM: { + pl_buf buf = db->object; + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBufferRange(GL_UNIFORM_BUFFER, desc->binding, buf_gl->buffer, + buf_gl->offset, buf->params.size); + return; + } + case PL_DESC_BUF_STORAGE: { + pl_buf buf = db->object; + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBufferRange(GL_SHADER_STORAGE_BUFFER, desc->binding, buf_gl->buffer, + buf_gl->offset, buf->params.size); + return; + } + case PL_DESC_BUF_TEXEL_UNIFORM: + case PL_DESC_BUF_TEXEL_STORAGE: + assert(!"unimplemented"); // TODO + + case PL_DESC_INVALID: + case PL_DESC_TYPE_COUNT: + break; + } + + pl_unreachable(); +} + +static void unbind_desc(pl_gpu gpu, pl_pass pass, int index, + const struct pl_desc_binding *db) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + const struct pl_desc *desc = &pass->params.descriptors[index]; + + switch (desc->type) { + case PL_DESC_SAMPLED_TEX: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->ActiveTexture(GL_TEXTURE0 + desc->binding); + gl->BindTexture(tex_gl->target, 0); + return; + } + case PL_DESC_STORAGE_IMG: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->BindImageTexture(desc->binding, 0, 0, GL_FALSE, 0, + GL_WRITE_ONLY, GL_R32F); + if (desc->access != PL_DESC_ACCESS_READONLY) + gl->MemoryBarrier(tex_gl->barrier); + return; + } + case PL_DESC_BUF_UNIFORM: + gl->BindBufferBase(GL_UNIFORM_BUFFER, desc->binding, 0); + return; + case PL_DESC_BUF_STORAGE: { + pl_buf buf = db->object; + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, desc->binding, 0); + if (desc->access != PL_DESC_ACCESS_READONLY) + gl->MemoryBarrier(buf_gl->barrier); + return; + } + case PL_DESC_BUF_TEXEL_UNIFORM: + case PL_DESC_BUF_TEXEL_STORAGE: + assert(!"unimplemented"); // TODO + case PL_DESC_INVALID: + case PL_DESC_TYPE_COUNT: + break; + } + + pl_unreachable(); +} + +void gl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + pl_pass pass = params->pass; + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + struct pl_gl *p = PL_PRIV(gpu); + + gl->UseProgram(pass_gl->program); + + for (int i = 0; i < params->num_var_updates; i++) + update_var(gpu, pass, ¶ms->var_updates[i]); + for (int i = 0; i < pass->params.num_descriptors; i++) + update_desc(gpu, pass, i, ¶ms->desc_bindings[i]); + gl->ActiveTexture(GL_TEXTURE0); + + if (!gl_check_err(gpu, "gl_pass_run: updating uniforms")) { + RELEASE_CURRENT(); + return; + } + + switch (pass->params.type) { + case PL_PASS_RASTER: { + struct pl_tex_gl *target_gl = PL_PRIV(params->target); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, target_gl->fbo); + if (!pass->params.load_target && p->has_invalidate_fb) { + GLenum fb = target_gl->fbo ? 
GL_COLOR_ATTACHMENT0 : GL_COLOR; + gl->InvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &fb); + } + + gl->Viewport(params->viewport.x0, params->viewport.y0, + pl_rect_w(params->viewport), pl_rect_h(params->viewport)); + gl->Scissor(params->scissors.x0, params->scissors.y0, + pl_rect_w(params->scissors), pl_rect_h(params->scissors)); + gl->Enable(GL_SCISSOR_TEST); + gl->Disable(GL_DEPTH_TEST); + gl->Disable(GL_CULL_FACE); + gl_check_err(gpu, "gl_pass_run: enabling viewport/scissor"); + + const struct pl_blend_params *blend = pass->params.blend_params; + if (blend) { + static const GLenum map_blend[] = { + [PL_BLEND_ZERO] = GL_ZERO, + [PL_BLEND_ONE] = GL_ONE, + [PL_BLEND_SRC_ALPHA] = GL_SRC_ALPHA, + [PL_BLEND_ONE_MINUS_SRC_ALPHA] = GL_ONE_MINUS_SRC_ALPHA, + }; + + gl->BlendFuncSeparate(map_blend[blend->src_rgb], + map_blend[blend->dst_rgb], + map_blend[blend->src_alpha], + map_blend[blend->dst_alpha]); + gl->Enable(GL_BLEND); + gl_check_err(gpu, "gl_pass_run: enabling blend"); + } + + // Update VBO and VAO + pl_buf vert = params->vertex_buf; + struct pl_buf_gl *vert_gl = vert ? PL_PRIV(vert) : NULL; + gl->BindBuffer(GL_ARRAY_BUFFER, vert ? vert_gl->buffer : pass_gl->buffer); + + if (!vert) { + // Update the buffer directly. In theory we could also do a memcmp + // cache here to avoid unnecessary updates. + gl->BufferData(GL_ARRAY_BUFFER, pl_vertex_buf_size(params), + params->vertex_data, GL_STREAM_DRAW); + } + + if (pass_gl->vao) + gl->BindVertexArray(pass_gl->vao); + + uint64_t vert_id = vert ? vert_gl->id : 0; + size_t vert_offset = vert ? params->buf_offset : 0; + if (!pass_gl->vao || pass_gl->vao_id != vert_id || + pass_gl->vao_offset != vert_offset) + { + // We need to update the VAO when the buffer ID or offset changes + gl_update_va(gpu, pass, vert_offset); + pass_gl->vao_id = vert_id; + pass_gl->vao_offset = vert_offset; + } + + gl_check_err(gpu, "gl_pass_run: update/bind vertex buffer"); + + static const GLenum map_prim[PL_PRIM_TYPE_COUNT] = { + [PL_PRIM_TRIANGLE_LIST] = GL_TRIANGLES, + [PL_PRIM_TRIANGLE_STRIP] = GL_TRIANGLE_STRIP, + }; + GLenum mode = map_prim[pass->params.vertex_type]; + + gl_timer_begin(gpu, params->timer); + + if (params->index_data) { + + static const GLenum index_fmts[PL_INDEX_FORMAT_COUNT] = { + [PL_INDEX_UINT16] = GL_UNSIGNED_SHORT, + [PL_INDEX_UINT32] = GL_UNSIGNED_INT, + }; + + // Upload indices to temporary buffer object + if (!pass_gl->index_buffer) + gl->GenBuffers(1, &pass_gl->index_buffer); // lazily allocated + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, pass_gl->index_buffer); + gl->BufferData(GL_ELEMENT_ARRAY_BUFFER, pl_index_buf_size(params), + params->index_data, GL_STREAM_DRAW); + gl->DrawElements(mode, params->vertex_count, + index_fmts[params->index_fmt], 0); + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + } else if (params->index_buf) { + + // The pointer argument becomes the index buffer offset + struct pl_buf_gl *index_gl = PL_PRIV(params->index_buf); + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_gl->buffer); + gl->DrawElements(mode, params->vertex_count, GL_UNSIGNED_SHORT, + (void *) params->index_offset); + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + } else { + + // Note: the VBO offset is handled in the VAO + gl->DrawArrays(mode, 0, params->vertex_count); + } + + gl_timer_end(gpu, params->timer); + gl_check_err(gpu, "gl_pass_run: drawing"); + + if (pass_gl->vao) { + gl->BindVertexArray(0); + } else { + for (int i = 0; i < pass->params.num_vertex_attribs; i++) + gl->DisableVertexAttribArray(i); + } + + gl->BindBuffer(GL_ARRAY_BUFFER, 0); 
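+ // Reset the remaining raster state (scissor, blend, draw FBO binding) so
+ // the context is handed back in its default state for the next pass.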
+ gl->Disable(GL_SCISSOR_TEST); + gl->Disable(GL_BLEND); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + break; + } + + case PL_PASS_COMPUTE: + gl_timer_begin(gpu, params->timer); + gl->DispatchCompute(params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + gl_timer_end(gpu, params->timer); + break; + + case PL_PASS_INVALID: + case PL_PASS_TYPE_COUNT: + pl_unreachable(); + } + + for (int i = 0; i < pass->params.num_descriptors; i++) + unbind_desc(gpu, pass, i, ¶ms->desc_bindings[i]); + gl->ActiveTexture(GL_TEXTURE0); + + gl->UseProgram(0); + gl_check_err(gpu, "gl_pass_run"); + RELEASE_CURRENT(); +} diff --git a/src/opengl/gpu_tex.c b/src/opengl/gpu_tex.c new file mode 100644 index 0000000..02eda77 --- /dev/null +++ b/src/opengl/gpu_tex.c @@ -0,0 +1,1078 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include "gpu.h" +#include "formats.h" +#include "utils.h" + +#ifdef PL_HAVE_UNIX +#include +#include +#endif + +void gl_tex_destroy(pl_gpu gpu, pl_tex tex) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) { + PL_ERR(gpu, "Failed uninitializing texture, leaking resources!"); + return; + } + + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + if (tex_gl->fbo && !tex_gl->wrapped_fb) + gl->DeleteFramebuffers(1, &tex_gl->fbo); + if (tex_gl->image) { + struct pl_gl *p = PL_PRIV(gpu); + eglDestroyImageKHR(p->egl_dpy, tex_gl->image); + } + if (!tex_gl->wrapped_tex) + gl->DeleteTextures(1, &tex_gl->texture); + +#ifdef PL_HAVE_UNIX + if (tex_gl->fd != -1) + close(tex_gl->fd); +#endif + + gl_check_err(gpu, "gl_tex_destroy"); + RELEASE_CURRENT(); + pl_free((void *) tex); +} + +static GLbitfield tex_barrier(pl_tex tex) +{ + GLbitfield barrier = 0; + const struct pl_tex_params *params = &tex->params; + + if (params->sampleable) + barrier |= GL_TEXTURE_FETCH_BARRIER_BIT; + if (params->renderable || params->blit_src || params->blit_dst) + barrier |= GL_FRAMEBUFFER_BARRIER_BIT; + if (params->storable) + barrier |= GL_SHADER_IMAGE_ACCESS_BARRIER_BIT; + if (params->host_writable || params->host_readable) + barrier |= GL_TEXTURE_UPDATE_BARRIER_BIT; + + return barrier; +} + +#define ADD_ATTRIB(name, value) \ + do { \ + assert(num_attribs + 3 < PL_ARRAY_SIZE(attribs)); \ + attribs[num_attribs++] = (name); \ + attribs[num_attribs++] = (value); \ + } while (0) + +#define ADD_DMABUF_PLANE_ATTRIBS(plane, fd, offset, stride) \ + do { \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _FD_EXT, \ + fd); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _OFFSET_EXT, \ + offset); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _PITCH_EXT, \ + stride); \ + } while (0) + +#define ADD_DMABUF_PLANE_MODIFIERS(plane, mod) \ + do { \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_LO_EXT, \ + (uint32_t) ((mod) & 0xFFFFFFFFlu)); \ + ADD_ATTRIB(EGL_DMA_BUF_PLANE ## plane ## _MODIFIER_HI_EXT, \ + (uint32_t) (((mod) >> 32u) & 0xFFFFFFFFlu)); \ + } while (0) + 
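The ADD_ATTRIB/ADD_DMABUF_PLANE_* helpers above build the flat, EGL_NONE-terminated key/value list that EGL_EXT_image_dma_buf_import expects. Roughly, a single-plane import reduces to the following sketch (the import_dmabuf name, fourcc and geometry are illustrative assumptions, not part of this patch, and eglCreateImageKHR is assumed to already be resolved via eglGetProcAddress, as the glad loader does here):

    #include <EGL/egl.h>
    #include <EGL/eglext.h>
    #include <drm_fourcc.h>

    // Hypothetical single-plane dmabuf import; error handling omitted.
    static EGLImageKHR import_dmabuf(EGLDisplay dpy, int fd, int w, int h,
                                     int stride)
    {
        const EGLint attribs[] = {
            EGL_WIDTH,                     w,
            EGL_HEIGHT,                    h,
            EGL_LINUX_DRM_FOURCC_EXT,      DRM_FORMAT_ARGB8888,
            EGL_DMA_BUF_PLANE0_FD_EXT,     fd,
            EGL_DMA_BUF_PLANE0_OFFSET_EXT, 0,
            EGL_DMA_BUF_PLANE0_PITCH_EXT,  stride,
            EGL_NONE, // terminator is mandatory
        };

        // dmabuf imports require EGL_NO_CONTEXT and a NULL client buffer
        return eglCreateImageKHR(dpy, EGL_NO_CONTEXT, EGL_LINUX_DMA_BUF_EXT,
                                 (EGLClientBuffer) NULL, attribs);
    }

The real import path below additionally duplicates the fd before use and appends DRM format modifiers when the driver supports them.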
+static bool gl_tex_import(pl_gpu gpu, + enum pl_handle_type handle_type, + const struct pl_shared_mem *shared_mem, + struct pl_tex_t *tex) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_gl *p = PL_PRIV(gpu); + if (!MAKE_CURRENT()) + return false; + + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + const struct pl_tex_params *params = &tex->params; + + int attribs[20] = {}; + int num_attribs = 0; + ADD_ATTRIB(EGL_WIDTH, params->w); + ADD_ATTRIB(EGL_HEIGHT, params->h); + + switch (handle_type) { + +#ifdef PL_HAVE_UNIX + case PL_HANDLE_DMA_BUF: + if (shared_mem->handle.fd == -1) { + PL_ERR(gpu, "%s: invalid fd", __func__); + goto error; + } + + tex_gl->fd = dup(shared_mem->handle.fd); + if (tex_gl->fd == -1) { + PL_ERR(gpu, "%s: cannot duplicate fd %d for importing: %s", + __func__, shared_mem->handle.fd, strerror(errno)); + goto error; + } + + ADD_ATTRIB(EGL_LINUX_DRM_FOURCC_EXT, params->format->fourcc); + ADD_DMABUF_PLANE_ATTRIBS(0, tex_gl->fd, shared_mem->offset, + PL_DEF(shared_mem->stride_w, params->w)); + if (p->has_modifiers) + ADD_DMABUF_PLANE_MODIFIERS(0, shared_mem->drm_format_mod); + + attribs[num_attribs] = EGL_NONE; + + // EGL_LINUX_DMA_BUF_EXT requires EGL_NO_CONTEXT + tex_gl->image = eglCreateImageKHR(p->egl_dpy, + EGL_NO_CONTEXT, + EGL_LINUX_DMA_BUF_EXT, + (EGLClientBuffer) NULL, + attribs); + + break; +#else // !PL_HAVE_UNIX + case PL_HANDLE_DMA_BUF: + pl_unreachable(); +#endif + + case PL_HANDLE_WIN32: + case PL_HANDLE_WIN32_KMT: + case PL_HANDLE_HOST_PTR: + case PL_HANDLE_FD: + case PL_HANDLE_MTL_TEX: + case PL_HANDLE_IOSURFACE: + pl_unreachable(); + + } + + if (!egl_check_err(gpu, "eglCreateImageKHR") || !tex_gl->image) + goto error; + + // tex_gl->image should be already bound + if (p->has_egl_storage) { + gl->EGLImageTargetTexStorageEXT(GL_TEXTURE_2D, tex_gl->image, NULL); + } else { + gl->EGLImageTargetTexture2DOES(GL_TEXTURE_2D, tex_gl->image); + } + if (!egl_check_err(gpu, "EGLImageTargetTexture2DOES")) + goto error; + + RELEASE_CURRENT(); + return true; + +error: + PL_ERR(gpu, "Failed importing GL texture!"); + RELEASE_CURRENT(); + return false; +} + +static EGLenum egl_from_gl_target(pl_gpu gpu, int target) +{ + switch(target) { + case GL_TEXTURE_2D: return EGL_GL_TEXTURE_2D; + case GL_TEXTURE_3D: return EGL_GL_TEXTURE_3D; + default: + PL_ERR(gpu, "%s: unsupported texture target 0x%x", __func__, target); + return 0; + } +} + +static bool gl_tex_export(pl_gpu gpu, enum pl_handle_type handle_type, + bool preserved, struct pl_tex_t *tex) +{ + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + struct pl_gl *p = PL_PRIV(gpu); + + EGLenum egltarget = egl_from_gl_target(gpu, tex_gl->target); + if (!egltarget) + goto error; + + int attribs[] = { + EGL_IMAGE_PRESERVED, preserved, + EGL_NONE, + }; + + // We assume that tex_gl->texture is already bound + tex_gl->image = eglCreateImageKHR(p->egl_dpy, + p->egl_ctx, + egltarget, + (EGLClientBuffer) (uintptr_t) tex_gl->texture, + attribs); + if (!egl_check_err(gpu, "eglCreateImageKHR") || !tex_gl->image) + goto error; + + switch (handle_type) { + +#ifdef PL_HAVE_UNIX + case PL_HANDLE_DMA_BUF: { + int fourcc = 0; + int num_planes = 0; + EGLuint64KHR modifier = 0; + bool ok; + ok = eglExportDMABUFImageQueryMESA(p->egl_dpy, + tex_gl->image, + &fourcc, + &num_planes, + &modifier); + if (!egl_check_err(gpu, "eglExportDMABUFImageQueryMESA") || !ok) + goto error; + + if (fourcc != tex->params.format->fourcc) { + PL_ERR(gpu, "Exported DRM format %s does not match fourcc of " + "specified pl_fmt %s? 
Please open a bug.", + PRINT_FOURCC(fourcc), PRINT_FOURCC(tex->params.format->fourcc)); + goto error; + } + + if (num_planes != 1) { + PL_ERR(gpu, "Unsupported number of planes: %d", num_planes); + goto error; + } + + int offset = 0, stride = 0; + ok = eglExportDMABUFImageMESA(p->egl_dpy, + tex_gl->image, + &tex_gl->fd, + &stride, + &offset); + if (!egl_check_err(gpu, "eglExportDMABUFImageMesa") || !ok) + goto error; + + off_t fdsize = lseek(tex_gl->fd, 0, SEEK_END); + off_t err = fdsize > 0 && lseek(tex_gl->fd, 0, SEEK_SET); + if (fdsize <= 0 || err < 0) { + PL_ERR(gpu, "Failed querying FD size: %s", strerror(errno)); + goto error; + } + + tex->shared_mem = (struct pl_shared_mem) { + .handle.fd = tex_gl->fd, + .size = fdsize, + .offset = offset, + .drm_format_mod = modifier, + .stride_w = stride, + }; + break; + } +#else // !PL_HAVE_UNIX + case PL_HANDLE_DMA_BUF: + pl_unreachable(); +#endif + + case PL_HANDLE_WIN32: + case PL_HANDLE_WIN32_KMT: + case PL_HANDLE_HOST_PTR: + case PL_HANDLE_FD: + case PL_HANDLE_MTL_TEX: + case PL_HANDLE_IOSURFACE: + pl_unreachable(); + + } + + return true; + +error: + PL_ERR(gpu, "Failed exporting GL texture!"); + return false; +} + +static const char *fb_err_str(GLenum err) +{ + switch (err) { +#define CASE(name) case name: return #name + CASE(GL_FRAMEBUFFER_COMPLETE); + CASE(GL_FRAMEBUFFER_UNDEFINED); + CASE(GL_FRAMEBUFFER_INCOMPLETE_ATTACHMENT); + CASE(GL_FRAMEBUFFER_INCOMPLETE_MISSING_ATTACHMENT); + CASE(GL_FRAMEBUFFER_INCOMPLETE_DIMENSIONS); + CASE(GL_FRAMEBUFFER_INCOMPLETE_DRAW_BUFFER); + CASE(GL_FRAMEBUFFER_INCOMPLETE_READ_BUFFER); + CASE(GL_FRAMEBUFFER_UNSUPPORTED); + CASE(GL_FRAMEBUFFER_INCOMPLETE_MULTISAMPLE); + CASE(GL_FRAMEBUFFER_INCOMPLETE_LAYER_TARGETS); +#undef CASE + + default: return "unknown error"; + } +} + +pl_tex gl_tex_create(pl_gpu gpu, const struct pl_tex_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return NULL; + + struct pl_gl *p = PL_PRIV(gpu); + struct pl_tex_t *tex = pl_zalloc_obj(NULL, tex, struct pl_tex_gl); + tex->params = *params; + tex->params.initial_data = NULL; + tex->sampler_type = PL_SAMPLER_NORMAL; + + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + + const struct gl_format **fmtp = PL_PRIV(params->format); + const struct gl_format *fmt = *fmtp; + *tex_gl = (struct pl_tex_gl) { + .format = fmt->fmt, + .iformat = fmt->ifmt, + .type = fmt->type, + .barrier = tex_barrier(tex), + .fd = -1, + }; + + static const GLint targets[] = { + [1] = GL_TEXTURE_1D, + [2] = GL_TEXTURE_2D, + [3] = GL_TEXTURE_3D, + }; + + int dims = pl_tex_params_dimension(*params); + pl_assert(dims >= 1 && dims <= 3); + tex_gl->target = targets[dims]; + + gl->GenTextures(1, &tex_gl->texture); + gl->BindTexture(tex_gl->target, tex_gl->texture); + + if (params->import_handle) { + if (!gl_tex_import(gpu, params->import_handle, ¶ms->shared_mem, tex)) + goto error; + } else { + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 1); + + switch (dims) { + case 1: + gl->TexImage1D(tex_gl->target, 0, tex_gl->iformat, params->w, 0, + tex_gl->format, tex_gl->type, params->initial_data); + break; + case 2: + gl->TexImage2D(tex_gl->target, 0, tex_gl->iformat, params->w, params->h, + 0, tex_gl->format, tex_gl->type, params->initial_data); + break; + case 3: + gl->TexImage3D(tex_gl->target, 0, tex_gl->iformat, params->w, params->h, + params->d, 0, tex_gl->format, tex_gl->type, + params->initial_data); + break; + } + + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); + } + + if (params->export_handle) { + if (!gl_tex_export(gpu, params->export_handle, 
params->initial_data, tex)) + goto error; + } + + gl->BindTexture(tex_gl->target, 0); + + if (!gl_check_err(gpu, "gl_tex_create: texture")) + goto error; + + bool need_fbo = tex->params.renderable; + if (tex->params.blit_src || tex->params.blit_dst) { + if (dims != 2) { + PL_ERR(gpu, "Blittable textures may only be 2D!"); + goto error; + } + + need_fbo = true; + } + + bool can_fbo = tex->params.format->caps & PL_FMT_CAP_RENDERABLE && + tex->params.d == 0; + + // Try creating an FBO for host-readable textures, since this allows + // reading back with glReadPixels instead of glGetTexImage. (Additionally, + // GLES does not support glGetTexImage) + if (tex->params.host_readable && (can_fbo || p->gles_ver)) + need_fbo = true; + + if (need_fbo) { + if (!can_fbo) { + PL_ERR(gpu, "Trying to create a renderable/blittable/readable " + "texture with an incompatible (non-renderable) format!"); + goto error; + } + + gl->GenFramebuffers(1, &tex_gl->fbo); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); + switch (dims) { + case 1: + gl->FramebufferTexture1D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_1D, tex_gl->texture, 0); + break; + case 2: + gl->FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + GL_TEXTURE_2D, tex_gl->texture, 0); + break; + case 3: pl_unreachable(); + } + + GLenum err = gl->CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + if (err != GL_FRAMEBUFFER_COMPLETE) { + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + PL_ERR(gpu, "Failed creating framebuffer: %s", fb_err_str(err)); + goto error; + } + + if (params->host_readable && p->gles_ver) { + GLint read_type = 0, read_fmt = 0; + gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); + gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); + if (read_type != tex_gl->type || read_fmt != tex_gl->format) { + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + PL_ERR(gpu, "Trying to create host_readable texture whose " + "implementation-defined pixel read format " + "(type=0x%X, fmt=0x%X) does not match the texture's " + "internal format (type=0x%X, fmt=0x%X)! This is a " + "GLES/driver limitation, there's little we can do " + "about it.", + read_type, read_fmt, tex_gl->type, tex_gl->format); + goto error; + } + } + + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + if (!gl_check_err(gpu, "gl_tex_create: fbo")) + goto error; + } + + RELEASE_CURRENT(); + return tex; + +error: + gl_tex_destroy(gpu, tex); + RELEASE_CURRENT(); + return NULL; +} + +static bool gl_fb_query(pl_gpu gpu, int fbo, struct pl_fmt_t *fmt, + struct gl_format *glfmt) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_gl *p = PL_PRIV(gpu); + *fmt = (struct pl_fmt_t) { + .name = "fbo", + .type = PL_FMT_UNKNOWN, + .caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE | PL_FMT_CAP_BLENDABLE, + .num_components = 4, + .component_depth = {8, 8, 8, 8}, // default to rgba8 + .sample_order = {0, 1, 2, 3}, + }; + + *glfmt = (struct gl_format) { + .fmt = GL_RGBA, + }; + + bool can_query = gl_test_ext(gpu, "GL_ARB_framebuffer_object", 30, 20); + if (!fbo && p->gles_ver && p->gles_ver < 30) + can_query = false; // can't query default framebuffer on GLES 2.0 + + if (can_query) { + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo); + + GLenum obj = p->gles_ver ? 
GL_BACK : GL_BACK_LEFT; + if (fbo != 0) + obj = GL_COLOR_ATTACHMENT0; + + GLint type = 0; + gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_COMPONENT_TYPE, &type); + switch (type) { + case GL_FLOAT: fmt->type = PL_FMT_FLOAT; break; + case GL_INT: fmt->type = PL_FMT_SINT; break; + case GL_UNSIGNED_INT: fmt->type = PL_FMT_UINT; break; + case GL_SIGNED_NORMALIZED: fmt->type = PL_FMT_SNORM; break; + case GL_UNSIGNED_NORMALIZED: fmt->type = PL_FMT_UNORM; break; + default: fmt->type = PL_FMT_UNKNOWN; break; + } + + gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_RED_SIZE, &fmt->component_depth[0]); + gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_GREEN_SIZE, &fmt->component_depth[1]); + gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_BLUE_SIZE, &fmt->component_depth[2]); + gl->GetFramebufferAttachmentParameteriv(GL_DRAW_FRAMEBUFFER, obj, + GL_FRAMEBUFFER_ATTACHMENT_ALPHA_SIZE, &fmt->component_depth[3]); + + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + gl_check_err(gpu, "gl_fb_query"); + + if (!fmt->component_depth[0]) { + PL_INFO(gpu, "OpenGL framebuffer did not export depth information," + "assuming 8-bit framebuffer"); + for (int i = 0; i < PL_ARRAY_SIZE(fmt->component_depth); i++) + fmt->component_depth[i] = 8; + } + + // Strip missing components from component map + while (!fmt->component_depth[fmt->num_components - 1]) { + fmt->num_components--; + pl_assert(fmt->num_components); + } + } + + int gpu_bits = 0; + for (int i = 0; i < 4; i++) + gpu_bits += fmt->component_depth[i]; + fmt->internal_size = (gpu_bits + 7) / 8; + + size_t host_size = 0; + switch (fmt->type) { + case PL_FMT_UNKNOWN: + fmt->opaque = true; + return true; + case PL_FMT_FLOAT: + glfmt->type = GL_FLOAT; + host_size = sizeof(float); + break; + case PL_FMT_UNORM: + case PL_FMT_UINT: + if (gpu_bits > 32) { + glfmt->type = GL_UNSIGNED_SHORT; + host_size = sizeof(uint16_t); + } else { + glfmt->type = GL_UNSIGNED_BYTE; + host_size = sizeof(uint8_t); + } + break; + case PL_FMT_SNORM: + case PL_FMT_SINT: + if (gpu_bits > 32) { + glfmt->type = GL_SHORT; + host_size = sizeof(int16_t); + } else { + glfmt->type = GL_BYTE; + host_size = sizeof(int8_t); + } + break; + case PL_FMT_TYPE_COUNT: + pl_unreachable(); + } + + fmt->texel_size = fmt->num_components * host_size; + for (int i = 0; i < fmt->num_components; i++) + fmt->host_bits[i] = 8 * host_size; + fmt->caps |= PL_FMT_CAP_HOST_READABLE; + + return true; +} + +pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return NULL; + + struct pl_gl *p = PL_PRIV(gpu); + struct pl_tex_t *tex = pl_alloc_obj(NULL, tex, struct pl_tex_gl); + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + *tex = (struct pl_tex_t) { + .params = { + .w = params->width, + .h = params->height, + .d = params->depth, + }, + }; + + pl_fmt fmt = NULL; + const struct gl_format *glfmt = NULL; + + if (params->texture) { + // Wrapping texture: Require matching iformat + pl_assert(params->iformat); + for (int i = 0; i < gpu->num_formats; i++) { + const struct gl_format **glfmtp = PL_PRIV(gpu->formats[i]); + if ((*glfmtp)->ifmt == params->iformat) { + fmt = gpu->formats[i]; + glfmt = *glfmtp; + break; + } + } + + if (!fmt) { + PL_ERR(gpu, "Failed mapping iformat %d to any equivalent `pl_fmt`", + params->iformat); + goto error; + } + } else { + // 
Wrapping framebuffer: Allocate/infer generic FBO format + fmt = pl_alloc_obj((void *) gpu, fmt, const struct gl_format *); + glfmt = pl_alloc_ptr((void *) fmt, glfmt); + const struct gl_format **glfmtp = PL_PRIV(fmt); + *glfmtp = glfmt; + if (!gl_fb_query(gpu, params->framebuffer, + (struct pl_fmt_t *) fmt, + (struct gl_format *) glfmt)) + { + PL_ERR(gpu, "Failed querying framebuffer specifics!"); + pl_free((void *) fmt); + goto error; + } + } + + *tex_gl = (struct pl_tex_gl) { + .target = params->target, + .texture = params->texture, + .fbo = params->framebuffer, + .wrapped_tex = !!params->texture, + .wrapped_fb = params->framebuffer || !params->texture, + .iformat = glfmt->ifmt, + .format = glfmt->fmt, + .type = glfmt->type, + .fd = -1, + }; + + int dims = pl_tex_params_dimension(tex->params); + if (!tex_gl->target) { + switch (dims) { + case 1: tex_gl->target = GL_TEXTURE_1D; break; + case 2: tex_gl->target = GL_TEXTURE_2D; break; + case 3: tex_gl->target = GL_TEXTURE_3D; break; + } + } + + // Map texture-specific sampling metadata + if (params->texture) { + switch (params->target) { + case GL_TEXTURE_1D: + if (params->width || params->depth) { + PL_ERR(gpu, "Invalid texture dimensions for GL_TEXTURE_1D"); + goto error; + } + // fall through + case GL_TEXTURE_2D: + if (params->depth) { + PL_ERR(gpu, "Invalid texture dimensions for GL_TEXTURE_2D"); + goto error; + } + // fall through + case 0: + case GL_TEXTURE_3D: + tex->sampler_type = PL_SAMPLER_NORMAL; + break; + + case GL_TEXTURE_RECTANGLE: tex->sampler_type = PL_SAMPLER_RECT; break; + case GL_TEXTURE_EXTERNAL_OES: tex->sampler_type = PL_SAMPLER_EXTERNAL; break; + + default: + PL_ERR(gpu, "Failed mapping texture target %u to any equivalent " + "`pl_sampler_type`", params->target); + goto error; + } + } + + // Create optional extra fbo if needed/possible + bool can_fbo = tex_gl->texture && + (fmt->caps & PL_FMT_CAP_RENDERABLE) && + tex->sampler_type != PL_SAMPLER_EXTERNAL && + dims < 3; + + if (can_fbo && !tex_gl->fbo) { + gl->GenFramebuffers(1, &tex_gl->fbo); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); + switch (dims) { + case 1: + gl->FramebufferTexture1D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + tex_gl->target, tex_gl->texture, 0); + break; + case 2: + gl->FramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, + tex_gl->target, tex_gl->texture, 0); + break; + } + + GLenum err = gl->CheckFramebufferStatus(GL_DRAW_FRAMEBUFFER); + if (err != GL_FRAMEBUFFER_COMPLETE) { + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + PL_ERR(gpu, "Failed creating framebuffer: error code %d", err); + goto error; + } + + if (p->gles_ver) { + GLint read_type = 0, read_fmt = 0; + gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_TYPE, &read_type); + gl->GetIntegerv(GL_IMPLEMENTATION_COLOR_READ_FORMAT, &read_fmt); + tex->params.host_readable = read_type == tex_gl->type && + read_fmt == tex_gl->format; + } else { + tex->params.host_readable = true; + } + + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + if (!gl_check_err(gpu, "pl_opengl_wrap: fbo")) + goto error; + } + + // Complete the process of inferring the texture capabilities + tex->params.format = fmt; + if (tex_gl->texture) { + tex->params.sampleable = fmt->caps & PL_FMT_CAP_SAMPLEABLE; + tex->params.storable = fmt->caps & PL_FMT_CAP_STORABLE; + tex->params.host_writable = !fmt->opaque; + tex->params.host_readable |= fmt->caps & PL_FMT_CAP_HOST_READABLE; + } + if (tex_gl->fbo || tex_gl->wrapped_fb) { + tex->params.renderable = fmt->caps & PL_FMT_CAP_RENDERABLE; + 
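// Using |= here: the GLES read-format probe above may already have marked
// the texture host_readable, and the generic format caps should only ever
// widen that.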
tex->params.host_readable |= fmt->caps & PL_FMT_CAP_HOST_READABLE; + if (dims == 2 && (fmt->caps & PL_FMT_CAP_BLITTABLE)) { + tex->params.blit_src = true; + tex->params.blit_dst = true; + } + } + + tex_gl->barrier = tex_barrier(tex); + RELEASE_CURRENT(); + return tex; + +error: + gl_tex_destroy(gpu, tex); + RELEASE_CURRENT(); + return NULL; +} + +unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, + unsigned int *out_target, int *out_iformat, + unsigned int *out_fbo) +{ + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + if (!tex_gl->texture) { + PL_ERR(gpu, "Trying to call `pl_opengl_unwrap` on a pseudo-texture " + "(perhaps obtained by `pl_swapchain_start_frame`?)"); + return 0; + } + + if (out_target) + *out_target = tex_gl->target; + if (out_iformat) + *out_iformat = tex_gl->iformat; + if (out_fbo) + *out_fbo = tex_gl->fbo; + + return tex_gl->texture; +} + +void gl_tex_invalidate(pl_gpu gpu, pl_tex tex) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_gl *p = PL_PRIV(gpu); + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + if (!MAKE_CURRENT()) + return; + + if (tex_gl->texture && p->has_invalidate_tex) + gl->InvalidateTexImage(tex_gl->texture, 0); + + if ((tex_gl->wrapped_fb || tex_gl->fbo) && p->has_invalidate_fb) { + GLenum attachment = tex_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR; + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); + gl->InvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &attachment); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + } + + gl_check_err(gpu, "gl_tex_invalidate"); + RELEASE_CURRENT(); +} + +void gl_tex_clear_ex(pl_gpu gpu, pl_tex tex, const union pl_clear_color color) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + pl_assert(tex_gl->fbo || tex_gl->wrapped_fb); + + switch (tex->params.format->type) { + case PL_FMT_UNKNOWN: + case PL_FMT_FLOAT: + case PL_FMT_UNORM: + case PL_FMT_SNORM: + gl->ClearColor(color.f[0], color.f[1], color.f[2], color.f[3]); + break; + + case PL_FMT_UINT: + gl->ClearColorIuiEXT(color.u[0], color.u[1], color.u[2], color.u[3]); + break; + + case PL_FMT_SINT: + gl->ClearColorIiEXT(color.i[0], color.i[1], color.i[2], color.i[3]); + break; + + case PL_FMT_TYPE_COUNT: + pl_unreachable(); + } + + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, tex_gl->fbo); + gl->Clear(GL_COLOR_BUFFER_BIT); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + gl_check_err(gpu, "gl_tex_clear"); + RELEASE_CURRENT(); +} + +void gl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + struct pl_tex_gl *src_gl = PL_PRIV(params->src); + struct pl_tex_gl *dst_gl = PL_PRIV(params->dst); + + pl_assert(src_gl->fbo || src_gl->wrapped_fb); + pl_assert(dst_gl->fbo || dst_gl->wrapped_fb); + gl->BindFramebuffer(GL_READ_FRAMEBUFFER, src_gl->fbo); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, dst_gl->fbo); + + static const GLint filters[PL_TEX_SAMPLE_MODE_COUNT] = { + [PL_TEX_SAMPLE_NEAREST] = GL_NEAREST, + [PL_TEX_SAMPLE_LINEAR] = GL_LINEAR, + }; + + pl_rect3d src_rc = params->src_rc, dst_rc = params->dst_rc; + gl->BlitFramebuffer(src_rc.x0, src_rc.y0, src_rc.x1, src_rc.y1, + dst_rc.x0, dst_rc.y0, dst_rc.x1, dst_rc.y1, + GL_COLOR_BUFFER_BIT, filters[params->sample_mode]); + + gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + gl_check_err(gpu, "gl_tex_blit"); + RELEASE_CURRENT(); +} + +static int get_alignment(size_t pitch) +{ + if (pitch % 8 == 0) + return 8; + if (pitch % 4 
== 0) + return 4; + if (pitch % 2 == 0) + return 2; + return 1; +} + +bool gl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_gl *p = PL_PRIV(gpu); + pl_tex tex = params->tex; + pl_fmt fmt = tex->params.format; + pl_buf buf = params->buf; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + struct pl_buf_gl *buf_gl = buf ? PL_PRIV(buf) : NULL; + + // If the user requests asynchronous uploads, it's more efficient to do + // them via a PBO - this allows us to skip blocking the caller, especially + // when the host pointer can be imported directly. + if (params->callback && !buf) { + size_t buf_size = pl_tex_transfer_size(params); + const size_t min_size = 32*1024; // 32 KiB + if (buf_size >= min_size && buf_size <= gpu->limits.max_buf_size) + return pl_tex_upload_pbo(gpu, params); + } + + if (!MAKE_CURRENT()) + return false; + + uintptr_t src = (uintptr_t) params->ptr; + if (buf) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, buf_gl->buffer); + src = buf_gl->offset + params->buf_offset; + } + + bool misaligned = params->row_pitch % fmt->texel_size; + int stride_w = params->row_pitch / fmt->texel_size; + int stride_h = params->depth_pitch / params->row_pitch; + + int dims = pl_tex_params_dimension(tex->params); + if (dims > 1) + gl->PixelStorei(GL_UNPACK_ALIGNMENT, get_alignment(params->row_pitch)); + + int rows = pl_rect_h(params->rc); + if (misaligned) { + rows = 1; + } else if (stride_w != pl_rect_w(params->rc)) { + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, stride_w); + } + + int imgs = pl_rect_d(params->rc); + if (stride_h != pl_rect_h(params->rc) || rows < stride_h) + gl->PixelStorei(GL_UNPACK_IMAGE_HEIGHT, stride_h); + + gl->BindTexture(tex_gl->target, tex_gl->texture); + gl_timer_begin(gpu, params->timer); + + switch (dims) { + case 1: + gl->TexSubImage1D(tex_gl->target, 0, params->rc.x0, pl_rect_w(params->rc), + tex_gl->format, tex_gl->type, (void *) src); + break; + case 2: + for (int y = params->rc.y0; y < params->rc.y1; y += rows) { + gl->TexSubImage2D(tex_gl->target, 0, params->rc.x0, y, + pl_rect_w(params->rc), rows, tex_gl->format, + tex_gl->type, (void *) src); + src += params->row_pitch * rows; + } + break; + case 3: + for (int z = params->rc.z0; z < params->rc.z1; z += imgs) { + uintptr_t row_src = src; + for (int y = params->rc.y0; y < params->rc.y1; y += rows) { + gl->TexSubImage3D(tex_gl->target, 0, params->rc.x0, y, z, + pl_rect_w(params->rc), rows, imgs, + tex_gl->format, tex_gl->type, (void *) row_src); + row_src = (uintptr_t) row_src + params->row_pitch * rows; + } + src += params->depth_pitch * imgs; + } + break; + } + + gl_timer_end(gpu, params->timer); + gl->BindTexture(tex_gl->target, 0); + gl->PixelStorei(GL_UNPACK_ALIGNMENT, 4); + gl->PixelStorei(GL_UNPACK_ROW_LENGTH, 0); + gl->PixelStorei(GL_UNPACK_IMAGE_HEIGHT, 0); + + if (buf) { + gl->BindBuffer(GL_PIXEL_UNPACK_BUFFER, 0); + if (buf->params.host_mapped) { + // Make sure the PBO is not reused until GL is done with it. If a + // previous operation is pending, "update" it by creating a new + // fence that will cover the previous operation as well. 
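+ // (glDeleteSync silently ignores a zero sync object, so this is safe even
+ // before the first fence has been created.)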
+ gl->DeleteSync(buf_gl->fence); + buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } + } + + if (params->callback) { + PL_ARRAY_APPEND(gpu, p->callbacks, (struct gl_cb) { + .sync = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0), + .callback = params->callback, + .priv = params->priv, + }); + } + + bool ok = gl_check_err(gpu, "gl_tex_upload"); + RELEASE_CURRENT(); + return ok; +} + +bool gl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_gl *p = PL_PRIV(gpu); + pl_tex tex = params->tex; + pl_fmt fmt = tex->params.format; + pl_buf buf = params->buf; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + struct pl_buf_gl *buf_gl = buf ? PL_PRIV(buf) : NULL; + bool ok = true; + + if (params->callback && !buf) { + size_t buf_size = pl_tex_transfer_size(params); + const size_t min_size = 32*1024; // 32 KiB + if (buf_size >= min_size && buf_size <= gpu->limits.max_buf_size) + return pl_tex_download_pbo(gpu, params); + } + + if (!MAKE_CURRENT()) + return false; + + uintptr_t dst = (uintptr_t) params->ptr; + if (buf) { + gl->BindBuffer(GL_PIXEL_PACK_BUFFER, buf_gl->buffer); + dst = buf_gl->offset + params->buf_offset; + } + + pl_rect3d full = { + 0, 0, 0, + tex->params.w, + PL_DEF(tex->params.h, 1), + PL_DEF(tex->params.d, 1), + }; + + bool misaligned = params->row_pitch % fmt->texel_size; + int stride_w = params->row_pitch / fmt->texel_size; + int stride_h = params->depth_pitch / params->row_pitch; + + int dims = pl_tex_params_dimension(tex->params); + bool is_copy = pl_rect3d_eq(params->rc, full) && + stride_w == tex->params.w && + stride_h == PL_DEF(tex->params.h, 1) && + !misaligned; + + gl_timer_begin(gpu, params->timer); + + if (tex_gl->fbo || tex_gl->wrapped_fb) { + // We can use a more efficient path when we have an FBO available + if (dims > 1) + gl->PixelStorei(GL_PACK_ALIGNMENT, get_alignment(params->row_pitch)); + + int rows = pl_rect_h(params->rc); + if (misaligned) { + rows = 1; + } else if (stride_w != tex->params.w) { + gl->PixelStorei(GL_PACK_ROW_LENGTH, stride_w); + } + + // No 3D framebuffers + pl_assert(pl_rect_d(params->rc) == 1); + + gl->BindFramebuffer(GL_READ_FRAMEBUFFER, tex_gl->fbo); + for (int y = params->rc.y0; y < params->rc.y1; y += rows) { + gl->ReadPixels(params->rc.x0, y, pl_rect_w(params->rc), rows, + tex_gl->format, tex_gl->type, (void *) dst); + dst += params->row_pitch * rows; + } + gl->BindFramebuffer(GL_READ_FRAMEBUFFER, 0); + gl->PixelStorei(GL_PACK_ALIGNMENT, 4); + gl->PixelStorei(GL_PACK_ROW_LENGTH, 0); + } else if (is_copy) { + // We're downloading the entire texture + gl->BindTexture(tex_gl->target, tex_gl->texture); + gl->GetTexImage(tex_gl->target, 0, tex_gl->format, tex_gl->type, (void *) dst); + gl->BindTexture(tex_gl->target, 0); + } else { + PL_ERR(gpu, "Partial downloads of 3D textures not implemented!"); + ok = false; + } + + gl_timer_end(gpu, params->timer); + + if (buf) { + gl->BindBuffer(GL_PIXEL_PACK_BUFFER, 0); + if (ok && buf->params.host_mapped) { + gl->DeleteSync(buf_gl->fence); + buf_gl->fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + } + } + + if (params->callback) { + PL_ARRAY_APPEND(gpu, p->callbacks, (struct gl_cb) { + .sync = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0), + .callback = params->callback, + .priv = params->priv, + }); + } + + ok &= gl_check_err(gpu, "gl_tex_download"); + RELEASE_CURRENT(); + return ok; +} diff --git a/src/opengl/include/glad/meson.build b/src/opengl/include/glad/meson.build new file mode 100644 index 
0000000..05b3f02 --- /dev/null +++ b/src/opengl/include/glad/meson.build @@ -0,0 +1,29 @@ +glad_check = run_command([ python, '-c', 'import glad; print(glad.__version__)' ], + env: python_env, + capture: true, + check: false, +) + +glad_ver = glad_check.returncode() == 0 ? glad_check.stdout().strip() : 'none' +glad_req = '>= 2.0' + +if not glad_ver.version_compare(glad_req) + error(f'glad (required: @glad_req@, found: @glad_ver@) was not found in ' + + 'PYTHONPATH or `3rdparty`. Please run `git submodule update --init` ' + + 'followed by `meson --wipe`.') +endif + +glad = custom_target('gl.h', + output: 'gl.h', + env: python_env, + command: [ + python, '-m', 'glad', '--out-path=@OUTDIR@/../../', + '--reproducible', '--merge', '--api=gl:core,gles2,egl', + '--extensions=' + ','.join(gl_extensions), 'c', '--header-only', '--mx' + ] + (opengl_link.allowed() ? ['--loader'] : []) +) + +glad_dep = declare_dependency( + include_directories: include_directories('..'), + sources: glad, +) diff --git a/src/opengl/loader_egl.c b/src/opengl/loader_egl.c new file mode 100644 index 0000000..0e04c71 --- /dev/null +++ b/src/opengl/loader_egl.c @@ -0,0 +1,2 @@ +#define GLAD_EGL_IMPLEMENTATION +#include "common.h" diff --git a/src/opengl/loader_gl.c b/src/opengl/loader_gl.c new file mode 100644 index 0000000..26b8bef --- /dev/null +++ b/src/opengl/loader_gl.c @@ -0,0 +1,2 @@ +#define GLAD_GL_IMPLEMENTATION +#include "common.h" diff --git a/src/opengl/meson.build b/src/opengl/meson.build new file mode 100644 index 0000000..59ba921 --- /dev/null +++ b/src/opengl/meson.build @@ -0,0 +1,76 @@ +opengl_build = get_option('opengl') +opengl_link = get_option('gl-proc-addr') + +if host_machine.system() == 'windows' or host_machine.system().endswith('bsd') or \ + host_machine.system() == 'dragonfly' + libdl = declare_dependency() +else + libdl = cc.find_library('dl', required : opengl_link) +endif +opengl_link = opengl_link.require(libdl.found()) +components.set('opengl', opengl_build.allowed()) +components.set('gl-proc-addr', opengl_link.allowed()) + +if opengl_build.allowed() + sources += [ + 'opengl/context.c', + 'opengl/formats.c', + 'opengl/loader_gl.c', + 'opengl/loader_egl.c', + 'opengl/gpu.c', + 'opengl/gpu_tex.c', + 'opengl/gpu_pass.c', + 'opengl/swapchain.c', + 'opengl/utils.c', + ] + + if opengl_link.allowed() + build_deps += libdl + tests += 'opengl_surfaceless.c' + endif + + gl_extensions = [ + 'GL_AMD_pinned_memory', + 'GL_ARB_buffer_storage', + 'GL_ARB_compute_shader', + 'GL_ARB_framebuffer_object', + 'GL_ARB_get_program_binary', + 'GL_ARB_invalidate_subdata', + 'GL_ARB_pixel_buffer_object', + 'GL_ARB_program_interface_query', + 'GL_ARB_shader_image_load_store', + 'GL_ARB_shader_storage_buffer_object', + 'GL_ARB_sync', + 'GL_ARB_texture_float', + 'GL_ARB_texture_gather', + 'GL_ARB_texture_rg', + 'GL_ARB_timer_query', + 'GL_ARB_uniform_buffer_object', + 'GL_ARB_vertex_array_object', + 'GL_EXT_EGL_image_storage', + 'GL_EXT_color_buffer_float', + 'GL_EXT_color_buffer_half_float', + 'GL_EXT_texture3D', + 'GL_EXT_texture_format_BGRA8888', + 'GL_EXT_texture_integer', + 'GL_EXT_texture_norm16', + 'GL_EXT_texture_rg', + 'GL_EXT_unpack_subimage', + 'GL_KHR_debug', + 'GL_OES_EGL_image', + 'GL_OES_EGL_image_external', + 'EGL_EXT_image_dma_buf_import', + 'EGL_EXT_image_dma_buf_import_modifiers', + 'EGL_EXT_platform_base', + 'EGL_KHR_debug', + 'EGL_KHR_image_base', + 'EGL_MESA_image_dma_buf_export', + 'EGL_MESA_platform_surfaceless', + ] + + # Generate GL loader + subdir('include/glad') +else + glad_dep = [] + 
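# OpenGL disabled at configure time: build the stubs so the public
# pl_opengl_* symbols still exist.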
sources += 'opengl/stubs.c' +endif diff --git a/src/opengl/stubs.c b/src/opengl/stubs.c new file mode 100644 index 0000000..20395f9 --- /dev/null +++ b/src/opengl/stubs.c @@ -0,0 +1,63 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include "../common.h" +#include "log.h" + +#include + +const struct pl_opengl_params pl_opengl_default_params = {0}; + +pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params) +{ + pl_fatal(log, "libplacebo compiled without OpenGL support!"); + return NULL; +} + +void pl_opengl_destroy(pl_opengl *pgl) +{ + pl_opengl gl = *pgl; + pl_assert(!gl); +} + +pl_opengl pl_opengl_get(pl_gpu gpu) +{ + return NULL; +} + +pl_swapchain pl_opengl_create_swapchain(pl_opengl gl, + const struct pl_opengl_swapchain_params *params) +{ + pl_unreachable(); +} + +void pl_opengl_swapchain_update_fb(pl_swapchain sw, + const struct pl_opengl_framebuffer *fb) +{ + pl_unreachable(); +} + +pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params) +{ + pl_unreachable(); +} + +unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, + int *out_iformat, unsigned int *out_fbo) +{ + pl_unreachable(); +} diff --git a/src/opengl/swapchain.c b/src/opengl/swapchain.c new file mode 100644 index 0000000..46d5f9e --- /dev/null +++ b/src/opengl/swapchain.c @@ -0,0 +1,278 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . 
+ */ + +#include "common.h" +#include "formats.h" +#include "gpu.h" +#include "swapchain.h" +#include "utils.h" +#include "pl_thread.h" + +struct priv { + struct pl_sw_fns impl; + + struct pl_opengl_swapchain_params params; + pl_opengl gl; + pl_mutex lock; + bool has_sync; + + // current parameters + pl_tex fb; + bool frame_started; + + // vsync fences + int swapchain_depth; + PL_ARRAY(GLsync) vsync_fences; +}; + +static const struct pl_sw_fns opengl_swapchain; + +pl_swapchain pl_opengl_create_swapchain(pl_opengl pl_gl, + const struct pl_opengl_swapchain_params *params) +{ + pl_gpu gpu = pl_gl->gpu; + + if (params->max_swapchain_depth < 0) { + PL_ERR(gpu, "Tried specifying negative swapchain depth?"); + return NULL; + } + + if (!gl_make_current(pl_gl)) + return NULL; + + struct pl_swapchain_t *sw = pl_zalloc_obj(NULL, sw, struct priv); + sw->log = gpu->log; + sw->gpu = gpu; + + struct priv *p = PL_PRIV(sw); + pl_mutex_init(&p->lock); + p->impl = opengl_swapchain; + p->params = *params; + p->has_sync = pl_opengl_has_ext(pl_gl, "GL_ARB_sync"); + p->gl = pl_gl; + + gl_release_current(pl_gl); + return sw; +} + +static void gl_sw_destroy(pl_swapchain sw) +{ + pl_gpu gpu = sw->gpu; + struct priv *p = PL_PRIV(sw); + + pl_gpu_flush(gpu); + pl_tex_destroy(gpu, &p->fb); + pl_mutex_destroy(&p->lock); + pl_free((void *) sw); +} + +static int gl_sw_latency(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + return p->params.max_swapchain_depth; +} + +static bool gl_sw_resize(pl_swapchain sw, int *width, int *height) +{ + struct priv *p = PL_PRIV(sw); + const int w = *width, h = *height; + + pl_mutex_lock(&p->lock); + if (p->fb && w == p->fb->params.w && h == p->fb->params.h) { + pl_mutex_unlock(&p->lock); + return true; + } + + if (p->frame_started && (w || h)) { + PL_ERR(sw, "Tried resizing the swapchain while a frame was in progress! " + "Please submit the current frame first."); + pl_mutex_unlock(&p->lock); + return false; + } + + if (w && h) { + pl_tex_destroy(sw->gpu, &p->fb); + p->fb = pl_opengl_wrap(sw->gpu, pl_opengl_wrap_params( + .framebuffer = p->params.framebuffer.id, + .width = w, + .height = h, + )); + if (!p->fb) { + PL_ERR(sw, "Failed wrapping OpenGL framebuffer!"); + pl_mutex_unlock(&p->lock); + return false; + } + } + + if (!p->fb) { + PL_ERR(sw, "Tried calling `pl_swapchain_resize` with unknown size! " + "This is forbidden for OpenGL. The first call to " + "`pl_swapchain_resize` must include the width and height of the " + "swapchain, because there's no way to figure this out from " + "within the API."); + pl_mutex_unlock(&p->lock); + return false; + } + + *width = p->fb->params.w; + *height = p->fb->params.h; + pl_mutex_unlock(&p->lock); + return true; +} + +void pl_opengl_swapchain_update_fb(pl_swapchain sw, + const struct pl_opengl_framebuffer *fb) +{ + struct priv *p = PL_PRIV(sw); + pl_mutex_lock(&p->lock); + if (p->frame_started) { + PL_ERR(sw,"Tried calling `pl_opengl_swapchain_update_fb` while a frame " + "was in progress! Please submit the current frame first."); + pl_mutex_unlock(&p->lock); + return; + } + + if (p->params.framebuffer.id != fb->id) + pl_tex_destroy(sw->gpu, &p->fb); + + p->params.framebuffer = *fb; + pl_mutex_unlock(&p->lock); +} + +static bool gl_sw_start_frame(pl_swapchain sw, + struct pl_swapchain_frame *out_frame) +{ + struct priv *p = PL_PRIV(sw); + pl_mutex_lock(&p->lock); + bool ok = false; + + if (!p->fb) { + PL_ERR(sw, "Unknown framebuffer size. 
Please call `pl_swapchain_resize` " + "before `pl_swapchain_start_frame` for OpenGL swapchains!"); + goto error; + } + + if (p->frame_started) { + PL_ERR(sw, "Attempted calling `pl_swapchain_start` while a frame was " + "already in progress! Call `pl_swapchain_submit_frame` first."); + goto error; + } + + if (!gl_make_current(p->gl)) + goto error; + + *out_frame = (struct pl_swapchain_frame) { + .fbo = p->fb, + .flipped = !p->params.framebuffer.flipped, + .color_repr = { + .sys = PL_COLOR_SYSTEM_RGB, + .levels = PL_COLOR_LEVELS_FULL, + .alpha = p->fb->params.format->num_components == 4 + ? PL_ALPHA_PREMULTIPLIED + : PL_ALPHA_UNKNOWN, + .bits = { + // Just use the red channel in the absence of anything more + // sane to do, because the red channel is both guaranteed to + // exist and also typically has the minimum number of bits + // (which is arguably what matters for dithering) + .sample_depth = p->fb->params.format->component_depth[0], + .color_depth = p->fb->params.format->component_depth[0], + }, + }, + .color_space = pl_color_space_monitor, + }; + + p->frame_started = gl_check_err(sw->gpu, "gl_sw_start_frame"); + if (!p->frame_started) + goto error; + + // keep p->lock held + gl_release_current(p->gl); + return true; + +error: + gl_release_current(p->gl); + pl_mutex_unlock(&p->lock); + return ok; +} + +static bool gl_sw_submit_frame(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + struct gl_ctx *glctx = PL_PRIV(p->gl); + const gl_funcs *gl = &glctx->func; + if (!gl_make_current(p->gl)) { + p->frame_started = false; + pl_mutex_unlock(&p->lock); + return false; + } + + pl_assert(p->frame_started); + if (p->has_sync && p->params.max_swapchain_depth) { + GLsync fence = gl->FenceSync(GL_SYNC_GPU_COMMANDS_COMPLETE, 0); + if (fence) + PL_ARRAY_APPEND(sw, p->vsync_fences, fence); + } + + gl->Flush(); + p->frame_started = false; + bool ok = gl_check_err(sw->gpu, "gl_sw_submit_frame"); + gl_release_current(p->gl); + pl_mutex_unlock(&p->lock); + + return ok; +} + +static void gl_sw_swap_buffers(pl_swapchain sw) +{ + struct priv *p = PL_PRIV(sw); + struct gl_ctx *glctx = PL_PRIV(p->gl); + const gl_funcs *gl = &glctx->func; + if (!p->params.swap_buffers) { + PL_ERR(sw, "`pl_swapchain_swap_buffers` called but no " + "`params.swap_buffers` callback set!"); + return; + } + + pl_mutex_lock(&p->lock); + if (!gl_make_current(p->gl)) { + pl_mutex_unlock(&p->lock); + return; + } + + p->params.swap_buffers(p->params.priv); + + const int max_depth = p->params.max_swapchain_depth; + while (max_depth && p->vsync_fences.num >= max_depth) { + gl->ClientWaitSync(p->vsync_fences.elem[0], GL_SYNC_FLUSH_COMMANDS_BIT, 1e9); + gl->DeleteSync(p->vsync_fences.elem[0]); + PL_ARRAY_REMOVE_AT(p->vsync_fences, 0); + } + + gl_check_err(sw->gpu, "gl_sw_swap_buffers"); + gl_release_current(p->gl); + pl_mutex_unlock(&p->lock); +} + +static const struct pl_sw_fns opengl_swapchain = { + .destroy = gl_sw_destroy, + .latency = gl_sw_latency, + .resize = gl_sw_resize, + .start_frame = gl_sw_start_frame, + .submit_frame = gl_sw_submit_frame, + .swap_buffers = gl_sw_swap_buffers, +}; diff --git a/src/opengl/utils.c b/src/opengl/utils.c new file mode 100644 index 0000000..d96a3e7 --- /dev/null +++ b/src/opengl/utils.c @@ -0,0 +1,158 @@ +/* + * This file is part of libplacebo. 
diff --git a/src/opengl/utils.c b/src/opengl/utils.c
new file mode 100644
index 0000000..d96a3e7
--- /dev/null
+++ b/src/opengl/utils.c
@@ -0,0 +1,158 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "common.h"
+#include "gpu.h"
+#include "utils.h"
+
+const char *gl_err_str(GLenum err)
+{
+    switch (err) {
+#define CASE(name) case name: return #name
+    CASE(GL_NO_ERROR);
+    CASE(GL_INVALID_ENUM);
+    CASE(GL_INVALID_VALUE);
+    CASE(GL_INVALID_OPERATION);
+    CASE(GL_INVALID_FRAMEBUFFER_OPERATION);
+    CASE(GL_OUT_OF_MEMORY);
+    CASE(GL_STACK_UNDERFLOW);
+    CASE(GL_STACK_OVERFLOW);
+#undef CASE
+
+    default: return "unknown error";
+    }
+}
+
+void gl_poll_callbacks(pl_gpu gpu)
+{
+    const gl_funcs *gl = gl_funcs_get(gpu);
+    struct pl_gl *p = PL_PRIV(gpu);
+    while (p->callbacks.num) {
+        struct gl_cb cb = p->callbacks.elem[0];
+        GLenum res = gl->ClientWaitSync(cb.sync, 0, 0);
+        switch (res) {
+        case GL_ALREADY_SIGNALED:
+        case GL_CONDITION_SATISFIED:
+            PL_ARRAY_REMOVE_AT(p->callbacks, 0);
+            cb.callback(cb.priv);
+            continue;
+
+        case GL_WAIT_FAILED:
+            PL_ARRAY_REMOVE_AT(p->callbacks, 0);
+            gl->DeleteSync(cb.sync);
+            p->failed = true;
+            gl_check_err(gpu, "gl_poll_callbacks"); // NOTE: will recurse!
+            return;
+
+        case GL_TIMEOUT_EXPIRED:
+            return;
+
+        default:
+            pl_unreachable();
+        }
+    }
+}
+
+bool gl_check_err(pl_gpu gpu, const char *fun)
+{
+    const gl_funcs *gl = gl_funcs_get(gpu);
+    struct pl_gl *p = PL_PRIV(gpu);
+    bool ret = true;
+
+    while (true) {
+        GLenum error = gl->GetError();
+        if (error == GL_NO_ERROR)
+            break;
+        PL_ERR(gpu, "%s: OpenGL error: %s", fun, gl_err_str(error));
+        ret = false;
+        p->failed = true;
+    }
+
+    gl_poll_callbacks(gpu);
+    return ret;
+}
+
+bool gl_is_software(pl_opengl pl_gl)
+{
+    struct gl_ctx *glctx = PL_PRIV(pl_gl);
+    const gl_funcs *gl = &glctx->func;
+    const char *renderer = (char *) gl->GetString(GL_RENDERER);
+    return !renderer ||
+           strcmp(renderer, "Software Rasterizer") == 0 ||
+           strstr(renderer, "llvmpipe") ||
+           strstr(renderer, "softpipe") ||
+           strcmp(renderer, "Mesa X11") == 0 ||
+           strcmp(renderer, "Apple Software Renderer") == 0;
+}
+
+bool gl_is_gles(pl_opengl pl_gl)
+{
+    struct gl_ctx *glctx = PL_PRIV(pl_gl);
+    const gl_funcs *gl = &glctx->func;
+    const char *version = (char *) gl->GetString(GL_VERSION);
+    return pl_str_startswith0(pl_str0(version), "OpenGL ES");
+}
+
+bool gl_test_ext(pl_gpu gpu, const char *ext, int gl_ver, int gles_ver)
+{
+    struct pl_gl *p = PL_PRIV(gpu);
+    if (gl_ver && p->gl_ver >= gl_ver)
+        return true;
+    if (gles_ver && p->gles_ver >= gles_ver)
+        return true;
+
+    return ext ? pl_opengl_has_ext(p->gl, ext) : false;
+}
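+
+// A short sketch (not part of this file) of how `gl_test_ext` is meant to be
+// used. Assuming this backend's convention of encoding versions as
+// major * 10 + minor (so 33 = GL 3.3, 30 = GLES 3.0), a feature that is core
+// in desktop GL 3.3 but otherwise requires an extension would be gated as:
+//
+//     if (gl_test_ext(gpu, "GL_ARB_timer_query", 33, 0)) {
+//         // timer queries are safe to use here
+//     }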
+
+const char *egl_err_str(EGLenum err)
+{
+    switch (err) {
+#define CASE(name) case name: return #name
+    CASE(EGL_SUCCESS);
+    CASE(EGL_NOT_INITIALIZED);
+    CASE(EGL_BAD_ACCESS);
+    CASE(EGL_BAD_ALLOC);
+    CASE(EGL_BAD_ATTRIBUTE);
+    CASE(EGL_BAD_CONFIG);
+    CASE(EGL_BAD_CONTEXT);
+    CASE(EGL_BAD_CURRENT_SURFACE);
+    CASE(EGL_BAD_DISPLAY);
+    CASE(EGL_BAD_MATCH);
+    CASE(EGL_BAD_NATIVE_PIXMAP);
+    CASE(EGL_BAD_NATIVE_WINDOW);
+    CASE(EGL_BAD_PARAMETER);
+    CASE(EGL_BAD_SURFACE);
+#undef CASE
+
+    default: return "unknown error";
+    }
+}
+
+bool egl_check_err(pl_gpu gpu, const char *fun)
+{
+    struct pl_gl *p = PL_PRIV(gpu);
+    bool ret = true;
+
+    while (true) {
+        EGLint error = eglGetError();
+        if (error == EGL_SUCCESS)
+            return ret;
+        PL_ERR(gpu, "%s: EGL error: %s", fun, egl_err_str(error));
+        ret = false;
+        p->failed = true;
+    }
+}
diff --git a/src/opengl/utils.h b/src/opengl/utils.h
new file mode 100644
index 0000000..0be229d
--- /dev/null
+++ b/src/opengl/utils.h
@@ -0,0 +1,57 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "common.h"
+
+// Iterate through callbacks attached to the `pl_gl` and execute all of the
+// ones that have completed.
+//
+// Thread-safety: Unsafe
+void gl_poll_callbacks(pl_gpu gpu);
+
+// Return a human-readable name for various OpenGL errors
+//
+// Thread-safety: Safe
+const char *gl_err_str(GLenum err);
+
+// Check for OpenGL errors; log any that are found and return false if so
+//
+// Thread-safety: Unsafe
+bool gl_check_err(pl_gpu gpu, const char *fun);
+
+// Returns true if the context is a suspected software rasterizer
+//
+// Thread-safety: Unsafe
+bool gl_is_software(pl_opengl gl);
+
+// Returns true if the context is detected as OpenGL ES
+//
+// Thread-safety: Unsafe
+bool gl_is_gles(pl_opengl gl);
+
+// Check for the presence of an extension, or alternatively a minimum GL
+// (resp. GLES) core version
+//
+// Thread-safety: Unsafe
+bool gl_test_ext(pl_gpu gpu, const char *ext, int gl_ver, int gles_ver);
+
+// Thread-safety: Safe
+const char *egl_err_str(EGLenum err);
+
+// Thread-safety: Unsafe
+bool egl_check_err(pl_gpu gpu, const char *fun);
--
cgit v1.2.3