From ff6e3c025658a5fa1affd094f220b623e7e1b24b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:38:23 +0200 Subject: Adding upstream version 6.338.2. Signed-off-by: Daniel Baumann --- src/opengl/gpu_pass.c | 707 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 707 insertions(+) create mode 100644 src/opengl/gpu_pass.c (limited to 'src/opengl/gpu_pass.c') diff --git a/src/opengl/gpu_pass.c b/src/opengl/gpu_pass.c new file mode 100644 index 0000000..58e69a5 --- /dev/null +++ b/src/opengl/gpu_pass.c @@ -0,0 +1,707 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include "gpu.h" +#include "cache.h" +#include "formats.h" +#include "utils.h" + +int gl_desc_namespace(pl_gpu gpu, enum pl_desc_type type) +{ + return (int) type; +} + +struct gl_cache_header { + GLenum format; +}; + +static GLuint load_cached_program(pl_gpu gpu, pl_cache cache, pl_cache_obj *obj) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) + return 0; + + if (!pl_cache_get(cache, obj)) + return 0; + + if (obj->size < sizeof(struct gl_cache_header)) + return 0; + + GLuint prog = gl->CreateProgram(); + if (!gl_check_err(gpu, "load_cached_program: glCreateProgram")) + return 0; + + struct gl_cache_header *header = (struct gl_cache_header *) obj->data; + pl_str rest = (pl_str) { obj->data, obj->size }; + rest = pl_str_drop(rest, sizeof(*header)); + gl->ProgramBinary(prog, header->format, rest.buf, rest.len); + gl->GetError(); // discard potential useless error + + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + if (status) + return prog; + + gl->DeleteProgram(prog); + gl_check_err(gpu, "load_cached_program: glProgramBinary"); + return 0; +} + +static enum pl_log_level gl_log_level(GLint status, GLint log_length) +{ + if (!status) { + return PL_LOG_ERR; + } else if (log_length > 0) { + return PL_LOG_INFO; + } else { + return PL_LOG_DEBUG; + } +} + +static bool gl_attach_shader(pl_gpu gpu, GLuint program, GLenum type, const char *src) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + GLuint shader = gl->CreateShader(type); + gl->ShaderSource(shader, 1, &src, NULL); + gl->CompileShader(shader); + + GLint status = 0; + gl->GetShaderiv(shader, GL_COMPILE_STATUS, &status); + GLint log_length = 0; + gl->GetShaderiv(shader, GL_INFO_LOG_LENGTH, &log_length); + + enum pl_log_level level = gl_log_level(status, log_length); + if (pl_msg_test(gpu->log, level)) { + GLchar *logstr = pl_zalloc(NULL, log_length + 1); + gl->GetShaderInfoLog(shader, log_length, NULL, logstr); + PL_MSG(gpu, level, "shader compile log (status=%d): %s", status, logstr); + pl_free(logstr); + } + + if (!status || !gl_check_err(gpu, "gl_attach_shader")) + goto error; + + gl->AttachShader(program, shader); + gl->DeleteShader(shader); + return true; + +error: + gl->DeleteShader(shader); + return false; +} + +static GLuint gl_compile_program(pl_gpu gpu, const struct pl_pass_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + GLuint prog = gl->CreateProgram(); + bool ok = true; + + switch (params->type) { + case PL_PASS_COMPUTE: + ok &= gl_attach_shader(gpu, prog, GL_COMPUTE_SHADER, params->glsl_shader); + break; + case PL_PASS_RASTER: + ok &= gl_attach_shader(gpu, prog, GL_VERTEX_SHADER, params->vertex_shader); + ok &= gl_attach_shader(gpu, prog, GL_FRAGMENT_SHADER, params->glsl_shader); + for (int i = 0; i < params->num_vertex_attribs; i++) + gl->BindAttribLocation(prog, i, params->vertex_attribs[i].name); + break; + case PL_PASS_INVALID: + case PL_PASS_TYPE_COUNT: + pl_unreachable(); + } + + if (!ok || !gl_check_err(gpu, "gl_compile_program: attach shader")) + goto error; + + gl->LinkProgram(prog); + GLint status = 0; + gl->GetProgramiv(prog, GL_LINK_STATUS, &status); + GLint log_length = 0; + gl->GetProgramiv(prog, GL_INFO_LOG_LENGTH, &log_length); + + enum pl_log_level level = gl_log_level(status, log_length); + if (pl_msg_test(gpu->log, level)) { + GLchar *logstr = pl_zalloc(NULL, log_length + 1); + gl->GetProgramInfoLog(prog, log_length, NULL, logstr); + PL_MSG(gpu, level, "shader link log (status=%d): %s", status, logstr); + pl_free(logstr); + } + + if (!gl_check_err(gpu, "gl_compile_program: link program")) + goto error; + + return prog; + +error: + gl->DeleteProgram(prog); + PL_ERR(gpu, "Failed compiling/linking GLSL program"); + return 0; +} + +// For pl_pass.priv +struct pl_pass_gl { + GLuint program; + GLuint vao; // the VAO object + uint64_t vao_id; // buf_gl.id of VAO + size_t vao_offset; // VBO offset of VAO + GLuint buffer; // VBO for raw vertex pointers + GLuint index_buffer; + GLint *var_locs; +}; + +void gl_pass_destroy(pl_gpu gpu, pl_pass pass) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) { + PL_ERR(gpu, "Failed uninitializing pass, leaking resources!"); + return; + } + + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + if (pass_gl->vao) + gl->DeleteVertexArrays(1, &pass_gl->vao); + gl->DeleteBuffers(1, &pass_gl->index_buffer); + gl->DeleteBuffers(1, &pass_gl->buffer); + gl->DeleteProgram(pass_gl->program); + + gl_check_err(gpu, "gl_pass_destroy"); + RELEASE_CURRENT(); + pl_free((void *) pass); +} + +static void gl_update_va(pl_gpu gpu, pl_pass pass, size_t vbo_offset) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + for (int i = 0; i < pass->params.num_vertex_attribs; i++) { + const struct pl_vertex_attrib *va = &pass->params.vertex_attribs[i]; + const struct gl_format **glfmtp = PL_PRIV(va->fmt); + const struct gl_format *glfmt = *glfmtp; + + bool norm = false; + switch (va->fmt->type) { + case PL_FMT_UNORM: + case PL_FMT_SNORM: + norm = true; + break; + + case PL_FMT_UNKNOWN: + case PL_FMT_FLOAT: + case PL_FMT_UINT: + case PL_FMT_SINT: + break; + case PL_FMT_TYPE_COUNT: + pl_unreachable(); + } + + gl->EnableVertexAttribArray(i); + gl->VertexAttribPointer(i, va->fmt->num_components, glfmt->type, norm, + pass->params.vertex_stride, + (void *) (va->offset + vbo_offset)); + } +} + +pl_pass gl_pass_create(pl_gpu gpu, const struct pl_pass_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return NULL; + + struct pl_gl *p = PL_PRIV(gpu); + struct pl_pass_t *pass = pl_zalloc_obj(NULL, pass, struct pl_pass_gl); + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + pl_cache cache = pl_gpu_cache(gpu); + pass->params = pl_pass_params_copy(pass, params); + + pl_cache_obj obj = { .key = CACHE_KEY_GL_PROG }; + if (cache) { + pl_hash_merge(&obj.key, pl_str0_hash(params->glsl_shader)); + if (params->type == PL_PASS_RASTER) + pl_hash_merge(&obj.key, pl_str0_hash(params->vertex_shader)); + } + + // Load/Compile program + if ((pass_gl->program = load_cached_program(gpu, cache, &obj))) { + PL_DEBUG(gpu, "Using cached GL program"); + } else { + pl_clock_t start = pl_clock_now(); + pass_gl->program = gl_compile_program(gpu, params); + pl_log_cpu_time(gpu->log, start, pl_clock_now(), "compiling shader"); + } + + if (!pass_gl->program) + goto error; + + // Update program cache if possible + if (cache && gl_test_ext(gpu, "GL_ARB_get_program_binary", 41, 30)) { + GLint buf_size = 0; + gl->GetProgramiv(pass_gl->program, GL_PROGRAM_BINARY_LENGTH, &buf_size); + if (buf_size > 0) { + buf_size += sizeof(struct gl_cache_header); + pl_cache_obj_resize(NULL, &obj, buf_size); + struct gl_cache_header *header = obj.data; + void *buffer = &header[1]; + GLsizei binary_size = 0; + gl->GetProgramBinary(pass_gl->program, buf_size, &binary_size, + &header->format, buffer); + bool ok = gl_check_err(gpu, "gl_pass_create: get program binary"); + if (ok) { + obj.size = sizeof(*header) + binary_size; + pl_assert(obj.size <= buf_size); + pl_cache_set(cache, &obj); + } + } + } + + gl->UseProgram(pass_gl->program); + pass_gl->var_locs = pl_calloc(pass, params->num_variables, sizeof(GLint)); + + for (int i = 0; i < params->num_variables; i++) { + pass_gl->var_locs[i] = gl->GetUniformLocation(pass_gl->program, + params->variables[i].name); + + // Due to OpenGL API restrictions, we need to ensure that this is a + // variable type we can actually *update*. Fortunately, this is easily + // checked by virtue of the fact that all legal combinations of + // parameters will have a valid GLSL type name + if (!pl_var_glsl_type_name(params->variables[i])) { + gl->UseProgram(0); + PL_ERR(gpu, "Input variable '%s' does not match any known type!", + params->variables[i].name); + goto error; + } + } + + for (int i = 0; i < params->num_descriptors; i++) { + const struct pl_desc *desc = ¶ms->descriptors[i]; + switch (desc->type) { + case PL_DESC_SAMPLED_TEX: + case PL_DESC_STORAGE_IMG: { + // For compatibility with older OpenGL, we need to explicitly + // update the texture/image unit bindings after creating the shader + // program, since specifying it directly requires GLSL 4.20+ + GLint loc = gl->GetUniformLocation(pass_gl->program, desc->name); + gl->Uniform1i(loc, desc->binding); + break; + } + case PL_DESC_BUF_UNIFORM: { + GLuint idx = gl->GetUniformBlockIndex(pass_gl->program, desc->name); + gl->UniformBlockBinding(pass_gl->program, idx, desc->binding); + break; + } + case PL_DESC_BUF_STORAGE: { + GLuint idx = gl->GetProgramResourceIndex(pass_gl->program, + GL_SHADER_STORAGE_BLOCK, + desc->name); + gl->ShaderStorageBlockBinding(pass_gl->program, idx, desc->binding); + break; + } + case PL_DESC_BUF_TEXEL_UNIFORM: + case PL_DESC_BUF_TEXEL_STORAGE: + assert(!"unimplemented"); // TODO + case PL_DESC_INVALID: + case PL_DESC_TYPE_COUNT: + pl_unreachable(); + } + } + + gl->UseProgram(0); + + // Initialize the VAO and single vertex buffer + gl->GenBuffers(1, &pass_gl->buffer); + if (p->has_vao) { + gl->GenVertexArrays(1, &pass_gl->vao); + gl->BindBuffer(GL_ARRAY_BUFFER, pass_gl->buffer); + gl->BindVertexArray(pass_gl->vao); + gl_update_va(gpu, pass, 0); + gl->BindVertexArray(0); + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + } + + if (!gl_check_err(gpu, "gl_pass_create")) + goto error; + + pl_cache_obj_free(&obj); + RELEASE_CURRENT(); + return pass; + +error: + PL_ERR(gpu, "Failed creating pass"); + pl_cache_obj_free(&obj); + gl_pass_destroy(gpu, pass); + RELEASE_CURRENT(); + return NULL; +} + +static void update_var(pl_gpu gpu, pl_pass pass, + const struct pl_var_update *vu) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + const struct pl_var *var = &pass->params.variables[vu->index]; + GLint loc = pass_gl->var_locs[vu->index]; + + switch (var->type) { + case PL_VAR_SINT: { + const int *i = vu->data; + pl_assert(var->dim_m == 1); + switch (var->dim_v) { + case 1: gl->Uniform1iv(loc, var->dim_a, i); break; + case 2: gl->Uniform2iv(loc, var->dim_a, i); break; + case 3: gl->Uniform3iv(loc, var->dim_a, i); break; + case 4: gl->Uniform4iv(loc, var->dim_a, i); break; + default: pl_unreachable(); + } + return; + } + case PL_VAR_UINT: { + const unsigned int *u = vu->data; + pl_assert(var->dim_m == 1); + switch (var->dim_v) { + case 1: gl->Uniform1uiv(loc, var->dim_a, u); break; + case 2: gl->Uniform2uiv(loc, var->dim_a, u); break; + case 3: gl->Uniform3uiv(loc, var->dim_a, u); break; + case 4: gl->Uniform4uiv(loc, var->dim_a, u); break; + default: pl_unreachable(); + } + return; + } + case PL_VAR_FLOAT: { + const float *f = vu->data; + if (var->dim_m == 1) { + switch (var->dim_v) { + case 1: gl->Uniform1fv(loc, var->dim_a, f); break; + case 2: gl->Uniform2fv(loc, var->dim_a, f); break; + case 3: gl->Uniform3fv(loc, var->dim_a, f); break; + case 4: gl->Uniform4fv(loc, var->dim_a, f); break; + default: pl_unreachable(); + } + } else if (var->dim_m == 2 && var->dim_v == 2) { + gl->UniformMatrix2fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 3 && var->dim_v == 3) { + gl->UniformMatrix3fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 4 && var->dim_v == 4) { + gl->UniformMatrix4fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 2 && var->dim_v == 3) { + gl->UniformMatrix2x3fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 3 && var->dim_v == 2) { + gl->UniformMatrix3x2fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 2 && var->dim_v == 4) { + gl->UniformMatrix2x4fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 4 && var->dim_v == 2) { + gl->UniformMatrix4x2fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 3 && var->dim_v == 4) { + gl->UniformMatrix3x4fv(loc, var->dim_a, GL_FALSE, f); + } else if (var->dim_m == 4 && var->dim_v == 3) { + gl->UniformMatrix4x3fv(loc, var->dim_a, GL_FALSE, f); + } else { + pl_unreachable(); + } + return; + } + + case PL_VAR_INVALID: + case PL_VAR_TYPE_COUNT: + break; + } + + pl_unreachable(); +} + +static void update_desc(pl_gpu gpu, pl_pass pass, int index, + const struct pl_desc_binding *db) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + const struct pl_desc *desc = &pass->params.descriptors[index]; + + static const GLenum access[] = { + [PL_DESC_ACCESS_READWRITE] = GL_READ_WRITE, + [PL_DESC_ACCESS_READONLY] = GL_READ_ONLY, + [PL_DESC_ACCESS_WRITEONLY] = GL_WRITE_ONLY, + }; + + static const GLint wraps[PL_TEX_ADDRESS_MODE_COUNT] = { + [PL_TEX_ADDRESS_CLAMP] = GL_CLAMP_TO_EDGE, + [PL_TEX_ADDRESS_REPEAT] = GL_REPEAT, + [PL_TEX_ADDRESS_MIRROR] = GL_MIRRORED_REPEAT, + }; + + static const GLint filters[PL_TEX_SAMPLE_MODE_COUNT] = { + [PL_TEX_SAMPLE_NEAREST] = GL_NEAREST, + [PL_TEX_SAMPLE_LINEAR] = GL_LINEAR, + }; + + switch (desc->type) { + case PL_DESC_SAMPLED_TEX: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->ActiveTexture(GL_TEXTURE0 + desc->binding); + gl->BindTexture(tex_gl->target, tex_gl->texture); + + GLint filter = filters[db->sample_mode]; + GLint wrap = wraps[db->address_mode]; + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MIN_FILTER, filter); + gl->TexParameteri(tex_gl->target, GL_TEXTURE_MAG_FILTER, filter); + switch (pl_tex_params_dimension(tex->params)) { + case 3: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_R, wrap); // fall through + case 2: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_T, wrap); // fall through + case 1: gl->TexParameteri(tex_gl->target, GL_TEXTURE_WRAP_S, wrap); break; + } + return; + } + case PL_DESC_STORAGE_IMG: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->BindImageTexture(desc->binding, tex_gl->texture, 0, GL_FALSE, 0, + access[desc->access], tex_gl->iformat); + return; + } + case PL_DESC_BUF_UNIFORM: { + pl_buf buf = db->object; + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBufferRange(GL_UNIFORM_BUFFER, desc->binding, buf_gl->buffer, + buf_gl->offset, buf->params.size); + return; + } + case PL_DESC_BUF_STORAGE: { + pl_buf buf = db->object; + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBufferRange(GL_SHADER_STORAGE_BUFFER, desc->binding, buf_gl->buffer, + buf_gl->offset, buf->params.size); + return; + } + case PL_DESC_BUF_TEXEL_UNIFORM: + case PL_DESC_BUF_TEXEL_STORAGE: + assert(!"unimplemented"); // TODO + + case PL_DESC_INVALID: + case PL_DESC_TYPE_COUNT: + break; + } + + pl_unreachable(); +} + +static void unbind_desc(pl_gpu gpu, pl_pass pass, int index, + const struct pl_desc_binding *db) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + const struct pl_desc *desc = &pass->params.descriptors[index]; + + switch (desc->type) { + case PL_DESC_SAMPLED_TEX: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->ActiveTexture(GL_TEXTURE0 + desc->binding); + gl->BindTexture(tex_gl->target, 0); + return; + } + case PL_DESC_STORAGE_IMG: { + pl_tex tex = db->object; + struct pl_tex_gl *tex_gl = PL_PRIV(tex); + gl->BindImageTexture(desc->binding, 0, 0, GL_FALSE, 0, + GL_WRITE_ONLY, GL_R32F); + if (desc->access != PL_DESC_ACCESS_READONLY) + gl->MemoryBarrier(tex_gl->barrier); + return; + } + case PL_DESC_BUF_UNIFORM: + gl->BindBufferBase(GL_UNIFORM_BUFFER, desc->binding, 0); + return; + case PL_DESC_BUF_STORAGE: { + pl_buf buf = db->object; + struct pl_buf_gl *buf_gl = PL_PRIV(buf); + gl->BindBufferBase(GL_SHADER_STORAGE_BUFFER, desc->binding, 0); + if (desc->access != PL_DESC_ACCESS_READONLY) + gl->MemoryBarrier(buf_gl->barrier); + return; + } + case PL_DESC_BUF_TEXEL_UNIFORM: + case PL_DESC_BUF_TEXEL_STORAGE: + assert(!"unimplemented"); // TODO + case PL_DESC_INVALID: + case PL_DESC_TYPE_COUNT: + break; + } + + pl_unreachable(); +} + +void gl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params) +{ + const gl_funcs *gl = gl_funcs_get(gpu); + if (!MAKE_CURRENT()) + return; + + pl_pass pass = params->pass; + struct pl_pass_gl *pass_gl = PL_PRIV(pass); + struct pl_gl *p = PL_PRIV(gpu); + + gl->UseProgram(pass_gl->program); + + for (int i = 0; i < params->num_var_updates; i++) + update_var(gpu, pass, ¶ms->var_updates[i]); + for (int i = 0; i < pass->params.num_descriptors; i++) + update_desc(gpu, pass, i, ¶ms->desc_bindings[i]); + gl->ActiveTexture(GL_TEXTURE0); + + if (!gl_check_err(gpu, "gl_pass_run: updating uniforms")) { + RELEASE_CURRENT(); + return; + } + + switch (pass->params.type) { + case PL_PASS_RASTER: { + struct pl_tex_gl *target_gl = PL_PRIV(params->target); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, target_gl->fbo); + if (!pass->params.load_target && p->has_invalidate_fb) { + GLenum fb = target_gl->fbo ? GL_COLOR_ATTACHMENT0 : GL_COLOR; + gl->InvalidateFramebuffer(GL_DRAW_FRAMEBUFFER, 1, &fb); + } + + gl->Viewport(params->viewport.x0, params->viewport.y0, + pl_rect_w(params->viewport), pl_rect_h(params->viewport)); + gl->Scissor(params->scissors.x0, params->scissors.y0, + pl_rect_w(params->scissors), pl_rect_h(params->scissors)); + gl->Enable(GL_SCISSOR_TEST); + gl->Disable(GL_DEPTH_TEST); + gl->Disable(GL_CULL_FACE); + gl_check_err(gpu, "gl_pass_run: enabling viewport/scissor"); + + const struct pl_blend_params *blend = pass->params.blend_params; + if (blend) { + static const GLenum map_blend[] = { + [PL_BLEND_ZERO] = GL_ZERO, + [PL_BLEND_ONE] = GL_ONE, + [PL_BLEND_SRC_ALPHA] = GL_SRC_ALPHA, + [PL_BLEND_ONE_MINUS_SRC_ALPHA] = GL_ONE_MINUS_SRC_ALPHA, + }; + + gl->BlendFuncSeparate(map_blend[blend->src_rgb], + map_blend[blend->dst_rgb], + map_blend[blend->src_alpha], + map_blend[blend->dst_alpha]); + gl->Enable(GL_BLEND); + gl_check_err(gpu, "gl_pass_run: enabling blend"); + } + + // Update VBO and VAO + pl_buf vert = params->vertex_buf; + struct pl_buf_gl *vert_gl = vert ? PL_PRIV(vert) : NULL; + gl->BindBuffer(GL_ARRAY_BUFFER, vert ? vert_gl->buffer : pass_gl->buffer); + + if (!vert) { + // Update the buffer directly. In theory we could also do a memcmp + // cache here to avoid unnecessary updates. + gl->BufferData(GL_ARRAY_BUFFER, pl_vertex_buf_size(params), + params->vertex_data, GL_STREAM_DRAW); + } + + if (pass_gl->vao) + gl->BindVertexArray(pass_gl->vao); + + uint64_t vert_id = vert ? vert_gl->id : 0; + size_t vert_offset = vert ? params->buf_offset : 0; + if (!pass_gl->vao || pass_gl->vao_id != vert_id || + pass_gl->vao_offset != vert_offset) + { + // We need to update the VAO when the buffer ID or offset changes + gl_update_va(gpu, pass, vert_offset); + pass_gl->vao_id = vert_id; + pass_gl->vao_offset = vert_offset; + } + + gl_check_err(gpu, "gl_pass_run: update/bind vertex buffer"); + + static const GLenum map_prim[PL_PRIM_TYPE_COUNT] = { + [PL_PRIM_TRIANGLE_LIST] = GL_TRIANGLES, + [PL_PRIM_TRIANGLE_STRIP] = GL_TRIANGLE_STRIP, + }; + GLenum mode = map_prim[pass->params.vertex_type]; + + gl_timer_begin(gpu, params->timer); + + if (params->index_data) { + + static const GLenum index_fmts[PL_INDEX_FORMAT_COUNT] = { + [PL_INDEX_UINT16] = GL_UNSIGNED_SHORT, + [PL_INDEX_UINT32] = GL_UNSIGNED_INT, + }; + + // Upload indices to temporary buffer object + if (!pass_gl->index_buffer) + gl->GenBuffers(1, &pass_gl->index_buffer); // lazily allocated + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, pass_gl->index_buffer); + gl->BufferData(GL_ELEMENT_ARRAY_BUFFER, pl_index_buf_size(params), + params->index_data, GL_STREAM_DRAW); + gl->DrawElements(mode, params->vertex_count, + index_fmts[params->index_fmt], 0); + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + } else if (params->index_buf) { + + // The pointer argument becomes the index buffer offset + struct pl_buf_gl *index_gl = PL_PRIV(params->index_buf); + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, index_gl->buffer); + gl->DrawElements(mode, params->vertex_count, GL_UNSIGNED_SHORT, + (void *) params->index_offset); + gl->BindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0); + + } else { + + // Note: the VBO offset is handled in the VAO + gl->DrawArrays(mode, 0, params->vertex_count); + } + + gl_timer_end(gpu, params->timer); + gl_check_err(gpu, "gl_pass_run: drawing"); + + if (pass_gl->vao) { + gl->BindVertexArray(0); + } else { + for (int i = 0; i < pass->params.num_vertex_attribs; i++) + gl->DisableVertexAttribArray(i); + } + + gl->BindBuffer(GL_ARRAY_BUFFER, 0); + gl->Disable(GL_SCISSOR_TEST); + gl->Disable(GL_BLEND); + gl->BindFramebuffer(GL_DRAW_FRAMEBUFFER, 0); + break; + } + + case PL_PASS_COMPUTE: + gl_timer_begin(gpu, params->timer); + gl->DispatchCompute(params->compute_groups[0], + params->compute_groups[1], + params->compute_groups[2]); + gl_timer_end(gpu, params->timer); + break; + + case PL_PASS_INVALID: + case PL_PASS_TYPE_COUNT: + pl_unreachable(); + } + + for (int i = 0; i < pass->params.num_descriptors; i++) + unbind_desc(gpu, pass, i, ¶ms->desc_bindings[i]); + gl->ActiveTexture(GL_TEXTURE0); + + gl->UseProgram(0); + gl_check_err(gpu, "gl_pass_run"); + RELEASE_CURRENT(); +} -- cgit v1.2.3