Diffstat (limited to 'src/shaders.c')
 src/shaders.c | 992 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 992 insertions(+), 0 deletions(-)
diff --git a/src/shaders.c b/src/shaders.c
new file mode 100644
index 0000000..503ea78
--- /dev/null
+++ b/src/shaders.c
@@ -0,0 +1,992 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include <stdio.h>
+#include <math.h>
+
+#include "common.h"
+#include "log.h"
+#include "shaders.h"
+
+pl_shader_info pl_shader_info_ref(pl_shader_info pinfo)
+{
+ struct sh_info *info = (struct sh_info *) pinfo;
+ if (!info)
+ return NULL;
+
+ pl_rc_ref(&info->rc);
+ return &info->info;
+}
+
+void pl_shader_info_deref(pl_shader_info *pinfo)
+{
+ struct sh_info *info = (struct sh_info *) *pinfo;
+ if (!info)
+ return;
+
+ if (pl_rc_deref(&info->rc))
+ pl_free(info);
+ *pinfo = NULL;
+}
+
+static struct sh_info *sh_info_alloc(void *alloc)
+{
+ struct sh_info *info = pl_zalloc_ptr(alloc, info);
+ info->tmp = pl_tmp(info);
+ pl_rc_init(&info->rc);
+ return info;
+}
+
+// Re-use `sh_info` allocation if possible, allocate new otherwise
+static struct sh_info *sh_info_recycle(struct sh_info *info)
+{
+ if (!pl_rc_deref(&info->rc))
+ return sh_info_alloc(NULL);
+
+ memset(&info->info, 0, sizeof(info->info)); // reset public fields
+ pl_free_children(info->tmp);
+ pl_rc_ref(&info->rc);
+ info->desc.len = 0;
+ info->steps.num = 0;
+ return info;
+}
+
+static uint8_t reverse_bits(uint8_t x)
+{
+ static const uint8_t reverse_nibble[16] = {
+ 0x0, 0x8, 0x4, 0xc, 0x2, 0xa, 0x6, 0xe,
+ 0x1, 0x9, 0x5, 0xd, 0x3, 0xb, 0x7, 0xf,
+ };
+
+ return reverse_nibble[x & 0xF] << 4 | reverse_nibble[x >> 4];
+}
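+
+/* Illustrative check (not in the original file): the nibble table reverses
+ * four bits at a time, and the final OR swaps the two nibbles, e.g.:
+ *
+ *   reverse_bits(0x01) == 0x80   // 0000 0001 -> 1000 0000
+ *   reverse_bits(0x2C) == 0x34   // 0010 1100 -> 0011 0100
+ */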
+
+static void init_shader(pl_shader sh, const struct pl_shader_params *params)
+{
+ if (params) {
+ sh->info->info.params = *params;
+
+ // To avoid collisions for shaders with a very high number of
+ // identifiers, pack the shader ID into the highest bits (MSB -> LSB)
+ pl_static_assert(sizeof(sh->prefix) > sizeof(params->id));
+ const int shift = 8 * (sizeof(sh->prefix) - sizeof(params->id));
+ sh->prefix = reverse_bits(params->id) << shift;
+ }
+
+ sh->name = sh_fresh(sh, "main");
+}
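+
+/* Sketch of the packing scheme above, assuming an 8-bit `params->id` and a
+ * 16-bit `sh->prefix` (so shift == 8): fresh identifier counters grow up
+ * from the LSB while reversed shader IDs grow down from the MSB, so the
+ * assert in sh_fresh() only trips once both spaces are nearly exhausted:
+ *
+ *   id 0x01 -> prefix 0x8000
+ *   id 0x02 -> prefix 0x4000
+ *   id 0x03 -> prefix 0xC000
+ */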
+
+pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params)
+{
+ static const int glsl_ver_req = 130;
+ if (params && params->glsl.version && params->glsl.version < glsl_ver_req) {
+ pl_err(log, "Requested GLSL version %d too low (required: %d)",
+ params->glsl.version, glsl_ver_req);
+ return NULL;
+ }
+
+ pl_shader sh = pl_alloc_ptr(NULL, sh);
+ *sh = (struct pl_shader_t) {
+ .log = log,
+ .tmp = pl_tmp(sh),
+ .info = sh_info_alloc(NULL),
+ .mutable = true,
+ };
+
+ for (int i = 0; i < PL_ARRAY_SIZE(sh->buffers); i++)
+ sh->buffers[i] = pl_str_builder_alloc(sh);
+
+ init_shader(sh, params);
+ return sh;
+}
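+
+/* Typical lifecycle, as an illustrative sketch only (`log` and `gpu` are
+ * assumed to have been created elsewhere):
+ *
+ *   pl_shader sh = pl_shader_alloc(log, &(struct pl_shader_params) {
+ *       .gpu = gpu,
+ *   });
+ *   // ... generate shader code, finalize, dispatch ...
+ *   pl_shader_free(&sh);
+ */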
+
+static void sh_obj_deref(pl_shader_obj obj);
+
+void sh_deref(pl_shader sh)
+{
+ pl_free_children(sh->tmp);
+
+ for (int i = 0; i < sh->obj.num; i++)
+ sh_obj_deref(sh->obj.elem[i]);
+ sh->obj.num = 0;
+}
+
+void pl_shader_free(pl_shader *psh)
+{
+ pl_shader sh = *psh;
+ if (!sh)
+ return;
+
+ sh_deref(sh);
+ pl_shader_info_deref((pl_shader_info *) &sh->info);
+ pl_free_ptr(psh);
+}
+
+void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params)
+{
+ sh_deref(sh);
+
+ struct pl_shader_t new = {
+ .log = sh->log,
+ .tmp = sh->tmp,
+ .info = sh_info_recycle(sh->info),
+ .data.buf = sh->data.buf,
+ .mutable = true,
+
+ // Preserve array allocations
+ .obj.elem = sh->obj.elem,
+ .vas.elem = sh->vas.elem,
+ .vars.elem = sh->vars.elem,
+ .descs.elem = sh->descs.elem,
+ .consts.elem = sh->consts.elem,
+ };
+
+ // Preserve buffer allocations
+ memcpy(new.buffers, sh->buffers, sizeof(new.buffers));
+ for (int i = 0; i < PL_ARRAY_SIZE(new.buffers); i++)
+ pl_str_builder_reset(new.buffers[i]);
+
+ *sh = new;
+ init_shader(sh, params);
+}
+
+static void *sh_alloc(pl_shader sh, size_t size, size_t align)
+{
+ const size_t offset = PL_ALIGN2(sh->data.len, align);
+ const size_t req_size = offset + size;
+ if (req_size <= pl_get_size(sh->data.buf)) {
+ sh->data.len = offset + size;
+ return sh->data.buf + offset;
+ }
+
+ // We can't realloc this buffer because various pointers will be left
+ // dangling, so just reparent it onto `sh->tmp` (so it will be cleaned
+ // up when the shader is next reset) and allocate a new, larger buffer
+ // in its place
+ const size_t new_size = PL_MAX(req_size << 1, 256);
+ pl_steal(sh->tmp, sh->data.buf);
+ sh->data.buf = pl_alloc(sh, new_size);
+ sh->data.len = size;
+ return sh->data.buf;
+}
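+
+/* Worked example of the bump allocation above (illustrative): with
+ * `data.len == 5` and `align == 4`, the offset rounds up to 8, so a
+ * 16-byte request requires 24 bytes in total. If the backing buffer is
+ * smaller than that, it gets reparented and replaced by a fresh buffer of
+ * PL_MAX(24 << 1, 256) == 256 bytes.
+ */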
+
+static void *sh_memdup(pl_shader sh, const void *data, size_t size, size_t align)
+{
+ if (!size)
+ return NULL;
+
+ void *dst = sh_alloc(sh, size, align);
+ assert(data);
+ memcpy(dst, data, size);
+ return dst;
+}
+
+bool pl_shader_is_failed(const pl_shader sh)
+{
+ return sh->failed;
+}
+
+struct pl_glsl_version sh_glsl(const pl_shader sh)
+{
+ if (SH_PARAMS(sh).glsl.version)
+ return SH_PARAMS(sh).glsl;
+
+ if (SH_GPU(sh))
+ return SH_GPU(sh)->glsl;
+
+ return (struct pl_glsl_version) { .version = 130 };
+}
+
+bool sh_try_compute(pl_shader sh, int bw, int bh, bool flex, size_t mem)
+{
+ pl_assert(bw && bh);
+ int *sh_bw = &sh->group_size[0];
+ int *sh_bh = &sh->group_size[1];
+
+ struct pl_glsl_version glsl = sh_glsl(sh);
+ if (!glsl.compute) {
+ PL_TRACE(sh, "Disabling compute shader due to missing `compute` support");
+ return false;
+ }
+
+ if (sh->shmem + mem > glsl.max_shmem_size) {
+ PL_TRACE(sh, "Disabling compute shader due to insufficient shmem");
+ return false;
+ }
+
+ if (sh->type == SH_FRAGMENT) {
+ PL_TRACE(sh, "Disabling compute shader because shader is already marked "
+ "as fragment shader");
+ return false;
+ }
+
+ if (bw > glsl.max_group_size[0] ||
+ bh > glsl.max_group_size[1] ||
+ (bw * bh) > glsl.max_group_threads)
+ {
+ if (!flex) {
+ PL_TRACE(sh, "Disabling compute shader due to exceeded group "
+ "thread count.");
+ return false;
+ } else {
+ // Pick better group sizes
+ bw = PL_MIN(bw, glsl.max_group_size[0]);
+ bh = glsl.max_group_threads / bw;
+ }
+ }
+
+ sh->shmem += mem;
+
+ // If the current shader is either not a compute shader, or we have no
+ // choice but to override the metadata, always do so
+ if (sh->type != SH_COMPUTE || (sh->flexible_work_groups && !flex)) {
+ *sh_bw = bw;
+ *sh_bh = bh;
+ sh->type = SH_COMPUTE;
+ sh->flexible_work_groups = flex;
+ return true;
+ }
+
+ // If both shaders are flexible, pick the larger of the two
+ if (sh->flexible_work_groups && flex) {
+ *sh_bw = PL_MAX(*sh_bw, bw);
+ *sh_bh = PL_MAX(*sh_bh, bh);
+ pl_assert(*sh_bw * *sh_bh <= glsl.max_group_threads);
+ return true;
+ }
+
+ // At this point we're looking only at a non-flexible compute shader
+ pl_assert(sh->type == SH_COMPUTE && !sh->flexible_work_groups);
+ if (!flex) {
+ // Ensure parameters match
+ if (bw != *sh_bw || bh != *sh_bh) {
+ PL_TRACE(sh, "Disabling compute shader due to incompatible group "
+ "sizes %dx%d and %dx%d", *sh_bw, *sh_bh, bw, bh);
+ sh->shmem -= mem;
+ return false;
+ }
+ }
+
+ return true;
+}
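+
+/* Worked example of the flexible fallback above (illustrative): asking for
+ * a 64x32 block (2048 threads) on a GPU with max_group_threads == 1024 and
+ * max_group_size == {1024, 1024} fails outright when `flex` is false; with
+ * `flex` set, the request is clamped to bw = 64, bh = 1024 / 64 = 16.
+ */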
+
+bool pl_shader_is_compute(const pl_shader sh)
+{
+ return sh->type == SH_COMPUTE;
+}
+
+bool pl_shader_output_size(const pl_shader sh, int *w, int *h)
+{
+ if (!sh->output_w || !sh->output_h)
+ return false;
+
+ *w = sh->transpose ? sh->output_h : sh->output_w;
+ *h = sh->transpose ? sh->output_w : sh->output_h;
+ return true;
+}
+
+ident_t sh_fresh(pl_shader sh, const char *name)
+{
+ unsigned short id = ++sh->fresh;
+ assert(!(sh->prefix & id));
+ id |= sh->prefix;
+
+ assert(name);
+ return sh_mkident(id, name);
+}
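+
+/* Illustration (not in the original file): for a shader with prefix 0x8000,
+ * successive sh_fresh() calls hand out the IDs 0x8001, 0x8002, and so on.
+ * Identifiers from different shaders never clash because each shader
+ * contributes a distinct prefix in the high bits.
+ */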
+
+static inline ident_t sh_fresh_name(pl_shader sh, const char **pname)
+{
+ ident_t id = sh_fresh(sh, *pname);
+ *pname = sh_ident_pack(id);
+ return id;
+}
+
+ident_t sh_var(pl_shader sh, struct pl_shader_var sv)
+{
+ ident_t id = sh_fresh_name(sh, &sv.var.name);
+ struct pl_var_layout layout = pl_var_host_layout(0, &sv.var);
+ sv.data = sh_memdup(sh, sv.data, layout.size, layout.stride);
+ PL_ARRAY_APPEND(sh, sh->vars, sv);
+ return id;
+}
+
+ident_t sh_var_int(pl_shader sh, const char *name, int val, bool dynamic)
+{
+ return sh_var(sh, (struct pl_shader_var) {
+ .var = pl_var_int(name),
+ .data = &val,
+ .dynamic = dynamic,
+ });
+}
+
+ident_t sh_var_uint(pl_shader sh, const char *name, unsigned int val, bool dynamic)
+{
+ return sh_var(sh, (struct pl_shader_var) {
+ .var = pl_var_uint(name),
+ .data = &val,
+ .dynamic = dynamic,
+ });
+}
+
+ident_t sh_var_float(pl_shader sh, const char *name, float val, bool dynamic)
+{
+ return sh_var(sh, (struct pl_shader_var) {
+ .var = pl_var_float(name),
+ .data = &val,
+ .dynamic = dynamic,
+ });
+}
+
+ident_t sh_var_mat3(pl_shader sh, const char *name, pl_matrix3x3 val)
+{
+ return sh_var(sh, (struct pl_shader_var) {
+ .var = pl_var_mat3(name),
+ .data = PL_TRANSPOSE_3X3(val.m),
+ });
+}
+
+ident_t sh_desc(pl_shader sh, struct pl_shader_desc sd)
+{
+ switch (sd.desc.type) {
+ case PL_DESC_BUF_UNIFORM:
+ case PL_DESC_BUF_STORAGE:
+ for (int i = 0; i < sh->descs.num; i++) // ensure uniqueness
+ pl_assert(sh->descs.elem[i].binding.object != sd.binding.object);
+ size_t bsize = sizeof(sd.buffer_vars[0]) * sd.num_buffer_vars;
+ sd.buffer_vars = sh_memdup(sh, sd.buffer_vars, bsize,
+ alignof(struct pl_buffer_var));
+ for (int i = 0; i < sd.num_buffer_vars; i++) {
+ struct pl_var *bv = &sd.buffer_vars[i].var;
+ const char *name = bv->name;
+ GLSLP("#define %s "$"\n", name, sh_fresh_name(sh, &bv->name));
+ }
+ break;
+
+ case PL_DESC_BUF_TEXEL_UNIFORM:
+ case PL_DESC_BUF_TEXEL_STORAGE:
+ case PL_DESC_SAMPLED_TEX:
+ case PL_DESC_STORAGE_IMG:
+ pl_assert(!sd.num_buffer_vars);
+ break;
+
+ case PL_DESC_INVALID:
+ case PL_DESC_TYPE_COUNT:
+ pl_unreachable();
+ }
+
+ ident_t id = sh_fresh_name(sh, &sd.desc.name);
+ PL_ARRAY_APPEND(sh, sh->descs, sd);
+ return id;
+}
+
+ident_t sh_const(pl_shader sh, struct pl_shader_const sc)
+{
+ if (SH_PARAMS(sh).dynamic_constants && !sc.compile_time) {
+ return sh_var(sh, (struct pl_shader_var) {
+ .var = {
+ .name = sc.name,
+ .type = sc.type,
+ .dim_v = 1,
+ .dim_m = 1,
+ .dim_a = 1,
+ },
+ .data = sc.data,
+ });
+ }
+
+ ident_t id = sh_fresh_name(sh, &sc.name);
+
+ pl_gpu gpu = SH_GPU(sh);
+ if (gpu && gpu->limits.max_constants) {
+ if (!sc.compile_time || gpu->limits.array_size_constants) {
+ size_t size = pl_var_type_size(sc.type);
+ sc.data = sh_memdup(sh, sc.data, size, size);
+ PL_ARRAY_APPEND(sh, sh->consts, sc);
+ return id;
+ }
+ }
+
+ // Fallback for GPUs without specialization constants
+ switch (sc.type) {
+ case PL_VAR_SINT:
+ GLSLH("const int "$" = %d; \n", id, *(int *) sc.data);
+ return id;
+ case PL_VAR_UINT:
+ GLSLH("const uint "$" = uint(%u); \n", id, *(unsigned int *) sc.data);
+ return id;
+ case PL_VAR_FLOAT:
+ GLSLH("const float "$" = float(%f); \n", id, *(float *) sc.data);
+ return id;
+ case PL_VAR_INVALID:
+ case PL_VAR_TYPE_COUNT:
+ break;
+ }
+
+ pl_unreachable();
+}
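+
+/* The three lowering strategies above, summarized (illustrative): with
+ * `dynamic_constants` enabled, sh_const_uint(sh, "n", 4) becomes a regular
+ * shader variable; on GPUs that support them (`limits.max_constants`) it
+ * becomes a true specialization constant; otherwise it is emitted inline
+ * into the header as `const uint n_XXXX = uint(4);`.
+ */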
+
+ident_t sh_const_int(pl_shader sh, const char *name, int val)
+{
+ return sh_const(sh, (struct pl_shader_const) {
+ .type = PL_VAR_SINT,
+ .name = name,
+ .data = &val,
+ });
+}
+
+ident_t sh_const_uint(pl_shader sh, const char *name, unsigned int val)
+{
+ return sh_const(sh, (struct pl_shader_const) {
+ .type = PL_VAR_UINT,
+ .name = name,
+ .data = &val,
+ });
+}
+
+ident_t sh_const_float(pl_shader sh, const char *name, float val)
+{
+ return sh_const(sh, (struct pl_shader_const) {
+ .type = PL_VAR_FLOAT,
+ .name = name,
+ .data = &val,
+ });
+}
+
+ident_t sh_attr(pl_shader sh, struct pl_shader_va sva)
+{
+ const size_t vsize = sva.attr.fmt->texel_size;
+ uint8_t *data = sh_alloc(sh, vsize * 4, vsize);
+ for (int i = 0; i < 4; i++) {
+ memcpy(data, sva.data[i], vsize);
+ sva.data[i] = data;
+ data += vsize;
+ }
+
+ ident_t id = sh_fresh_name(sh, &sva.attr.name);
+ PL_ARRAY_APPEND(sh, sh->vas, sva);
+ return id;
+}
+
+ident_t sh_attr_vec2(pl_shader sh, const char *name, const pl_rect2df *rc)
+{
+ pl_gpu gpu = SH_GPU(sh);
+ if (!gpu) {
+ SH_FAIL(sh, "Failed adding vertex attr '%s': No GPU available!", name);
+ return NULL_IDENT;
+ }
+
+ pl_fmt fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2);
+ if (!fmt) {
+ SH_FAIL(sh, "Failed adding vertex attr '%s': no vertex fmt!", name);
+ return NULL_IDENT;
+ }
+
+ float verts[4][2] = {
+ { rc->x0, rc->y0 },
+ { rc->x1, rc->y0 },
+ { rc->x0, rc->y1 },
+ { rc->x1, rc->y1 },
+ };
+
+ return sh_attr(sh, (struct pl_shader_va) {
+ .attr = {
+ .name = name,
+ .fmt = fmt,
+ },
+ .data = { verts[0], verts[1], verts[2], verts[3] },
+ });
+}
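+
+/* Note (illustrative): the four vertices above are laid out in triangle
+ * strip order, covering the quad `rc` with two triangles:
+ *
+ *   (x0,y0) ---- (x1,y0)
+ *      |       /    |
+ *   (x0,y1) ---- (x1,y1)
+ */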
+
+ident_t sh_bind(pl_shader sh, pl_tex tex,
+ enum pl_tex_address_mode address_mode,
+ enum pl_tex_sample_mode sample_mode,
+ const char *name, const pl_rect2df *rect,
+ ident_t *out_pos, ident_t *out_pt)
+{
+ if (pl_tex_params_dimension(tex->params) != 2) {
+ SH_FAIL(sh, "Failed binding texture '%s': not a 2D texture!", name);
+ return NULL_IDENT;
+ }
+
+ if (!tex->params.sampleable) {
+ SH_FAIL(sh, "Failed binding texture '%s': texture not sampleable!", name);
+ return NULL_IDENT;
+ }
+
+ ident_t itex = sh_desc(sh, (struct pl_shader_desc) {
+ .desc = {
+ .name = name,
+ .type = PL_DESC_SAMPLED_TEX,
+ },
+ .binding = {
+ .object = tex,
+ .address_mode = address_mode,
+ .sample_mode = sample_mode,
+ },
+ });
+
+ float sx, sy;
+ if (tex->sampler_type == PL_SAMPLER_RECT) {
+ sx = 1.0;
+ sy = 1.0;
+ } else {
+ sx = 1.0 / tex->params.w;
+ sy = 1.0 / tex->params.h;
+ }
+
+ if (out_pos) {
+ pl_rect2df full = {
+ .x1 = tex->params.w,
+ .y1 = tex->params.h,
+ };
+
+ rect = PL_DEF(rect, &full);
+ *out_pos = sh_attr_vec2(sh, "tex_coord", &(pl_rect2df) {
+ .x0 = sx * rect->x0, .y0 = sy * rect->y0,
+ .x1 = sx * rect->x1, .y1 = sy * rect->y1,
+ });
+ }
+
+ if (out_pt) {
+ *out_pt = sh_var(sh, (struct pl_shader_var) {
+ .var = pl_var_vec2("tex_pt"),
+ .data = &(float[2]) {sx, sy},
+ });
+ }
+
+ return itex;
+}
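+
+/* Usage sketch (illustrative; `tex` is an existing, sampleable 2D pl_tex):
+ *
+ *   ident_t pos, pt;
+ *   ident_t src = sh_bind(sh, tex, PL_TEX_ADDRESS_CLAMP,
+ *                         PL_TEX_SAMPLE_LINEAR, "src", NULL, &pos, &pt);
+ *   GLSL("vec4 color = texture("$", "$"); \n", src, pos);
+ */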
+
+bool sh_buf_desc_append(void *alloc, pl_gpu gpu,
+ struct pl_shader_desc *buf_desc,
+ struct pl_var_layout *out_layout,
+ const struct pl_var new_var)
+{
+ struct pl_buffer_var bv = { .var = new_var };
+ size_t cur_size = sh_buf_desc_size(buf_desc);
+
+ switch (buf_desc->desc.type) {
+ case PL_DESC_BUF_UNIFORM:
+ bv.layout = pl_std140_layout(cur_size, &new_var);
+ if (bv.layout.offset + bv.layout.size > gpu->limits.max_ubo_size)
+ return false;
+ break;
+ case PL_DESC_BUF_STORAGE:
+ bv.layout = pl_std430_layout(cur_size, &new_var);
+ if (bv.layout.offset + bv.layout.size > gpu->limits.max_ssbo_size)
+ return false;
+ break;
+ case PL_DESC_INVALID:
+ case PL_DESC_SAMPLED_TEX:
+ case PL_DESC_STORAGE_IMG:
+ case PL_DESC_BUF_TEXEL_UNIFORM:
+ case PL_DESC_BUF_TEXEL_STORAGE:
+ case PL_DESC_TYPE_COUNT:
+ pl_unreachable();
+ }
+
+ if (out_layout)
+ *out_layout = bv.layout;
+ PL_ARRAY_APPEND_RAW(alloc, buf_desc->buffer_vars, buf_desc->num_buffer_vars, bv);
+ return true;
+}
+
+size_t sh_buf_desc_size(const struct pl_shader_desc *buf_desc)
+{
+ if (!buf_desc->num_buffer_vars)
+ return 0;
+
+ const struct pl_buffer_var *last;
+ last = &buf_desc->buffer_vars[buf_desc->num_buffer_vars - 1];
+ return last->layout.offset + last->layout.size;
+}
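+
+/* Layout example for the std140/std430 split above (illustrative): a vec3
+ * appended at offset 0 is 16-byte aligned with size 12 under both rules,
+ * and a following float packs into its tail padding at offset 12; but a
+ * subsequent float[] array would get a 16-byte stride under std140 versus
+ * a tightly packed 4-byte stride under std430.
+ */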
+
+void sh_describef(pl_shader sh, const char *fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ sh_describe(sh, pl_vasprintf(sh->info->tmp, fmt, ap));
+ va_end(ap);
+}
+
+static const char *insigs[] = {
+ [PL_SHADER_SIG_NONE] = "",
+ [PL_SHADER_SIG_COLOR] = "vec4 color",
+};
+
+static const char *outsigs[] = {
+ [PL_SHADER_SIG_NONE] = "void",
+ [PL_SHADER_SIG_COLOR] = "vec4",
+};
+
+static const char *retvals[] = {
+ [PL_SHADER_SIG_NONE] = "",
+ [PL_SHADER_SIG_COLOR] = "return color;",
+};
+
+// libplacebo currently only allows 2D samplers for shader signatures
+static const char *samplers2D[] = {
+ [PL_SAMPLER_NORMAL] = "sampler2D",
+ [PL_SAMPLER_RECT] = "sampler2DRect",
+ [PL_SAMPLER_EXTERNAL] = "samplerExternalOES",
+};
+
+ident_t sh_subpass(pl_shader sh, pl_shader sub)
+{
+ pl_assert(sh->mutable);
+
+ if (sh->prefix == sub->prefix) {
+ PL_TRACE(sh, "Can't merge shaders: conflicting identifiers!");
+ return NULL_IDENT;
+ }
+
+ // Check for shader compatibility
+ int res_w = PL_DEF(sh->output_w, sub->output_w),
+ res_h = PL_DEF(sh->output_h, sub->output_h);
+
+ if ((sub->output_w && res_w != sub->output_w) ||
+ (sub->output_h && res_h != sub->output_h))
+ {
+ PL_TRACE(sh, "Can't merge shaders: incompatible sizes: %dx%d and %dx%d",
+ sh->output_w, sh->output_h, sub->output_w, sub->output_h);
+ return NULL_IDENT;
+ }
+
+ if (sub->type == SH_COMPUTE) {
+ int subw = sub->group_size[0],
+ subh = sub->group_size[1];
+ bool flex = sub->flexible_work_groups;
+
+ if (!sh_try_compute(sh, subw, subh, flex, sub->shmem)) {
+ PL_TRACE(sh, "Can't merge shaders: incompatible block sizes or "
+ "exceeded shared memory resource capabilities");
+ return NULL_IDENT;
+ }
+ }
+
+ sh->output_w = res_w;
+ sh->output_h = res_h;
+
+ // Append the prelude and header
+ pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sub->buffers[SH_BUF_PRELUDE]);
+ pl_str_builder_concat(sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_HEADER]);
+
+ // Append the body as a new header function
+ if (sub->input == PL_SHADER_SIG_SAMPLER) {
+ pl_assert(sub->sampler_prefix);
+ GLSLH("%s "$"(%c%s src_tex, vec2 tex_coord) {\n",
+ outsigs[sub->output], sub->name,
+ sub->sampler_prefix, samplers2D[sub->sampler_type]);
+ } else {
+ GLSLH("%s "$"(%s) {\n",
+ outsigs[sub->output], sub->name, insigs[sub->input]);
+ }
+ pl_str_builder_concat(sh->buffers[SH_BUF_HEADER], sub->buffers[SH_BUF_BODY]);
+ GLSLH("%s\n}\n\n", retvals[sub->output]);
+
+ // Steal all inputs and objects from the subpass
+#define ARRAY_STEAL(arr) do \
+{ \
+ PL_ARRAY_CONCAT(sh, sh->arr, sub->arr); \
+ sub->arr.num = 0; \
+} while (0)
+
+ ARRAY_STEAL(obj);
+ ARRAY_STEAL(vas);
+ ARRAY_STEAL(vars);
+ ARRAY_STEAL(descs);
+ ARRAY_STEAL(consts);
+#undef ARRAY_STEAL
+
+ // Steal the scratch buffer (if it holds data)
+ if (sub->data.len) {
+ pl_steal(sh->tmp, sub->data.buf);
+ sub->data = (pl_str) {0};
+ }
+
+ // Steal all temporary allocations and mark the child as unusable
+ pl_steal(sh->tmp, sub->tmp);
+ sub->tmp = pl_tmp(sub);
+ sub->failed = true;
+
+ // Steal the shader steps array (and allocations)
+ pl_assert(pl_rc_count(&sub->info->rc) == 1);
+ PL_ARRAY_CONCAT(sh->info, sh->info->steps, sub->info->steps);
+ pl_steal(sh->info->tmp, sub->info->tmp);
+ sub->info->tmp = pl_tmp(sub->info);
+ sub->info->steps.num = 0; // sanity
+
+ return sub->name;
+}
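+
+/* Usage sketch (illustrative): after a successful merge, the subpass body
+ * is callable as a function from the parent shader:
+ *
+ *   ident_t fn = sh_subpass(sh, sub);
+ *   if (fn != NULL_IDENT)
+ *       GLSL("color = "$"(color); \n", fn);
+ */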
+
+pl_str_builder sh_finalize_internal(pl_shader sh)
+{
+ pl_assert(sh->mutable); // this function should only ever be called once
+ if (sh->failed)
+ return NULL;
+
+ // Padding for readability
+ GLSLP("\n");
+
+ // Concatenate everything onto the prelude to form the final output
+ pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sh->buffers[SH_BUF_HEADER]);
+
+ if (sh->input == PL_SHADER_SIG_SAMPLER) {
+ pl_assert(sh->sampler_prefix);
+ GLSLP("%s "$"(%c%s src_tex, vec2 tex_coord) {\n",
+ outsigs[sh->output], sh->name,
+ sh->sampler_prefix,
+ samplers2D[sh->sampler_type]);
+ } else {
+ GLSLP("%s "$"(%s) {\n", outsigs[sh->output], sh->name, insigs[sh->input]);
+ }
+
+ pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sh->buffers[SH_BUF_BODY]);
+ pl_str_builder_concat(sh->buffers[SH_BUF_PRELUDE], sh->buffers[SH_BUF_FOOTER]);
+ GLSLP("%s\n}\n\n", retvals[sh->output]);
+
+ // Generate the shader info
+ struct sh_info *info = sh->info;
+ info->info.steps = info->steps.elem;
+ info->info.num_steps = info->steps.num;
+ info->info.description = "(unknown shader)";
+
+ // Generate pretty description
+ for (int i = 0; i < info->steps.num; i++) {
+ const char *step = info->steps.elem[i];
+
+ // Prevent duplicates. We're okay using a weak equality check here
+ // because most pass descriptions are static strings.
+ for (int j = 0; j < i; j++) {
+ if (info->steps.elem[j] == step)
+ goto next_step;
+ }
+
+ int count = 1;
+ for (int j = i+1; j < info->steps.num; j++) {
+ if (info->steps.elem[j] == step)
+ count++;
+ }
+
+ const char *prefix = i > 0 ? ", " : "";
+ if (count > 1) {
+ pl_str_append_asprintf(info, &info->desc, "%s%s x%d",
+ prefix, step, count);
+ } else {
+ pl_str_append_asprintf(info, &info->desc, "%s%s", prefix, step);
+ }
+
+next_step: ;
+ }
+
+ if (info->desc.len)
+ info->info.description = (char *) info->desc.buf;
+
+ sh->mutable = false;
+ return sh->buffers[SH_BUF_PRELUDE];
+}
+
+const struct pl_shader_res *pl_shader_finalize(pl_shader sh)
+{
+ if (sh->failed) {
+ return NULL;
+ } else if (!sh->mutable) {
+ return &sh->result;
+ }
+
+ pl_shader_info info = &sh->info->info;
+ pl_str_builder glsl = sh_finalize_internal(sh);
+
+ // Turn ident_t into friendly strings before passing them to users
+#define FIX_IDENT(name) \
+ name = sh_ident_tostr(sh_ident_unpack(name))
+ for (int i = 0; i < sh->vas.num; i++)
+ FIX_IDENT(sh->vas.elem[i].attr.name);
+ for (int i = 0; i < sh->vars.num; i++)
+ FIX_IDENT(sh->vars.elem[i].var.name);
+ for (int i = 0; i < sh->consts.num; i++)
+ FIX_IDENT(sh->consts.elem[i].name);
+ for (int i = 0; i < sh->descs.num; i++) {
+ struct pl_shader_desc *sd = &sh->descs.elem[i];
+ FIX_IDENT(sd->desc.name);
+ for (int j = 0; j < sd->num_buffer_vars; j++)
+ FIX_IDENT(sd->buffer_vars[j].var.name);
+ }
+#undef FIX_IDENT
+
+ sh->result = (struct pl_shader_res) {
+ .info = info,
+ .glsl = (char *) pl_str_builder_exec(glsl).buf,
+ .name = sh_ident_tostr(sh->name),
+ .input = sh->input,
+ .output = sh->output,
+ .compute_group_size = { sh->group_size[0], sh->group_size[1] },
+ .compute_shmem = sh->shmem,
+ .vertex_attribs = sh->vas.elem,
+ .num_vertex_attribs = sh->vas.num,
+ .variables = sh->vars.elem,
+ .num_variables = sh->vars.num,
+ .descriptors = sh->descs.elem,
+ .num_descriptors = sh->descs.num,
+ .constants = sh->consts.elem,
+ .num_constants = sh->consts.num,
+ // deprecated fields
+ .params = info->params,
+ .steps = info->steps,
+ .num_steps = info->num_steps,
+ .description = info->description,
+ };
+
+ return &sh->result;
+}
+
+bool sh_require(pl_shader sh, enum pl_shader_sig insig, int w, int h)
+{
+ if (sh->failed) {
+ SH_FAIL(sh, "Attempting to modify a failed shader!");
+ return false;
+ }
+
+ if (!sh->mutable) {
+ SH_FAIL(sh, "Attempted to modify an immutable shader!");
+ return false;
+ }
+
+ if ((w && sh->output_w && sh->output_w != w) ||
+ (h && sh->output_h && sh->output_h != h))
+ {
+ SH_FAIL(sh, "Illegal sequence of shader operations: Incompatible "
+ "output size requirements %dx%d and %dx%d",
+ sh->output_w, sh->output_h, w, h);
+ return false;
+ }
+
+ static const char *names[] = {
+ [PL_SHADER_SIG_NONE] = "PL_SHADER_SIG_NONE",
+ [PL_SHADER_SIG_COLOR] = "PL_SHADER_SIG_COLOR",
+ };
+
+ // If we require an input, but there is none available, just get it from
+ // the user by turning it into an explicit input signature.
+ if (!sh->output && insig) {
+ pl_assert(!sh->input);
+ sh->input = insig;
+ } else if (sh->output != insig) {
+ SH_FAIL(sh, "Illegal sequence of shader operations! Current output "
+ "signature is '%s', but called operation expects '%s'!",
+ names[sh->output], names[insig]);
+ return false;
+ }
+
+ // All of our shaders end up returning a vec4 color
+ sh->output = PL_SHADER_SIG_COLOR;
+ sh->output_w = PL_DEF(sh->output_w, w);
+ sh->output_h = PL_DEF(sh->output_h, h);
+ return true;
+}
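+
+/* Example of the signature chaining above (illustrative): a color-mapping
+ * step typically starts with sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0).
+ * This either matches the vec4 output of the previous step, or, for the
+ * first step in a shader, turns `color` into an explicit input parameter
+ * of the generated function.
+ */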
+
+static void sh_obj_deref(pl_shader_obj obj)
+{
+ if (!pl_rc_deref(&obj->rc))
+ return;
+
+ if (obj->uninit)
+ obj->uninit(obj->gpu, obj->priv);
+
+ pl_free(obj);
+}
+
+void pl_shader_obj_destroy(pl_shader_obj *ptr)
+{
+ pl_shader_obj obj = *ptr;
+ if (!obj)
+ return;
+
+ sh_obj_deref(obj);
+ *ptr = NULL;
+}
+
+void *sh_require_obj(pl_shader sh, pl_shader_obj *ptr,
+ enum pl_shader_obj_type type, size_t priv_size,
+ void (*uninit)(pl_gpu gpu, void *priv))
+{
+ if (!ptr)
+ return NULL;
+
+ pl_shader_obj obj = *ptr;
+ if (obj && obj->gpu != SH_GPU(sh)) {
+ SH_FAIL(sh, "Passed pl_shader_obj belongs to different GPU!");
+ return NULL;
+ }
+
+ if (obj && obj->type != type) {
+ SH_FAIL(sh, "Passed pl_shader_obj of wrong type! Shader objects must "
+ "always be used with the same type of shader.");
+ return NULL;
+ }
+
+ if (!obj) {
+ obj = pl_zalloc_ptr(NULL, obj);
+ pl_rc_init(&obj->rc);
+ obj->gpu = SH_GPU(sh);
+ obj->type = type;
+ obj->priv = pl_zalloc(obj, priv_size);
+ obj->uninit = uninit;
+ }
+
+ PL_ARRAY_APPEND(sh, sh->obj, obj);
+ pl_rc_ref(&obj->rc);
+
+ *ptr = obj;
+ return obj->priv;
+}
+
+ident_t sh_prng(pl_shader sh, bool temporal, ident_t *p_state)
+{
+ ident_t randfun = sh_fresh(sh, "rand"),
+ state = sh_fresh(sh, "state");
+
+ // Based on pcg3d (http://jcgt.org/published/0009/03/02/)
+ GLSLP("#define prng_t uvec3\n");
+ GLSLH("vec3 "$"(inout uvec3 s) { \n"
+ " s = 1664525u * s + uvec3(1013904223u); \n"
+ " s.x += s.y * s.z; \n"
+ " s.y += s.z * s.x; \n"
+ " s.z += s.x * s.y; \n"
+ " s ^= s >> 16u; \n"
+ " s.x += s.y * s.z; \n"
+ " s.y += s.z * s.x; \n"
+ " s.z += s.x * s.y; \n"
+ " return vec3(s) * 1.0/float(0xFFFFFFFFu); \n"
+ "} \n",
+ randfun);
+
+ if (temporal) {
+ GLSL("uvec3 "$" = uvec3(gl_FragCoord.xy, "$"); \n",
+ state, SH_UINT_DYN(SH_PARAMS(sh).index));
+ } else {
+ GLSL("uvec3 "$" = uvec3(gl_FragCoord.xy, 0.0); \n", state);
+ }
+
+ if (p_state)
+ *p_state = state;
+
+ ident_t res = sh_fresh(sh, "RAND");
+ GLSLH("#define "$" ("$"("$"))\n", res, randfun, state);
+ return res;
+}
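+
+/* Usage sketch (illustrative; `bias` is a hypothetical scale factor): the
+ * returned macro re-invokes the PRNG on every expansion, yielding a fresh
+ * vec3 of uniform samples per use, e.g. for dithering:
+ *
+ *   ident_t prng = sh_prng(sh, true, NULL);
+ *   GLSL("color.rgb += bias * ("$" - vec3(0.5)); \n", prng);
+ */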