/* * This file is part of libplacebo. * * libplacebo is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License as published by the Free Software Foundation; either * version 2.1 of the License, or (at your option) any later version. * * libplacebo is distributed in the hope that it will be useful, * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU Lesser General Public License for more details. * * You should have received a copy of the GNU Lesser General Public * License along with libplacebo. If not, see . */ #include #include #include "shaders.h" #include static inline bool isnumeric(char c) { return (c >= '0' && c <= '9') || c == '-'; } void pl_lut_free(struct pl_custom_lut **lut) { pl_free_ptr(lut); } struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *cstr, size_t cstr_len) { struct pl_custom_lut *lut = pl_zalloc_ptr(NULL, lut); pl_str str = (pl_str) { (uint8_t *) cstr, cstr_len }; lut->signature = pl_str_hash(str); int entries = 0; float min[3] = { 0.0, 0.0, 0.0 }; float max[3] = { 1.0, 1.0, 1.0 }; // Parse header while (str.len && !isnumeric(str.buf[0])) { pl_str line = pl_str_strip(pl_str_getline(str, &str)); if (!line.len) continue; // skip empty line if (pl_str_eatstart0(&line, "TITLE")) { pl_info(log, "Loading LUT: %.*s", PL_STR_FMT(pl_str_strip(line))); continue; } if (pl_str_eatstart0(&line, "LUT_3D_SIZE")) { line = pl_str_strip(line); int size; if (!pl_str_parse_int(line, &size)) { pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); goto error; } if (size <= 0 || size > 1024) { pl_err(log, "Invalid 3DLUT size: %dx%d%x", size, size, size); goto error; } lut->size[0] = lut->size[1] = lut->size[2] = size; entries = size * size * size; continue; } if (pl_str_eatstart0(&line, "LUT_1D_SIZE")) { line = pl_str_strip(line); int size; if (!pl_str_parse_int(line, &size)) { pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); goto error; } if (size <= 0 || size > 65536) { pl_err(log, "Invalid 1DLUT size: %d", size); goto error; } lut->size[0] = size; lut->size[1] = lut->size[2] = 0; entries = size; continue; } if (pl_str_eatstart0(&line, "DOMAIN_MIN")) { line = pl_str_strip(line); if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[1]) || !pl_str_parse_float(line, &min[2])) { pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); goto error; } continue; } if (pl_str_eatstart0(&line, "DOMAIN_MAX")) { line = pl_str_strip(line); if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[0]) || !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[1]) || !pl_str_parse_float(line, &max[2])) { pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); goto error; } continue; } if (pl_str_eatstart0(&line, "#")) { pl_debug(log, "Unhandled .cube comment: %.*s", PL_STR_FMT(pl_str_strip(line))); continue; } pl_warn(log, "Unhandled .cube line: %.*s", PL_STR_FMT(pl_str_strip(line))); } if (!entries) { pl_err(log, "Missing LUT size specification?"); goto error; } for (int i = 0; i < 3; i++) { if (max[i] - min[i] < 1e-6) { pl_err(log, "Invalid domain range: [%f, %f]", min[i], max[i]); goto error; } } float *data = pl_alloc(lut, sizeof(float[3]) * entries); lut->data = data; // Parse LUT body pl_clock_t start = pl_clock_now(); for (int n = 0; n < entries; n++) { for (int c = 0; c < 3; c++) { static const char * const digits = "0123456789.-+e"; // Extract valid digit sequence size_t len = pl_strspn(str, digits); pl_str entry = (pl_str) { str.buf, len }; str.buf += len; str.len -= len; if (!entry.len) { if (!str.len) { pl_err(log, "Failed parsing LUT: Unexpected EOF, expected " "%d entries, got %d", entries * 3, n * 3 + c + 1); } else { pl_err(log, "Failed parsing LUT: Unexpected '%c', expected " "digit", str.buf[0]); } goto error; } float num; if (!pl_str_parse_float(entry, &num)) { pl_err(log, "Failed parsing float value '%.*s'", PL_STR_FMT(entry)); goto error; } // Rescale to range 0.0 - 1.0 *data++ = (num - min[c]) / (max[c] - min[c]); // Skip whitespace between digits str = pl_str_strip(str); } } str = pl_str_strip(str); if (str.len) pl_warn(log, "Extra data after LUT?... ignoring '%c'", str.buf[0]); pl_log_cpu_time(log, start, pl_clock_now(), "parsing .cube LUT"); return lut; error: pl_free(lut); return NULL; } static void fill_lut(void *datap, const struct sh_lut_params *params) { const struct pl_custom_lut *lut = params->priv; int dim_r = params->width; int dim_g = PL_DEF(params->height, 1); int dim_b = PL_DEF(params->depth, 1); float *data = datap; for (int b = 0; b < dim_b; b++) { for (int g = 0; g < dim_g; g++) { for (int r = 0; r < dim_r; r++) { size_t offset = (b * dim_g + g) * dim_r + r; const float *src = &lut->data[offset * 3]; float *dst = &data[offset * 4]; dst[0] = src[0]; dst[1] = src[1]; dst[2] = src[2]; dst[3] = 0.0f; } } } } void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, pl_shader_obj *lut_state) { if (!lut) return; int dims; if (lut->size[0] > 0 && lut->size[1] > 0 && lut->size[2] > 0) { dims = 3; } else if (lut->size[0] > 0 && !lut->size[1] && !lut->size[2]) { dims = 1; } else { SH_FAIL(sh, "Invalid dimensions %dx%dx%d for pl_custom_lut, must be 1D " "or 3D!", lut->size[0], lut->size[1], lut->size[2]); return; } if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) return; ident_t fun = sh_lut(sh, sh_lut_params( .object = lut_state, .var_type = PL_VAR_FLOAT, .method = SH_LUT_TETRAHEDRAL, .width = lut->size[0], .height = lut->size[1], .depth = lut->size[2], .comps = 4, // for better texel alignment .signature = lut->signature, .fill = fill_lut, .priv = (void *) lut, )); if (!fun) { SH_FAIL(sh, "pl_shader_custom_lut: failed generating LUT object"); return; } GLSL("// pl_shader_custom_lut \n"); static const pl_matrix3x3 zero = {0}; if (memcmp(&lut->shaper_in, &zero, sizeof(zero)) != 0) { GLSL("color.rgb = "$" * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("shaper_in"), .data = PL_TRANSPOSE_3X3(lut->shaper_in.m), })); } switch (dims) { case 1: sh_describe(sh, "custom 1DLUT"); GLSL("color.rgb = vec3("$"(color.r).r, \n" " "$"(color.g).g, \n" " "$"(color.b).b); \n", fun, fun, fun); break; case 3: sh_describe(sh, "custom 3DLUT"); GLSL("color.rgb = "$"(color.rgb).rgb; \n", fun); break; } if (memcmp(&lut->shaper_out, &zero, sizeof(zero)) != 0) { GLSL("color.rgb = "$" * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { .var = pl_var_mat3("shaper_out"), .data = PL_TRANSPOSE_3X3(lut->shaper_out.m), })); } } // Defines a LUT position helper macro. This translates from an absolute texel // scale (either in texels, or normalized to [0,1]) to the texture coordinate // scale for the corresponding sample in a texture of dimension `lut_size`. static ident_t texel_scale(pl_shader sh, int lut_size, bool normalized) { const float base = 0.5f / lut_size; const float end = 1.0f - 0.5f / lut_size; const float scale = (end - base) / (normalized ? 1.0f : (lut_size - 1)); ident_t name = sh_fresh(sh, "LUT_SCALE"); GLSLH("#define "$"(x) ("$" * (x) + "$") \n", name, SH_FLOAT(scale), SH_FLOAT(base)); return name; } struct sh_lut_obj { enum sh_lut_type type; enum sh_lut_method method; enum pl_var_type vartype; pl_fmt fmt; int width, height, depth, comps; uint64_t signature; bool error; // reset if params change // weights, depending on the lut type pl_tex tex; pl_str str; void *data; }; static void sh_lut_uninit(pl_gpu gpu, void *ptr) { struct sh_lut_obj *lut = ptr; pl_tex_destroy(gpu, &lut->tex); pl_free(lut->str.buf); pl_free(lut->data); *lut = (struct sh_lut_obj) {0}; } // Maximum number of floats to embed as a literal array (when using SH_LUT_AUTO) #define SH_LUT_MAX_LITERAL_SOFT 64 #define SH_LUT_MAX_LITERAL_HARD 256 ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params) { pl_gpu gpu = SH_GPU(sh); pl_cache_obj obj = { .key = CACHE_KEY_SH_LUT ^ params->signature }; const enum pl_var_type vartype = params->var_type; pl_assert(vartype != PL_VAR_INVALID); pl_assert(params->method == SH_LUT_NONE || vartype == PL_VAR_FLOAT); pl_assert(params->width > 0 && params->height >= 0 && params->depth >= 0); pl_assert(params->comps > 0); pl_assert(!params->cache || params->signature); int sizes[] = { params->width, params->height, params->depth }; int size = params->width * PL_DEF(params->height, 1) * PL_DEF(params->depth, 1); int dims = params->depth ? 3 : params->height ? 2 : 1; enum sh_lut_method method = params->method; if (method == SH_LUT_TETRAHEDRAL && dims != 3) method = SH_LUT_LINEAR; if (method == SH_LUT_CUBIC && dims != 3) method = SH_LUT_LINEAR; int texdim = 0; uint32_t max_tex_dim[] = { gpu ? gpu->limits.max_tex_1d_dim : 0, gpu ? gpu->limits.max_tex_2d_dim : 0, (gpu && gpu->glsl.version > 100) ? gpu->limits.max_tex_3d_dim : 0, }; struct sh_lut_obj *lut = SH_OBJ(sh, params->object, PL_SHADER_OBJ_LUT, struct sh_lut_obj, sh_lut_uninit); if (!lut) return NULL_IDENT; bool update = params->update || lut->signature != params->signature || vartype != lut->vartype || params->fmt != lut->fmt || params->width != lut->width || params->height != lut->height || params->depth != lut->depth || params->comps != lut->comps; if (lut->error && !update) return NULL_IDENT; // suppress error spam until something changes // Try picking the right number of dimensions for the texture LUT. This // allows e.g. falling back to 2D textures if 1D textures are unsupported. for (int d = dims; d <= PL_ARRAY_SIZE(max_tex_dim); d++) { // For a given dimension to be compatible, all coordinates need to be // within the maximum texture size for that dimension for (int i = 0; i < d; i++) { if (sizes[i] > max_tex_dim[d - 1]) goto next_dim; } // All dimensions are compatible, so pick this texture dimension texdim = d; break; next_dim: ; // `continue` out of the inner loop } static const enum pl_fmt_type fmt_type[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = PL_FMT_SINT, [PL_VAR_UINT] = PL_FMT_UINT, [PL_VAR_FLOAT] = PL_FMT_FLOAT, }; enum pl_fmt_caps texcaps = PL_FMT_CAP_SAMPLEABLE; bool is_linear = method == SH_LUT_LINEAR || method == SH_LUT_CUBIC; if (is_linear) texcaps |= PL_FMT_CAP_LINEAR; pl_fmt texfmt = params->fmt; if (texfmt) { bool ok; switch (texfmt->type) { case PL_FMT_SINT: ok = vartype == PL_VAR_SINT; break; case PL_FMT_UINT: ok = vartype == PL_VAR_UINT; break; default: ok = vartype == PL_VAR_FLOAT; break; } if (!ok) { PL_ERR(sh, "Specified texture format '%s' does not match LUT " "data type!", texfmt->name); goto error; } if (~texfmt->caps & texcaps) { PL_ERR(sh, "Specified texture format '%s' does not match " "required capabilities 0x%x!\n", texfmt->name, texcaps); goto error; } } if (texdim && !texfmt) { texfmt = pl_find_fmt(gpu, fmt_type[vartype], params->comps, vartype == PL_VAR_FLOAT ? 16 : 32, pl_var_type_size(vartype) * 8, texcaps); } enum sh_lut_type type = params->lut_type; // The linear sampling code currently only supports 1D linear interpolation if (is_linear && dims > 1) { if (texfmt) { type = SH_LUT_TEXTURE; } else { PL_ERR(sh, "Can't emulate linear LUTs for 2D/3D LUTs and no " "texture support available!"); goto error; } } bool can_uniform = gpu && gpu->limits.max_variable_comps >= size * params->comps; bool can_literal = sh_glsl(sh).version > 110; // needed for literal arrays can_literal &= size <= SH_LUT_MAX_LITERAL_HARD && !params->dynamic; // Deselect unsupported methods if (type == SH_LUT_UNIFORM && !can_uniform) type = SH_LUT_AUTO; if (type == SH_LUT_LITERAL && !can_literal) type = SH_LUT_AUTO; if (type == SH_LUT_TEXTURE && !texfmt) type = SH_LUT_AUTO; // Sorted by priority if (!type && can_literal && !method && size <= SH_LUT_MAX_LITERAL_SOFT) type = SH_LUT_LITERAL; if (!type && texfmt) type = SH_LUT_TEXTURE; if (!type && can_uniform) type = SH_LUT_UNIFORM; if (!type && can_literal) type = SH_LUT_LITERAL; if (!type) { PL_ERR(sh, "Can't generate LUT: no compatible methods!"); goto error; } // Reinitialize the existing LUT if needed update |= type != lut->type; update |= method != lut->method; if (update) { if (params->dynamic) pl_log_level_cap(sh->log, PL_LOG_TRACE); size_t el_size = params->comps * pl_var_type_size(vartype); if (type == SH_LUT_TEXTURE) el_size = texfmt->texel_size; size_t buf_size = size * el_size; if (pl_cache_get(params->cache, &obj) && obj.size == buf_size) { PL_DEBUG(sh, "Re-using cached LUT (0x%"PRIx64") with size %zu", obj.key, obj.size); } else { PL_DEBUG(sh, "LUT invalidated, regenerating.."); pl_cache_obj_resize(NULL, &obj, buf_size); pl_clock_t start = pl_clock_now(); params->fill(obj.data, params); pl_log_cpu_time(sh->log, start, pl_clock_now(), "generating shader LUT"); } pl_assert(obj.data && obj.size); if (params->dynamic) pl_log_level_cap(sh->log, PL_LOG_NONE); switch (type) { case SH_LUT_TEXTURE: { if (!texdim) { PL_ERR(sh, "Texture LUT exceeds texture dimensions!"); goto error; } if (!texfmt) { PL_ERR(sh, "Found no compatible texture format for LUT!"); goto error; } struct pl_tex_params tex_params = { .w = params->width, .h = PL_DEF(params->height, texdim >= 2 ? 1 : 0), .d = PL_DEF(params->depth, texdim >= 3 ? 1 : 0), .format = texfmt, .sampleable = true, .host_writable = params->dynamic, .initial_data = params->dynamic ? NULL : obj.data, .debug_tag = params->debug_tag, }; bool ok; if (params->dynamic) { ok = pl_tex_recreate(gpu, &lut->tex, &tex_params); if (ok) { ok = pl_tex_upload(gpu, pl_tex_transfer_params( .tex = lut->tex, .ptr = obj.data, )); } } else { // Can't use pl_tex_recreate because of `initial_data` pl_tex_destroy(gpu, &lut->tex); lut->tex = pl_tex_create(gpu, &tex_params); ok = lut->tex; } if (!ok) { PL_ERR(sh, "Failed creating LUT texture!"); goto error; } break; } case SH_LUT_UNIFORM: pl_free(lut->data); lut->data = pl_memdup(NULL, obj.data, obj.size); break; case SH_LUT_LITERAL: { lut->str.len = 0; static const char prefix[PL_VAR_TYPE_COUNT] = { [PL_VAR_SINT] = 'i', [PL_VAR_UINT] = 'u', [PL_VAR_FLOAT] = ' ', }; for (int i = 0; i < size * params->comps; i += params->comps) { if (i > 0) pl_str_append_asprintf_c(lut, &lut->str, ","); if (params->comps > 1) { pl_str_append_asprintf_c(lut, &lut->str, "%cvec%d(", prefix[vartype], params->comps); } for (int c = 0; c < params->comps; c++) { switch (vartype) { case PL_VAR_FLOAT: pl_str_append_asprintf_c(lut, &lut->str, "%s%f", c > 0 ? "," : "", ((float *) obj.data)[i+c]); break; case PL_VAR_UINT: pl_str_append_asprintf_c(lut, &lut->str, "%s%u", c > 0 ? "," : "", ((unsigned int *) obj.data)[i+c]); break; case PL_VAR_SINT: pl_str_append_asprintf_c(lut, &lut->str, "%s%d", c > 0 ? "," : "", ((int *) obj.data)[i+c]); break; case PL_VAR_INVALID: case PL_VAR_TYPE_COUNT: pl_unreachable(); } } if (params->comps > 1) pl_str_append_asprintf_c(lut, &lut->str, ")"); } break; } case SH_LUT_AUTO: pl_unreachable(); } lut->type = type; lut->method = method; lut->vartype = vartype; lut->fmt = params->fmt; lut->width = params->width; lut->height = params->height; lut->depth = params->depth; lut->comps = params->comps; lut->signature = params->signature; pl_cache_set(params->cache, &obj); } // Done updating, generate the GLSL ident_t name = sh_fresh(sh, "lut"); ident_t arr_name = NULL_IDENT; static const char * const swizzles[] = {"x", "xy", "xyz", "xyzw"}; static const char * const vartypes[PL_VAR_TYPE_COUNT][4] = { [PL_VAR_SINT] = { "int", "ivec2", "ivec3", "ivec4" }, [PL_VAR_UINT] = { "uint", "uvec2", "uvec3", "uvec4" }, [PL_VAR_FLOAT] = { "float", "vec2", "vec3", "vec4" }, }; switch (type) { case SH_LUT_TEXTURE: { assert(texdim); ident_t tex = sh_desc(sh, (struct pl_shader_desc) { .desc = { .name = "weights", .type = PL_DESC_SAMPLED_TEX, }, .binding = { .object = lut->tex, .sample_mode = is_linear ? PL_TEX_SAMPLE_LINEAR : PL_TEX_SAMPLE_NEAREST, } }); if (is_linear) { ident_t pos_macros[PL_ARRAY_SIZE(sizes)] = {0}; for (int i = 0; i < dims; i++) pos_macros[i] = texel_scale(sh, sizes[i], true); GLSLH("#define "$"(pos) (textureLod("$", %s(\\\n", name, tex, vartypes[PL_VAR_FLOAT][texdim - 1]); for (int i = 0; i < texdim; i++) { char sep = i == 0 ? ' ' : ','; if (pos_macros[i]) { if (dims > 1) { GLSLH(" %c"$"(%s(pos).%c)\\\n", sep, pos_macros[i], vartypes[PL_VAR_FLOAT][dims - 1], "xyzw"[i]); } else { GLSLH(" %c"$"(float(pos))\\\n", sep, pos_macros[i]); } } else { GLSLH(" %c%f\\\n", sep, 0.5); } } GLSLH(" ), 0.0).%s)\n", swizzles[params->comps - 1]); } else { GLSLH("#define "$"(pos) (texelFetch("$", %s(pos", name, tex, vartypes[PL_VAR_SINT][texdim - 1]); // Fill up extra components of the index for (int i = dims; i < texdim; i++) GLSLH(", 0"); GLSLH("), 0).%s)\n", swizzles[params->comps - 1]); } break; } case SH_LUT_UNIFORM: arr_name = sh_var(sh, (struct pl_shader_var) { .var = { .name = "weights", .type = vartype, .dim_v = params->comps, .dim_m = 1, .dim_a = size, }, .data = lut->data, }); break; case SH_LUT_LITERAL: arr_name = sh_fresh(sh, "weights"); GLSLH("const %s "$"[%d] = %s[](\n ", vartypes[vartype][params->comps - 1], arr_name, size, vartypes[vartype][params->comps - 1]); sh_append_str(sh, SH_BUF_HEADER, lut->str); GLSLH(");\n"); break; case SH_LUT_AUTO: pl_unreachable(); } if (arr_name) { GLSLH("#define "$"(pos) ("$"[int((pos)%s)\\\n", name, arr_name, dims > 1 ? "[0]" : ""); int shift = params->width; for (int i = 1; i < dims; i++) { GLSLH(" + %d * int((pos)[%d])\\\n", shift, i); shift *= sizes[i]; } GLSLH(" ])\n"); if (is_linear) { pl_assert(dims == 1); pl_assert(vartype == PL_VAR_FLOAT); ident_t arr_lut = name; name = sh_fresh(sh, "lut_lin"); GLSLH("%s "$"(float fpos) { \n" " fpos = clamp(fpos, 0.0, 1.0) * %d.0; \n" " float fbase = floor(fpos); \n" " float fceil = ceil(fpos); \n" " float fcoord = fpos - fbase; \n" " return mix("$"(fbase), "$"(fceil), fcoord); \n" "} \n", vartypes[PL_VAR_FLOAT][params->comps - 1], name, size - 1, arr_lut, arr_lut); } } if (method == SH_LUT_CUBIC && dims == 3) { ident_t lin_lut = name; name = sh_fresh(sh, "lut_tricubic"); GLSLH("%s "$"(vec3 pos) { \n" " vec3 scale = vec3(%d.0, %d.0, %d.0); \n" " vec3 scale_inv = 1.0 / scale; \n" " pos *= scale; \n" " vec3 fpos = fract(pos); \n" " vec3 base = pos - fpos; \n" " vec3 fpos2 = fpos * fpos; \n" " vec3 inv = 1.0 - fpos; \n" " vec3 inv2 = inv * inv; \n" " vec3 w0 = 1.0/6.0 * inv2 * inv; \n" " vec3 w1 = 2.0/3.0 - 0.5 * fpos2 * (2.0 - fpos); \n" " vec3 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv); \n" " vec3 w3 = 1.0/6.0 * fpos2 * fpos; \n" " vec3 g0 = w0 + w1; \n" " vec3 g1 = w2 + w3; \n" " vec3 h0 = scale_inv * ((w1 / g0) - 1.0 + base); \n" " vec3 h1 = scale_inv * ((w3 / g1) + 1.0 + base); \n" " %s c000, c001, c010, c011, c100, c101, c110, c111; \n" " c000 = "$"(h0); \n" " c100 = "$"(vec3(h1.x, h0.y, h0.z)); \n" " c000 = mix(c100, c000, g0.x); \n" " c010 = "$"(vec3(h0.x, h1.y, h0.z)); \n" " c110 = "$"(vec3(h1.x, h1.y, h0.z)); \n" " c010 = mix(c110, c010, g0.x); \n" " c000 = mix(c010, c000, g0.y); \n" " c001 = "$"(vec3(h0.x, h0.y, h1.z)); \n" " c101 = "$"(vec3(h1.x, h0.y, h1.z)); \n" " c001 = mix(c101, c001, g0.x); \n" " c011 = "$"(vec3(h0.x, h1.y, h1.z)); \n" " c111 = "$"(h1); \n" " c011 = mix(c111, c011, g0.x); \n" " c001 = mix(c011, c001, g0.y); \n" " return mix(c001, c000, g0.z); \n" "} \n", vartypes[PL_VAR_FLOAT][params->comps - 1], name, sizes[0] - 1, sizes[1] - 1, sizes[2] - 1, vartypes[PL_VAR_FLOAT][params->comps - 1], lin_lut, lin_lut, lin_lut, lin_lut, lin_lut, lin_lut, lin_lut, lin_lut); } if (method == SH_LUT_TETRAHEDRAL) { ident_t int_lut = name; name = sh_fresh(sh, "lut_barycentric"); GLSLH("%s "$"(vec3 pos) { \n" // Compute bounding vertices and fractional part " pos = clamp(pos, 0.0, 1.0) * vec3(%d.0, %d.0, %d.0); \n" " vec3 base = floor(pos); \n" " vec3 fpart = pos - base; \n" // v0 and v3 are always 'black' and 'white', respectively // v1 and v2 are the closest RGB and CMY vertices, respectively " ivec3 v0 = ivec3(base), v3 = ivec3(ceil(pos)); \n" " ivec3 v1 = v0, v2 = v3; \n" // Table of boolean checks to simplify following math " bvec3 c = greaterThanEqual(fpart.xyz, fpart.yzx); \n" " bool c_xy = c.x, c_yx = !c.x, \n" " c_yz = c.y, c_zy = !c.y, \n" " c_zx = c.z, c_xz = !c.z; \n" " vec3 s = fpart.xyz; \n" " bool cond; \n", vartypes[PL_VAR_FLOAT][params->comps - 1], name, sizes[0] - 1, sizes[1] - 1, sizes[2] - 1); // Subdivision of the cube into six congruent tetrahedras // // For each tetrahedron, test if the point is inside, and if so, update // the edge vertices. We test all six, even though only one case will // ever be true, because this avoids branches. static const char *indices[] = { "xyz", "xzy", "zxy", "zyx", "yzx", "yxz"}; for (int i = 0; i < PL_ARRAY_SIZE(indices); i++) { const char x = indices[i][0], y = indices[i][1], z = indices[i][2]; GLSLH("cond = c_%c%c && c_%c%c; \n" "s = cond ? fpart.%c%c%c : s; \n" "v1.%c = cond ? v3.%c : v1.%c; \n" "v2.%c = cond ? v0.%c : v2.%c; \n", x, y, y, z, x, y, z, x, x, x, z, z, z); } // Interpolate in barycentric coordinates, with four texel fetches GLSLH(" return (1.0 - s.x) * "$"(v0) + \n" " (s.x - s.y) * "$"(v1) + \n" " (s.y - s.z) * "$"(v2) + \n" " (s.z) * "$"(v3); \n" "} \n", int_lut, int_lut, int_lut, int_lut); } lut->error = false; pl_cache_obj_free(&obj); pl_assert(name); return name; error: lut->error = true; pl_cache_obj_free(&obj); return NULL_IDENT; }