From ff6e3c025658a5fa1affd094f220b623e7e1b24b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:38:23 +0200 Subject: Adding upstream version 6.338.2. Signed-off-by: Daniel Baumann --- src/shaders/lut.c | 820 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 820 insertions(+) create mode 100644 src/shaders/lut.c (limited to 'src/shaders/lut.c') diff --git a/src/shaders/lut.c b/src/shaders/lut.c new file mode 100644 index 0000000..b0124fc --- /dev/null +++ b/src/shaders/lut.c @@ -0,0 +1,820 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see . + */ + +#include +#include + +#include "shaders.h" + +#include + +static inline bool isnumeric(char c) +{ + return (c >= '0' && c <= '9') || c == '-'; +} + +void pl_lut_free(struct pl_custom_lut **lut) +{ + pl_free_ptr(lut); +} + +struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *cstr, size_t cstr_len) +{ + struct pl_custom_lut *lut = pl_zalloc_ptr(NULL, lut); + pl_str str = (pl_str) { (uint8_t *) cstr, cstr_len }; + lut->signature = pl_str_hash(str); + int entries = 0; + + float min[3] = { 0.0, 0.0, 0.0 }; + float max[3] = { 1.0, 1.0, 1.0 }; + + // Parse header + while (str.len && !isnumeric(str.buf[0])) { + pl_str line = pl_str_strip(pl_str_getline(str, &str)); + if (!line.len) + continue; // skip empty line + + if (pl_str_eatstart0(&line, "TITLE")) { + pl_info(log, "Loading LUT: %.*s", PL_STR_FMT(pl_str_strip(line))); + continue; + } + + if (pl_str_eatstart0(&line, "LUT_3D_SIZE")) { + line = pl_str_strip(line); + int size; + if (!pl_str_parse_int(line, &size)) { + pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); + goto error; + } + if (size <= 0 || size > 1024) { + pl_err(log, "Invalid 3DLUT size: %dx%d%x", size, size, size); + goto error; + } + + lut->size[0] = lut->size[1] = lut->size[2] = size; + entries = size * size * size; + continue; + } + + if (pl_str_eatstart0(&line, "LUT_1D_SIZE")) { + line = pl_str_strip(line); + int size; + if (!pl_str_parse_int(line, &size)) { + pl_err(log, "Failed parsing dimension '%.*s'", PL_STR_FMT(line)); + goto error; + } + if (size <= 0 || size > 65536) { + pl_err(log, "Invalid 1DLUT size: %d", size); + goto error; + } + + lut->size[0] = size; + lut->size[1] = lut->size[2] = 0; + entries = size; + continue; + } + + if (pl_str_eatstart0(&line, "DOMAIN_MIN")) { + line = pl_str_strip(line); + if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[0]) || + !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &min[1]) || + !pl_str_parse_float(line, &min[2])) + { + pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); + goto error; + } + continue; + } + + if (pl_str_eatstart0(&line, "DOMAIN_MAX")) { + line = pl_str_strip(line); + if (!pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[0]) || + !pl_str_parse_float(pl_str_split_char(line, ' ', &line), &max[1]) || + !pl_str_parse_float(line, &max[2])) + { + pl_err(log, "Failed parsing domain: '%.*s'", PL_STR_FMT(line)); + goto error; + } + continue; + } + + if (pl_str_eatstart0(&line, "#")) { + pl_debug(log, "Unhandled .cube comment: %.*s", + PL_STR_FMT(pl_str_strip(line))); + continue; + } + + pl_warn(log, "Unhandled .cube line: %.*s", PL_STR_FMT(pl_str_strip(line))); + } + + if (!entries) { + pl_err(log, "Missing LUT size specification?"); + goto error; + } + + for (int i = 0; i < 3; i++) { + if (max[i] - min[i] < 1e-6) { + pl_err(log, "Invalid domain range: [%f, %f]", min[i], max[i]); + goto error; + } + } + + float *data = pl_alloc(lut, sizeof(float[3]) * entries); + lut->data = data; + + // Parse LUT body + pl_clock_t start = pl_clock_now(); + for (int n = 0; n < entries; n++) { + for (int c = 0; c < 3; c++) { + static const char * const digits = "0123456789.-+e"; + + // Extract valid digit sequence + size_t len = pl_strspn(str, digits); + pl_str entry = (pl_str) { str.buf, len }; + str.buf += len; + str.len -= len; + + if (!entry.len) { + if (!str.len) { + pl_err(log, "Failed parsing LUT: Unexpected EOF, expected " + "%d entries, got %d", entries * 3, n * 3 + c + 1); + } else { + pl_err(log, "Failed parsing LUT: Unexpected '%c', expected " + "digit", str.buf[0]); + } + goto error; + } + + float num; + if (!pl_str_parse_float(entry, &num)) { + pl_err(log, "Failed parsing float value '%.*s'", PL_STR_FMT(entry)); + goto error; + } + + // Rescale to range 0.0 - 1.0 + *data++ = (num - min[c]) / (max[c] - min[c]); + + // Skip whitespace between digits + str = pl_str_strip(str); + } + } + + str = pl_str_strip(str); + if (str.len) + pl_warn(log, "Extra data after LUT?... ignoring '%c'", str.buf[0]); + + pl_log_cpu_time(log, start, pl_clock_now(), "parsing .cube LUT"); + return lut; + +error: + pl_free(lut); + return NULL; +} + +static void fill_lut(void *datap, const struct sh_lut_params *params) +{ + const struct pl_custom_lut *lut = params->priv; + + int dim_r = params->width; + int dim_g = PL_DEF(params->height, 1); + int dim_b = PL_DEF(params->depth, 1); + + float *data = datap; + for (int b = 0; b < dim_b; b++) { + for (int g = 0; g < dim_g; g++) { + for (int r = 0; r < dim_r; r++) { + size_t offset = (b * dim_g + g) * dim_r + r; + const float *src = &lut->data[offset * 3]; + float *dst = &data[offset * 4]; + dst[0] = src[0]; + dst[1] = src[1]; + dst[2] = src[2]; + dst[3] = 0.0f; + } + } + } +} + +void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, + pl_shader_obj *lut_state) +{ + if (!lut) + return; + + int dims; + if (lut->size[0] > 0 && lut->size[1] > 0 && lut->size[2] > 0) { + dims = 3; + } else if (lut->size[0] > 0 && !lut->size[1] && !lut->size[2]) { + dims = 1; + } else { + SH_FAIL(sh, "Invalid dimensions %dx%dx%d for pl_custom_lut, must be 1D " + "or 3D!", lut->size[0], lut->size[1], lut->size[2]); + return; + } + + if (!sh_require(sh, PL_SHADER_SIG_COLOR, 0, 0)) + return; + + ident_t fun = sh_lut(sh, sh_lut_params( + .object = lut_state, + .var_type = PL_VAR_FLOAT, + .method = SH_LUT_TETRAHEDRAL, + .width = lut->size[0], + .height = lut->size[1], + .depth = lut->size[2], + .comps = 4, // for better texel alignment + .signature = lut->signature, + .fill = fill_lut, + .priv = (void *) lut, + )); + + if (!fun) { + SH_FAIL(sh, "pl_shader_custom_lut: failed generating LUT object"); + return; + } + + GLSL("// pl_shader_custom_lut \n"); + + static const pl_matrix3x3 zero = {0}; + if (memcmp(&lut->shaper_in, &zero, sizeof(zero)) != 0) { + GLSL("color.rgb = "$" * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { + .var = pl_var_mat3("shaper_in"), + .data = PL_TRANSPOSE_3X3(lut->shaper_in.m), + })); + } + + switch (dims) { + case 1: + sh_describe(sh, "custom 1DLUT"); + GLSL("color.rgb = vec3("$"(color.r).r, \n" + " "$"(color.g).g, \n" + " "$"(color.b).b); \n", + fun, fun, fun); + break; + case 3: + sh_describe(sh, "custom 3DLUT"); + GLSL("color.rgb = "$"(color.rgb).rgb; \n", fun); + break; + } + + if (memcmp(&lut->shaper_out, &zero, sizeof(zero)) != 0) { + GLSL("color.rgb = "$" * color.rgb; \n", sh_var(sh, (struct pl_shader_var) { + .var = pl_var_mat3("shaper_out"), + .data = PL_TRANSPOSE_3X3(lut->shaper_out.m), + })); + } +} + +// Defines a LUT position helper macro. This translates from an absolute texel +// scale (either in texels, or normalized to [0,1]) to the texture coordinate +// scale for the corresponding sample in a texture of dimension `lut_size`. +static ident_t texel_scale(pl_shader sh, int lut_size, bool normalized) +{ + const float base = 0.5f / lut_size; + const float end = 1.0f - 0.5f / lut_size; + const float scale = (end - base) / (normalized ? 1.0f : (lut_size - 1)); + + ident_t name = sh_fresh(sh, "LUT_SCALE"); + GLSLH("#define "$"(x) ("$" * (x) + "$") \n", + name, SH_FLOAT(scale), SH_FLOAT(base)); + return name; +} + +struct sh_lut_obj { + enum sh_lut_type type; + enum sh_lut_method method; + enum pl_var_type vartype; + pl_fmt fmt; + int width, height, depth, comps; + uint64_t signature; + bool error; // reset if params change + + // weights, depending on the lut type + pl_tex tex; + pl_str str; + void *data; +}; + +static void sh_lut_uninit(pl_gpu gpu, void *ptr) +{ + struct sh_lut_obj *lut = ptr; + pl_tex_destroy(gpu, &lut->tex); + pl_free(lut->str.buf); + pl_free(lut->data); + + *lut = (struct sh_lut_obj) {0}; +} + +// Maximum number of floats to embed as a literal array (when using SH_LUT_AUTO) +#define SH_LUT_MAX_LITERAL_SOFT 64 +#define SH_LUT_MAX_LITERAL_HARD 256 + +ident_t sh_lut(pl_shader sh, const struct sh_lut_params *params) +{ + pl_gpu gpu = SH_GPU(sh); + pl_cache_obj obj = { .key = CACHE_KEY_SH_LUT ^ params->signature }; + + const enum pl_var_type vartype = params->var_type; + pl_assert(vartype != PL_VAR_INVALID); + pl_assert(params->method == SH_LUT_NONE || vartype == PL_VAR_FLOAT); + pl_assert(params->width > 0 && params->height >= 0 && params->depth >= 0); + pl_assert(params->comps > 0); + pl_assert(!params->cache || params->signature); + + int sizes[] = { params->width, params->height, params->depth }; + int size = params->width * PL_DEF(params->height, 1) * PL_DEF(params->depth, 1); + int dims = params->depth ? 3 : params->height ? 2 : 1; + enum sh_lut_method method = params->method; + if (method == SH_LUT_TETRAHEDRAL && dims != 3) + method = SH_LUT_LINEAR; + if (method == SH_LUT_CUBIC && dims != 3) + method = SH_LUT_LINEAR; + + int texdim = 0; + uint32_t max_tex_dim[] = { + gpu ? gpu->limits.max_tex_1d_dim : 0, + gpu ? gpu->limits.max_tex_2d_dim : 0, + (gpu && gpu->glsl.version > 100) ? gpu->limits.max_tex_3d_dim : 0, + }; + + struct sh_lut_obj *lut = SH_OBJ(sh, params->object, PL_SHADER_OBJ_LUT, + struct sh_lut_obj, sh_lut_uninit); + + if (!lut) + return NULL_IDENT; + + bool update = params->update || lut->signature != params->signature || + vartype != lut->vartype || params->fmt != lut->fmt || + params->width != lut->width || params->height != lut->height || + params->depth != lut->depth || params->comps != lut->comps; + + if (lut->error && !update) + return NULL_IDENT; // suppress error spam until something changes + + // Try picking the right number of dimensions for the texture LUT. This + // allows e.g. falling back to 2D textures if 1D textures are unsupported. + for (int d = dims; d <= PL_ARRAY_SIZE(max_tex_dim); d++) { + // For a given dimension to be compatible, all coordinates need to be + // within the maximum texture size for that dimension + for (int i = 0; i < d; i++) { + if (sizes[i] > max_tex_dim[d - 1]) + goto next_dim; + } + + // All dimensions are compatible, so pick this texture dimension + texdim = d; + break; + +next_dim: ; // `continue` out of the inner loop + } + + static const enum pl_fmt_type fmt_type[PL_VAR_TYPE_COUNT] = { + [PL_VAR_SINT] = PL_FMT_SINT, + [PL_VAR_UINT] = PL_FMT_UINT, + [PL_VAR_FLOAT] = PL_FMT_FLOAT, + }; + + enum pl_fmt_caps texcaps = PL_FMT_CAP_SAMPLEABLE; + bool is_linear = method == SH_LUT_LINEAR || method == SH_LUT_CUBIC; + if (is_linear) + texcaps |= PL_FMT_CAP_LINEAR; + + pl_fmt texfmt = params->fmt; + if (texfmt) { + bool ok; + switch (texfmt->type) { + case PL_FMT_SINT: ok = vartype == PL_VAR_SINT; break; + case PL_FMT_UINT: ok = vartype == PL_VAR_UINT; break; + default: ok = vartype == PL_VAR_FLOAT; break; + } + + if (!ok) { + PL_ERR(sh, "Specified texture format '%s' does not match LUT " + "data type!", texfmt->name); + goto error; + } + + if (~texfmt->caps & texcaps) { + PL_ERR(sh, "Specified texture format '%s' does not match " + "required capabilities 0x%x!\n", texfmt->name, texcaps); + goto error; + } + } + + if (texdim && !texfmt) { + texfmt = pl_find_fmt(gpu, fmt_type[vartype], params->comps, + vartype == PL_VAR_FLOAT ? 16 : 32, + pl_var_type_size(vartype) * 8, + texcaps); + } + + enum sh_lut_type type = params->lut_type; + + // The linear sampling code currently only supports 1D linear interpolation + if (is_linear && dims > 1) { + if (texfmt) { + type = SH_LUT_TEXTURE; + } else { + PL_ERR(sh, "Can't emulate linear LUTs for 2D/3D LUTs and no " + "texture support available!"); + goto error; + } + } + + bool can_uniform = gpu && gpu->limits.max_variable_comps >= size * params->comps; + bool can_literal = sh_glsl(sh).version > 110; // needed for literal arrays + can_literal &= size <= SH_LUT_MAX_LITERAL_HARD && !params->dynamic; + + // Deselect unsupported methods + if (type == SH_LUT_UNIFORM && !can_uniform) + type = SH_LUT_AUTO; + if (type == SH_LUT_LITERAL && !can_literal) + type = SH_LUT_AUTO; + if (type == SH_LUT_TEXTURE && !texfmt) + type = SH_LUT_AUTO; + + // Sorted by priority + if (!type && can_literal && !method && size <= SH_LUT_MAX_LITERAL_SOFT) + type = SH_LUT_LITERAL; + if (!type && texfmt) + type = SH_LUT_TEXTURE; + if (!type && can_uniform) + type = SH_LUT_UNIFORM; + if (!type && can_literal) + type = SH_LUT_LITERAL; + + if (!type) { + PL_ERR(sh, "Can't generate LUT: no compatible methods!"); + goto error; + } + + // Reinitialize the existing LUT if needed + update |= type != lut->type; + update |= method != lut->method; + + if (update) { + if (params->dynamic) + pl_log_level_cap(sh->log, PL_LOG_TRACE); + + size_t el_size = params->comps * pl_var_type_size(vartype); + if (type == SH_LUT_TEXTURE) + el_size = texfmt->texel_size; + + size_t buf_size = size * el_size; + if (pl_cache_get(params->cache, &obj) && obj.size == buf_size) { + PL_DEBUG(sh, "Re-using cached LUT (0x%"PRIx64") with size %zu", + obj.key, obj.size); + } else { + PL_DEBUG(sh, "LUT invalidated, regenerating.."); + pl_cache_obj_resize(NULL, &obj, buf_size); + pl_clock_t start = pl_clock_now(); + params->fill(obj.data, params); + pl_log_cpu_time(sh->log, start, pl_clock_now(), "generating shader LUT"); + } + + pl_assert(obj.data && obj.size); + if (params->dynamic) + pl_log_level_cap(sh->log, PL_LOG_NONE); + + switch (type) { + case SH_LUT_TEXTURE: { + if (!texdim) { + PL_ERR(sh, "Texture LUT exceeds texture dimensions!"); + goto error; + } + + if (!texfmt) { + PL_ERR(sh, "Found no compatible texture format for LUT!"); + goto error; + } + + struct pl_tex_params tex_params = { + .w = params->width, + .h = PL_DEF(params->height, texdim >= 2 ? 1 : 0), + .d = PL_DEF(params->depth, texdim >= 3 ? 1 : 0), + .format = texfmt, + .sampleable = true, + .host_writable = params->dynamic, + .initial_data = params->dynamic ? NULL : obj.data, + .debug_tag = params->debug_tag, + }; + + bool ok; + if (params->dynamic) { + ok = pl_tex_recreate(gpu, &lut->tex, &tex_params); + if (ok) { + ok = pl_tex_upload(gpu, pl_tex_transfer_params( + .tex = lut->tex, + .ptr = obj.data, + )); + } + } else { + // Can't use pl_tex_recreate because of `initial_data` + pl_tex_destroy(gpu, &lut->tex); + lut->tex = pl_tex_create(gpu, &tex_params); + ok = lut->tex; + } + + if (!ok) { + PL_ERR(sh, "Failed creating LUT texture!"); + goto error; + } + break; + } + + case SH_LUT_UNIFORM: + pl_free(lut->data); + lut->data = pl_memdup(NULL, obj.data, obj.size); + break; + + case SH_LUT_LITERAL: { + lut->str.len = 0; + static const char prefix[PL_VAR_TYPE_COUNT] = { + [PL_VAR_SINT] = 'i', + [PL_VAR_UINT] = 'u', + [PL_VAR_FLOAT] = ' ', + }; + + for (int i = 0; i < size * params->comps; i += params->comps) { + if (i > 0) + pl_str_append_asprintf_c(lut, &lut->str, ","); + if (params->comps > 1) { + pl_str_append_asprintf_c(lut, &lut->str, "%cvec%d(", + prefix[vartype], params->comps); + } + for (int c = 0; c < params->comps; c++) { + switch (vartype) { + case PL_VAR_FLOAT: + pl_str_append_asprintf_c(lut, &lut->str, "%s%f", + c > 0 ? "," : "", + ((float *) obj.data)[i+c]); + break; + case PL_VAR_UINT: + pl_str_append_asprintf_c(lut, &lut->str, "%s%u", + c > 0 ? "," : "", + ((unsigned int *) obj.data)[i+c]); + break; + case PL_VAR_SINT: + pl_str_append_asprintf_c(lut, &lut->str, "%s%d", + c > 0 ? "," : "", + ((int *) obj.data)[i+c]); + break; + case PL_VAR_INVALID: + case PL_VAR_TYPE_COUNT: + pl_unreachable(); + } + } + if (params->comps > 1) + pl_str_append_asprintf_c(lut, &lut->str, ")"); + } + break; + } + + case SH_LUT_AUTO: + pl_unreachable(); + } + + lut->type = type; + lut->method = method; + lut->vartype = vartype; + lut->fmt = params->fmt; + lut->width = params->width; + lut->height = params->height; + lut->depth = params->depth; + lut->comps = params->comps; + lut->signature = params->signature; + pl_cache_set(params->cache, &obj); + } + + // Done updating, generate the GLSL + ident_t name = sh_fresh(sh, "lut"); + ident_t arr_name = NULL_IDENT; + + static const char * const swizzles[] = {"x", "xy", "xyz", "xyzw"}; + static const char * const vartypes[PL_VAR_TYPE_COUNT][4] = { + [PL_VAR_SINT] = { "int", "ivec2", "ivec3", "ivec4" }, + [PL_VAR_UINT] = { "uint", "uvec2", "uvec3", "uvec4" }, + [PL_VAR_FLOAT] = { "float", "vec2", "vec3", "vec4" }, + }; + + switch (type) { + case SH_LUT_TEXTURE: { + assert(texdim); + ident_t tex = sh_desc(sh, (struct pl_shader_desc) { + .desc = { + .name = "weights", + .type = PL_DESC_SAMPLED_TEX, + }, + .binding = { + .object = lut->tex, + .sample_mode = is_linear ? PL_TEX_SAMPLE_LINEAR + : PL_TEX_SAMPLE_NEAREST, + } + }); + + if (is_linear) { + ident_t pos_macros[PL_ARRAY_SIZE(sizes)] = {0}; + for (int i = 0; i < dims; i++) + pos_macros[i] = texel_scale(sh, sizes[i], true); + + GLSLH("#define "$"(pos) (textureLod("$", %s(\\\n", + name, tex, vartypes[PL_VAR_FLOAT][texdim - 1]); + + for (int i = 0; i < texdim; i++) { + char sep = i == 0 ? ' ' : ','; + if (pos_macros[i]) { + if (dims > 1) { + GLSLH(" %c"$"(%s(pos).%c)\\\n", sep, pos_macros[i], + vartypes[PL_VAR_FLOAT][dims - 1], "xyzw"[i]); + } else { + GLSLH(" %c"$"(float(pos))\\\n", sep, pos_macros[i]); + } + } else { + GLSLH(" %c%f\\\n", sep, 0.5); + } + } + GLSLH(" ), 0.0).%s)\n", swizzles[params->comps - 1]); + } else { + GLSLH("#define "$"(pos) (texelFetch("$", %s(pos", + name, tex, vartypes[PL_VAR_SINT][texdim - 1]); + + // Fill up extra components of the index + for (int i = dims; i < texdim; i++) + GLSLH(", 0"); + + GLSLH("), 0).%s)\n", swizzles[params->comps - 1]); + } + break; + } + + case SH_LUT_UNIFORM: + arr_name = sh_var(sh, (struct pl_shader_var) { + .var = { + .name = "weights", + .type = vartype, + .dim_v = params->comps, + .dim_m = 1, + .dim_a = size, + }, + .data = lut->data, + }); + break; + + case SH_LUT_LITERAL: + arr_name = sh_fresh(sh, "weights"); + GLSLH("const %s "$"[%d] = %s[](\n ", + vartypes[vartype][params->comps - 1], arr_name, size, + vartypes[vartype][params->comps - 1]); + sh_append_str(sh, SH_BUF_HEADER, lut->str); + GLSLH(");\n"); + break; + + case SH_LUT_AUTO: + pl_unreachable(); + } + + if (arr_name) { + GLSLH("#define "$"(pos) ("$"[int((pos)%s)\\\n", + name, arr_name, dims > 1 ? "[0]" : ""); + int shift = params->width; + for (int i = 1; i < dims; i++) { + GLSLH(" + %d * int((pos)[%d])\\\n", shift, i); + shift *= sizes[i]; + } + GLSLH(" ])\n"); + + if (is_linear) { + pl_assert(dims == 1); + pl_assert(vartype == PL_VAR_FLOAT); + ident_t arr_lut = name; + name = sh_fresh(sh, "lut_lin"); + GLSLH("%s "$"(float fpos) { \n" + " fpos = clamp(fpos, 0.0, 1.0) * %d.0; \n" + " float fbase = floor(fpos); \n" + " float fceil = ceil(fpos); \n" + " float fcoord = fpos - fbase; \n" + " return mix("$"(fbase), "$"(fceil), fcoord); \n" + "} \n", + vartypes[PL_VAR_FLOAT][params->comps - 1], name, + size - 1, + arr_lut, arr_lut); + } + } + + if (method == SH_LUT_CUBIC && dims == 3) { + ident_t lin_lut = name; + name = sh_fresh(sh, "lut_tricubic"); + GLSLH("%s "$"(vec3 pos) { \n" + " vec3 scale = vec3(%d.0, %d.0, %d.0); \n" + " vec3 scale_inv = 1.0 / scale; \n" + " pos *= scale; \n" + " vec3 fpos = fract(pos); \n" + " vec3 base = pos - fpos; \n" + " vec3 fpos2 = fpos * fpos; \n" + " vec3 inv = 1.0 - fpos; \n" + " vec3 inv2 = inv * inv; \n" + " vec3 w0 = 1.0/6.0 * inv2 * inv; \n" + " vec3 w1 = 2.0/3.0 - 0.5 * fpos2 * (2.0 - fpos); \n" + " vec3 w2 = 2.0/3.0 - 0.5 * inv2 * (2.0 - inv); \n" + " vec3 w3 = 1.0/6.0 * fpos2 * fpos; \n" + " vec3 g0 = w0 + w1; \n" + " vec3 g1 = w2 + w3; \n" + " vec3 h0 = scale_inv * ((w1 / g0) - 1.0 + base); \n" + " vec3 h1 = scale_inv * ((w3 / g1) + 1.0 + base); \n" + " %s c000, c001, c010, c011, c100, c101, c110, c111; \n" + " c000 = "$"(h0); \n" + " c100 = "$"(vec3(h1.x, h0.y, h0.z)); \n" + " c000 = mix(c100, c000, g0.x); \n" + " c010 = "$"(vec3(h0.x, h1.y, h0.z)); \n" + " c110 = "$"(vec3(h1.x, h1.y, h0.z)); \n" + " c010 = mix(c110, c010, g0.x); \n" + " c000 = mix(c010, c000, g0.y); \n" + " c001 = "$"(vec3(h0.x, h0.y, h1.z)); \n" + " c101 = "$"(vec3(h1.x, h0.y, h1.z)); \n" + " c001 = mix(c101, c001, g0.x); \n" + " c011 = "$"(vec3(h0.x, h1.y, h1.z)); \n" + " c111 = "$"(h1); \n" + " c011 = mix(c111, c011, g0.x); \n" + " c001 = mix(c011, c001, g0.y); \n" + " return mix(c001, c000, g0.z); \n" + "} \n", + vartypes[PL_VAR_FLOAT][params->comps - 1], name, + sizes[0] - 1, sizes[1] - 1, sizes[2] - 1, + vartypes[PL_VAR_FLOAT][params->comps - 1], + lin_lut, lin_lut, lin_lut, lin_lut, + lin_lut, lin_lut, lin_lut, lin_lut); + } + + if (method == SH_LUT_TETRAHEDRAL) { + ident_t int_lut = name; + name = sh_fresh(sh, "lut_barycentric"); + GLSLH("%s "$"(vec3 pos) { \n" + // Compute bounding vertices and fractional part + " pos = clamp(pos, 0.0, 1.0) * vec3(%d.0, %d.0, %d.0); \n" + " vec3 base = floor(pos); \n" + " vec3 fpart = pos - base; \n" + // v0 and v3 are always 'black' and 'white', respectively + // v1 and v2 are the closest RGB and CMY vertices, respectively + " ivec3 v0 = ivec3(base), v3 = ivec3(ceil(pos)); \n" + " ivec3 v1 = v0, v2 = v3; \n" + // Table of boolean checks to simplify following math + " bvec3 c = greaterThanEqual(fpart.xyz, fpart.yzx); \n" + " bool c_xy = c.x, c_yx = !c.x, \n" + " c_yz = c.y, c_zy = !c.y, \n" + " c_zx = c.z, c_xz = !c.z; \n" + " vec3 s = fpart.xyz; \n" + " bool cond; \n", + vartypes[PL_VAR_FLOAT][params->comps - 1], name, + sizes[0] - 1, sizes[1] - 1, sizes[2] - 1); + + // Subdivision of the cube into six congruent tetrahedras + // + // For each tetrahedron, test if the point is inside, and if so, update + // the edge vertices. We test all six, even though only one case will + // ever be true, because this avoids branches. + static const char *indices[] = { "xyz", "xzy", "zxy", "zyx", "yzx", "yxz"}; + for (int i = 0; i < PL_ARRAY_SIZE(indices); i++) { + const char x = indices[i][0], y = indices[i][1], z = indices[i][2]; + GLSLH("cond = c_%c%c && c_%c%c; \n" + "s = cond ? fpart.%c%c%c : s; \n" + "v1.%c = cond ? v3.%c : v1.%c; \n" + "v2.%c = cond ? v0.%c : v2.%c; \n", + x, y, y, z, + x, y, z, + x, x, x, + z, z, z); + } + + // Interpolate in barycentric coordinates, with four texel fetches + GLSLH(" return (1.0 - s.x) * "$"(v0) + \n" + " (s.x - s.y) * "$"(v1) + \n" + " (s.y - s.z) * "$"(v2) + \n" + " (s.z) * "$"(v3); \n" + "} \n", + int_lut, int_lut, int_lut, int_lut); + } + + lut->error = false; + pl_cache_obj_free(&obj); + pl_assert(name); + return name; + +error: + lut->error = true; + pl_cache_obj_free(&obj); + return NULL_IDENT; +} -- cgit v1.2.3