diff options
Diffstat (limited to 'src/tests/gpu_tests.h')
-rw-r--r-- | src/tests/gpu_tests.h | 1741 |
1 files changed, 1741 insertions, 0 deletions
diff --git a/src/tests/gpu_tests.h b/src/tests/gpu_tests.h new file mode 100644 index 0000000..f14f260 --- /dev/null +++ b/src/tests/gpu_tests.h @@ -0,0 +1,1741 @@ +#include "tests.h" +#include "shaders.h" + +#include <libplacebo/renderer.h> +#include <libplacebo/utils/frame_queue.h> +#include <libplacebo/utils/upload.h> + +//#define PRINT_OUTPUT + +static void pl_buffer_tests(pl_gpu gpu) +{ + const size_t buf_size = 1024; + if (buf_size > gpu->limits.max_buf_size) + return; + + uint8_t *test_src = malloc(buf_size * 2); + uint8_t *test_dst = test_src + buf_size; + assert(test_src && test_dst); + memset(test_dst, 0, buf_size); + for (int i = 0; i < buf_size; i++) + test_src[i] = RANDOM_U8; + + pl_buf buf = NULL, tbuf = NULL; + + printf("test buffer static creation and readback\n"); + buf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .host_readable = true, + .initial_data = test_src, + )); + + REQUIRE(buf); + REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size)); + REQUIRE_MEMEQ(test_src, test_dst, buf_size); + pl_buf_destroy(gpu, &buf); + + printf("test buffer empty creation, update and readback\n"); + memset(test_dst, 0, buf_size); + buf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .host_writable = true, + .host_readable = true, + )); + + REQUIRE(buf); + pl_buf_write(gpu, buf, 0, test_src, buf_size); + REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size)); + REQUIRE_MEMEQ(test_src, test_dst, buf_size); + pl_buf_destroy(gpu, &buf); + + printf("test buffer-buffer copy and readback\n"); + memset(test_dst, 0, buf_size); + buf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .initial_data = test_src, + )); + + tbuf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .host_readable = true, + )); + + REQUIRE(buf && tbuf); + pl_buf_copy(gpu, tbuf, 0, buf, 0, buf_size); + REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size)); + REQUIRE_MEMEQ(test_src, test_dst, buf_size); + pl_buf_destroy(gpu, &buf); + pl_buf_destroy(gpu, &tbuf); + + if (buf_size <= gpu->limits.max_mapped_size) { + printf("test host mapped buffer readback\n"); + buf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .host_mapped = true, + .initial_data = test_src, + )); + + REQUIRE(buf); + REQUIRE(!pl_buf_poll(gpu, buf, 0)); + REQUIRE_MEMEQ(test_src, buf->data, buf_size); + pl_buf_destroy(gpu, &buf); + } + + // `compute_queues` check is to exclude dummy GPUs here + if (buf_size <= gpu->limits.max_ssbo_size && gpu->limits.compute_queues) + { + printf("test endian swapping\n"); + buf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .storable = true, + .initial_data = test_src, + )); + + tbuf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .storable = true, + .host_readable = true, + )); + + REQUIRE(buf && tbuf); + REQUIRE(pl_buf_copy_swap(gpu, &(struct pl_buf_copy_swap_params) { + .src = buf, + .dst = tbuf, + .size = buf_size, + .wordsize = 2, + })); + REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size)); + for (int i = 0; i < buf_size / 2; i++) { + REQUIRE_CMP(test_src[2 * i + 0], ==, test_dst[2 * i + 1], PRIu8); + REQUIRE_CMP(test_src[2 * i + 1], ==, test_dst[2 * i + 0], PRIu8); + } + // test endian swap in-place + REQUIRE(pl_buf_copy_swap(gpu, &(struct pl_buf_copy_swap_params) { + .src = tbuf, + .dst = tbuf, + .size = buf_size, + .wordsize = 4, + })); + REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size)); + for (int i = 0; i < buf_size / 4; i++) { + REQUIRE_CMP(test_src[4 * i + 0], ==, test_dst[4 * i + 2], PRIu8); + REQUIRE_CMP(test_src[4 * i + 1], ==, test_dst[4 * i + 3], PRIu8); + REQUIRE_CMP(test_src[4 * i + 2], ==, test_dst[4 * i + 0], PRIu8); + REQUIRE_CMP(test_src[4 * i + 3], ==, test_dst[4 * i + 1], PRIu8); + } + pl_buf_destroy(gpu, &buf); + pl_buf_destroy(gpu, &tbuf); + } + + free(test_src); +} + +static void test_cb(void *priv) +{ + bool *flag = priv; + *flag = true; +} + +static void pl_test_roundtrip(pl_gpu gpu, pl_tex tex[2], + uint8_t *src, uint8_t *dst) +{ + if (!tex[0] || !tex[1]) { + printf("failed creating test textures... skipping this test\n"); + return; + } + + int texels = tex[0]->params.w; + texels *= tex[0]->params.h ? tex[0]->params.h : 1; + texels *= tex[0]->params.d ? tex[0]->params.d : 1; + + pl_fmt fmt = tex[0]->params.format; + size_t bytes = texels * fmt->texel_size; + memset(src, 0, bytes); + memset(dst, 0, bytes); + + for (size_t i = 0; i < bytes; i++) + src[i] = RANDOM_U8; + + pl_timer ul, dl; + ul = pl_timer_create(gpu); + dl = pl_timer_create(gpu); + + bool ran_ul = false, ran_dl = false; + + REQUIRE(pl_tex_upload(gpu, &(struct pl_tex_transfer_params){ + .tex = tex[0], + .ptr = src, + .timer = ul, + .callback = gpu->limits.callbacks ? test_cb : NULL, + .priv = &ran_ul, + })); + + // Test blitting, if possible for this format + pl_tex dst_tex = tex[0]; + if (tex[0]->params.blit_src && tex[1]->params.blit_dst) { + pl_tex_clear_ex(gpu, tex[1], (union pl_clear_color){0}); // for testing + pl_tex_blit(gpu, &(struct pl_tex_blit_params) { + .src = tex[0], + .dst = tex[1], + }); + dst_tex = tex[1]; + } + + REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params){ + .tex = dst_tex, + .ptr = dst, + .timer = dl, + .callback = gpu->limits.callbacks ? test_cb : NULL, + .priv = &ran_dl, + })); + + pl_gpu_finish(gpu); + if (gpu->limits.callbacks) + REQUIRE(ran_ul && ran_dl); + + if (fmt->emulated && fmt->type == PL_FMT_FLOAT) { + // TODO: can't memcmp here because bits might be lost due to the + // emulated 16/32 bit upload paths, figure out a better way to + // generate data and verify the roundtrip! + } else { + REQUIRE_MEMEQ(src, dst, bytes); + } + + // Report timer results + printf("upload time: %"PRIu64", download time: %"PRIu64"\n", + pl_timer_query(gpu, ul), pl_timer_query(gpu, dl)); + + pl_timer_destroy(gpu, &ul); + pl_timer_destroy(gpu, &dl); +} + +static void pl_texture_tests(pl_gpu gpu) +{ + const size_t max_size = 16*16*16 * 4 *sizeof(double); + uint8_t *test_src = malloc(max_size * 2); + uint8_t *test_dst = test_src + max_size; + + for (int f = 0; f < gpu->num_formats; f++) { + pl_fmt fmt = gpu->formats[f]; + if (fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE)) + continue; + + printf("testing texture roundtrip for format %s\n", fmt->name); + assert(fmt->texel_size <= 4 * sizeof(double)); + + struct pl_tex_params ref_params = { + .format = fmt, + .blit_src = (fmt->caps & PL_FMT_CAP_BLITTABLE), + .blit_dst = (fmt->caps & PL_FMT_CAP_BLITTABLE), + .host_writable = true, + .host_readable = true, + .debug_tag = PL_DEBUG_TAG, + }; + + pl_tex tex[2]; + + if (gpu->limits.max_tex_1d_dim >= 16) { + printf("... 1D\n"); + struct pl_tex_params params = ref_params; + params.w = 16; + if (!gpu->limits.blittable_1d_3d) + params.blit_src = params.blit_dst = false; + for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) + tex[i] = pl_tex_create(gpu, ¶ms); + pl_test_roundtrip(gpu, tex, test_src, test_dst); + for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) + pl_tex_destroy(gpu, &tex[i]); + } + + if (gpu->limits.max_tex_2d_dim >= 16) { + printf("... 2D\n"); + struct pl_tex_params params = ref_params; + params.w = params.h = 16; + for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) + tex[i] = pl_tex_create(gpu, ¶ms); + pl_test_roundtrip(gpu, tex, test_src, test_dst); + for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) + pl_tex_destroy(gpu, &tex[i]); + } + + if (gpu->limits.max_tex_3d_dim >= 16) { + printf("... 3D\n"); + struct pl_tex_params params = ref_params; + params.w = params.h = params.d = 16; + if (!gpu->limits.blittable_1d_3d) + params.blit_src = params.blit_dst = false; + for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) + tex[i] = pl_tex_create(gpu, ¶ms); + pl_test_roundtrip(gpu, tex, test_src, test_dst); + for (int i = 0; i < PL_ARRAY_SIZE(tex); i++) + pl_tex_destroy(gpu, &tex[i]); + } + } + + free(test_src); +} + +static void pl_planar_tests(pl_gpu gpu) +{ + pl_fmt fmt = pl_find_named_fmt(gpu, "g8_b8_r8_420"); + if (!fmt) + return; + REQUIRE_CMP(fmt->num_planes, ==, 3, "d"); + + const int width = 64, height = 32; + pl_tex tex = pl_tex_create(gpu, pl_tex_params( + .w = width, + .h = height, + .format = fmt, + .blit_dst = true, + .host_readable = true, + )); + if (!tex) + return; + for (int i = 0; i < fmt->num_planes; i++) + REQUIRE(tex->planes[i]); + + pl_tex plane = tex->planes[1]; + uint8_t data[(width * height) >> 2]; + REQUIRE_CMP(plane->params.w * plane->params.h, ==, PL_ARRAY_SIZE(data), "d"); + + pl_tex_clear(gpu, plane, (float[]){ (float) 0x80 / 0xFF, 0.0, 0.0, 1.0 }); + REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params( + .tex = plane, + .ptr = data, + ))); + + uint8_t ref[PL_ARRAY_SIZE(data)]; + memset(ref, 0x80, sizeof(ref)); + REQUIRE_MEMEQ(data, ref, PL_ARRAY_SIZE(data)); + + pl_tex_destroy(gpu, &tex); +} + +static void pl_shader_tests(pl_gpu gpu) +{ + if (gpu->glsl.version < 410) + return; + + const char *vert_shader = + "#version 410 \n" + "layout(location=0) in vec2 vertex_pos; \n" + "layout(location=1) in vec3 vertex_color; \n" + "layout(location=0) out vec3 frag_color; \n" + "void main() { \n" + " gl_Position = vec4(vertex_pos, 0, 1); \n" + " frag_color = vertex_color; \n" + "}"; + + const char *frag_shader = + "#version 410 \n" + "layout(location=0) in vec3 frag_color; \n" + "layout(location=0) out vec4 out_color; \n" + "void main() { \n" + " out_color = vec4(frag_color, 1.0); \n" + "}"; + + pl_fmt fbo_fmt; + enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE | + PL_FMT_CAP_LINEAR; + + fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, caps); + if (!fbo_fmt) + return; + +#define FBO_W 16 +#define FBO_H 16 + + pl_tex fbo; + fbo = pl_tex_create(gpu, &(struct pl_tex_params) { + .format = fbo_fmt, + .w = FBO_W, + .h = FBO_H, + .renderable = true, + .storable = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE), + .host_readable = true, + .blit_dst = true, + }); + REQUIRE(fbo); + + pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0}); + + pl_fmt vert_fmt; + vert_fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3); + REQUIRE(vert_fmt); + + static const struct vertex { float pos[2]; float color[3]; } vertices[] = { + {{-1.0, -1.0}, {0, 0, 0}}, + {{ 1.0, -1.0}, {1, 0, 0}}, + {{-1.0, 1.0}, {0, 1, 0}}, + {{ 1.0, 1.0}, {1, 1, 0}}, + }; + + pl_pass pass; + pass = pl_pass_create(gpu, &(struct pl_pass_params) { + .type = PL_PASS_RASTER, + .target_format = fbo_fmt, + .vertex_shader = vert_shader, + .glsl_shader = frag_shader, + + .vertex_type = PL_PRIM_TRIANGLE_STRIP, + .vertex_stride = sizeof(struct vertex), + .num_vertex_attribs = 2, + .vertex_attribs = (struct pl_vertex_attrib[]) {{ + .name = "vertex_pos", + .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), + .location = 0, + .offset = offsetof(struct vertex, pos), + }, { + .name = "vertex_color", + .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3), + .location = 1, + .offset = offsetof(struct vertex, color), + }}, + }); + REQUIRE(pass); + if (pass->params.cached_program || pass->params.cached_program_len) { + // Ensure both are set if either one is set + REQUIRE(pass->params.cached_program); + REQUIRE(pass->params.cached_program_len); + } + + pl_timer timer = pl_timer_create(gpu); + pl_pass_run(gpu, &(struct pl_pass_run_params) { + .pass = pass, + .target = fbo, + .vertex_count = PL_ARRAY_SIZE(vertices), + .vertex_data = vertices, + .timer = timer, + }); + + // Wait until this pass is complete and report the timer result + pl_gpu_finish(gpu); + printf("timer query result: %"PRIu64"\n", pl_timer_query(gpu, timer)); + pl_timer_destroy(gpu, &timer); + + static float test_data[FBO_H * FBO_W * 4] = {0}; + + // Test against the known pattern of `src`, only useful for roundtrip tests +#define TEST_FBO_PATTERN(eps, fmt, ...) \ + do { \ + printf("testing pattern of " fmt "\n", __VA_ARGS__); \ + REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { \ + .tex = fbo, \ + .ptr = test_data, \ + })); \ + \ + for (int y = 0; y < FBO_H; y++) { \ + for (int x = 0; x < FBO_W; x++) { \ + float *color = &test_data[(y * FBO_W + x) * 4]; \ + REQUIRE_FEQ(color[0], (x + 0.5) / FBO_W, eps); \ + REQUIRE_FEQ(color[1], (y + 0.5) / FBO_H, eps); \ + REQUIRE_FEQ(color[2], 0.0, eps); \ + REQUIRE_FEQ(color[3], 1.0, eps); \ + } \ + } \ + } while (0) + + TEST_FBO_PATTERN(1e-6, "%s", "initial rendering"); + + if (sizeof(vertices) <= gpu->limits.max_vbo_size) { + // Test the use of an explicit vertex buffer + pl_buf vert = pl_buf_create(gpu, &(struct pl_buf_params) { + .size = sizeof(vertices), + .initial_data = vertices, + .drawable = true, + }); + + REQUIRE(vert); + pl_pass_run(gpu, &(struct pl_pass_run_params) { + .pass = pass, + .target = fbo, + .vertex_count = sizeof(vertices) / sizeof(struct vertex), + .vertex_buf = vert, + .buf_offset = 0, + }); + + pl_buf_destroy(gpu, &vert); + TEST_FBO_PATTERN(1e-6, "%s", "using vertex buffer"); + } + + // Test the use of index buffers + static const uint16_t indices[] = { 3, 2, 1, 0 }; + pl_pass_run(gpu, &(struct pl_pass_run_params) { + .pass = pass, + .target = fbo, + .vertex_count = PL_ARRAY_SIZE(indices), + .vertex_data = vertices, + .index_data = indices, + }); + + pl_pass_destroy(gpu, &pass); + TEST_FBO_PATTERN(1e-6, "%s", "using indexed rendering"); + + // Test the use of pl_dispatch + pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); + pl_shader sh = pl_dispatch_begin(dp); + REQUIRE(pl_shader_custom(sh, &(struct pl_custom_shader) { + .body = "color = vec4(col, 1.0);", + .input = PL_SHADER_SIG_NONE, + .output = PL_SHADER_SIG_COLOR, + })); + + REQUIRE(pl_dispatch_vertex(dp, &(struct pl_dispatch_vertex_params) { + .shader = &sh, + .target = fbo, + .vertex_stride = sizeof(struct vertex), + .vertex_position_idx = 0, + .num_vertex_attribs = 2, + .vertex_attribs = (struct pl_vertex_attrib[]) {{ + .name = "pos", + .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2), + .offset = offsetof(struct vertex, pos), + }, { + .name = "col", + .fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3), + .offset = offsetof(struct vertex, color), + }}, + + .vertex_type = PL_PRIM_TRIANGLE_STRIP, + .vertex_coords = PL_COORDS_NORMALIZED, + .vertex_count = PL_ARRAY_SIZE(vertices), + .vertex_data = vertices, + })); + + TEST_FBO_PATTERN(1e-6, "%s", "using custom vertices"); + + static float src_data[FBO_H * FBO_W * 4] = {0}; + memcpy(src_data, test_data, sizeof(src_data)); + + pl_tex src; + src = pl_tex_create(gpu, &(struct pl_tex_params) { + .format = fbo_fmt, + .w = FBO_W, + .h = FBO_H, + .storable = fbo->params.storable, + .sampleable = true, + .initial_data = src_data, + }); + + if (fbo->params.storable) { + // Test 1x1 blit, to make sure the scaling code runs + REQUIRE(pl_tex_blit_compute(gpu, &(struct pl_tex_blit_params) { + .src = src, + .dst = fbo, + .src_rc = {0, 0, 0, 1, 1, 1}, + .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1}, + .sample_mode = PL_TEX_SAMPLE_NEAREST, + })); + + // Test non-resizing blit, which uses the efficient imageLoad path + REQUIRE(pl_tex_blit_compute(gpu, &(struct pl_tex_blit_params) { + .src = src, + .dst = fbo, + .src_rc = {0, 0, 0, FBO_W, FBO_H, 1}, + .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1}, + .sample_mode = PL_TEX_SAMPLE_NEAREST, + })); + + TEST_FBO_PATTERN(1e-6, "%s", "pl_tex_blit_compute"); + } + + // Test encoding/decoding of all gamma functions, color spaces, etc. + for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) { + struct pl_color_space test_csp = { + .transfer = trc, + .hdr.min_luma = PL_COLOR_HDR_BLACK, + }; + sh = pl_dispatch_begin(dp); + pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); + pl_shader_delinearize(sh, &test_csp); + pl_shader_linearize(sh, &test_csp); + REQUIRE(pl_dispatch_finish(dp, pl_dispatch_params( + .shader = &sh, + .target = fbo, + ))); + + float epsilon = pl_color_transfer_is_hdr(trc) ? 1e-4 : 1e-6; + TEST_FBO_PATTERN(epsilon, "transfer function %d", (int) trc); + } + + for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) { + if (sys == PL_COLOR_SYSTEM_DOLBYVISION) + continue; // requires metadata + sh = pl_dispatch_begin(dp); + pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); + pl_shader_encode_color(sh, &(struct pl_color_repr) { .sys = sys }); + pl_shader_decode_color(sh, &(struct pl_color_repr) { .sys = sys }, NULL); + REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { + .shader = &sh, + .target = fbo, + })); + + float epsilon; + switch (sys) { + case PL_COLOR_SYSTEM_BT_2020_C: + case PL_COLOR_SYSTEM_XYZ: + epsilon = 1e-5; + break; + + case PL_COLOR_SYSTEM_BT_2100_PQ: + case PL_COLOR_SYSTEM_BT_2100_HLG: + // These seem to be horrifically noisy and prone to breaking on + // edge cases for some reason + // TODO: figure out why! + continue; + + default: epsilon = 1e-6; break; + } + + TEST_FBO_PATTERN(epsilon, "color system %d", (int) sys); + } + + // Repeat this a few times to test the caching + pl_cache cache = pl_cache_create(pl_cache_params( .log = gpu->log )); + pl_gpu_set_cache(gpu, cache); + for (int i = 0; i < 10; i++) { + if (i == 5) { + printf("Recreating pl_dispatch to test the caching\n"); + size_t size = pl_dispatch_save(dp, NULL); + REQUIRE(size); + uint8_t *cache_data = malloc(size); + REQUIRE(cache_data); + REQUIRE_CMP(pl_dispatch_save(dp, cache_data), ==, size, "zu"); + + pl_dispatch_destroy(&dp); + dp = pl_dispatch_create(gpu->log, gpu); + pl_dispatch_load(dp, cache_data); + + // Test to make sure the pass regenerates the same cache + uint64_t hash = pl_str_hash((pl_str) { cache_data, size }); + REQUIRE_CMP(pl_dispatch_save(dp, NULL), ==, size, "zu"); + REQUIRE_CMP(pl_dispatch_save(dp, cache_data), ==, size, "zu"); + REQUIRE_CMP(pl_str_hash((pl_str) { cache_data, size }), ==, hash, PRIu64); + free(cache_data); + } + + sh = pl_dispatch_begin(dp); + + // For testing, force the use of CS if possible + if (gpu->glsl.compute) { + sh->type = SH_COMPUTE; + sh->group_size[0] = 8; + sh->group_size[1] = 8; + } + + pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params( + .iterations = 0, + .grain = 0.0, + )); + + REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { + .shader = &sh, + .target = fbo, + })); + TEST_FBO_PATTERN(1e-6, "deband iter %d", i); + } + + pl_gpu_set_cache(gpu, NULL); + pl_cache_destroy(&cache); + + // Test peak detection and readback if possible + sh = pl_dispatch_begin(dp); + pl_shader_sample_nearest(sh, pl_sample_src( .tex = src )); + + pl_shader_obj peak_state = NULL; + struct pl_color_space csp_gamma22 = { .transfer = PL_COLOR_TRC_GAMMA22 }; + struct pl_peak_detect_params peak_params = { .minimum_peak = 0.01 }; + if (pl_shader_detect_peak(sh, csp_gamma22, &peak_state, &peak_params)) { + REQUIRE(pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) { + .shader = &sh, + .width = fbo->params.w, + .height = fbo->params.h, + })); + + float peak, avg; + REQUIRE(pl_get_detected_peak(peak_state, &peak, &avg)); + + float real_peak = 0, real_avg = 0; + for (int y = 0; y < FBO_H; y++) { + for (int x = 0; x < FBO_W; x++) { + float *color = &src_data[(y * FBO_W + x) * 4]; + float luma = 0.212639f * powf(color[0], 2.2f) + + 0.715169f * powf(color[1], 2.2f) + + 0.072192f * powf(color[2], 2.2f); + luma = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, luma); + real_peak = PL_MAX(real_peak, luma); + real_avg += luma; + } + } + real_avg = real_avg / (FBO_W * FBO_H); + + real_avg = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, real_avg); + real_peak = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, real_peak); + REQUIRE_FEQ(peak, real_peak, 1e-3); + REQUIRE_FEQ(avg, real_avg, 1e-2); + } + + pl_dispatch_abort(dp, &sh); + pl_shader_obj_destroy(&peak_state); + + // Test film grain synthesis + pl_shader_obj grain = NULL; + struct pl_film_grain_params grain_params = { + .tex = src, + .components = 3, + .component_mapping = { 0, 1, 2}, + .repr = &(struct pl_color_repr) { + .sys = PL_COLOR_SYSTEM_BT_709, + .levels = PL_COLOR_LEVELS_LIMITED, + .bits = { .color_depth = 10, .sample_depth = 10 }, + }, + }; + + for (int i = 0; i < 2; i++) { + grain_params.data.type = PL_FILM_GRAIN_AV1; + grain_params.data.params.av1 = av1_grain_data; + grain_params.data.params.av1.overlap = !!i; + grain_params.data.seed = rand(); + + sh = pl_dispatch_begin(dp); + pl_shader_film_grain(sh, &grain, &grain_params); + REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { + .shader = &sh, + .target = fbo, + })); + } + + if (gpu->glsl.compute) { + grain_params.data.type = PL_FILM_GRAIN_H274; + grain_params.data.params.h274 = h274_grain_data; + grain_params.data.seed = rand(); + + sh = pl_dispatch_begin(dp); + pl_shader_film_grain(sh, &grain, &grain_params); + REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { + .shader = &sh, + .target = fbo, + })); + } + pl_shader_obj_destroy(&grain); + + // Test custom shaders + struct pl_custom_shader custom = { + .header = + "vec3 invert(vec3 color) \n" + "{ \n" + " return vec3(1.0) - color; \n" + "} \n", + + .body = + "color = vec4(gl_FragCoord.xy, 0.0, 1.0); \n" + "color.rgb = invert(color.rgb) + offset; \n", + + .input = PL_SHADER_SIG_NONE, + .output = PL_SHADER_SIG_COLOR, + + .num_variables = 1, + .variables = &(struct pl_shader_var) { + .var = pl_var_float("offset"), + .data = &(float) { 0.1 }, + }, + }; + + sh = pl_dispatch_begin(dp); + REQUIRE(pl_shader_custom(sh, &custom)); + REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { + .shader = &sh, + .target = fbo, + })); + + // Test dolbyvision + struct pl_color_repr repr = { + .sys = PL_COLOR_SYSTEM_DOLBYVISION, + .dovi = &dovi_meta, + }; + + sh = pl_dispatch_begin(dp); + pl_shader_sample_direct(sh, pl_sample_src( .tex = src )); + pl_shader_decode_color(sh, &repr, NULL); + REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { + .shader = &sh, + .target = fbo, + })); + + // Test deinterlacing + sh = pl_dispatch_begin(dp); + pl_shader_deinterlace(sh, pl_deinterlace_source( .cur = pl_field_pair(src) ), NULL); + REQUIRE(pl_dispatch_finish(dp, pl_dispatch_params( + .shader = &sh, + .target = fbo, + ))); + + // Test error diffusion + if (fbo->params.storable) { + for (int i = 0; i < pl_num_error_diffusion_kernels; i++) { + const struct pl_error_diffusion_kernel *k = pl_error_diffusion_kernels[i]; + printf("testing error diffusion kernel '%s'\n", k->name); + sh = pl_dispatch_begin(dp); + bool ok = pl_shader_error_diffusion(sh, pl_error_diffusion_params( + .input_tex = src, + .output_tex = fbo, + .new_depth = 8, + .kernel = k, + )); + + if (!ok) { + fprintf(stderr, "kernel '%s' exceeds GPU limits, skipping...\n", k->name); + continue; + } + + REQUIRE(pl_dispatch_compute(dp, pl_dispatch_compute_params( + .shader = &sh, + .dispatch_size = {1, 1, 1}, + ))); + } + } + + pl_dispatch_destroy(&dp); + pl_tex_destroy(gpu, &src); + pl_tex_destroy(gpu, &fbo); +} + +static void pl_scaler_tests(pl_gpu gpu) +{ + pl_fmt src_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_LINEAR); + pl_fmt fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_RENDERABLE); + if (!src_fmt || !fbo_fmt) + return; + + float *fbo_data = NULL; + pl_shader_obj lut = NULL; + + static float data_5x5[5][5] = { + { 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0 }, + { 0, 0, 1, 0, 0 }, + { 0, 0, 0, 0, 0 }, + { 0, 0, 0, 0, 0 }, + }; + + pl_tex dot5x5 = pl_tex_create(gpu, &(struct pl_tex_params) { + .w = 5, + .h = 5, + .format = src_fmt, + .sampleable = true, + .initial_data = &data_5x5[0][0], + }); + + struct pl_tex_params fbo_params = { + .w = 100, + .h = 100, + .format = fbo_fmt, + .renderable = true, + .storable = fbo_fmt->caps & PL_FMT_CAP_STORABLE, + .host_readable = fbo_fmt->caps & PL_FMT_CAP_HOST_READABLE, + }; + + pl_tex fbo = pl_tex_create(gpu, &fbo_params); + pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); + if (!dot5x5 || !fbo || !dp) + goto error; + + pl_shader sh = pl_dispatch_begin(dp); + REQUIRE(pl_shader_sample_polar(sh, + pl_sample_src( + .tex = dot5x5, + .new_w = fbo->params.w, + .new_h = fbo->params.h, + ), + pl_sample_filter_params( + .filter = pl_filter_ewa_lanczos, + .lut = &lut, + .no_compute = !fbo->params.storable, + ) + )); + REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) { + .shader = &sh, + .target = fbo, + })); + + if (fbo->params.host_readable) { + fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float)); + REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { + .tex = fbo, + .ptr = fbo_data, + })); + +#ifdef PRINT_OUTPUT + int max = 255; + printf("P2\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max); + for (int y = 0; y < fbo->params.h; y++) { + for (int x = 0; x < fbo->params.w; x++) { + float v = fbo_data[y * fbo->params.h + x]; + printf("%d ", (int) round(fmin(fmax(v, 0.0), 1.0) * max)); + } + printf("\n"); + } +#endif + } + +error: + free(fbo_data); + pl_shader_obj_destroy(&lut); + pl_dispatch_destroy(&dp); + pl_tex_destroy(gpu, &dot5x5); + pl_tex_destroy(gpu, &fbo); +} + +static const char *user_shader_tests[] = { + // Test hooking, saving and loading + "// Example of a comment at the beginning \n" + " \n" + "//!HOOK NATIVE \n" + "//!DESC upscale image \n" + "//!BIND HOOKED \n" + "//!WIDTH HOOKED.w 10 * \n" + "//!HEIGHT HOOKED.h 10 * \n" + "//!SAVE NATIVEBIG \n" + "//!WHEN NATIVE.w 500 < \n" + " \n" + "vec4 hook() \n" + "{ \n" + " return HOOKED_texOff(0); \n" + "} \n" + " \n" + "//!HOOK MAIN \n" + "//!DESC downscale bigger image \n" + "//!WHEN NATIVE.w 500 < \n" + "//!BIND NATIVEBIG \n" + " \n" + "vec4 hook() \n" + "{ \n" + " return NATIVEBIG_texOff(0); \n" + "} \n", + + // Test use of textures + "//!HOOK MAIN \n" + "//!DESC turn everything into colorful pixels \n" + "//!BIND HOOKED \n" + "//!BIND DISCO \n" + "//!COMPONENTS 3 \n" + " \n" + "vec4 hook() \n" + "{ \n" + " return vec4(DISCO_tex(HOOKED_pos * 10.0).rgb, 1); \n" + "} \n" + " \n" + "//!TEXTURE DISCO \n" + "//!SIZE 3 3 \n" + "//!FORMAT rgba8 \n" + "//!FILTER NEAREST \n" + "//!BORDER REPEAT \n" + "ff0000ff00ff00ff0000ffff00ffffffff00ffffffff00ff4c4c4cff999999ffffffffff\n" + + // Test custom parameters + "//!PARAM test \n" + "//!DESC test parameter \n" + "//!TYPE DYNAMIC float \n" + "//!MINIMUM 0.0 \n" + "//!MAXIMUM 100.0 \n" + "1.0 \n" + " \n" + "//!PARAM testconst \n" + "//!TYPE CONSTANT uint \n" + "//!MAXIMUM 16 \n" + "3 \n" + " \n" + "//!PARAM testdefine \n" + "//!TYPE DEFINE \n" + "100 \n" + " \n" + "//!PARAM testenum \n" + "//!TYPE ENUM DEFINE \n" + "FOO \n" + "BAR \n" + " \n" + "//!HOOK MAIN \n" + "//!WHEN testconst 30 > \n" + "#error should not be run \n" + " \n" + "//!HOOK MAIN \n" + "//!WHEN testenum FOO = \n" + "#if testenum == BAR \n" + " #error bad \n" + "#endif \n" + "vec4 hook() { return vec4(0.0); } \n" +}; + +static const char *compute_shader_tests[] = { + // Test use of storage/buffer resources + "//!HOOK MAIN \n" + "//!DESC attach some storage objects \n" + "//!BIND tex_storage \n" + "//!BIND buf_uniform \n" + "//!BIND buf_storage \n" + "//!COMPONENTS 4 \n" + " \n" + "vec4 hook() \n" + "{ \n" + " return vec4(foo, bar, bat); \n" + "} \n" + " \n" + "//!TEXTURE tex_storage \n" + "//!SIZE 100 100 \n" + "//!FORMAT r32f \n" + "//!STORAGE \n" + " \n" + "//!BUFFER buf_uniform \n" + "//!VAR float foo \n" + "//!VAR float bar \n" + "0000000000000000 \n" + " \n" + "//!BUFFER buf_storage \n" + "//!VAR vec2 bat \n" + "//!VAR int big[32]; \n" + "//!STORAGE \n", + +}; + +static const char *test_luts[] = { + + "TITLE \"1D identity\" \n" + "LUT_1D_SIZE 2 \n" + "0.0 0.0 0.0 \n" + "1.0 1.0 1.0 \n", + + "TITLE \"3D identity\" \n" + "LUT_3D_SIZE 2 \n" + "0.0 0.0 0.0 \n" + "1.0 0.0 0.0 \n" + "0.0 1.0 0.0 \n" + "1.0 1.0 0.0 \n" + "0.0 0.0 1.0 \n" + "1.0 0.0 1.0 \n" + "0.0 1.0 1.0 \n" + "1.0 1.0 1.0 \n" + +}; + +static bool frame_passthrough(pl_gpu gpu, pl_tex *tex, + const struct pl_source_frame *src, struct pl_frame *out_frame) +{ + const struct pl_frame *frame = src->frame_data; + *out_frame = *frame; + return true; +} + +static enum pl_queue_status get_frame_ptr(struct pl_source_frame *out_frame, + const struct pl_queue_params *qparams) +{ + const struct pl_source_frame **pframe = qparams->priv; + if (!(*pframe)->frame_data) + return PL_QUEUE_EOF; + + *out_frame = *(*pframe)++; + return PL_QUEUE_OK; +} + +static void render_info_cb(void *priv, const struct pl_render_info *info) +{ + printf("{%d} Executed shader: %s\n", info->index, + info->pass->shader->description); +} + +static void pl_render_tests(pl_gpu gpu) +{ + pl_tex img_tex = NULL, fbo = NULL; + pl_renderer rr = NULL; + + enum { width = 50, height = 50 }; + static float data[width][height]; + for (int y = 0; y < height; y++) { + for (int x = 0; x < width; x++) + data[y][x] = RANDOM; + } + + struct pl_plane img_plane = {0}; + struct pl_plane_data plane_data = { + .type = PL_FMT_FLOAT, + .width = width, + .height = height, + .component_size = { 8 * sizeof(float) }, + .component_map = { 0 }, + .pixel_stride = sizeof(float), + .pixels = data, + }; + + if (!pl_recreate_plane(gpu, NULL, &fbo, &plane_data)) + return; + + if (!pl_upload_plane(gpu, &img_plane, &img_tex, &plane_data)) + goto error; + + rr = pl_renderer_create(gpu->log, gpu); + pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0}); + + struct pl_frame image = { + .num_planes = 1, + .planes = { img_plane }, + .repr = { + .sys = PL_COLOR_SYSTEM_BT_709, + .levels = PL_COLOR_LEVELS_FULL, + }, + .color = pl_color_space_srgb, + }; + + struct pl_frame target = { + .num_planes = 1, + .planes = {{ + .texture = fbo, + .components = 3, + .component_mapping = {0, 1, 2}, + }}, + .repr = { + .sys = PL_COLOR_SYSTEM_RGB, + .levels = PL_COLOR_LEVELS_FULL, + .bits.color_depth = 32, + }, + .color = pl_color_space_srgb, + }; + + REQUIRE(pl_render_image(rr, &image, &target, NULL)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + + // TODO: embed a reference texture and ensure it matches + + // Test a bunch of different params +#define TEST(SNAME, STYPE, DEFAULT, FIELD, LIMIT) \ + do { \ + for (int i = 0; i <= LIMIT; i++) { \ + printf("testing `" #STYPE "." #FIELD " = %d`\n", i); \ + struct pl_render_params params = pl_render_default_params; \ + params.force_dither = true; \ + struct STYPE tmp = DEFAULT; \ + tmp.FIELD = i; \ + params.SNAME = &tmp; \ + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); \ + pl_gpu_flush(gpu); \ + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); \ + } \ + } while (0) + +#define TEST_PARAMS(NAME, FIELD, LIMIT) \ + TEST(NAME##_params, pl_##NAME##_params, pl_##NAME##_default_params, FIELD, LIMIT) + + image.crop.x1 = width / 2.0; + image.crop.y1 = height / 2.0; + for (int i = 0; i < pl_num_scale_filters; i++) { + struct pl_render_params params = pl_render_default_params; + params.upscaler = pl_scale_filters[i].filter; + printf("testing `params.upscaler = /* %s */`\n", pl_scale_filters[i].name); + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + pl_gpu_flush(gpu); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + } + image.crop.x1 = image.crop.y1 = 0; + + target.crop.x1 = width / 2.0; + target.crop.y1 = height / 2.0; + for (int i = 0; i < pl_num_scale_filters; i++) { + struct pl_render_params params = pl_render_default_params; + params.downscaler = pl_scale_filters[i].filter; + printf("testing `params.downscaler = /* %s */`\n", pl_scale_filters[i].name); + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + pl_gpu_flush(gpu); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + } + target.crop.x1 = target.crop.y1 = 0; + + TEST_PARAMS(deband, iterations, 3); + TEST_PARAMS(sigmoid, center, 1); + TEST_PARAMS(color_map, intent, PL_INTENT_ABSOLUTE_COLORIMETRIC); + TEST_PARAMS(dither, method, PL_DITHER_WHITE_NOISE); + TEST_PARAMS(dither, temporal, true); + TEST_PARAMS(distort, alpha_mode, PL_ALPHA_INDEPENDENT); + TEST_PARAMS(distort, constrain, true); + TEST_PARAMS(distort, bicubic, true); + TEST(cone_params, pl_cone_params, pl_vision_deuteranomaly, strength, 0); + + // Test gamma-correct dithering + target.repr.bits.color_depth = 2; + TEST_PARAMS(dither, transfer, PL_COLOR_TRC_GAMMA22); + target.repr.bits.color_depth = 32; + + // Test HDR tone mapping + image.color = pl_color_space_hdr10; + TEST_PARAMS(color_map, visualize_lut, true); + if (gpu->limits.max_ssbo_size) + TEST_PARAMS(peak_detect, allow_delayed, true); + + // Test inverse tone-mapping and pure BPC + image.color.hdr.max_luma = 1000; + target.color.hdr.max_luma = 4000; + target.color.hdr.min_luma = 0.02; + TEST_PARAMS(color_map, inverse_tone_mapping, true); + + image.color = pl_color_space_srgb; + target.color = pl_color_space_srgb; + + // Test some misc stuff + struct pl_render_params params = pl_render_default_params; + params.color_adjustment = &(struct pl_color_adjustment) { + .brightness = 0.1, + .contrast = 0.9, + .saturation = 1.5, + .gamma = 0.8, + .temperature = 0.3, + }; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + params = pl_render_default_params; + + struct pl_frame inferred_image = image, inferred_target = target; + pl_frames_infer(rr, &inferred_image, &inferred_target); + REQUIRE(pl_render_image(rr, &inferred_image, &inferred_target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + + // Test background blending and alpha transparency + params.blend_against_tiles = true; + params.corner_rounding = 0.25f; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + params = pl_render_default_params; + + // Test film grain synthesis + image.film_grain.type = PL_FILM_GRAIN_AV1; + image.film_grain.params.av1 = av1_grain_data; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + + image.film_grain.type = PL_FILM_GRAIN_H274; + image.film_grain.params.h274 = h274_grain_data; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + // H.274 film grain synthesis requires compute shaders + if (gpu->glsl.compute) { + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + } else { + const struct pl_render_errors rr_err = pl_renderer_get_errors(rr); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_FILM_GRAIN); + pl_renderer_reset_errors(rr, &rr_err); + } + image.film_grain = (struct pl_film_grain_data) {0}; + + // Test mpv-style custom shaders + for (int i = 0; i < PL_ARRAY_SIZE(user_shader_tests); i++) { + printf("testing user shader:\n\n%s\n", user_shader_tests[i]); + const struct pl_hook *hook; + hook = pl_mpv_user_shader_parse(gpu, user_shader_tests[i], + strlen(user_shader_tests[i])); + REQUIRE(hook); + + params.hooks = &hook; + params.num_hooks = 1; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + + pl_mpv_user_shader_destroy(&hook); + } + + if (gpu->glsl.compute && gpu->limits.max_ssbo_size) { + for (int i = 0; i < PL_ARRAY_SIZE(compute_shader_tests); i++) { + printf("testing user shader:\n\n%s\n", compute_shader_tests[i]); + const struct pl_hook *hook; + hook = pl_mpv_user_shader_parse(gpu, compute_shader_tests[i], + strlen(compute_shader_tests[i])); + REQUIRE(hook); + + params.hooks = &hook; + params.num_hooks = 1; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + + pl_mpv_user_shader_destroy(&hook); + } + } + params = pl_render_default_params; + + // Test custom LUTs + for (int i = 0; i < PL_ARRAY_SIZE(test_luts); i++) { + printf("testing custom lut %d\n", i); + struct pl_custom_lut *lut; + lut = pl_lut_parse_cube(gpu->log, test_luts[i], strlen(test_luts[i])); + REQUIRE(lut); + + bool has_3dlut = gpu->limits.max_tex_3d_dim && gpu->glsl.version > 100; + if (lut->size[2] && !has_3dlut) { + pl_lut_free(&lut); + continue; + } + + // Test all three at the same time to reduce the number of tests + image.lut = target.lut = params.lut = lut; + + for (enum pl_lut_type t = PL_LUT_UNKNOWN; t <= PL_LUT_CONVERSION; t++) { + printf("testing LUT method %d\n", t); + image.lut_type = target.lut_type = params.lut_type = t; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + } + + image.lut = target.lut = params.lut = NULL; + pl_lut_free(&lut); + } + +#ifdef PL_HAVE_LCMS + + // It doesn't fit without use of 3D textures on GLES2 + if (gpu->glsl.version > 100) { + // Test ICC profiles + image.profile = TEST_PROFILE(sRGB_v2_nano_icc); + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + image.profile = (struct pl_icc_profile) {0}; + + target.profile = TEST_PROFILE(sRGB_v2_nano_icc); + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + target.profile = (struct pl_icc_profile) {0}; + + image.profile = TEST_PROFILE(sRGB_v2_nano_icc); + target.profile = image.profile; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + image.profile = (struct pl_icc_profile) {0}; + target.profile = (struct pl_icc_profile) {0}; + } + +#endif + + // Test overlays + image.num_overlays = 1; + image.overlays = &(struct pl_overlay) { + .tex = img_plane.texture, + .mode = PL_OVERLAY_NORMAL, + .num_parts = 2, + .parts = (struct pl_overlay_part[]) {{ + .src = {0, 0, 2, 2}, + .dst = {30, 100, 40, 200}, + }, { + .src = {2, 2, 5, 5}, + .dst = {1000, -1, 3, 5}, + }}, + }; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + params.disable_fbos = true; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + image.num_overlays = 0; + params = pl_render_default_params; + + target.num_overlays = 1; + target.overlays = &(struct pl_overlay) { + .tex = img_plane.texture, + .mode = PL_OVERLAY_MONOCHROME, + .num_parts = 1, + .parts = &(struct pl_overlay_part) { + .src = {5, 5, 15, 15}, + .dst = {5, 5, 15, 15}, + .color = {1.0, 0.5, 0.0}, + }, + }; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + REQUIRE(pl_render_image(rr, NULL, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + target.num_overlays = 0; + + // Test rotation + for (pl_rotation rot = 0; rot < PL_ROTATION_360; rot += PL_ROTATION_90) { + image.rotation = rot; + REQUIRE(pl_render_image(rr, &image, &target, ¶ms)); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + } + + // Attempt frame mixing, using the mixer queue helper + printf("testing frame mixing \n"); + struct pl_render_params mix_params = { + .frame_mixer = &pl_filter_mitchell_clamp, + .info_callback = render_info_cb, + }; + + struct pl_queue_params qparams = { + .radius = pl_frame_mix_radius(&mix_params), + .vsync_duration = 1.0 / 60.0, + }; + + // Test large PTS jumps in frame mix + struct pl_frame_mix mix = (struct pl_frame_mix) { + .num_frames = 2, + .frames = (const struct pl_frame *[]) { &image, &image }, + .signatures = (uint64_t[]) { 0xFFF1, 0xFFF2 }, + .timestamps = (float[]) { -100, 100 }, + .vsync_duration = 1.6, + }; + REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); + + // Test inferring frame mix + inferred_target = target; + pl_frames_infer_mix(rr, &mix, &inferred_target, &inferred_image); + REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); + + // Test empty frame mix + mix = (struct pl_frame_mix) {0}; + REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); + + // Test inferring empty frame mix + inferred_target = target; + pl_frames_infer_mix(rr, &mix, &inferred_target, &inferred_image); + REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); + + // Test mixer queue +#define NUM_MIX_FRAMES 20 + const float frame_duration = 1.0 / 24.0; + struct pl_source_frame srcframes[NUM_MIX_FRAMES+1]; + srcframes[NUM_MIX_FRAMES] = (struct pl_source_frame) {0}; + for (int i = 0; i < NUM_MIX_FRAMES; i++) { + srcframes[i] = (struct pl_source_frame) { + .pts = i * frame_duration, + .duration = frame_duration, + .map = frame_passthrough, + .frame_data = &image, + }; + } + + pl_queue queue = pl_queue_create(gpu); + enum pl_queue_status ret; + + // Test pre-pushing all frames, with delayed EOF. + for (int i = 0; i < NUM_MIX_FRAMES; i++) { + const struct pl_source_frame *src = &srcframes[i]; + if (i > 10) // test pushing in reverse order + src = &srcframes[NUM_MIX_FRAMES + 10 - i]; + if (!pl_queue_push_block(queue, 1, src)) // mini-sleep + pl_queue_push(queue, src); // push it anyway, for testing + } + + while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { + if (ret == PL_QUEUE_MORE) { + REQUIRE_CMP(qparams.pts, >, 0.0f, "f"); + pl_queue_push(queue, NULL); // push delayed EOF + continue; + } + + REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u"); + REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); + + // Simulate advancing vsync + qparams.pts += qparams.vsync_duration; + } + + // Test dynamically pulling all frames, with oversample mixer + const struct pl_source_frame *frame_ptr = &srcframes[0]; + mix_params.frame_mixer = &pl_oversample_frame_mixer; + + qparams = (struct pl_queue_params) { + .radius = pl_frame_mix_radius(&mix_params), + .vsync_duration = qparams.vsync_duration, + .get_frame = get_frame_ptr, + .priv = &frame_ptr, + }; + + pl_queue_reset(queue); + while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { + REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u"); + REQUIRE_CMP(mix.num_frames, <=, 2, "d"); + REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); + qparams.pts += qparams.vsync_duration; + } + + // Test large PTS jump + pl_queue_reset(queue); + REQUIRE(pl_queue_update(queue, &mix, &qparams) == PL_QUEUE_EOF); + + // Test deinterlacing + pl_queue_reset(queue); + printf("testing deinterlacing \n"); + for (int i = 0; i < NUM_MIX_FRAMES; i++) { + struct pl_source_frame *src = &srcframes[i]; + if (i > 10) + src = &srcframes[NUM_MIX_FRAMES + 10 - i]; + src->first_field = PL_FIELD_EVEN; + pl_queue_push(queue, src); + } + pl_queue_push(queue, NULL); + + qparams.pts = 0; + qparams.get_frame = NULL; + while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) { + REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u"); + REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params)); + qparams.pts += qparams.vsync_duration; + } + + pl_queue_destroy(&queue); + +error: + pl_renderer_destroy(&rr); + pl_tex_destroy(gpu, &img_tex); + pl_tex_destroy(gpu, &fbo); +} + +static struct pl_hook_res noop_hook(void *priv, const struct pl_hook_params *params) +{ + return (struct pl_hook_res) {0}; +} + +static void pl_ycbcr_tests(pl_gpu gpu) +{ + struct pl_plane_data data[3]; + for (int i = 0; i < 3; i++) { + const int sub = i > 0 ? 1 : 0; + const int width = (323 + sub) >> sub; + const int height = (255 + sub) >> sub; + + data[i] = (struct pl_plane_data) { + .type = PL_FMT_UNORM, + .width = width, + .height = height, + .component_size = {16}, + .component_map = {i}, + .pixel_stride = sizeof(uint16_t), + .row_stride = PL_ALIGN2(width * sizeof(uint16_t), + gpu->limits.align_tex_xfer_pitch), + }; + } + + pl_fmt fmt = pl_plane_find_fmt(gpu, NULL, &data[0]); + enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE; + if (!fmt || (fmt->caps & caps) != caps) + return; + + pl_renderer rr = pl_renderer_create(gpu->log, gpu); + if (!rr) + return; + + pl_tex src_tex[3] = {0}; + pl_tex dst_tex[3] = {0}; + struct pl_frame img = { + .num_planes = 3, + .repr = pl_color_repr_hdtv, + .color = pl_color_space_bt709, + }; + + struct pl_frame target = { + .num_planes = 3, + .repr = pl_color_repr_hdtv, + .color = pl_color_space_bt709, + }; + + uint8_t *src_buffer[3] = {0}; + uint8_t *dst_buffer = NULL; + for (int i = 0; i < 3; i++) { + // Generate some arbitrary data for the buffer + src_buffer[i] = malloc(data[i].height * data[i].row_stride); + if (!src_buffer[i]) + goto error; + + data[i].pixels = src_buffer[i]; + for (int y = 0; y < data[i].height; y++) { + for (int x = 0; x < data[i].width; x++) { + size_t off = y * data[i].row_stride + x * data[i].pixel_stride; + uint16_t *pixel = (uint16_t *) &src_buffer[i][off]; + int gx = 200 + 100 * i, gy = 300 + 150 * i; + *pixel = (gx * x) ^ (gy * y); // whatever + } + } + + REQUIRE(pl_upload_plane(gpu, &img.planes[i], &src_tex[i], &data[i])); + } + + // This co-sites chroma pixels with pixels in the RGB image, meaning we + // get an exact round-trip when sampling both ways. This makes it useful + // as a test case, even though it's not common in the real world. + pl_frame_set_chroma_location(&img, PL_CHROMA_TOP_LEFT); + + for (int i = 0; i < 3; i++) { + dst_tex[i] = pl_tex_create(gpu, &(struct pl_tex_params) { + .format = fmt, + .w = data[i].width, + .h = data[i].height, + .renderable = true, + .host_readable = true, + .storable = fmt->caps & PL_FMT_CAP_STORABLE, + .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE, + }); + + if (!dst_tex[i]) + goto error; + + target.planes[i] = img.planes[i]; + target.planes[i].texture = dst_tex[i]; + } + + REQUIRE(pl_render_image(rr, &img, &target, &(struct pl_render_params) { + .num_hooks = 1, + .hooks = &(const struct pl_hook *){&(struct pl_hook) { + // Forces chroma merging, to test the chroma merging code + .stages = PL_HOOK_CHROMA_INPUT, + .hook = noop_hook, + }}, + })); + REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); + + size_t buf_size = data[0].height * data[0].row_stride; + dst_buffer = malloc(buf_size); + if (!dst_buffer) + goto error; + + for (int i = 0; i < 3; i++) { + memset(dst_buffer, 0xAA, buf_size); + REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { + .tex = dst_tex[i], + .ptr = dst_buffer, + .row_pitch = data[i].row_stride, + })); + + for (int y = 0; y < data[i].height; y++) { + for (int x = 0; x < data[i].width; x++) { + size_t off = y * data[i].row_stride + x * data[i].pixel_stride; + uint16_t *src_pixel = (uint16_t *) &src_buffer[i][off]; + uint16_t *dst_pixel = (uint16_t *) &dst_buffer[off]; + int diff = abs((int) *src_pixel - (int) *dst_pixel); + REQUIRE_CMP(diff, <=, 50, "d"); // a little under 0.1% + } + } + } + +error: + pl_renderer_destroy(&rr); + free(dst_buffer); + for (int i = 0; i < 3; i++) { + free(src_buffer[i]); + pl_tex_destroy(gpu, &src_tex[i]); + pl_tex_destroy(gpu, &dst_tex[i]); + } +} + +static void pl_test_export_import(pl_gpu gpu, + enum pl_handle_type handle_type) +{ + // Test texture roundtrip + + if (!(gpu->export_caps.tex & handle_type) || + !(gpu->import_caps.tex & handle_type)) + goto skip_tex; + + pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 4, 0, 0, PL_FMT_CAP_BLITTABLE); + if (!fmt) + goto skip_tex; + + printf("testing texture import/export with fmt %s\n", fmt->name); + + pl_tex export = pl_tex_create(gpu, &(struct pl_tex_params) { + .w = 32, + .h = 32, + .format = fmt, + .export_handle = handle_type, + }); + REQUIRE(export); + REQUIRE_HANDLE(export->shared_mem, handle_type); + + pl_tex import = pl_tex_create(gpu, &(struct pl_tex_params) { + .w = export->params.w, + .h = export->params.h, + .format = fmt, + .import_handle = handle_type, + .shared_mem = export->shared_mem, + }); + REQUIRE(import); + + pl_tex_destroy(gpu, &import); + pl_tex_destroy(gpu, &export); + +skip_tex: ; + + // Test buffer roundtrip + + if (!(gpu->export_caps.buf & handle_type) || + !(gpu->import_caps.buf & handle_type)) + return; + + printf("testing buffer import/export\n"); + + pl_buf exp_buf = pl_buf_create(gpu, &(struct pl_buf_params) { + .size = 32, + .export_handle = handle_type, + }); + REQUIRE(exp_buf); + REQUIRE_HANDLE(exp_buf->shared_mem, handle_type); + + pl_buf imp_buf = pl_buf_create(gpu, &(struct pl_buf_params) { + .size = 32, + .import_handle = handle_type, + .shared_mem = exp_buf->shared_mem, + }); + REQUIRE(imp_buf); + + pl_buf_destroy(gpu, &imp_buf); + pl_buf_destroy(gpu, &exp_buf); +} + +static void pl_test_host_ptr(pl_gpu gpu) +{ + if (!(gpu->import_caps.buf & PL_HANDLE_HOST_PTR)) + return; + +#ifdef __unix__ + + printf("testing host ptr\n"); + REQUIRE(gpu->limits.max_mapped_size); + + const size_t size = 2 << 20; + const size_t offset = 2 << 10; + const size_t slice = 2 << 16; + + uint8_t *data = aligned_alloc(0x1000, size); + for (int i = 0; i < size; i++) + data[i] = (uint8_t) i; + + pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) { + .size = slice, + .import_handle = PL_HANDLE_HOST_PTR, + .shared_mem = { + .handle.ptr = data, + .size = size, + .offset = offset, + }, + .host_mapped = true, + }); + + REQUIRE(buf); + REQUIRE_MEMEQ(data + offset, buf->data, slice); + + pl_buf_destroy(gpu, &buf); + free(data); + +#endif // unix +} + +static void gpu_shader_tests(pl_gpu gpu) +{ + pl_buffer_tests(gpu); + pl_texture_tests(gpu); + pl_planar_tests(gpu); + pl_shader_tests(gpu); + pl_scaler_tests(gpu); + pl_render_tests(gpu); + pl_ycbcr_tests(gpu); + + REQUIRE(!pl_gpu_is_failed(gpu)); +} + +static void gpu_interop_tests(pl_gpu gpu) +{ + pl_test_export_import(gpu, PL_HANDLE_DMA_BUF); + pl_test_host_ptr(gpu); + + REQUIRE(!pl_gpu_is_failed(gpu)); +} |