summaryrefslogtreecommitdiffstats
path: root/src/tests
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 20:38:23 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-15 20:38:23 +0000
commitff6e3c025658a5fa1affd094f220b623e7e1b24b (patch)
tree9faab72d69c92d24e349d184f5869b9796f17e0c /src/tests
parentInitial commit. (diff)
downloadlibplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.tar.xz
libplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.zip
Adding upstream version 6.338.2.upstream/6.338.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/tests')
-rw-r--r--src/tests/bench.c550
-rw-r--r--src/tests/cache.c215
-rw-r--r--src/tests/colorspace.c488
-rw-r--r--src/tests/common.c136
-rw-r--r--src/tests/d3d11.c59
-rw-r--r--src/tests/dav1d.c45
-rw-r--r--src/tests/dither.c41
-rw-r--r--src/tests/dummy.c70
-rw-r--r--src/tests/filters.c81
-rw-r--r--src/tests/fuzz/lut.c24
-rw-r--r--src/tests/fuzz/options.c26
-rw-r--r--src/tests/fuzz/shaders.c166
-rw-r--r--src/tests/fuzz/user_shaders.c28
-rw-r--r--src/tests/gpu_tests.h1741
-rw-r--r--src/tests/icc.c106
-rw-r--r--src/tests/include/include_tmpl.c1
-rw-r--r--src/tests/include/include_tmpl.cpp3
-rw-r--r--src/tests/include/meson.build35
-rw-r--r--src/tests/libav.c393
-rw-r--r--src/tests/lut.c86
-rw-r--r--src/tests/meson.build39
-rw-r--r--src/tests/opengl_surfaceless.c247
-rw-r--r--src/tests/options.c123
-rw-r--r--src/tests/string.c147
-rw-r--r--src/tests/tests.h319
-rw-r--r--src/tests/tone_mapping.c181
-rw-r--r--src/tests/utils.c165
-rw-r--r--src/tests/vulkan.c296
28 files changed, 5811 insertions, 0 deletions
diff --git a/src/tests/bench.c b/src/tests/bench.c
new file mode 100644
index 0000000..22638d8
--- /dev/null
+++ b/src/tests/bench.c
@@ -0,0 +1,550 @@
+#include "tests.h"
+
+#include <libplacebo/dispatch.h>
+#include <libplacebo/vulkan.h>
+#include <libplacebo/shaders/colorspace.h>
+#include <libplacebo/shaders/deinterlacing.h>
+#include <libplacebo/shaders/sampling.h>
+
enum {
    // Image configuration
    NUM_TEX = 16,   // number of FBOs cycled through, so the GPU can pipeline frames
    WIDTH = 2048,   // test image width in pixels
    HEIGHT = 2048,  // test image height in pixels
    DEPTH = 16,     // minimum component bit depth requested from pl_find_fmt
    COMPS = 4,      // components per pixel (RGBA)

    // Queue configuration
    NUM_QUEUES = NUM_TEX, // vulkan queue count, matched to the FBO ring size
    ASYNC_TX = 1,         // enable asynchronous transfer queues
    ASYNC_COMP = 1,       // enable asynchronous compute queues

    // Test configuration
    TEST_MS = 1000,  // measured duration of each benchmark, in milliseconds
    WARMUP_MS = 500, // warmup period excluded from the measurement, in milliseconds
};
+
+static pl_tex create_test_img(pl_gpu gpu)
+{
+ pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, DEPTH, 32, PL_FMT_CAP_LINEAR);
+ REQUIRE(fmt);
+
+ const float xc = (WIDTH - 1) / 2.0f;
+ const float yc = (HEIGHT - 1) / 2.0f;
+ const float kf = 0.5f / sqrtf(xc * xc + yc * yc);
+ const float invphi = 0.61803398874989;
+ const float freqR = kf * M_PI * 0.2f;
+ const float freqG = freqR * invphi;
+ const float freqB = freqG * invphi;
+ float *data = malloc(WIDTH * HEIGHT * COMPS * sizeof(float));
+ for (int y = 0; y < HEIGHT; y++) {
+ for (int x = 0; x < WIDTH; x++) {
+ float *color = &data[(y * WIDTH + x) * COMPS];
+ float xx = x - xc, yy = y - yc;
+ float r2 = xx * xx + yy * yy;
+ switch (COMPS) {
+ case 4: color[3] = 1.0;
+ case 3: color[2] = 0.5f * sinf(freqB * r2) + 0.5f;;
+ case 2: color[1] = 0.5f * sinf(freqG * r2) + 0.5f;;
+ case 1: color[0] = 0.5f * sinf(freqR * r2) + 0.5f;;
+ }
+ }
+ }
+
+ pl_tex tex = pl_tex_create(gpu, pl_tex_params(
+ .format = fmt,
+ .w = WIDTH,
+ .h = HEIGHT,
+ .sampleable = true,
+ .initial_data = data,
+ ));
+
+ free(data);
+ REQUIRE(tex);
+ return tex;
+}
+
// One benchmark case. Exactly one of the two callbacks is expected to be set
// (enforced by run_bench below).
struct bench {
    // Shader benchmark: records drawing commands into `sh`, sampling from
    // `src`; the result is dispatched to an FBO by run_bench()
    void (*run_sh)(pl_shader sh, pl_shader_obj *state,
                   pl_tex src);

    // Raw texture benchmark: operates directly on `tex` (the FBO)
    void (*run_tex)(pl_gpu gpu, pl_tex tex);
};
+
+static void run_bench(pl_gpu gpu, pl_dispatch dp,
+ pl_shader_obj *state, pl_tex src,
+ pl_tex fbo, pl_timer timer,
+ const struct bench *bench)
+{
+ REQUIRE(bench);
+ REQUIRE(bench->run_sh || bench->run_tex);
+ if (bench->run_sh) {
+ pl_shader sh = pl_dispatch_begin(dp);
+ bench->run_sh(sh, state, src);
+
+ pl_dispatch_finish(dp, pl_dispatch_params(
+ .shader = &sh,
+ .target = fbo,
+ .timer = timer,
+ ));
+ } else {
+ bench->run_tex(gpu, fbo);
+ }
+}
+
// Run a single named benchmark: builds the source texture and a ring of
// NUM_TEX FBOs, warms up for WARMUP_MS, measures for TEST_MS, then prints
// per-frame wall-clock stats (and averaged GPU timer results, if any).
static void benchmark(pl_gpu gpu, const char *name,
                      const struct bench *bench)
{
    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
    REQUIRE(dp);
    pl_shader_obj state = NULL;
    pl_tex src = create_test_img(gpu);

    // Create the FBOs
    pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, DEPTH, 32,
                             PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE);
    REQUIRE(fmt);

    pl_tex fbos[NUM_TEX] = {0};
    for (int i = 0; i < NUM_TEX; i++) {
        fbos[i] = pl_tex_create(gpu, pl_tex_params(
            .format = fmt,
            .w = WIDTH,
            .h = HEIGHT,
            .renderable = true,
            .blit_dst = true,
            .host_writable = true,
            .host_readable = true,
            .storable = !!(fmt->caps & PL_FMT_CAP_STORABLE),
        ));
        REQUIRE(fbos[i]);

        pl_tex_clear(gpu, fbos[i], (float[4]){ 0.0 });
    }

    // Run the benchmark and flush+block once to force shader compilation etc.
    run_bench(gpu, dp, &state, src, fbos[0], NULL, bench);
    pl_gpu_finish(gpu);

    // Perform the actual benchmark
    pl_clock_t start_warmup = 0, start_test = 0; // start_test == 0 means "still warming up"
    unsigned long frames = 0, frames_warmup = 0;

    pl_timer timer = pl_timer_create(gpu);
    uint64_t gputime_total = 0;
    unsigned long gputime_count = 0;
    uint64_t gputime;

    start_warmup = pl_clock_now();
    do {
        // Round-robin through the FBO ring so the GPU can pipeline frames
        const int idx = frames % NUM_TEX;
        // Block until this FBO is no longer in flight before reusing it
        while (pl_tex_poll(gpu, fbos[idx], UINT64_MAX))
            ; // do nothing
        // Only attach the GPU timer once the measured phase has started
        run_bench(gpu, dp, &state, src, fbos[idx], start_test ? timer : NULL, bench);
        pl_gpu_flush(gpu);
        frames++;

        if (start_test) {
            // Drain all completed GPU timer queries into the running average
            while ((gputime = pl_timer_query(gpu, timer))) {
                gputime_total += gputime;
                gputime_count++;
            }
        }

        pl_clock_t now = pl_clock_now();
        if (start_test) {
            if (pl_clock_diff(now, start_test) > TEST_MS * 1e-3)
                break;
        } else if (pl_clock_diff(now, start_warmup) > WARMUP_MS * 1e-3) {
            // Warmup done: the measured phase begins here
            start_test = now;
            frames_warmup = frames;
        }
    } while (true);

    // Force the GPU to finish execution and re-measure the final stop time
    pl_gpu_finish(gpu);

    pl_clock_t stop = pl_clock_now();
    while ((gputime = pl_timer_query(gpu, timer))) {
        gputime_total += gputime;
        gputime_count++;
    }

    // Only report the frames submitted during the measured phase
    frames -= frames_warmup;
    double secs = pl_clock_diff(stop, start_test);
    printf("'%s':\t%4lu frames in %1.6f seconds => %2.6f ms/frame (%5.2f FPS)",
           name, frames, secs, 1000 * secs / frames, frames / secs);
    if (gputime_count)
        printf(", gpu time: %2.6f ms", 1e-6 * gputime_total / gputime_count);
    printf("\n");

    pl_timer_destroy(gpu, &timer);
    pl_shader_obj_destroy(&state);
    pl_dispatch_destroy(&dp);
    pl_tex_destroy(gpu, &src);
    for (int i = 0; i < NUM_TEX; i++)
        pl_tex_destroy(gpu, &fbos[i]);
}
+
// List of benchmarks

// Debanding with the default parameters
static void bench_deband(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    pl_shader_deband(sh, pl_sample_src( .tex = src ), NULL);
}

// Debanding with deliberately heavy settings (more iterations, larger
// radius/threshold, strong grain) to stress the shader
static void bench_deband_heavy(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params(
        .iterations = 4,
        .threshold = 4.0,
        .radius = 4.0,
        .grain = 16.0,
    ));
}
+
// Fixed-function texture scalers; REQUIRE checks that shader generation
// succeeded (the sampling functions return a success flag)

static void bench_bilinear(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_bilinear(sh, pl_sample_src( .tex = src )));
}

static void bench_bicubic(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_bicubic(sh, pl_sample_src( .tex = src )));
}

static void bench_hermite(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_hermite(sh, pl_sample_src( .tex = src )));
}

static void bench_gaussian(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_gaussian(sh, pl_sample_src( .tex = src )));
}
+
// Dithering to 8 bits after a direct sample; `state` caches the dither
// texture/LUT between frames

static void bench_dither_blue(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    pl_shader_dither(sh, 8, state, pl_dither_params(
        .method = PL_DITHER_BLUE_NOISE,
    ));
}

static void bench_dither_white(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    pl_shader_dither(sh, 8, state, pl_dither_params(
        .method = PL_DITHER_WHITE_NOISE,
    ));
}

static void bench_dither_ordered_fix(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    pl_shader_dither(sh, 8, state, pl_dither_params(
        .method = PL_DITHER_ORDERED_FIXED,
    ));
}
+
// Polar (EWA Lanczos) sampling; `state` caches the filter LUT between frames
static void bench_polar(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_filter_params params = {
        .filter = pl_filter_ewa_lanczos,
        .lut = state,
    };

    REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), &params));
}

// Same, but with compute shaders disabled to force the fallback path
static void bench_polar_nocompute(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_sample_filter_params params = {
        .filter = pl_filter_ewa_lanczos,
        .no_compute = true,
        .lut = state,
    };

    REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), &params));
}
+
// HDR peak detection on an HDR10 source; `state` holds the detection state
// object. Only run when compute shaders are available (gated in main).

static void bench_hdr_peak(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    REQUIRE(pl_shader_detect_peak(sh, pl_color_space_hdr10, state, &pl_peak_detect_default_params));
}

static void bench_hdr_peak_hq(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    REQUIRE(pl_shader_detect_peak(sh, pl_color_space_hdr10, state, &pl_peak_detect_high_quality_params));
}
+
// HDR10 -> monitor tone mapping with the BT.2390 curve (LUT-based path)
static void bench_hdr_lut(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_color_map_params params = {
        PL_COLOR_MAP_DEFAULTS
        .tone_mapping_function = &pl_tone_map_bt2390,
        .tone_mapping_mode = PL_TONE_MAP_RGB,
    };

    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    pl_shader_color_map_ex(sh, &params, pl_color_map_args(
        .src = pl_color_space_hdr10,
        .dst = pl_color_space_monitor,
        .state = state,
    ));
}

// Same mapping with simple clipping instead of a tone-mapping curve
static void bench_hdr_clip(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_color_map_params params = {
        PL_COLOR_MAP_DEFAULTS
        .tone_mapping_function = &pl_tone_map_clip,
        .tone_mapping_mode = PL_TONE_MAP_RGB,
    };

    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    pl_shader_color_map_ex(sh, &params, pl_color_map_args(
        .src = pl_color_space_hdr10,
        .dst = pl_color_space_monitor,
        .state = state,
    ));
}
+
// Deinterlacing benchmarks; all of them reuse the same texture as a field
// pair and extract the top field

static void bench_weave(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_deinterlace_source dsrc = {
        .cur = pl_field_pair(src),
        .field = PL_FIELD_TOP,
    };

    pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params(
        .algo = PL_DEINTERLACE_WEAVE,
    ));
}

static void bench_bob(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_deinterlace_source dsrc = {
        .cur = pl_field_pair(src),
        .field = PL_FIELD_TOP,
    };

    pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params(
        .algo = PL_DEINTERLACE_BOB,
    ));
}

// Yadif is temporal: it additionally needs previous/next frames, which are
// faked here by reusing the same source texture
static void bench_yadif(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_deinterlace_source dsrc = {
        .prev = pl_field_pair(src),
        .cur = pl_field_pair(src),
        .next = pl_field_pair(src),
        .field = PL_FIELD_TOP,
    };

    pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params(
        .algo = PL_DEINTERLACE_YADIF,
    ));
}
+
// Film grain synthesis benchmarks. The grain metadata fixtures
// (av1_grain_data, h274_grain_data) come from outside this file — presumably
// tests.h; verify there. A fresh random seed is used each frame so the grain
// shader cannot cache its output.

static void bench_av1_grain(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_film_grain_params params = {
        .data = {
            .type = PL_FILM_GRAIN_AV1,
            .params.av1 = av1_grain_data,
            .seed = rand(),
        },
        .tex = src,
        .components = 3,
        .component_mapping = {0, 1, 2},
        .repr = &(struct pl_color_repr) {0},
    };

    REQUIRE(pl_shader_film_grain(sh, state, &params));
}

// Same as bench_av1_grain, but with block overlap enabled (heavier path)
static void bench_av1_grain_lap(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_film_grain_params params = {
        .data = {
            .type = PL_FILM_GRAIN_AV1,
            .params.av1 = av1_grain_data,
            .seed = rand(),
        },
        .tex = src,
        .components = 3,
        .component_mapping = {0, 1, 2},
        .repr = &(struct pl_color_repr) {0},
    };

    params.data.params.av1.overlap = true;
    REQUIRE(pl_shader_film_grain(sh, state, &params));
}

static void bench_h274_grain(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    struct pl_film_grain_params params = {
        .data = {
            .type = PL_FILM_GRAIN_H274,
            .params.h274 = h274_grain_data,
            .seed = rand(),
        },
        .tex = src,
        .components = 3,
        .component_mapping = {0, 1, 2},
        .repr = &(struct pl_color_repr) {0},
    };

    REQUIRE(pl_shader_film_grain(sh, state, &params));
}
+
// Dolby Vision reshaping with an inline polynomial curve set: an 8-pivot
// polynomial for the first component and trivial 2-pivot linear curves for
// the other two. The literal coefficients are fixed test vectors.
static void bench_reshape_poly(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    pl_shader_dovi_reshape(sh, &(struct pl_dovi_metadata) { .comp = {
        {
            .num_pivots = 8,
            .pivots = {0.0, 0.00488758553, 0.0420332365, 0.177908108,
                       0.428152502, 0.678396881, 0.92864126, 1.0},
            .method = {0, 0, 0, 0, 0, 0, 0},
            .poly_coeffs = {
                {0.00290930271, 2.30019712, 50.1446037},
                {0.00725257397, 1.88119054, -4.49443769},
                {0.0150123835, 1.61106598, -1.64833081},
                {0.0498571396, 1.2059114, -0.430627108},
                {0.0878019333, 1.01845241, -0.19669354},
                {0.120447636, 0.920134187, -0.122338772},
                {2.12430835, -3.30913281, 2.10893941},
            },
        }, {
            .num_pivots = 2,
            .pivots = {0.0, 1.0},
            .method = {0},
            .poly_coeffs = {{-0.397901177, 1.85908031, 0}},
        }, {
            .num_pivots = 2,
            .pivots = {0.0, 1.0},
            .method = {0},
            .poly_coeffs = {{-0.399355531, 1.85591626, 0}},
        },
    }});
}

// Reshaping driven by the shared `dovi_meta` fixture (declared elsewhere,
// presumably tests.h), which exercises the MMR code path
static void bench_reshape_mmr(pl_shader sh, pl_shader_obj *state, pl_tex src)
{
    REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src )));
    pl_shader_dovi_reshape(sh, &dovi_meta); // this includes MMR
}
+
// Host-side staging buffer for texture transfers, oversized so the transfer
// pointer can be rounded up to a 4096-byte boundary below
static float data[WIDTH * HEIGHT * COMPS + 8192];

// Full-texture GPU -> host readback (no completion callback)
static void bench_download(pl_gpu gpu, pl_tex tex)
{
    REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params(
        .tex = tex,
        .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096),
    )));
}

// Full-texture host -> GPU upload (no completion callback)
static void bench_upload(pl_gpu gpu, pl_tex tex)
{
    REQUIRE(pl_tex_upload(gpu, pl_tex_transfer_params(
        .tex = tex,
        .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096),
    )));
}

// No-op completion callback; supplying a callback appears to request the
// asynchronous transfer path — confirm against pl_tex_transfer_params docs
static void dummy_cb(void *arg) {}

static void bench_download_async(pl_gpu gpu, pl_tex tex)
{
    REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params(
        .tex = tex,
        .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096),
        .callback = dummy_cb,
    )));
}

static void bench_upload_async(pl_gpu gpu, pl_tex tex)
{
    REQUIRE(pl_tex_upload(gpu, pl_tex_transfer_params(
        .tex = tex,
        .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096),
        .callback = dummy_cb,
    )));
}
+
// Entry point: sets up logging and a Vulkan context, then runs every
// benchmark in sequence. Returns SKIP when no usable Vulkan device exists.
int main()
{
    // Unbuffered output so partial results survive if the process dies
    setbuf(stdout, NULL);
    setbuf(stderr, NULL);

    pl_log log = pl_log_create(PL_API_VER, pl_log_params(
        .log_cb = isatty(fileno(stdout)) ? pl_log_color : pl_log_simple,
        .log_level = PL_LOG_WARN,
    ));

    pl_vulkan vk = pl_vulkan_create(log, pl_vulkan_params(
        .allow_software = true,
        .async_transfer = ASYNC_TX,
        .async_compute = ASYNC_COMP,
        .queue_count = NUM_QUEUES,
    ));

    if (!vk)
        return SKIP;

// Convenience wrappers constructing an anonymous `struct bench` literal
#define BENCH_SH(fn) &(struct bench) { .run_sh = fn }
#define BENCH_TEX(fn) &(struct bench) { .run_tex = fn }

    printf("= Running benchmarks =\n");
    benchmark(vk->gpu, "tex_download ptr", BENCH_TEX(bench_download));
    benchmark(vk->gpu, "tex_download ptr async", BENCH_TEX(bench_download_async));
    benchmark(vk->gpu, "tex_upload ptr", BENCH_TEX(bench_upload));
    benchmark(vk->gpu, "tex_upload ptr async", BENCH_TEX(bench_upload_async));
    benchmark(vk->gpu, "bilinear", BENCH_SH(bench_bilinear));
    benchmark(vk->gpu, "bicubic", BENCH_SH(bench_bicubic));
    benchmark(vk->gpu, "hermite", BENCH_SH(bench_hermite));
    benchmark(vk->gpu, "gaussian", BENCH_SH(bench_gaussian));
    benchmark(vk->gpu, "deband", BENCH_SH(bench_deband));
    benchmark(vk->gpu, "deband_heavy", BENCH_SH(bench_deband_heavy));

    // Deinterlacing
    benchmark(vk->gpu, "weave", BENCH_SH(bench_weave));
    benchmark(vk->gpu, "bob", BENCH_SH(bench_bob));
    benchmark(vk->gpu, "yadif", BENCH_SH(bench_yadif));

    // Polar sampling
    benchmark(vk->gpu, "polar", BENCH_SH(bench_polar));
    if (vk->gpu->glsl.compute)
        benchmark(vk->gpu, "polar_nocompute", BENCH_SH(bench_polar_nocompute));

    // Dithering algorithms
    benchmark(vk->gpu, "dither_blue", BENCH_SH(bench_dither_blue));
    benchmark(vk->gpu, "dither_white", BENCH_SH(bench_dither_white));
    benchmark(vk->gpu, "dither_ordered_fixed", BENCH_SH(bench_dither_ordered_fix));

    // HDR peak detection (compute-only)
    if (vk->gpu->glsl.compute) {
        benchmark(vk->gpu, "hdr_peakdetect", BENCH_SH(bench_hdr_peak));
        benchmark(vk->gpu, "hdr_peakdetect_hq", BENCH_SH(bench_hdr_peak_hq));
    }

    // Tone mapping
    benchmark(vk->gpu, "hdr_lut", BENCH_SH(bench_hdr_lut));
    benchmark(vk->gpu, "hdr_clip", BENCH_SH(bench_hdr_clip));

    // Misc stuff
    benchmark(vk->gpu, "av1_grain", BENCH_SH(bench_av1_grain));
    benchmark(vk->gpu, "av1_grain_lap", BENCH_SH(bench_av1_grain_lap));
    benchmark(vk->gpu, "h274_grain", BENCH_SH(bench_h274_grain));
    benchmark(vk->gpu, "reshape_poly", BENCH_SH(bench_reshape_poly));
    benchmark(vk->gpu, "reshape_mmr", BENCH_SH(bench_reshape_mmr));

    pl_vulkan_destroy(&vk);
    pl_log_destroy(&log);
    return 0;
}
diff --git a/src/tests/cache.c b/src/tests/cache.c
new file mode 100644
index 0000000..667435d
--- /dev/null
+++ b/src/tests/cache.c
@@ -0,0 +1,215 @@
+#include "tests.h"
+
+#include <libplacebo/cache.h>
+
+// Returns "foo" for even keys, "bar" for odd
+static pl_cache_obj lookup_foobar(void *priv, uint64_t key)
+{
+ return (pl_cache_obj) {
+ .key = 0xFFFF, // test key sanity
+ .data = (key & 1) ? "bar" : "foo",
+ .size = 3,
+ };
+}
+
+static void update_count(void *priv, pl_cache_obj obj)
+{
+ int *count = priv;
+ *count += obj.size ? 1 : -1;
+}
+
// Arbitrary fixed 64-bit test keys. Defined as macros rather than enum
// constants because ISO C (before C23) requires enumerator values to be
// representable as `int`, which these 64-bit values are not.
#define KEY1 UINT64_C(0x9c65575f419288f5)
#define KEY2 UINT64_C(0x92da969be9b88086)
#define KEY3 UINT64_C(0x7fcb62540b00bc8b)
#define KEY4 UINT64_C(0x46c60ec11af9dde3)
#define KEY5 UINT64_C(0xcb6760b98ece2477)
#define KEY6 UINT64_C(0xf37dc72b7f9e5c88)
#define KEY7 UINT64_C(0x30c18c962d82e5f5)
+
// Exercises the pl_cache API: insertion/removal accounting, save/load
// round-trips against a hand-built reference byte stream, size-limit
// eviction behavior, and the user-supplied get/set callback hooks.
int main()
{
    pl_log log = pl_test_logger();
    pl_cache test = pl_cache_create(pl_cache_params(
        .log = log,
        .max_object_size = 16,
        .max_total_size = 32,
    ));

    pl_cache_obj obj1 = { .key = KEY1, .data = "abc", .size = 3 };
    pl_cache_obj obj2 = { .key = KEY2, .data = "de", .size = 2 };
    pl_cache_obj obj3 = { .key = KEY3, .data = "xyzw", .size = 4 };

    REQUIRE(pl_cache_try_set(test, &obj1));
    REQUIRE(pl_cache_try_set(test, &obj2));
    REQUIRE(pl_cache_try_set(test, &obj3));
    REQUIRE_CMP(pl_cache_size(test), ==, 9, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d");
    // Re-setting an existing key deletes it (total drops by obj2's 2 bytes)
    REQUIRE(pl_cache_try_set(test, &obj2)); // delete KEY2
    REQUIRE_CMP(pl_cache_size(test), ==, 7, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 2, "d");

    // pl_cache_get removes the object from the cache (ownership transfers
    // to the caller), hence size/objects drop to 0 afterwards
    REQUIRE(pl_cache_get(test, &obj1));
    REQUIRE(!pl_cache_get(test, &obj2));
    REQUIRE(pl_cache_get(test, &obj3));
    REQUIRE_CMP(pl_cache_size(test), ==, 0, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 0, "d");
    REQUIRE_MEMEQ(obj1.data, "abc", 3);
    REQUIRE_MEMEQ(obj3.data, "xyzw", 4);

    // Re-insert removed objects (in reversed order)
    REQUIRE(pl_cache_try_set(test, &obj3));
    REQUIRE(pl_cache_try_set(test, &obj1));
    REQUIRE_CMP(pl_cache_size(test), ==, 7, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 2, "d");

    // Hand-build the expected serialized cache file into `ref`, padded to
    // uint32_t alignment between fields, to compare against pl_cache_save
    uint8_t ref[72];
    memset(ref, 0xbe, sizeof(ref));
    uint8_t *refp = ref;

#define PAD_ALIGN(x) PL_ALIGN2(x, sizeof(uint32_t))
#define W(type, ...)                                            \
    do {                                                        \
        size_t sz = sizeof((type){__VA_ARGS__});                \
        pl_assert(ref + sizeof(ref) - refp >= sz);              \
        memcpy(refp, &(type){__VA_ARGS__}, sz);                 \
        refp += sz;                                             \
        size_t pad_sz = PAD_ALIGN(sz) - sz;                     \
        pl_assert(ref + sizeof(ref) - refp >= pad_sz);          \
        memcpy(refp, &(char[PAD_ALIGN(1)]){0}, pad_sz);         \
        refp += pad_sz;                                         \
    } while (0)

    W(char[], 'p', 'l', '_', 'c', 'a', 'c', 'h', 'e'); // cache magic
    W(uint32_t, 1); // cache version
    W(uint32_t, 2); // number of objects

    // object 3 (serialized first: insertion order after the re-insert above)
    W(uint64_t, KEY3); // key
    W(uint64_t, 4); // size
#ifdef PL_HAVE_XXHASH
    W(uint64_t, 0xd43612ef3fbee8be); // hash
#else
    W(uint64_t, 0xec18884e5e471117); // hash
#endif
    W(char[], 'x', 'y', 'z', 'w'); // data

    // object 1
    W(uint64_t, KEY1); // key
    W(uint64_t, 3); // size
#ifdef PL_HAVE_XXHASH
    W(uint64_t, 0x78af5f94892f3950); // hash
#else
    W(uint64_t, 0x3a204d408a2e2d77); // hash
#endif
    W(char[], 'a', 'b', 'c'); // data

#undef W
#undef PAD_ALIGN

    // Save/load round-trip must reproduce the reference bytes exactly
    uint8_t data[100];
    pl_static_assert(sizeof(data) >= sizeof(ref));
    REQUIRE_CMP(pl_cache_save(test, data, sizeof(data)), ==, sizeof(ref), "zu");
    REQUIRE_MEMEQ(data, ref, sizeof(ref));

    pl_cache test2 = pl_cache_create(pl_cache_params( .log = log ));
    REQUIRE_CMP(pl_cache_load(test2, data, sizeof(data)), ==, 2, "d");
    REQUIRE_CMP(pl_cache_size(test2), ==, 7, "zu");
    // Passing NULL queries the required buffer size without writing
    REQUIRE_CMP(pl_cache_save(test2, NULL, 0), ==, sizeof(ref), "zu");
    REQUIRE_CMP(pl_cache_save(test2, data, sizeof(data)), ==, sizeof(ref), "zu");
    REQUIRE_MEMEQ(data, ref, sizeof(ref));

    // Test loading invalid data
    REQUIRE_CMP(pl_cache_load(test2, ref, 0), <, 0, "d"); // empty file
    REQUIRE_CMP(pl_cache_load(test2, ref, 5), <, 0, "d"); // truncated header
    REQUIRE_CMP(pl_cache_load(test2, ref, 64), ==, 1, "d"); // truncated object data
    data[sizeof(ref) - 2] = 'X'; // corrupt data
    REQUIRE_CMP(pl_cache_load(test2, data, sizeof(ref)), ==, 1, "d"); // bad checksum
    pl_cache_destroy(&test2);

    // Inserting too large object should fail (exceeds max_object_size = 16)
    uint8_t zero[32] = {0};
    pl_cache_obj obj4 = { .key = KEY4, .data = zero, .size = 32 };
    REQUIRE(!pl_cache_try_set(test, &obj4));
    REQUIRE(!pl_cache_get(test, &obj4));
    REQUIRE_CMP(pl_cache_size(test), ==, 7, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 2, "d");

    // Inserting 16-byte object should succeed, and not purge old entries
    obj4 = (pl_cache_obj) { .key = KEY4, .data = zero, .size = 16 };
    REQUIRE(pl_cache_try_set(test, &obj4));
    REQUIRE_CMP(pl_cache_size(test), ==, 23, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d");
    REQUIRE(pl_cache_get(test, &obj1));
    REQUIRE(pl_cache_get(test, &obj3));
    REQUIRE(pl_cache_get(test, &obj4));
    pl_cache_set(test, &obj1);
    pl_cache_set(test, &obj3);
    pl_cache_set(test, &obj4);
    REQUIRE_CMP(pl_cache_size(test), ==, 23, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d");

    // Inserting another 10-byte object should purge entry KEY1
    // (23 + 10 > max_total_size = 32, and KEY1 is the eviction candidate)
    pl_cache_obj obj5 = { .key = KEY5, .data = zero, .size = 10 };
    REQUIRE(pl_cache_try_set(test, &obj5));
    REQUIRE_CMP(pl_cache_size(test), ==, 30, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d");
    REQUIRE(!pl_cache_get(test, &obj1));
    REQUIRE(pl_cache_get(test, &obj3));
    REQUIRE(pl_cache_get(test, &obj4));
    REQUIRE(pl_cache_get(test, &obj5));
    pl_cache_set(test, &obj3);
    pl_cache_set(test, &obj4);
    pl_cache_set(test, &obj5);
    REQUIRE_CMP(pl_cache_size(test), ==, 30, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d");

    // Inserting final 6-byte object should purge entry KEY3
    pl_cache_obj obj6 = { .key = KEY6, .data = zero, .size = 6 };
    REQUIRE(pl_cache_try_set(test, &obj6));
    REQUIRE_CMP(pl_cache_size(test), ==, 32, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 3, "d");
    REQUIRE(!pl_cache_get(test, &obj3));
    REQUIRE(pl_cache_get(test, &obj4));
    REQUIRE(pl_cache_get(test, &obj5));
    REQUIRE(pl_cache_get(test, &obj6));
    REQUIRE_CMP(pl_cache_size(test), ==, 0, "zu");
    REQUIRE_CMP(pl_cache_objects(test), ==, 0, "d");
    pl_cache_obj_free(&obj4);
    pl_cache_obj_free(&obj5);
    pl_cache_obj_free(&obj6);

    // Test callback API: gets are answered by lookup_foobar, sets tracked
    // by update_count via the shared `num_objects` counter
    int num_objects = 0;
    test2 = pl_cache_create(pl_cache_params(
        .get = lookup_foobar,
        .set = update_count,
        .priv = &num_objects,
    ));

    REQUIRE(pl_cache_get(test2, &obj1));
    REQUIRE_CMP(obj1.key, ==, KEY1, PRIu64);
    REQUIRE_CMP(obj1.size, ==, 3, "zu");
    REQUIRE_MEMEQ(obj1.data, "bar", 3);
    REQUIRE(pl_cache_get(test2, &obj2));
    REQUIRE_CMP(obj2.key, ==, KEY2, PRIu64);
    REQUIRE_CMP(obj2.size, ==, 3, "zu");
    REQUIRE_MEMEQ(obj2.data, "foo", 3);
    REQUIRE_CMP(pl_cache_objects(test2), ==, 0, "d");
    REQUIRE_CMP(num_objects, ==, 0, "d");
    REQUIRE(pl_cache_try_set(test2, &obj1));
    REQUIRE(pl_cache_try_set(test2, &obj2));
    REQUIRE(pl_cache_try_set(test2, &(pl_cache_obj) { .key = KEY7, .data = "abcde", .size = 5 }));
    REQUIRE_CMP(pl_cache_objects(test2), ==, 3, "d");
    REQUIRE_CMP(num_objects, ==, 3, "d");
    // Re-setting existing keys deletes them again (count drops to 1)
    REQUIRE(pl_cache_try_set(test2, &obj1));
    REQUIRE(pl_cache_try_set(test2, &obj2));
    REQUIRE_CMP(pl_cache_objects(test2), ==, 1, "d");
    REQUIRE_CMP(num_objects, ==, 1, "d");
    pl_cache_destroy(&test2);

    pl_cache_destroy(&test);
    pl_log_destroy(&log);
    return 0;
}
diff --git a/src/tests/colorspace.c b/src/tests/colorspace.c
new file mode 100644
index 0000000..4b0662b
--- /dev/null
+++ b/src/tests/colorspace.c
@@ -0,0 +1,488 @@
+#include "tests.h"
+
+// Unit test for libplacebo's colorspace math: enum classification helpers,
+// color representation normalization, primaries/gamut matrices, decoding
+// matrices, cone (color blindness) models, and HDR metadata inference.
+int main()
+{
+    // Only the systems between BT_601 and YCGCO are YCbCr-like
+    for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) {
+        bool ycbcr = sys >= PL_COLOR_SYSTEM_BT_601 && sys <= PL_COLOR_SYSTEM_YCGCO;
+        REQUIRE_CMP(ycbcr, ==, pl_color_system_is_ycbcr_like(sys), "d");
+    }
+
+    // Only the transfers between PQ and S_LOG2 are HDR; every transfer must
+    // have a nominal peak of at least 1.0 (relative to SDR white)
+    for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) {
+        bool hdr = trc >= PL_COLOR_TRC_PQ && trc <= PL_COLOR_TRC_S_LOG2;
+        REQUIRE_CMP(hdr, ==, pl_color_transfer_is_hdr(trc), "d");
+        REQUIRE_CMP(pl_color_transfer_nominal_peak(trc), >=, 1.0, "f");
+    }
+
+    // PQ's nominal peak, scaled by SDR white, must hit exactly 10000 nits
+    float pq_peak = pl_color_transfer_nominal_peak(PL_COLOR_TRC_PQ);
+    REQUIRE_FEQ(PL_COLOR_SDR_WHITE * pq_peak, 10000, 1e-7);
+
+    struct pl_color_repr tv_repr = {
+        .sys    = PL_COLOR_SYSTEM_BT_709,
+        .levels = PL_COLOR_LEVELS_LIMITED,
+    };
+
+    struct pl_color_repr pc_repr = {
+        .sys    = PL_COLOR_SYSTEM_RGB,
+        .levels = PL_COLOR_LEVELS_FULL,
+    };
+
+    // Ensure this is a no-op for bits == bits
+    for (int bits = 1; bits <= 16; bits++) {
+        tv_repr.bits.color_depth = tv_repr.bits.sample_depth = bits;
+        pc_repr.bits.color_depth = pc_repr.bits.sample_depth = bits;
+        REQUIRE_FEQ(pl_color_repr_normalize(&tv_repr), 1.0, 1e-7);
+        REQUIRE_FEQ(pl_color_repr_normalize(&pc_repr), 1.0, 1e-7);
+    }
+
+    tv_repr.bits.color_depth  = 8;
+    tv_repr.bits.sample_depth = 10;
+    float tv8to10 = pl_color_repr_normalize(&tv_repr);
+
+    tv_repr.bits.color_depth  = 8;
+    tv_repr.bits.sample_depth = 12;
+    float tv8to12 = pl_color_repr_normalize(&tv_repr);
+
+    // Simulate the effect of GPU texture sampling on UNORM texture:
+    // the normalization factor must map 8-bit reference levels to the
+    // corresponding 10/12-bit reference levels
+    REQUIRE_FEQ(tv8to10 * 16 /1023.,  64/1023., 1e-7); // black
+    REQUIRE_FEQ(tv8to10 * 235/1023., 940/1023., 1e-7); // nominal white
+    REQUIRE_FEQ(tv8to10 * 128/1023., 512/1023., 1e-7); // achromatic
+    REQUIRE_FEQ(tv8to10 * 240/1023., 960/1023., 1e-7); // nominal chroma peak
+
+    REQUIRE_FEQ(tv8to12 * 16 /4095.,  256 /4095., 1e-7); // black
+    REQUIRE_FEQ(tv8to12 * 235/4095., 3760/4095., 1e-7); // nominal white
+    REQUIRE_FEQ(tv8to12 * 128/4095., 2048/4095., 1e-7); // achromatic
+    REQUIRE_FEQ(tv8to12 * 240/4095., 3840/4095., 1e-7); // nominal chroma peak
+
+    // Ensure lavc's xyz12 is handled correctly
+    struct pl_color_repr xyz12 = {
+        .sys    = PL_COLOR_SYSTEM_XYZ,
+        .levels = PL_COLOR_LEVELS_UNKNOWN,
+        .bits   = {
+            .sample_depth = 16,
+            .color_depth  = 12,
+            .bit_shift    = 4,
+        },
+    };
+
+    float xyz = pl_color_repr_normalize(&xyz12);
+    REQUIRE_FEQ(xyz * (4095 << 4), 65535, 1e-7);
+
+    // Assume we uploaded a 10-bit source directly (unshifted) as a 16-bit
+    // texture. This texture multiplication factor should make it behave as if
+    // it was uploaded as a 10-bit texture instead.
+    pc_repr.bits.color_depth = 10;
+    pc_repr.bits.sample_depth = 16;
+    float pc10to16 = pl_color_repr_normalize(&pc_repr);
+    REQUIRE_FEQ(pc10to16 * 1000/65535., 1000/1023., 1e-7);
+
+    // Superset/compatibility relations between common primary sets
+    const struct pl_raw_primaries *bt709, *bt2020, *dcip3;
+    bt709  = pl_raw_primaries_get(PL_COLOR_PRIM_BT_709);
+    bt2020 = pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020);
+    dcip3  = pl_raw_primaries_get(PL_COLOR_PRIM_DCI_P3);
+    REQUIRE(pl_primaries_superset(bt2020, bt709));
+    REQUIRE(!pl_primaries_superset(bt2020, dcip3)); // small region doesn't overlap
+    REQUIRE(pl_primaries_superset(dcip3, bt709));
+    REQUIRE(!pl_primaries_superset(bt709, bt2020));
+    REQUIRE(pl_primaries_compatible(bt2020, bt2020));
+    REQUIRE(pl_primaries_compatible(bt2020, bt709));
+    REQUIRE(pl_primaries_compatible(bt709, bt2020));
+    REQUIRE(pl_primaries_compatible(bt2020, dcip3));
+    REQUIRE(pl_primaries_compatible(bt709, dcip3));
+
+    // Clipping a gamut to a superset (or being clipped by one) must yield
+    // (something similar to) the smaller gamut
+    struct pl_raw_primaries bt709_2020 = pl_primaries_clip(bt709, bt2020);
+    struct pl_raw_primaries bt2020_709 = pl_primaries_clip(bt2020, bt709);
+    REQUIRE(pl_raw_primaries_similar(&bt709_2020, bt709));
+    REQUIRE(pl_raw_primaries_similar(&bt2020_709, bt709));
+
+    struct pl_raw_primaries dcip3_bt2020 = pl_primaries_clip(dcip3, bt2020);
+    struct pl_raw_primaries dcip3_bt709  = pl_primaries_clip(dcip3, bt709);
+    REQUIRE(pl_primaries_superset(dcip3,  &dcip3_bt2020));
+    REQUIRE(pl_primaries_superset(dcip3,  &dcip3_bt709));
+    REQUIRE(pl_primaries_superset(bt2020, &dcip3_bt2020));
+    REQUIRE(pl_primaries_superset(bt709,  &dcip3_bt709));
+
+    pl_matrix3x3 rgb2xyz, rgb2xyz_;
+    rgb2xyz = rgb2xyz_ = pl_get_rgb2xyz_matrix(bt709);
+    pl_matrix3x3_invert(&rgb2xyz_);
+    pl_matrix3x3_invert(&rgb2xyz_);
+
+    // Make sure the double-inversion round trips
+    for (int y = 0; y < 3; y++) {
+        for (int x = 0; x < 3; x++)
+            REQUIRE_FEQ(rgb2xyz.m[y][x], rgb2xyz_.m[y][x], 1e-6);
+    }
+
+    // Make sure mapping the spectral RGB colors (i.e. the matrix rows) matches
+    // our original primaries
+    float Y = rgb2xyz.m[1][0];
+    REQUIRE_FEQ(rgb2xyz.m[0][0], pl_cie_X(bt709->red) * Y, 1e-7);
+    REQUIRE_FEQ(rgb2xyz.m[2][0], pl_cie_Z(bt709->red) * Y, 1e-7);
+    Y = rgb2xyz.m[1][1];
+    REQUIRE_FEQ(rgb2xyz.m[0][1], pl_cie_X(bt709->green) * Y, 1e-7);
+    REQUIRE_FEQ(rgb2xyz.m[2][1], pl_cie_Z(bt709->green) * Y, 1e-7);
+    Y = rgb2xyz.m[1][2];
+    REQUIRE_FEQ(rgb2xyz.m[0][2], pl_cie_X(bt709->blue) * Y, 1e-7);
+    REQUIRE_FEQ(rgb2xyz.m[2][2], pl_cie_Z(bt709->blue) * Y, 1e-7);
+
+    // Make sure the gamut mapping round-trips
+    pl_matrix3x3 bt709_bt2020, bt2020_bt709;
+    bt709_bt2020 = pl_get_color_mapping_matrix(bt709, bt2020, PL_INTENT_RELATIVE_COLORIMETRIC);
+    bt2020_bt709 = pl_get_color_mapping_matrix(bt2020, bt709, PL_INTENT_RELATIVE_COLORIMETRIC);
+    for (int n = 0; n < 10; n++) {
+        float vec[3] = { RANDOM, RANDOM, RANDOM };
+        float dst[3] = { vec[0], vec[1], vec[2] };
+        pl_matrix3x3_apply(&bt709_bt2020, dst);
+        pl_matrix3x3_apply(&bt2020_bt709, dst);
+        for (int i = 0; i < 3; i++)
+            REQUIRE_FEQ(dst[i], vec[i], 1e-6);
+    }
+
+    // Ensure the decoding matrix round-trips to white/black
+    for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) {
+        if (!pl_color_system_is_linear(sys))
+            continue;
+
+        printf("testing color system %u\n", (unsigned) sys);
+        struct pl_color_repr repr = {
+            .levels = PL_COLOR_LEVELS_LIMITED,
+            .sys = sys,
+            .bits = {
+                // synthetic test
+                .color_depth = 8,
+                .sample_depth = 10,
+            },
+        };
+
+        float scale = pl_color_repr_normalize(&repr);
+        pl_transform3x3 yuv2rgb = pl_color_repr_decode(&repr, NULL);
+        pl_matrix3x3_scale(&yuv2rgb.mat, scale);
+
+        // 10-bit limited-range reference levels for white/black, in both
+        // YCbCr-style (chroma-centered) and RGB-style encodings
+        static const float white_ycbcr[3] = { 235/1023., 128/1023., 128/1023. };
+        static const float black_ycbcr[3] = { 16/1023.,  128/1023., 128/1023. };
+        static const float white_other[3] = { 235/1023., 235/1023., 235/1023. };
+        static const float black_other[3] = { 16/1023.,  16/1023.,  16/1023. };
+
+        float white[3], black[3];
+        for (int i = 0; i < 3; i++) {
+            if (pl_color_system_is_ycbcr_like(sys)) {
+                white[i] = white_ycbcr[i];
+                black[i] = black_ycbcr[i];
+            } else {
+                white[i] = white_other[i];
+                black[i] = black_other[i];
+            }
+        }
+
+        pl_transform3x3_apply(&yuv2rgb, white);
+        REQUIRE_FEQ(white[0], 1.0, 1e-6);
+        REQUIRE_FEQ(white[1], 1.0, 1e-6);
+        REQUIRE_FEQ(white[2], 1.0, 1e-6);
+
+        pl_transform3x3_apply(&yuv2rgb, black);
+        REQUIRE_FEQ(black[0], 0.0, 1e-6);
+        REQUIRE_FEQ(black[1], 0.0, 1e-6);
+        REQUIRE_FEQ(black[2], 0.0, 1e-6);
+    }
+
+    // Make sure chromatic adaptation works: BT.709 primaries with a D50
+    // white point, adapted to the stock (D65) BT.709, must map white to white
+    struct pl_raw_primaries bt709_d50;
+    bt709_d50 = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709);
+    bt709_d50.white = (struct pl_cie_xy) { 0.34567, 0.35850 };
+
+    pl_matrix3x3 d50_d65;
+    d50_d65 = pl_get_color_mapping_matrix(&bt709_d50, bt709, PL_INTENT_RELATIVE_COLORIMETRIC);
+
+    float white[3] = { 1.0, 1.0, 1.0 };
+    pl_matrix3x3_apply(&d50_d65, white);
+    REQUIRE_FEQ(white[0], 1.0, 1e-6);
+    REQUIRE_FEQ(white[1], 1.0, 1e-6);
+    REQUIRE_FEQ(white[2], 1.0, 1e-6);
+
+    // Simulate a typical 10-bit YCbCr -> 16 bit texture conversion
+    tv_repr.bits.color_depth  = 10;
+    tv_repr.bits.sample_depth = 16;
+    pl_transform3x3 yuv2rgb;
+    yuv2rgb = pl_color_repr_decode(&tv_repr, NULL);
+    float test[3] = { 575/65535., 336/65535., 640/65535. };
+    pl_transform3x3_apply(&yuv2rgb, test);
+    REQUIRE_FEQ(test[0], 0.808305, 1e-6);
+    REQUIRE_FEQ(test[1], 0.553254, 1e-6);
+    REQUIRE_FEQ(test[2], 0.218841, 1e-6);
+
+    // Heuristic guessing of color system / primaries from resolution:
+    // DVD
+    REQUIRE_CMP(pl_color_system_guess_ycbcr(720, 480), ==, PL_COLOR_SYSTEM_BT_601, "u");
+    REQUIRE_CMP(pl_color_system_guess_ycbcr(720, 576), ==, PL_COLOR_SYSTEM_BT_601, "u");
+    REQUIRE_CMP(pl_color_primaries_guess(720, 576), ==, PL_COLOR_PRIM_BT_601_625, "u");
+    REQUIRE_CMP(pl_color_primaries_guess(720, 480), ==, PL_COLOR_PRIM_BT_601_525, "u");
+    // PAL 16:9
+    REQUIRE_CMP(pl_color_system_guess_ycbcr(1024, 576), ==, PL_COLOR_SYSTEM_BT_601, "u");
+    REQUIRE_CMP(pl_color_primaries_guess(1024, 576), ==, PL_COLOR_PRIM_BT_601_625, "u");
+    // HD
+    REQUIRE_CMP(pl_color_system_guess_ycbcr(1280, 720), ==, PL_COLOR_SYSTEM_BT_709, "u");
+    REQUIRE_CMP(pl_color_system_guess_ycbcr(1920, 1080), ==, PL_COLOR_SYSTEM_BT_709, "u");
+    REQUIRE_CMP(pl_color_primaries_guess(1280, 720), ==, PL_COLOR_PRIM_BT_709, "u");
+    REQUIRE_CMP(pl_color_primaries_guess(1920, 1080), ==, PL_COLOR_PRIM_BT_709, "u");
+
+    // Odd/weird videos
+    REQUIRE_CMP(pl_color_primaries_guess(2000, 576), ==, PL_COLOR_PRIM_BT_709, "u");
+    REQUIRE_CMP(pl_color_primaries_guess(200, 200), ==, PL_COLOR_PRIM_BT_709, "u");
+
+    REQUIRE(pl_color_repr_equal(&pl_color_repr_sdtv, &pl_color_repr_sdtv));
+    REQUIRE(!pl_color_repr_equal(&pl_color_repr_sdtv, &pl_color_repr_hdtv));
+
+    // Merging into an unknown repr must fill in all fields from the source
+    struct pl_color_repr repr = pl_color_repr_unknown;
+    pl_color_repr_merge(&repr, &pl_color_repr_uhdtv);
+    REQUIRE(pl_color_repr_equal(&repr, &pl_color_repr_uhdtv));
+
+    REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_UNKNOWN));
+    REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_601_525));
+    REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_601_625));
+    REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_709));
+    REQUIRE(!pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_470M));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_BT_2020));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_APPLE));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_ADOBE));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_PRO_PHOTO));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_CIE_1931));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_DCI_P3));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_DISPLAY_P3));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_V_GAMUT));
+    REQUIRE(pl_color_primaries_is_wide_gamut(PL_COLOR_PRIM_S_GAMUT));
+
+    struct pl_color_space space = pl_color_space_unknown;
+    pl_color_space_merge(&space, &pl_color_space_bt709);
+    REQUIRE(pl_color_space_equal(&space, &pl_color_space_bt709));
+
+    // Infer some color spaces
+    struct pl_color_space hlg = {
+        .primaries = PL_COLOR_PRIM_BT_2020,
+        .transfer = PL_COLOR_TRC_HLG,
+    };
+
+    pl_color_space_infer(&hlg);
+    REQUIRE_CMP(hlg.hdr.max_luma, ==, PL_COLOR_HLG_PEAK, "f");
+
+    // A fully-unknown space must infer to the SDR display default
+    struct pl_color_space unknown = {0};
+    struct pl_color_space display = {
+        .primaries = PL_COLOR_PRIM_BT_709,
+        .transfer = PL_COLOR_TRC_BT_1886,
+    };
+
+    pl_color_space_infer(&unknown);
+    pl_color_space_infer(&display);
+    REQUIRE(pl_color_space_equal(&unknown, &display));
+
+    // Chroma location offsets, in units of luma texels
+    float x, y;
+    pl_chroma_location_offset(PL_CHROMA_LEFT, &x, &y);
+    REQUIRE_CMP(x, ==, -0.5f, "f");
+    REQUIRE_CMP(y, ==,  0.0f, "f");
+    pl_chroma_location_offset(PL_CHROMA_TOP_LEFT, &x, &y);
+    REQUIRE_CMP(x, ==, -0.5f, "f");
+    REQUIRE_CMP(y, ==, -0.5f, "f");
+    pl_chroma_location_offset(PL_CHROMA_CENTER, &x, &y);
+    REQUIRE_CMP(x, ==,  0.0f, "f");
+    REQUIRE_CMP(y, ==,  0.0f, "f");
+    pl_chroma_location_offset(PL_CHROMA_BOTTOM_CENTER, &x, &y);
+    REQUIRE_CMP(x, ==,  0.0f, "f");
+    REQUIRE_CMP(y, ==,  0.5f, "f");
+
+    // Unknown primaries alias BT.709 (same pointer, not just same values)
+    REQUIRE_CMP(pl_raw_primaries_get(PL_COLOR_PRIM_UNKNOWN), ==,
+                pl_raw_primaries_get(PL_COLOR_PRIM_BT_709), "p");
+
+    // Color blindness tests
+    float red[3]   = { 1.0, 0.0, 0.0 };
+    float green[3] = { 0.0, 1.0, 0.0 };
+    float blue[3]  = { 0.0, 0.0, 1.0 };
+
+// Applies the given cone model to `color` and requires the result to
+// round-trip (i.e. the model must leave that color unchanged)
+#define TEST_CONE(model, color)                                             \
+    do {                                                                    \
+        float tmp[3] = { (color)[0], (color)[1], (color)[2] };              \
+        pl_matrix3x3 mat = pl_get_cone_matrix(&(model), bt709);             \
+        pl_matrix3x3_apply(&mat, tmp);                                      \
+        printf("%s + %s = %f %f %f\n", #model, #color, tmp[0], tmp[1], tmp[2]); \
+        for (int i = 0; i < 3; i++)                                         \
+            REQUIRE_FEQ((color)[i], tmp[i], 1e-5f);                         \
+    } while(0)
+
+    struct pl_cone_params red_only   = { .cones = PL_CONE_MS };
+    struct pl_cone_params green_only = { .cones = PL_CONE_LS };
+    struct pl_cone_params blue_only  = pl_vision_monochromacy;
+
+    // These models should all round-trip white
+    TEST_CONE(pl_vision_normal, white);
+    TEST_CONE(pl_vision_protanopia, white);
+    TEST_CONE(pl_vision_protanomaly, white);
+    TEST_CONE(pl_vision_deuteranomaly, white);
+    TEST_CONE(pl_vision_tritanomaly, white);
+    TEST_CONE(pl_vision_achromatopsia, white);
+    TEST_CONE(red_only, white);
+    TEST_CONE(green_only, white);
+    TEST_CONE(blue_only, white);
+
+    // These models should round-trip blue
+    TEST_CONE(pl_vision_normal, blue);
+    TEST_CONE(pl_vision_protanomaly, blue);
+    TEST_CONE(pl_vision_deuteranomaly, blue);
+
+    // These models should round-trip red
+    TEST_CONE(pl_vision_normal, red);
+    TEST_CONE(pl_vision_tritanomaly, red);
+    TEST_CONE(pl_vision_tritanopia, red);
+
+    // These models should round-trip green
+    TEST_CONE(pl_vision_normal, green);
+
+    // Color adaptation tests: CCT -> xy for the standard D65/D55 illuminants
+    struct pl_cie_xy d65 = pl_white_from_temp(6504);
+    REQUIRE_FEQ(d65.x, 0.31271, 1e-3);
+    REQUIRE_FEQ(d65.y, 0.32902, 1e-3);
+    struct pl_cie_xy d55 = pl_white_from_temp(5503);
+    REQUIRE_FEQ(d55.x, 0.33242, 1e-3);
+    REQUIRE_FEQ(d55.y, 0.34743, 1e-3);
+
+    // Make sure we infer the correct set of metadata parameters.
+    // MIN/MAX/AVG are given in nits and compared in PQ space.
+#define TEST_METADATA(CSP, TYPE, MIN, MAX, AVG)                             \
+    do {                                                                    \
+        float _min, _max, _avg;                                             \
+        pl_color_space_nominal_luma_ex(pl_nominal_luma_params(              \
+            .color      = &(CSP),                                           \
+            .metadata   = TYPE,                                             \
+            .scaling    = PL_HDR_PQ,                                        \
+            .out_min    = &_min,                                            \
+            .out_max    = &_max,                                            \
+            .out_avg    = &_avg,                                            \
+        ));                                                                 \
+        const float _min_ref = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, MIN); \
+        const float _max_ref = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, MAX); \
+        const float _avg_ref = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, AVG); \
+        REQUIRE_FEQ(_min, _min_ref, 1e-5);                                  \
+        REQUIRE_FEQ(_max, _max_ref, 1e-5);                                  \
+        REQUIRE_FEQ(_avg, _avg_ref, 1e-5);                                  \
+    } while (0)
+
+    const struct pl_color_space hdr10plus = {
+        .primaries = PL_COLOR_PRIM_BT_2020,
+        .transfer = PL_COLOR_TRC_PQ,
+        .hdr = {
+            .min_luma = 0.005,
+            .max_luma = 4000,
+            .scene_max = {596.69, 1200, 500},
+            .scene_avg = 300,
+        },
+    };
+
+    REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_ANY));
+    REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_NONE));
+    REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_HDR10));
+    REQUIRE(pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_HDR10PLUS));
+    REQUIRE(!pl_hdr_metadata_contains(&hdr10plus.hdr, PL_HDR_METADATA_CIE_Y));
+
+    TEST_METADATA(hdr10plus, PL_HDR_METADATA_NONE,      PL_COLOR_HDR_BLACK, 10000, 0);
+    TEST_METADATA(hdr10plus, PL_HDR_METADATA_CIE_Y,     PL_COLOR_HDR_BLACK, 4000,  0);
+    TEST_METADATA(hdr10plus, PL_HDR_METADATA_HDR10,     PL_COLOR_HDR_BLACK, 4000,  0);
+    TEST_METADATA(hdr10plus, PL_HDR_METADATA_HDR10PLUS, PL_COLOR_HDR_BLACK, 1000,  250);
+    TEST_METADATA(hdr10plus, PL_HDR_METADATA_ANY,       PL_COLOR_HDR_BLACK, 1000,  250);
+
+    const struct pl_color_space dovi = {
+        .primaries = PL_COLOR_PRIM_BT_2020,
+        .transfer = PL_COLOR_TRC_PQ,
+        .hdr = {
+            .min_luma = 0.005,
+            .max_luma = 4000,
+            .max_pq_y = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 1000),
+            .avg_pq_y = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, 250),
+        },
+    };
+
+    REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_ANY));
+    REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_NONE));
+    REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_HDR10));
+    REQUIRE(pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_CIE_Y));
+    REQUIRE(!pl_hdr_metadata_contains(&dovi.hdr, PL_HDR_METADATA_HDR10PLUS));
+
+    TEST_METADATA(dovi, PL_HDR_METADATA_NONE,      PL_COLOR_HDR_BLACK, 10000, 0);
+    TEST_METADATA(dovi, PL_HDR_METADATA_HDR10,     PL_COLOR_HDR_BLACK, 4000,  0);
+    TEST_METADATA(dovi, PL_HDR_METADATA_HDR10PLUS, PL_COLOR_HDR_BLACK, 4000,  0);
+    TEST_METADATA(dovi, PL_HDR_METADATA_CIE_Y,     PL_COLOR_HDR_BLACK, 1000,  250);
+    TEST_METADATA(dovi, PL_HDR_METADATA_ANY,       PL_COLOR_HDR_BLACK, 1000,  250);
+
+    const struct pl_color_space hlg4000 = {
+        .primaries = PL_COLOR_PRIM_BT_2020,
+        .transfer = PL_COLOR_TRC_HLG,
+        .hdr.max_luma = 4000,
+        .hdr.min_luma = 0.005,
+    };
+
+    TEST_METADATA(hlg4000, PL_HDR_METADATA_NONE,  PL_COLOR_HDR_BLACK, PL_COLOR_HLG_PEAK, 0);
+    TEST_METADATA(hlg4000, PL_HDR_METADATA_HDR10, 0.005, 4000, 0);
+    TEST_METADATA(hlg4000, PL_HDR_METADATA_ANY,   0.005, 4000, 0);
+
+    const struct pl_color_space untagged = {
+        .primaries = PL_COLOR_PRIM_BT_709,
+        .transfer = PL_COLOR_TRC_BT_1886,
+    };
+
+    REQUIRE(pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_NONE));
+    REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_ANY));
+    REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_HDR10));
+    REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_CIE_Y));
+    REQUIRE(!pl_hdr_metadata_contains(&untagged.hdr, PL_HDR_METADATA_HDR10PLUS));
+
+    const float sdr_black = PL_COLOR_SDR_WHITE / PL_COLOR_SDR_CONTRAST;
+    TEST_METADATA(untagged, PL_HDR_METADATA_NONE, sdr_black, PL_COLOR_SDR_WHITE, 0);
+    TEST_METADATA(untagged, PL_HDR_METADATA_ANY,  sdr_black, PL_COLOR_SDR_WHITE, 0);
+
+    const struct pl_color_space sdr50 = {
+        .primaries = PL_COLOR_PRIM_BT_709,
+        .transfer = PL_COLOR_TRC_BT_1886,
+        .hdr.max_luma = 50,
+    };
+
+    REQUIRE(pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_NONE));
+    REQUIRE(pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_ANY));
+    REQUIRE(pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_HDR10));
+    REQUIRE(!pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_CIE_Y));
+    REQUIRE(!pl_hdr_metadata_contains(&sdr50.hdr, PL_HDR_METADATA_HDR10PLUS));
+
+    TEST_METADATA(sdr50, PL_HDR_METADATA_NONE,  sdr_black, PL_COLOR_SDR_WHITE, 0);
+    TEST_METADATA(sdr50, PL_HDR_METADATA_HDR10, 50 / PL_COLOR_SDR_CONTRAST, 50, 0);
+    TEST_METADATA(sdr50, PL_HDR_METADATA_ANY,   50 / PL_COLOR_SDR_CONTRAST, 50, 0);
+
+    const struct pl_color_space sdr10k = {
+        .primaries = PL_COLOR_PRIM_BT_709,
+        .transfer = PL_COLOR_TRC_BT_1886,
+        .hdr.min_luma = PL_COLOR_SDR_WHITE / 10000,
+    };
+
+    REQUIRE(pl_hdr_metadata_contains(&sdr10k.hdr, PL_HDR_METADATA_NONE));
+    REQUIRE(!pl_hdr_metadata_contains(&sdr10k.hdr, PL_HDR_METADATA_ANY));
+    REQUIRE(!pl_hdr_metadata_contains(&sdr10k.hdr, PL_HDR_METADATA_HDR10));
+    TEST_METADATA(sdr10k, PL_HDR_METADATA_NONE,  sdr_black, PL_COLOR_SDR_WHITE, 0);
+    TEST_METADATA(sdr10k, PL_HDR_METADATA_HDR10, PL_COLOR_SDR_WHITE / 10000, PL_COLOR_SDR_WHITE, 0);
+    TEST_METADATA(sdr10k, PL_HDR_METADATA_ANY,   PL_COLOR_SDR_WHITE / 10000, PL_COLOR_SDR_WHITE, 0);
+
+    // Out-of-range or nonsensical metadata must be rejected and fall back
+    // to sane defaults for the given transfer function
+    const struct pl_color_space bogus_vals = {
+        .primaries = PL_COLOR_PRIM_BT_2020,
+        .transfer = PL_COLOR_TRC_HLG,
+        .hdr.min_luma = 1e-9,
+        .hdr.max_luma = 1000000,
+    };
+
+    const struct pl_color_space bogus_flip = {
+        .primaries = PL_COLOR_PRIM_BT_2020,
+        .transfer = PL_COLOR_TRC_PQ,
+        .hdr.min_luma = 4000,
+        .hdr.max_luma = 0.05,
+    };
+
+    const struct pl_color_space bogus_sign = {
+        .primaries = PL_COLOR_PRIM_BT_2020,
+        .transfer = PL_COLOR_TRC_HLG,
+        .hdr.min_luma = -0.5,
+        .hdr.max_luma = -4000,
+    };
+
+    TEST_METADATA(bogus_vals, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, 10000, 0);
+    TEST_METADATA(bogus_flip, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, 10000, 0);
+    TEST_METADATA(bogus_sign, PL_HDR_METADATA_HDR10, PL_COLOR_HDR_BLACK, PL_COLOR_HLG_PEAK, 0);
+}
diff --git a/src/tests/common.c b/src/tests/common.c
new file mode 100644
index 0000000..849971e
--- /dev/null
+++ b/src/tests/common.c
@@ -0,0 +1,136 @@
+#include "tests.h"
+
+// Returns a pseudo-random integer roughly centered around zero, by shifting
+// rand()'s [0, RAND_MAX] output down by half its range.
+static int irand()
+{
+    const int half_range = RAND_MAX / 2;
+    return rand() - half_range;
+}
+
+// Unit test for misc geometry helpers: rect normalization/rounding,
+// 3x3 transform inversion, and aspect-ratio fitting.
+int main()
+{
+    // Smoke-test logger creation / update / teardown
+    pl_log log = pl_test_logger();
+    pl_log_update(log, NULL);
+    pl_log_destroy(&log);
+
+    // Test some misc helper functions
+    pl_rect2d rc2 = {
+        irand(), irand(),
+        irand(), irand(),
+    };
+
+    pl_rect3d rc3 = {
+        irand(), irand(), irand(),
+        irand(), irand(), irand(),
+    };
+
+    // Normalization must order the coordinates (x0 <= x1 etc.)
+    pl_rect2d_normalize(&rc2);
+    REQUIRE_CMP(rc2.x1, >=, rc2.x0, "d");
+    REQUIRE_CMP(rc2.y1, >=, rc2.y0, "d");
+
+    pl_rect3d_normalize(&rc3);
+    REQUIRE_CMP(rc3.x1, >=, rc3.x0, "d");
+    REQUIRE_CMP(rc3.y1, >=, rc3.y0, "d");
+    REQUIRE_CMP(rc3.z1, >=, rc3.z0, "d");
+
+    pl_rect2df rc2f = {
+        RANDOM, RANDOM,
+        RANDOM, RANDOM,
+    };
+
+    pl_rect3df rc3f = {
+        RANDOM, RANDOM, RANDOM,
+        RANDOM, RANDOM, RANDOM,
+    };
+
+    pl_rect2df_normalize(&rc2f);
+    REQUIRE_CMP(rc2f.x1, >=, rc2f.x0, "f");
+    REQUIRE_CMP(rc2f.y1, >=, rc2f.y0, "f");
+
+    pl_rect3df_normalize(&rc3f);
+    REQUIRE_CMP(rc3f.x1, >=, rc3f.x0, "f");
+    REQUIRE_CMP(rc3f.y1, >=, rc3f.y0, "f");
+    REQUIRE_CMP(rc3f.z1, >=, rc3f.z0, "f");
+
+    // Rounding must move each coordinate by at most half a unit
+    pl_rect2d rc2r = pl_rect2df_round(&rc2f);
+    pl_rect3d rc3r = pl_rect3df_round(&rc3f);
+
+    REQUIRE_CMP(fabs(rc2r.x0 - rc2f.x0), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc2r.x1 - rc2f.x1), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc2r.y0 - rc2f.y0), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc2r.y1 - rc2f.y1), <=, 0.5, "f");
+
+    REQUIRE_CMP(fabs(rc3r.x0 - rc3f.x0), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc3r.x1 - rc3f.x1), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc3r.y0 - rc3f.y0), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc3r.y1 - rc3f.y1), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc3r.z0 - rc3f.z0), <=, 0.5, "f");
+    REQUIRE_CMP(fabs(rc3r.z1 - rc3f.z1), <=, 0.5, "f");
+
+    pl_transform3x3 tr = {
+        .mat = {{
+            { RANDOM, RANDOM, RANDOM },
+            { RANDOM, RANDOM, RANDOM },
+            { RANDOM, RANDOM, RANDOM },
+        }},
+        .c = { RANDOM, RANDOM, RANDOM },
+    };
+
+    // scale -> invert -> invert -> unscale must round-trip the transform
+    pl_transform3x3 tr2 = tr;
+    float scale = 1.0 + RANDOM;
+    pl_transform3x3_scale(&tr2, scale);
+    pl_transform3x3_invert(&tr2);
+    pl_transform3x3_invert(&tr2);
+    pl_transform3x3_scale(&tr2, 1.0 / scale);
+
+    for (int i = 0; i < 3; i++) {
+        for (int j = 0; j < 3; j++) {
+            printf("%f %f\n", tr.mat.m[i][j], tr2.mat.m[i][j]);
+            REQUIRE_FEQ(tr.mat.m[i][j], tr2.mat.m[i][j], 1e-4);
+        }
+        REQUIRE_FEQ(tr.c[i], tr2.c[i], 1e-4);
+    }
+
+    // Test aspect ratio code
+    const pl_rect2df rc1080p = {0, 0, 1920, 1080};
+    const pl_rect2df rc43 = {0, 0, 1024, 768};
+    pl_rect2df rc;
+
+    REQUIRE_FEQ(pl_rect2df_aspect(&rc1080p), 16.0/9.0, 1e-8);
+    REQUIRE_FEQ(pl_rect2df_aspect(&rc43), 4.0/3.0, 1e-8);
+
+// Midpoint helpers, used to verify that aspect adjustments stay centered
+#define pl_rect2df_midx(rc) (((rc).x0 + (rc).x1) / 2.0)
+#define pl_rect2df_midy(rc) (((rc).y0 + (rc).y1) / 2.0)
+
+    // Forcing an aspect ratio must produce that aspect while preserving
+    // the rect's center, for a range of aspects and panscan values
+    for (float aspect = 0.2; aspect < 3.0; aspect += 0.4) {
+        for (float scan = 0.0; scan <= 1.0; scan += 0.5) {
+            rc = rc1080p;
+            pl_rect2df_aspect_set(&rc, aspect, scan);
+            printf("aspect %.2f, panscan %.1f: {%f %f} -> {%f %f}\n",
+                   aspect, scan, rc.x0, rc.y0, rc.x1, rc.y1);
+            REQUIRE_FEQ(pl_rect2df_aspect(&rc), aspect, 1e-6);
+            REQUIRE_FEQ(pl_rect2df_midx(rc), pl_rect2df_midx(rc1080p), 1e-6);
+            REQUIRE_FEQ(pl_rect2df_midy(rc), pl_rect2df_midy(rc1080p), 1e-6);
+        }
+    }
+
+    // Fitting must adopt the target's aspect and size, centered on the source
+    rc = rc1080p;
+    pl_rect2df_aspect_fit(&rc, &rc43, 0.0);
+    REQUIRE_FEQ(pl_rect2df_aspect(&rc), pl_rect2df_aspect(&rc43), 1e-6);
+    REQUIRE_FEQ(pl_rect2df_midx(rc), pl_rect2df_midx(rc1080p), 1e-6);
+    REQUIRE_FEQ(pl_rect2df_midy(rc), pl_rect2df_midy(rc1080p), 1e-6);
+    REQUIRE_FEQ(pl_rect_w(rc), pl_rect_w(rc43), 1e-6);
+    REQUIRE_FEQ(pl_rect_h(rc), pl_rect_h(rc43), 1e-6);
+
+    rc = rc43;
+    pl_rect2df_aspect_fit(&rc, &rc1080p, 0.0);
+    REQUIRE_FEQ(pl_rect2df_aspect(&rc), pl_rect2df_aspect(&rc1080p), 1e-6);
+    REQUIRE_FEQ(pl_rect2df_midx(rc), pl_rect2df_midx(rc43), 1e-6);
+    REQUIRE_FEQ(pl_rect2df_midy(rc), pl_rect2df_midy(rc43), 1e-6);
+    REQUIRE_FEQ(pl_rect_w(rc), pl_rect_w(rc43), 1e-6);
+
+    // Offsetting a flipped rect (x0 > x1, y0 > y1) moves it against the
+    // offset direction
+    rc = (pl_rect2df) { 1920, 1080, 0, 0 };
+    pl_rect2df_offset(&rc, 50, 100);
+    REQUIRE_FEQ(rc.x0, 1870, 1e-6);
+    REQUIRE_FEQ(rc.x1, -50, 1e-6);
+    REQUIRE_FEQ(rc.y0, 980, 1e-6);
+    REQUIRE_FEQ(rc.y1, -100, 1e-6);
+}
diff --git a/src/tests/d3d11.c b/src/tests/d3d11.c
new file mode 100644
index 0000000..256af35
--- /dev/null
+++ b/src/tests/d3d11.c
@@ -0,0 +1,59 @@
+#include "gpu_tests.h"
+#include "d3d11/gpu.h"
+#include <dxgi1_2.h>
+
+#include <libplacebo/d3d11.h>
+
+// Runs the GPU shader test suite against every D3D11-capable adapter on the
+// system. Skips (rather than fails) when DXGI is unavailable.
+int main()
+{
+    pl_log log = pl_test_logger();
+    IDXGIFactory1 *factory = NULL;
+    IDXGIAdapter1 *adapter1 = NULL;
+    HRESULT hr;
+
+    // Load dxgi.dll dynamically so the test binary can still start on
+    // systems without it
+    HMODULE dxgi = LoadLibraryW(L"dxgi.dll");
+    if (!dxgi)
+        return SKIP;
+
+    // __typeof__ is a GNU C extension (this file is built with gcc/clang)
+    __typeof__(&CreateDXGIFactory1) pCreateDXGIFactory1 =
+        (void *) GetProcAddress(dxgi, "CreateDXGIFactory1");
+    if (!pCreateDXGIFactory1)
+        return SKIP;
+
+    hr = pCreateDXGIFactory1(&IID_IDXGIFactory1, (void **) &factory);
+    if (FAILED(hr)) {
+        printf("Failed to create DXGI factory\n");
+        return SKIP;
+    }
+
+    // Test all attached devices
+    // NOTE(review): the early SKIP returns below leak `factory`/`adapter1`,
+    // and `dxgi` is never FreeLibrary'd — harmless for a short-lived test
+    // process, but worth confirming this is intentional.
+    for (int i = 0;; i++) {
+        hr = IDXGIFactory1_EnumAdapters1(factory, i, &adapter1);
+        if (hr == DXGI_ERROR_NOT_FOUND)
+            break; // no more adapters
+        if (FAILED(hr)) {
+            printf("Failed to enumerate adapters\n");
+            return SKIP;
+        }
+
+        DXGI_ADAPTER_DESC1 desc;
+        hr = IDXGIAdapter1_GetDesc1(adapter1, &desc);
+        if (FAILED(hr)) {
+            printf("Failed to enumerate adapters\n");
+            return SKIP;
+        }
+        // Only the LUID from `desc` is needed; release the adapter early
+        SAFE_RELEASE(adapter1);
+
+        // Re-create the device by LUID so pl_d3d11 picks this exact adapter
+        const struct pl_d3d11_t *d3d11 = pl_d3d11_create(log, pl_d3d11_params(
+            .debug = true,
+            .adapter_luid = desc.AdapterLuid,
+        ));
+        REQUIRE(d3d11);
+
+        gpu_shader_tests(d3d11->gpu);
+
+        pl_d3d11_destroy(&d3d11);
+    }
+
+    SAFE_RELEASE(factory);
+}
diff --git a/src/tests/dav1d.c b/src/tests/dav1d.c
new file mode 100644
index 0000000..7e2439f
--- /dev/null
+++ b/src/tests/dav1d.c
@@ -0,0 +1,45 @@
+#include "tests.h"
+#include "libplacebo/utils/dav1d.h"
+
+// Verifies that every libplacebo <-> dav1d enum translation round-trips,
+// whenever the reverse mapping yields a known (non-zero) value.
+int main()
+{
+    for (enum pl_color_system csp = 0; csp < PL_COLOR_SYSTEM_COUNT; csp++) {
+        // Exceptions to the rule, due to different handling in dav1d
+        if (csp == PL_COLOR_SYSTEM_BT_2100_HLG || csp == PL_COLOR_SYSTEM_XYZ)
+            continue;
+
+        enum Dav1dMatrixCoefficients matrix = pl_system_to_dav1d(csp);
+        enum pl_color_system roundtrip = pl_system_from_dav1d(matrix);
+        if (roundtrip)
+            REQUIRE_CMP(csp, ==, roundtrip, "u");
+    }
+
+    for (enum pl_color_levels levels = 0; levels < PL_COLOR_LEVELS_COUNT; levels++) {
+        int range = pl_levels_to_dav1d(levels);
+        enum pl_color_levels roundtrip = pl_levels_from_dav1d(range);
+        if (levels != PL_COLOR_LEVELS_UNKNOWN)
+            REQUIRE_CMP(levels, ==, roundtrip, "u");
+    }
+
+    for (enum pl_color_primaries primaries = 0; primaries < PL_COLOR_PRIM_COUNT; primaries++) {
+        enum Dav1dColorPrimaries dav1d_prim = pl_primaries_to_dav1d(primaries);
+        enum pl_color_primaries roundtrip = pl_primaries_from_dav1d(dav1d_prim);
+        if (roundtrip)
+            REQUIRE_CMP(primaries, ==, roundtrip, "u");
+    }
+
+    for (enum pl_color_transfer transfer = 0; transfer < PL_COLOR_TRC_COUNT; transfer++) {
+        enum Dav1dTransferCharacteristics dav1d_trc = pl_transfer_to_dav1d(transfer);
+        enum pl_color_transfer roundtrip = pl_transfer_from_dav1d(dav1d_trc);
+        if (roundtrip)
+            REQUIRE_CMP(transfer, ==, roundtrip, "u");
+    }
+
+    for (enum pl_chroma_location chroma = 0; chroma < PL_CHROMA_COUNT; chroma++) {
+        enum Dav1dChromaSamplePosition dav1d_chroma = pl_chroma_to_dav1d(chroma);
+        enum pl_chroma_location roundtrip = pl_chroma_from_dav1d(dav1d_chroma);
+        if (roundtrip)
+            REQUIRE_CMP(chroma, ==, roundtrip, "u");
+    }
+}
diff --git a/src/tests/dither.c b/src/tests/dither.c
new file mode 100644
index 0000000..c9f639c
--- /dev/null
+++ b/src/tests/dither.c
@@ -0,0 +1,41 @@
+#include "tests.h"
+
+#include <libplacebo/dither.h>
+#include <libplacebo/shaders/dithering.h>
+
+#define SHIFT 4
+#define SIZE (1 << SHIFT)
+float data[SIZE][SIZE];
+
+// Prints both generated dither matrices for visual inspection, then
+// generates (and prints) an example dither shader.
+int main()
+{
+    printf("Ordered dither matrix:\n");
+    pl_generate_bayer_matrix(&data[0][0], SIZE);
+    for (int row = 0; row < SIZE; row++) {
+        for (int col = 0; col < SIZE; col++) {
+            const int quantized = (int)(data[row][col] * SIZE * SIZE);
+            printf(" %3d", quantized);
+        }
+        printf("\n");
+    }
+
+    printf("Blue noise dither matrix:\n");
+    pl_generate_blue_noise(&data[0][0], SHIFT);
+    for (int row = 0; row < SIZE; row++) {
+        for (int col = 0; col < SIZE; col++) {
+            const int quantized = (int)(data[row][col] * SIZE * SIZE);
+            printf(" %3d", quantized);
+        }
+        printf("\n");
+    }
+
+    // Generate an example of a dither shader
+    pl_log log = pl_test_logger();
+    pl_shader sh = pl_shader_alloc(log, NULL);
+    pl_shader_obj obj = NULL;
+
+    pl_shader_dither(sh, 8, &obj, NULL);
+    const struct pl_shader_res *res = pl_shader_finalize(sh);
+    REQUIRE(res);
+    printf("Generated dither shader:\n%s\n", res->glsl);
+
+    pl_shader_obj_destroy(&obj);
+    pl_shader_free(&sh);
+    pl_log_destroy(&log);
+}
diff --git a/src/tests/dummy.c b/src/tests/dummy.c
new file mode 100644
index 0000000..0e87a2c
--- /dev/null
+++ b/src/tests/dummy.c
@@ -0,0 +1,70 @@
+#include "gpu_tests.h"
+
+#include <libplacebo/dummy.h>
+
+// Exercises the dummy (CPU-only) GPU backend: buffer/texture tests, polar
+// sampling shader generation with LUT inspection, and the sampler2D input
+// signature path.
+int main()
+{
+    pl_log log = pl_test_logger();
+    pl_gpu gpu = pl_gpu_dummy_create(log, NULL);
+    pl_buffer_tests(gpu);
+    pl_texture_tests(gpu);
+
+    // Attempt creating a shader and accessing the resulting LUT
+    pl_tex dummy = pl_tex_dummy_create(gpu, pl_tex_dummy_params(
+        .w = 100,
+        .h = 100,
+        .format = pl_find_named_fmt(gpu, "rgba8"),
+    ));
+
+    // Upscale 100x100 -> 1000x1000 to force actual filtering work
+    struct pl_sample_src src = {
+        .tex = dummy,
+        .new_w = 1000,
+        .new_h = 1000,
+    };
+
+    pl_shader_obj lut = NULL;
+    struct pl_sample_filter_params filter_params = {
+        .filter = pl_filter_ewa_lanczos,
+        .lut = &lut,
+    };
+
+    pl_shader sh = pl_shader_alloc(log, pl_shader_params( .gpu = gpu ));
+    REQUIRE(pl_shader_sample_polar(sh, &src, &filter_params));
+    const struct pl_shader_res *res = pl_shader_finalize(sh);
+    REQUIRE(res);
+
+    // Walk the shader's descriptors looking for the generated LUT texture
+    for (int n = 0; n < res->num_descriptors; n++) {
+        const struct pl_shader_desc *sd = &res->descriptors[n];
+        if (sd->desc.type != PL_DESC_SAMPLED_TEX)
+            continue;
+
+        pl_tex tex = sd->binding.object;
+        const float *data = (float *) pl_tex_dummy_data(tex);
+        if (!data)
+            continue; // means this was the `dummy` texture
+
+#ifdef PRINT_LUTS
+        for (int i = 0; i < tex->params.w; i++)
+            printf("lut[%d] = %f\n", i, data[i]);
+#endif
+    }
+
+    // Try out generation of the sampler2D interface: no source texture,
+    // only a description of its size/format/sampler
+    src.tex = NULL;
+    src.tex_w = 100;
+    src.tex_h = 100;
+    src.format = PL_FMT_UNORM;
+    src.sampler = PL_SAMPLER_NORMAL;
+    src.mode = PL_TEX_SAMPLE_LINEAR;
+
+    pl_shader_reset(sh, pl_shader_params( .gpu = gpu ));
+    REQUIRE(pl_shader_sample_polar(sh, &src, &filter_params));
+    REQUIRE((res = pl_shader_finalize(sh)));
+    // The resulting shader must take a sampler as its input signature
+    REQUIRE_CMP(res->input, ==, PL_SHADER_SIG_SAMPLER, "u");
+
+    pl_shader_free(&sh);
+    pl_shader_obj_destroy(&lut);
+    pl_tex_destroy(gpu, &dummy);
+    pl_gpu_dummy_destroy(&gpu);
+    pl_log_destroy(&log);
+}
diff --git a/src/tests/filters.c b/src/tests/filters.c
new file mode 100644
index 0000000..b6b323c
--- /dev/null
+++ b/src/tests/filters.c
@@ -0,0 +1,81 @@
+#include "tests.h"
+
+#include <libplacebo/filters.h>
+
+// Unit test for filter kernels and filter configs: kernel normalization,
+// LUT accuracy for polar filters, and row-weight normalization for
+// separable (orthogonal) filters.
+int main()
+{
+    pl_log log = pl_test_logger();
+
+    for (int i = 0; i < pl_num_filter_functions; i++) {
+        const struct pl_filter_function *fun = pl_filter_functions[i];
+        if (fun->opaque)
+            continue; // not evaluable in isolation
+
+        printf("Testing filter function '%s'\n", fun->name);
+
+        struct pl_filter_ctx ctx = { .radius = fun->radius };
+        memcpy(ctx.params, fun->params, sizeof(ctx.params));
+
+        // Ensure the kernel is correctly scaled
+        REQUIRE_FEQ(fun->weight(&ctx, 0.0), 1.0, 1e-7);
+
+        // Only box filters are radius 1, these are unwindowed by design.
+        // Gaussian technically never reaches 0 even at its preconfigured radius.
+        if (fun->radius > 1.0 && fun != &pl_filter_function_gaussian)
+            REQUIRE_FEQ(fun->weight(&ctx, fun->radius), 0.0, 1e-7);
+    }
+
+    for (int c = 0; c < pl_num_filter_configs; c++) {
+        const struct pl_filter_config *conf = pl_filter_configs[c];
+        if (conf->kernel->opaque)
+            continue;
+
+        printf("Testing filter config '%s'\n", conf->name);
+        pl_filter flt = pl_filter_generate(log, pl_filter_params(
+            .config = *conf,
+            .lut_entries = 256,
+            .cutoff = 1e-3, // only relevant for polar filters
+        ));
+        REQUIRE(flt);
+        // The cutoff may shrink the effective radius, but never grow it
+        const float radius = PL_DEF(conf->radius, conf->kernel->radius);
+        REQUIRE_CMP(flt->radius, <=, radius, "f");
+        REQUIRE_CMP(flt->radius_zero, >, 0.0, "f");
+        REQUIRE_CMP(flt->radius_zero, <=, flt->radius, "f");
+
+        if (conf->polar) {
+
+            // Test LUT accuracy: linearly interpolating the LUT must agree
+            // with directly sampling the filter, to within a small error
+            const int range = flt->params.lut_entries - 1;
+            double scale = flt->weights[0] / pl_filter_sample(conf, 0.0);
+            double err = 0.0;
+            for (float k = 0.0; k <= 1.0; k += 1e-3f) {
+                double ref = scale * pl_filter_sample(conf, k * flt->radius);
+                double idx = k * range;
+                int base = floorf(idx);
+                double fpart = idx - base;
+                int next = PL_MIN(base + 1, range);
+                double interp = PL_MIX(flt->weights[base], flt->weights[next], fpart);
+                err = fmaxf(err, fabs(interp - ref));
+            }
+            REQUIRE_CMP(err, <=, 1e-4, "g");
+
+        } else {
+
+            // Ensure the weights for each row add up to unity
+            for (int i = 0; i < flt->params.lut_entries; i++) {
+                const float *row = flt->weights + i * flt->row_stride;
+                float sum = 0.0;
+                REQUIRE(flt->row_size);
+                REQUIRE_CMP(flt->row_stride, >=, flt->row_size, "d");
+                for (int n = 0; n < flt->row_size; n++)
+                    sum += row[n];
+                REQUIRE_FEQ(sum, 1.0, 1e-6);
+            }
+
+        }
+
+        pl_filter_free(&flt);
+    }
+
+    pl_log_destroy(&log);
+}
diff --git a/src/tests/fuzz/lut.c b/src/tests/fuzz/lut.c
new file mode 100644
index 0000000..24e5f89
--- /dev/null
+++ b/src/tests/fuzz/lut.c
@@ -0,0 +1,24 @@
+#include "../tests.h"
+
+#include <libplacebo/shaders/lut.h>
+
+__AFL_FUZZ_INIT();
+
+#pragma clang optimize off
+
+// Fuzz target: feed each AFL-provided testcase to the .cube LUT parser and
+// immediately free the result, looping in AFL persistent mode for throughput.
+int main()
+{
+    struct pl_custom_lut *lut;
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+    __AFL_INIT();
+#endif
+
+    // Shared-memory testcase buffer managed by AFL; contents change each iteration
+    unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
+
+    while (__AFL_LOOP(100000)) {
+        size_t len = __AFL_FUZZ_TESTCASE_LEN;
+        lut = pl_lut_parse_cube(NULL, (char *) buf, len);
+        pl_lut_free(&lut);
+    }
+}
diff --git a/src/tests/fuzz/options.c b/src/tests/fuzz/options.c
new file mode 100644
index 0000000..c88e462
--- /dev/null
+++ b/src/tests/fuzz/options.c
@@ -0,0 +1,26 @@
+#include "../tests.h"
+
+#include <libplacebo/options.h>
+
+__AFL_FUZZ_INIT();
+
+#pragma clang optimize off
+
+// Fuzz target for the pl_options string (de)serializer: load an arbitrary
+// option string, save it back, then reset — looping in AFL persistent mode.
+int main()
+{
+    pl_options opts = pl_options_alloc(NULL);
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+    __AFL_INIT();
+#endif
+
+    // Shared-memory testcase buffer managed by AFL
+    unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
+
+    while (__AFL_LOOP(100000)) {
+        size_t len = __AFL_FUZZ_TESTCASE_LEN;
+        if (!len)
+            continue; // empty testcase: buf[len - 1] would underflow
+        buf[len - 1] = '\0'; // ensure proper null termination
+        pl_options_load(opts, (const char *) buf);
+        pl_options_save(opts);
+        pl_options_reset(opts, NULL);
+    }
+}
diff --git a/src/tests/fuzz/shaders.c b/src/tests/fuzz/shaders.c
new file mode 100644
index 0000000..2e3e92c
--- /dev/null
+++ b/src/tests/fuzz/shaders.c
@@ -0,0 +1,166 @@
+#include "../tests.h"
+#include "shaders.h"
+
+#include <libplacebo/dummy.h>
+#include <libplacebo/shaders/colorspace.h>
+#include <libplacebo/shaders/custom.h>
+#include <libplacebo/shaders/sampling.h>
+
+__AFL_FUZZ_INIT();
+
+#pragma clang optimize off
+
+// Fuzz target for the shader dispatch system: interprets each testcase byte
+// as a command that either appends a shader step (sampling, colorspace,
+// dither, ...) to the current shader, or pushes/pops a subshader on a small
+// stack ('(' / ')'), merging popped shaders via sh_subpass(). Any unknown
+// byte or stack over/underflow aborts the testcase.
+int main()
+{
+    pl_gpu gpu = pl_gpu_dummy_create(NULL, NULL);
+
+#define WIDTH 64
+#define HEIGHT 64
+#define COMPS 4
+
+    static const float empty[HEIGHT][WIDTH][COMPS] = {0};
+
+    // Fixed 64x64 float source texture, upscaled 2x by the sampling steps
+    struct pl_sample_src src = {
+        .tex = pl_tex_create(gpu, pl_tex_params(
+            .format = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, 0, 32, PL_FMT_CAP_SAMPLEABLE),
+            .initial_data = empty,
+            .sampleable = true,
+            .w = WIDTH,
+            .h = HEIGHT,
+        )),
+        .new_w = WIDTH * 2,
+        .new_h = HEIGHT * 2,
+    };
+
+    if (!src.tex)
+        return 1;
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+    __AFL_INIT();
+#endif
+
+    unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
+    while (__AFL_LOOP(10000)) {
+
+#define STACK_SIZE 16
+        // stack[0] is the root shader; deeper entries are lazily allocated
+        // subshaders reused across testcase iterations
+        pl_shader stack[STACK_SIZE] = {0};
+        int idx = 0;
+
+        stack[0] = pl_shader_alloc(NULL, pl_shader_params(
+            .gpu = gpu,
+        ));
+
+        pl_shader sh = stack[idx];
+        pl_shader_obj polar = NULL, ortho = NULL, peak = NULL, dither = NULL;
+
+        size_t len = __AFL_FUZZ_TESTCASE_LEN;
+        for (size_t pos = 0; pos < len; pos++) {
+            switch (buf[pos]) {
+            // Sampling steps
+            case 'S':
+                pl_shader_sample_direct(sh, &src);
+                break;
+            case 'D':
+                pl_shader_deband(sh, &src, NULL);
+                break;
+            case 'P':
+                pl_shader_sample_polar(sh, &src, pl_sample_filter_params(
+                    .filter = pl_filter_ewa_lanczos,
+                    .lut = &polar,
+                ));
+                break;
+            case 'O': ;
+                // Orthogonal sampling scales one axis only, so keep width fixed
+                struct pl_sample_src srcfix = src;
+                srcfix.new_w = WIDTH;
+                pl_shader_sample_ortho2(sh, &srcfix, pl_sample_filter_params(
+                    .filter = pl_filter_spline36,
+                    .lut = &ortho,
+                ));
+                break;
+            case 'X':
+                pl_shader_custom(sh, &(struct pl_custom_shader) {
+                    .input = PL_SHADER_SIG_NONE,
+                    .output = PL_SHADER_SIG_COLOR,
+                    .body = "// merge subpasses",
+                });
+                break;
+
+            // Colorspace transformation steps
+            case 'y': {
+                struct pl_color_repr repr = pl_color_repr_jpeg;
+                pl_shader_decode_color(sh, &repr, NULL);
+                break;
+            }
+            case 'p':
+                pl_shader_detect_peak(sh, pl_color_space_hdr10, &peak, NULL);
+                break;
+            case 'm':
+                pl_shader_color_map(sh, NULL, pl_color_space_bt709,
+                                    pl_color_space_monitor, NULL, false);
+                break;
+            case 't':
+                pl_shader_color_map(sh, NULL, pl_color_space_hdr10,
+                                    pl_color_space_monitor, &peak, false);
+                break;
+            case 'd':
+                pl_shader_dither(sh, 8, &dither, pl_dither_params(
+                    // Picked to speed up calculation
+                    .method = PL_DITHER_ORDERED_LUT,
+                    .lut_size = 2,
+                ));
+                break;
+
+            // Push and pop subshader commands
+            case '(':
+                if (idx+1 == STACK_SIZE)
+                    goto invalid;
+
+                idx++;
+                if (!stack[idx]) {
+                    stack[idx] = pl_shader_alloc(NULL, pl_shader_params(
+                        .gpu = gpu,
+                        .id = idx,
+                    ));
+                }
+                sh = stack[idx];
+                break;
+
+            case ')':
+                if (idx == 0)
+                    goto invalid;
+
+                idx--;
+                sh_subpass(stack[idx], stack[idx + 1]);
+                pl_shader_reset(stack[idx + 1], pl_shader_params(
+                    .gpu = gpu,
+                    .id = idx + 1,
+                ));
+                sh = stack[idx];
+                break;
+
+            default:
+                goto invalid;
+            }
+        }
+
+        // Merge remaining shaders
+        while (idx > 0) {
+            sh_subpass(stack[idx - 1], stack[idx]);
+            idx--;
+        }
+
+        pl_shader_finalize(stack[0]);
+
+invalid:
+        // Cleanup runs for both valid and aborted testcases
+        for (int i = 0; i < STACK_SIZE; i++)
+            pl_shader_free(&stack[i]);
+
+        pl_shader_obj_destroy(&polar);
+        pl_shader_obj_destroy(&ortho);
+        pl_shader_obj_destroy(&peak);
+        pl_shader_obj_destroy(&dither);
+    }
+
+    pl_tex_destroy(gpu, &src.tex);
+    pl_gpu_dummy_destroy(&gpu);
+}
diff --git a/src/tests/fuzz/user_shaders.c b/src/tests/fuzz/user_shaders.c
new file mode 100644
index 0000000..bbb98c8
--- /dev/null
+++ b/src/tests/fuzz/user_shaders.c
@@ -0,0 +1,28 @@
+#include "../tests.h"
+
+#include <libplacebo/dummy.h>
+#include <libplacebo/shaders/custom.h>
+
+__AFL_FUZZ_INIT();
+
+#pragma clang optimize off
+
+// Fuzz target: parse each AFL testcase as an mpv-style user shader against a
+// dummy GPU, then destroy the resulting hook, in AFL persistent mode.
+int main()
+{
+    pl_gpu gpu = pl_gpu_dummy_create(NULL, NULL);
+    const struct pl_hook *hook;
+
+#ifdef __AFL_HAVE_MANUAL_CONTROL
+    __AFL_INIT();
+#endif
+
+    unsigned char *buf = __AFL_FUZZ_TESTCASE_BUF;
+
+    while (__AFL_LOOP(100000)) {
+        size_t len = __AFL_FUZZ_TESTCASE_LEN;
+        hook = pl_mpv_user_shader_parse(gpu, (char *) buf, len);
+        pl_mpv_user_shader_destroy(&hook);
+    }
+
+    pl_gpu_dummy_destroy(&gpu);
+}
diff --git a/src/tests/gpu_tests.h b/src/tests/gpu_tests.h
new file mode 100644
index 0000000..f14f260
--- /dev/null
+++ b/src/tests/gpu_tests.h
@@ -0,0 +1,1741 @@
+#include "tests.h"
+#include "shaders.h"
+
+#include <libplacebo/renderer.h>
+#include <libplacebo/utils/frame_queue.h>
+#include <libplacebo/utils/upload.h>
+
+//#define PRINT_OUTPUT
+
+// Exercises pl_buf creation, upload/readback, buffer-to-buffer copy, host
+// mapping and compute-based endian swapping against random data, skipping
+// each sub-test when the GPU's limits rule it out.
+static void pl_buffer_tests(pl_gpu gpu)
+{
+    const size_t buf_size = 1024;
+    if (buf_size > gpu->limits.max_buf_size)
+        return;
+
+    // Single allocation: first half is the random source, second half the
+    // readback destination (freed together at the end)
+    uint8_t *test_src = malloc(buf_size * 2);
+    uint8_t *test_dst = test_src + buf_size;
+    assert(test_src && test_dst);
+    memset(test_dst, 0, buf_size);
+    for (int i = 0; i < buf_size; i++)
+        test_src[i] = RANDOM_U8;
+
+    pl_buf buf = NULL, tbuf = NULL;
+
+    printf("test buffer static creation and readback\n");
+    buf = pl_buf_create(gpu, pl_buf_params(
+        .size = buf_size,
+        .host_readable = true,
+        .initial_data = test_src,
+    ));
+
+    REQUIRE(buf);
+    REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size));
+    REQUIRE_MEMEQ(test_src, test_dst, buf_size);
+    pl_buf_destroy(gpu, &buf);
+
+    printf("test buffer empty creation, update and readback\n");
+    memset(test_dst, 0, buf_size);
+    buf = pl_buf_create(gpu, pl_buf_params(
+        .size = buf_size,
+        .host_writable = true,
+        .host_readable = true,
+    ));
+
+    REQUIRE(buf);
+    pl_buf_write(gpu, buf, 0, test_src, buf_size);
+    REQUIRE(pl_buf_read(gpu, buf, 0, test_dst, buf_size));
+    REQUIRE_MEMEQ(test_src, test_dst, buf_size);
+    pl_buf_destroy(gpu, &buf);
+
+    printf("test buffer-buffer copy and readback\n");
+    memset(test_dst, 0, buf_size);
+    buf = pl_buf_create(gpu, pl_buf_params(
+        .size = buf_size,
+        .initial_data = test_src,
+    ));
+
+    tbuf = pl_buf_create(gpu, pl_buf_params(
+        .size = buf_size,
+        .host_readable = true,
+    ));
+
+    REQUIRE(buf && tbuf);
+    pl_buf_copy(gpu, tbuf, 0, buf, 0, buf_size);
+    REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size));
+    REQUIRE_MEMEQ(test_src, test_dst, buf_size);
+    pl_buf_destroy(gpu, &buf);
+    pl_buf_destroy(gpu, &tbuf);
+
+    if (buf_size <= gpu->limits.max_mapped_size) {
+        printf("test host mapped buffer readback\n");
+        buf = pl_buf_create(gpu, pl_buf_params(
+            .size = buf_size,
+            .host_mapped = true,
+            .initial_data = test_src,
+        ));
+
+        REQUIRE(buf);
+        REQUIRE(!pl_buf_poll(gpu, buf, 0));
+        REQUIRE_MEMEQ(test_src, buf->data, buf_size);
+        pl_buf_destroy(gpu, &buf);
+    }
+
+    // `compute_queues` check is to exclude dummy GPUs here
+    if (buf_size <= gpu->limits.max_ssbo_size && gpu->limits.compute_queues)
+    {
+        printf("test endian swapping\n");
+        buf = pl_buf_create(gpu, pl_buf_params(
+            .size = buf_size,
+            .storable = true,
+            .initial_data = test_src,
+        ));
+
+        tbuf = pl_buf_create(gpu, pl_buf_params(
+            .size = buf_size,
+            .storable = true,
+            .host_readable = true,
+        ));
+
+        REQUIRE(buf && tbuf);
+        // 16-bit word swap into a second buffer...
+        REQUIRE(pl_buf_copy_swap(gpu, &(struct pl_buf_copy_swap_params) {
+            .src = buf,
+            .dst = tbuf,
+            .size = buf_size,
+            .wordsize = 2,
+        }));
+        REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size));
+        for (int i = 0; i < buf_size / 2; i++) {
+            REQUIRE_CMP(test_src[2 * i + 0], ==, test_dst[2 * i + 1], PRIu8);
+            REQUIRE_CMP(test_src[2 * i + 1], ==, test_dst[2 * i + 0], PRIu8);
+        }
+        // test endian swap in-place
+        REQUIRE(pl_buf_copy_swap(gpu, &(struct pl_buf_copy_swap_params) {
+            .src = tbuf,
+            .dst = tbuf,
+            .size = buf_size,
+            .wordsize = 4,
+        }));
+        REQUIRE(pl_buf_read(gpu, tbuf, 0, test_dst, buf_size));
+        // Net effect of the 2-byte then in-place 4-byte swap vs. the source
+        for (int i = 0; i < buf_size / 4; i++) {
+            REQUIRE_CMP(test_src[4 * i + 0], ==, test_dst[4 * i + 2], PRIu8);
+            REQUIRE_CMP(test_src[4 * i + 1], ==, test_dst[4 * i + 3], PRIu8);
+            REQUIRE_CMP(test_src[4 * i + 2], ==, test_dst[4 * i + 0], PRIu8);
+            REQUIRE_CMP(test_src[4 * i + 3], ==, test_dst[4 * i + 1], PRIu8);
+        }
+        pl_buf_destroy(gpu, &buf);
+        pl_buf_destroy(gpu, &tbuf);
+    }
+
+    free(test_src);
+}
+
+// Transfer-completion callback: sets the bool pointed to by `priv`, letting
+// tests verify that async upload/download callbacks actually fired.
+static void test_cb(void *priv)
+{
+    bool *flag = priv;
+    *flag = true;
+}
+
+// Uploads random data to tex[0], optionally blits it to tex[1], downloads it
+// back into `dst` and verifies a byte-exact roundtrip (except for emulated
+// float formats), measuring upload/download times with pl_timer.
+// `src`/`dst` must each hold enough bytes for the full texture.
+static void pl_test_roundtrip(pl_gpu gpu, pl_tex tex[2],
+                              uint8_t *src, uint8_t *dst)
+{
+    if (!tex[0] || !tex[1]) {
+        printf("failed creating test textures... skipping this test\n");
+        return;
+    }
+
+    // Total texel count; h/d are 0 for lower-dimensional textures
+    int texels = tex[0]->params.w;
+    texels *= tex[0]->params.h ? tex[0]->params.h : 1;
+    texels *= tex[0]->params.d ? tex[0]->params.d : 1;
+
+    pl_fmt fmt = tex[0]->params.format;
+    size_t bytes = texels * fmt->texel_size;
+    memset(src, 0, bytes);
+    memset(dst, 0, bytes);
+
+    for (size_t i = 0; i < bytes; i++)
+        src[i] = RANDOM_U8;
+
+    pl_timer ul, dl;
+    ul = pl_timer_create(gpu);
+    dl = pl_timer_create(gpu);
+
+    bool ran_ul = false, ran_dl = false;
+
+    REQUIRE(pl_tex_upload(gpu, &(struct pl_tex_transfer_params){
+        .tex = tex[0],
+        .ptr = src,
+        .timer = ul,
+        .callback = gpu->limits.callbacks ? test_cb : NULL,
+        .priv = &ran_ul,
+    }));
+
+    // Test blitting, if possible for this format
+    pl_tex dst_tex = tex[0];
+    if (tex[0]->params.blit_src && tex[1]->params.blit_dst) {
+        pl_tex_clear_ex(gpu, tex[1], (union pl_clear_color){0}); // for testing
+        pl_tex_blit(gpu, &(struct pl_tex_blit_params) {
+            .src = tex[0],
+            .dst = tex[1],
+        });
+        dst_tex = tex[1];
+    }
+
+    REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params){
+        .tex = dst_tex,
+        .ptr = dst,
+        .timer = dl,
+        .callback = gpu->limits.callbacks ? test_cb : NULL,
+        .priv = &ran_dl,
+    }));
+
+    pl_gpu_finish(gpu);
+    if (gpu->limits.callbacks)
+        REQUIRE(ran_ul && ran_dl);
+
+    if (fmt->emulated && fmt->type == PL_FMT_FLOAT) {
+        // TODO: can't memcmp here because bits might be lost due to the
+        // emulated 16/32 bit upload paths, figure out a better way to
+        // generate data and verify the roundtrip!
+    } else {
+        REQUIRE_MEMEQ(src, dst, bytes);
+    }
+
+    // Report timer results
+    printf("upload time: %"PRIu64", download time: %"PRIu64"\n",
+           pl_timer_query(gpu, ul), pl_timer_query(gpu, dl));
+
+    pl_timer_destroy(gpu, &ul);
+    pl_timer_destroy(gpu, &dl);
+}
+
+// Runs pl_test_roundtrip() for every non-opaque, host-readable format in
+// 1D, 2D and 3D (16 texels per dimension), honoring the GPU's dimension
+// and 1D/3D-blit limits.
+static void pl_texture_tests(pl_gpu gpu)
+{
+    // Worst case: 16x16x16 texels of 4 channels of double
+    const size_t max_size = 16*16*16 * 4 *sizeof(double);
+    // NOTE(review): malloc result is not checked here (unlike
+    // pl_buffer_tests, which asserts) — a failure would crash the test
+    uint8_t *test_src = malloc(max_size * 2);
+    uint8_t *test_dst = test_src + max_size;
+
+    for (int f = 0; f < gpu->num_formats; f++) {
+        pl_fmt fmt = gpu->formats[f];
+        if (fmt->opaque || !(fmt->caps & PL_FMT_CAP_HOST_READABLE))
+            continue;
+
+        printf("testing texture roundtrip for format %s\n", fmt->name);
+        assert(fmt->texel_size <= 4 * sizeof(double));
+
+        struct pl_tex_params ref_params = {
+            .format         = fmt,
+            .blit_src       = (fmt->caps & PL_FMT_CAP_BLITTABLE),
+            .blit_dst       = (fmt->caps & PL_FMT_CAP_BLITTABLE),
+            .host_writable  = true,
+            .host_readable  = true,
+            .debug_tag      = PL_DEBUG_TAG,
+        };
+
+        pl_tex tex[2];
+
+        if (gpu->limits.max_tex_1d_dim >= 16) {
+            printf("... 1D\n");
+            struct pl_tex_params params = ref_params;
+            params.w = 16;
+            if (!gpu->limits.blittable_1d_3d)
+                params.blit_src = params.blit_dst = false;
+            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
+                tex[i] = pl_tex_create(gpu, &params);
+            pl_test_roundtrip(gpu, tex, test_src, test_dst);
+            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
+                pl_tex_destroy(gpu, &tex[i]);
+        }
+
+        if (gpu->limits.max_tex_2d_dim >= 16) {
+            printf("... 2D\n");
+            struct pl_tex_params params = ref_params;
+            params.w = params.h = 16;
+            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
+                tex[i] = pl_tex_create(gpu, &params);
+            pl_test_roundtrip(gpu, tex, test_src, test_dst);
+            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
+                pl_tex_destroy(gpu, &tex[i]);
+        }
+
+        if (gpu->limits.max_tex_3d_dim >= 16) {
+            printf("... 3D\n");
+            struct pl_tex_params params = ref_params;
+            params.w = params.h = params.d = 16;
+            if (!gpu->limits.blittable_1d_3d)
+                params.blit_src = params.blit_dst = false;
+            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
+                tex[i] = pl_tex_create(gpu, &params);
+            pl_test_roundtrip(gpu, tex, test_src, test_dst);
+            for (int i = 0; i < PL_ARRAY_SIZE(tex); i++)
+                pl_tex_destroy(gpu, &tex[i]);
+        }
+    }
+
+    free(test_src);
+}
+
+// Verifies planar texture support using the g8_b8_r8_420 format: clears the
+// first chroma plane (quarter-resolution for 4:2:0) to 0x80 and checks the
+// downloaded bytes match. Silently skips if the format or texture is
+// unavailable.
+static void pl_planar_tests(pl_gpu gpu)
+{
+    pl_fmt fmt = pl_find_named_fmt(gpu, "g8_b8_r8_420");
+    if (!fmt)
+        return;
+    REQUIRE_CMP(fmt->num_planes, ==, 3, "d");
+
+    const int width = 64, height = 32;
+    pl_tex tex = pl_tex_create(gpu, pl_tex_params(
+        .w              = width,
+        .h              = height,
+        .format         = fmt,
+        .blit_dst       = true,
+        .host_readable  = true,
+    ));
+    if (!tex)
+        return;
+    for (int i = 0; i < fmt->num_planes; i++)
+        REQUIRE(tex->planes[i]);
+
+    pl_tex plane = tex->planes[1];
+    // 4:2:0 chroma plane holds (w/2)*(h/2) = (w*h)/4 samples
+    uint8_t data[(width * height) >> 2];
+    REQUIRE_CMP(plane->params.w * plane->params.h, ==, PL_ARRAY_SIZE(data), "d");
+
+    pl_tex_clear(gpu, plane, (float[]){ (float) 0x80 / 0xFF, 0.0, 0.0, 1.0 });
+    REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params(
+        .tex = plane,
+        .ptr = data,
+    )));
+
+    uint8_t ref[PL_ARRAY_SIZE(data)];
+    memset(ref, 0x80, sizeof(ref));
+    REQUIRE_MEMEQ(data, ref, PL_ARRAY_SIZE(data));
+
+    pl_tex_destroy(gpu, &tex);
+}
+
+// Broad shader-path integration test on a 16x16 FBO: raw raster passes
+// (immediate, vertex-buffer and indexed), pl_dispatch with custom vertices,
+// compute blits, colorspace encode/decode roundtrips, shader cache
+// save/load, peak detection, film grain (AV1 + H.274), custom shaders,
+// Dolby Vision decoding, deinterlacing and error diffusion. Each stage
+// renders a known gradient pattern and re-validates it via
+// TEST_FBO_PATTERN. Requires GLSL >= 410 and a suitable FBO format.
+static void pl_shader_tests(pl_gpu gpu)
+{
+    if (gpu->glsl.version < 410)
+        return;
+
+    const char *vert_shader =
+        "#version 410                               \n"
+        "layout(location=0) in vec2 vertex_pos;     \n"
+        "layout(location=1) in vec3 vertex_color;   \n"
+        "layout(location=0) out vec3 frag_color;    \n"
+        "void main() {                              \n"
+        "    gl_Position = vec4(vertex_pos, 0, 1);  \n"
+        "    frag_color = vertex_color;             \n"
+        "}";
+
+    const char *frag_shader =
+        "#version 410                               \n"
+        "layout(location=0) in vec3 frag_color;     \n"
+        "layout(location=0) out vec4 out_color;     \n"
+        "void main() {                              \n"
+        "    out_color = vec4(frag_color, 1.0);     \n"
+        "}";
+
+    pl_fmt fbo_fmt;
+    enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE |
+                            PL_FMT_CAP_LINEAR;
+
+    fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 4, 16, 32, caps);
+    if (!fbo_fmt)
+        return;
+
+#define FBO_W 16
+#define FBO_H 16
+
+    pl_tex fbo;
+    fbo = pl_tex_create(gpu, &(struct pl_tex_params) {
+        .format         = fbo_fmt,
+        .w              = FBO_W,
+        .h              = FBO_H,
+        .renderable     = true,
+        .storable       = !!(fbo_fmt->caps & PL_FMT_CAP_STORABLE),
+        .host_readable  = true,
+        .blit_dst       = true,
+    });
+    REQUIRE(fbo);
+
+    pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0});
+
+    pl_fmt vert_fmt;
+    vert_fmt = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3);
+    REQUIRE(vert_fmt);
+
+    // Full-screen triangle strip producing an x/y color gradient
+    static const struct vertex { float pos[2]; float color[3]; } vertices[] = {
+        {{-1.0, -1.0}, {0, 0, 0}},
+        {{ 1.0, -1.0}, {1, 0, 0}},
+        {{-1.0,  1.0}, {0, 1, 0}},
+        {{ 1.0,  1.0}, {1, 1, 0}},
+    };
+
+    pl_pass pass;
+    pass = pl_pass_create(gpu, &(struct pl_pass_params) {
+        .type           = PL_PASS_RASTER,
+        .target_format  = fbo_fmt,
+        .vertex_shader  = vert_shader,
+        .glsl_shader    = frag_shader,
+
+        .vertex_type    = PL_PRIM_TRIANGLE_STRIP,
+        .vertex_stride  = sizeof(struct vertex),
+        .num_vertex_attribs = 2,
+        .vertex_attribs = (struct pl_vertex_attrib[]) {{
+            .name     = "vertex_pos",
+            .fmt      = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
+            .location = 0,
+            .offset   = offsetof(struct vertex, pos),
+        }, {
+            .name     = "vertex_color",
+            .fmt      = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3),
+            .location = 1,
+            .offset   = offsetof(struct vertex, color),
+        }},
+    });
+    REQUIRE(pass);
+    if (pass->params.cached_program || pass->params.cached_program_len) {
+        // Ensure both are set if either one is set
+        REQUIRE(pass->params.cached_program);
+        REQUIRE(pass->params.cached_program_len);
+    }
+
+    pl_timer timer = pl_timer_create(gpu);
+    pl_pass_run(gpu, &(struct pl_pass_run_params) {
+        .pass           = pass,
+        .target         = fbo,
+        .vertex_count   = PL_ARRAY_SIZE(vertices),
+        .vertex_data    = vertices,
+        .timer          = timer,
+    });
+
+    // Wait until this pass is complete and report the timer result
+    pl_gpu_finish(gpu);
+    printf("timer query result: %"PRIu64"\n", pl_timer_query(gpu, timer));
+    pl_timer_destroy(gpu, &timer);
+
+    static float test_data[FBO_H * FBO_W * 4] = {0};
+
+    // Test against the known pattern of `src`, only useful for roundtrip tests
+#define TEST_FBO_PATTERN(eps, fmt, ...)                                 \
+    do {                                                                \
+        printf("testing pattern of " fmt "\n", __VA_ARGS__);            \
+        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) { \
+            .tex = fbo,                                                 \
+            .ptr = test_data,                                           \
+        }));                                                            \
+                                                                        \
+        for (int y = 0; y < FBO_H; y++) {                               \
+            for (int x = 0; x < FBO_W; x++) {                           \
+                float *color = &test_data[(y * FBO_W + x) * 4];         \
+                REQUIRE_FEQ(color[0], (x + 0.5) / FBO_W, eps);          \
+                REQUIRE_FEQ(color[1], (y + 0.5) / FBO_H, eps);          \
+                REQUIRE_FEQ(color[2], 0.0, eps);                        \
+                REQUIRE_FEQ(color[3], 1.0, eps);                        \
+            }                                                           \
+        }                                                               \
+    } while (0)
+
+    TEST_FBO_PATTERN(1e-6, "%s", "initial rendering");
+
+    if (sizeof(vertices) <= gpu->limits.max_vbo_size) {
+        // Test the use of an explicit vertex buffer
+        pl_buf vert = pl_buf_create(gpu, &(struct pl_buf_params) {
+            .size = sizeof(vertices),
+            .initial_data = vertices,
+            .drawable = true,
+        });
+
+        REQUIRE(vert);
+        pl_pass_run(gpu, &(struct pl_pass_run_params) {
+            .pass           = pass,
+            .target         = fbo,
+            .vertex_count   = sizeof(vertices) / sizeof(struct vertex),
+            .vertex_buf     = vert,
+            .buf_offset     = 0,
+        });
+
+        pl_buf_destroy(gpu, &vert);
+        TEST_FBO_PATTERN(1e-6, "%s", "using vertex buffer");
+    }
+
+    // Test the use of index buffers
+    static const uint16_t indices[] = { 3, 2, 1, 0 };
+    pl_pass_run(gpu, &(struct pl_pass_run_params) {
+        .pass           = pass,
+        .target         = fbo,
+        .vertex_count   = PL_ARRAY_SIZE(indices),
+        .vertex_data    = vertices,
+        .index_data     = indices,
+    });
+
+    pl_pass_destroy(gpu, &pass);
+    TEST_FBO_PATTERN(1e-6, "%s", "using indexed rendering");
+
+    // Test the use of pl_dispatch
+    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
+    pl_shader sh = pl_dispatch_begin(dp);
+    REQUIRE(pl_shader_custom(sh, &(struct pl_custom_shader) {
+        .body       = "color = vec4(col, 1.0);",
+        .input      = PL_SHADER_SIG_NONE,
+        .output     = PL_SHADER_SIG_COLOR,
+    }));
+
+    REQUIRE(pl_dispatch_vertex(dp, &(struct pl_dispatch_vertex_params) {
+        .shader = &sh,
+        .target = fbo,
+        .vertex_stride = sizeof(struct vertex),
+        .vertex_position_idx = 0,
+        .num_vertex_attribs = 2,
+        .vertex_attribs = (struct pl_vertex_attrib[]) {{
+            .name = "pos",
+            .fmt  = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 2),
+            .offset = offsetof(struct vertex, pos),
+        }, {
+            .name = "col",
+            .fmt  = pl_find_vertex_fmt(gpu, PL_FMT_FLOAT, 3),
+            .offset = offsetof(struct vertex, color),
+        }},
+
+        .vertex_type = PL_PRIM_TRIANGLE_STRIP,
+        .vertex_coords = PL_COORDS_NORMALIZED,
+        .vertex_count = PL_ARRAY_SIZE(vertices),
+        .vertex_data = vertices,
+    }));
+
+    TEST_FBO_PATTERN(1e-6, "%s", "using custom vertices");
+
+    // Snapshot the gradient into a sampleable source texture for the
+    // remaining (sampling-based) tests
+    static float src_data[FBO_H * FBO_W * 4] = {0};
+    memcpy(src_data, test_data, sizeof(src_data));
+
+    pl_tex src;
+    src = pl_tex_create(gpu, &(struct pl_tex_params) {
+        .format         = fbo_fmt,
+        .w              = FBO_W,
+        .h              = FBO_H,
+        .storable       = fbo->params.storable,
+        .sampleable     = true,
+        .initial_data   = src_data,
+    });
+
+    if (fbo->params.storable) {
+        // Test 1x1 blit, to make sure the scaling code runs
+        REQUIRE(pl_tex_blit_compute(gpu, &(struct pl_tex_blit_params) {
+            .src = src,
+            .dst = fbo,
+            .src_rc = {0, 0, 0, 1, 1, 1},
+            .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1},
+            .sample_mode = PL_TEX_SAMPLE_NEAREST,
+        }));
+
+        // Test non-resizing blit, which uses the efficient imageLoad path
+        REQUIRE(pl_tex_blit_compute(gpu, &(struct pl_tex_blit_params) {
+            .src = src,
+            .dst = fbo,
+            .src_rc = {0, 0, 0, FBO_W, FBO_H, 1},
+            .dst_rc = {0, 0, 0, FBO_W, FBO_H, 1},
+            .sample_mode = PL_TEX_SAMPLE_NEAREST,
+        }));
+
+        TEST_FBO_PATTERN(1e-6, "%s", "pl_tex_blit_compute");
+    }
+
+    // Test encoding/decoding of all gamma functions, color spaces, etc.
+    for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) {
+        struct pl_color_space test_csp = {
+            .transfer = trc,
+            .hdr.min_luma = PL_COLOR_HDR_BLACK,
+        };
+        sh = pl_dispatch_begin(dp);
+        pl_shader_sample_nearest(sh, pl_sample_src( .tex = src ));
+        pl_shader_delinearize(sh, &test_csp);
+        pl_shader_linearize(sh, &test_csp);
+        REQUIRE(pl_dispatch_finish(dp, pl_dispatch_params(
+            .shader = &sh,
+            .target = fbo,
+        )));
+
+        float epsilon = pl_color_transfer_is_hdr(trc) ? 1e-4 : 1e-6;
+        TEST_FBO_PATTERN(epsilon, "transfer function %d", (int) trc);
+    }
+
+    for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) {
+        if (sys == PL_COLOR_SYSTEM_DOLBYVISION)
+            continue; // requires metadata
+        sh = pl_dispatch_begin(dp);
+        pl_shader_sample_nearest(sh, pl_sample_src( .tex = src ));
+        pl_shader_encode_color(sh, &(struct pl_color_repr) { .sys = sys });
+        pl_shader_decode_color(sh, &(struct pl_color_repr) { .sys = sys }, NULL);
+        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
+            .shader = &sh,
+            .target = fbo,
+        }));
+
+        float epsilon;
+        switch (sys) {
+        case PL_COLOR_SYSTEM_BT_2020_C:
+        case PL_COLOR_SYSTEM_XYZ:
+            epsilon = 1e-5;
+            break;
+
+        case PL_COLOR_SYSTEM_BT_2100_PQ:
+        case PL_COLOR_SYSTEM_BT_2100_HLG:
+            // These seem to be horrifically noisy and prone to breaking on
+            // edge cases for some reason
+            // TODO: figure out why!
+            continue;
+
+        default: epsilon = 1e-6; break;
+        }
+
+        TEST_FBO_PATTERN(epsilon, "color system %d", (int) sys);
+    }
+
+    // Repeat this a few times to test the caching
+    pl_cache cache = pl_cache_create(pl_cache_params( .log = gpu->log ));
+    pl_gpu_set_cache(gpu, cache);
+    for (int i = 0; i < 10; i++) {
+        if (i == 5) {
+            printf("Recreating pl_dispatch to test the caching\n");
+            size_t size = pl_dispatch_save(dp, NULL);
+            REQUIRE(size);
+            uint8_t *cache_data = malloc(size);
+            REQUIRE(cache_data);
+            REQUIRE_CMP(pl_dispatch_save(dp, cache_data), ==, size, "zu");
+
+            pl_dispatch_destroy(&dp);
+            dp = pl_dispatch_create(gpu->log, gpu);
+            pl_dispatch_load(dp, cache_data);
+
+            // Test to make sure the pass regenerates the same cache
+            uint64_t hash = pl_str_hash((pl_str) { cache_data, size });
+            REQUIRE_CMP(pl_dispatch_save(dp, NULL), ==, size, "zu");
+            REQUIRE_CMP(pl_dispatch_save(dp, cache_data), ==, size, "zu");
+            REQUIRE_CMP(pl_str_hash((pl_str) { cache_data, size }), ==, hash, PRIu64);
+            free(cache_data);
+        }
+
+        sh = pl_dispatch_begin(dp);
+
+        // For testing, force the use of CS if possible
+        if (gpu->glsl.compute) {
+            sh->type = SH_COMPUTE;
+            sh->group_size[0] = 8;
+            sh->group_size[1] = 8;
+        }
+
+        pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params(
+            .iterations = 0,
+            .grain = 0.0,
+        ));
+
+        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
+            .shader = &sh,
+            .target = fbo,
+        }));
+        TEST_FBO_PATTERN(1e-6, "deband iter %d", i);
+    }
+
+    pl_gpu_set_cache(gpu, NULL);
+    pl_cache_destroy(&cache);
+
+    // Test peak detection and readback if possible
+    sh = pl_dispatch_begin(dp);
+    pl_shader_sample_nearest(sh, pl_sample_src( .tex = src ));
+
+    pl_shader_obj peak_state = NULL;
+    struct pl_color_space csp_gamma22 = { .transfer = PL_COLOR_TRC_GAMMA22 };
+    struct pl_peak_detect_params peak_params = { .minimum_peak = 0.01 };
+    if (pl_shader_detect_peak(sh, csp_gamma22, &peak_state, &peak_params)) {
+        REQUIRE(pl_dispatch_compute(dp, &(struct pl_dispatch_compute_params) {
+            .shader = &sh,
+            .width = fbo->params.w,
+            .height = fbo->params.h,
+        }));
+
+        float peak, avg;
+        REQUIRE(pl_get_detected_peak(peak_state, &peak, &avg));
+
+        // Recompute the expected peak/average on the CPU (BT.709 luma
+        // coefficients, gamma 2.2, averaged in PQ space) and compare
+        float real_peak = 0, real_avg = 0;
+        for (int y = 0; y < FBO_H; y++) {
+            for (int x = 0; x < FBO_W; x++) {
+                float *color = &src_data[(y * FBO_W + x) * 4];
+                float luma = 0.212639f * powf(color[0], 2.2f) +
+                             0.715169f * powf(color[1], 2.2f) +
+                             0.072192f * powf(color[2], 2.2f);
+                luma = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, luma);
+                real_peak = PL_MAX(real_peak, luma);
+                real_avg += luma;
+            }
+        }
+        real_avg = real_avg / (FBO_W * FBO_H);
+
+        real_avg = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, real_avg);
+        real_peak = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, real_peak);
+        REQUIRE_FEQ(peak, real_peak, 1e-3);
+        REQUIRE_FEQ(avg, real_avg, 1e-2);
+    }
+
+    pl_dispatch_abort(dp, &sh);
+    pl_shader_obj_destroy(&peak_state);
+
+    // Test film grain synthesis
+    pl_shader_obj grain = NULL;
+    struct pl_film_grain_params grain_params = {
+        .tex = src,
+        .components = 3,
+        .component_mapping = { 0, 1, 2},
+        .repr = &(struct pl_color_repr) {
+            .sys = PL_COLOR_SYSTEM_BT_709,
+            .levels = PL_COLOR_LEVELS_LIMITED,
+            .bits = { .color_depth = 10, .sample_depth = 10 },
+        },
+    };
+
+    // AV1 grain, with and without overlap
+    for (int i = 0; i < 2; i++) {
+        grain_params.data.type = PL_FILM_GRAIN_AV1;
+        grain_params.data.params.av1 = av1_grain_data;
+        grain_params.data.params.av1.overlap = !!i;
+        grain_params.data.seed = rand();
+
+        sh = pl_dispatch_begin(dp);
+        pl_shader_film_grain(sh, &grain, &grain_params);
+        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
+            .shader = &sh,
+            .target = fbo,
+        }));
+    }
+
+    if (gpu->glsl.compute) {
+        grain_params.data.type = PL_FILM_GRAIN_H274;
+        grain_params.data.params.h274 = h274_grain_data;
+        grain_params.data.seed = rand();
+
+        sh = pl_dispatch_begin(dp);
+        pl_shader_film_grain(sh, &grain, &grain_params);
+        REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
+            .shader = &sh,
+            .target = fbo,
+        }));
+    }
+    pl_shader_obj_destroy(&grain);
+
+    // Test custom shaders
+    struct pl_custom_shader custom = {
+        .header =
+            "vec3 invert(vec3 color)            \n"
+            "{                                  \n"
+            "    return vec3(1.0) - color;      \n"
+            "}                                  \n",
+
+        .body =
+            "color = vec4(gl_FragCoord.xy, 0.0, 1.0);   \n"
+            "color.rgb = invert(color.rgb) + offset;    \n",
+
+        .input = PL_SHADER_SIG_NONE,
+        .output = PL_SHADER_SIG_COLOR,
+
+        .num_variables = 1,
+        .variables = &(struct pl_shader_var) {
+            .var = pl_var_float("offset"),
+            .data = &(float) { 0.1 },
+        },
+    };
+
+    sh = pl_dispatch_begin(dp);
+    REQUIRE(pl_shader_custom(sh, &custom));
+    REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
+        .shader = &sh,
+        .target = fbo,
+    }));
+
+    // Test dolbyvision
+    struct pl_color_repr repr = {
+        .sys = PL_COLOR_SYSTEM_DOLBYVISION,
+        .dovi = &dovi_meta,
+    };
+
+    sh = pl_dispatch_begin(dp);
+    pl_shader_sample_direct(sh, pl_sample_src( .tex = src ));
+    pl_shader_decode_color(sh, &repr, NULL);
+    REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
+        .shader = &sh,
+        .target = fbo,
+    }));
+
+    // Test deinterlacing
+    sh = pl_dispatch_begin(dp);
+    pl_shader_deinterlace(sh, pl_deinterlace_source( .cur = pl_field_pair(src) ), NULL);
+    REQUIRE(pl_dispatch_finish(dp, pl_dispatch_params(
+        .shader = &sh,
+        .target = fbo,
+    )));
+
+    // Test error diffusion
+    if (fbo->params.storable) {
+        for (int i = 0; i < pl_num_error_diffusion_kernels; i++) {
+            const struct pl_error_diffusion_kernel *k = pl_error_diffusion_kernels[i];
+            printf("testing error diffusion kernel '%s'\n", k->name);
+            sh = pl_dispatch_begin(dp);
+            bool ok = pl_shader_error_diffusion(sh, pl_error_diffusion_params(
+                .input_tex = src,
+                .output_tex = fbo,
+                .new_depth = 8,
+                .kernel = k,
+            ));
+
+            if (!ok) {
+                fprintf(stderr, "kernel '%s' exceeds GPU limits, skipping...\n", k->name);
+                continue;
+            }
+
+            REQUIRE(pl_dispatch_compute(dp, pl_dispatch_compute_params(
+                .shader = &sh,
+                .dispatch_size = {1, 1, 1},
+            )));
+        }
+    }
+
+    pl_dispatch_destroy(&dp);
+    pl_tex_destroy(gpu, &src);
+    pl_tex_destroy(gpu, &fbo);
+}
+
+// Upscales a 5x5 single-dot image to 100x100 with EWA Lanczos polar
+// sampling, optionally dumping the result as a PGM image (PRINT_OUTPUT).
+// Skips silently when the required formats or objects are unavailable.
+static void pl_scaler_tests(pl_gpu gpu)
+{
+    pl_fmt src_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_LINEAR);
+    pl_fmt fbo_fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, 1, 16, 32, PL_FMT_CAP_RENDERABLE);
+    if (!src_fmt || !fbo_fmt)
+        return;
+
+    float *fbo_data = NULL;
+    pl_shader_obj lut = NULL;
+
+    // Single impulse in the center, so the output shows the filter kernel
+    static float data_5x5[5][5] = {
+        { 0, 0, 0, 0, 0 },
+        { 0, 0, 0, 0, 0 },
+        { 0, 0, 1, 0, 0 },
+        { 0, 0, 0, 0, 0 },
+        { 0, 0, 0, 0, 0 },
+    };
+
+    pl_tex dot5x5 = pl_tex_create(gpu, &(struct pl_tex_params) {
+        .w              = 5,
+        .h              = 5,
+        .format         = src_fmt,
+        .sampleable     = true,
+        .initial_data   = &data_5x5[0][0],
+    });
+
+    struct pl_tex_params fbo_params = {
+        .w              = 100,
+        .h              = 100,
+        .format         = fbo_fmt,
+        .renderable     = true,
+        .storable       = fbo_fmt->caps & PL_FMT_CAP_STORABLE,
+        .host_readable  = fbo_fmt->caps & PL_FMT_CAP_HOST_READABLE,
+    };
+
+    pl_tex fbo = pl_tex_create(gpu, &fbo_params);
+    pl_dispatch dp = pl_dispatch_create(gpu->log, gpu);
+    if (!dot5x5 || !fbo || !dp)
+        goto error;
+
+    pl_shader sh = pl_dispatch_begin(dp);
+    REQUIRE(pl_shader_sample_polar(sh,
+        pl_sample_src(
+            .tex        = dot5x5,
+            .new_w      = fbo->params.w,
+            .new_h      = fbo->params.h,
+        ),
+        pl_sample_filter_params(
+            .filter     = pl_filter_ewa_lanczos,
+            .lut        = &lut,
+            .no_compute = !fbo->params.storable,
+        )
+    ));
+    REQUIRE(pl_dispatch_finish(dp, &(struct pl_dispatch_params) {
+        .shader = &sh,
+        .target = fbo,
+    }));
+
+    if (fbo->params.host_readable) {
+        fbo_data = malloc(fbo->params.w * fbo->params.h * sizeof(float));
+        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
+            .tex = fbo,
+            .ptr = fbo_data,
+        }));
+
+#ifdef PRINT_OUTPUT
+        int max = 255;
+        printf("P2\n%d %d\n%d\n", fbo->params.w, fbo->params.h, max);
+        for (int y = 0; y < fbo->params.h; y++) {
+            for (int x = 0; x < fbo->params.w; x++) {
+                // Index rows by width (was `y * params.h + x`, which only
+                // worked because the FBO happens to be square)
+                float v = fbo_data[y * fbo->params.w + x];
+                printf("%d ", (int) round(fmin(fmax(v, 0.0), 1.0) * max));
+            }
+            printf("\n");
+        }
+#endif
+    }
+
+error:
+    free(fbo_data);
+    pl_shader_obj_destroy(&lut);
+    pl_dispatch_destroy(&dp);
+    pl_tex_destroy(gpu, &dot5x5);
+    pl_tex_destroy(gpu, &fbo);
+}
+
+// Source strings for mpv-style (//!HOOK) user shaders, covering hook
+// save/load, embedded textures, and the various //!PARAM types.
+static const char *user_shader_tests[] = {
+    // Test hooking, saving and loading
+    "// Example of a comment at the beginning                               \n"
+    "                                                                       \n"
+    "//!HOOK NATIVE                                                         \n"
+    "//!DESC upscale image                                                  \n"
+    "//!BIND HOOKED                                                         \n"
+    "//!WIDTH HOOKED.w 10 *                                                 \n"
+    "//!HEIGHT HOOKED.h 10 *                                                \n"
+    "//!SAVE NATIVEBIG                                                      \n"
+    "//!WHEN NATIVE.w 500 <                                                 \n"
+    "                                                                       \n"
+    "vec4 hook()                                                            \n"
+    "{                                                                      \n"
+    "    return HOOKED_texOff(0);                                           \n"
+    "}                                                                      \n"
+    "                                                                       \n"
+    "//!HOOK MAIN                                                           \n"
+    "//!DESC downscale bigger image                                         \n"
+    "//!WHEN NATIVE.w 500 <                                                 \n"
+    "//!BIND NATIVEBIG                                                      \n"
+    "                                                                       \n"
+    "vec4 hook()                                                            \n"
+    "{                                                                      \n"
+    "    return NATIVEBIG_texOff(0);                                        \n"
+    "}                                                                      \n",
+
+    // Test use of textures
+    "//!HOOK MAIN                                                           \n"
+    "//!DESC turn everything into colorful pixels                           \n"
+    "//!BIND HOOKED                                                         \n"
+    "//!BIND DISCO                                                          \n"
+    "//!COMPONENTS 3                                                        \n"
+    "                                                                       \n"
+    "vec4 hook()                                                            \n"
+    "{                                                                      \n"
+    "    return vec4(DISCO_tex(HOOKED_pos * 10.0).rgb, 1);                  \n"
+    "}                                                                      \n"
+    "                                                                       \n"
+    "//!TEXTURE DISCO                                                       \n"
+    "//!SIZE 3 3                                                            \n"
+    "//!FORMAT rgba8                                                        \n"
+    "//!FILTER NEAREST                                                      \n"
+    "//!BORDER REPEAT                                                       \n"
+    "ff0000ff00ff00ff0000ffff00ffffffff00ffffffff00ff4c4c4cff999999ffffffffff\n"
+
+    // Test custom parameters
+    "//!PARAM test                                                          \n"
+    "//!DESC test parameter                                                 \n"
+    "//!TYPE DYNAMIC float                                                  \n"
+    "//!MINIMUM 0.0                                                         \n"
+    "//!MAXIMUM 100.0                                                       \n"
+    "1.0                                                                    \n"
+    "                                                                       \n"
+    "//!PARAM testconst                                                     \n"
+    "//!TYPE CONSTANT uint                                                  \n"
+    "//!MAXIMUM 16                                                          \n"
+    "3                                                                      \n"
+    "                                                                       \n"
+    "//!PARAM testdefine                                                    \n"
+    "//!TYPE DEFINE                                                         \n"
+    "100                                                                    \n"
+    "                                                                       \n"
+    "//!PARAM testenum                                                      \n"
+    "//!TYPE ENUM DEFINE                                                    \n"
+    "FOO                                                                    \n"
+    "BAR                                                                    \n"
+    "                                                                       \n"
+    "//!HOOK MAIN                                                           \n"
+    "//!WHEN testconst 30 >                                                 \n"
+    "#error should not be run                                               \n"
+    "                                                                       \n"
+    "//!HOOK MAIN                                                           \n"
+    "//!WHEN testenum FOO =                                                 \n"
+    "#if testenum == BAR                                                    \n"
+    " #error bad                                                            \n"
+    "#endif                                                                 \n"
+    "vec4 hook() { return vec4(0.0); }                                      \n"
+};
+
+// User shader sources requiring storage/compute support: //!STORAGE
+// textures and //!BUFFER uniform/storage blocks.
+static const char *compute_shader_tests[] = {
+    // Test use of storage/buffer resources
+    "//!HOOK MAIN                                                           \n"
+    "//!DESC attach some storage objects                                    \n"
+    "//!BIND tex_storage                                                    \n"
+    "//!BIND buf_uniform                                                    \n"
+    "//!BIND buf_storage                                                    \n"
+    "//!COMPONENTS 4                                                        \n"
+    "                                                                       \n"
+    "vec4 hook()                                                            \n"
+    "{                                                                      \n"
+    "    return vec4(foo, bar, bat);                                        \n"
+    "}                                                                      \n"
+    "                                                                       \n"
+    "//!TEXTURE tex_storage                                                 \n"
+    "//!SIZE 100 100                                                        \n"
+    "//!FORMAT r32f                                                         \n"
+    "//!STORAGE                                                             \n"
+    "                                                                       \n"
+    "//!BUFFER buf_uniform                                                  \n"
+    "//!VAR float foo                                                       \n"
+    "//!VAR float bar                                                       \n"
+    "0000000000000000                                                       \n"
+    "                                                                       \n"
+    "//!BUFFER buf_storage                                                  \n"
+    "//!VAR vec2 bat                                                        \n"
+    "//!VAR int big[32];                                                    \n"
+    "//!STORAGE                                                             \n",
+
+};
+
+// Minimal .cube LUT files (identity transforms) for pl_lut_parse_cube().
+// Identity LUTs keep the rendered output comparable while still
+// exercising the 1D and 3D LUT code paths.
+static const char *test_luts[] = {
+
+    "TITLE \"1D identity\" \n"
+    "LUT_1D_SIZE 2 \n"
+    "0.0 0.0 0.0 \n"
+    "1.0 1.0 1.0 \n",
+
+    // 2x2x2 3D LUT; requires 3D texture support at render time
+    "TITLE \"3D identity\" \n"
+    "LUT_3D_SIZE 2 \n"
+    "0.0 0.0 0.0 \n"
+    "1.0 0.0 0.0 \n"
+    "0.0 1.0 0.0 \n"
+    "1.0 1.0 0.0 \n"
+    "0.0 0.0 1.0 \n"
+    "1.0 0.0 1.0 \n"
+    "0.0 1.0 1.0 \n"
+    "1.0 1.0 1.0 \n"
+
+};
+
+// pl_source_frame map callback that performs no mapping at all: the
+// source frame's user data already holds a fully prepared pl_frame,
+// which is simply copied into the output. The gpu/tex arguments are
+// intentionally unused.
+static bool frame_passthrough(pl_gpu gpu, pl_tex *tex,
+                              const struct pl_source_frame *src, struct pl_frame *out_frame)
+{
+    *out_frame = *(const struct pl_frame *) src->frame_data;
+    return true;
+}
+
+// pl_queue get_frame callback: walks an array of source frames whose
+// end is marked by an entry with frame_data == NULL. qparams->priv
+// points at the caller's cursor, which is advanced on every pull.
+static enum pl_queue_status get_frame_ptr(struct pl_source_frame *out_frame,
+                                          const struct pl_queue_params *qparams)
+{
+    const struct pl_source_frame **cursor = qparams->priv;
+    const struct pl_source_frame *frame = *cursor;
+    if (!frame->frame_data)
+        return PL_QUEUE_EOF;
+
+    *cursor = frame + 1;
+    *out_frame = *frame;
+    return PL_QUEUE_OK;
+}
+
+// Render info callback: logs each executed shader pass together with
+// its dispatch index, so test logs show exactly which passes ran.
+static void render_info_cb(void *priv, const struct pl_render_info *info)
+{
+    const char *desc = info->pass->shader->description;
+    printf("{%d} Executed shader: %s\n", info->index, desc);
+}
+
+// Broad smoke test of the high-level renderer: uploads a random 50x50
+// float plane, renders it into an FBO under a wide matrix of parameter
+// combinations (scalers, dithering, tone mapping, film grain, custom
+// shaders, LUTs, ICC profiles, overlays, rotation), then exercises
+// frame mixing and the pl_queue helper. The sequence is order-dependent:
+// state set on `image`/`target` is deliberately reset between sections.
+static void pl_render_tests(pl_gpu gpu)
+{
+    pl_tex img_tex = NULL, fbo = NULL;
+    pl_renderer rr = NULL;
+
+    enum { width = 50, height = 50 };
+    static float data[width][height];
+    for (int y = 0; y < height; y++) {
+        for (int x = 0; x < width; x++)
+            data[y][x] = RANDOM;
+    }
+
+    struct pl_plane img_plane = {0};
+    struct pl_plane_data plane_data = {
+        .type = PL_FMT_FLOAT,
+        .width = width,
+        .height = height,
+        .component_size = { 8 * sizeof(float) },
+        .component_map = { 0 },
+        .pixel_stride = sizeof(float),
+        .pixels = data,
+    };
+
+    // If no suitable renderable format exists, skip the whole test
+    if (!pl_recreate_plane(gpu, NULL, &fbo, &plane_data))
+        return;
+
+    if (!pl_upload_plane(gpu, &img_plane, &img_tex, &plane_data))
+        goto error;
+
+    rr = pl_renderer_create(gpu->log, gpu);
+    pl_tex_clear_ex(gpu, fbo, (union pl_clear_color){0});
+
+    struct pl_frame image = {
+        .num_planes     = 1,
+        .planes         = { img_plane },
+        .repr = {
+            .sys        = PL_COLOR_SYSTEM_BT_709,
+            .levels     = PL_COLOR_LEVELS_FULL,
+        },
+        .color          = pl_color_space_srgb,
+    };
+
+    struct pl_frame target = {
+        .num_planes     = 1,
+        .planes         = {{
+            .texture            = fbo,
+            .components         = 3,
+            .component_mapping  = {0, 1, 2},
+        }},
+        .repr = {
+            .sys        = PL_COLOR_SYSTEM_RGB,
+            .levels     = PL_COLOR_LEVELS_FULL,
+            .bits.color_depth = 32,
+        },
+        .color          = pl_color_space_srgb,
+    };
+
+    REQUIRE(pl_render_image(rr, &image, &target, NULL));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+
+    // TODO: embed a reference texture and ensure it matches
+
+    // Test a bunch of different params
+    // Renders once for every value of SNAME->FIELD in [0, LIMIT], with
+    // forced dithering, and asserts the renderer reports no errors
+#define TEST(SNAME, STYPE, DEFAULT, FIELD, LIMIT)                       \
+    do {                                                                \
+        for (int i = 0; i <= LIMIT; i++) {                              \
+            printf("testing `" #STYPE "." #FIELD " = %d`\n", i);        \
+            struct pl_render_params params = pl_render_default_params;  \
+            params.force_dither = true;                                 \
+            struct STYPE tmp = DEFAULT;                                 \
+            tmp.FIELD = i;                                              \
+            params.SNAME = &tmp;                                        \
+            REQUIRE(pl_render_image(rr, &image, &target, &params));     \
+            pl_gpu_flush(gpu);                                          \
+            REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE); \
+        }                                                               \
+    } while (0)
+
+#define TEST_PARAMS(NAME, FIELD, LIMIT) \
+    TEST(NAME##_params, pl_##NAME##_params, pl_##NAME##_default_params, FIELD, LIMIT)
+
+    // Crop the source to force upscaling through every scale filter
+    image.crop.x1 = width / 2.0;
+    image.crop.y1 = height / 2.0;
+    for (int i = 0; i < pl_num_scale_filters; i++) {
+        struct pl_render_params params = pl_render_default_params;
+        params.upscaler = pl_scale_filters[i].filter;
+        printf("testing `params.upscaler = /* %s */`\n", pl_scale_filters[i].name);
+        REQUIRE(pl_render_image(rr, &image, &target, &params));
+        pl_gpu_flush(gpu);
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    }
+    image.crop.x1 = image.crop.y1 = 0;
+
+    // Crop the target to force downscaling through every scale filter
+    target.crop.x1 = width / 2.0;
+    target.crop.y1 = height / 2.0;
+    for (int i = 0; i < pl_num_scale_filters; i++) {
+        struct pl_render_params params = pl_render_default_params;
+        params.downscaler = pl_scale_filters[i].filter;
+        printf("testing `params.downscaler = /* %s */`\n", pl_scale_filters[i].name);
+        REQUIRE(pl_render_image(rr, &image, &target, &params));
+        pl_gpu_flush(gpu);
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    }
+    target.crop.x1 = target.crop.y1 = 0;
+
+    TEST_PARAMS(deband, iterations, 3);
+    TEST_PARAMS(sigmoid, center, 1);
+    TEST_PARAMS(color_map, intent, PL_INTENT_ABSOLUTE_COLORIMETRIC);
+    TEST_PARAMS(dither, method, PL_DITHER_WHITE_NOISE);
+    TEST_PARAMS(dither, temporal, true);
+    TEST_PARAMS(distort, alpha_mode, PL_ALPHA_INDEPENDENT);
+    TEST_PARAMS(distort, constrain, true);
+    TEST_PARAMS(distort, bicubic, true);
+    TEST(cone_params, pl_cone_params, pl_vision_deuteranomaly, strength, 0);
+
+    // Test gamma-correct dithering
+    target.repr.bits.color_depth = 2;
+    TEST_PARAMS(dither, transfer, PL_COLOR_TRC_GAMMA22);
+    target.repr.bits.color_depth = 32;
+
+    // Test HDR tone mapping
+    image.color = pl_color_space_hdr10;
+    TEST_PARAMS(color_map, visualize_lut, true);
+    if (gpu->limits.max_ssbo_size)
+        TEST_PARAMS(peak_detect, allow_delayed, true);
+
+    // Test inverse tone-mapping and pure BPC
+    image.color.hdr.max_luma = 1000;
+    target.color.hdr.max_luma = 4000;
+    target.color.hdr.min_luma = 0.02;
+    TEST_PARAMS(color_map, inverse_tone_mapping, true);
+
+    image.color = pl_color_space_srgb;
+    target.color = pl_color_space_srgb;
+
+    // Test some misc stuff
+    struct pl_render_params params = pl_render_default_params;
+    params.color_adjustment = &(struct pl_color_adjustment) {
+        .brightness = 0.1,
+        .contrast = 0.9,
+        .saturation = 1.5,
+        .gamma = 0.8,
+        .temperature = 0.3,
+    };
+    REQUIRE(pl_render_image(rr, &image, &target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    params = pl_render_default_params;
+
+    struct pl_frame inferred_image = image, inferred_target = target;
+    pl_frames_infer(rr, &inferred_image, &inferred_target);
+    REQUIRE(pl_render_image(rr, &inferred_image, &inferred_target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+
+    // Test background blending and alpha transparency
+    params.blend_against_tiles = true;
+    params.corner_rounding = 0.25f;
+    REQUIRE(pl_render_image(rr, &image, &target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    params = pl_render_default_params;
+
+    // Test film grain synthesis
+    image.film_grain.type = PL_FILM_GRAIN_AV1;
+    image.film_grain.params.av1 = av1_grain_data;
+    REQUIRE(pl_render_image(rr, &image, &target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+
+    image.film_grain.type = PL_FILM_GRAIN_H274;
+    image.film_grain.params.h274 = h274_grain_data;
+    REQUIRE(pl_render_image(rr, &image, &target, &params));
+    // H.274 film grain synthesis requires compute shaders
+    if (gpu->glsl.compute) {
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    } else {
+        // NOTE(review): the second pl_renderer_get_errors() call below
+        // re-queries the same state already captured in rr_err
+        const struct pl_render_errors rr_err = pl_renderer_get_errors(rr);
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_FILM_GRAIN);
+        pl_renderer_reset_errors(rr, &rr_err);
+    }
+    image.film_grain = (struct pl_film_grain_data) {0};
+
+    // Test mpv-style custom shaders
+    for (int i = 0; i < PL_ARRAY_SIZE(user_shader_tests); i++) {
+        printf("testing user shader:\n\n%s\n", user_shader_tests[i]);
+        const struct pl_hook *hook;
+        hook = pl_mpv_user_shader_parse(gpu, user_shader_tests[i],
+                                        strlen(user_shader_tests[i]));
+        REQUIRE(hook);
+
+        params.hooks = &hook;
+        params.num_hooks = 1;
+        REQUIRE(pl_render_image(rr, &image, &target, &params));
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+
+        pl_mpv_user_shader_destroy(&hook);
+    }
+
+    // Compute-based user shaders need both compute and SSBO support
+    if (gpu->glsl.compute && gpu->limits.max_ssbo_size) {
+        for (int i = 0; i < PL_ARRAY_SIZE(compute_shader_tests); i++) {
+            printf("testing user shader:\n\n%s\n", compute_shader_tests[i]);
+            const struct pl_hook *hook;
+            hook = pl_mpv_user_shader_parse(gpu, compute_shader_tests[i],
+                                            strlen(compute_shader_tests[i]));
+            REQUIRE(hook);
+
+            params.hooks = &hook;
+            params.num_hooks = 1;
+            REQUIRE(pl_render_image(rr, &image, &target, &params));
+            REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+
+            pl_mpv_user_shader_destroy(&hook);
+        }
+    }
+    params = pl_render_default_params;
+
+    // Test custom LUTs
+    for (int i = 0; i < PL_ARRAY_SIZE(test_luts); i++) {
+        printf("testing custom lut %d\n", i);
+        struct pl_custom_lut *lut;
+        lut = pl_lut_parse_cube(gpu->log, test_luts[i], strlen(test_luts[i]));
+        REQUIRE(lut);
+
+        // 3D LUTs need 3D texture support, which GLES2 (GLSL 100) lacks
+        bool has_3dlut = gpu->limits.max_tex_3d_dim && gpu->glsl.version > 100;
+        if (lut->size[2] && !has_3dlut) {
+            pl_lut_free(&lut);
+            continue;
+        }
+
+        // Test all three at the same time to reduce the number of tests
+        image.lut = target.lut = params.lut = lut;
+
+        for (enum pl_lut_type t = PL_LUT_UNKNOWN; t <= PL_LUT_CONVERSION; t++) {
+            printf("testing LUT method %d\n", t);
+            image.lut_type = target.lut_type = params.lut_type = t;
+            REQUIRE(pl_render_image(rr, &image, &target, &params));
+            REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+        }
+
+        image.lut = target.lut = params.lut = NULL;
+        pl_lut_free(&lut);
+    }
+
+#ifdef PL_HAVE_LCMS
+
+    // It doesn't fit without use of 3D textures on GLES2
+    if (gpu->glsl.version > 100) {
+        // Test ICC profiles
+        image.profile = TEST_PROFILE(sRGB_v2_nano_icc);
+        REQUIRE(pl_render_image(rr, &image, &target, &params));
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+        image.profile = (struct pl_icc_profile) {0};
+
+        target.profile = TEST_PROFILE(sRGB_v2_nano_icc);
+        REQUIRE(pl_render_image(rr, &image, &target, &params));
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+        target.profile = (struct pl_icc_profile) {0};
+
+        // Identical profile on both sides (no-op conversion path)
+        image.profile = TEST_PROFILE(sRGB_v2_nano_icc);
+        target.profile = image.profile;
+        REQUIRE(pl_render_image(rr, &image, &target, &params));
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+        image.profile = (struct pl_icc_profile) {0};
+        target.profile = (struct pl_icc_profile) {0};
+    }
+
+#endif
+
+    // Test overlays (including parts that extend past the frame bounds)
+    image.num_overlays = 1;
+    image.overlays = &(struct pl_overlay) {
+        .tex = img_plane.texture,
+        .mode = PL_OVERLAY_NORMAL,
+        .num_parts = 2,
+        .parts = (struct pl_overlay_part[]) {{
+            .src = {0, 0, 2, 2},
+            .dst = {30, 100, 40, 200},
+        }, {
+            .src = {2, 2, 5, 5},
+            .dst = {1000, -1, 3, 5},
+        }},
+    };
+    REQUIRE(pl_render_image(rr, &image, &target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    params.disable_fbos = true;
+    REQUIRE(pl_render_image(rr, &image, &target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    image.num_overlays = 0;
+    params = pl_render_default_params;
+
+    target.num_overlays = 1;
+    target.overlays = &(struct pl_overlay) {
+        .tex = img_plane.texture,
+        .mode = PL_OVERLAY_MONOCHROME,
+        .num_parts = 1,
+        .parts = &(struct pl_overlay_part) {
+            .src = {5, 5, 15, 15},
+            .dst = {5, 5, 15, 15},
+            .color = {1.0, 0.5, 0.0},
+        },
+    };
+    REQUIRE(pl_render_image(rr, &image, &target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    // NULL image: target-only render (overlays without any input frame)
+    REQUIRE(pl_render_image(rr, NULL, &target, &params));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    target.num_overlays = 0;
+
+    // Test rotation
+    for (pl_rotation rot = 0; rot < PL_ROTATION_360; rot += PL_ROTATION_90) {
+        image.rotation = rot;
+        REQUIRE(pl_render_image(rr, &image, &target, &params));
+        REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+    }
+
+    // Attempt frame mixing, using the mixer queue helper
+    printf("testing frame mixing \n");
+    struct pl_render_params mix_params = {
+        .frame_mixer = &pl_filter_mitchell_clamp,
+        .info_callback = render_info_cb,
+    };
+
+    struct pl_queue_params qparams = {
+        .radius = pl_frame_mix_radius(&mix_params),
+        .vsync_duration = 1.0 / 60.0,
+    };
+
+    // Test large PTS jumps in frame mix
+    struct pl_frame_mix mix = (struct pl_frame_mix) {
+        .num_frames = 2,
+        .frames = (const struct pl_frame *[]) { &image, &image },
+        .signatures = (uint64_t[]) { 0xFFF1, 0xFFF2 },
+        .timestamps = (float[]) { -100, 100 },
+        .vsync_duration = 1.6,
+    };
+    REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
+
+    // Test inferring frame mix
+    // NOTE(review): renders the original `target` rather than
+    // `inferred_target` — presumably intentional, but worth confirming
+    inferred_target = target;
+    pl_frames_infer_mix(rr, &mix, &inferred_target, &inferred_image);
+    REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
+
+    // Test empty frame mix
+    mix = (struct pl_frame_mix) {0};
+    REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
+
+    // Test inferring empty frame mix
+    inferred_target = target;
+    pl_frames_infer_mix(rr, &mix, &inferred_target, &inferred_image);
+    REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
+
+    // Test mixer queue
+#define NUM_MIX_FRAMES 20
+    const float frame_duration = 1.0 / 24.0;
+    struct pl_source_frame srcframes[NUM_MIX_FRAMES+1];
+    srcframes[NUM_MIX_FRAMES] = (struct pl_source_frame) {0};
+    for (int i = 0; i < NUM_MIX_FRAMES; i++) {
+        srcframes[i] = (struct pl_source_frame) {
+            .pts = i * frame_duration,
+            .duration = frame_duration,
+            .map = frame_passthrough,
+            .frame_data = &image,
+        };
+    }
+
+    pl_queue queue = pl_queue_create(gpu);
+    enum pl_queue_status ret;
+
+    // Test pre-pushing all frames, with delayed EOF.
+    for (int i = 0; i < NUM_MIX_FRAMES; i++) {
+        const struct pl_source_frame *src = &srcframes[i];
+        if (i > 10) // test pushing in reverse order
+            src = &srcframes[NUM_MIX_FRAMES + 10 - i];
+        if (!pl_queue_push_block(queue, 1, src)) // mini-sleep
+            pl_queue_push(queue, src); // push it anyway, for testing
+    }
+
+    while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) {
+        if (ret == PL_QUEUE_MORE) {
+            REQUIRE_CMP(qparams.pts, >, 0.0f, "f");
+            pl_queue_push(queue, NULL); // push delayed EOF
+            continue;
+        }
+
+        REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u");
+        REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
+
+        // Simulate advancing vsync
+        qparams.pts += qparams.vsync_duration;
+    }
+
+    // Test dynamically pulling all frames, with oversample mixer
+    const struct pl_source_frame *frame_ptr = &srcframes[0];
+    mix_params.frame_mixer = &pl_oversample_frame_mixer;
+
+    qparams = (struct pl_queue_params) {
+        .radius = pl_frame_mix_radius(&mix_params),
+        .vsync_duration = qparams.vsync_duration,
+        .get_frame = get_frame_ptr,
+        .priv = &frame_ptr,
+    };
+
+    pl_queue_reset(queue);
+    while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) {
+        REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u");
+        REQUIRE_CMP(mix.num_frames, <=, 2, "d");
+        REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
+        qparams.pts += qparams.vsync_duration;
+    }
+
+    // Test large PTS jump
+    // NOTE(review): frame_ptr has been fully consumed above, so this
+    // update immediately reports EOF — confirm that is the intent
+    pl_queue_reset(queue);
+    REQUIRE(pl_queue_update(queue, &mix, &qparams) == PL_QUEUE_EOF);
+
+    // Test deinterlacing
+    pl_queue_reset(queue);
+    printf("testing deinterlacing \n");
+    for (int i = 0; i < NUM_MIX_FRAMES; i++) {
+        struct pl_source_frame *src = &srcframes[i];
+        if (i > 10)
+            src = &srcframes[NUM_MIX_FRAMES + 10 - i];
+        src->first_field = PL_FIELD_EVEN;
+        pl_queue_push(queue, src);
+    }
+    pl_queue_push(queue, NULL);
+
+    qparams.pts = 0;
+    qparams.get_frame = NULL;
+    while ((ret = pl_queue_update(queue, &mix, &qparams)) != PL_QUEUE_EOF) {
+        REQUIRE_CMP(ret, ==, PL_QUEUE_OK, "u");
+        REQUIRE(pl_render_image_mix(rr, &mix, &target, &mix_params));
+        qparams.pts += qparams.vsync_duration;
+    }
+
+    pl_queue_destroy(&queue);
+
+error:
+    pl_renderer_destroy(&rr);
+    pl_tex_destroy(gpu, &img_tex);
+    pl_tex_destroy(gpu, &fbo);
+}
+
+// Hook callback that leaves the frame untouched: an all-zero result
+// tells the dispatcher that nothing was hooked, while the hook's mere
+// presence still forces the requested pipeline stage to exist.
+static struct pl_hook_res noop_hook(void *priv, const struct pl_hook_params *params)
+{
+    struct pl_hook_res res = {0};
+    return res;
+}
+
+// Round-trip test for YCbCr rendering: uploads three 16-bit planes
+// (luma full-res, chroma subsampled 2x), renders them to an identically
+// laid out target via the renderer (forcing chroma merging through a
+// no-op hook), downloads the result and checks every sample survived
+// the RGB round trip within a small tolerance.
+static void pl_ycbcr_tests(pl_gpu gpu)
+{
+    struct pl_plane_data data[3];
+    for (int i = 0; i < 3; i++) {
+        // Plane 0 is luma; planes 1/2 are chroma at half resolution,
+        // rounded up via the (N + sub) >> sub pattern
+        const int sub = i > 0 ? 1 : 0;
+        const int width = (323 + sub) >> sub;
+        const int height = (255 + sub) >> sub;
+
+        data[i] = (struct pl_plane_data) {
+            .type = PL_FMT_UNORM,
+            .width = width,
+            .height = height,
+            .component_size = {16},
+            .component_map = {i},
+            .pixel_stride = sizeof(uint16_t),
+            .row_stride = PL_ALIGN2(width * sizeof(uint16_t),
+                                    gpu->limits.align_tex_xfer_pitch),
+        };
+    }
+
+    // Skip silently if the GPU has no renderable + readable 16-bit format
+    pl_fmt fmt = pl_plane_find_fmt(gpu, NULL, &data[0]);
+    enum pl_fmt_caps caps = PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_HOST_READABLE;
+    if (!fmt || (fmt->caps & caps) != caps)
+        return;
+
+    pl_renderer rr = pl_renderer_create(gpu->log, gpu);
+    if (!rr)
+        return;
+
+    pl_tex src_tex[3] = {0};
+    pl_tex dst_tex[3] = {0};
+    struct pl_frame img = {
+        .num_planes = 3,
+        .repr       = pl_color_repr_hdtv,
+        .color      = pl_color_space_bt709,
+    };
+
+    struct pl_frame target = {
+        .num_planes = 3,
+        .repr       = pl_color_repr_hdtv,
+        .color      = pl_color_space_bt709,
+    };
+
+    uint8_t *src_buffer[3] = {0};
+    uint8_t *dst_buffer = NULL;
+    for (int i = 0; i < 3; i++) {
+        // Generate some arbitrary data for the buffer
+        src_buffer[i] = malloc(data[i].height * data[i].row_stride);
+        if (!src_buffer[i])
+            goto error;
+
+        data[i].pixels = src_buffer[i];
+        for (int y = 0; y < data[i].height; y++) {
+            for (int x = 0; x < data[i].width; x++) {
+                size_t off = y * data[i].row_stride + x * data[i].pixel_stride;
+                uint16_t *pixel = (uint16_t *) &src_buffer[i][off];
+                int gx = 200 + 100 * i, gy = 300 + 150 * i;
+                *pixel = (gx * x) ^ (gy * y); // whatever
+            }
+        }
+
+        REQUIRE(pl_upload_plane(gpu, &img.planes[i], &src_tex[i], &data[i]));
+    }
+
+    // This co-sites chroma pixels with pixels in the RGB image, meaning we
+    // get an exact round-trip when sampling both ways. This makes it useful
+    // as a test case, even though it's not common in the real world.
+    pl_frame_set_chroma_location(&img, PL_CHROMA_TOP_LEFT);
+
+    for (int i = 0; i < 3; i++) {
+        dst_tex[i] = pl_tex_create(gpu, &(struct pl_tex_params) {
+            .format = fmt,
+            .w = data[i].width,
+            .h = data[i].height,
+            .renderable = true,
+            .host_readable = true,
+            .storable = fmt->caps & PL_FMT_CAP_STORABLE,
+            .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
+        });
+
+        if (!dst_tex[i])
+            goto error;
+
+        // Target mirrors the source layout, but points at fresh textures
+        target.planes[i] = img.planes[i];
+        target.planes[i].texture = dst_tex[i];
+    }
+
+    REQUIRE(pl_render_image(rr, &img, &target, &(struct pl_render_params) {
+        .num_hooks = 1,
+        .hooks = &(const struct pl_hook *){&(struct pl_hook) {
+            // Forces chroma merging, to test the chroma merging code
+            .stages = PL_HOOK_CHROMA_INPUT,
+            .hook = noop_hook,
+        }},
+    }));
+    REQUIRE(pl_renderer_get_errors(rr).errors == PL_RENDER_ERR_NONE);
+
+    // Plane 0 is the largest, so its buffer size fits all three planes
+    size_t buf_size = data[0].height * data[0].row_stride;
+    dst_buffer = malloc(buf_size);
+    if (!dst_buffer)
+        goto error;
+
+    for (int i = 0; i < 3; i++) {
+        memset(dst_buffer, 0xAA, buf_size);
+        REQUIRE(pl_tex_download(gpu, &(struct pl_tex_transfer_params) {
+            .tex = dst_tex[i],
+            .ptr = dst_buffer,
+            .row_pitch = data[i].row_stride,
+        }));
+
+        for (int y = 0; y < data[i].height; y++) {
+            for (int x = 0; x < data[i].width; x++) {
+                size_t off = y * data[i].row_stride + x * data[i].pixel_stride;
+                uint16_t *src_pixel = (uint16_t *) &src_buffer[i][off];
+                uint16_t *dst_pixel = (uint16_t *) &dst_buffer[off];
+                int diff = abs((int) *src_pixel - (int) *dst_pixel);
+                REQUIRE_CMP(diff, <=, 50, "d"); // a little under 0.1%
+            }
+        }
+    }
+
+error:
+    pl_renderer_destroy(&rr);
+    free(dst_buffer);
+    for (int i = 0; i < 3; i++) {
+        free(src_buffer[i]);
+        pl_tex_destroy(gpu, &src_tex[i]);
+        pl_tex_destroy(gpu, &dst_tex[i]);
+    }
+}
+
+// Verifies that a texture and a buffer can be exported through the given
+// handle type and re-imported into a second GPU object. Each half is
+// silently skipped when the GPU lacks the corresponding export or import
+// capability.
+static void pl_test_export_import(pl_gpu gpu,
+                                  enum pl_handle_type handle_type)
+{
+    // Test texture roundtrip
+    bool tex_caps = (gpu->export_caps.tex & handle_type) &&
+                    (gpu->import_caps.tex & handle_type);
+
+    pl_fmt fmt = tex_caps
+        ? pl_find_fmt(gpu, PL_FMT_UNORM, 4, 0, 0, PL_FMT_CAP_BLITTABLE)
+        : NULL;
+
+    if (fmt) {
+        printf("testing texture import/export with fmt %s\n", fmt->name);
+
+        pl_tex export = pl_tex_create(gpu, &(struct pl_tex_params) {
+            .w = 32,
+            .h = 32,
+            .format = fmt,
+            .export_handle = handle_type,
+        });
+        REQUIRE(export);
+        REQUIRE_HANDLE(export->shared_mem, handle_type);
+
+        pl_tex import = pl_tex_create(gpu, &(struct pl_tex_params) {
+            .w = export->params.w,
+            .h = export->params.h,
+            .format = fmt,
+            .import_handle = handle_type,
+            .shared_mem = export->shared_mem,
+        });
+        REQUIRE(import);
+
+        pl_tex_destroy(gpu, &import);
+        pl_tex_destroy(gpu, &export);
+    }
+
+    // Test buffer roundtrip
+    if (!(gpu->export_caps.buf & handle_type) ||
+        !(gpu->import_caps.buf & handle_type))
+        return;
+
+    printf("testing buffer import/export\n");
+
+    pl_buf exp_buf = pl_buf_create(gpu, &(struct pl_buf_params) {
+        .size = 32,
+        .export_handle = handle_type,
+    });
+    REQUIRE(exp_buf);
+    REQUIRE_HANDLE(exp_buf->shared_mem, handle_type);
+
+    pl_buf imp_buf = pl_buf_create(gpu, &(struct pl_buf_params) {
+        .size = 32,
+        .import_handle = handle_type,
+        .shared_mem = exp_buf->shared_mem,
+    });
+    REQUIRE(imp_buf);
+
+    pl_buf_destroy(gpu, &imp_buf);
+    pl_buf_destroy(gpu, &exp_buf);
+}
+
+// Tests importing host memory directly as a GPU buffer (PL_HANDLE_HOST_PTR):
+// imports a page-aligned allocation at a nonzero offset with host mapping,
+// and verifies the mapped contents match the source bytes.
+// Fixes vs. original: the aligned_alloc() result is now checked before use
+// (writing through a NULL pointer on OOM is UB), and the fill loop uses
+// size_t to avoid a signed/unsigned comparison against `size`.
+static void pl_test_host_ptr(pl_gpu gpu)
+{
+    if (!(gpu->import_caps.buf & PL_HANDLE_HOST_PTR))
+        return;
+
+#ifdef __unix__
+
+    printf("testing host ptr\n");
+    REQUIRE(gpu->limits.max_mapped_size);
+
+    const size_t size = 2 << 20;
+    const size_t offset = 2 << 10;
+    const size_t slice = 2 << 16;
+
+    // `size` is a multiple of the 0x1000 alignment, as aligned_alloc requires
+    uint8_t *data = aligned_alloc(0x1000, size);
+    REQUIRE(data);
+    for (size_t i = 0; i < size; i++)
+        data[i] = (uint8_t) i;
+
+    pl_buf buf = pl_buf_create(gpu, &(struct pl_buf_params) {
+        .size = slice,
+        .import_handle = PL_HANDLE_HOST_PTR,
+        .shared_mem = {
+            .handle.ptr = data,
+            .size = size,
+            .offset = offset,
+        },
+        .host_mapped = true,
+    });
+
+    REQUIRE(buf);
+    // The mapped window must reflect the imported slice at `offset`
+    REQUIRE_MEMEQ(data + offset, buf->data, slice);
+
+    pl_buf_destroy(gpu, &buf);
+    free(data);
+
+#endif // unix
+}
+
+// Umbrella entry point: runs every GPU-dependent test suite in sequence
+// against the given GPU, then asserts the GPU never entered a failed
+// state. The order mirrors increasing complexity (buffers -> renderer).
+static void gpu_shader_tests(pl_gpu gpu)
+{
+    pl_buffer_tests(gpu);
+    pl_texture_tests(gpu);
+    pl_planar_tests(gpu);
+    pl_shader_tests(gpu);
+    pl_scaler_tests(gpu);
+    pl_render_tests(gpu);
+    pl_ycbcr_tests(gpu);
+
+    REQUIRE(!pl_gpu_is_failed(gpu));
+}
+
+// Interop entry point: exercises DMA-BUF export/import and host-pointer
+// buffer import, then asserts the GPU did not fail along the way.
+static void gpu_interop_tests(pl_gpu gpu)
+{
+    pl_test_export_import(gpu, PL_HANDLE_DMA_BUF);
+    pl_test_host_ptr(gpu);
+
+    REQUIRE(!pl_gpu_is_failed(gpu));
+}
diff --git a/src/tests/icc.c b/src/tests/icc.c
new file mode 100644
index 0000000..188940b
--- /dev/null
+++ b/src/tests/icc.c
@@ -0,0 +1,106 @@
+#include "tests.h"
+
+#include <libplacebo/shaders/icc.h>
+
+// Minimal ("micro") ICC v2 Display P3 profile, embedded as raw bytes.
+// Opened via pl_icc_open() in main() and expected to be detected as
+// PL_COLOR_PRIM_DISPLAY_P3. The byte values are opaque profile data
+// and must not be altered.
+static const uint8_t DisplayP3_v2_micro_icc[] = {
+    0x00, 0x00, 0x01, 0xc8, 0x6c, 0x63, 0x6d, 0x73, 0x02, 0x10, 0x00, 0x00,
+    0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+    0x07, 0xe2, 0x00, 0x03, 0x00, 0x14, 0x00, 0x09, 0x00, 0x0e, 0x00, 0x1d,
+    0x61, 0x63, 0x73, 0x70, 0x4d, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00,
+    0x73, 0x61, 0x77, 0x73, 0x63, 0x74, 0x72, 0x6c, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x68, 0x61, 0x6e, 0x64,
+    0xb4, 0xaa, 0xdd, 0x1f, 0x13, 0xc8, 0x03, 0x3c, 0xf5, 0x51, 0x14, 0x45,
+    0x28, 0x7a, 0x98, 0xe2, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,
+    0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x5e,
+    0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x0c,
+    0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00, 0x14,
+    0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x2c, 0x00, 0x00, 0x00, 0x14,
+    0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14,
+    0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x54, 0x00, 0x00, 0x00, 0x14,
+    0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60,
+    0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60,
+    0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x60,
+    0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x04,
+    0x75, 0x50, 0x33, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00,
+    0x43, 0x43, 0x30, 0x00, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xf3, 0x51, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x16, 0xcc,
+    0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x83, 0xdf,
+    0x00, 0x00, 0x3d, 0xbf, 0xff, 0xff, 0xff, 0xbb, 0x58, 0x59, 0x5a, 0x20,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x4a, 0xbf, 0x00, 0x00, 0xb1, 0x37,
+    0x00, 0x00, 0x0a, 0xb9, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x28, 0x38, 0x00, 0x00, 0x11, 0x0a, 0x00, 0x00, 0xc8, 0xb9,
+    0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a,
+    0x00, 0x00, 0x00, 0x7c, 0x00, 0xf8, 0x01, 0x9c, 0x02, 0x75, 0x03, 0x83,
+    0x04, 0xc9, 0x06, 0x4e, 0x08, 0x12, 0x0a, 0x18, 0x0c, 0x62, 0x0e, 0xf4,
+    0x11, 0xcf, 0x14, 0xf6, 0x18, 0x6a, 0x1c, 0x2e, 0x20, 0x43, 0x24, 0xac,
+    0x29, 0x6a, 0x2e, 0x7e, 0x33, 0xeb, 0x39, 0xb3, 0x3f, 0xd6, 0x46, 0x57,
+    0x4d, 0x36, 0x54, 0x76, 0x5c, 0x17, 0x64, 0x1d, 0x6c, 0x86, 0x75, 0x56,
+    0x7e, 0x8d, 0x88, 0x2c, 0x92, 0x36, 0x9c, 0xab, 0xa7, 0x8c, 0xb2, 0xdb,
+    0xbe, 0x99, 0xca, 0xc7, 0xd7, 0x65, 0xe4, 0x77, 0xf1, 0xf9, 0xff, 0xff
+};
+
+// Minimal ("micro") ICC v2 Rec. 2020 profile, embedded as raw bytes.
+// Opened via pl_icc_open() in main() and expected to be detected as
+// PL_COLOR_PRIM_BT_2020. The byte values are opaque profile data and
+// must not be altered.
+static const uint8_t Rec2020_v2_micro_icc[] = {
+    0x00, 0x00, 0x01, 0xcc, 0x6c, 0x63, 0x6d, 0x73, 0x02, 0x10, 0x00, 0x00,
+    0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+    0x07, 0xe2, 0x00, 0x03, 0x00, 0x14, 0x00, 0x09, 0x00, 0x0e, 0x00, 0x1d,
+    0x61, 0x63, 0x73, 0x70, 0x4d, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00,
+    0x73, 0x61, 0x77, 0x73, 0x63, 0x74, 0x72, 0x6c, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+    0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x68, 0x61, 0x6e, 0x64,
+    0x17, 0xcb, 0x44, 0xd1, 0x0d, 0xca, 0xe1, 0xc9, 0x03, 0x3e, 0x20, 0x85,
+    0x4a, 0x67, 0x4e, 0xa9, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,
+    0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x5f,
+    0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x0c,
+    0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x18, 0x00, 0x00, 0x00, 0x14,
+    0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x2c, 0x00, 0x00, 0x00, 0x14,
+    0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x40, 0x00, 0x00, 0x00, 0x14,
+    0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x54, 0x00, 0x00, 0x00, 0x14,
+    0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x64,
+    0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x64,
+    0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x68, 0x00, 0x00, 0x00, 0x64,
+    0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
+    0x75, 0x32, 0x30, 0x32, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x00, 0x00, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00,
+    0x43, 0x43, 0x30, 0x00, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0xf3, 0x51, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x16, 0xcc,
+    0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xac, 0x69,
+    0x00, 0x00, 0x47, 0x70, 0xff, 0xff, 0xff, 0x81, 0x58, 0x59, 0x5a, 0x20,
+    0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2a, 0x6a, 0x00, 0x00, 0xac, 0xe3,
+    0x00, 0x00, 0x07, 0xad, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+    0x00, 0x00, 0x20, 0x03, 0x00, 0x00, 0x0b, 0xad, 0x00, 0x00, 0xcb, 0xff,
+    0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x2c,
+    0x00, 0x00, 0x01, 0x53, 0x02, 0xa5, 0x03, 0xf8, 0x05, 0x4e, 0x06, 0xd6,
+    0x08, 0x98, 0x0a, 0x8f, 0x0c, 0xc3, 0x0f, 0x31, 0x11, 0xdc, 0x14, 0xc3,
+    0x17, 0xe8, 0x1b, 0x4c, 0x1e, 0xf0, 0x22, 0xd5, 0x26, 0xfa, 0x2b, 0x62,
+    0x30, 0x0c, 0x34, 0xfa, 0x3a, 0x2b, 0x3f, 0xa2, 0x45, 0x5d, 0x4b, 0x5f,
+    0x51, 0xa7, 0x58, 0x37, 0x5f, 0x0d, 0x66, 0x2c, 0x6d, 0x94, 0x75, 0x45,
+    0x7d, 0x3f, 0x85, 0x84, 0x8e, 0x13, 0x96, 0xee, 0xa0, 0x13, 0xa9, 0x86,
+    0xb3, 0x44, 0xbd, 0x4f, 0xc7, 0xa8, 0xd2, 0x4e, 0xdd, 0x42, 0xe8, 0x86,
+    0xf4, 0x16, 0xff, 0xff
+};
+
+// Verifies that pl_icc_open() detects the canonical primary set for
+// three embedded minimal ICC profiles (sRGB, Display P3, Rec. 2020).
+int main()
+{
+    pl_log log = pl_test_logger();
+
+    pl_icc_object obj = pl_icc_open(log, &TEST_PROFILE(sRGB_v2_nano_icc), NULL);
+    REQUIRE_CMP(obj->csp.primaries, ==, PL_COLOR_PRIM_BT_709, "u");
+    pl_icc_close(&obj);
+
+    obj = pl_icc_open(log, &TEST_PROFILE(DisplayP3_v2_micro_icc), NULL);
+    REQUIRE_CMP(obj->csp.primaries, ==, PL_COLOR_PRIM_DISPLAY_P3, "u");
+    pl_icc_close(&obj);
+
+    obj = pl_icc_open(log, &TEST_PROFILE(Rec2020_v2_micro_icc), NULL);
+    REQUIRE_CMP(obj->csp.primaries, ==, PL_COLOR_PRIM_BT_2020, "u");
+    pl_icc_close(&obj);
+
+    pl_log_destroy(&log);
+    return 0;
+}
diff --git a/src/tests/include/include_tmpl.c b/src/tests/include/include_tmpl.c
new file mode 100644
index 0000000..dd1000e
--- /dev/null
+++ b/src/tests/include/include_tmpl.c
@@ -0,0 +1 @@
+#include <libplacebo/@header@>
diff --git a/src/tests/include/include_tmpl.cpp b/src/tests/include/include_tmpl.cpp
new file mode 100644
index 0000000..2b6334c
--- /dev/null
+++ b/src/tests/include/include_tmpl.cpp
@@ -0,0 +1,3 @@
+#define PL_LIBAV_IMPLEMENTATION 0
+#define PL_DAV1D_IMPLEMENTATION 0
+#include <libplacebo/@header@>
diff --git a/src/tests/include/meson.build b/src/tests/include/meson.build
new file mode 100644
index 0000000..25dfaee
--- /dev/null
+++ b/src/tests/include/meson.build
@@ -0,0 +1,35 @@
+include_tmpl_langs = ['c', 'cpp']
+
+# Ensure all headers compile
+
+# For every public header, generate a tiny C and C++ TU that just
+# includes it, and compile them all into a throwaway static library.
+test_include_sources = []
+foreach h : headers
+
+  # Skip internal headers, and headers whose optional dependency
+  # (dav1d / libav / d3d11) is not available in this build
+  if (h.contains('internal') or
+      h.contains('dav1d') and not dav1d.found() or
+      h.contains('libav') and not libav_found or
+      h.contains('d3d11') and not d3d11_header)
+    continue
+  endif
+
+  foreach lang : include_tmpl_langs
+
+    test_include_sources += configure_file(
+      input: 'include_tmpl.' + lang,
+      output: 'include_@0@.@1@'.format(h.underscorify(), lang),
+      configuration: {
+        'header': h
+      },
+    )
+
+  endforeach
+
+endforeach
+
+static_library('test_include', test_include_sources,
+    dependencies: [tdep_static, lavu, lavc, lavf],
+    include_directories: [inc, vulkan_headers_inc],
+    implicit_include_directories: false,
+    c_args: ['-Wall', '-Wextra', '-Wpedantic'],
+    cpp_args: ['-Wall', '-Wextra', '-Wpedantic'],
+)
diff --git a/src/tests/libav.c b/src/tests/libav.c
new file mode 100644
index 0000000..7c91e85
--- /dev/null
+++ b/src/tests/libav.c
@@ -0,0 +1,393 @@
+#include "tests.h"
+#include "libplacebo/utils/libav.h"
+
// Tests the libplacebo <-> libav bridge: plane layout derivation for AV pixel
// formats, pl_frame <-> AVFrame metadata round-trips, and the colorspace
// enum conversion helpers.
int main()
{
    struct pl_plane_data data[4] = {0};
    struct pl_bit_encoding bits;

    // Make sure we don't crash on any av pixfmt
    const AVPixFmtDescriptor *desc = NULL;
    while ((desc = av_pix_fmt_desc_next(desc)))
        pl_plane_data_from_pixfmt(data, &bits, av_pix_fmt_desc_get_id(desc));

// Check that the plane layout derived for `pixfmt` matches the hand-written
// `reference` layout exactly (both plane count and per-plane contents)
#define TEST(pixfmt, reference) \
    do { \
        int planes = pl_plane_data_from_pixfmt(data, &bits, pixfmt); \
        REQUIRE_CMP(planes, ==, sizeof(reference) / sizeof(*reference), "d"); \
        REQUIRE_MEMEQ(data, reference, sizeof(reference)); \
    } while (0)

    // Planar and semiplanar formats
    static const struct pl_plane_data yuvp8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {0},
            .pixel_stride = 1,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {1},
            .pixel_stride = 1,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {2},
            .pixel_stride = 1,
        }
    };

    // Chroma subsampling does not affect the per-plane layout
    TEST(AV_PIX_FMT_YUV420P, yuvp8);
    TEST(AV_PIX_FMT_YUV422P, yuvp8);
    TEST(AV_PIX_FMT_YUV444P, yuvp8);
    TEST(AV_PIX_FMT_YUV410P, yuvp8);
    TEST(AV_PIX_FMT_YUV411P, yuvp8);
    TEST(AV_PIX_FMT_YUV440P, yuvp8);

    static const struct pl_plane_data yuvap8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {0},
            .pixel_stride = 1,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {1},
            .pixel_stride = 1,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {2},
            .pixel_stride = 1,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {3},
            .pixel_stride = 1,
        }
    };

    TEST(AV_PIX_FMT_YUVA420P, yuvap8);

    static const struct pl_plane_data yuvp16[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {16},
            .component_map = {0},
            .pixel_stride = 2,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {16},
            .component_map = {1},
            .pixel_stride = 2,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {16},
            .component_map = {2},
            .pixel_stride = 2,
        }
    };

    // 10-bit formats are stored in (MSB-padded) 16-bit containers
    TEST(AV_PIX_FMT_YUV420P10LE, yuvp16);
    TEST(AV_PIX_FMT_YUV420P16LE, yuvp16);

    static const struct pl_plane_data nv12[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {0},
            .pixel_stride = 1,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8},
            .component_map = {1, 2},
            .pixel_stride = 2,
        }
    };

    TEST(AV_PIX_FMT_NV12, nv12);

    static const struct pl_plane_data nv21[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {0},
            .pixel_stride = 1,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8},
            .component_map = {2, 1},
            .pixel_stride = 2,
        }
    };

    TEST(AV_PIX_FMT_NV21, nv21);

    static const struct pl_plane_data p016[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {16},
            .component_map = {0},
            .pixel_stride = 2,
        }, {
            .type = PL_FMT_UNORM,
            .component_size = {16, 16},
            .component_map = {1, 2},
            .pixel_stride = 4,
        }
    };

    TEST(AV_PIX_FMT_P010LE, p016);
    TEST(AV_PIX_FMT_P016LE, p016);

    // Packed formats
    static const struct pl_plane_data r8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8},
            .component_map = {0},
            .pixel_stride = 1,
        }
    };

    TEST(AV_PIX_FMT_GRAY8, r8);

    static const struct pl_plane_data rg8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8},
            .component_map = {0, 1},
            .pixel_stride = 2,
        }
    };

    TEST(AV_PIX_FMT_GRAY8A, rg8);

    static const struct pl_plane_data rgb8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8},
            .component_map = {0, 1, 2},
            .pixel_stride = 3,
        }
    };

    TEST(AV_PIX_FMT_RGB24, rgb8);

    static const struct pl_plane_data bgr8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8},
            .component_map = {2, 1, 0},
            .pixel_stride = 3,
        }
    };

    TEST(AV_PIX_FMT_BGR24, bgr8);

    static const struct pl_plane_data rgbx8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8},
            .component_map = {0, 1, 2},
            .pixel_stride = 4,
        }
    };

    TEST(AV_PIX_FMT_RGB0, rgbx8);

    static const struct pl_plane_data xrgb8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8},
            .component_map = {0, 1, 2},
            .component_pad = {8, 0, 0}, // leading padding byte
            .pixel_stride = 4,
        }
    };

    TEST(AV_PIX_FMT_0RGB, xrgb8);

    static const struct pl_plane_data rgba8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8, 8},
            .component_map = {0, 1, 2, 3},
            .pixel_stride = 4,
        }
    };

    TEST(AV_PIX_FMT_RGBA, rgba8);

    static const struct pl_plane_data argb8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8, 8},
            .component_map = {3, 0, 1, 2},
            .pixel_stride = 4,
        }
    };

    TEST(AV_PIX_FMT_ARGB, argb8);

    static const struct pl_plane_data bgra8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8, 8},
            .component_map = {2, 1, 0, 3},
            .pixel_stride = 4,
        }
    };

    TEST(AV_PIX_FMT_BGRA, bgra8);

    static const struct pl_plane_data abgr8[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {8, 8, 8, 8},
            .component_map = {3, 2, 1, 0},
            .pixel_stride = 4,
        }
    };

    TEST(AV_PIX_FMT_ABGR, abgr8);

    static const struct pl_plane_data r16[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {16},
            .component_map = {0},
            .pixel_stride = 2,
        }
    };

    TEST(AV_PIX_FMT_GRAY16LE, r16);

    static const struct pl_plane_data rgb16[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {16, 16, 16},
            .component_map = {0, 1, 2},
            .pixel_stride = 6,
        }
    };

    TEST(AV_PIX_FMT_RGB48LE, rgb16);

    // Big-endian variants map to the same layout with byte-swapping enabled
    static const struct pl_plane_data rgb16be[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {16, 16, 16},
            .component_map = {0, 1, 2},
            .pixel_stride = 6,
            .swapped = true,
        }
    };

    TEST(AV_PIX_FMT_RGB48BE, rgb16be);

    static const struct pl_plane_data rgba16[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {16, 16, 16, 16},
            .component_map = {0, 1, 2, 3},
            .pixel_stride = 8,
        }
    };

    TEST(AV_PIX_FMT_RGBA64LE, rgba16);

    static const struct pl_plane_data rgba16be[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {16, 16, 16, 16},
            .component_map = {0, 1, 2, 3},
            .pixel_stride = 8,
            .swapped = true,
        }
    };

    TEST(AV_PIX_FMT_RGBA64BE, rgba16be);

    static const struct pl_plane_data rgb565[] = {
        {
            .type = PL_FMT_UNORM,
            .component_size = {5, 6, 5},
            .component_map = {2, 1, 0}, // LSB to MSB
            .pixel_stride = 2,
        }
    };

    TEST(AV_PIX_FMT_RGB565LE, rgb565);

// AV_PIX_FMT_RGBF32LE was added in libavutil 57.37.100
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 37, 100)

    static const struct pl_plane_data rgb32f[] = {
        {
            .type = PL_FMT_FLOAT,
            .component_size = {32, 32, 32},
            .component_map = {0, 1, 2},
            .pixel_stride = 12,
        }
    };

    TEST(AV_PIX_FMT_RGBF32LE, rgb32f);

#endif

    // Test pl_frame <- AVFrame bridge
    struct pl_frame image;
    AVFrame *frame = av_frame_alloc();
    frame->format = AV_PIX_FMT_RGBA;
    pl_frame_from_avframe(&image, frame);
    REQUIRE_CMP(image.num_planes, ==, 1, "d");
    REQUIRE_CMP(image.repr.sys, ==, PL_COLOR_SYSTEM_RGB, "u");

    // Test inverse mapping: pl_frame metadata -> AVFrame -> pl_frame should
    // round-trip to an equivalent (inferred) color space
    struct pl_color_space csp = image.color;
    pl_color_space_infer(&csp);
    pl_avframe_set_color(frame, csp);
    pl_avframe_set_repr(frame, image.repr);
    pl_avframe_set_profile(frame, image.profile);
    pl_frame_from_avframe(&image, frame);
    pl_color_space_infer(&image.color);
    REQUIRE(pl_color_space_equal(&csp, &image.color));
    av_frame_free(&frame);

    // Test enum functions: each pl <-> av enum mapping should round-trip
    // wherever an unambiguous reverse mapping exists
    for (enum pl_color_system sys = 0; sys < PL_COLOR_SYSTEM_COUNT; sys++) {
        enum AVColorSpace spc = pl_system_to_av(sys);
        enum pl_color_system sys2 = pl_system_from_av(spc);
        // Exception to the rule, due to different handling in libav*
        if (sys2 && sys != PL_COLOR_SYSTEM_BT_2100_HLG)
            REQUIRE_CMP(sys, ==, sys2, "u");
    }

    for (enum pl_color_levels lev = 0; lev < PL_COLOR_LEVELS_COUNT; lev++) {
        enum AVColorRange range = pl_levels_to_av(lev);
        enum pl_color_levels lev2 = pl_levels_from_av(range);
        REQUIRE_CMP(lev, ==, lev2, "u");
    }

    for (enum pl_color_primaries prim = 0; prim < PL_COLOR_PRIM_COUNT; prim++) {
        enum AVColorPrimaries avpri = pl_primaries_to_av(prim);
        enum pl_color_primaries prim2 = pl_primaries_from_av(avpri);
        if (prim2)
            REQUIRE_CMP(prim, ==, prim2, "u");
    }

    for (enum pl_color_transfer trc = 0; trc < PL_COLOR_TRC_COUNT; trc++) {
        enum AVColorTransferCharacteristic avtrc = pl_transfer_to_av(trc);
        enum pl_color_transfer trc2 = pl_transfer_from_av(avtrc);
        if (trc2)
            REQUIRE_CMP(trc, ==, trc2, "u");
    }

    for (enum pl_chroma_location loc = 0; loc < PL_CHROMA_COUNT; loc++) {
        enum AVChromaLocation avloc = pl_chroma_to_av(loc);
        enum pl_chroma_location loc2 = pl_chroma_from_av(avloc);
        REQUIRE_CMP(loc, ==, loc2, "u");
    }
}
diff --git a/src/tests/lut.c b/src/tests/lut.c
new file mode 100644
index 0000000..4af44ee
--- /dev/null
+++ b/src/tests/lut.c
@@ -0,0 +1,86 @@
+#include "tests.h"
+
+#include <libplacebo/dummy.h>
+#include <libplacebo/shaders/lut.h>
+
// Embedded .cube LUT fixtures: a 1D LUT, a 3D LUT, and a 1D LUT with a
// non-default DOMAIN_MAX. The string contents are parsed at runtime and
// must remain valid .cube syntax.
static const char *luts[] = {

    "TITLE \"1D LUT example\" \n"
    "LUT_1D_SIZE 11 \n"
    "# Random comment \n"
    "0.0 0.0 0.0 \n"
    "0.1 0.1 0.1 \n"
    "0.2 0.2 0.2 \n"
    "0.3 0.3 0.3 \n"
    "0.4 0.4 0.4 \n"
    "0.5 0.5 0.5 \n"
    "0.6 0.6 0.6 \n"
    "0.7 0.7 0.7 \n"
    "0.8 0.8 0.8 \n"
    "0.9 0.9 0.9 \n"
    "0.10 0.10 0.10 \n",

    "LUT_3D_SIZE 3 \n"
    "TITLE \"3D LUT example\" \n"
    "0.0 0.0 0.0 \n"
    "0.5 0.0 0.0 \n"
    "1.0 0.0 0.0 \n"
    "0.0 0.5 0.0 \n"
    "0.5 0.5 0.0 \n"
    "1.0 0.5 0.0 \n"
    "0.0 1.0 0.0 \n"
    "0.5 1.0 0.0 \n"
    "1.0 1.0 0.0 \n"
    "0.0 0.0 0.5 \n"
    "0.5 0.0 0.5 \n"
    "1.0 0.0 0.5 \n"
    "0.0 0.5 0.5 \n"
    "0.5 0.5 0.5 \n"
    "1.0 0.5 0.5 \n"
    "0.0 1.0 0.5 \n"
    "0.5 1.0 0.5 \n"
    "1.0 1.0 0.5 \n"
    "0.0 0.0 1.0 \n"
    "0.5 0.0 1.0 \n"
    "1.0 0.0 1.0 \n"
    "0.0 0.5 1.0 \n"
    "0.5 0.5 1.0 \n"
    "1.0 0.5 1.0 \n"
    "0.0 1.0 1.0 \n"
    "0.5 1.0 1.0 \n"
    "1.0 1.0 1.0 \n",

    "LUT_1D_SIZE 3 \n"
    "TITLE \"custom domain\" \n"
    "DOMAIN_MAX 255 255 255 \n"
    "0 0 0 \n"
    "128 128 128 \n"
    "255 255 255 \n"

};
+
+int main()
+{
+ pl_log log = pl_test_logger();
+ pl_gpu gpu = pl_gpu_dummy_create(log, NULL);
+ pl_shader sh = pl_shader_alloc(log, NULL);
+ pl_shader_obj obj = NULL;
+
+ for (int i = 0; i < PL_ARRAY_SIZE(luts); i++) {
+ struct pl_custom_lut *lut;
+ lut = pl_lut_parse_cube(log, luts[i], strlen(luts[i]));
+ REQUIRE(lut);
+
+ pl_shader_reset(sh, pl_shader_params( .gpu = gpu ));
+ pl_shader_custom_lut(sh, lut, &obj);
+ const struct pl_shader_res *res = pl_shader_finalize(sh);
+ REQUIRE(res);
+ printf("Generated LUT shader:\n%s\n", res->glsl);
+ pl_lut_free(&lut);
+ }
+
+ pl_shader_obj_destroy(&obj);
+ pl_shader_free(&sh);
+ pl_gpu_dummy_destroy(&gpu);
+ pl_log_destroy(&log);
+}
diff --git a/src/tests/meson.build b/src/tests/meson.build
new file mode 100644
index 0000000..335c6b1
--- /dev/null
+++ b/src/tests/meson.build
@@ -0,0 +1,39 @@
ts = []

# Build the base test list from the project-wide `tests` array; each entry
# links the static library's objects directly
foreach t : tests
    deps = [tdep_static]
    if t == 'opengl_surfaceless.c'
        deps += glad_dep
    endif
    # TODO: Define objects in tdep_static once Meson 1.1.0 is ok to use
    ts += { 'source': t,
            'deps': deps,
            'objects': lib.extract_all_objects(recursive: false) }
endforeach

# Optional tests, only built when their external dependency is present
dav1d = dependency('dav1d', required: false)
if dav1d.found()
    ts += { 'source': 'dav1d.c', 'deps': [dav1d, tdep_shared] }
endif

lavu = dependency('libavutil', version: '>=55.74.100', required: false)
lavc = dependency('libavcodec', required: false)
lavf = dependency('libavformat', required: false)
libav_found = lavu.found() and lavc.found() and lavf.found()
if libav_found
    ts += { 'source': 'libav.c', 'deps': [lavu, lavc, lavf, tdep_shared] }
endif

# One executable + test() registration per entry
foreach t : ts
    e = executable('test.' + t['source'], t['source'],
        objects: t.get('objects', []),
        c_args: [ '-Wno-unused-function' ],
        dependencies: t.get('deps', []),
        link_args: link_args,
        link_depends: link_depends,
    )

    test(t['source'], e, timeout: 120)
endforeach

subdir('include')
diff --git a/src/tests/opengl_surfaceless.c b/src/tests/opengl_surfaceless.c
new file mode 100644
index 0000000..2d12a08
--- /dev/null
+++ b/src/tests/opengl_surfaceless.c
@@ -0,0 +1,247 @@
+#include "gpu_tests.h"
+#include "opengl/utils.h"
+
+#include <libplacebo/opengl.h>
+
+static void opengl_interop_tests(pl_gpu gpu)
+{
+ pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0,
+ PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_LINEAR);
+ if (!fmt)
+ return;
+
+ pl_tex export = pl_tex_create(gpu, pl_tex_params(
+ .w = 32,
+ .h = 32,
+ .format = fmt,
+ .sampleable = true,
+ .renderable = true,
+ .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
+ ));
+
+ REQUIRE(export);
+
+ struct pl_opengl_wrap_params wrap = {
+ .width = export->params.w,
+ .height = export->params.h,
+ .depth = export->params.d,
+ };
+
+ wrap.texture = pl_opengl_unwrap(gpu, export, &wrap.target, &wrap.iformat, NULL);
+ REQUIRE(wrap.texture);
+
+ pl_tex import = pl_opengl_wrap(gpu, &wrap);
+ REQUIRE(import);
+ REQUIRE(import->params.renderable);
+ REQUIRE_CMP(import->params.blit_dst, ==, export->params.blit_dst, "d");
+
+ pl_tex_destroy(gpu, &import);
+ pl_tex_destroy(gpu, &export);
+}
+
+#define PBUFFER_WIDTH 640
+#define PBUFFER_HEIGHT 480
+
// Context passed to the swapchain's swap_buffers callback
struct swapchain_priv {
    EGLDisplay display;
    EGLSurface surface;
};
+
+static void swap_buffers(void *priv)
+{
+ struct swapchain_priv *p = priv;
+ eglSwapBuffers(p->display, p->surface);
+}
+
// Exercise the OpenGL swapchain: create one over the given EGL surface,
// resize it, and run a few start/submit/swap frame cycles.
static void opengl_swapchain_tests(pl_opengl gl,
                                   EGLDisplay display, EGLSurface surface)
{
    // Nothing to test without a surface (e.g. pbuffer creation failed)
    if (surface == EGL_NO_SURFACE)
        return;

    printf("testing opengl swapchain\n");
    pl_gpu gpu = gl->gpu;
    pl_swapchain sw;
    sw = pl_opengl_create_swapchain(gl, pl_opengl_swapchain_params(
        .swap_buffers = swap_buffers,
        .priv = &(struct swapchain_priv) { display, surface },
    ));
    REQUIRE(sw);

    int w = PBUFFER_WIDTH, h = PBUFFER_HEIGHT;
    REQUIRE(pl_swapchain_resize(sw, &w, &h));

    // Render a handful of (empty) frames through the full frame lifecycle
    for (int i = 0; i < 10; i++) {
        struct pl_swapchain_frame frame;
        REQUIRE(pl_swapchain_start_frame(sw, &frame));
        if (frame.fbo->params.blit_dst)
            pl_tex_clear(gpu, frame.fbo, (float[4]){0});

        // TODO: test this with an actual pl_renderer instance
        struct pl_frame target;
        pl_frame_from_swapchain(&target, &frame);

        REQUIRE(pl_swapchain_submit_frame(sw));
        pl_swapchain_swap_buffers(sw);
    }

    pl_swapchain_destroy(&sw);
}
+
// Create surfaceless EGL contexts for a range of desktop GL / GLES versions
// and run the GPU test suite against each distinct capability set. Returns
// SKIP when no usable EGL/GL environment is available.
int main()
{
    if (!gladLoaderLoadEGL(EGL_NO_DISPLAY))
        return SKIP;

    // Requires Mesa's surfaceless platform extension (no window system)
    const char *extstr = eglQueryString(EGL_NO_DISPLAY, EGL_EXTENSIONS);
    if (!extstr || !strstr(extstr, "EGL_MESA_platform_surfaceless"))
        return SKIP;

    // Create the OpenGL context
    EGLDisplay dpy = eglGetPlatformDisplayEXT(EGL_PLATFORM_SURFACELESS_MESA,
                                              (void *) EGL_DEFAULT_DISPLAY, NULL);
    if (dpy == EGL_NO_DISPLAY)
        return SKIP;

    EGLint major, minor;
    if (!eglInitialize(dpy, &major, &minor))
        return SKIP;

    // Reload EGL entry points with display-specific extensions resolved
    if (!gladLoaderLoadEGL(dpy))
        return SKIP;

    printf("Initialized EGL v%d.%d\n", major, minor);
    int egl_ver = major * 10 + minor;

    // GL/GLES versions to attempt, from most to least capable
    struct {
        EGLenum api;
        EGLenum render;
        int major, minor;
        int glsl_ver;
        EGLenum profile;
    } egl_vers[] = {
        { EGL_OPENGL_API, EGL_OPENGL_BIT, 4, 6, 460, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT },
        { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 3, 330, EGL_CONTEXT_OPENGL_CORE_PROFILE_BIT },
        { EGL_OPENGL_API, EGL_OPENGL_BIT, 3, 0, 130, EGL_CONTEXT_OPENGL_COMPATIBILITY_PROFILE_BIT, },
        { EGL_OPENGL_ES_API, EGL_OPENGL_ES3_BIT, 3, 0, 300, },
    };

    // Capabilities of the previously tested context, used to skip duplicates
    struct pl_glsl_version last_glsl = {0};
    struct pl_gpu_limits last_limits = {0};

    pl_log log = pl_test_logger();

    for (int i = 0; i < PL_ARRAY_SIZE(egl_vers); i++) {

        const int cfg_attribs[] = {
            EGL_SURFACE_TYPE, EGL_PBUFFER_BIT,
            EGL_RENDERABLE_TYPE, egl_vers[i].render,
            EGL_NONE
        };

        EGLConfig config = 0;
        EGLint num_configs = 0;
        bool ok = eglChooseConfig(dpy, cfg_attribs, &config, 1, &num_configs);
        if (!ok || !num_configs)
            goto error;

        if (!eglBindAPI(egl_vers[i].api))
            goto error;

        EGLContext egl;
        if (egl_vers[i].api == EGL_OPENGL_ES_API) {
            // OpenGL ES
            const EGLint egl_attribs[] = {
                EGL_CONTEXT_CLIENT_VERSION, egl_vers[i].major,
                // EGL_CONTEXT_OPENGL_DEBUG requires EGL 1.5
                (egl_ver >= 15) ? EGL_CONTEXT_OPENGL_DEBUG : EGL_NONE, EGL_TRUE,
                EGL_NONE
            };

            printf("Attempting creation of OpenGL ES v%d context\n", egl_vers[i].major);
            egl = eglCreateContext(dpy, config, EGL_NO_CONTEXT, egl_attribs);
        } else {
            // Desktop OpenGL
            const int egl_attribs[] = {
                EGL_CONTEXT_MAJOR_VERSION, egl_vers[i].major,
                EGL_CONTEXT_MINOR_VERSION, egl_vers[i].minor,
                EGL_CONTEXT_OPENGL_PROFILE_MASK, egl_vers[i].profile,
                (egl_ver >= 15) ? EGL_CONTEXT_OPENGL_DEBUG : EGL_NONE, EGL_TRUE,
                EGL_NONE
            };

            printf("Attempting creation of Desktop OpenGL v%d.%d context\n",
                   egl_vers[i].major, egl_vers[i].minor);
            egl = eglCreateContext(dpy, config, EGL_NO_CONTEXT, egl_attribs);
        }

        if (!egl)
            goto error;

        // Off-screen pbuffer surface for the swapchain tests
        const EGLint pbuffer_attribs[] = {
            EGL_WIDTH, PBUFFER_WIDTH,
            EGL_HEIGHT, PBUFFER_HEIGHT,
            EGL_NONE
        };

        EGLSurface surf = eglCreatePbufferSurface(dpy, config, pbuffer_attribs);

        if (!eglMakeCurrent(dpy, surf, surf, egl))
            goto error;

        pl_opengl gl = pl_opengl_create(log, pl_opengl_params(
            .get_proc_addr = (pl_voidfunc_t (*)(const char *)) eglGetProcAddress,
            .max_glsl_version = egl_vers[i].glsl_ver,
            .debug = true,
            .egl_display = dpy,
            .egl_context = egl,
#ifdef CI_ALLOW_SW
            .allow_software = true,
#endif
        ));
        if (!gl)
            goto next;

        // Skip repeat tests
        pl_gpu gpu = gl->gpu;
        if (memcmp(&last_glsl, &gpu->glsl, sizeof(last_glsl)) == 0 &&
            memcmp(&last_limits, &gpu->limits, sizeof(last_limits)) == 0)
        {
            printf("Skipping tests due to duplicate capabilities/version\n");
            goto next;
        }

#ifdef CI_MAXGL
        // In CI "max GL" mode, only test the first working context per API
        if (last_glsl.version && last_glsl.gles == gpu->glsl.gles)
            goto next;
#endif

        last_glsl = gpu->glsl;
        last_limits = gpu->limits;

        gpu_shader_tests(gpu);
        gpu_interop_tests(gpu);
        opengl_interop_tests(gpu);
        opengl_swapchain_tests(gl, dpy, surf);

        // Reduce log spam after first successful test
        pl_log_level_update(log, PL_LOG_INFO);

next:
        // Per-iteration cleanup; also reached via fallthrough on success
        pl_opengl_destroy(&gl);
        eglDestroySurface(dpy, surf);
        eglDestroyContext(dpy, egl);
        continue;

error: ;
        EGLint error = eglGetError();
        if (error != EGL_SUCCESS)
            fprintf(stderr, "EGL error: %s\n", egl_err_str(error));
    }

    eglTerminate(dpy);
    gladLoaderUnloadEGL();
    pl_log_destroy(&log);

    // If no context produced a usable GPU, report the whole test as skipped
    if (!last_glsl.version)
        return SKIP;
}
diff --git a/src/tests/options.c b/src/tests/options.c
new file mode 100644
index 0000000..f178668
--- /dev/null
+++ b/src/tests/options.c
@@ -0,0 +1,123 @@
+#include "tests.h"
+
+#include <libplacebo/options.h>
+
+static void count_cb(void *priv, pl_opt_data data)
+{
+ int *num = priv;
+ printf("Iterating over option: %s = %s\n", data->opt->key, data->text);
+ (*num)++;
+}
+
+static void set_cb(void *priv, pl_opt_data data)
+{
+ pl_options dst = priv;
+ REQUIRE(pl_options_set_str(dst, data->opt->key, data->text));
+}
+
+int main()
+{
+ pl_log log = pl_test_logger();
+ pl_options test = pl_options_alloc(log);
+
+ REQUIRE_STREQ(pl_options_save(test), "");
+ REQUIRE(pl_options_load(test, ""));
+ REQUIRE_STREQ(pl_options_save(test), "");
+
+ pl_options_reset(test, &pl_render_fast_params);
+ REQUIRE_STREQ(pl_options_save(test), "");
+ REQUIRE(pl_options_load(test, "preset=fast"));
+ REQUIRE_STREQ(pl_options_save(test), "");
+
+ const char *def_opts = "upscaler=lanczos,downscaler=hermite,frame_mixer=oversample,sigmoid=yes,peak_detect=yes,dither=yes";
+ pl_options_reset(test, &pl_render_default_params);
+ REQUIRE_STREQ(pl_options_save(test), def_opts);
+ struct pl_options_t def_pre = *test;
+ pl_options_reset(test, NULL);
+ REQUIRE_STREQ(pl_options_save(test), "");
+ REQUIRE(pl_options_load(test, def_opts));
+ REQUIRE_STREQ(pl_options_save(test), def_opts);
+ REQUIRE_MEMEQ(test, &def_pre, sizeof(*test));
+ pl_options_reset(test, NULL);
+ REQUIRE(pl_options_load(test, "preset=default"));
+ REQUIRE_STREQ(pl_options_save(test), def_opts);
+ REQUIRE_MEMEQ(test, &def_pre, sizeof(*test));
+
+ int num = 0;
+ pl_options_iterate(test, count_cb, &num);
+ REQUIRE_CMP(num, ==, 6, "d");
+
+ pl_opt_data data;
+ REQUIRE((data = pl_options_get(test, "tile_size")));
+ REQUIRE_STREQ(data->opt->key, "tile_size");
+ REQUIRE_CMP(*(int *) data->value, =, pl_render_default_params.tile_size, "d");
+ REQUIRE_STREQ(data->text, "32");
+
+ const char *hq_opts = "upscaler=ewa_lanczossharp,downscaler=hermite,frame_mixer=oversample,deband=yes,sigmoid=yes,peak_detect=yes,peak_percentile=99.99500274658203,contrast_recovery=0.30000001192092896,dither=yes";
+ // fallback can produce different precision
+ const char *hq_opts2 = "upscaler=ewa_lanczossharp,downscaler=hermite,frame_mixer=oversample,deband=yes,sigmoid=yes,peak_detect=yes,peak_percentile=99.99500274658203125,contrast_recovery=0.30000001192092896,dither=yes";
+
+ pl_options_reset(test, &pl_render_high_quality_params);
+ const char *opts = pl_options_save(test);
+ if (!strcmp(opts, hq_opts2))
+ hq_opts = hq_opts2;
+ REQUIRE_STREQ(opts, hq_opts);
+ struct pl_options_t hq_pre = *test;
+ pl_options_reset(test, NULL);
+ REQUIRE_STREQ(pl_options_save(test), "");
+ REQUIRE(pl_options_load(test, hq_opts));
+ REQUIRE_STREQ(pl_options_save(test), hq_opts);
+ REQUIRE_MEMEQ(test, &hq_pre, sizeof(*test));
+ REQUIRE(pl_options_load(test, "preset=high_quality"));
+ REQUIRE_STREQ(pl_options_save(test), hq_opts);
+ REQUIRE_MEMEQ(test, &hq_pre, sizeof(*test));
+
+ pl_options test2 = pl_options_alloc(log);
+ pl_options_iterate(test, set_cb, test2);
+ REQUIRE_STREQ(pl_options_save(test), pl_options_save(test2));
+ pl_options_free(&test2);
+
+ // Test custom scalers
+ pl_options_reset(test, pl_render_params(
+ .upscaler = &(struct pl_filter_config) {
+ .kernel = &pl_filter_function_jinc,
+ .window = &pl_filter_function_jinc,
+ .radius = 4.0,
+ .polar = true,
+ },
+ ));
+ const char *jinc4_opts = "upscaler=custom,upscaler_kernel=jinc,upscaler_window=jinc,upscaler_radius=4,upscaler_polar=yes";
+ REQUIRE_STREQ(pl_options_save(test), jinc4_opts);
+ struct pl_options_t jinc4_pre = *test;
+ pl_options_reset(test, NULL);
+ REQUIRE(pl_options_load(test, "upscaler=custom,upscaler_preset=ewa_lanczos,upscaler_radius=4.0,upscaler_clamp=0.0"));
+ REQUIRE_STREQ(pl_options_save(test), jinc4_opts);
+ REQUIRE_MEMEQ(test, &jinc4_pre, sizeof(*test));
+
+ // Test params presets
+ pl_options_reset(test, NULL);
+ REQUIRE(pl_options_load(test, "cone=yes,cone_preset=deuteranomaly"));
+ REQUIRE_STREQ(pl_options_save(test), "cone=yes,cones=m,cone_strength=0.5");
+
+ // Test error paths
+ pl_options bad = pl_options_alloc(NULL);
+ REQUIRE(!pl_options_load(bad, "scale_preset=help"));
+ REQUIRE(!pl_options_load(bad, "dither_method=invalid"));
+ REQUIRE(!pl_options_load(bad, "lut_entries=-1"));
+ REQUIRE(!pl_options_load(bad, "deband_iterations=100"));
+ REQUIRE(!pl_options_load(bad, "tone_lut_size=abc"));
+ REQUIRE(!pl_options_load(bad, "show_clipping=hello"));
+ REQUIRE(!pl_options_load(bad, "brightness=2.0"));
+ REQUIRE(!pl_options_load(bad, "gamma=oops"));
+ REQUIRE(!pl_options_load(bad, "invalid"));
+ REQUIRE(!pl_options_load(bad, "="));
+ REQUIRE(!pl_options_load(bad, "preset==bar"));
+ REQUIRE(!pl_options_load(bad, "peak_percentile=E8203125"));
+ REQUIRE(!pl_options_get(bad, "invalid"));
+ REQUIRE_STREQ(pl_options_save(bad), "");
+ pl_options_free(&bad);
+
+ pl_options_free(&test);
+ pl_log_destroy(&log);
+ return 0;
+}
diff --git a/src/tests/string.c b/src/tests/string.c
new file mode 100644
index 0000000..52985c4
--- /dev/null
+++ b/src/tests/string.c
@@ -0,0 +1,147 @@
+#include "tests.h"
+
// Shared fixtures: the all-zero string, a short non-empty string, and an
// empty (but non-null) string
static const pl_str null = {0};
static const pl_str test = PL_STR0("test");
static const pl_str empty = PL_STR0("");
+
+static inline bool is_null(pl_str str)
+{
+ return !str.len && !str.buf;
+}
+
+static inline bool is_empty(pl_str str)
+{
+ return !str.len;
+}
+
// Tests the pl_str string utilities: construction, appending/formatting,
// searching, splitting, comparison, numeric parsing, and the string builder.
int main()
{
    void *tmp = pl_tmp(NULL);

    // Construction / duplication of null and empty strings
    REQUIRE(is_null(pl_str0(NULL)));
    REQUIRE(is_null(pl_strdup(tmp, null)));
    char *empty0 = pl_strdup0(tmp, null);
    REQUIRE(empty0 && !empty0[0]);
    REQUIRE(pl_str_equals0(empty, empty0));

    // Appending and printf-style formatting
    pl_str buf = {0};
    pl_str_append(tmp, &buf, null);
    REQUIRE(is_empty(buf));
    pl_str_append_asprintf(tmp, &buf, "%.*s", PL_STR_FMT(test));
    REQUIRE(pl_str_equals(buf, test));

    // Locale-independent ("_c") formatting across many conversion specifiers
    pl_str_append_asprintf_c(tmp, &buf, "%d %f %f %f %lld %zu %.*sx %hx %hx %hx %hx",
        1, 1.0f, 4294967295.56, 83224965647295.65, 0xFFll, (size_t) 0, PL_STR_FMT(empty),
        (unsigned short) 0xCAFEu, (unsigned short) 0x1, (unsigned short) 0,
        (unsigned short) 0xFFFFu);
    const char *expected = "test1 1 4294967295.56 83224965647295.66 255 0 x cafe 1 0 ffff";
    // fallback can produce different precision
    const char *expected2 = "test1 1 4294967295.55999994277954102 83224965647295.65625 255 0 x cafe 1 0 ffff";
    REQUIRE(pl_str_equals0(buf, expected) || pl_str_equals0(buf, expected2));

    // Search helpers must tolerate the null string
    REQUIRE_CMP(pl_strchr(null, ' '), <, 0, "d");
    REQUIRE_CMP((int) pl_strspn(null, " "), ==, 0, "d");
    REQUIRE_CMP((int) pl_strcspn(null, " "), ==, 0, "d");
    REQUIRE(is_null(pl_str_strip(null)));

    REQUIRE_CMP(pl_strchr(test, 's'), ==, 2, "d");
    REQUIRE_CMP((int) pl_strspn(test, "et"), ==, 2, "d");
    REQUIRE_CMP((int) pl_strcspn(test, "xs"), ==, 2, "d");

    // take/drop edge cases (oversized counts, zero counts)
    REQUIRE(is_null(pl_str_take(null, 10)));
    REQUIRE(is_empty(pl_str_take(test, 0)));
    REQUIRE(is_null(pl_str_drop(null, 10)));
    REQUIRE(is_null(pl_str_drop(test, test.len)));
    REQUIRE(pl_str_equals(pl_str_drop(test, 0), test));

    REQUIRE_CMP(pl_str_find(null, test), <, 0, "d");
    REQUIRE_CMP(pl_str_find(null, null), ==, 0, "d");
    REQUIRE_CMP(pl_str_find(test, null), ==, 0, "d");
    REQUIRE_CMP(pl_str_find(test, test), ==, 0, "d");

    // Splitting: null inputs yield null halves
    pl_str rest;
    REQUIRE(is_null(pl_str_split_char(null, ' ', &rest)) && is_null(rest));
    REQUIRE(is_null(pl_str_split_str(null, test, &rest)) && is_null(rest));
    REQUIRE(is_empty(pl_str_split_str(test, test, &rest)) && is_empty(rest));
    REQUIRE(is_null(pl_str_getline(null, &rest)) && is_null(rest));

    pl_str right, left = pl_str_split_char(pl_str0("left right"), ' ', &right);
    REQUIRE(pl_str_equals0(left, "left"));
    REQUIRE(pl_str_equals0(right, "right"));

    left = pl_str_split_str0(pl_str0("leftTESTright"), "TEST", &right);
    REQUIRE(pl_str_equals0(left, "left"));
    REQUIRE(pl_str_equals0(right, "right"));

    // Hex decoding: empty input is valid, non-hex input is rejected
    pl_str out;
    REQUIRE(pl_str_decode_hex(tmp, null, &out) && is_empty(out));
    REQUIRE(!pl_str_decode_hex(tmp, pl_str0("invalid"), &out));

    // Equality / prefix / suffix semantics (null compares equal to empty)
    REQUIRE(pl_str_equals(null, null));
    REQUIRE(pl_str_equals(null, empty));
    REQUIRE(pl_str_startswith(null, null));
    REQUIRE(pl_str_startswith(test, null));
    REQUIRE(pl_str_startswith(test, test));
    REQUIRE(pl_str_endswith(null, null));
    REQUIRE(pl_str_endswith(test, null));
    REQUIRE(pl_str_endswith(test, test));

    // Numeric parsing: valid inputs of every type, then rejection of
    // null / non-numeric / empty strings
    double d;
    float f;
    int i;
    unsigned u;
    int64_t i64;
    uint64_t u64;

    REQUIRE(pl_str_parse_double(pl_str0("4294967295.56"), &d)); REQUIRE_FEQ(d, 4294967295.56, 1e-20);
    REQUIRE(pl_str_parse_double(pl_str0("-4294967295.56"), &d)); REQUIRE_FEQ(d, -4294967295.56, 1e-20);
    REQUIRE(pl_str_parse_double(pl_str0("83224965647295.65"), &d)); REQUIRE_FEQ(d, 83224965647295.65, 1e-20);
    REQUIRE(pl_str_parse_double(pl_str0("-83224965647295.65"), &d)); REQUIRE_FEQ(d, -83224965647295.65, 1e-20);
    REQUIRE(pl_str_parse_float(pl_str0("4294967295.56"), &f)); REQUIRE_FEQ(f, 4294967295.56f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("-4294967295.56"), &f)); REQUIRE_FEQ(f, -4294967295.56f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("83224965647295.65"), &f)); REQUIRE_FEQ(f, 83224965647295.65f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("-83224965647295.65"), &f)); REQUIRE_FEQ(f, -83224965647295.65f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("1.3984"), &f)); REQUIRE_FEQ(f, 1.3984f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("-8.9100083"), &f)); REQUIRE_FEQ(f, -8.9100083f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("-0"), &f)); REQUIRE_FEQ(f, 0.0f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("-3.14e20"), &f)); REQUIRE_FEQ(f, -3.14e20f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("0.5e-5"), &f)); REQUIRE_FEQ(f, 0.5e-5f, 1e-8);
    REQUIRE(pl_str_parse_float(pl_str0("0.5e+5"), &f)); REQUIRE_FEQ(f, 0.5e+5f, 1e-8);
    REQUIRE(pl_str_parse_int(pl_str0("64239"), &i)); REQUIRE_CMP(i, ==, 64239, "d");
    REQUIRE(pl_str_parse_int(pl_str0("-102"), &i)); REQUIRE_CMP(i, ==, -102, "d");
    REQUIRE(pl_str_parse_int(pl_str0("1"), &i)); REQUIRE_CMP(i, ==, 1, "d");
    REQUIRE(pl_str_parse_int(pl_str0("-0"), &i)); REQUIRE_CMP(i, ==, 0, "d");
    REQUIRE(pl_str_parse_uint(pl_str0("64239"), &u)); REQUIRE_CMP(u, ==, 64239, "u");
    REQUIRE(pl_str_parse_uint(pl_str0("1"), &u)); REQUIRE_CMP(u, ==, 1, "u");
    REQUIRE(pl_str_parse_int64(pl_str0("9223372036854775799"), &i64));
    REQUIRE_CMP(i64, ==, 9223372036854775799LL, PRIi64);
    REQUIRE(pl_str_parse_int64(pl_str0("-9223372036854775799"), &i64));
    REQUIRE_CMP(i64, ==, -9223372036854775799LL, PRIi64);
    REQUIRE(pl_str_parse_uint64(pl_str0("18446744073709551609"), &u64));
    REQUIRE_CMP(u64, ==, 18446744073709551609LLU, PRIu64);
    REQUIRE(!pl_str_parse_float(null, &f));
    REQUIRE(!pl_str_parse_float(test, &f));
    REQUIRE(!pl_str_parse_float(empty, &f));
    REQUIRE(!pl_str_parse_int(null, &i));
    REQUIRE(!pl_str_parse_int(test, &i));
    REQUIRE(!pl_str_parse_int(empty, &i));
    REQUIRE(!pl_str_parse_uint(null, &u));
    REQUIRE(!pl_str_parse_uint(test, &u));
    REQUIRE(!pl_str_parse_uint(empty, &u));

    // Deferred string builder: concatenation and locale-independent printf
    pl_str_builder builder = pl_str_builder_alloc(tmp);
    pl_str_builder_const_str(builder, "hello");
    pl_str_builder_str(builder, pl_str0("world"));
    pl_str res = pl_str_builder_exec(builder);
    REQUIRE(pl_str_equals0(res, "helloworld"));

    pl_str_builder_reset(builder);
    pl_str_builder_printf_c(builder, "foo %d bar %u bat %s baz %lld",
                            123, 56u, "quack", 0xDEADBEEFll);
    pl_str_builder_printf_c(builder, " %.*s", PL_STR_FMT(pl_str0("test123")));
    res = pl_str_builder_exec(builder);
    REQUIRE(pl_str_equals0(res, "foo 123 bar 56 bat quack baz 3735928559 test123"));

    pl_free(tmp);
    return 0;
}
diff --git a/src/tests/tests.h b/src/tests/tests.h
new file mode 100644
index 0000000..a33a0de
--- /dev/null
+++ b/src/tests/tests.h
@@ -0,0 +1,319 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#pragma once
+
+#include "common.h"
+
+#include <libplacebo/log.h>
+#include <libplacebo/colorspace.h>
+#include <libplacebo/shaders/film_grain.h>
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <math.h>
+#include <time.h>
+
+#ifdef PL_HAVE_WIN32
+#include <io.h>
+#define isatty _isatty
+#define fileno _fileno
+#else
+#include <unistd.h>
+#endif
+
+static void pl_log_timestamp(void *stream, enum pl_log_level level, const char *msg)
+{
+ static char letter[] = {
+ [PL_LOG_FATAL] = 'f',
+ [PL_LOG_ERR] = 'e',
+ [PL_LOG_WARN] = 'w',
+ [PL_LOG_INFO] = 'i',
+ [PL_LOG_DEBUG] = 'd',
+ [PL_LOG_TRACE] = 't',
+ };
+
+ // Log time relative to the first message
+ static pl_clock_t base = 0;
+ if (!base)
+ base = pl_clock_now();
+
+ double secs = pl_clock_diff(pl_clock_now(), base);
+ printf("[%2.3f][%c] %s\n", secs, letter[level], msg);
+
+ if (level <= PL_LOG_WARN) {
+ // duplicate warnings/errors to stderr
+ fprintf(stderr, "[%2.3f][%c] %s\n", secs, letter[level], msg);
+ fflush(stderr);
+ }
+}
+
+static inline pl_log pl_test_logger(void)
+{
+ setbuf(stdout, NULL);
+ setbuf(stderr, NULL);
+
+ return pl_log_create(PL_API_VER, pl_log_params(
+ .log_cb = isatty(fileno(stdout)) ? pl_log_color : pl_log_timestamp,
+ .log_level = PL_LOG_DEBUG,
+ ));
+}
+
+#define RANDOM (rand() / (float) RAND_MAX)
+#define RANDOM_U8 ((uint8_t) (256.0 * rand() / (RAND_MAX + 1.0)))
+#define SKIP 77
+
+// Helpers for performing various checks
+#define REQUIRE(cond) do \
+{ \
+ if (!(cond)) { \
+ fprintf(stderr, "=== FAILED: '"#cond"' at "__FILE__":%d\n\n", __LINE__);\
+ exit(1); \
+ } \
+} while (0)
+
+#define REQUIRE_CMP(a, op, b, fmt) do \
+{ \
+ __typeof__(a) _va = (a), _vb = (b); \
+ \
+ if (!(_va op _vb)) { \
+ fprintf(stderr, "=== FAILED: '"#a" "#op" "#b"' at "__FILE__":%d\n" \
+ " %-31s = %"fmt"\n" \
+ " %-31s = %"fmt"\n\n", \
+ __LINE__, #a, _va, #b, _vb); \
+ exit(1); \
+ } \
+} while (0)
+
+#define REQUIRE_FEQ(a, b, epsilon) do \
+{ \
+ float _va = (a); \
+ float _vb = (b); \
+ float _delta = (epsilon) * fmax(1.0, fabs(_va)); \
+ \
+ if (fabs(_va - _vb) > _delta) { \
+ fprintf(stderr, "=== FAILED: '"#a" ≈ "#b"' at "__FILE__":%d\n" \
+ " %-31s = %f\n" \
+ " %-31s = %f\n" \
+ " %-31s = %f\n\n", \
+ __LINE__, #a, _va, #b, _vb, \
+ "epsilon "#epsilon" -> max delta", _delta); \
+ exit(1); \
+ } \
+} while (0)
+
+#define REQUIRE_STREQ(a, b) do \
+{ \
+ const char *_a = (a); \
+ const char *_b = (b); \
+ if (strcmp(_a, _b) != 0) { \
+ fprintf(stderr, "=== FAILED: !strcmp("#a", "#b") at "__FILE__":%d\n" \
+ " %-31s = %s\n" \
+ " %-31s = %s\n\n", \
+ __LINE__, #a, _a, #b, _b); \
+ exit(1); \
+ } \
+} while (0)
+
+static inline void log_array(const uint8_t *a, const uint8_t *ref, size_t off, size_t size)
+{
+ for (size_t n = 0; n < size; n++) {
+ const char *prefix = "", *suffix = "";
+ char terminator = ' ';
+ if (a[n + off] != ref[n + off]) {
+ prefix = "\033[31;1m";
+ suffix = "\033[0m";
+ }
+ if (n+1 == size || n % 16 == 15)
+ terminator = '\n';
+ fprintf(stderr, "%s%02"PRIx8"%s%c", prefix, a[n + off], suffix, terminator);
+ }
+}
+
+static inline void require_memeq(const void *aptr, const void *bptr, size_t size,
+ const char *astr, const char *bstr,
+ const char *sizestr, const char *file, int line)
+{
+ const uint8_t *a = aptr, *b = bptr;
+ for (size_t i = 0; i < size; i++) {
+ if (a[i] == b[i])
+ continue;
+
+ fprintf(stderr, "=== FAILED: memcmp(%s, %s, %s) == 0 at %s:%d\n"
+ "at position %zu: 0x%02"PRIx8" != 0x%02"PRIx8"\n\n",
+ astr, bstr, sizestr, file, line, i, a[i], b[i]);
+
+ size_t start = i >= 256 ? i - 256 : 0;
+ size_t end = PL_MIN(size, i + 256);
+ fprintf(stderr, "%zu bytes of '%s' at offset %zu:\n", end - start, astr, start);
+ log_array(a, b, start, end - start);
+ fprintf(stderr, "\n%zu bytes of '%s' at offset %zu:\n", end - start, bstr, start);
+ log_array(b, a, start, end - start);
+ exit(1);
+ }
+}
+
+#define REQUIRE_MEMEQ(a, b, size) require_memeq(a, b, size, #a, #b, #size, __FILE__, __LINE__)
+
+#define REQUIRE_HANDLE(shmem, type) \
+ switch (type) { \
+ case PL_HANDLE_FD: \
+ case PL_HANDLE_DMA_BUF: \
+ REQUIRE(shmem.handle.fd > -1); \
+ break; \
+ case PL_HANDLE_WIN32: \
+ case PL_HANDLE_WIN32_KMT: \
+ /* INVALID_HANDLE_VALUE = (-1) */ \
+ REQUIRE(shmem.handle.handle != (void *)(intptr_t) (-1)); \
+ /* fallthrough */ \
+ case PL_HANDLE_MTL_TEX: \
+ case PL_HANDLE_IOSURFACE: \
+ REQUIRE(shmem.handle.handle); \
+ break; \
+ case PL_HANDLE_HOST_PTR: \
+ REQUIRE(shmem.handle.ptr); \
+ break; \
+ }
+
+static const struct pl_av1_grain_data av1_grain_data = {
+ .num_points_y = 6,
+ .points_y = {{0, 4}, {27, 33}, {54, 55}, {67, 61}, {108, 71}, {255, 72}},
+ .chroma_scaling_from_luma = false,
+ .num_points_uv = {2, 2},
+ .points_uv = {{{0, 64}, {255, 64}}, {{0, 64}, {255, 64}}},
+ .scaling_shift = 11,
+ .ar_coeff_lag = 3,
+ .ar_coeffs_y = {4, 1, 3, 0, 1, -3, 8, -3, 7, -23, 1, -25,
+ 0, -10, 6, -17, -4, 53, 36, 5, -5, -17, 8, 66},
+ .ar_coeffs_uv = {
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127},
+ {0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 127},
+ },
+ .ar_coeff_shift = 7,
+ .grain_scale_shift = 0,
+ .uv_mult = {0, 0},
+ .uv_mult_luma = {64, 64},
+ .uv_offset = {0, 0},
+};
+
+static const uint8_t h274_lower_bound = 10;
+static const uint8_t h274_upper_bound = 250;
+static const int16_t h274_values[6] = {16, 12, 14};
+
+static const struct pl_h274_grain_data h274_grain_data = {
+ .model_id = 0,
+ .blending_mode_id = 0,
+ .log2_scale_factor = 2,
+ .component_model_present = {true},
+ .num_intensity_intervals = {1},
+ .num_model_values = {3},
+ .intensity_interval_lower_bound = {&h274_lower_bound},
+ .intensity_interval_upper_bound = {&h274_upper_bound},
+ .comp_model_value = {&h274_values},
+};
+
+static const struct pl_dovi_metadata dovi_meta = {
+ .nonlinear = {{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}},
+ .linear = {{{1, 0, 0}, {0, 1, 0}, {0, 0, 1}}},
+ .comp = {
+ {
+ .num_pivots = 9,
+ .pivots = {0.0615835786, 0.129032254, 0.353861183,
+ 0.604105592, 0.854349971, 0.890518069,
+ 0.906158328, 0.913978517, 0.92082113},
+ .method = {0, 0, 0, 0, 0, 0, 0, 0},
+ .poly_coeffs = {
+ {-0.0488376617, 1.99335372, -2.41716385},
+ {-0.0141925812, 1.61829138, -1.53397191},
+ { 0.157061458, 0.63640213, -0.11302495},
+ {0.25272119, 0.246226311, 0.27281332},
+ {0.951621532, -1.35507894, 1.18898678},
+ {6.41251612, -13.6188488, 8.07336903},
+ {13.467535, -29.1869125, 16.6612244},
+ {28.2321472, -61.8516273, 34.7264938}
+ },
+ }, {
+ .num_pivots = 2,
+ .pivots = {0.0, 1.0},
+ .method = {1},
+ .mmr_order = {3},
+ .mmr_constant = {-0.500733018},
+ .mmr_coeffs = {{
+ {1.08411026, 3.80807829, 0.0881733894, -3.23097038, -0.409078479, -1.31310081, 2.71297002},
+ {-0.241833091, -3.57880807, -0.108109117, 3.13198471, 0.869203091, 1.96561158, -9.30871677},
+ {-0.177356839, 1.48970401, 0.0908923149, -0.510447979, -0.687603354, -0.934977889, 12.3544884},
+ }},
+ }, {
+ .num_pivots = 2,
+ .pivots = {0.0, 1.0},
+ .method = {1},
+ .mmr_order = {3},
+ .mmr_constant = {-1.23833287},
+ .mmr_coeffs = {{
+ {3.52909589, 0.383154511, 5.50820637, -1.02094889, -6.36386824, 0.194121242, 0.64683497},
+ {-2.57899785, -0.626081586, -6.05729723, 2.29143763, 9.14653015, -0.0507702827, -4.17724133},
+ {0.705404401, 0.341412306, 2.98387456, -1.71712542, -4.91501331, 0.1465137, 6.38665438},
+ }},
+ },
+ },
+};
+
+static const uint8_t sRGB_v2_nano_icc[] = {
+ 0x00, 0x00, 0x01, 0x9a, 0x6c, 0x63, 0x6d, 0x73, 0x02, 0x10, 0x00, 0x00,
+ 0x6d, 0x6e, 0x74, 0x72, 0x52, 0x47, 0x42, 0x20, 0x58, 0x59, 0x5a, 0x20,
+ 0x07, 0xe2, 0x00, 0x03, 0x00, 0x14, 0x00, 0x09, 0x00, 0x0e, 0x00, 0x1d,
+ 0x61, 0x63, 0x73, 0x70, 0x4d, 0x53, 0x46, 0x54, 0x00, 0x00, 0x00, 0x00,
+ 0x73, 0x61, 0x77, 0x73, 0x63, 0x74, 0x72, 0x6c, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xf6, 0xd6,
+ 0x00, 0x01, 0x00, 0x00, 0x00, 0x00, 0xd3, 0x2d, 0x68, 0x61, 0x6e, 0x64,
+ 0xeb, 0x77, 0x1f, 0x3c, 0xaa, 0x53, 0x51, 0x02, 0xe9, 0x3e, 0x28, 0x6c,
+ 0x91, 0x46, 0xae, 0x57, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x09,
+ 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0xf0, 0x00, 0x00, 0x00, 0x5f,
+ 0x77, 0x74, 0x70, 0x74, 0x00, 0x00, 0x01, 0x0c, 0x00, 0x00, 0x00, 0x14,
+ 0x72, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x20, 0x00, 0x00, 0x00, 0x14,
+ 0x67, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x34, 0x00, 0x00, 0x00, 0x14,
+ 0x62, 0x58, 0x59, 0x5a, 0x00, 0x00, 0x01, 0x48, 0x00, 0x00, 0x00, 0x14,
+ 0x72, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x5c, 0x00, 0x00, 0x00, 0x34,
+ 0x67, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x5c, 0x00, 0x00, 0x00, 0x34,
+ 0x62, 0x54, 0x52, 0x43, 0x00, 0x00, 0x01, 0x5c, 0x00, 0x00, 0x00, 0x34,
+ 0x63, 0x70, 0x72, 0x74, 0x00, 0x00, 0x01, 0x90, 0x00, 0x00, 0x00, 0x0a,
+ 0x64, 0x65, 0x73, 0x63, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x05,
+ 0x6e, 0x52, 0x47, 0x42, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x00, 0x00, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0xf3, 0x54, 0x00, 0x01, 0x00, 0x00, 0x00, 0x01, 0x16, 0xc9,
+ 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x6f, 0xa0,
+ 0x00, 0x00, 0x38, 0xf2, 0x00, 0x00, 0x03, 0x8f, 0x58, 0x59, 0x5a, 0x20,
+ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x62, 0x96, 0x00, 0x00, 0xb7, 0x89,
+ 0x00, 0x00, 0x18, 0xda, 0x58, 0x59, 0x5a, 0x20, 0x00, 0x00, 0x00, 0x00,
+ 0x00, 0x00, 0x24, 0xa0, 0x00, 0x00, 0x0f, 0x85, 0x00, 0x00, 0xb6, 0xc4,
+ 0x63, 0x75, 0x72, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x14,
+ 0x00, 0x00, 0x01, 0x07, 0x02, 0xb5, 0x05, 0x6b, 0x09, 0x36, 0x0e, 0x50,
+ 0x14, 0xb1, 0x1c, 0x80, 0x25, 0xc8, 0x30, 0xa1, 0x3d, 0x19, 0x4b, 0x40,
+ 0x5b, 0x27, 0x6c, 0xdb, 0x80, 0x6b, 0x95, 0xe3, 0xad, 0x50, 0xc6, 0xc2,
+ 0xe2, 0x31, 0xff, 0xff, 0x74, 0x65, 0x78, 0x74, 0x00, 0x00, 0x00, 0x00,
+ 0x30, 0x00
+};
+
+#define TEST_PROFILE(arr) ((struct pl_icc_profile) { \
+ .data = (arr), \
+ .len = PL_ARRAY_SIZE(arr), \
+ .signature = (uintptr_t) (arr), \
+})
diff --git a/src/tests/tone_mapping.c b/src/tests/tone_mapping.c
new file mode 100644
index 0000000..0a48945
--- /dev/null
+++ b/src/tests/tone_mapping.c
@@ -0,0 +1,181 @@
+#include "tests.h"
+#include "log.h"
+
+#include <libplacebo/gamut_mapping.h>
+#include <libplacebo/tone_mapping.h>
+
+//#define PRINT_LUTS
+
+int main()
+{
+ pl_log log = pl_test_logger();
+
+ // PQ unit tests
+ REQUIRE_FEQ(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 0.0), 0.0, 1e-2);
+ REQUIRE_FEQ(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 1.0), 10000.0, 1e-2);
+ REQUIRE_FEQ(pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, 0.58), 203.0, 1e-2);
+
+ // Test round-trip
+ for (float x = 0.0f; x < 1.0f; x += 0.01f) {
+ REQUIRE_FEQ(x, pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ,
+ pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, x)),
+ 1e-5);
+ }
+
+ static float lut[128];
+ struct pl_tone_map_params params = {
+ .constants = { PL_TONE_MAP_CONSTANTS },
+ .input_scaling = PL_HDR_PQ,
+ .output_scaling = PL_HDR_PQ,
+ .lut_size = PL_ARRAY_SIZE(lut),
+ };
+
+ // Test regular tone-mapping
+ params.input_min = pl_hdr_rescale(PL_HDR_NITS, params.input_scaling, 0.005);
+ params.input_max = pl_hdr_rescale(PL_HDR_NITS, params.input_scaling, 1000.0);
+ params.output_min = pl_hdr_rescale(PL_HDR_NORM, params.output_scaling, 0.001);
+ params.output_max = pl_hdr_rescale(PL_HDR_NORM, params.output_scaling, 1.0);
+
+ struct pl_tone_map_params params_inv = params;
+ PL_SWAP(params_inv.input_min, params_inv.output_min);
+ PL_SWAP(params_inv.input_max, params_inv.output_max);
+
+ int tested_pure_bpc = 0;
+
+ // Generate example tone mapping curves, forward and inverse
+ for (int i = 0; i < pl_num_tone_map_functions; i++) {
+ const struct pl_tone_map_function *fun = pl_tone_map_functions[i];
+ printf("Testing tone-mapping function %s\n", fun->name);
+ params.function = params_inv.function = fun;
+ pl_clock_t start = pl_clock_now();
+ pl_tone_map_generate(lut, &params);
+ pl_log_cpu_time(log, start, pl_clock_now(), "generating LUT");
+ for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) {
+ REQUIRE(isfinite(lut[j]) && !isnan(lut[j]));
+ if (j > 0)
+ REQUIRE_CMP(lut[j], >=, lut[j - 1], "f");
+#ifdef PRINT_LUTS
+ printf("%f, %f\n", j / (PL_ARRAY_SIZE(lut) - 1.0f), lut[j]);
+#endif
+ }
+
+ if (fun->map_inverse || !tested_pure_bpc++) {
+ start = pl_clock_now();
+ pl_tone_map_generate(lut, &params_inv);
+ pl_log_cpu_time(log, start, pl_clock_now(), "generating inverse LUT");
+ for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) {
+ REQUIRE(isfinite(lut[j]) && !isnan(lut[j]));
+ if (j > 0)
+ REQUIRE_CMP(lut[j], >=, lut[j - 1], "f");
+#ifdef PRINT_LUTS
+ printf("%f, %f\n", j / (PL_ARRAY_SIZE(lut) - 1.0f), lut[j]);
+#endif
+ }
+ }
+ }
+
+ // Test that `spline` is a no-op for 1:1 tone mapping
+ params.output_min = params.input_min;
+ params.output_max = params.input_max;
+ params.function = &pl_tone_map_spline;
+ pl_tone_map_generate(lut, &params);
+ for (int j = 0; j < PL_ARRAY_SIZE(lut); j++) {
+ float x = j / (PL_ARRAY_SIZE(lut) - 1.0f);
+ x = PL_MIX(params.input_min, params.input_max, x);
+ REQUIRE_FEQ(x, lut[j], 1e-5);
+ }
+
+ // Test some gamut mapping methods
+ for (int i = 0; i < pl_num_gamut_map_functions; i++) {
+ static const float min_rgb = 0.1f, max_rgb = PL_COLOR_SDR_WHITE;
+ struct pl_gamut_map_params gamut = {
+ .function = pl_gamut_map_functions[i],
+ .input_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020),
+ .output_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709),
+ .min_luma = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, min_rgb),
+ .max_luma = pl_hdr_rescale(PL_HDR_NITS, PL_HDR_PQ, max_rgb),
+ };
+
+ printf("Testing gamut-mapping function %s\n", gamut.function->name);
+
+ // Require that black maps to black and white maps to white
+ float black[3] = { gamut.min_luma, 0.0f, 0.0f };
+ float white[3] = { gamut.max_luma, 0.0f, 0.0f };
+ pl_gamut_map_sample(black, &gamut);
+ pl_gamut_map_sample(white, &gamut);
+ REQUIRE_FEQ(black[0], gamut.min_luma, 1e-4);
+ REQUIRE_FEQ(black[1], 0.0f, 1e-4);
+ REQUIRE_FEQ(black[2], 0.0f, 1e-4);
+ if (gamut.function != &pl_gamut_map_darken)
+ REQUIRE_FEQ(white[0], gamut.max_luma, 1e-4);
+ REQUIRE_FEQ(white[1], 0.0f, 1e-4);
+ REQUIRE_FEQ(white[2], 0.0f, 1e-4);
+ }
+
+ enum { LUT3D_SIZE = 65 }; // for benchmarking
+ struct pl_gamut_map_params perceptual = {
+ .function = &pl_gamut_map_perceptual,
+ .input_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_2020),
+ .output_gamut = *pl_raw_primaries_get(PL_COLOR_PRIM_BT_709),
+ .max_luma = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, 1.0f),
+ .lut_size_I = LUT3D_SIZE,
+ .lut_size_C = LUT3D_SIZE,
+ .lut_size_h = LUT3D_SIZE,
+ .lut_stride = 3,
+
+ // Set strength to maximum, because otherwise the saturation mapping
+ // code will not fully apply, invalidating the following test
+ .constants.perceptual_strength = 1.0f,
+ };
+
+ // Test that primaries round-trip for perceptual gamut mapping
+ const pl_matrix3x3 rgb2lms_src = pl_ipt_rgb2lms(&perceptual.input_gamut);
+ const pl_matrix3x3 rgb2lms_dst = pl_ipt_rgb2lms(&perceptual.output_gamut);
+ const pl_matrix3x3 lms2rgb_dst = pl_ipt_lms2rgb(&perceptual.output_gamut);
+ static const float refpoints[][3] = {
+ {1, 0, 0}, {0, 1, 0}, {0, 0, 1},
+ {0, 1, 1}, {1, 0, 1}, {1, 1, 0},
+ };
+
+ for (int i = 0; i < PL_ARRAY_SIZE(refpoints); i++) {
+ float c[3] = { refpoints[i][0], refpoints[i][1], refpoints[i][2] };
+ float ref[3] = { refpoints[i][0], refpoints[i][1], refpoints[i][2] };
+ printf("Testing primary: RGB {%.0f %.0f %.0f}\n", c[0], c[1], c[2]);
+ pl_matrix3x3_apply(&rgb2lms_src, c);
+ c[0] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, c[0]);
+ c[1] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, c[1]);
+ c[2] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, c[2]);
+ pl_matrix3x3_apply(&pl_ipt_lms2ipt, c);
+ printf("Before: ICh {%f %f %f}\n",
+ c[0], sqrtf(c[1]*c[1] + c[2]*c[2]), atan2f(c[2], c[1]));
+ pl_gamut_map_sample(c, &perceptual);
+ float rgb[3] = { c[0], c[1], c[2] };
+ pl_matrix3x3_apply(&pl_ipt_ipt2lms, rgb);
+ rgb[0] = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, rgb[0]);
+ rgb[1] = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, rgb[1]);
+ rgb[2] = pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NORM, rgb[2]);
+ pl_matrix3x3_apply(&lms2rgb_dst, rgb);
+ const float hue = atan2f(c[2], c[1]);
+ printf("After: ICh {%f %f %f} = RGB {%f %f %f}\n",
+ c[0], sqrtf(c[1]*c[1] + c[2]*c[2]), hue, rgb[0], rgb[1], rgb[2]);
+ pl_matrix3x3_apply(&rgb2lms_dst, ref);
+ ref[0] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, ref[0]);
+ ref[1] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, ref[1]);
+ ref[2] = pl_hdr_rescale(PL_HDR_NORM, PL_HDR_PQ, ref[2]);
+ pl_matrix3x3_apply(&pl_ipt_lms2ipt, ref);
+ const float hue_ref = atan2f(ref[2], ref[1]);
+ printf("Should be: ICh {%f %f %f}\n",
+ ref[0], sqrtf(ref[1]*ref[1] + ref[2]*ref[2]), hue_ref);
+ REQUIRE_FEQ(hue, hue_ref, 3.0e-3);
+ }
+
+ float *tmp = malloc(sizeof(float[LUT3D_SIZE][LUT3D_SIZE][LUT3D_SIZE][3]));
+ if (tmp) {
+ pl_clock_t start = pl_clock_now();
+ pl_gamut_map_generate(tmp, &perceptual);
+ pl_log_cpu_time(log, start, pl_clock_now(), "generating 3DLUT");
+ free(tmp);
+ }
+
+ pl_log_destroy(&log);
+}
diff --git a/src/tests/utils.c b/src/tests/utils.c
new file mode 100644
index 0000000..73a9265
--- /dev/null
+++ b/src/tests/utils.c
@@ -0,0 +1,165 @@
+#include "tests.h"
+#include "gpu.h"
+
+#include <libplacebo/utils/upload.h>
+
+int main()
+{
+ struct pl_bit_encoding bits = {0};
+ struct pl_plane_data data = {0};
+
+ static const struct pl_bit_encoding bits0 = {0};
+ static const struct pl_bit_encoding bits8 = {
+ .sample_depth = 8,
+ .color_depth = 8,
+ };
+
+ static const struct pl_bit_encoding bits16 = {
+ .sample_depth = 16,
+ .color_depth = 16,
+ };
+
+ static const struct pl_bit_encoding bits10_16 = {
+ .sample_depth = 16,
+ .color_depth = 10,
+ };
+
+ static const struct pl_bit_encoding bits10_16_6 = {
+ .sample_depth = 16,
+ .color_depth = 10,
+ .bit_shift = 6,
+ };
+
+#define TEST_ALIGN(ref, ref_align, ref_bits, ...) \
+ do { \
+ pl_plane_data_from_mask(&data, (uint64_t[4]){ __VA_ARGS__ }); \
+ REQUIRE_MEMEQ(&data, &ref, sizeof(ref)); \
+ pl_plane_data_align(&data, &bits); \
+ REQUIRE_MEMEQ(&data, &ref_align, sizeof(ref_align)); \
+ REQUIRE_MEMEQ(&bits, &ref_bits, sizeof(bits)); \
+ } while (0)
+
+#define TEST(ref, bits, ...) TEST_ALIGN(ref, ref, bits, __VA_ARGS__)
+
+ static const struct pl_plane_data rgb8 = {
+ .component_size = {8, 8, 8},
+ .component_map = {0, 1, 2},
+ };
+
+ TEST(rgb8, bits8, 0xFF, 0xFF00, 0xFF0000);
+
+ static const struct pl_plane_data bgra8 = {
+ .component_size = {8, 8, 8, 8},
+ .component_map = {2, 1, 0, 3},
+ };
+
+ TEST(bgra8, bits8, 0xFF0000, 0xFF00, 0xFF, 0xFF000000);
+
+ static const struct pl_plane_data gr16 = {
+ .component_size = {16, 16},
+ .component_map = {1, 0},
+ };
+
+ TEST(gr16, bits16, 0xFFFF0000, 0xFFFF);
+
+ static const struct pl_plane_data r10x6g10 = {
+ .component_size = {10, 10},
+ .component_map = {1, 0}, // LSB -> MSB ordering
+ .component_pad = {0, 6},
+ };
+
+ TEST_ALIGN(r10x6g10, gr16, bits10_16, 0x03FF0000, 0x03FF);
+
+ static const struct pl_plane_data rgb565 = {
+ .component_size = {5, 6, 5},
+ .component_map = {2, 1, 0}, // LSB -> MSB ordering
+ };
+
+ TEST(rgb565, bits0, 0xF800, 0x07E0, 0x001F);
+
+ static const struct pl_plane_data rgba16 = {
+ .component_size = {16, 16, 16, 16},
+ .component_map = {0, 1, 2, 3},
+ };
+
+ TEST(rgba16, bits16, 0xFFFFllu, 0xFFFF0000llu, 0xFFFF00000000llu, 0xFFFF000000000000llu);
+
+ static const struct pl_plane_data p010 = {
+ .component_size = {10, 10, 10},
+ .component_map = {0, 1, 2},
+ .component_pad = {6, 6, 6},
+ };
+
+ static const struct pl_plane_data rgb16 = {
+ .component_size = {16, 16, 16},
+ .component_map = {0, 1, 2},
+ };
+
+ TEST_ALIGN(p010, rgb16, bits10_16_6, 0xFFC0llu, 0xFFC00000llu, 0xFFC000000000llu);
+
+ // Test GLSL structure packing
+ struct pl_var vec1 = pl_var_float(""),
+ vec2 = pl_var_vec2(""),
+ vec3 = pl_var_vec3(""),
+ mat2 = pl_var_mat2(""),
+ mat3 = pl_var_mat3("");
+
+ struct pl_var_layout layout;
+ layout = pl_std140_layout(0, &vec2);
+ REQUIRE_CMP(layout.offset, ==, 0 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 2 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 2 * sizeof(float), "zu");
+
+ layout = pl_std140_layout(3 * sizeof(float), &vec3);
+ REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 3 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 3 * sizeof(float), "zu");
+
+ layout = pl_std140_layout(2 * sizeof(float), &mat3);
+ REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 3 * 4 * sizeof(float), "zu");
+
+ layout = pl_std430_layout(2 * sizeof(float), &mat3);
+ REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 4 * 3 * sizeof(float), "zu");
+
+ layout = pl_std140_layout(3 * sizeof(float), &vec1);
+ REQUIRE_CMP(layout.offset, ==, 3 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, sizeof(float), "zu");
+
+ struct pl_var vec2a = vec2;
+ vec2a.dim_a = 50;
+
+ layout = pl_std140_layout(sizeof(float), &vec2a);
+ REQUIRE_CMP(layout.offset, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 50 * 4 * sizeof(float), "zu");
+
+ layout = pl_std430_layout(sizeof(float), &vec2a);
+ REQUIRE_CMP(layout.offset, ==, 2 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 2 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 50 * 2 * sizeof(float), "zu");
+
+ struct pl_var mat2a = mat2;
+ mat2a.dim_a = 20;
+
+ layout = pl_std140_layout(5 * sizeof(float), &mat2a);
+ REQUIRE_CMP(layout.offset, ==, 8 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 4 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 20 * 2 * 4 * sizeof(float), "zu");
+
+ layout = pl_std430_layout(5 * sizeof(float), &mat2a);
+ REQUIRE_CMP(layout.offset, ==, 6 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.stride, ==, 2 * sizeof(float), "zu");
+ REQUIRE_CMP(layout.size, ==, 20 * 2 * 2 * sizeof(float), "zu");
+
+ for (const struct pl_named_var *nvar = pl_var_glsl_types; nvar->glsl_name; nvar++) {
+ struct pl_var var = nvar->var;
+ REQUIRE_CMP(nvar->glsl_name, ==, pl_var_glsl_type_name(var), "s");
+ var.dim_a = 100;
+ REQUIRE_CMP(nvar->glsl_name, ==, pl_var_glsl_type_name(var), "s");
+ }
+}
diff --git a/src/tests/vulkan.c b/src/tests/vulkan.c
new file mode 100644
index 0000000..476560a
--- /dev/null
+++ b/src/tests/vulkan.c
@@ -0,0 +1,296 @@
+#include <vulkan/vulkan.h>
+
+#include "gpu_tests.h"
+#include "vulkan/command.h"
+#include "vulkan/gpu.h"
+
+#include <libplacebo/vulkan.h>
+
+static void vulkan_interop_tests(pl_vulkan pl_vk,
+ enum pl_handle_type handle_type)
+{
+ pl_gpu gpu = pl_vk->gpu;
+ printf("testing vulkan interop for handle type 0x%x\n", handle_type);
+
+ if (gpu->export_caps.buf & handle_type) {
+ pl_buf buf = pl_buf_create(gpu, pl_buf_params(
+ .size = 1024,
+ .export_handle = handle_type,
+ ));
+
+ REQUIRE(buf);
+ REQUIRE_HANDLE(buf->shared_mem, handle_type);
+ REQUIRE_CMP(buf->shared_mem.size, >=, buf->params.size, "zu");
+ REQUIRE(pl_buf_export(gpu, buf));
+ pl_buf_destroy(gpu, &buf);
+ }
+
+ pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_UNORM, 1, 0, 0, PL_FMT_CAP_BLITTABLE);
+ if (!fmt)
+ return;
+
+ if (gpu->export_caps.sync & handle_type) {
+ pl_sync sync = pl_sync_create(gpu, handle_type);
+ pl_tex tex = pl_tex_create(gpu, pl_tex_params(
+ .w = 32,
+ .h = 32,
+ .format = fmt,
+ .blit_dst = true,
+ ));
+
+ REQUIRE(sync);
+ REQUIRE(tex);
+
+ // Note: For testing purposes, we have to fool pl_tex_export into
+ // thinking this texture is actually exportable. Just hack it in
+ // horribly.
+ ((struct pl_tex_params *) &tex->params)->export_handle = PL_HANDLE_DMA_BUF;
+
+ REQUIRE(pl_tex_export(gpu, tex, sync));
+
+ // Re-use our internal helpers to signal this VkSemaphore
+ struct vk_ctx *vk = PL_PRIV(pl_vk);
+ struct vk_cmd *cmd = vk_cmd_begin(vk->pool_graphics, NULL);
+ REQUIRE(cmd);
+ struct pl_sync_vk *sync_vk = PL_PRIV(sync);
+ vk_cmd_sig(cmd, VK_PIPELINE_STAGE_2_NONE, (pl_vulkan_sem){ sync_vk->signal });
+ REQUIRE(vk_cmd_submit(&cmd));
+
+ // Do something with the image again to "import" it
+ pl_tex_clear(gpu, tex, (float[4]){0});
+ pl_gpu_finish(gpu);
+ REQUIRE(!pl_tex_poll(gpu, tex, 0));
+
+ pl_sync_destroy(gpu, &sync);
+ pl_tex_destroy(gpu, &tex);
+ }
+
+ // Test interop API
+ if (gpu->export_caps.tex & handle_type) {
+ VkSemaphore sem = pl_vulkan_sem_create(gpu, pl_vulkan_sem_params(
+ .type = VK_SEMAPHORE_TYPE_TIMELINE,
+ .initial_value = 0,
+ ));
+
+ pl_tex tex = pl_tex_create(gpu, pl_tex_params(
+ .w = 32,
+ .h = 32,
+ .format = fmt,
+ .blit_dst = true,
+ .export_handle = handle_type,
+ ));
+
+ REQUIRE(sem);
+ REQUIRE(tex);
+
+ REQUIRE(pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
+ .tex = tex,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ .qf = VK_QUEUE_FAMILY_EXTERNAL,
+ .semaphore = { sem, 1 },
+ )));
+
+ pl_vulkan_release_ex(gpu, pl_vulkan_release_params(
+ .tex = tex,
+ .layout = VK_IMAGE_LAYOUT_GENERAL,
+ .qf = VK_QUEUE_FAMILY_EXTERNAL,
+ .semaphore = { sem, 1 },
+ ));
+
+ pl_tex_clear(gpu, tex, (float[4]){0});
+ pl_gpu_finish(gpu);
+ REQUIRE(!pl_tex_poll(gpu, tex, 0));
+
+ pl_vulkan_sem_destroy(gpu, &sem);
+ pl_tex_destroy(gpu, &tex);
+ }
+}
+
+static void vulkan_swapchain_tests(pl_vulkan vk, VkSurfaceKHR surf)
+{
+ if (!surf)
+ return;
+
+ printf("testing vulkan swapchain\n");
+ pl_gpu gpu = vk->gpu;
+ pl_swapchain sw;
+ sw = pl_vulkan_create_swapchain(vk, pl_vulkan_swapchain_params(
+ .surface = surf,
+ ));
+ REQUIRE(sw);
+
+ // Attempt actually initializing the swapchain
+ int w = 640, h = 480;
+ REQUIRE(pl_swapchain_resize(sw, &w, &h));
+
+ for (int i = 0; i < 10; i++) {
+ struct pl_swapchain_frame frame;
+ REQUIRE(pl_swapchain_start_frame(sw, &frame));
+ if (frame.fbo->params.blit_dst)
+ pl_tex_clear(gpu, frame.fbo, (float[4]){0});
+
+ // TODO: test this with an actual pl_renderer instance
+ struct pl_frame target;
+ pl_frame_from_swapchain(&target, &frame);
+
+ REQUIRE(pl_swapchain_submit_frame(sw));
+ pl_swapchain_swap_buffers(sw);
+
+ // Try resizing the swapchain in the middle of rendering
+ if (i == 5) {
+ w = 320;
+ h = 240;
+ REQUIRE(pl_swapchain_resize(sw, &w, &h));
+ }
+ }
+
+ pl_swapchain_destroy(&sw);
+}
+
+int main()
+{
+ pl_log log = pl_test_logger();
+ pl_vk_inst inst = pl_vk_inst_create(log, pl_vk_inst_params(
+ .debug = true,
+ .debug_extra = true,
+ .get_proc_addr = vkGetInstanceProcAddr,
+ .opt_extensions = (const char *[]){
+ VK_KHR_SURFACE_EXTENSION_NAME,
+ VK_EXT_HEADLESS_SURFACE_EXTENSION_NAME,
+ },
+ .num_opt_extensions = 2,
+ ));
+
+ if (!inst)
+ return SKIP;
+
+ PL_VK_LOAD_FUN(inst->instance, EnumeratePhysicalDevices, inst->get_proc_addr);
+ PL_VK_LOAD_FUN(inst->instance, GetPhysicalDeviceProperties, inst->get_proc_addr);
+
+ uint32_t num = 0;
+ EnumeratePhysicalDevices(inst->instance, &num, NULL);
+ if (!num)
+ return SKIP;
+
+ VkPhysicalDevice *devices = calloc(num, sizeof(*devices));
+ if (!devices)
+ return 1;
+ EnumeratePhysicalDevices(inst->instance, &num, devices);
+
+ VkSurfaceKHR surf = VK_NULL_HANDLE;
+
+ PL_VK_LOAD_FUN(inst->instance, CreateHeadlessSurfaceEXT, inst->get_proc_addr);
+ if (CreateHeadlessSurfaceEXT) {
+ VkHeadlessSurfaceCreateInfoEXT info = {
+ .sType = VK_STRUCTURE_TYPE_HEADLESS_SURFACE_CREATE_INFO_EXT,
+ };
+
+ VkResult res = CreateHeadlessSurfaceEXT(inst->instance, &info, NULL, &surf);
+ REQUIRE_CMP(res, ==, VK_SUCCESS, "u");
+ }
+
+ // Make sure choosing any device works
+ VkPhysicalDevice dev;
+ dev = pl_vulkan_choose_device(log, pl_vulkan_device_params(
+ .instance = inst->instance,
+ .get_proc_addr = inst->get_proc_addr,
+ .allow_software = true,
+ .surface = surf,
+ ));
+ if (!dev)
+ return SKIP;
+
+ // Test all attached devices
+ for (int i = 0; i < num; i++) {
+ VkPhysicalDeviceProperties props = {0};
+ GetPhysicalDeviceProperties(devices[i], &props);
+#ifndef CI_ALLOW_SW
+ if (props.deviceType == VK_PHYSICAL_DEVICE_TYPE_CPU) {
+ printf("Skipping device %d: %s\n", i, props.deviceName);
+ continue;
+ }
+#endif
+ printf("Testing device %d: %s\n", i, props.deviceName);
+
+ // Make sure we can choose this device by name
+ dev = pl_vulkan_choose_device(log, pl_vulkan_device_params(
+ .instance = inst->instance,
+ .get_proc_addr = inst->get_proc_addr,
+ .device_name = props.deviceName,
+ ));
+ REQUIRE_CMP(dev, ==, devices[i], "p");
+
+ struct pl_vulkan_params params = *pl_vulkan_params(
+ .instance = inst->instance,
+ .get_proc_addr = inst->get_proc_addr,
+ .device = devices[i],
+ .queue_count = 8, // test inter-queue stuff
+ .surface = surf,
+ );
+
+ pl_vulkan vk = pl_vulkan_create(log, &params);
+ if (!vk)
+ continue;
+
+ gpu_shader_tests(vk->gpu);
+ vulkan_swapchain_tests(vk, surf);
+
+ // Print heap statistics
+ pl_vk_print_heap(vk->gpu, PL_LOG_DEBUG);
+
+ // Test importing this context via the vulkan interop API
+ pl_vulkan vk2 = pl_vulkan_import(log, pl_vulkan_import_params(
+ .instance = vk->instance,
+ .get_proc_addr = inst->get_proc_addr,
+ .phys_device = vk->phys_device,
+ .device = vk->device,
+
+ .extensions = vk->extensions,
+ .num_extensions = vk->num_extensions,
+ .features = vk->features,
+ .queue_graphics = vk->queue_graphics,
+ .queue_compute = vk->queue_compute,
+ .queue_transfer = vk->queue_transfer,
+ ));
+ REQUIRE(vk2);
+ pl_vulkan_destroy(&vk2);
+
+ // Run these tests last because they disable some validation layers
+#ifdef PL_HAVE_UNIX
+ vulkan_interop_tests(vk, PL_HANDLE_FD);
+ vulkan_interop_tests(vk, PL_HANDLE_DMA_BUF);
+#endif
+#ifdef PL_HAVE_WIN32
+ vulkan_interop_tests(vk, PL_HANDLE_WIN32);
+ vulkan_interop_tests(vk, PL_HANDLE_WIN32_KMT);
+#endif
+ gpu_interop_tests(vk->gpu);
+ pl_vulkan_destroy(&vk);
+
+ // Re-run the same export/import tests with async queues disabled
+ params.async_compute = false;
+ params.async_transfer = false;
+ vk = pl_vulkan_create(log, &params);
+ REQUIRE(vk); // it succeeded the first time
+
+#ifdef PL_HAVE_UNIX
+ vulkan_interop_tests(vk, PL_HANDLE_FD);
+ vulkan_interop_tests(vk, PL_HANDLE_DMA_BUF);
+#endif
+#ifdef PL_HAVE_WIN32
+ vulkan_interop_tests(vk, PL_HANDLE_WIN32);
+ vulkan_interop_tests(vk, PL_HANDLE_WIN32_KMT);
+#endif
+ gpu_interop_tests(vk->gpu);
+ pl_vulkan_destroy(&vk);
+
+ // Reduce log spam after first tested device
+ pl_log_level_update(log, PL_LOG_INFO);
+ }
+
+ if (surf)
+ vkDestroySurfaceKHR(inst->instance, surf, NULL);
+ pl_vk_inst_destroy(&inst);
+ pl_log_destroy(&log);
+ free(devices);
+}