From ff6e3c025658a5fa1affd094f220b623e7e1b24b Mon Sep 17 00:00:00 2001 From: Daniel Baumann Date: Mon, 15 Apr 2024 22:38:23 +0200 Subject: Adding upstream version 6.338.2. Signed-off-by: Daniel Baumann --- src/tests/bench.c | 550 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 550 insertions(+) create mode 100644 src/tests/bench.c (limited to 'src/tests/bench.c') diff --git a/src/tests/bench.c b/src/tests/bench.c new file mode 100644 index 0000000..22638d8 --- /dev/null +++ b/src/tests/bench.c @@ -0,0 +1,550 @@ +#include "tests.h" + +#include +#include +#include +#include +#include + +enum { + // Image configuration + NUM_TEX = 16, + WIDTH = 2048, + HEIGHT = 2048, + DEPTH = 16, + COMPS = 4, + + // Queue configuration + NUM_QUEUES = NUM_TEX, + ASYNC_TX = 1, + ASYNC_COMP = 1, + + // Test configuration + TEST_MS = 1000, + WARMUP_MS = 500, +}; + +static pl_tex create_test_img(pl_gpu gpu) +{ + pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, DEPTH, 32, PL_FMT_CAP_LINEAR); + REQUIRE(fmt); + + const float xc = (WIDTH - 1) / 2.0f; + const float yc = (HEIGHT - 1) / 2.0f; + const float kf = 0.5f / sqrtf(xc * xc + yc * yc); + const float invphi = 0.61803398874989; + const float freqR = kf * M_PI * 0.2f; + const float freqG = freqR * invphi; + const float freqB = freqG * invphi; + float *data = malloc(WIDTH * HEIGHT * COMPS * sizeof(float)); + for (int y = 0; y < HEIGHT; y++) { + for (int x = 0; x < WIDTH; x++) { + float *color = &data[(y * WIDTH + x) * COMPS]; + float xx = x - xc, yy = y - yc; + float r2 = xx * xx + yy * yy; + switch (COMPS) { + case 4: color[3] = 1.0; + case 3: color[2] = 0.5f * sinf(freqB * r2) + 0.5f;; + case 2: color[1] = 0.5f * sinf(freqG * r2) + 0.5f;; + case 1: color[0] = 0.5f * sinf(freqR * r2) + 0.5f;; + } + } + } + + pl_tex tex = pl_tex_create(gpu, pl_tex_params( + .format = fmt, + .w = WIDTH, + .h = HEIGHT, + .sampleable = true, + .initial_data = data, + )); + + free(data); + REQUIRE(tex); + return tex; +} + +struct bench { + void (*run_sh)(pl_shader sh, pl_shader_obj *state, + pl_tex src); + + void (*run_tex)(pl_gpu gpu, pl_tex tex); +}; + +static void run_bench(pl_gpu gpu, pl_dispatch dp, + pl_shader_obj *state, pl_tex src, + pl_tex fbo, pl_timer timer, + const struct bench *bench) +{ + REQUIRE(bench); + REQUIRE(bench->run_sh || bench->run_tex); + if (bench->run_sh) { + pl_shader sh = pl_dispatch_begin(dp); + bench->run_sh(sh, state, src); + + pl_dispatch_finish(dp, pl_dispatch_params( + .shader = &sh, + .target = fbo, + .timer = timer, + )); + } else { + bench->run_tex(gpu, fbo); + } +} + +static void benchmark(pl_gpu gpu, const char *name, + const struct bench *bench) +{ + pl_dispatch dp = pl_dispatch_create(gpu->log, gpu); + REQUIRE(dp); + pl_shader_obj state = NULL; + pl_tex src = create_test_img(gpu); + + // Create the FBOs + pl_fmt fmt = pl_find_fmt(gpu, PL_FMT_FLOAT, COMPS, DEPTH, 32, + PL_FMT_CAP_RENDERABLE | PL_FMT_CAP_BLITTABLE); + REQUIRE(fmt); + + pl_tex fbos[NUM_TEX] = {0}; + for (int i = 0; i < NUM_TEX; i++) { + fbos[i] = pl_tex_create(gpu, pl_tex_params( + .format = fmt, + .w = WIDTH, + .h = HEIGHT, + .renderable = true, + .blit_dst = true, + .host_writable = true, + .host_readable = true, + .storable = !!(fmt->caps & PL_FMT_CAP_STORABLE), + )); + REQUIRE(fbos[i]); + + pl_tex_clear(gpu, fbos[i], (float[4]){ 0.0 }); + } + + // Run the benchmark and flush+block once to force shader compilation etc. + run_bench(gpu, dp, &state, src, fbos[0], NULL, bench); + pl_gpu_finish(gpu); + + // Perform the actual benchmark + pl_clock_t start_warmup = 0, start_test = 0; + unsigned long frames = 0, frames_warmup = 0; + + pl_timer timer = pl_timer_create(gpu); + uint64_t gputime_total = 0; + unsigned long gputime_count = 0; + uint64_t gputime; + + start_warmup = pl_clock_now(); + do { + const int idx = frames % NUM_TEX; + while (pl_tex_poll(gpu, fbos[idx], UINT64_MAX)) + ; // do nothing + run_bench(gpu, dp, &state, src, fbos[idx], start_test ? timer : NULL, bench); + pl_gpu_flush(gpu); + frames++; + + if (start_test) { + while ((gputime = pl_timer_query(gpu, timer))) { + gputime_total += gputime; + gputime_count++; + } + } + + pl_clock_t now = pl_clock_now(); + if (start_test) { + if (pl_clock_diff(now, start_test) > TEST_MS * 1e-3) + break; + } else if (pl_clock_diff(now, start_warmup) > WARMUP_MS * 1e-3) { + start_test = now; + frames_warmup = frames; + } + } while (true); + + // Force the GPU to finish execution and re-measure the final stop time + pl_gpu_finish(gpu); + + pl_clock_t stop = pl_clock_now(); + while ((gputime = pl_timer_query(gpu, timer))) { + gputime_total += gputime; + gputime_count++; + } + + frames -= frames_warmup; + double secs = pl_clock_diff(stop, start_test); + printf("'%s':\t%4lu frames in %1.6f seconds => %2.6f ms/frame (%5.2f FPS)", + name, frames, secs, 1000 * secs / frames, frames / secs); + if (gputime_count) + printf(", gpu time: %2.6f ms", 1e-6 * gputime_total / gputime_count); + printf("\n"); + + pl_timer_destroy(gpu, &timer); + pl_shader_obj_destroy(&state); + pl_dispatch_destroy(&dp); + pl_tex_destroy(gpu, &src); + for (int i = 0; i < NUM_TEX; i++) + pl_tex_destroy(gpu, &fbos[i]); +} + +// List of benchmarks +static void bench_deband(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + pl_shader_deband(sh, pl_sample_src( .tex = src ), NULL); +} + +static void bench_deband_heavy(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + pl_shader_deband(sh, pl_sample_src( .tex = src ), pl_deband_params( + .iterations = 4, + .threshold = 4.0, + .radius = 4.0, + .grain = 16.0, + )); +} + +static void bench_bilinear(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_bilinear(sh, pl_sample_src( .tex = src ))); +} + +static void bench_bicubic(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_bicubic(sh, pl_sample_src( .tex = src ))); +} + +static void bench_hermite(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_hermite(sh, pl_sample_src( .tex = src ))); +} + +static void bench_gaussian(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_gaussian(sh, pl_sample_src( .tex = src ))); +} + +static void bench_dither_blue(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + pl_shader_dither(sh, 8, state, pl_dither_params( + .method = PL_DITHER_BLUE_NOISE, + )); +} + +static void bench_dither_white(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + pl_shader_dither(sh, 8, state, pl_dither_params( + .method = PL_DITHER_WHITE_NOISE, + )); +} + +static void bench_dither_ordered_fix(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + pl_shader_dither(sh, 8, state, pl_dither_params( + .method = PL_DITHER_ORDERED_FIXED, + )); +} + +static void bench_polar(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_sample_filter_params params = { + .filter = pl_filter_ewa_lanczos, + .lut = state, + }; + + REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), ¶ms)); +} + +static void bench_polar_nocompute(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_sample_filter_params params = { + .filter = pl_filter_ewa_lanczos, + .no_compute = true, + .lut = state, + }; + + REQUIRE(pl_shader_sample_polar(sh, pl_sample_src( .tex = src ), ¶ms)); +} + +static void bench_hdr_peak(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + REQUIRE(pl_shader_detect_peak(sh, pl_color_space_hdr10, state, &pl_peak_detect_default_params)); +} + +static void bench_hdr_peak_hq(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + REQUIRE(pl_shader_detect_peak(sh, pl_color_space_hdr10, state, &pl_peak_detect_high_quality_params)); +} + +static void bench_hdr_lut(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_color_map_params params = { + PL_COLOR_MAP_DEFAULTS + .tone_mapping_function = &pl_tone_map_bt2390, + .tone_mapping_mode = PL_TONE_MAP_RGB, + }; + + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + pl_shader_color_map_ex(sh, ¶ms, pl_color_map_args( + .src = pl_color_space_hdr10, + .dst = pl_color_space_monitor, + .state = state, + )); +} + +static void bench_hdr_clip(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_color_map_params params = { + PL_COLOR_MAP_DEFAULTS + .tone_mapping_function = &pl_tone_map_clip, + .tone_mapping_mode = PL_TONE_MAP_RGB, + }; + + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + pl_shader_color_map_ex(sh, ¶ms, pl_color_map_args( + .src = pl_color_space_hdr10, + .dst = pl_color_space_monitor, + .state = state, + )); +} + +static void bench_weave(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_deinterlace_source dsrc = { + .cur = pl_field_pair(src), + .field = PL_FIELD_TOP, + }; + + pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params( + .algo = PL_DEINTERLACE_WEAVE, + )); +} + +static void bench_bob(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_deinterlace_source dsrc = { + .cur = pl_field_pair(src), + .field = PL_FIELD_TOP, + }; + + pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params( + .algo = PL_DEINTERLACE_BOB, + )); +} + +static void bench_yadif(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_deinterlace_source dsrc = { + .prev = pl_field_pair(src), + .cur = pl_field_pair(src), + .next = pl_field_pair(src), + .field = PL_FIELD_TOP, + }; + + pl_shader_deinterlace(sh, &dsrc, pl_deinterlace_params( + .algo = PL_DEINTERLACE_YADIF, + )); +} + +static void bench_av1_grain(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_film_grain_params params = { + .data = { + .type = PL_FILM_GRAIN_AV1, + .params.av1 = av1_grain_data, + .seed = rand(), + }, + .tex = src, + .components = 3, + .component_mapping = {0, 1, 2}, + .repr = &(struct pl_color_repr) {0}, + }; + + REQUIRE(pl_shader_film_grain(sh, state, ¶ms)); +} + +static void bench_av1_grain_lap(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_film_grain_params params = { + .data = { + .type = PL_FILM_GRAIN_AV1, + .params.av1 = av1_grain_data, + .seed = rand(), + }, + .tex = src, + .components = 3, + .component_mapping = {0, 1, 2}, + .repr = &(struct pl_color_repr) {0}, + }; + + params.data.params.av1.overlap = true; + REQUIRE(pl_shader_film_grain(sh, state, ¶ms)); +} + +static void bench_h274_grain(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + struct pl_film_grain_params params = { + .data = { + .type = PL_FILM_GRAIN_H274, + .params.h274 = h274_grain_data, + .seed = rand(), + }, + .tex = src, + .components = 3, + .component_mapping = {0, 1, 2}, + .repr = &(struct pl_color_repr) {0}, + }; + + REQUIRE(pl_shader_film_grain(sh, state, ¶ms)); +} + +static void bench_reshape_poly(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + pl_shader_dovi_reshape(sh, &(struct pl_dovi_metadata) { .comp = { + { + .num_pivots = 8, + .pivots = {0.0, 0.00488758553, 0.0420332365, 0.177908108, + 0.428152502, 0.678396881, 0.92864126, 1.0}, + .method = {0, 0, 0, 0, 0, 0, 0}, + .poly_coeffs = { + {0.00290930271, 2.30019712, 50.1446037}, + {0.00725257397, 1.88119054, -4.49443769}, + {0.0150123835, 1.61106598, -1.64833081}, + {0.0498571396, 1.2059114, -0.430627108}, + {0.0878019333, 1.01845241, -0.19669354}, + {0.120447636, 0.920134187, -0.122338772}, + {2.12430835, -3.30913281, 2.10893941}, + }, + }, { + .num_pivots = 2, + .pivots = {0.0, 1.0}, + .method = {0}, + .poly_coeffs = {{-0.397901177, 1.85908031, 0}}, + }, { + .num_pivots = 2, + .pivots = {0.0, 1.0}, + .method = {0}, + .poly_coeffs = {{-0.399355531, 1.85591626, 0}}, + }, + }}); +} + +static void bench_reshape_mmr(pl_shader sh, pl_shader_obj *state, pl_tex src) +{ + REQUIRE(pl_shader_sample_direct(sh, pl_sample_src( .tex = src ))); + pl_shader_dovi_reshape(sh, &dovi_meta); // this includes MMR +} + +static float data[WIDTH * HEIGHT * COMPS + 8192]; + +static void bench_download(pl_gpu gpu, pl_tex tex) +{ + REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params( + .tex = tex, + .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), + ))); +} + +static void bench_upload(pl_gpu gpu, pl_tex tex) +{ + REQUIRE(pl_tex_upload(gpu, pl_tex_transfer_params( + .tex = tex, + .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), + ))); +} + +static void dummy_cb(void *arg) {} + +static void bench_download_async(pl_gpu gpu, pl_tex tex) +{ + REQUIRE(pl_tex_download(gpu, pl_tex_transfer_params( + .tex = tex, + .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), + .callback = dummy_cb, + ))); +} + +static void bench_upload_async(pl_gpu gpu, pl_tex tex) +{ + REQUIRE(pl_tex_upload(gpu, pl_tex_transfer_params( + .tex = tex, + .ptr = (uint8_t *) PL_ALIGN((uintptr_t) data, 4096), + .callback = dummy_cb, + ))); +} + +int main() +{ + setbuf(stdout, NULL); + setbuf(stderr, NULL); + + pl_log log = pl_log_create(PL_API_VER, pl_log_params( + .log_cb = isatty(fileno(stdout)) ? pl_log_color : pl_log_simple, + .log_level = PL_LOG_WARN, + )); + + pl_vulkan vk = pl_vulkan_create(log, pl_vulkan_params( + .allow_software = true, + .async_transfer = ASYNC_TX, + .async_compute = ASYNC_COMP, + .queue_count = NUM_QUEUES, + )); + + if (!vk) + return SKIP; + +#define BENCH_SH(fn) &(struct bench) { .run_sh = fn } +#define BENCH_TEX(fn) &(struct bench) { .run_tex = fn } + + printf("= Running benchmarks =\n"); + benchmark(vk->gpu, "tex_download ptr", BENCH_TEX(bench_download)); + benchmark(vk->gpu, "tex_download ptr async", BENCH_TEX(bench_download_async)); + benchmark(vk->gpu, "tex_upload ptr", BENCH_TEX(bench_upload)); + benchmark(vk->gpu, "tex_upload ptr async", BENCH_TEX(bench_upload_async)); + benchmark(vk->gpu, "bilinear", BENCH_SH(bench_bilinear)); + benchmark(vk->gpu, "bicubic", BENCH_SH(bench_bicubic)); + benchmark(vk->gpu, "hermite", BENCH_SH(bench_hermite)); + benchmark(vk->gpu, "gaussian", BENCH_SH(bench_gaussian)); + benchmark(vk->gpu, "deband", BENCH_SH(bench_deband)); + benchmark(vk->gpu, "deband_heavy", BENCH_SH(bench_deband_heavy)); + + // Deinterlacing + benchmark(vk->gpu, "weave", BENCH_SH(bench_weave)); + benchmark(vk->gpu, "bob", BENCH_SH(bench_bob)); + benchmark(vk->gpu, "yadif", BENCH_SH(bench_yadif)); + + // Polar sampling + benchmark(vk->gpu, "polar", BENCH_SH(bench_polar)); + if (vk->gpu->glsl.compute) + benchmark(vk->gpu, "polar_nocompute", BENCH_SH(bench_polar_nocompute)); + + // Dithering algorithms + benchmark(vk->gpu, "dither_blue", BENCH_SH(bench_dither_blue)); + benchmark(vk->gpu, "dither_white", BENCH_SH(bench_dither_white)); + benchmark(vk->gpu, "dither_ordered_fixed", BENCH_SH(bench_dither_ordered_fix)); + + // HDR peak detection + if (vk->gpu->glsl.compute) { + benchmark(vk->gpu, "hdr_peakdetect", BENCH_SH(bench_hdr_peak)); + benchmark(vk->gpu, "hdr_peakdetect_hq", BENCH_SH(bench_hdr_peak_hq)); + } + + // Tone mapping + benchmark(vk->gpu, "hdr_lut", BENCH_SH(bench_hdr_lut)); + benchmark(vk->gpu, "hdr_clip", BENCH_SH(bench_hdr_clip)); + + // Misc stuff + benchmark(vk->gpu, "av1_grain", BENCH_SH(bench_av1_grain)); + benchmark(vk->gpu, "av1_grain_lap", BENCH_SH(bench_av1_grain_lap)); + benchmark(vk->gpu, "h274_grain", BENCH_SH(bench_h274_grain)); + benchmark(vk->gpu, "reshape_poly", BENCH_SH(bench_reshape_poly)); + benchmark(vk->gpu, "reshape_mmr", BENCH_SH(bench_reshape_mmr)); + + pl_vulkan_destroy(&vk); + pl_log_destroy(&log); + return 0; +} -- cgit v1.2.3