Diffstat (limited to 'src/utils/upload.c')
-rw-r--r-- | src/utils/upload.c | 382 |
1 file changed, 382 insertions, 0 deletions
diff --git a/src/utils/upload.c b/src/utils/upload.c
new file mode 100644
index 0000000..75bd4bb
--- /dev/null
+++ b/src/utils/upload.c
@@ -0,0 +1,382 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "log.h"
+#include "common.h"
+#include "gpu.h"
+
+#include <libplacebo/utils/upload.h>
+
+#define MAX_COMPS 4
+
+struct comp {
+    int order; // e.g. 0, 1, 2, 3 for RGBA
+    int size;  // size in bits
+    int shift; // bit-shift / offset in bits
+};
+
+static int compare_comp(const void *pa, const void *pb)
+{
+    const struct comp *a = pa, *b = pb;
+
+    // Move all of the components with a size of 0 to the end, so they can
+    // be ignored outright
+    if (a->size && !b->size)
+        return -1;
+    if (b->size && !a->size)
+        return 1;
+
+    // Otherwise, just compare based on the shift
+    return PL_CMP(a->shift, b->shift);
+}
+
+void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4],
+                              int shift[4])
+{
+    struct comp comps[MAX_COMPS];
+    for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) {
+        comps[i].order = i;
+        comps[i].size = size[i];
+        comps[i].shift = shift[i];
+    }
+
+    // Sort the components by shift
+    qsort(comps, MAX_COMPS, sizeof(struct comp), compare_comp);
+
+    // Generate the resulting component size/pad/map
+    int offset = 0;
+    for (int i = 0; i < MAX_COMPS; i++) {
+        if (comps[i].size) {
+            assert(comps[i].shift >= offset);
+            data->component_size[i] = comps[i].size;
+            data->component_pad[i] = comps[i].shift - offset;
+            data->component_map[i] = comps[i].order;
+            offset += data->component_size[i] + data->component_pad[i];
+        } else {
+            // Clear the superfluous entries for sanity
+            data->component_size[i] = 0;
+            data->component_pad[i] = 0;
+            data->component_map[i] = 0;
+        }
+    }
+}
+
+void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4])
+{
+    int size[4];
+    int shift[4];
+
+    for (int i = 0; i < PL_ARRAY_SIZE(size); i++) {
+        size[i] = __builtin_popcountll(mask[i]);
+        shift[i] = PL_MAX(0, __builtin_ffsll(mask[i]) - 1);
+
+        // Sanity checking
+        uint64_t mask_reconstructed = (1LLU << size[i]) - 1;
+        mask_reconstructed <<= shift[i];
+        pl_assert(mask_reconstructed == mask[i]);
+    }
+
+    pl_plane_data_from_comps(data, size, shift);
+}
+
+bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits)
+{
+    struct pl_plane_data aligned = *data;
+    struct pl_bit_encoding bits = {0};
+
+    int offset = 0;
+
+#define SET_TEST(var, value)                \
+    do {                                    \
+        if (offset == 0) {                  \
+            (var) = (value);                \
+        } else if ((var) != (value)) {      \
+            goto misaligned;                \
+        }                                   \
+    } while (0)
+
+    for (int i = 0; i < MAX_COMPS; i++) {
+        if (!aligned.component_size[i])
+            break;
+
+        // Can't meaningfully align alpha channel, so just skip it. This is a
+        // limitation of the fact that `pl_bit_encoding` only applies to the
+        // main color channels, and changing this would be very nontrivial.
+        if (aligned.component_map[i] == PL_CHANNEL_A)
+            continue;
+
+        // Color depth is the original component size, before alignment
+        SET_TEST(bits.color_depth, aligned.component_size[i]);
+
+        // Try consuming padding of the current component to align down. This
+        // corresponds to an extra bit shift to the left.
+        int comp_start = offset + aligned.component_pad[i];
+        int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8);
+        left_delta = PL_MIN(left_delta, aligned.component_pad[i]);
+        aligned.component_pad[i] -= left_delta;
+        aligned.component_size[i] += left_delta;
+        SET_TEST(bits.bit_shift, left_delta);
+
+        // Try consuming padding of the next component to align up. This
+        // corresponds to simply ignoring some extra 0s on the end.
+        int comp_end = comp_start + aligned.component_size[i] - left_delta;
+        int right_delta = PL_ALIGN2(comp_end, 8) - comp_end;
+        if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) {
+            // This is the last component, so we can be greedy
+            aligned.component_size[i] += right_delta;
+        } else {
+            right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]);
+            aligned.component_pad[i+1] -= right_delta;
+            aligned.component_size[i] += right_delta;
+        }
+
+        // Sample depth is the new total component size, including padding
+        SET_TEST(bits.sample_depth, aligned.component_size[i]);
+
+        offset += aligned.component_pad[i] + aligned.component_size[i];
+    }
+
+    // Easy sanity check, to make sure that we don't exceed the known stride
+    if (aligned.pixel_stride && offset > aligned.pixel_stride * 8)
+        goto misaligned;
+
+    *data = aligned;
+    if (out_bits)
+        *out_bits = bits;
+    return true;
+
+misaligned:
+    // Can't properly align anything, so just do a no-op
+    if (out_bits)
+        *out_bits = (struct pl_bit_encoding) {0};
+    return false;
+}
+
+pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data)
+{
+    int dummy[4] = {0};
+    out_map = PL_DEF(out_map, dummy);
+
+    // Endian swapping requires compute shaders (currently)
+    if (data->swapped && !gpu->limits.max_ssbo_size)
+        return NULL;
+
+    // Count the number of components and initialize out_map
+    int num = 0;
+    for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) {
+        out_map[i] = -1;
+        if (data->component_size[i])
+            num = i+1;
+    }
+
+    for (int n = 0; n < gpu->num_formats; n++) {
+        pl_fmt fmt = gpu->formats[n];
+        if (fmt->opaque || fmt->num_components < num)
+            continue;
+        if (fmt->type != data->type || fmt->texel_size != data->pixel_stride)
+            continue;
+        if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE))
+            continue;
+
+        int idx = 0;
+
+        // Try mapping all pl_plane_data components to texture components
+        for (int i = 0; i < num; i++) {
+            // If there's padding we have to map it to an unused physical
+            // component first
+            int pad = data->component_pad[i];
+            if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad))
+                goto next_fmt;
+
+            // Otherwise, try and match this component
+            int size = data->component_size[i];
+            if (size && (idx >= 4 || fmt->host_bits[idx] != size))
+                goto next_fmt;
+            out_map[idx++] = data->component_map[i];
+        }
+
+        // Reject misaligned formats, check this last to only log such errors
+        // if this is the only thing preventing a format from being used, as
+        // this is likely an issue in the API usage.
+        if (data->row_stride % fmt->texel_align) {
+            PL_WARN(gpu, "Rejecting texture format '%s' due to misalignment: "
+                    "Row stride %zu is not a clean multiple of texel size %zu! "
+                    "This is likely an API usage bug.",
+                    fmt->name, data->row_stride, fmt->texel_align);
+            continue;
+        }
+
+        return fmt;
+
+next_fmt: ; // acts as `continue`
+    }
+
+    return NULL;
+}
+
+bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane,
+                     pl_tex *tex, const struct pl_plane_data *data)
+{
+    pl_assert(!data->buf ^ !data->pixels); // exactly one
+
+    int out_map[4];
+    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
+    if (!fmt) {
+        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
+        return false;
+
+        // TODO: try soft-converting to a supported format using e.g zimg?
+    }
+
+    bool ok = pl_tex_recreate(gpu, tex, pl_tex_params(
+        .w = data->width,
+        .h = data->height,
+        .format = fmt,
+        .sampleable = true,
+        .host_writable = true,
+        .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE,
+    ));
+
+    if (!ok) {
+        PL_ERR(gpu, "Failed initializing plane texture!");
+        return false;
+    }
+
+    if (out_plane) {
+        out_plane->texture = *tex;
+        out_plane->components = 0;
+        for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) {
+            out_plane->component_mapping[i] = out_map[i];
+            if (out_map[i] >= 0)
+                out_plane->components = i+1;
+        }
+    }
+
+    struct pl_tex_transfer_params params = {
+        .tex = *tex,
+        .rc.x1 = data->width, // set these for `pl_tex_transfer_size`
+        .rc.y1 = data->height,
+        .rc.z1 = 1,
+        .row_pitch = PL_DEF(data->row_stride, data->width * fmt->texel_size),
+        .ptr = (void *) data->pixels,
+        .buf = data->buf,
+        .buf_offset = data->buf_offset,
+        .callback = data->callback,
+        .priv = data->priv,
+    };
+
+    pl_buf swapbuf = NULL;
+    if (data->swapped) {
+        const size_t aligned = PL_ALIGN2(pl_tex_transfer_size(&params), 4);
+        swapbuf = pl_buf_create(gpu, pl_buf_params(
+            .size = aligned,
+            .storable = true,
+            .initial_data = params.ptr,
+
+            // Note: This may over-read from `ptr` if `ptr` is not aligned to a
+            // word boundary, but the extra texels will be ignored by
+            // `pl_tex_upload` so this UB should be a non-issue in practice.
+        ));
+        if (!swapbuf) {
+            PL_ERR(gpu, "Failed creating endian swapping buffer!");
+            return false;
+        }
+
+        struct pl_buf_copy_swap_params swap_params = {
+            .src = swapbuf,
+            .dst = swapbuf,
+            .size = aligned,
+            .wordsize = fmt->texel_size / fmt->num_components,
+        };
+
+        bool can_reuse = params.buf && params.buf->params.storable &&
+                         params.buf_offset % 4 == 0 &&
+                         params.buf_offset + aligned <= params.buf->params.size;
+
+        if (params.ptr) {
+            // Data is already uploaded (no-op), can swap in-place
+        } else if (can_reuse) {
+            // We can sample directly from the source buffer
+            swap_params.src = params.buf;
+            swap_params.src_offset = params.buf_offset;
+        } else {
+            // We sadly need to do a second memcpy
+            assert(params.buf);
+            PL_TRACE(gpu, "Double-slow path! pl_buf_copy -> pl_buf_copy_swap...");
+            pl_buf_copy(gpu, swapbuf, 0, params.buf, params.buf_offset,
+                        PL_MIN(aligned, params.buf->params.size - params.buf_offset));
+        }
+
+        if (!pl_buf_copy_swap(gpu, &swap_params)) {
+            PL_ERR(gpu, "Failed swapping endianness!");
+            pl_buf_destroy(gpu, &swapbuf);
+            return false;
+        }
+
+        params.ptr = NULL;
+        params.buf = swapbuf;
+        params.buf_offset = 0;
+    }
+
+    ok = pl_tex_upload(gpu, &params);
+    pl_buf_destroy(gpu, &swapbuf);
+    return ok;
+}
+
+bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane,
+                       pl_tex *tex, const struct pl_plane_data *data)
+{
+    if (data->swapped) {
+        PL_ERR(gpu, "Cannot call pl_recreate_plane on non-native endian plane "
+               "data, this is only supported for `pl_upload_plane`!");
+        return false;
+    }
+
+    int out_map[4];
+    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
+    if (!fmt) {
+        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
+        return false;
+    }
+
+    bool ok = pl_tex_recreate(gpu, tex, pl_tex_params(
+        .w = data->width,
+        .h = data->height,
+        .format = fmt,
+        .renderable = true,
+        .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE,
+        .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
+        .storable = fmt->caps & PL_FMT_CAP_STORABLE,
+    ));
+
+    if (!ok) {
+        PL_ERR(gpu, "Failed initializing plane texture!");
+        return false;
+    }
+
+    if (out_plane) {
+        out_plane->texture = *tex;
+        out_plane->components = 0;
+        for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) {
+            out_plane->component_mapping[i] = out_map[i];
+            if (out_map[i] >= 0)
+                out_plane->components = i+1;
+        }
+    }
+
+    return true;
+}