summaryrefslogtreecommitdiffstats
path: root/src/utils/upload.c
diff options
context:
space:
mode:
Diffstat (limited to 'src/utils/upload.c')
-rw-r--r--src/utils/upload.c382
1 files changed, 382 insertions, 0 deletions
diff --git a/src/utils/upload.c b/src/utils/upload.c
new file mode 100644
index 0000000..75bd4bb
--- /dev/null
+++ b/src/utils/upload.c
@@ -0,0 +1,382 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "log.h"
+#include "common.h"
+#include "gpu.h"
+
+#include <libplacebo/utils/upload.h>
+
+#define MAX_COMPS 4 // number of component slots walked per plane by the helpers in this file
+
+struct comp { // one component, described by its position in bits; sorted via compare_comp
+ int order; // index of the component in the caller's arrays, e.g. 0, 1, 2, 3 for RGBA
+ int size; // size in bits; 0 marks an unused entry
+ int shift; // bit-shift / offset in bits
+};
+
+// qsort() comparator for `struct comp`. Entries with a non-zero size are
+// ordered before entries with a size of 0; within each group, entries are
+// compared by their shift using PL_CMP.
+static int compare_comp(const void *pa, const void *pb)
+{
+    const struct comp *lhs = pa;
+    const struct comp *rhs = pb;
+
+    // Exactly one of the two is empty: the populated one wins, so that all
+    // unused entries collect at the tail and can be ignored outright
+    if (!lhs->size != !rhs->size)
+        return lhs->size ? -1 : 1;
+
+    // Both populated (or both empty): fall back to comparing the shift
+    return PL_CMP(lhs->shift, rhs->shift);
+}
+
+// Fills the component_size / component_pad / component_map arrays of `data`
+// from a per-component list of bit sizes and bit shifts.
+//
+// size[i] and shift[i] describe component `i`; a size of 0 marks it as unused.
+// The components are sorted with `compare_comp` and then written out in that
+// order: component_pad receives the gap between the end of the previous
+// component and this component's shift, component_map the component's original
+// index. Slots belonging to unused components are zeroed.
+void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4],
+                              int shift[4])
+{
+    struct comp sorted[MAX_COMPS];
+    for (int c = 0; c < PL_ARRAY_SIZE(sorted); c++) {
+        sorted[c] = (struct comp) {
+            .order = c,
+            .size  = size[c],
+            .shift = shift[c],
+        };
+    }
+
+    // Populated components first (by shift), unused ones at the end
+    qsort(sorted, MAX_COMPS, sizeof(sorted[0]), compare_comp);
+
+    // `cursor` tracks the first bit not yet covered by an emitted component
+    int cursor = 0;
+    for (int c = 0; c < MAX_COMPS; c++) {
+        const struct comp *cur = &sorted[c];
+
+        if (!cur->size) {
+            // Clear the superfluous entries for sanity
+            data->component_size[c] = 0;
+            data->component_pad[c] = 0;
+            data->component_map[c] = 0;
+            continue;
+        }
+
+        // Components must not overlap the ones emitted before them
+        assert(cur->shift >= cursor);
+        data->component_size[c] = cur->size;
+        data->component_pad[c] = cur->shift - cursor;
+        data->component_map[c] = cur->order;
+        cursor += data->component_size[c] + data->component_pad[c];
+    }
+}
+
+// Derives per-component bit sizes and shifts from four bit masks and forwards
+// them to `pl_plane_data_from_comps`.
+//
+// mask[i] selects the bits that make up component `i`. The size is the number
+// of set bits and the shift is the position of the lowest set bit; an all-zero
+// mask yields size 0 and shift 0. Every mask is expected to be one contiguous
+// run of set bits, which is checked with pl_assert.
+void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4])
+{
+    int size[4];
+    int shift[4];
+
+    for (int i = 0; i < PL_ARRAY_SIZE(size); i++) {
+        size[i] = __builtin_popcountll(mask[i]);
+        // ffsll yields 0 for an empty mask, so clamp the resulting -1 to 0
+        shift[i] = PL_MAX(0, __builtin_ffsll(mask[i]) - 1);
+
+        // Sanity checking: rebuild a contiguous mask from size/shift and make
+        // sure it matches the input. A mask with all 64 bits set must be
+        // special-cased, because shifting a 64-bit value by 64 is undefined.
+        uint64_t mask_reconstructed =
+            size[i] < 64 ? (1LLU << size[i]) - 1 : UINT64_MAX;
+        mask_reconstructed <<= shift[i];
+        pl_assert(mask_reconstructed == mask[i]);
+    }
+
+    pl_plane_data_from_comps(data, size, shift);
+}
+
+bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits)
+{ /*
+ * Tries to widen every non-alpha component of `data` into the padding
+ * around it, towards the boundaries computed by PL_ALIGN2(..., 8), and
+ * reports the bit encoding that results from doing so.
+ *
+ * All changes are made on a local copy. On success the copy is written
+ * back to `*data`, `*out_bits` (if non-NULL) receives the color depth, bit
+ * shift and sample depth, and true is returned. If the processed components
+ * disagree on any of those three values, or the summed bits exceed a
+ * non-zero pixel stride, `*data` is left untouched, `*out_bits` (if
+ * non-NULL) is zeroed and false is returned.
+ */
+ struct pl_plane_data aligned = *data;
+ struct pl_bit_encoding bits = {0};
+
+ int offset = 0; // bits covered by the components processed so far; 0 doubles as SET_TEST's "nothing recorded yet" marker
+
+#define SET_TEST(var, value) \
+ do { \
+ if (offset == 0) { \
+ (var) = (value); \
+ } else if ((var) != (value)) { \
+ goto misaligned; \
+ } \
+ } while (0)
+
+ for (int i = 0; i < MAX_COMPS; i++) {
+ if (!aligned.component_size[i])
+ break; // the first empty slot ends the component list
+
+ // Can't meaningfully align alpha channel, so just skip it. This is a
+ // limitation of the fact that `pl_bit_encoding` only applies to the
+ // main color channels, and changing this would be very nontrivial.
+ if (aligned.component_map[i] == PL_CHANNEL_A)
+ continue; // NOTE(review): this also skips the `offset` update at the end of the loop body - confirm that is intended when alpha is not the last component
+
+ // Color depth is the original component size, before alignment
+ SET_TEST(bits.color_depth, aligned.component_size[i]);
+
+ // Try consuming padding of the current component to align down. This
+ // corresponds to an extra bit shift to the left.
+ int comp_start = offset + aligned.component_pad[i];
+ int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8); // NOTE(review): presumably PL_ALIGN2(x, 8) rounds x up to a multiple of 8, making this the distance back to the previous boundary - confirm
+ left_delta = PL_MIN(left_delta, aligned.component_pad[i]); // never take more than the padding that is actually available
+ aligned.component_pad[i] -= left_delta;
+ aligned.component_size[i] += left_delta;
+ SET_TEST(bits.bit_shift, left_delta);
+
+ // Try consuming padding of the next component to align up. This
+ // corresponds to simply ignoring some extra 0s on the end.
+ int comp_end = comp_start + aligned.component_size[i] - left_delta; // end of the component as it was before widening
+ int right_delta = PL_ALIGN2(comp_end, 8) - comp_end;
+ if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) {
+ // This is the last component, so we can be greedy
+ aligned.component_size[i] += right_delta;
+ } else {
+ right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]); // limited by the next component's padding, which is shrunk accordingly
+ aligned.component_pad[i+1] -= right_delta;
+ aligned.component_size[i] += right_delta;
+ }
+
+ // Sample depth is the new total component size, including padding
+ SET_TEST(bits.sample_depth, aligned.component_size[i]);
+
+ offset += aligned.component_pad[i] + aligned.component_size[i];
+ }
+
+ // Easy sanity check, to make sure that we don't exceed the known stride
+ if (aligned.pixel_stride && offset > aligned.pixel_stride * 8) // a pixel_stride of 0 skips the check; the * 8 presumably converts bytes to bits - confirm
+ goto misaligned;
+
+ *data = aligned; // commit the widened layout only now that every check has passed
+ if (out_bits)
+ *out_bits = bits;
+ return true;
+
+misaligned:
+ // Can't properly align anything, so just do a no-op
+ if (out_bits)
+ *out_bits = (struct pl_bit_encoding) {0};
+ return false;
+}
+
+// Tries to line up the first `num` components of `data`, together with any
+// padding in front of them, with consecutive entries of `fmt->host_bits`.
+// Padding has to be matched by a host component of exactly that size, which
+// is then left unmapped. On success the component mapping is stored in
+// `out_map`. Returns false on the first mismatch; `out_map` may have been
+// partially written by then.
+static bool plane_fmt_map_comps(pl_fmt fmt, const struct pl_plane_data *data,
+                                int num, int out_map[4])
+{
+    int slot = 0;
+
+    for (int c = 0; c < num; c++) {
+        // If there's padding we have to map it to an unused physical
+        // component first
+        const int pad = data->component_pad[c];
+        if (pad) {
+            if (slot >= 4 || fmt->host_bits[slot++] != pad)
+                return false;
+        }
+
+        // Otherwise, try and match this component
+        const int size = data->component_size[c];
+        if (size && (slot >= 4 || fmt->host_bits[slot] != size))
+            return false;
+
+        out_map[slot++] = data->component_map[c];
+    }
+
+    return true;
+}
+
+// Searches `gpu->formats`, in order, for the first non-opaque, sampleable
+// format whose type, texel size and host component layout match `data`, and
+// whose texel alignment evenly divides `data->row_stride`.
+//
+// `out_map` receives, per texture component, the index of the plane component
+// mapped to it, or -1 if unmapped. When the caller does not supply one, it is
+// replaced by a local scratch array via PL_DEF.
+//
+// Returns the matching format, or NULL if there is none. NULL is also returned
+// for `swapped` data when `gpu->limits.max_ssbo_size` is 0.
+pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data)
+{
+    int dummy[4] = {0};
+    out_map = PL_DEF(out_map, dummy);
+
+    // Endian swapping requires compute shaders (currently)
+    if (data->swapped && !gpu->limits.max_ssbo_size)
+        return NULL;
+
+    // Count the number of components and initialize out_map
+    int num = 0;
+    for (int c = 0; c < PL_ARRAY_SIZE(data->component_size); c++) {
+        out_map[c] = -1;
+        if (data->component_size[c])
+            num = c+1;
+    }
+
+    for (int n = 0; n < gpu->num_formats; n++) {
+        pl_fmt fmt = gpu->formats[n];
+
+        // Cheap property checks first
+        const bool candidate = !fmt->opaque &&
+                               fmt->num_components >= num &&
+                               fmt->type == data->type &&
+                               fmt->texel_size == data->pixel_stride &&
+                               (fmt->caps & PL_FMT_CAP_SAMPLEABLE);
+        if (!candidate)
+            continue;
+
+        // Try mapping all pl_plane_data components to texture components
+        if (!plane_fmt_map_comps(fmt, data, num, out_map))
+            continue;
+
+        // Reject misaligned formats, check this last to only log such errors
+        // if this is the only thing preventing a format from being used, as
+        // this is likely an issue in the API usage.
+        if (data->row_stride % fmt->texel_align) {
+            PL_WARN(gpu, "Rejecting texture format '%s' due to misalignment: "
+                    "Row stride %zu is not a clean multiple of texel size %zu! "
+                    "This is likely an API usage bug.",
+                    fmt->name, data->row_stride, fmt->texel_align);
+            continue;
+        }
+
+        return fmt;
+    }
+
+    return NULL;
+}
+
+// Uploads the pixel data described by `data` into `*tex`, (re)creating the
+// texture with a format chosen by `pl_plane_find_fmt`.
+//
+// Exactly one of `data->buf` / `data->pixels` must be set. If `out_plane` is
+// non-NULL, it receives the texture together with the component mapping of
+// the chosen format. Data flagged as `swapped` is first passed through
+// `pl_buf_copy_swap` using a temporary buffer, which is destroyed again
+// before returning.
+//
+// Returns false if no format matches, or if texture creation, swap buffer
+// creation, the endian swap or the upload itself fails.
+bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane,
+                     pl_tex *tex, const struct pl_plane_data *data)
+{
+    pl_assert(!data->buf ^ !data->pixels); // exactly one
+
+    int out_map[4];
+    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
+    if (!fmt) {
+        // TODO: try soft-converting to a supported format using e.g zimg?
+        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
+        return false;
+    }
+
+    const bool created = pl_tex_recreate(gpu, tex, pl_tex_params(
+        .w = data->width,
+        .h = data->height,
+        .format = fmt,
+        .sampleable = true,
+        .host_writable = true,
+        .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE,
+    ));
+
+    if (!created) {
+        PL_ERR(gpu, "Failed initializing plane texture!");
+        return false;
+    }
+
+    if (out_plane) {
+        // The component count is one past the last mapped texture component
+        int used = 0;
+        out_plane->texture = *tex;
+        for (int c = 0; c < PL_ARRAY_SIZE(out_map); c++) {
+            out_plane->component_mapping[c] = out_map[c];
+            if (out_map[c] >= 0)
+                used = c+1;
+        }
+        out_plane->components = used;
+    }
+
+    struct pl_tex_transfer_params params = {
+        .tex = *tex,
+        .rc.x1 = data->width, // set these for `pl_tex_transfer_size`
+        .rc.y1 = data->height,
+        .rc.z1 = 1,
+        .row_pitch = PL_DEF(data->row_stride, data->width * fmt->texel_size),
+        .ptr = (void *) data->pixels,
+        .buf = data->buf,
+        .buf_offset = data->buf_offset,
+        .callback = data->callback,
+        .priv = data->priv,
+    };
+
+    pl_buf swapbuf = NULL;
+    bool ok = false;
+
+    if (data->swapped) {
+        // Note: Rounding the size up to a multiple of 4 may over-read from
+        // `ptr` if `ptr` is not aligned to a word boundary, but the extra
+        // texels will be ignored by `pl_tex_upload` so this UB should be a
+        // non-issue in practice.
+        const size_t swap_size = PL_ALIGN2(pl_tex_transfer_size(&params), 4);
+        swapbuf = pl_buf_create(gpu, pl_buf_params(
+            .size = swap_size,
+            .storable = true,
+            .initial_data = params.ptr,
+        ));
+        if (!swapbuf) {
+            PL_ERR(gpu, "Failed creating endian swapping buffer!");
+            return false;
+        }
+
+        // Default: swap in-place inside the temporary buffer
+        struct pl_buf_copy_swap_params swap_params = {
+            .src = swapbuf,
+            .dst = swapbuf,
+            .size = swap_size,
+            .wordsize = fmt->texel_size / fmt->num_components,
+        };
+
+        // With host memory as the source, the data is already uploaded via
+        // `initial_data`, so only buffer sources need further handling
+        if (!params.ptr) {
+            const bool src_usable = params.buf && params.buf->params.storable &&
+                                    params.buf_offset % 4 == 0 &&
+                                    params.buf_offset + swap_size <= params.buf->params.size;
+
+            if (src_usable) {
+                // We can sample directly from the source buffer
+                swap_params.src = params.buf;
+                swap_params.src_offset = params.buf_offset;
+            } else {
+                // We sadly need to do a second memcpy
+                assert(params.buf);
+                PL_TRACE(gpu, "Double-slow path! pl_buf_copy -> pl_buf_copy_swap...");
+                pl_buf_copy(gpu, swapbuf, 0, params.buf, params.buf_offset,
+                            PL_MIN(swap_size, params.buf->params.size - params.buf_offset));
+            }
+        }
+
+        ok = pl_buf_copy_swap(gpu, &swap_params);
+        if (!ok) {
+            PL_ERR(gpu, "Failed swapping endianness!");
+            goto done;
+        }
+
+        // From here on, upload from the swapped copy instead of the original
+        params.ptr = NULL;
+        params.buf = swapbuf;
+        params.buf_offset = 0;
+    }
+
+    ok = pl_tex_upload(gpu, &params);
+
+done:
+    pl_buf_destroy(gpu, &swapbuf);
+    return ok;
+}
+
+// (Re)creates `*tex` as a renderable texture matching the layout described by
+// `data`, without uploading any pixel data. The format is chosen by
+// `pl_plane_find_fmt`; host-readable, blit-destination and storable usage are
+// requested according to the capabilities of that format.
+//
+// If `out_plane` is non-NULL, it receives the texture together with the
+// component mapping of the chosen format.
+//
+// Returns false for `swapped` plane data, if no format matches, or if the
+// texture could not be created.
+bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane,
+                       pl_tex *tex, const struct pl_plane_data *data)
+{
+    if (data->swapped) {
+        PL_ERR(gpu, "Cannot call pl_recreate_plane on non-native endian plane "
+               "data, this is only supported for `pl_upload_plane`!");
+        return false;
+    }
+
+    int out_map[4];
+    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
+    if (!fmt) {
+        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
+        return false;
+    }
+
+    const bool created = pl_tex_recreate(gpu, tex, pl_tex_params(
+        .w = data->width,
+        .h = data->height,
+        .format = fmt,
+        .renderable = true,
+        .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE,
+        .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
+        .storable = fmt->caps & PL_FMT_CAP_STORABLE,
+    ));
+
+    if (!created) {
+        PL_ERR(gpu, "Failed initializing plane texture!");
+        return false;
+    }
+
+    if (!out_plane)
+        return true;
+
+    // The component count is one past the last mapped texture component
+    int used = 0;
+    out_plane->texture = *tex;
+    for (int c = 0; c < PL_ARRAY_SIZE(out_map); c++) {
+        out_plane->component_mapping[c] = out_map[c];
+        if (out_map[c] >= 0)
+            used = c+1;
+    }
+    out_plane->components = used;
+
+    return true;
+}