1 files changed, 382 insertions, 0 deletions
diff --git a/src/utils/upload.c b/src/utils/upload.c
new file mode 100644
index 0000000..75bd4bb
--- /dev/null
+++ b/src/utils/upload.c
@@ -0,0 +1,382 @@
+/*
+ * This file is part of libplacebo.
+ *
+ * libplacebo is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * libplacebo is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "log.h"
+#include "common.h"
+#include "gpu.h"
+
+#include <libplacebo/utils/upload.h>
+
+#define MAX_COMPS 4
+
+struct comp { // one component, described by its bit size and bit offset
+    int order; // original component index, e.g. 0, 1, 2, 3 for RGBA
+    int size;  // size in bits; 0 for an unused component
+    int shift; // bit-shift / offset in bits
+};
+
+// qsort() comparator: components with a nonzero size come first, in order of
+// ascending shift, so that the zero-sized ones can be ignored outright.
+static int compare_comp(const void *pa, const void *pb)
+{
+    const struct comp *lhs = pa, *rhs = pb;
+    const bool lhs_used = lhs->size != 0;
+    const bool rhs_used = rhs->size != 0;
+
+    // Exactly one of the two is zero-sized: the other one sorts first
+    if (lhs_used != rhs_used)
+        return lhs_used ? -1 : 1;
+
+    return PL_CMP(lhs->shift, rhs->shift);
+}
+
+// Derive `data`'s component size/pad/map from per-channel bit sizes and shifts
+void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4],
+                              int shift[4])
+{
+    struct comp sorted[MAX_COMPS];
+    for (int c = 0; c < PL_ARRAY_SIZE(sorted); c++)
+        sorted[c] = (struct comp) { .order = c, .size = size[c],
+                                    .shift = shift[c] };
+
+    // Order by ascending shift, with zero-sized (unused) channels at the end
+    qsort(sorted, MAX_COMPS, sizeof(sorted[0]), compare_comp);
+
+    // `cursor` tracks the first bit not covered by the preceding channels
+    int cursor = 0;
+    for (int c = 0; c < MAX_COMPS; c++) {
+        const struct comp *cur = &sorted[c];
+        if (!cur->size) {
+            // Clear the superfluous entries for sanity
+            data->component_size[c] = 0;
+            data->component_pad[c] = 0;
+            data->component_map[c] = 0;
+            continue;
+        }
+        assert(cur->shift >= cursor);
+        data->component_size[c] = cur->size;
+        data->component_pad[c] = cur->shift - cursor;
+        data->component_map[c] = cur->order;
+        cursor += data->component_size[c] + data->component_pad[c];
+    }
+}
+
+// Derive `data`'s component layout from per-channel bit masks. Every mask must
+// be a single contiguous run of set bits (or 0 for an unused channel).
+void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4])
+{
+    int size[4], shift[4];
+    for (int i = 0; i < PL_ARRAY_SIZE(size); i++) {
+        size[i] = __builtin_popcountll(mask[i]);
+        shift[i] = PL_MAX(0, __builtin_ffsll(mask[i]) - 1);
+
+        // Sanity check: rebuild the mask from size/shift. A full 64-bit mask
+        // is special-cased, since shifting a 64-bit value by 64 is undefined.
+        uint64_t ones = size[i] < 64 ? (1LLU << size[i]) - 1 : ~0LLU;
+        pl_assert((ones << shift[i]) == mask[i]);
+    }
+
+    pl_plane_data_from_comps(data, size, shift);
+}
+
+// Byte-align the color components of `data` by absorbing adjacent padding.
+// Returns false and leaves `data` untouched if this can't be done uniformly.
+bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits)
+{
+    struct pl_plane_data aligned = *data; // scratch copy, committed on success
+    struct pl_bit_encoding bits = {0};
+    int offset = 0; // end of the last processed component, in bits
+
+// Set `var` while processing the first component, then require it to match
+#define SET_TEST(var, value)                \
+    do {                                    \
+        if (offset == 0) {                  \
+            (var) = (value);                \
+        } else if ((var) != (value)) {      \
+            goto misaligned;                \
+        }                                   \
+    } while (0)
+
+    for (int i = 0; i < MAX_COMPS; i++) {
+        if (!aligned.component_size[i])
+            break;
+
+        // Can't meaningfully align alpha, as `pl_bit_encoding` only covers the
+        // main color channels (very nontrivial to change), so just skip it.
+        if (aligned.component_map[i] == PL_CHANNEL_A)
+            continue; // note: `offset` is not advanced past alpha
+
+        // Color depth is the original component size, before alignment
+        SET_TEST(bits.color_depth, aligned.component_size[i]);
+
+        // Try consuming padding of the current component to align down. This
+        // corresponds to an extra bit shift to the left.
+        int comp_start = offset + aligned.component_pad[i];
+        int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8);
+        left_delta = PL_MIN(left_delta, aligned.component_pad[i]);
+        aligned.component_pad[i] -= left_delta;
+        aligned.component_size[i] += left_delta;
+        SET_TEST(bits.bit_shift, left_delta);
+
+        // Try consuming padding of the next component to align up. This
+        // corresponds to simply ignoring some extra 0s on the end.
+        int comp_end = comp_start + aligned.component_size[i] - left_delta;
+        int right_delta = PL_ALIGN2(comp_end, 8) - comp_end;
+        if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) {
+            // This is the last component, so we can be greedy
+            aligned.component_size[i] += right_delta;
+        } else {
+            right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]);
+            aligned.component_pad[i+1] -= right_delta;
+            aligned.component_size[i] += right_delta;
+        }
+
+        // Sample depth is the new total component size, including padding
+        SET_TEST(bits.sample_depth, aligned.component_size[i]);
+        offset += aligned.component_pad[i] + aligned.component_size[i];
+    }
+
+    // Easy sanity check, to make sure that we don't exceed the known stride
+    if (aligned.pixel_stride && offset > aligned.pixel_stride * 8)
+        goto misaligned;
+
+    *data = aligned;
+    if (out_bits)
+        *out_bits = bits;
+    return true;
+
+misaligned:
+    // Can't properly align anything, so just do a no-op
+    if (out_bits)
+        *out_bits = (struct pl_bit_encoding) {0};
+    return false;
+}
+
+// Find a sampleable format on `gpu` matching the layout of `data`, or NULL.
+// `out_map` (optional) gets the source channel per texture component, or -1.
+pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data)
+{
+    int dummy[4] = {0};
+    out_map = PL_DEF(out_map, dummy);
+
+    // Endian swapping requires compute shaders (currently)
+    if (data->swapped && !gpu->limits.max_ssbo_size)
+        return NULL;
+
+    // Count the number of components and initialize out_map
+    int num = 0;
+    for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) {
+        out_map[i] = -1;
+        if (data->component_size[i])
+            num = i+1;
+    }
+
+    for (int n = 0; n < gpu->num_formats; n++) {
+        pl_fmt fmt = gpu->formats[n];
+        if (fmt->opaque || fmt->num_components < num)
+            continue;
+        if (fmt->type != data->type || fmt->texel_size != data->pixel_stride)
+            continue;
+        if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE))
+            continue;
+
+        // Map into a scratch array, so rejected formats can't pollute `out_map`
+        int map[4] = {-1, -1, -1, -1};
+        int idx = 0;
+        for (int i = 0; i < num; i++) {
+            // Padding has to be mapped to an unused physical component first
+            int pad = data->component_pad[i];
+            if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad))
+                goto next_fmt;
+
+            // Then match the component itself (`idx` is checked even if empty)
+            int size = data->component_size[i];
+            if (idx >= 4 || (size && fmt->host_bits[idx] != size))
+                goto next_fmt;
+            map[idx++] = data->component_map[i];
+        }
+
+        // Reject misaligned formats last, so that the warning is only logged
+        // when this is the sole obstacle (likely an issue in the API usage)
+        if (data->row_stride % fmt->texel_align) {
+            PL_WARN(gpu, "Rejecting texture format '%s' due to misalignment: "
+                    "Row stride %zu is not a clean multiple of texel size %zu! "
+                    "This is likely an API usage bug.",
+                    fmt->name, data->row_stride, fmt->texel_align);
+            continue;
+        }
+
+        for (int c = 0; c < 4; c++)
+            out_map[c] = map[c];
+        return fmt;
+next_fmt: ; // acts as `continue`
+    }
+    return NULL;
+}
+
+// Recreate `*tex` with a format matching `data` and upload the plane into it,
+// endian-swapping through a temporary buffer if `data->swapped` is set.
+bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane,
+                     pl_tex *tex, const struct pl_plane_data *data)
+{
+    pl_assert(!data->buf ^ !data->pixels); // exactly one
+
+    int mapping[4];
+    pl_fmt fmt = pl_plane_find_fmt(gpu, mapping, data);
+    if (!fmt) {
+        // TODO: try soft-converting to a supported format using e.g zimg?
+        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
+        return false;
+    }
+
+    if (!pl_tex_recreate(gpu, tex, pl_tex_params(
+        .format = fmt,
+        .w = data->width,
+        .h = data->height,
+        .sampleable = true,
+        .host_writable = true,
+        .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE,
+    ))) {
+        PL_ERR(gpu, "Failed initializing plane texture!");
+        return false;
+    }
+
+    if (out_plane) {
+        int used = 0; // one past the last texture component in use
+        for (int c = 0; c < PL_ARRAY_SIZE(mapping); c++) {
+            out_plane->component_mapping[c] = mapping[c];
+            used = mapping[c] >= 0 ? c + 1 : used;
+        }
+        out_plane->texture = *tex;
+        out_plane->components = used;
+    }
+
+    struct pl_tex_transfer_params xfer = {
+        .tex        = *tex,
+        .rc.x1      = data->width, // set these for `pl_tex_transfer_size`
+        .rc.y1      = data->height,
+        .rc.z1      = 1,
+        .row_pitch  = PL_DEF(data->row_stride, data->width * fmt->texel_size),
+        .ptr        = (void *) data->pixels,
+        .buf        = data->buf,
+        .buf_offset = data->buf_offset,
+        .callback   = data->callback,
+        .priv       = data->priv,
+    };
+
+    pl_buf tmp = NULL;
+    bool ok = false;
+    if (data->swapped) {
+        const size_t swap_size = PL_ALIGN2(pl_tex_transfer_size(&xfer), 4);
+        tmp = pl_buf_create(gpu, pl_buf_params(
+            .size           = swap_size,
+            .storable       = true,
+            .initial_data   = xfer.ptr,
+            // Note: This may over-read from `ptr` if `ptr` is not aligned to a
+            // word boundary, but the extra texels will be ignored by
+            // `pl_tex_upload` so this UB should be a non-issue in practice.
+        ));
+        if (!tmp) {
+            PL_ERR(gpu, "Failed creating endian swapping buffer!");
+            return false;
+        }
+
+        // Swap in-place by default: host data already sits in `tmp` (no-op)
+        struct pl_buf_copy_swap_params swap = {
+            .src        = tmp,
+            .dst        = tmp,
+            .size       = swap_size,
+            .wordsize   = fmt->texel_size / fmt->num_components,
+        };
+        if (!xfer.ptr) {
+            pl_buf src = xfer.buf;
+            bool direct = src && src->params.storable &&
+                          xfer.buf_offset % 4 == 0 &&
+                          xfer.buf_offset + swap_size <= src->params.size;
+            if (direct) {
+                // We can sample directly from the source buffer
+                swap.src = src;
+                swap.src_offset = xfer.buf_offset;
+            } else {
+                // We sadly need to do a second memcpy
+                assert(src);
+                PL_TRACE(gpu, "Double-slow path! pl_buf_copy -> pl_buf_copy_swap...");
+                pl_buf_copy(gpu, tmp, 0, src, xfer.buf_offset,
+                            PL_MIN(swap_size, src->params.size - xfer.buf_offset));
+            }
+        }
+
+        if (!pl_buf_copy_swap(gpu, &swap)) {
+            PL_ERR(gpu, "Failed swapping endianness!");
+            goto done;
+        }
+
+        // Upload from the swapped copy rather than from the original source
+        xfer.ptr = NULL;
+        xfer.buf = tmp;
+        xfer.buf_offset = 0;
+    }
+
+    ok = pl_tex_upload(gpu, &xfer);
+done:
+    pl_buf_destroy(gpu, &tmp);
+    return ok;
+}
+
+// Recreate `*tex` as a renderable texture with a format matching `data`,
+// without uploading anything. Non-native endian data is not supported here.
+bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane,
+                       pl_tex *tex, const struct pl_plane_data *data)
+{
+    if (data->swapped) {
+        PL_ERR(gpu, "Cannot call pl_recreate_plane on non-native endian plane "
+               "data, this is only supported for `pl_upload_plane`!");
+        return false;
+    }
+
+    int mapping[4];
+    pl_fmt fmt = pl_plane_find_fmt(gpu, mapping, data);
+    if (!fmt) {
+        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
+        return false;
+    }
+
+    if (!pl_tex_recreate(gpu, tex, pl_tex_params(
+        .format = fmt,
+        .w = data->width,
+        .h = data->height,
+        .renderable = true,
+        .storable = fmt->caps & PL_FMT_CAP_STORABLE,
+        .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
+        .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE,
+    ))) {
+        PL_ERR(gpu, "Failed initializing plane texture!");
+        return false;
+    }
+
+    if (!out_plane)
+        return true;
+
+    int used = 0; // one past the last texture component in use
+    for (int c = 0; c < PL_ARRAY_SIZE(mapping); c++) {
+        out_plane->component_mapping[c] = mapping[c];
+        used = mapping[c] >= 0 ? c + 1 : used;
+    }
+    out_plane->texture = *tex;
+    out_plane->components = used;
+    return true;
+}