/*
* This file is part of libplacebo.
*
* libplacebo is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* libplacebo is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
*/
#include "log.h"
#include "common.h"
#include "gpu.h"
#include
// Number of component slots handled per plane by the helpers in this file
#define MAX_COMPS 4
// Bit layout of a single component. Used as the sort element when deriving
// the `pl_plane_data` component fields from per-component sizes and shifts.
struct comp {
int order; // original component index, e.g. 0, 1, 2, 3 for RGBA
int size; // size in bits; 0 marks a component that is not present
int shift; // bit-shift / offset in bits
};
// qsort() comparator for `struct comp`.
//
// Components that are present (non-zero size) sort before components that are
// absent (zero size), so the absent ones end up at the tail of the array and
// can be ignored. Within each group, components are ordered by ascending shift.
static int compare_comp(const void *pa, const void *pb)
{
    const struct comp *lhs = pa;
    const struct comp *rhs = pb;

    // Exactly one of the two is absent: the present one goes first
    if (!lhs->size != !rhs->size)
        return lhs->size ? -1 : 1;

    // Both present (or both absent): order by bit position
    return PL_CMP(lhs->shift, rhs->shift);
}
void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4],
int shift[4])
{
struct comp comps[MAX_COMPS];
for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) {
comps[i].order = i;
comps[i].size = size[i];
comps[i].shift = shift[i];
}
// Sort the components by shift
qsort(comps, MAX_COMPS, sizeof(struct comp), compare_comp);
// Generate the resulting component size/pad/map
int offset = 0;
for (int i = 0; i < MAX_COMPS; i++) {
if (comps[i].size) {
assert(comps[i].shift >= offset);
data->component_size[i] = comps[i].size;
data->component_pad[i] = comps[i].shift - offset;
data->component_map[i] = comps[i].order;
offset += data->component_size[i] + data->component_pad[i];
} else {
// Clear the superfluous entries for sanity
data->component_size[i] = 0;
data->component_pad[i] = 0;
data->component_map[i] = 0;
}
}
}
// Fill in the component fields of `data` from per-component bit masks.
//
// For each of the four masks, the component size is the number of set bits and
// the shift is the index of the lowest set bit (0 for an empty mask). Every
// mask must consist of a single contiguous run of set bits; this is verified
// with `pl_assert`. The resulting sizes/shifts are then handed to
// `pl_plane_data_from_comps`.
void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4])
{
    int size[4];
    int shift[4];

    for (int i = 0; i < PL_ARRAY_SIZE(size); i++) {
        size[i] = __builtin_popcountll(mask[i]);
        // ffsll() returns 0 for an empty mask, so clamp the shift to 0
        shift[i] = PL_MAX(0, __builtin_ffsll(mask[i]) - 1);

        // Sanity checking: rebuild the mask from size/shift and compare.
        // A full 64-bit mask needs special casing, because shifting a 64-bit
        // value by 64 is undefined behavior.
        uint64_t mask_reconstructed = size[i] < 64 ? (1LLU << size[i]) - 1
                                                   : UINT64_MAX;
        mask_reconstructed <<= shift[i];
        pl_assert(mask_reconstructed == mask[i]);
    }

    pl_plane_data_from_comps(data, size, shift);
}
// Try to grow the non-alpha components of `data` towards 8-bit boundaries by
// absorbing neighbouring padding bits into the components themselves.
//
// The extra bits absorbed below a component are reported as `bit_shift`, the
// original component size as `color_depth`, and the grown component size as
// `sample_depth`. All processed components must agree on these three values.
//
// On success, `*data` is replaced by the adjusted layout, `*out_bits` (if
// non-NULL) receives the bit encoding, and true is returned. On failure,
// `*data` is left untouched, `*out_bits` (if non-NULL) is zeroed, and false
// is returned.
bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits)
{
// Work on a copy, so that `*data` is only modified on success
struct pl_plane_data aligned = *data;
struct pl_bit_encoding bits = {0};
// Running bit position just past the components processed so far
int offset = 0;
// SET_TEST(var, value): while `offset` is still 0, `var` is simply assigned;
// afterwards `var` must already equal `value`, otherwise control jumps to the
// `misaligned` label at the end of the function.
#define SET_TEST(var, value) \
do { \
if (offset == 0) { \
(var) = (value); \
} else if ((var) != (value)) { \
goto misaligned; \
} \
} while (0)
for (int i = 0; i < MAX_COMPS; i++) {
// The first zero-sized component terminates the list
if (!aligned.component_size[i])
break;
// Can't meaningfully align alpha channel, so just skip it. This is a
// limitation of the fact that `pl_bit_encoding` only applies to the
// main color channels, and changing this would be very nontrivial.
//
// Note that this `continue` also skips the `offset` update at the bottom
// of the loop, so a skipped alpha component does not contribute to
// `offset`. NOTE(review): confirm this is intended for layouts where
// alpha is not the last component.
if (aligned.component_map[i] == PL_CHANNEL_A)
continue;
// Color depth is the original component size, before alignment
SET_TEST(bits.color_depth, aligned.component_size[i]);
// Try consuming padding of the current component to align down. This
// corresponds to an extra bit shift to the left.
// (presumably PL_ALIGN2(x, 8) rounds x up to a multiple of 8, making
// PL_ALIGN2(comp_start - 7, 8) the round-down of comp_start - verify
// against the macro definition)
int comp_start = offset + aligned.component_pad[i];
int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8);
// Never consume more bits than this component's own padding provides
left_delta = PL_MIN(left_delta, aligned.component_pad[i]);
aligned.component_pad[i] -= left_delta;
aligned.component_size[i] += left_delta;
SET_TEST(bits.bit_shift, left_delta);
// Try consuming padding of the next component to align up. This
// corresponds to simply ignoring some extra 0s on the end.
// (`left_delta` is subtracted again because `component_size[i]` already
// includes it, while `comp_start` is the original start position)
int comp_end = comp_start + aligned.component_size[i] - left_delta;
int right_delta = PL_ALIGN2(comp_end, 8) - comp_end;
if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) {
// This is the last component, so we can be greedy
aligned.component_size[i] += right_delta;
} else {
// Otherwise, only take what the next component has as padding
right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]);
aligned.component_pad[i+1] -= right_delta;
aligned.component_size[i] += right_delta;
}
// Sample depth is the new total component size, including padding
SET_TEST(bits.sample_depth, aligned.component_size[i]);
offset += aligned.component_pad[i] + aligned.component_size[i];
}
// Easy sanity check, to make sure that we don't exceed the known stride
// (`pixel_stride` is multiplied by 8 to compare against the bit offset; a
// stride of 0 skips the check)
if (aligned.pixel_stride && offset > aligned.pixel_stride * 8)
goto misaligned;
// Success: commit the adjusted layout and report the bit encoding
*data = aligned;
if (out_bits)
*out_bits = bits;
return true;
misaligned:
// Can't properly align anything, so just do a no-op
if (out_bits)
*out_bits = (struct pl_bit_encoding) {0};
return false;
}
// Search `gpu->formats` for a texture format that can hold the plane described
// by `data`.
//
// A candidate must be non-opaque, sampleable, have at least as many components
// as `data` uses, and match `data->type` and `data->pixel_stride`. Each padding
// region and each component of `data` must then line up, in order, with a
// physical component of the format having the same number of host bits.
//
// `out_map` (optional, may be NULL) receives, for each physical texture
// component, the logical component it carries, or -1 if it carries none.
// Its contents are only meaningful when a format is returned.
//
// Returns the first matching format, or NULL if there is none (or if the data
// is byte-swapped and the GPU reports no SSBO support).
pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data)
{
    int dummy[4] = {0};
    out_map = PL_DEF(out_map, dummy);

    // Endian swapping requires compute shaders (currently)
    if (data->swapped && !gpu->limits.max_ssbo_size)
        return NULL;

    // Count the number of components and initialize out_map
    int num = 0;
    for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) {
        out_map[i] = -1;
        if (data->component_size[i])
            num = i+1;
    }

    for (int n = 0; n < gpu->num_formats; n++) {
        pl_fmt fmt = gpu->formats[n];
        if (fmt->opaque || fmt->num_components < num)
            continue;
        if (fmt->type != data->type || fmt->texel_size != data->pixel_stride)
            continue;
        if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE))
            continue;

        // Index of the next unclaimed physical texture component
        int idx = 0;

        // Try mapping all pl_plane_data components to texture components
        for (int i = 0; i < num; i++) {
            // If there's padding we have to map it to an unused physical
            // component first
            int pad = data->component_pad[i];
            if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad))
                goto next_fmt;

            // Every entry consumes one physical component, even a zero-sized
            // one, so make sure there is one left before touching `out_map`.
            // (Previously a zero-sized entry bypassed this check and could
            // write past the end of `out_map`.)
            if (idx >= 4)
                goto next_fmt;

            // Otherwise, try and match this component
            int size = data->component_size[i];
            if (size && fmt->host_bits[idx] != size)
                goto next_fmt;
            out_map[idx++] = data->component_map[i];
        }

        // Reject misaligned formats, check this last to only log such errors
        // if this is the only thing preventing a format from being used, as
        // this is likely an issue in the API usage.
        if (data->row_stride % fmt->texel_align) {
            PL_WARN(gpu, "Rejecting texture format '%s' due to misalignment: "
                    "Row stride %zu is not a clean multiple of texel size %zu! "
                    "This is likely an API usage bug.",
                    fmt->name, data->row_stride, fmt->texel_align);
            continue;
        }

        return fmt;

next_fmt: ; // acts as `continue`
    }

    return NULL;
}
// Upload the plane described by `data` into `*tex`.
//
// A compatible texture format is picked with `pl_plane_find_fmt`, `*tex` is
// (re)created as a sampleable, host-writable texture of the plane's size, and
// the pixel data is uploaded from either `data->pixels` or `data->buf`
// (exactly one of the two must be set). If `out_plane` is non-NULL, it is
// filled with the texture and the resulting component mapping.
//
// For byte-swapped data (`data->swapped`), the data is first routed through a
// temporary buffer in which the endianness is swapped, and the upload then
// happens from that buffer.
//
// Returns true on success, false (after logging an error) on failure.
bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane,
                     pl_tex *tex, const struct pl_plane_data *data)
{
    pl_assert(!data->buf ^ !data->pixels); // exactly one

    int out_map[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
    if (!fmt) {
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;

        // TODO: try soft-converting to a supported format using e.g zimg?
    }

    bool ok = pl_tex_recreate(gpu, tex, pl_tex_params(
        .w = data->width,
        .h = data->height,
        .format = fmt,
        .sampleable = true,
        .host_writable = true,
        .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE,
    ));

    if (!ok) {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    if (out_plane) {
        out_plane->texture = *tex;
        out_plane->components = 0;
        // `components` ends up as one past the highest mapped texture component
        for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) {
            out_plane->component_mapping[i] = out_map[i];
            if (out_map[i] >= 0)
                out_plane->components = i+1;
        }
    }

    struct pl_tex_transfer_params params = {
        .tex        = *tex,
        .rc.x1      = data->width, // set these for `pl_tex_transfer_size`
        .rc.y1      = data->height,
        .rc.z1      = 1,
        .row_pitch  = PL_DEF(data->row_stride, data->width * fmt->texel_size),
        .ptr        = (void *) data->pixels,
        .buf        = data->buf,
        .buf_offset = data->buf_offset,
        .callback   = data->callback,
        .priv       = data->priv,
    };

    // Temporary buffer for endian swapping; stays NULL for native-endian data
    pl_buf swapbuf = NULL;
    if (data->swapped) {
        // Round the transfer size up to a multiple of 4 bytes
        const size_t aligned = PL_ALIGN2(pl_tex_transfer_size(&params), 4);
        swapbuf = pl_buf_create(gpu, pl_buf_params(
            .size           = aligned,
            .storable       = true,
            .initial_data   = params.ptr,

            // Note: This may over-read from `ptr` if `ptr` is not aligned to a
            // word boundary, but the extra texels will be ignored by
            // `pl_tex_upload` so this UB should be a non-issue in practice.
        ));
        if (!swapbuf) {
            PL_ERR(gpu, "Failed creating endian swapping buffer!");
            return false;
        }

        // By default, swap in-place inside the temporary buffer
        struct pl_buf_copy_swap_params swap_params = {
            .src        = swapbuf,
            .dst        = swapbuf,
            .size       = aligned,
            .wordsize   = fmt->texel_size / fmt->num_components,
        };

        // The user's buffer can serve directly as the swap source if it is
        // storable, 4-byte aligned and large enough for the padded size
        bool can_reuse = params.buf && params.buf->params.storable &&
                         params.buf_offset % 4 == 0 &&
                         params.buf_offset + aligned <= params.buf->params.size;

        if (params.ptr) {
            // Data is already uploaded (no-op), can swap in-place
        } else if (can_reuse) {
            // We can sample directly from the source buffer
            swap_params.src = params.buf;
            swap_params.src_offset = params.buf_offset;
        } else {
            // We sadly need to do a second memcpy
            assert(params.buf);
            PL_TRACE(gpu, "Double-slow path! pl_buf_copy -> pl_buf_copy_swap...");
            pl_buf_copy(gpu, swapbuf, 0, params.buf, params.buf_offset,
                        PL_MIN(aligned, params.buf->params.size - params.buf_offset));
        }

        if (!pl_buf_copy_swap(gpu, &swap_params)) {
            PL_ERR(gpu, "Failed swapping endianness!");
            pl_buf_destroy(gpu, &swapbuf);
            return false;
        }

        // From here on, upload from the swapped buffer instead
        params.ptr = NULL;
        params.buf = swapbuf;
        params.buf_offset = 0;
    }

    ok = pl_tex_upload(gpu, &params);
    pl_buf_destroy(gpu, &swapbuf);
    return ok;
}
// (Re)create `*tex` as a render target matching the plane described by `data`,
// without uploading any pixel data.
//
// The texture format is picked with `pl_plane_find_fmt`. The texture is always
// renderable; host-readable, blit-destination and storable usage are enabled
// whenever the chosen format advertises the corresponding capability. If
// `out_plane` is non-NULL, it is filled with the texture and the resulting
// component mapping.
//
// Byte-swapped plane data is rejected. Returns true on success, false (after
// logging an error) on failure.
bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane,
                       pl_tex *tex, const struct pl_plane_data *data)
{
    if (data->swapped) {
        PL_ERR(gpu, "Cannot call pl_recreate_plane on non-native endian plane "
               "data, this is only supported for `pl_upload_plane`!");
        return false;
    }

    int mapping[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, mapping, data);
    if (!fmt) {
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;
    }

    if (!pl_tex_recreate(gpu, tex, pl_tex_params(
            .w = data->width,
            .h = data->height,
            .format = fmt,
            .renderable = true,
            .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE,
            .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
            .storable = fmt->caps & PL_FMT_CAP_STORABLE,
        )))
    {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    // Nothing else to report if the caller doesn't want the plane description
    if (!out_plane)
        return true;

    out_plane->texture = *tex;
    out_plane->components = 0;
    // `components` ends up as one past the highest mapped texture component
    for (int c = 0; c < PL_ARRAY_SIZE(mapping); c++) {
        out_plane->component_mapping[c] = mapping[c];
        if (mapping[c] >= 0)
            out_plane->components = c + 1;
    }

    return true;
}