/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "log.h"
#include "common.h"
#include "gpu.h"

// NOTE(review): the original bare `#include` lost its angle-bracket argument
// during extraction; restored to the public header for this module — confirm.
#include <libplacebo/utils/upload.h>

#define MAX_COMPS 4

// Description of one packed texel component, used only as a sortable record
// while normalizing user-provided size/shift arrays.
struct comp {
    int order; // e.g. 0, 1, 2, 3 for RGBA
    int size;  // size in bits
    int shift; // bit-shift / offset in bits
};

// qsort comparator: zero-sized components sort last (so they can be ignored),
// otherwise components are ordered by ascending bit shift.
static int compare_comp(const void *pa, const void *pb)
{
    const struct comp *a = pa, *b = pb;

    // Move all of the components with a size of 0 to the end, so they can
    // be ignored outright
    if (a->size && !b->size)
        return -1;
    if (b->size && !a->size)
        return 1;

    // Otherwise, just compare based on the shift
    return PL_CMP(a->shift, b->shift);
}

// Fill in `data->component_{size,pad,map}` from per-channel bit sizes and
// bit shifts. Components are sorted by shift; padding is the gap (in bits)
// between the end of the previous component and the start of the next.
void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4],
                              int shift[4])
{
    struct comp comps[MAX_COMPS];
    for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) {
        comps[i].order = i;
        comps[i].size = size[i];
        comps[i].shift = shift[i];
    }

    // Sort the components by shift
    qsort(comps, MAX_COMPS, sizeof(struct comp), compare_comp);

    // Generate the resulting component size/pad/map
    int offset = 0;
    for (int i = 0; i < MAX_COMPS; i++) {
        if (comps[i].size) {
            assert(comps[i].shift >= offset);
            data->component_size[i] = comps[i].size;
            data->component_pad[i] = comps[i].shift - offset;
            data->component_map[i] = comps[i].order;
            offset += data->component_size[i] + data->component_pad[i];
        } else {
            // Clear the superfluous entries for sanity
            data->component_size[i] = 0;
            data->component_pad[i] = 0;
            data->component_map[i] = 0;
        }
    }
}

// Derive per-channel size/shift from contiguous bit masks (e.g. 0x00FF0000),
// then defer to pl_plane_data_from_comps. Asserts that each mask really is a
// single contiguous run of bits.
void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4])
{
    int size[4];
    int shift[4];

    for (int i = 0; i < PL_ARRAY_SIZE(size); i++) {
        size[i] = __builtin_popcountll(mask[i]);
        shift[i] = PL_MAX(0, __builtin_ffsll(mask[i]) - 1);

        // Sanity checking: rebuild the mask from (size, shift) and compare.
        // Guard the size == 64 case explicitly, since `1 << 64` would be
        // undefined behavior (shift >= width of type).
        uint64_t mask_reconstructed = size[i] == 64
            ? UINT64_MAX
            : (1LLU << size[i]) - 1;
        mask_reconstructed <<= shift[i];
        pl_assert(mask_reconstructed == mask[i]);
    }

    pl_plane_data_from_comps(data, size, shift);
}

// Try to massage `data` so that every (non-alpha) component starts and ends
// on a byte boundary, by consuming available padding. On success, updates
// `*data` in-place and reports the resulting bit encoding via `out_bits`.
// On failure, leaves `*data` untouched and zeroes `out_bits`.
bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits)
{
    struct pl_plane_data aligned = *data;
    struct pl_bit_encoding bits = {0};
    int offset = 0;

// Set `var` on the first component, and require every later component to
// agree with it — mismatched components cannot share one bit encoding.
#define SET_TEST(var, value)            \
    do {                                \
        if (offset == 0) {              \
            (var) = (value);            \
        } else if ((var) != (value)) {  \
            goto misaligned;            \
        }                               \
    } while (0)

    for (int i = 0; i < MAX_COMPS; i++) {
        if (!aligned.component_size[i])
            break;

        // Can't meaningfully align alpha channel, so just skip it. This is a
        // limitation of the fact that `pl_bit_encoding` only applies to the
        // main color channels, and changing this would be very nontrivial.
        if (aligned.component_map[i] == PL_CHANNEL_A)
            continue;

        // Color depth is the original component size, before alignment
        SET_TEST(bits.color_depth, aligned.component_size[i]);

        // Try consuming padding of the current component to align down. This
        // corresponds to an extra bit shift to the left.
        int comp_start = offset + aligned.component_pad[i];
        int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8);
        left_delta = PL_MIN(left_delta, aligned.component_pad[i]);
        aligned.component_pad[i] -= left_delta;
        aligned.component_size[i] += left_delta;
        SET_TEST(bits.bit_shift, left_delta);

        // Try consuming padding of the next component to align up. This
        // corresponds to simply ignoring some extra 0s on the end.
        int comp_end = comp_start + aligned.component_size[i] - left_delta;
        int right_delta = PL_ALIGN2(comp_end, 8) - comp_end;
        if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) {
            // This is the last component, so we can be greedy
            aligned.component_size[i] += right_delta;
        } else {
            right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]);
            aligned.component_pad[i+1] -= right_delta;
            aligned.component_size[i] += right_delta;
        }

        // Sample depth is the new total component size, including padding
        SET_TEST(bits.sample_depth, aligned.component_size[i]);
        offset += aligned.component_pad[i] + aligned.component_size[i];
    }

    // Easy sanity check, to make sure that we don't exceed the known stride
    if (aligned.pixel_stride && offset > aligned.pixel_stride * 8)
        goto misaligned;

    *data = aligned;
    if (out_bits)
        *out_bits = bits;
    return true;

misaligned:
    // Can't properly align anything, so just do a no-op
    if (out_bits)
        *out_bits = (struct pl_bit_encoding) {0};
    return false;
}

// Find a GPU format whose host layout (per-component bit sizes, texel size,
// sample type) exactly matches `data`, writing the component mapping into
// `out_map` (optional). Returns NULL if no compatible format exists.
pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data)
{
    int dummy[4] = {0};
    out_map = PL_DEF(out_map, dummy);

    // Endian swapping requires compute shaders (currently)
    if (data->swapped && !gpu->limits.max_ssbo_size)
        return NULL;

    // Count the number of components and initialize out_map
    int num = 0;
    for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) {
        out_map[i] = -1;
        if (data->component_size[i])
            num = i+1;
    }

    for (int n = 0; n < gpu->num_formats; n++) {
        pl_fmt fmt = gpu->formats[n];
        if (fmt->opaque || fmt->num_components < num)
            continue;
        if (fmt->type != data->type || fmt->texel_size != data->pixel_stride)
            continue;
        if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE))
            continue;

        int idx = 0;

        // Try mapping all pl_plane_data components to texture components
        for (int i = 0; i < num; i++) {
            // If there's padding we have to map it to an unused physical
            // component first
            int pad = data->component_pad[i];
            if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad))
                goto next_fmt;

            // Otherwise, try and match this component
            int size = data->component_size[i];
            if (size && (idx >= 4 || fmt->host_bits[idx] != size))
                goto next_fmt;
            out_map[idx++] = data->component_map[i];
        }

        // Reject misaligned formats, check this last to only log such errors
        // if this is the only thing preventing a format from being used, as
        // this is likely an issue in the API usage.
        if (data->row_stride % fmt->texel_align) {
            PL_WARN(gpu, "Rejecting texture format '%s' due to misalignment: "
                    "Row stride %zu is not a clean multiple of texel size %zu! "
                    "This is likely an API usage bug.",
                    fmt->name, data->row_stride, fmt->texel_align);
            continue;
        }

        return fmt;

next_fmt: ; // acts as `continue`
    }

    return NULL;
}

// (Re)create `*tex` with a format compatible with `data` and upload the plane
// contents, optionally filling in `out_plane`. Handles endian-swapped input
// via a compute-based buffer swap. Returns false on any failure.
bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane,
                     pl_tex *tex, const struct pl_plane_data *data)
{
    pl_assert(!data->buf ^ !data->pixels); // exactly one

    int out_map[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
    if (!fmt) {
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;

        // TODO: try soft-converting to a supported format using e.g zimg?
    }

    bool ok = pl_tex_recreate(gpu, tex, pl_tex_params(
        .w = data->width,
        .h = data->height,
        .format = fmt,
        .sampleable = true,
        .host_writable = true,
        .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE,
    ));

    if (!ok) {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    if (out_plane) {
        out_plane->texture = *tex;
        out_plane->components = 0;
        for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) {
            out_plane->component_mapping[i] = out_map[i];
            if (out_map[i] >= 0)
                out_plane->components = i+1;
        }
    }

    struct pl_tex_transfer_params params = {
        .tex        = *tex,
        .rc.x1      = data->width, // set these for `pl_tex_transfer_size`
        .rc.y1      = data->height,
        .rc.z1      = 1,
        .row_pitch  = PL_DEF(data->row_stride, data->width * fmt->texel_size),
        .ptr        = (void *) data->pixels,
        .buf        = data->buf,
        .buf_offset = data->buf_offset,
        .callback   = data->callback,
        .priv       = data->priv,
    };

    pl_buf swapbuf = NULL;
    if (data->swapped) {
        // Endian swapping is done on a storable staging buffer; round the
        // size up to a word boundary as required by the swap kernel.
        const size_t aligned = PL_ALIGN2(pl_tex_transfer_size(&params), 4);
        swapbuf = pl_buf_create(gpu, pl_buf_params(
            .size = aligned,
            .storable = true,
            .initial_data = params.ptr,
            // Note: This may over-read from `ptr` if `ptr` is not aligned to a
            // word boundary, but the extra texels will be ignored by
            // `pl_tex_upload` so this UB should be a non-issue in practice.
        ));

        if (!swapbuf) {
            PL_ERR(gpu, "Failed creating endian swapping buffer!");
            return false;
        }

        struct pl_buf_copy_swap_params swap_params = {
            .src = swapbuf,
            .dst = swapbuf,
            .size = aligned,
            .wordsize = fmt->texel_size / fmt->num_components,
        };

        bool can_reuse = params.buf && params.buf->params.storable &&
                         params.buf_offset % 4 == 0 &&
                         params.buf_offset + aligned <= params.buf->params.size;

        if (params.ptr) {
            // Data is already uploaded (no-op), can swap in-place
        } else if (can_reuse) {
            // We can sample directly from the source buffer
            swap_params.src = params.buf;
            swap_params.src_offset = params.buf_offset;
        } else {
            // We sadly need to do a second memcpy
            assert(params.buf);
            PL_TRACE(gpu, "Double-slow path! pl_buf_copy -> pl_buf_copy_swap...");
            pl_buf_copy(gpu, swapbuf, 0, params.buf, params.buf_offset,
                        PL_MIN(aligned, params.buf->params.size - params.buf_offset));
        }

        if (!pl_buf_copy_swap(gpu, &swap_params)) {
            PL_ERR(gpu, "Failed swapping endianness!");
            pl_buf_destroy(gpu, &swapbuf);
            return false;
        }

        // Upload from the swapped buffer instead of the original source
        params.ptr = NULL;
        params.buf = swapbuf;
        params.buf_offset = 0;
    }

    ok = pl_tex_upload(gpu, &params);
    pl_buf_destroy(gpu, &swapbuf);
    return ok;
}

// Like pl_upload_plane, but only (re)creates the texture as a render target
// (renderable/blittable/storable as supported) without uploading any data.
// Endian-swapped plane data is rejected, since no upload takes place.
bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane,
                       pl_tex *tex, const struct pl_plane_data *data)
{
    if (data->swapped) {
        PL_ERR(gpu, "Cannot call pl_recreate_plane on non-native endian plane "
               "data, this is only supported for `pl_upload_plane`!");
        return false;
    }

    int out_map[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, out_map, data);
    if (!fmt) {
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;
    }

    bool ok = pl_tex_recreate(gpu, tex, pl_tex_params(
        .w = data->width,
        .h = data->height,
        .format = fmt,
        .renderable = true,
        .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE,
        .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
        .storable = fmt->caps & PL_FMT_CAP_STORABLE,
    ));

    if (!ok) {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    if (out_plane) {
        out_plane->texture = *tex;
        out_plane->components = 0;
        for (int i = 0; i < PL_ARRAY_SIZE(out_map); i++) {
            out_plane->component_mapping[i] = out_map[i];
            if (out_map[i] >= 0)
                out_plane->components = i+1;
        }
    }

    return true;
}