summaryrefslogtreecommitdiffstats
path: root/src/utils/upload.c
blob: 75bd4bb1c982bb2265934950b144579846ffd75b (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
/*
 * This file is part of libplacebo.
 *
 * libplacebo is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * libplacebo is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>.
 */

#include "log.h"
#include "common.h"
#include "gpu.h"

#include <libplacebo/utils/upload.h>

#define MAX_COMPS 4

// Intermediate description of a single component, as built and sorted by
// `pl_plane_data_from_comps`. A `size` of 0 marks a component as unused.
struct comp {
    int order; // e.g. 0, 1, 2, 3 for RGBA
    int size;  // size in bits
    int shift; // bit-shift / offset in bits
};

// qsort() comparator for `struct comp`.
//
// Components with a non-zero size always sort before components with a size
// of zero, so that the unused ones end up at the tail and can be ignored.
// Whenever that does not decide the order, the bit shift does.
static int compare_comp(const void *pa, const void *pb)
{
    const struct comp *lhs = pa;
    const struct comp *rhs = pb;

    const int lhs_used = lhs->size != 0;
    const int rhs_used = rhs->size != 0;

    // Exactly one of the two is in use: that one goes first
    if (lhs_used != rhs_used)
        return lhs_used ? -1 : 1;

    return PL_CMP(lhs->shift, rhs->shift);
}

void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4],
                              int shift[4])
{
    struct comp comps[MAX_COMPS];
    for (int i = 0; i < PL_ARRAY_SIZE(comps); i++) {
        comps[i].order = i;
        comps[i].size = size[i];
        comps[i].shift = shift[i];
    }

    // Sort the components by shift
    qsort(comps, MAX_COMPS, sizeof(struct comp), compare_comp);

    // Generate the resulting component size/pad/map
    int offset = 0;
    for (int i = 0; i < MAX_COMPS; i++)  {
        if (comps[i].size) {
            assert(comps[i].shift >= offset);
            data->component_size[i] = comps[i].size;
            data->component_pad[i] = comps[i].shift - offset;
            data->component_map[i] = comps[i].order;
            offset += data->component_size[i] + data->component_pad[i];
        } else {
            // Clear the superfluous entries for sanity
            data->component_size[i] = 0;
            data->component_pad[i] = 0;
            data->component_map[i] = 0;
        }
    }
}

// Convenience wrapper around `pl_plane_data_from_comps` which derives the
// size and shift of each component from a bit mask.
//
// mask: one bit mask per component. The size is taken as the number of set
//       bits and the shift as the index of the lowest set bit. Each mask must
//       be a single contiguous run of bits (this is asserted); a mask of 0
//       results in a size of 0.
void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4])
{
    int size[4];
    int shift[4];

    for (int i = 0; i < PL_ARRAY_SIZE(size); i++) {
        size[i] = __builtin_popcountll(mask[i]);
        // ffsll() returns 0 for an empty mask, clamp that case to a shift of 0
        shift[i] = PL_MAX(0, __builtin_ffsll(mask[i]) - 1);

        // Sanity checking: rebuild the mask from size/shift and compare. A
        // mask with all 64 bits set must be special-cased, because shifting a
        // 64-bit value by 64 is undefined behavior.
        uint64_t mask_reconstructed = size[i] < 64 ? (1LLU << size[i]) - 1
                                                   : UINT64_MAX;
        mask_reconstructed <<= shift[i];
        pl_assert(mask_reconstructed == mask[i]);
    }

    pl_plane_data_from_comps(data, size, shift);
}

// Tries to grow the non-alpha components of `data` to multiples of 8 bits by
// absorbing the padding before each component (and the padding before the
// next one) into the component itself.
//
// On success, the adjusted layout is written back to `*data`, the resulting
// color depth / sample depth / bit shift are written to `*out_bits` (if
// non-NULL), and true is returned. All processed components must agree on
// these three values.
//
// On failure, `*data` is left untouched, `*out_bits` (if non-NULL) is zeroed,
// and false is returned.
bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits)
{
    // Work on a copy, so that `*data` is only modified on success
    struct pl_plane_data aligned = *data;
    struct pl_bit_encoding bits = {0};

    // Running bit position: end of the most recently processed component
    int offset = 0;

// While `offset` is still 0 (i.e. for the first processed component), store
// `value` into `var`. For every later component, require that `var` already
// equals `value`, and bail out to `misaligned` otherwise.
#define SET_TEST(var, value)                \
    do {                                    \
        if (offset == 0) {                  \
            (var) = (value);                \
        } else if ((var) != (value)) {      \
            goto misaligned;                \
        }                                   \
    } while (0)

    for (int i = 0; i < MAX_COMPS; i++) {
        // The first component with a size of 0 terminates the list
        if (!aligned.component_size[i])
            break;

        // Can't meaningfully align alpha channel, so just skip it. This is a
        // limitation of the fact that `pl_bit_encoding` only applies to the
        // main color channels, and changing this would be very nontrivial.
        //
        // NOTE(review): this `continue` also skips the `offset` update at the
        // end of the loop body, so the bits occupied by the alpha component
        // are not counted towards `offset` - confirm that this is intended.
        if (aligned.component_map[i] == PL_CHANNEL_A)
            continue;

        // Color depth is the original component size, before alignment
        SET_TEST(bits.color_depth, aligned.component_size[i]);

        // Try consuming padding of the current component to align down. This
        // corresponds to an extra bit shift to the left.
        int comp_start = offset + aligned.component_pad[i];
        int left_delta = comp_start - PL_ALIGN2(comp_start - 7, 8);
        // Can never consume more than the padding that is actually there
        left_delta = PL_MIN(left_delta, aligned.component_pad[i]);
        aligned.component_pad[i] -= left_delta;
        aligned.component_size[i] += left_delta;
        SET_TEST(bits.bit_shift, left_delta);

        // Try consuming padding of the next component to align up. This
        // corresponds to simply ignoring some extra 0s on the end.
        // (`left_delta` is subtracted again because it was already added to
        // the component size above, while `comp_start` was not moved)
        int comp_end = comp_start + aligned.component_size[i] - left_delta;
        int right_delta = PL_ALIGN2(comp_end, 8) - comp_end;
        if (i+1 == MAX_COMPS || !aligned.component_size[i+1]) {
            // This is the last component, so we can be greedy
            aligned.component_size[i] += right_delta;
        } else {
            // Limited by the padding available in front of the next component
            right_delta = PL_MIN(right_delta, aligned.component_pad[i+1]);
            aligned.component_pad[i+1] -= right_delta;
            aligned.component_size[i] += right_delta;
        }

        // Sample depth is the new total component size, including padding
        SET_TEST(bits.sample_depth, aligned.component_size[i]);

        offset += aligned.component_pad[i] + aligned.component_size[i];
    }

    // Easy sanity check, to make sure that we don't exceed the known stride
    // (`offset` is in bits, `pixel_stride` is multiplied by 8 to compare)
    if (aligned.pixel_stride && offset > aligned.pixel_stride * 8)
        goto misaligned;

    *data = aligned;
    if (out_bits)
        *out_bits = bits;
    return true;

misaligned:
    // Can't properly align anything, so just do a no-op
    if (out_bits)
        *out_bits = (struct pl_bit_encoding) {0};
    return false;
}

// Searches the formats of `gpu` for one whose physical component layout
// matches the plane layout described by `data`.
//
// out_map: optional, may be NULL. On success, holds for every texture
//          component either the `data->component_map` entry that was matched
//          to it, or -1 if no plane component was mapped to it. The contents
//          are unspecified if no format was found.
//
// Returns the first matching format in `gpu->formats`, or NULL if there is
// none (or if `data->swapped` is set but the GPU reports no SSBO support).
pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data)
{
    int dummy[4] = {0};
    out_map = PL_DEF(out_map, dummy);

    // Endian swapping requires compute shaders (currently)
    if (data->swapped && !gpu->limits.max_ssbo_size)
        return NULL;

    // Count the number of components and initialize out_map
    int num = 0;
    for (int i = 0; i < PL_ARRAY_SIZE(data->component_size); i++) {
        out_map[i] = -1;
        if (data->component_size[i])
            num = i+1;
    }

    for (int n = 0; n < gpu->num_formats; n++) {
        pl_fmt fmt = gpu->formats[n];
        if (fmt->opaque || fmt->num_components < num)
            continue;
        if (fmt->type != data->type || fmt->texel_size != data->pixel_stride)
            continue;
        if (!(fmt->caps & PL_FMT_CAP_SAMPLEABLE))
            continue;

        // Index of the next unclaimed physical texture component
        int idx = 0;

        // Try mapping all pl_plane_data components to texture components
        for (int i = 0; i < num; i++) {
            // If there's padding we have to map it to an unused physical
            // component first
            int pad = data->component_pad[i];
            if (pad && (idx >= 4 || fmt->host_bits[idx++] != pad))
                goto next_fmt;

            // Every plane component claims one physical component, including
            // zero-sized ones in the middle of the list. Make sure there is
            // one left, to avoid writing past the end of `out_map`.
            if (idx >= 4)
                goto next_fmt;

            // Otherwise, try and match this component
            int size = data->component_size[i];
            if (size && fmt->host_bits[idx] != size)
                goto next_fmt;
            out_map[idx++] = data->component_map[i];
        }

        // Reject misaligned formats, check this last to only log such errors
        // if this is the only thing preventing a format from being used, as
        // this is likely an issue in the API usage.
        if (data->row_stride % fmt->texel_align) {
            PL_WARN(gpu, "Rejecting texture format '%s' due to misalignment: "
                    "Row stride %zu is not a clean multiple of texel size %zu! "
                    "This is likely an API usage bug.",
                    fmt->name, data->row_stride, fmt->texel_align);
            continue;
        }

        return fmt;

next_fmt: ; // acts as `continue`
    }

    return NULL;
}

// Uploads the plane described by `data` to the texture `*tex`.
//
// Picks a compatible format via `pl_plane_find_fmt`, (re)creates `*tex` with
// the plane's dimensions, fills in `*out_plane` (if non-NULL) with the
// texture and component mapping, and finally transfers the pixel data from
// either `data->pixels` or `data->buf` (exactly one of which must be set).
//
// If `data->swapped` is set, the data is routed through a temporary buffer
// on which the endian swap is performed before the upload.
//
// Returns whether everything succeeded; errors are logged on `gpu`.
bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane,
                     pl_tex *tex, const struct pl_plane_data *data)
{
    pl_assert(!data->buf ^ !data->pixels); // exactly one

    int mapping[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, mapping, data);
    if (!fmt) {
        // TODO: try soft-converting to a supported format using e.g zimg?
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;
    }

    bool created = pl_tex_recreate(gpu, tex, pl_tex_params(
        .w = data->width,
        .h = data->height,
        .format = fmt,
        .sampleable = true,
        .host_writable = true,
        .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE,
    ));

    if (!created) {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    if (out_plane) {
        // The component count is the index of the last mapped entry, plus one
        int num_comps = 0;
        for (int c = 0; c < PL_ARRAY_SIZE(mapping); c++) {
            out_plane->component_mapping[c] = mapping[c];
            if (mapping[c] >= 0)
                num_comps = c+1;
        }

        out_plane->texture = *tex;
        out_plane->components = num_comps;
    }

    struct pl_tex_transfer_params xfer = {
        .tex        = *tex,
        .rc.x1      = data->width, // set these for `pl_tex_transfer_size`
        .rc.y1      = data->height,
        .rc.z1      = 1,
        .row_pitch  = PL_DEF(data->row_stride, data->width * fmt->texel_size),
        .ptr        = (void *) data->pixels,
        .buf        = data->buf,
        .buf_offset = data->buf_offset,
        .callback   = data->callback,
        .priv       = data->priv,
    };

    pl_buf swapbuf = NULL;
    if (data->swapped) {
        const size_t swap_size = PL_ALIGN2(pl_tex_transfer_size(&xfer), 4);

        // Note: Passing `ptr` as the initial data may over-read from it if
        // `ptr` is not aligned to a word boundary, but the extra texels will
        // be ignored by `pl_tex_upload` so this UB should be a non-issue in
        // practice.
        swapbuf = pl_buf_create(gpu, pl_buf_params(
            .size           = swap_size,
            .storable       = true,
            .initial_data   = xfer.ptr,
        ));
        if (!swapbuf) {
            PL_ERR(gpu, "Failed creating endian swapping buffer!");
            return false;
        }

        // Default to swapping in-place, which is what we want if the data
        // was already uploaded as part of the buffer creation (host pointer)
        struct pl_buf_copy_swap_params swap = {
            .src        = swapbuf,
            .dst        = swapbuf,
            .size       = swap_size,
            .wordsize   = fmt->texel_size / fmt->num_components,
        };

        if (!xfer.ptr) {
            const bool reusable = xfer.buf && xfer.buf->params.storable &&
                                  xfer.buf_offset % 4 == 0 &&
                                  xfer.buf_offset + swap_size <= xfer.buf->params.size;

            if (reusable) {
                // We can sample directly from the source buffer
                swap.src = xfer.buf;
                swap.src_offset = xfer.buf_offset;
            } else {
                // We sadly need to do a second memcpy
                assert(xfer.buf);
                PL_TRACE(gpu, "Double-slow path! pl_buf_copy -> pl_buf_copy_swap...");
                pl_buf_copy(gpu, swapbuf, 0, xfer.buf, xfer.buf_offset,
                            PL_MIN(swap_size, xfer.buf->params.size - xfer.buf_offset));
            }
        }

        if (!pl_buf_copy_swap(gpu, &swap)) {
            PL_ERR(gpu, "Failed swapping endianness!");
            pl_buf_destroy(gpu, &swapbuf);
            return false;
        }

        // From here on, upload from the swapped buffer instead
        xfer.ptr = NULL;
        xfer.buf = swapbuf;
        xfer.buf_offset = 0;
    }

    bool uploaded = pl_tex_upload(gpu, &xfer);
    pl_buf_destroy(gpu, &swapbuf);
    return uploaded;
}

// (Re)creates the texture `*tex` so that it matches the plane described by
// `data`, without uploading any pixel data.
//
// The format is picked via `pl_plane_find_fmt`. The texture is created as
// renderable, and additionally as host-readable / blit destination / storable
// where the format's capabilities allow. If `out_plane` is non-NULL, it is
// filled in with the texture and component mapping.
//
// Plane data with `swapped` set is rejected. Returns whether the texture was
// successfully created; errors are logged on `gpu`.
bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane,
                       pl_tex *tex, const struct pl_plane_data *data)
{
    if (data->swapped) {
        PL_ERR(gpu, "Cannot call pl_recreate_plane on non-native endian plane "
               "data, this is only supported for `pl_upload_plane`!");
        return false;
    }

    int mapping[4];
    pl_fmt fmt = pl_plane_find_fmt(gpu, mapping, data);
    if (!fmt) {
        PL_ERR(gpu, "Failed picking any compatible texture format for a plane!");
        return false;
    }

    bool created = pl_tex_recreate(gpu, tex, pl_tex_params(
        .w = data->width,
        .h = data->height,
        .format = fmt,
        .renderable = true,
        .host_readable = fmt->caps & PL_FMT_CAP_HOST_READABLE,
        .blit_dst = fmt->caps & PL_FMT_CAP_BLITTABLE,
        .storable = fmt->caps & PL_FMT_CAP_STORABLE,
    ));

    if (!created) {
        PL_ERR(gpu, "Failed initializing plane texture!");
        return false;
    }

    if (!out_plane)
        return true;

    // The component count is the index of the last mapped entry, plus one
    int num_comps = 0;
    for (int c = 0; c < PL_ARRAY_SIZE(mapping); c++) {
        out_plane->component_mapping[c] = mapping[c];
        if (mapping[c] >= 0)
            num_comps = c+1;
    }

    out_plane->texture = *tex;
    out_plane->components = num_comps;
    return true;
}