diff options
author | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 20:38:23 +0000 |
---|---|---|
committer | Daniel Baumann <daniel.baumann@progress-linux.org> | 2024-04-15 20:38:23 +0000 |
commit | ff6e3c025658a5fa1affd094f220b623e7e1b24b (patch) | |
tree | 9faab72d69c92d24e349d184f5869b9796f17e0c /src/include | |
parent | Initial commit. (diff) | |
download | libplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.tar.xz libplacebo-ff6e3c025658a5fa1affd094f220b623e7e1b24b.zip |
Adding upstream version 6.338.2.upstream/6.338.2upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'src/include')
35 files changed, 11304 insertions, 0 deletions
diff --git a/src/include/libplacebo/cache.h b/src/include/libplacebo/cache.h new file mode 100644 index 0000000..5897ac8 --- /dev/null +++ b/src/include/libplacebo/cache.h @@ -0,0 +1,200 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_CACHE_H_ +#define LIBPLACEBO_CACHE_H_ + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include <libplacebo/config.h> +#include <libplacebo/common.h> +#include <libplacebo/log.h> + +PL_API_BEGIN + +typedef struct pl_cache_obj { + // Cache object key. This will uniquely identify this cached object. + uint64_t key; + + // Cache data pointer and length. 0-length cached objects are invalid + // and will be silently dropped. You can explicitly remove a cached + // object by overwriting it with a length 0 object. + void *data; + size_t size; + + // Free callback, to free memory associated with `data`. (Optional) + // Will be called when the object is either explicitly deleted, culled + // due to hitting size limits, or on pl_cache_destroy(). + void (*free)(void *data); +} pl_cache_obj; + +struct pl_cache_params { + // Optional `pl_log` that is used for logging internal events related + // to the cache, such as insertions, saving and loading. + pl_log log; + + // Size limits. If 0, no limit is imposed. 
+ // + // Note: libplacebo will never detect or invalidate stale cache entries, so + // setting an upper size limit is strongly recommended + size_t max_object_size; + size_t max_total_size; + + // Optional external callback to call after a cached object is modified + // (including deletion and (re-)insertion). Note that this is not called on + // objects which are merely pruned from the cache due to `max_total_size`, + // so users must rely on some external mechanism to prune stale entries or + // enforce size limits. + // + // Note: `pl_cache_load` does not trigger this callback. + // Note: Ownership of `obj` does *not* pass to the caller. + // Note: This function must be thread safe. + void (*set)(void *priv, pl_cache_obj obj); + + // Optional external callback to call on a cache miss. Ownership of the + // returned object passes to the `pl_cache`. Objects returned by this + // callback *should* have a valid `free` callback, unless lifetime can be + // externally managed and guaranteed to outlive the `pl_cache`. + // + // Note: This function must be thread safe. + pl_cache_obj (*get)(void *priv, uint64_t key); + + // External context for insert/lookup. + void *priv; +}; + +#define pl_cache_params(...) (&(struct pl_cache_params) { __VA_ARGS__ }) +PL_API extern const struct pl_cache_params pl_cache_default_params; + +// Thread-safety: Safe +// +// Note: In any context in which `pl_cache` is used, users may also pass NULL +// to disable caching. In other words, NULL is a valid `pl_cache`. +typedef const struct pl_cache_t { + struct pl_cache_params params; +} *pl_cache; + +// Create a new cache. This function will never fail. +PL_API pl_cache pl_cache_create(const struct pl_cache_params *params); + +// Destroy a `pl_cache` object, including all underlying objects. +PL_API void pl_cache_destroy(pl_cache *cache); + +// Explicitly clear all objects in the cache without destroying it. This is +// similar to `pl_cache_destroy`, but the cache remains valid afterwards. 
+// +// Note: Objects destroyed in this way are *not* propagated to the `set` callback. +PL_API void pl_cache_reset(pl_cache cache); + +// Return the current internal number of objects and total size (bytes) +PL_API int pl_cache_objects(pl_cache cache); +PL_API size_t pl_cache_size(pl_cache cache); + +// --- Cache saving and loading APIs + +// Serialize the internal state of a `pl_cache` into an abstract cache +// object that can be e.g. saved to disk and loaded again later. Returns the +// number of objects saved. +// +// Note: Using `save/load` is largely redundant with using `set/get` +// callbacks, and the user should decide whether to use the explicit API or the +// callback-based API. +PL_API int pl_cache_save_ex(pl_cache cache, + void (*write)(void *priv, size_t size, const void *ptr), + void *priv); + +// Load the result of a previous `pl_cache_save` call. Any duplicate entries in +// the `pl_cache` will be overwritten. Returns the number of objects loaded, or +// a negative number on serious error (e.g. corrupt header) +// +// Note: This does not trigger the `set` callback. +PL_API int pl_cache_load_ex(pl_cache cache, + bool (*read)(void *priv, size_t size, void *ptr), + void *priv); + +// --- Convenience wrappers around pl_cache_save/load_ex + +// Writes data directly to a pointer. Returns the number of bytes that *would* +// have been written, so this can be used on a size 0 buffer to get the required +// total size. +PL_API size_t pl_cache_save(pl_cache cache, uint8_t *data, size_t size); + +// Reads data directly from a pointer. This still reads from `data`, so it does +// not avoid a copy. +PL_API int pl_cache_load(pl_cache cache, const uint8_t *data, size_t size); + +// Writes/loads data to/from a FILE stream at the current position. 
+#define pl_cache_save_file(c, file) pl_cache_save_ex(c, pl_write_file_cb, file) +#define pl_cache_load_file(c, file) pl_cache_load_ex(c, pl_read_file_cb, file) + +static inline void pl_write_file_cb(void *priv, size_t size, const void *ptr) +{ + (void) fwrite(ptr, 1, size, (FILE *) priv); +} + +static inline bool pl_read_file_cb(void *priv, size_t size, void *ptr) +{ + return fread(ptr, 1, size, (FILE *) priv) == size; +} + +// --- Object modification API. Mostly intended for internal use. + +// Insert a new cached object into a `pl_cache`. Returns whether successful. +// Overwrites any existing cached object with that signature, so this can be +// used to e.g. delete objects as well (set their size to 0). On success, +// ownership of `obj` passes to the `pl_cache`. +// +// Note: If `object.free` is NULL, this will perform an internal memdup. To +// bypass this (e.g. when directly adding externally managed memory), you can +// set the `free` callback to an explicit noop function. +// +// Note: `obj->data/free` will be reset to NULL on successful insertion. +PL_API bool pl_cache_try_set(pl_cache cache, pl_cache_obj *obj); + +// Variant of `pl_cache_try_set` that simply frees `obj` on failure. +PL_API void pl_cache_set(pl_cache cache, pl_cache_obj *obj); + +// Looks up `obj->key` in the object cache. If successful, `obj->data` is +// set to memory owned by the caller, which must be either explicitly +// re-inserted, or explicitly freed (using obj->free). +// +// Note: On failure, `obj->data/size/free` are reset to NULL. +PL_API bool pl_cache_get(pl_cache cache, pl_cache_obj *obj); + +// Run a callback on every object currently stored in `cache`. +// +// Note: Running any `pl_cache_*` function on `cache` from this callback is +// undefined behavior. 
+PL_API void pl_cache_iterate(pl_cache cache, + void (*cb)(void *priv, pl_cache_obj obj), + void *priv); + +// Utility wrapper to free a `pl_cache_obj` if necessary (and sanitize it) +static inline void pl_cache_obj_free(pl_cache_obj *obj) +{ + if (obj->free) + obj->free(obj->data); + obj->data = NULL; + obj->free = NULL; + obj->size = 0; +} + +PL_API_END + +#endif // LIBPLACEBO_CACHE_H_ diff --git a/src/include/libplacebo/colorspace.h b/src/include/libplacebo/colorspace.h new file mode 100644 index 0000000..6663019 --- /dev/null +++ b/src/include/libplacebo/colorspace.h @@ -0,0 +1,719 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_COLORSPACE_H_ +#define LIBPLACEBO_COLORSPACE_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <libplacebo/common.h> + +PL_API_BEGIN + +// The underlying color representation (e.g. RGB, XYZ or YCbCr) +enum pl_color_system { + PL_COLOR_SYSTEM_UNKNOWN = 0, + // YCbCr-like color systems: + PL_COLOR_SYSTEM_BT_601, // ITU-R Rec. BT.601 (SD) + PL_COLOR_SYSTEM_BT_709, // ITU-R Rec. BT.709 (HD) + PL_COLOR_SYSTEM_SMPTE_240M, // SMPTE-240M + PL_COLOR_SYSTEM_BT_2020_NC, // ITU-R Rec. BT.2020 (non-constant luminance) + PL_COLOR_SYSTEM_BT_2020_C, // ITU-R Rec. BT.2020 (constant luminance) + PL_COLOR_SYSTEM_BT_2100_PQ, // ITU-R Rec. 
BT.2100 ICtCp PQ variant + PL_COLOR_SYSTEM_BT_2100_HLG, // ITU-R Rec. BT.2100 ICtCp HLG variant + PL_COLOR_SYSTEM_DOLBYVISION, // Dolby Vision (see pl_dovi_metadata) + PL_COLOR_SYSTEM_YCGCO, // YCgCo (derived from RGB) + // Other color systems: + PL_COLOR_SYSTEM_RGB, // Red, Green and Blue + PL_COLOR_SYSTEM_XYZ, // Digital Cinema Distribution Master (XYZ) + PL_COLOR_SYSTEM_COUNT +}; + +PL_API bool pl_color_system_is_ycbcr_like(enum pl_color_system sys); + +// Returns true for color systems that are linear transformations of the RGB +// equivalent, i.e. are simple matrix multiplications. For color systems with +// this property, `pl_color_repr_decode` is sufficient for conversion to RGB. +PL_API bool pl_color_system_is_linear(enum pl_color_system sys); + +// Guesses the best YCbCr-like colorspace based on a given image resolution. +// This only picks conservative values. (In particular, BT.2020 is never +// auto-guessed, even for 4K resolution content) +PL_API enum pl_color_system pl_color_system_guess_ycbcr(int width, int height); + +// Friendly names for the canonical channel names and order. +enum pl_channel { + PL_CHANNEL_NONE = -1, + PL_CHANNEL_A = 3, // alpha + // RGB system + PL_CHANNEL_R = 0, + PL_CHANNEL_G = 1, + PL_CHANNEL_B = 2, + // YCbCr-like systems + PL_CHANNEL_Y = 0, + PL_CHANNEL_CB = 1, + PL_CHANNEL_CR = 2, + // Aliases for Cb/Cr + PL_CHANNEL_U = 1, + PL_CHANNEL_V = 2 + // There are deliberately no names for the XYZ system to avoid + // confusion due to PL_CHANNEL_Y. +}; + +// The numerical range of the representation (where applicable). +enum pl_color_levels { + PL_COLOR_LEVELS_UNKNOWN = 0, + PL_COLOR_LEVELS_LIMITED, // Limited/TV range, e.g. 16-235 + PL_COLOR_LEVELS_FULL, // Full/PC range, e.g. 0-255 + PL_COLOR_LEVELS_COUNT, + + // Compatibility aliases + PL_COLOR_LEVELS_TV = PL_COLOR_LEVELS_LIMITED, + PL_COLOR_LEVELS_PC = PL_COLOR_LEVELS_FULL, +}; + +// The alpha representation mode. 
+enum pl_alpha_mode { + PL_ALPHA_UNKNOWN = 0, // or no alpha channel present + PL_ALPHA_INDEPENDENT, // alpha channel is separate from the video + PL_ALPHA_PREMULTIPLIED, // alpha channel is multiplied into the colors + PL_ALPHA_MODE_COUNT, +}; + +// The underlying bit-wise representation of a color sample. For example, +// a 10-bit TV-range YCbCr value uploaded to a 16 bit texture would have +// sample_depth=16 color_depth=10 bit_shift=0. +// +// For another example, a 12-bit XYZ full range sample shifted to 16-bits with +// the lower 4 bits all set to 0 would have sample_depth=16 color_depth=12 +// bit_shift=4. (libavcodec likes outputting this type of `xyz12`) +// +// To explain the meaning of `sample_depth` further; the consideration factor +// here is the fact that GPU sampling will normalize the sampled color to the +// range 0.0 - 1.0 in a manner dependent on the number of bits in the texture +// format. So if you upload a 10-bit YCbCr value unpadded as 16-bit color +// samples, all of the sampled values will be extremely close to 0.0. In such a +// case, `pl_color_repr_normalize` would return a high scaling factor, which +// would pull the color up to their 16-bit range. +struct pl_bit_encoding { + int sample_depth; // the number of bits the color is stored/sampled as + int color_depth; // the effective number of bits of the color information + int bit_shift; // a representational bit shift applied to the color +}; + +// Returns whether two bit encodings are exactly identical. 
+PL_API bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, + const struct pl_bit_encoding *b2); + +// Parsed metadata from the Dolby Vision RPU +struct pl_dovi_metadata { + // Colorspace transformation metadata + float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") + pl_matrix3x3 nonlinear; // before PQ, also called "ycc_to_rgb" + pl_matrix3x3 linear; // after PQ, also called "rgb_to_lms" + + // Reshape data, grouped by component + struct pl_reshape_data { + uint8_t num_pivots; + float pivots[9]; // normalized to [0.0, 1.0] based on BL bit depth + uint8_t method[8]; // 0 = polynomial, 1 = MMR + // Note: these must be normalized (divide by coefficient_log2_denom) + float poly_coeffs[8][3]; // x^0, x^1, x^2, unused must be 0 + uint8_t mmr_order[8]; // 1, 2 or 3 + float mmr_constant[8]; + float mmr_coeffs[8][3 /* order */][7]; + } comp[3]; +}; + +// Struct describing the underlying color system and representation. This +// information is needed to convert an encoded color to a normalized RGB triple +// in the range 0-1. +struct pl_color_repr { + enum pl_color_system sys; + enum pl_color_levels levels; + enum pl_alpha_mode alpha; + struct pl_bit_encoding bits; // or {0} if unknown + + // Metadata for PL_COLOR_SYSTEM_DOLBYVISION. Note that, for the sake of + // efficiency, this is treated purely as an opaque reference - functions + // like pl_color_repr_equal will merely do a pointer equality test. + // + // The only functions that actually dereference it in any way are + // pl_color_repr_decode, pl_shader_decode_color and pl_render_image(_mix). + const struct pl_dovi_metadata *dovi; +}; + +// Some common color representations. 
It's worth pointing out that all of these +// presets leave `alpha` and `bits` as unknown - that is, only the system and +// levels are predefined +PL_API extern const struct pl_color_repr pl_color_repr_unknown; +PL_API extern const struct pl_color_repr pl_color_repr_rgb; +PL_API extern const struct pl_color_repr pl_color_repr_sdtv; +PL_API extern const struct pl_color_repr pl_color_repr_hdtv; // also Blu-ray +PL_API extern const struct pl_color_repr pl_color_repr_uhdtv; // SDR, NCL system +PL_API extern const struct pl_color_repr pl_color_repr_jpeg; + +// Returns whether two colorspace representations are exactly identical. +PL_API bool pl_color_repr_equal(const struct pl_color_repr *c1, + const struct pl_color_repr *c2); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_color_repr_merge(struct pl_color_repr *orig, + const struct pl_color_repr *update); + +// This function normalizes the color representation such that +// color_depth=sample_depth and bit_shift=0; and returns the scaling factor +// that must be multiplied into the color value to accomplish this, assuming +// it has already been sampled by the GPU. If unknown, the color and sample +// depth will both be inferred as 8 bits for the purposes of this conversion. +PL_API float pl_color_repr_normalize(struct pl_color_repr *repr); + +// Guesses the best color levels based on the specified color levels and +// falling back to using the color system instead. YCbCr-like systems are +// assumed to be TV range, otherwise this defaults to PC range. +PL_API enum pl_color_levels pl_color_levels_guess(const struct pl_color_repr *repr); + +// The colorspace's primaries (gamut) +enum pl_color_primaries { + PL_COLOR_PRIM_UNKNOWN = 0, + // Standard gamut: + PL_COLOR_PRIM_BT_601_525, // ITU-R Rec. BT.601 (525-line = NTSC, SMPTE-C) + PL_COLOR_PRIM_BT_601_625, // ITU-R Rec. BT.601 (625-line = PAL, SECAM) + PL_COLOR_PRIM_BT_709, // ITU-R Rec. 
BT.709 (HD), also sRGB + PL_COLOR_PRIM_BT_470M, // ITU-R Rec. BT.470 M + PL_COLOR_PRIM_EBU_3213, // EBU Tech. 3213-E / JEDEC P22 phosphors + // Wide gamut: + PL_COLOR_PRIM_BT_2020, // ITU-R Rec. BT.2020 (UltraHD) + PL_COLOR_PRIM_APPLE, // Apple RGB + PL_COLOR_PRIM_ADOBE, // Adobe RGB (1998) + PL_COLOR_PRIM_PRO_PHOTO, // ProPhoto RGB (ROMM) + PL_COLOR_PRIM_CIE_1931, // CIE 1931 RGB primaries + PL_COLOR_PRIM_DCI_P3, // DCI-P3 (Digital Cinema) + PL_COLOR_PRIM_DISPLAY_P3, // DCI-P3 (Digital Cinema) with D65 white point + PL_COLOR_PRIM_V_GAMUT, // Panasonic V-Gamut (VARICAM) + PL_COLOR_PRIM_S_GAMUT, // Sony S-Gamut + PL_COLOR_PRIM_FILM_C, // Traditional film primaries with Illuminant C + PL_COLOR_PRIM_ACES_AP0, // ACES Primaries #0 (ultra wide) + PL_COLOR_PRIM_ACES_AP1, // ACES Primaries #1 + PL_COLOR_PRIM_COUNT +}; + +PL_API bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim); + +// Guesses the best primaries based on a resolution. This always guesses +// conservatively, i.e. it will never return a wide gamut color space even if +// the resolution is 4K. +PL_API enum pl_color_primaries pl_color_primaries_guess(int width, int height); + +// The colorspace's transfer function (gamma / EOTF) +enum pl_color_transfer { + PL_COLOR_TRC_UNKNOWN = 0, + // Standard dynamic range: + PL_COLOR_TRC_BT_1886, // ITU-R Rec. 
BT.1886 (CRT emulation + OOTF) + PL_COLOR_TRC_SRGB, // IEC 61966-2-1 sRGB (CRT emulation) + PL_COLOR_TRC_LINEAR, // Linear light content + PL_COLOR_TRC_GAMMA18, // Pure power gamma 1.8 + PL_COLOR_TRC_GAMMA20, // Pure power gamma 2.0 + PL_COLOR_TRC_GAMMA22, // Pure power gamma 2.2 + PL_COLOR_TRC_GAMMA24, // Pure power gamma 2.4 + PL_COLOR_TRC_GAMMA26, // Pure power gamma 2.6 + PL_COLOR_TRC_GAMMA28, // Pure power gamma 2.8 + PL_COLOR_TRC_PRO_PHOTO, // ProPhoto RGB (ROMM) + PL_COLOR_TRC_ST428, // Digital Cinema Distribution Master (XYZ) + // High dynamic range: + PL_COLOR_TRC_PQ, // ITU-R BT.2100 PQ (perceptual quantizer), aka SMPTE ST2084 + PL_COLOR_TRC_HLG, // ITU-R BT.2100 HLG (hybrid log-gamma), aka ARIB STD-B67 + PL_COLOR_TRC_V_LOG, // Panasonic V-Log (VARICAM) + PL_COLOR_TRC_S_LOG1, // Sony S-Log1 + PL_COLOR_TRC_S_LOG2, // Sony S-Log2 + PL_COLOR_TRC_COUNT +}; + +// Returns the nominal peak of a given transfer function, relative to the +// reference white. This refers to the highest encodable signal level. +// Always equal to 1.0 for SDR curves. +// +// Note: For HLG in particular, which is scene-referred, this returns the +// highest nominal peak in scene-referred space (3.77), which may be different +// from the actual peak in display space after application of the HLG OOTF. +PL_API float pl_color_transfer_nominal_peak(enum pl_color_transfer trc); + +static inline bool pl_color_transfer_is_hdr(enum pl_color_transfer trc) +{ + return pl_color_transfer_nominal_peak(trc) > 1.0; +} + +// This defines the display-space standard reference white level (in cd/m^2) +// that is assumed for SDR content, for use when mapping between HDR and SDR in +// display space. See ITU-R Report BT.2408 for more information. +#define PL_COLOR_SDR_WHITE 203.0f + +// This defines the assumed contrast level of an unknown SDR display. 
This +// will be used to determine the black point in the absence of any tagged +// minimum luminance, relative to the tagged maximum luminance (or +// PL_COLOR_SDR_WHITE in the absence of all tagging) +#define PL_COLOR_SDR_CONTRAST 1000.0f + +// This defines the default black point assumed for "infinite contrast" HDR +// displays. This is not exactly 0.0 because a value of 0.0 is interpreted +// as "unknown / missing metadata" inside struct pl_hdr_metadata, and also +// to avoid numerical issues in a variety of tone mapping functions. +// Essentially, a black level below this number is functionally meaningless +// inside libplacebo, and will be clamped to this value regardless. +// +// The value used here (1e-6) is about one 13-bit PQ step above absolute zero, +// which is a small fraction of the human JND at this brightness level, and also +// about 3 bits above the floating point machine epsilon. +#define PL_COLOR_HDR_BLACK 1e-6f + +// This defines the assumed peak brightness of a HLG display with no HDR10 +// metadata. This is set to the brightness of a "nominal" HLG reference display. +#define PL_COLOR_HLG_PEAK 1000.0f + +// Represents a single CIE xy coordinate (e.g. CIE Yxy with Y = 1.0) +struct pl_cie_xy { + float x, y; +}; + +// Creates a pl_cie_xy from raw XYZ values +static inline struct pl_cie_xy pl_cie_from_XYZ(float X, float Y, float Z) +{ + float k = 1.0f / (X + Y + Z); + struct pl_cie_xy xy = { k * X, k * Y }; + return xy; +} + +// Recovers (X / Y) from a CIE xy value. +static inline float pl_cie_X(struct pl_cie_xy xy) +{ + return xy.x / xy.y; +} + +// Recovers (Z / Y) from a CIE xy value. +static inline float pl_cie_Z(struct pl_cie_xy xy) +{ + return (1 - xy.x - xy.y) / xy.y; +} + +static inline bool pl_cie_xy_equal(const struct pl_cie_xy *a, + const struct pl_cie_xy *b) +{ + return a->x == b->x && a->y == b->y; +} + +// Computes the CIE xy chromaticity coordinates of a CIE D-series illuminant +// with the given correlated color temperature. 
+// +// `temperature` must be between 2500 K and 25000 K, inclusive. +PL_API struct pl_cie_xy pl_white_from_temp(float temperature); + +// Represents the raw physical primaries corresponding to a color space. +struct pl_raw_primaries { + struct pl_cie_xy red, green, blue, white; +}; + +// Returns whether two raw primaries are exactly identical. +PL_API bool pl_raw_primaries_equal(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Returns whether two raw primaries are approximately equal +PL_API bool pl_raw_primaries_similar(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_raw_primaries_merge(struct pl_raw_primaries *orig, + const struct pl_raw_primaries *update); + +// Returns the raw primaries for a given color space. +PL_API const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim); + +enum pl_hdr_scaling { + PL_HDR_NORM = 0, // 0.0 is absolute black, 1.0 is PL_COLOR_SDR_WHITE + PL_HDR_SQRT, // sqrt() of PL_HDR_NORM values + PL_HDR_NITS, // absolute brightness in raw cd/m² + PL_HDR_PQ, // absolute brightness in PQ (0.0 to 1.0) + PL_HDR_SCALING_COUNT, +}; + +// Generic helper for performing HDR scale conversions. 
+PL_API float pl_hdr_rescale(enum pl_hdr_scaling from, enum pl_hdr_scaling to, float x); + +enum pl_hdr_metadata_type { + PL_HDR_METADATA_ANY = 0, + PL_HDR_METADATA_NONE, + PL_HDR_METADATA_HDR10, // HDR10 static mastering display metadata + PL_HDR_METADATA_HDR10PLUS, // HDR10+ dynamic metadata + PL_HDR_METADATA_CIE_Y, // CIE Y derived dynamic luminance metadata + PL_HDR_METADATA_TYPE_COUNT, +}; + +// Bezier curve for HDR metadata +struct pl_hdr_bezier { + float target_luma; // target luminance (cd/m²) for this OOTF + float knee_x, knee_y; // cross-over knee point (0-1) + float anchors[15]; // intermediate bezier curve control points (0-1) + uint8_t num_anchors; +}; + +// Represents raw HDR metadata as defined by SMPTE 2086 / CTA 861.3, which is +// often attached to HDR sources and can be forwarded to HDR-capable displays, +// or used to guide the libplacebo built-in tone mapping. Values left as 0 +// are treated as unknown by libplacebo. +// +// Note: This means that a value of `min_luma == 0.0` gets treated as "minimum +// luminance not known", which in practice may end up inferring a default +// contrast of 1000:1 for SDR transfer functions. To avoid this, the user should +// set these fields to a low positive value, e.g. PL_COLOR_HDR_BLACK, to signal +// a "zero" black point (i.e. infinite contrast display). +struct pl_hdr_metadata { + // --- PL_HDR_METADATA_HDR10 + // Mastering display metadata. + struct pl_raw_primaries prim; // mastering display primaries + float min_luma, max_luma; // min/max luminance (in cd/m²) + + // Content light level. 
(Note: this is ignored by libplacebo itself) + float max_cll; // max content light level (in cd/m²) + float max_fall; // max frame average light level (in cd/m²) + + // --- PL_HDR_METADATA_HDR10PLUS + float scene_max[3]; // maxSCL in cd/m² per component (RGB) + float scene_avg; // average of maxRGB in cd/m² + struct pl_hdr_bezier ootf; // reference OOTF (optional) + + // --- PL_HDR_METADATA_CIE_Y + float max_pq_y; // maximum PQ luminance (in PQ, 0-1) + float avg_pq_y; // averaged PQ luminance (in PQ, 0-1) +}; + +PL_API extern const struct pl_hdr_metadata pl_hdr_metadata_empty; // equal to {0} +PL_API extern const struct pl_hdr_metadata pl_hdr_metadata_hdr10; // generic HDR10 display + +// Returns whether two sets of HDR metadata are exactly identical. +PL_API bool pl_hdr_metadata_equal(const struct pl_hdr_metadata *a, + const struct pl_hdr_metadata *b); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_hdr_metadata_merge(struct pl_hdr_metadata *orig, + const struct pl_hdr_metadata *update); + +// Returns `true` if `data` contains a complete set of a given metadata type. +// Note: for PL_HDR_METADATA_HDR10, only `min_luma` and `max_luma` are +// considered - CLL/FALL and primaries are irrelevant for HDR tone-mapping. +PL_API bool pl_hdr_metadata_contains(const struct pl_hdr_metadata *data, + enum pl_hdr_metadata_type type); + +// Rendering intent for colorspace transformations. These constants match the +// ICC specification (Table 23) +enum pl_rendering_intent { + PL_INTENT_AUTO = -1, // not a valid ICC intent, but used to auto-infer + PL_INTENT_PERCEPTUAL = 0, + PL_INTENT_RELATIVE_COLORIMETRIC = 1, + PL_INTENT_SATURATION = 2, + PL_INTENT_ABSOLUTE_COLORIMETRIC = 3 +}; + +// Struct describing a physical color space. This information is needed to +// turn a normalized RGB triple into its physical meaning, as well as to convert +// between color spaces. 
+struct pl_color_space { + enum pl_color_primaries primaries; + enum pl_color_transfer transfer; + + // HDR metadata for this color space, if present. (Optional) + struct pl_hdr_metadata hdr; +}; + +#define pl_color_space(...) (&(struct pl_color_space) { __VA_ARGS__ }) + +// Returns whether or not a color space is considered as effectively HDR. +// This is true when the effective signal peak is greater than the SDR +// reference white (1.0), taking into account `csp->hdr`. +PL_API bool pl_color_space_is_hdr(const struct pl_color_space *csp); + +// Returns whether or not a color space is "black scaled", in which case 0.0 is +// the true black point. This is true for SDR signals other than BT.1886, as +// well as for HLG. +PL_API bool pl_color_space_is_black_scaled(const struct pl_color_space *csp); + +struct pl_nominal_luma_params { + // The color space to infer luminance from + const struct pl_color_space *color; + + // Which type of metadata to draw values from + enum pl_hdr_metadata_type metadata; + + // This field controls the scaling of `out_*` + enum pl_hdr_scaling scaling; + + // Fields to write the detected nominal luminance to. (Optional) + // + // For SDR displays, this will default to a contrast level of 1000:1 unless + // indicated otherwise in the `min/max_luma` static HDR10 metadata fields. + float *out_min; + float *out_max; + + // Field to write the detected average luminance to, or 0.0 in the absence + // of dynamic metadata. (Optional) + float *out_avg; +}; + +#define pl_nominal_luma_params(...) \ + (&(struct pl_nominal_luma_params) { __VA_ARGS__ }) + +// Returns the effective luminance described by a pl_color_space. 
+PL_API void pl_color_space_nominal_luma_ex(const struct pl_nominal_luma_params *params); + +// Backwards compatibility wrapper for `pl_color_space_nominal_luma_ex` +PL_DEPRECATED PL_API void pl_color_space_nominal_luma(const struct pl_color_space *csp, + float *out_min, float *out_max); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_color_space_merge(struct pl_color_space *orig, + const struct pl_color_space *update); + +// Returns whether two colorspaces are exactly identical. +PL_API bool pl_color_space_equal(const struct pl_color_space *c1, + const struct pl_color_space *c2); + +// Go through a color-space and explicitly default all unknown fields to +// reasonable values. After this function is called, none of the values will be +// PL_COLOR_*_UNKNOWN or 0.0, except for the dynamic HDR metadata fields. +PL_API void pl_color_space_infer(struct pl_color_space *space); + +// Like `pl_color_space_infer`, but takes default values from the reference +// color space (excluding certain special cases like HDR or wide gamut). +PL_API void pl_color_space_infer_ref(struct pl_color_space *space, + const struct pl_color_space *ref); + +// Infer both the source and destination gamut simultaneously, and also adjust +// values for optimal display. This is mostly the same as +// `pl_color_space_infer(src)` followed by `pl_color_space_infer_ref`, but also +// takes into account the SDR contrast levels and PQ black points. This is +// basically the logic used by `pl_shader_color_map` and `pl_renderer` to +// decide the output color space in a conservative way and compute the final +// end-to-end color transformation that needs to be done. +PL_API void pl_color_space_infer_map(struct pl_color_space *src, + struct pl_color_space *dst); + +// Some common color spaces. Note: These don't necessarily have all fields +// filled, in particular `hdr` is left unset. 
+PL_API extern const struct pl_color_space pl_color_space_unknown; +PL_API extern const struct pl_color_space pl_color_space_srgb; +PL_API extern const struct pl_color_space pl_color_space_bt709; +PL_API extern const struct pl_color_space pl_color_space_hdr10; +PL_API extern const struct pl_color_space pl_color_space_bt2020_hlg; +PL_API extern const struct pl_color_space pl_color_space_monitor; // typical display + +// This represents metadata about extra operations to perform during colorspace +// conversion, which correspond to artistic adjustments of the color. +struct pl_color_adjustment { + // Brightness boost. 0.0 = neutral, 1.0 = solid white, -1.0 = solid black + float brightness; + // Contrast boost. 1.0 = neutral, 0.0 = solid black + float contrast; + // Saturation gain. 1.0 = neutral, 0.0 = grayscale + float saturation; + // Hue shift, corresponding to a rotation around the [U, V] subvector, in + // radians. 0.0 = neutral + float hue; + // Gamma adjustment. 1.0 = neutral, 0.0 = solid black + float gamma; + // Color temperature shift. 0.0 = 6500 K, -1.0 = 3000 K, 1.0 = 10000 K + float temperature; +}; + +#define PL_COLOR_ADJUSTMENT_NEUTRAL \ + .contrast = 1.0, \ + .saturation = 1.0, \ + .gamma = 1.0, + +#define pl_color_adjustment(...) (&(struct pl_color_adjustment) { PL_COLOR_ADJUSTMENT_NEUTRAL __VA_ARGS__ }) +PL_API extern const struct pl_color_adjustment pl_color_adjustment_neutral; + +// Represents the chroma placement with respect to the luma samples. This is +// only relevant for YCbCr-like colorspaces with chroma subsampling. +enum pl_chroma_location { + PL_CHROMA_UNKNOWN = 0, + PL_CHROMA_LEFT, // MPEG2/4, H.264 + PL_CHROMA_CENTER, // MPEG1, JPEG + PL_CHROMA_TOP_LEFT, + PL_CHROMA_TOP_CENTER, + PL_CHROMA_BOTTOM_LEFT, + PL_CHROMA_BOTTOM_CENTER, + PL_CHROMA_COUNT, +}; + +// Fills *x and *y with the offset in luma pixels corresponding to a given +// chroma location. 
+// +// Note: PL_CHROMA_UNKNOWN defaults to PL_CHROMA_LEFT +PL_API void pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y); + +// Returns an RGB->XYZ conversion matrix for a given set of primaries. +// Multiplying this into the RGB color transforms it to CIE XYZ, centered +// around the color space's white point. +PL_API pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim); + +// Similar to pl_get_rgb2xyz_matrix, but gives the inverse transformation. +PL_API pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim); + +// Returns a primary adaptation matrix, which converts from one set of +// primaries to another. This is an RGB->RGB transformation. For rendering +// intents other than PL_INTENT_ABSOLUTE_COLORIMETRIC, the white point is +// adapted using the Bradford matrix. +PL_API pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, + const struct pl_raw_primaries *dst, + enum pl_rendering_intent intent); + +// Return a chromatic adaptation matrix, which converts from one white point to +// another, using the Bradford matrix. This is an RGB->RGB transformation. +PL_API pl_matrix3x3 pl_get_adaptation_matrix(struct pl_cie_xy src, struct pl_cie_xy dst); + +// Returns true if 'b' is entirely contained in 'a'. Useful for figuring out if +// colorimetric clipping will occur or not. +PL_API bool pl_primaries_superset(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Returns true if `prim` forms a nominally valid set of primaries. This does +// not check whether or not these primaries are actually physically realisable, +// merely that they satisfy the requirements for colorspace math (to avoid NaN). +PL_API bool pl_primaries_valid(const struct pl_raw_primaries *prim); + +// Returns true if two primaries are 'compatible', which is the case if +// they preserve the relationship between primaries (red=red, green=green, +// blue=blue). 
In other words, this is false for synthetic primaries that have +// channels misordered from the convention (e.g. for some test ICC profiles). +PL_API bool pl_primaries_compatible(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Clip points in the first gamut (src) to be fully contained inside the second +// gamut (dst). Only works on compatible primaries (pl_primaries_compatible). +PL_API struct pl_raw_primaries +pl_primaries_clip(const struct pl_raw_primaries *src, + const struct pl_raw_primaries *dst); + +// Primary-dependent RGB->LMS matrix for the IPTPQc4 color system. This is +// derived from the HPE XYZ->LMS matrix with 4% crosstalk added. +PL_API pl_matrix3x3 pl_ipt_rgb2lms(const struct pl_raw_primaries *prim); +PL_API pl_matrix3x3 pl_ipt_lms2rgb(const struct pl_raw_primaries *prim); + +// Primary-independent L'M'S' -> IPT matrix for the IPTPQc4 color system, and +// its inverse. This is identical to the Ebner & Fairchild (1998) IPT matrix. +PL_API extern const pl_matrix3x3 pl_ipt_lms2ipt; +PL_API extern const pl_matrix3x3 pl_ipt_ipt2lms; + +// Cone types involved in human vision +enum pl_cone { + PL_CONE_L = 1 << 0, + PL_CONE_M = 1 << 1, + PL_CONE_S = 1 << 2, + + // Convenience aliases + PL_CONE_NONE = 0, + PL_CONE_LM = PL_CONE_L | PL_CONE_M, + PL_CONE_MS = PL_CONE_M | PL_CONE_S, + PL_CONE_LS = PL_CONE_L | PL_CONE_S, + PL_CONE_LMS = PL_CONE_L | PL_CONE_M | PL_CONE_S, +}; + +// Structure describing parameters for simulating color blindness +struct pl_cone_params { + enum pl_cone cones; // Which cones are *affected* by the vision model + float strength; // Coefficient for how strong the defect is + // (1.0 = Unaffected, 0.0 = Full blindness) +}; + +#define pl_cone_params(...) 
(&(struct pl_cone_params) { __VA_ARGS__ }) + +// Built-in color blindness models +PL_API extern const struct pl_cone_params pl_vision_normal; // No distortion (92%) +PL_API extern const struct pl_cone_params pl_vision_protanomaly; // Red deficiency (0.66%) +PL_API extern const struct pl_cone_params pl_vision_protanopia; // Red absence (0.59%) +PL_API extern const struct pl_cone_params pl_vision_deuteranomaly; // Green deficiency (2.7%) +PL_API extern const struct pl_cone_params pl_vision_deuteranopia; // Green absence (0.56%) +PL_API extern const struct pl_cone_params pl_vision_tritanomaly; // Blue deficiency (0.01%) +PL_API extern const struct pl_cone_params pl_vision_tritanopia; // Blue absence (0.016%) +PL_API extern const struct pl_cone_params pl_vision_monochromacy; // Blue cones only (<0.001%) +PL_API extern const struct pl_cone_params pl_vision_achromatopsia; // Rods only (<0.0001%) + +// Returns a cone adaptation matrix. Applying this to an RGB color in the given +// color space will apply the given cone adaptation coefficients for simulating +// a type of color blindness. +// +// For the color blindness models which don't entail complete loss of a cone, +// you can partially counteract the effect by using a similar model with the +// `strength` set to its inverse. For example, to partially counteract +// deuteranomaly, you could generate a cone matrix for PL_CONE_M with the +// strength 2.0 (or some other number above 1.0). +PL_API pl_matrix3x3 pl_get_cone_matrix(const struct pl_cone_params *params, + const struct pl_raw_primaries *prim); + +// Returns a color decoding matrix for a given combination of source color +// representation and adjustment parameters. This mutates `repr` to reflect the +// change. If `params` is NULL, it defaults to &pl_color_adjustment_neutral. +// +// This function always performs a conversion to RGB. To convert to other +// colorspaces (e.g. 
between YUV systems), obtain a second YUV->RGB matrix +// and invert it using `pl_transform3x3_invert`. +// +// Note: For BT.2020 constant-luminance, this outputs chroma information in the +// range [-0.5, 0.5]. Since the CL system conversion is non-linear, further +// processing must be done by the caller. The channel order is CrYCb. +// +// Note: For BT.2100 ICtCp, this outputs in the color space L'M'S'. Further +// non-linear processing must be done by the caller. +// +// Note: XYZ system is expected to be in DCDM X'Y'Z' encoding (ST 428-1), in +// practice this means normalizing by (48.0 / 52.37) factor and applying 2.6 gamma +PL_API pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr, + const struct pl_color_adjustment *params); + +// Common struct to describe an ICC profile +struct pl_icc_profile { + // Points to the in-memory representation of the ICC profile. This is + // allowed to be NULL, in which case the `pl_icc_profile` represents "no + // profile". + const void *data; + size_t len; + + // If a profile is set, this signature must uniquely identify it (including + // across restarts, for caching), ideally using a checksum of the profile + // contents. The user is free to choose the method of determining this + // signature, but note the existence of the + // `pl_icc_profile_compute_signature` helper. + uint64_t signature; +}; + +#define pl_icc_profile(...) &(struct pl_icc_profile) { __VA_ARGS__ } + +// This doesn't do a comparison of the actual contents, only of the signature. +PL_API bool pl_icc_profile_equal(const struct pl_icc_profile *p1, + const struct pl_icc_profile *p2); + +// Sets `signature` to a hash of `profile->data`, if non-NULL. Provided as a +// convenience function for the sake of users ingesting arbitrary ICC profiles +// from sources where they can't reliably detect profile changes. 
+// +// Note: This is based on a very fast hash, and will compute a signature for +// even large (10 MB) ICC profiles in, typically, a fraction of a millisecond. +PL_API void pl_icc_profile_compute_signature(struct pl_icc_profile *profile); + +PL_API_END + +#endif // LIBPLACEBO_COLORSPACE_H_ diff --git a/src/include/libplacebo/common.h b/src/include/libplacebo/common.h new file mode 100644 index 0000000..806730c --- /dev/null +++ b/src/include/libplacebo/common.h @@ -0,0 +1,244 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_COMMON_H_ +#define LIBPLACEBO_COMMON_H_ + +#include <stdbool.h> + +#include <libplacebo/config.h> + +PL_API_BEGIN + +// Some common utility types. These are overloaded to support 2D, 3D and +// integer/float variants. +typedef struct pl_rect2d { + int x0, y0; + int x1, y1; +} pl_rect2d; + +typedef struct pl_rect3d { + int x0, y0, z0; + int x1, y1, z1; +} pl_rect3d; + +typedef struct pl_rect2df { + float x0, y0; + float x1, y1; +} pl_rect2df; + +typedef struct pl_rect3df { + float x0, y0, z0; + float x1, y1, z1; +} pl_rect3df; + +// These macros will work for any of the above pl_rect variants (with enough +// dimensions). 
Careful: double-evaluation hazard +#define pl_rect_w(r) ((r).x1 - (r).x0) +#define pl_rect_h(r) ((r).y1 - (r).y0) +#define pl_rect_d(r) ((r).z1 - (r).z0) + +#define pl_rect2d_eq(a, b) \ + ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ + (a).y0 == (b).y0 && (a).y1 == (b).y1) + +#define pl_rect3d_eq(a, b) \ + ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \ + (a).y0 == (b).y0 && (a).y1 == (b).y1 && \ + (a).z0 == (b).z0 && (a).z1 == (b).z1) + +// "Normalize" a rectangle: This ensures d1 >= d0 for all dimensions. +PL_API void pl_rect2d_normalize(pl_rect2d *rc); +PL_API void pl_rect3d_normalize(pl_rect3d *rc); + +PL_API void pl_rect2df_normalize(pl_rect2df *rc); +PL_API void pl_rect3df_normalize(pl_rect3df *rc); + +// Return the rounded form of a rect. +PL_API pl_rect2d pl_rect2df_round(const pl_rect2df *rc); +PL_API pl_rect3d pl_rect3df_round(const pl_rect3df *rc); + +// Represents a row-major matrix, i.e. the following matrix +// [ a11 a12 a13 ] +// [ a21 a22 a23 ] +// [ a31 a32 a33 ] +// is represented in C like this: +// { { a11, a12, a13 }, +// { a21, a22, a23 }, +// { a31, a32, a33 } }; +typedef struct pl_matrix3x3 { + float m[3][3]; +} pl_matrix3x3; + +PL_API extern const pl_matrix3x3 pl_matrix3x3_identity; + +// Applies a matrix to a float vector in-place. +PL_API void pl_matrix3x3_apply(const pl_matrix3x3 *mat, float vec[3]); + +// Applies a matrix to a pl_rect3df +PL_API void pl_matrix3x3_apply_rc(const pl_matrix3x3 *mat, pl_rect3df *rc); + +// Scales a color matrix by a linear factor. +PL_API void pl_matrix3x3_scale(pl_matrix3x3 *mat, float scale); + +// Inverts a matrix. Only use where precision is not that important. +PL_API void pl_matrix3x3_invert(pl_matrix3x3 *mat); + +// Composes/multiplies two matrices. Multiplies B into A, i.e. +// A := A * B +PL_API void pl_matrix3x3_mul(pl_matrix3x3 *a, const pl_matrix3x3 *b); + +// Flipped version of `pl_matrix3x3_mul`. 
+// B := A * B +PL_API void pl_matrix3x3_rmul(const pl_matrix3x3 *a, pl_matrix3x3 *b); + +// Represents an affine transformation, which is basically a 3x3 matrix +// together with a column vector to add onto the output. +typedef struct pl_transform3x3 { + pl_matrix3x3 mat; + float c[3]; +} pl_transform3x3; + +PL_API extern const pl_transform3x3 pl_transform3x3_identity; + +// Applies a transform to a float vector in-place. +PL_API void pl_transform3x3_apply(const pl_transform3x3 *t, float vec[3]); + +// Applies a transform to a pl_rect3df +PL_API void pl_transform3x3_apply_rc(const pl_transform3x3 *t, pl_rect3df *rc); + +// Scales the output of a transform by a linear factor. Since an affine +// transformation is non-linear, this does not commute. If you want to scale +// the *input* of a transform, use pl_matrix3x3_scale on `t.mat`. +PL_API void pl_transform3x3_scale(pl_transform3x3 *t, float scale); + +// Inverts a transform. Only use where precision is not that important. +PL_API void pl_transform3x3_invert(pl_transform3x3 *t); + +// 2D analog of the above structs. Since these are featured less prominently, +// we omit some of the other helper functions. 
+typedef struct pl_matrix2x2 { + float m[2][2]; +} pl_matrix2x2; + +PL_API extern const pl_matrix2x2 pl_matrix2x2_identity; +PL_API pl_matrix2x2 pl_matrix2x2_rotation(float angle); + +PL_API void pl_matrix2x2_apply(const pl_matrix2x2 *mat, float vec[2]); +PL_API void pl_matrix2x2_apply_rc(const pl_matrix2x2 *mat, pl_rect2df *rc); + +PL_API void pl_matrix2x2_mul(pl_matrix2x2 *a, const pl_matrix2x2 *b); +PL_API void pl_matrix2x2_rmul(const pl_matrix2x2 *a, pl_matrix2x2 *b); + +PL_API void pl_matrix2x2_scale(pl_matrix2x2 *mat, float scale); +PL_API void pl_matrix2x2_invert(pl_matrix2x2 *mat); + +typedef struct pl_transform2x2 { + pl_matrix2x2 mat; + float c[2]; +} pl_transform2x2; + +PL_API extern const pl_transform2x2 pl_transform2x2_identity; + +PL_API void pl_transform2x2_apply(const pl_transform2x2 *t, float vec[2]); +PL_API void pl_transform2x2_apply_rc(const pl_transform2x2 *t, pl_rect2df *rc); + +PL_API void pl_transform2x2_mul(pl_transform2x2 *a, const pl_transform2x2 *b); +PL_API void pl_transform2x2_rmul(const pl_transform2x2 *a, pl_transform2x2 *b); + +PL_API void pl_transform2x2_scale(pl_transform2x2 *t, float scale); +PL_API void pl_transform2x2_invert(pl_transform2x2 *t); + +// Compute new bounding box of a transformation (as applied to a given rect). +PL_API pl_rect2df pl_transform2x2_bounds(const pl_transform2x2 *t, + const pl_rect2df *rc); + +// Helper functions for dealing with aspect ratios and stretched/scaled rects. + +// Return the (absolute) aspect ratio (width/height) of a given pl_rect2df. +// This will always be a positive number, even if `rc` is flipped. +PL_API float pl_rect2df_aspect(const pl_rect2df *rc); + +// Set the aspect of a `rc` to a given aspect ratio with an extra 'panscan' +// factor choosing the balance between shrinking and growing the `rc` to meet +// this aspect ratio. +// +// Notes: +// - If `panscan` is 0.0, this function will only ever shrink the `rc`. +// - If `panscan` is 1.0, this function will only ever grow the `rc`. 
+// - If `panscan` is 0.5, this function is area-preserving. +PL_API void pl_rect2df_aspect_set(pl_rect2df *rc, float aspect, float panscan); + +// Set one rect's aspect to that of another +#define pl_rect2df_aspect_copy(rc, src, panscan) \ + pl_rect2df_aspect_set((rc), pl_rect2df_aspect(src), (panscan)) + +// 'Fit' one rect inside another. `rc` will be set to the same size and aspect +// ratio as `src`, but with the size limited to fit inside the original `rc`. +// Like `pl_rect2df_aspect_set`, `panscan` controls the pan&scan factor. +PL_API void pl_rect2df_aspect_fit(pl_rect2df *rc, const pl_rect2df *src, float panscan); + +// Scale rect in each direction while keeping it centered. +PL_API void pl_rect2df_stretch(pl_rect2df *rc, float stretch_x, float stretch_y); + +// Offset rect by an arbitrary offset factor. If the corresponding dimension +// of a rect is flipped, so too is the applied offset. +PL_API void pl_rect2df_offset(pl_rect2df *rc, float offset_x, float offset_y); + +// Scale a rect uniformly in both dimensions. +#define pl_rect2df_zoom(rc, zoom) pl_rect2df_stretch((rc), (zoom), (zoom)) + +// Rotation in degrees clockwise +typedef int pl_rotation; +enum { + PL_ROTATION_0 = 0, + PL_ROTATION_90 = 1, + PL_ROTATION_180 = 2, + PL_ROTATION_270 = 3, + PL_ROTATION_360 = 4, // equivalent to PL_ROTATION_0 + + // Note: Values outside the range [0,4) are legal, including negatives. +}; + +// Constrains to the interval [PL_ROTATION_0, PL_ROTATION_360). +static inline pl_rotation pl_rotation_normalize(pl_rotation rot) +{ + return (rot % PL_ROTATION_360 + PL_ROTATION_360) % PL_ROTATION_360; +} + +// Rotates the coordinate system of a `pl_rect2d(f)` in a certain direction. +// For example, calling this with PL_ROTATION_90 will correspond to rotating +// the coordinate system 90° to the right (so the x axis becomes the y axis). +// +// The resulting rect is re-normalized in the same coordinate system. 
+PL_API void pl_rect2df_rotate(pl_rect2df *rc, pl_rotation rot); + +// Returns the aspect ratio in a rotated frame of reference. +static inline float pl_aspect_rotate(float aspect, pl_rotation rot) +{ + return (rot % PL_ROTATION_180) ? 1.0 / aspect : aspect; +} + +#define pl_rect2df_aspect_set_rot(rc, aspect, rot, panscan) \ + pl_rect2df_aspect_set((rc), pl_aspect_rotate((aspect), (rot)), (panscan)) + +#define pl_rect2df_aspect_copy_rot(rc, src, panscan, rot) \ + pl_rect2df_aspect_set_rot((rc), pl_rect2df_aspect(src), (rot), (panscan)) + +PL_API_END + +#endif // LIBPLACEBO_COMMON_H_ diff --git a/src/include/libplacebo/config.h.in b/src/include/libplacebo/config.h.in new file mode 100644 index 0000000..2ed6290 --- /dev/null +++ b/src/include/libplacebo/config.h.in @@ -0,0 +1,102 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_CONFIG_H_ +#define LIBPLACEBO_CONFIG_H_ + +// Increased any time the library changes in a fundamental/major way. +#define PL_MAJOR_VER @majorver@ + +// Increased any time the API changes. (Note: Does not reset when PL_MAJOR_VER +// is increased) +#define PL_API_VER @apiver@ + +// Increased any time a fix is made to a given API version. 
+#define PL_FIX_VER (pl_fix_ver()) + +// Friendly name (`git describe`) for the overall version of the library +#define PL_VERSION (pl_version()) + +// Feature tests. These aren't described in further detail, but may be useful +// for programmers wanting to programmatically check for feature support +// in their compiled libplacebo versions. +@extra_defs@ + +// Extra compiler-specific stuff +#ifndef PL_DEPRECATED +# if defined(_MSC_VER) +# define PL_DEPRECATED +# else +# define PL_DEPRECATED __attribute__((deprecated)) +# endif +#endif + +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#ifndef PL_DEPRECATED_ENUMERATOR +# if (defined(__GNUC__) && (__GNUC__ >= 6)) || __has_feature(enumerator_attributes) +# define PL_DEPRECATED_ENUMERATOR PL_DEPRECATED +# else +# define PL_DEPRECATED_ENUMERATOR +# endif +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +# ifdef PL_EXPORT +# define PL_API __declspec(dllexport) +# else +# ifndef PL_STATIC +# define PL_API __declspec(dllimport) +# else +# define PL_API +# endif +# endif +#else +# define PL_API __attribute__ ((visibility ("default"))) +#endif + +// C++ compatibility +#ifdef __cplusplus +# define PL_API_BEGIN extern "C" { +# define PL_API_END } +#else +# define PL_API_BEGIN +# define PL_API_END +#endif + +#ifndef __cplusplus +// Disable this warning because libplacebo's params macros override fields +# pragma GCC diagnostic ignored "-Woverride-init" +#endif + +// Extra helper macros +#define PL_TOSTRING_INNER(x) #x +#define PL_TOSTRING(x) PL_TOSTRING_INNER(x) + +// Deprecated macro for back-compatibility +#define PL_STRUCT(name) struct name##_t + +PL_API_BEGIN + +PL_API int pl_fix_ver(void); +PL_API const char *pl_version(void); + +PL_API_END + +#endif // LIBPLACEBO_CONFIG_H_ diff --git a/src/include/libplacebo/d3d11.h b/src/include/libplacebo/d3d11.h new file mode 100644 index 0000000..8ecba30 --- /dev/null +++ b/src/include/libplacebo/d3d11.h @@ -0,0 +1,248 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_D3D11_H_ +#define LIBPLACEBO_D3D11_H_ + +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> +#include <libplacebo/gpu.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +// Structure representing the actual D3D11 device and associated GPU instance +typedef const struct pl_d3d11_t { + pl_gpu gpu; + + // The D3D11 device in use. The user is free to use this for their own + // purposes, including taking a reference to the device (with AddRef) and + // using it beyond the lifetime of the pl_d3d11 that created it (though if + // this is done with debug enabled, it will confuse the leak checker.) + ID3D11Device *device; + + // True if the device is using a software (WARP) adapter + bool software; +} *pl_d3d11; + +struct pl_d3d11_params { + // The Direct3D 11 device to use. Optional, if NULL then libplacebo will + // create its own ID3D11Device using the options below. If set, all the + // options below will be ignored. + ID3D11Device *device; + + // --- Adapter selection options + + // The adapter to use. This overrides adapter_luid. + IDXGIAdapter *adapter; + + // The LUID of the adapter to use. If adapter and adapter_luid are unset, + // the default adapter will be used instead. 
+ LUID adapter_luid; + + // Allow a software (WARP) adapter when selecting the adapter automatically. + // Note that sometimes the default adapter will be a software adapter. This + // is because, on Windows 8 and up, if there are no hardware adapters, + // Windows will pretend the WARP adapter is the default hardware adapter. + bool allow_software; + + // Always use a software adapter. This is mainly for testing purposes. + bool force_software; + + // --- Device creation options + + // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG) + // Also logs IDXGIInfoQueue messages + bool debug; + + // Extra flags to pass to D3D11CreateDevice (D3D11_CREATE_DEVICE_FLAG). + // libplacebo should be compatible with any flags passed here. + UINT flags; + + // The minimum and maximum allowable feature levels for the created device. + // libplacebo will attempt to create a device with the highest feature level + // between min_feature_level and max_feature_level (inclusive.) If there are + // no supported feature levels in this range, `pl_d3d11_create` will either + // return NULL or fall back to the software adapter, depending on whether + // `allow_software` is set. + // + // Normally there is no reason to set `max_feature_level` other than to test + // if a program works at lower feature levels. + // + // Note that D3D_FEATURE_LEVEL_9_3 and below (known as 10level9) are highly + // restrictive. These feature levels are supported on a best-effort basis. + // They represent very old DirectX 9 compatible PC and laptop hardware + // (2001-2007, GeForce FX, 6, 7, ATI R300-R500, GMA 950-X3000) and some + // less-old mobile devices (Surface RT, Surface 2.) Basic video rendering + // should work, but the full pl_gpu API will not be available and advanced + // shaders will probably fail. The hardware is probably too slow for these + // anyway. 
+ // + // Known restrictions of 10level9 devices include: + // D3D_FEATURE_LEVEL_9_3 and below: + // - `pl_pass_run_params->index_buf` will not work (but `index_data` will) + // - Dimensions of 3D textures must be powers of two + // - Shaders cannot use gl_FragCoord + // - Shaders cannot use texelFetch + // D3D_FEATURE_LEVEL_9_2 and below: + // - Fragment shaders have no dynamic flow control and very strict limits + // on the number of constants, temporary registers and instructions. + // Whether a shader meets the requirements will depend on how it's + // compiled and optimized, but it's likely that only simple shaders will + // work. + // D3D_FEATURE_LEVEL_9_1: + // - No high-bit-depth formats with PL_FMT_CAP_RENDERABLE or + // PL_FMT_CAP_LINEAR + // + // If these restrictions are undesirable and you don't need to support + // ancient hardware, set `min_feature_level` to D3D_FEATURE_LEVEL_10_0. + int min_feature_level; // Defaults to D3D_FEATURE_LEVEL_9_1 if unset + int max_feature_level; // Defaults to D3D_FEATURE_LEVEL_12_1 if unset + + // Allow up to N in-flight frames. Similar to swapchain_depth for Vulkan and + // OpenGL, though with DXGI this is a device-wide setting that affects all + // swapchains (except for waitable swapchains.) See the documentation for + // `pl_swapchain_latency` for more information. + int max_frame_latency; +}; + +// Default/recommended parameters. Should generally be safe and efficient. +#define PL_D3D11_DEFAULTS \ + .allow_software = true, + +#define pl_d3d11_params(...) (&(struct pl_d3d11_params) { PL_D3D11_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_d3d11_params pl_d3d11_default_params; + +// Creates a new Direct3D 11 device based on the given parameters, or wraps an +// existing device, and initializes a new GPU instance. If params is left as +// NULL, it defaults to &pl_d3d11_default_params. 
If an existing device is +// provided in params->device, `pl_d3d11_create` will take a reference to it +// that will be released in `pl_d3d11_destroy`. +PL_API pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params); + +// Release the D3D11 device. +// +// Note that all libplacebo objects allocated from this pl_d3d11 object (e.g. +// via `d3d11->gpu` or using `pl_d3d11_create_swapchain`) *must* be explicitly +// destroyed by the user before calling this. +PL_API void pl_d3d11_destroy(pl_d3d11 *d3d11); + +// For a `pl_gpu` backed by `pl_d3d11`, this function can be used to retrieve +// the underlying `pl_d3d11`. Returns NULL for any other type of `gpu`. +PL_API pl_d3d11 pl_d3d11_get(pl_gpu gpu); + +struct pl_d3d11_swapchain_params { + // The Direct3D 11 swapchain to wrap. Optional. If NULL, libplacebo will + // create its own swapchain using the options below. If set, all the + // swapchain creation options will be ignored. + // + // The provided swapchain must have been created by the same device used + // by `gpu` and must not have multisampled backbuffers. + IDXGISwapChain *swapchain; + + // --- Swapchain creation options + + // Initial framebuffer width and height. If both width and height are set to + // 0 and window is non-NULL, the client area of the window is used instead. + // For convenience, if either component would be 0, it is set to 1 instead. + // This is because Windows can have 0-sized windows, but not 0-sized + // swapchains. + int width; + int height; + + // The handle of the output window. In Windows 8 and up this is optional + // because you can output to a CoreWindow or create a composition swapchain + // instead. + HWND window; + + // A pointer to the CoreWindow to output to. If both this and `window` are + // NULL, CreateSwapChainForComposition will be used to create the swapchain. 
+ IUnknown *core_window; + + // If set, libplacebo will create a swapchain that uses the legacy bitblt + // presentation model (with the DXGI_SWAP_EFFECT_DISCARD swap effect.) This + // tends to give worse performance and frame pacing in windowed mode and it + // prevents borderless fullscreen optimizations, but it might be necessary + // to work around buggy drivers, especially with DXGI 1.2 in the Platform + // Update for Windows 7. When unset, libplacebo will try to use the flip + // presentation model and only fall back to bitblt if flip is unavailable. + bool blit; + + // additional swapchain flags + // No validation on these flags is being performed, and swapchain creation + // may fail if an unsupported combination is requested. + UINT flags; + + // --- Swapchain usage behavior options + + // Disable using a 10-bit swapchain format for SDR output + bool disable_10bit_sdr; +}; + +#define pl_d3d11_swapchain_params(...) (&(struct pl_d3d11_swapchain_params) { __VA_ARGS__ }) + +// Creates a new Direct3D 11 swapchain, or wraps an existing one. If an existing +// swapchain is provided in params->swapchain, `pl_d3d11_create_swapchain` will +// take a reference to it that will be released in `pl_swapchain_destroy`. +PL_API pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, + const struct pl_d3d11_swapchain_params *params); + +// Takes a `pl_swapchain` created by pl_d3d11_create_swapchain and returns a +// reference to the underlying IDXGISwapChain. This increments the refcount, so +// call IDXGISwapChain::Release when finished with it. +PL_API IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw); + +struct pl_d3d11_wrap_params { + // The D3D11 texture to wrap, or a texture array containing the texture to + // wrap. Must be a ID3D11Texture1D, ID3D11Texture2D or ID3D11Texture3D + // created by the same device used by `gpu`, must have D3D11_USAGE_DEFAULT, + // and must not be mipmapped or multisampled. 
+ ID3D11Resource *tex; + + // If tex is a texture array, this is the array member to use as the pl_tex. + int array_slice; + + // If tex is a video resource (eg. DXGI_FORMAT_AYUV, DXGI_FORMAT_NV12, + // DXGI_FORMAT_P010, etc.,) it can be wrapped as a pl_tex by specifying the + // type and size of the shader view. For planar video formats, the plane + // that is wrapped depends on the chosen format. + // + // If tex is not a video resource, these fields are unnecessary. The correct + // format will be determined automatically. If tex is not 2D, these fields + // are ignored. + // + // For a list of supported video formats and their corresponding view + // formats and sizes, see: + // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#VideoViews + DXGI_FORMAT fmt; + int w; + int h; +}; + +#define pl_d3d11_wrap_params(...) (&(struct pl_d3d11_wrap_params) { __VA_ARGS__ }) + +// Wraps an external texture into a pl_tex abstraction. `pl_d3d11_wrap` takes a +// reference to the texture, which is released when `pl_tex_destroy` is called. +// +// This function may fail due to incompatible formats, incompatible flags or +// other reasons, in which case it will return NULL. +PL_API pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params); + +PL_API_END + +#endif // LIBPLACEBO_D3D11_H_ diff --git a/src/include/libplacebo/dispatch.h b/src/include/libplacebo/dispatch.h new file mode 100644 index 0000000..7d43794 --- /dev/null +++ b/src/include/libplacebo/dispatch.h @@ -0,0 +1,239 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DISPATCH_H_ +#define LIBPLACEBO_DISPATCH_H_ + +#include <libplacebo/shaders.h> +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// Thread-safety: Safe +typedef struct pl_dispatch_t *pl_dispatch; + +// Creates a new shader dispatch object. This object provides a translation +// layer between generated shaders (pl_shader) and the `pl_gpu` context such that it +// can be used to execute shaders. This dispatch object will also provide +// shader caching (for efficient re-use). +PL_API pl_dispatch pl_dispatch_create(pl_log log, pl_gpu gpu); +PL_API void pl_dispatch_destroy(pl_dispatch *dp); + +// Resets/increments the internal counters of the pl_dispatch. This must be +// called whenever the user is going to begin with a new frame, in order to +// perform garbage collection and advance the state of the internal PRNG. +// +// Note that shaders generated by `pl_dispatch` are therefore entirely +// deterministic, as long as the sequence of calls (and inputs to the shader) +// are the same. +PL_API void pl_dispatch_reset_frame(pl_dispatch dp); + +// Returns a blank pl_shader object, suitable for recording rendering commands. +// For more information, see the header documentation in `shaders/*.h`. +PL_API pl_shader pl_dispatch_begin(pl_dispatch dp); + +// Struct passed to `info_callback`. Only valid until that function returns. +struct pl_dispatch_info { + // Information about the shader for this shader execution, as well as a + // 64-bit signature uniquely identifying it. 
+ pl_shader_info shader; + uint64_t signature; + + // A list of execution times for this pass, in nanoseconds. May be empty. + uint64_t samples[256]; + int num_samples; + + // As a convenience, this contains the last, average and peak of the above + // list of samples. If `num_samples` is 0, these values are also 0. + uint64_t last; + uint64_t peak; + uint64_t average; +}; + +// Helper function to make a copy of `pl_dispatch_info`, while overriding +// (and dereferencing) whatever was previously stored there. +static inline void pl_dispatch_info_move(struct pl_dispatch_info *dst, + const struct pl_dispatch_info *src) +{ + pl_shader_info_deref(&dst->shader); + *dst = *src; + dst->shader = pl_shader_info_ref(src->shader); +} + +// Set up a dispatch callback for this `pl_dispatch` object. The given callback +// will be run for every successfully dispatched shader. Call this again with +// `cb == NULL` to disable. +PL_API void pl_dispatch_callback(pl_dispatch dp, void *priv, + void (*cb)(void *priv, + const struct pl_dispatch_info *)); + +struct pl_dispatch_params { + // The shader to execute. The pl_dispatch will take over ownership + // of this shader, and return it back to the internal pool. + // + // This shader must have a compatible signature, i.e. inputs + // `PL_SHADER_SIG_NONE` and outputs `PL_SHADER_SIG_COLOR`. + pl_shader *shader; + + // The texture to render to. This must have params compatible with the + // shader, i.e. `target->params.renderable` for fragment shaders and + // `target->params.storable` for compute shaders. + // + // Note: Even when not using compute shaders, users are advised to always + // set `target->params.storable` if permitted by the `pl_fmt`, since this + // allows the use of compute shaders instead of full-screen quads, which is + // faster on some platforms. + pl_tex target; + + // The target rect to render to. Optional, if left as {0}, then the + // entire texture will be rendered to. 
+ pl_rect2d rect; + + // If set, enables and controls the blending for this pass. Optional. When + // using this with fragment shaders, `target->params.fmt->caps` must + // include `PL_FMT_CAP_BLENDABLE`. + const struct pl_blend_params *blend_params; + + // If set, records the execution time of this dispatch into the given + // timer object. Optional. + // + // Note: If this is set, `pl_dispatch` cannot internally measure the + // execution time of the shader, which means `pl_dispatch_info.samples` may + // be empty as a result. + pl_timer timer; +}; + +#define pl_dispatch_params(...) (&(struct pl_dispatch_params) { __VA_ARGS__ }) + +// Dispatch a generated shader (via the pl_shader mechanism). Returns whether +// or not the dispatch was successful. +PL_API bool pl_dispatch_finish(pl_dispatch dp, const struct pl_dispatch_params *params); + +struct pl_dispatch_compute_params { + // The shader to execute. This must be a compute shader with the input + // set to PL_SHADER_SIG_NONE. The output, if it has any, is ignored. + pl_shader *shader; + + // The number of work groups to dispatch in each dimension. If this is left + // as {0} and `width/height` are both set, the number of work groups will + // be inferred from the shader's `compute_group_sizes`. + int dispatch_size[3]; + + // If set, simulate vertex attributes (similar to `pl_dispatch_finish`) + // according to the given dimensions. The first two components of the + // thread's global ID will be interpreted as the X and Y locations. + // + // Optional, ignored if either component is left as 0. + int width, height; + + // If set, records the execution time of this dispatch into the given + // timer object. Optional. + // + // Note: If this is set, `pl_dispatch` cannot internally measure the + // execution time of the shader, which means `pl_dispatch_info.samples` may + // be empty as a result. + pl_timer timer; +}; + +#define pl_dispatch_compute_params(...)
(&(struct pl_dispatch_compute_params) { __VA_ARGS__ }) + +// A variant of `pl_dispatch_finish`, this one only dispatches a compute shader +// while ignoring its output (if it has one). It's only useful for shaders +// which have otherwise observable side effects (such as updating state +// objects). +PL_API bool pl_dispatch_compute(pl_dispatch dp, const struct pl_dispatch_compute_params *params); + +enum pl_vertex_coords { + PL_COORDS_ABSOLUTE, // Absolute/integer `target` coordinates + PL_COORDS_RELATIVE, // Relative `target` coordinates in range [0, 1] + PL_COORDS_NORMALIZED, // GL-normalized coordinates in range [-1, 1] +}; + +struct pl_dispatch_vertex_params { + // The shader to execute. This must be a raster shader with the input set + // to `PL_SHADER_SIG_NONE` and the output set to `PL_SHADER_SIG_COLOR`. + // + // Additionally, the shader must not have any attached vertex attributes. + pl_shader *shader; + + // The texture to render to. Requires `target->params.renderable`. + pl_tex target; + + // The target rect to clip the rendering to. (Optional) + pl_rect2d scissors; + + // If set, enables and controls the blending for this pass. Optional. When + // enabled, `target->params.fmt->caps` must include `PL_FMT_CAP_BLENDABLE`. + const struct pl_blend_params *blend_params; + + // The description of the vertex format, including offsets. + // + // Note: `location` is ignored and can safely be left unset. + const struct pl_vertex_attrib *vertex_attribs; + int num_vertex_attribs; + size_t vertex_stride; + + // The index of the vertex position in `vertex_attribs`, as well as the + // interpretation of its contents. + int vertex_position_idx; + enum pl_vertex_coords vertex_coords; + bool vertex_flipped; // flip all vertex y coordinates + + // Type and number of vertices to render. + enum pl_prim_type vertex_type; + int vertex_count; + + // Vertex data. See `pl_pass_run_params.vertex_data`. 
+ const void *vertex_data; + pl_buf vertex_buf; + size_t buf_offset; + + // Index data. See `pl_pass_run_params.index_data`. Optional. + const void *index_data; + enum pl_index_format index_fmt; + pl_buf index_buf; + size_t index_offset; + + // If set, records the execution time of this dispatch into the given + // timer object. Optional. + // + // Note: If this is set, `pl_dispatch` cannot internally measure the + // execution time of the shader, which means `pl_dispatch_info.samples` may + // be empty as a result. + pl_timer timer; +}; + +#define pl_dispatch_vertex_params(...) (&(struct pl_dispatch_vertex_params) { __VA_ARGS__ }) + +// Dispatch a generated shader using custom vertices, rather than using a quad +// generated by the dispatch. This allows the use of e.g. custom fragment +// shaders for things like rendering custom UI elements, or possibly doing +// advanced things like sampling from a cube map or spherical video. +PL_API bool pl_dispatch_vertex(pl_dispatch dp, const struct pl_dispatch_vertex_params *params); + +// Cancel an active shader without submitting anything. Useful, for example, +// if the shader was instead merged into a different shader. +PL_API void pl_dispatch_abort(pl_dispatch dp, pl_shader *sh); + +// Deprecated in favor of `pl_cache_save/pl_cache_load` on the `pl_cache` +// associated with the `pl_gpu` this dispatch is using. +PL_DEPRECATED PL_API size_t pl_dispatch_save(pl_dispatch dp, uint8_t *out_cache); +PL_DEPRECATED PL_API void pl_dispatch_load(pl_dispatch dp, const uint8_t *cache); + +PL_API_END + +#endif // LIBPLACEBO_DISPATCH_H diff --git a/src/include/libplacebo/dither.h b/src/include/libplacebo/dither.h new file mode 100644 index 0000000..84f17c7 --- /dev/null +++ b/src/include/libplacebo/dither.h @@ -0,0 +1,82 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DITHER_H_ +#define LIBPLACEBO_DITHER_H_ + +#include <libplacebo/common.h> + +PL_API_BEGIN + +// Generates a deterministic NxN bayer (ordered) dither matrix, storing the +// result in `data`. `size` must be a power of two. The resulting matrix will +// be roughly uniformly distributed within the range [0,1). +PL_API void pl_generate_bayer_matrix(float *data, int size); + +// Generates a random NxN blue noise texture, storing the result in `data`. +// `size` must be a positive power of two no larger than 256. The resulting +// texture will be roughly uniformly distributed within the range [0,1). +// +// Note: This function is very, *very* slow for large sizes. Generating a +// dither matrix with size 256 can take several seconds on a modern processor. +PL_API void pl_generate_blue_noise(float *data, int size); + +// Defines the border of all error diffusion kernels +#define PL_EDF_MIN_DX (-2) +#define PL_EDF_MAX_DX (2) +#define PL_EDF_MAX_DY (2) + +struct pl_error_diffusion_kernel { + const char *name; // Short and concise identifier + const char *description; // Longer / friendly name + + // The minimum value such that a (y, x) -> (y, x + y * shift) mapping will + // make all error pushing operations affect next column (and after it) + // only.
+ // + // Higher shift values are significantly more computationally intensive. + int shift; + + // The diffusion factor for (y, x) is pattern[y][x - PL_EDF_MIN_DX] / divisor. + int pattern[PL_EDF_MAX_DY + 1][PL_EDF_MAX_DX - PL_EDF_MIN_DX + 1]; + int divisor; +}; + +// Algorithms with shift=1: +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_simple; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_false_fs; +// Algorithms with shift=2: +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra_lite; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_floyd_steinberg; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_atkinson; +// Algorithms with shift=3, probably too heavy for low end GPUs: +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_jarvis_judice_ninke; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_stucki; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_burkes; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra2; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra3; + +// A list of built-in error diffusion kernels, terminated by NULL +PL_API extern const struct pl_error_diffusion_kernel * const pl_error_diffusion_kernels[]; +PL_API extern const int pl_num_error_diffusion_kernels; // excluding trailing NULL + +// Find the error diffusion kernel with the given name, or NULL on failure. +PL_API const struct pl_error_diffusion_kernel *pl_find_error_diffusion_kernel(const char *name); + +PL_API_END + +#endif // LIBPLACEBO_DITHER_H_ diff --git a/src/include/libplacebo/dummy.h b/src/include/libplacebo/dummy.h new file mode 100644 index 0000000..c298438 --- /dev/null +++ b/src/include/libplacebo/dummy.h @@ -0,0 +1,131 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DUMMY_H_ +#define LIBPLACEBO_DUMMY_H_ + +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// The functions in this file allow creating and manipulating "dummy" contexts. +// A dummy context isn't actually mapped by the GPU, all data exists purely on +// the CPU. It also isn't capable of compiling or executing any shaders, any +// attempts to do so will simply fail. +// +// The main use case for this dummy context is for users who want to generate +// advanced shaders that depend on specific GLSL features or support for +// certain types of GPU resources (e.g. LUTs). This dummy context allows such +// shaders to be generated, with all of the referenced shader objects and +// textures simply containing their data in a host-accessible way. + +struct pl_gpu_dummy_params { + // These GPU parameters correspond to their equivalents in `pl_gpu`, and + // must obey the same rules as documented there. The values from + // `pl_gpu_dummy_default_params` are set to support pretty much everything + // and are set for GLSL version 450. + // + // Individual fields such as `glsl.compute` or `glsl.version` description + // can and should be overridden by the user based on their requirements. + // Individual limits should ideally be set based on the corresponding + // `glGet` queries etc. 
+ struct pl_glsl_version glsl; + struct pl_gpu_limits limits; +}; + +#define PL_GPU_DUMMY_DEFAULTS \ + .glsl = { \ + .version = 450, \ + .gles = false, \ + .vulkan = false, \ + .compute = true, \ + .max_shmem_size = SIZE_MAX, \ + .max_group_threads = 1024, \ + .max_group_size = { 1024, 1024, 1024 }, \ + .subgroup_size = 32, \ + .min_gather_offset = INT16_MIN, \ + .max_gather_offset = INT16_MAX, \ + }, \ + .limits = { \ + /* pl_gpu */ \ + .callbacks = false, \ + .thread_safe = true, \ + /* pl_buf */ \ + .max_buf_size = SIZE_MAX, \ + .max_ubo_size = SIZE_MAX, \ + .max_ssbo_size = SIZE_MAX, \ + .max_vbo_size = SIZE_MAX, \ + .max_mapped_size = SIZE_MAX, \ + .max_buffer_texels = UINT64_MAX, \ + /* pl_tex */ \ + .max_tex_1d_dim = UINT32_MAX, \ + .max_tex_2d_dim = UINT32_MAX, \ + .max_tex_3d_dim = UINT32_MAX, \ + .buf_transfer = true, \ + .align_tex_xfer_pitch = 1, \ + .align_tex_xfer_offset = 1, \ + /* pl_pass */ \ + .max_variable_comps = SIZE_MAX, \ + .max_constants = SIZE_MAX, \ + .max_pushc_size = SIZE_MAX, \ + .max_dispatch = { UINT32_MAX, UINT32_MAX, UINT32_MAX }, \ + .fragment_queues = 0, \ + .compute_queues = 0, \ + }, + +#define pl_gpu_dummy_params(...) (&(struct pl_gpu_dummy_params) { PL_GPU_DUMMY_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_gpu_dummy_params pl_gpu_dummy_default_params; + +// Create a dummy GPU context based on the given parameters. This GPU will have +// a format for each host-representable type (i.e. intN_t, floats and doubles), +// in the canonical channel order RGBA. These formats will have every possible +// capability activated, respectively. +// +// If `params` is left as NULL, it defaults to `&pl_gpu_dummy_default_params`. +PL_API pl_gpu pl_gpu_dummy_create(pl_log log, const struct pl_gpu_dummy_params *params); +PL_API void pl_gpu_dummy_destroy(pl_gpu *gpu); + +// Back-doors into the `pl_tex` and `pl_buf` representations. These allow you +// to access the raw data backing this object.
Textures are always laid out in +// a tightly packed manner. +// +// For "placeholder" dummy textures, this always returns NULL. +PL_API uint8_t *pl_buf_dummy_data(pl_buf buf); +PL_API uint8_t *pl_tex_dummy_data(pl_tex tex); + +// Skeleton of `pl_tex_params` containing only the fields relevant to +// `pl_tex_dummy_create`, plus the extra `sampler_type` field. +struct pl_tex_dummy_params { + int w, h, d; + pl_fmt format; + enum pl_sampler_type sampler_type; + void *user_data; +}; + +#define pl_tex_dummy_params(...) (&(struct pl_tex_dummy_params) { __VA_ARGS__ }) + +// Allows creating a "placeholder" dummy texture. This is basically a texture +// that isn't even backed by anything. All `pl_tex_*` operations (other than +// `pl_tex_destroy`) performed on it will simply fail. +// +// All of the permissions will be set to `false`, except `sampleable`, which is +// set to `true`. (So you can use it as an input to shader sampling functions) +PL_API pl_tex pl_tex_dummy_create(pl_gpu gpu, const struct pl_tex_dummy_params *params); + +PL_API_END + +#endif // LIBPLACEBO_DUMMY_H_ diff --git a/src/include/libplacebo/filters.h b/src/include/libplacebo/filters.h new file mode 100644 index 0000000..a95649d --- /dev/null +++ b/src/include/libplacebo/filters.h @@ -0,0 +1,415 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef LIBPLACEBO_FILTER_KERNELS_H_ +#define LIBPLACEBO_FILTER_KERNELS_H_ + +#include <stdbool.h> +#include <libplacebo/log.h> + +PL_API_BEGIN + +#define PL_FILTER_MAX_PARAMS 2 + +// Invocation parameters for a given kernel +struct pl_filter_ctx { + float radius; + float params[PL_FILTER_MAX_PARAMS]; +}; + +// Represents a single filter function, i.e. kernel or windowing function. +struct pl_filter_function { + // The cosmetic name associated with this filter function. + const char *name; + + // The radius of the filter function. For resizable filters, this gives + // the radius needed to represent a single filter lobe (tap). + float radius; + + // If true, the filter function is resizable (see pl_filter_config.radius) + bool resizable; + + // If true, the filter function is tunable (see pl_filter_config.params) + bool tunable[PL_FILTER_MAX_PARAMS]; + + // If the relevant parameter is tunable, this contains the default values. + float params[PL_FILTER_MAX_PARAMS]; + + // The underlying filter function itself: Computes the weight as a function + // of the offset. All filter functions must be normalized such that x=0 is + // the center point, and in particular weight(0) = 1.0. The functions may + // be undefined for values of x outside [0, radius]. + double (*weight)(const struct pl_filter_ctx *f, double x); + + // If true, this filter represents an opaque placeholder for a more + // sophisticated filter function which does not fit into the pl_filter + // framework. `weight()` will always return 0.0. + bool opaque; +}; + +// Deprecated function, merely checks a->weight == b->weight +PL_DEPRECATED PL_API bool +pl_filter_function_eq(const struct pl_filter_function *a, + const struct pl_filter_function *b); + +// Box filter: Entirely 1.0 within the radius, entirely 0.0 outside of it. 
+// This is also sometimes called a Dirichlet window. +PL_API extern const struct pl_filter_function pl_filter_function_box; + +// Triangle filter: Linear transitions from 1.0 at x=0 to 0.0 at x=radius. +// This is also sometimes called a Bartlett window. +PL_API extern const struct pl_filter_function pl_filter_function_triangle; + +// Cosine filter: Ordinary cosine function, single lobe. +PL_API extern const struct pl_filter_function pl_filter_function_cosine; + +// Hann function: Cosine filter named after Julius von Hann. Also commonly +// mislabeled as a "Hanning" function, due to its similarity to the Hamming +// function. +PL_API extern const struct pl_filter_function pl_filter_function_hann; + +// Hamming function: Cosine filter named after Richard Hamming. +PL_API extern const struct pl_filter_function pl_filter_function_hamming; + +// Welch filter: Polynomial function consisting of a single parabolic section. +PL_API extern const struct pl_filter_function pl_filter_function_welch; + +// Kaiser filter: Approximation of the DPSS window using Bessel functions. +// Also sometimes called a Kaiser-Bessel window. +// Parameter [0]: Shape (alpha). Determines the trade-off between the main lobe +// and the side lobes. +PL_API extern const struct pl_filter_function pl_filter_function_kaiser; + +// Blackman filter: Cosine filter named after Ralph Beebe Blackman. +// Parameter [0]: Scale (alpha). Influences the shape. The defaults result in +// zeros at the third and fourth sidelobes. +PL_API extern const struct pl_filter_function pl_filter_function_blackman; + +// Bohman filter: 2nd order Cosine filter. +PL_API extern const struct pl_filter_function pl_filter_function_bohman; + +// Gaussian function: Similar to the Gaussian distribution, this defines a +// bell curve function. +// Parameter [0]: Scale (t), increasing makes the result blurrier.
+PL_API extern const struct pl_filter_function pl_filter_function_gaussian; + +// Quadratic function: 2nd order approximation of the gaussian function. Also +// sometimes called a "quadric" window. +PL_API extern const struct pl_filter_function pl_filter_function_quadratic; + +// Sinc function: Widely used for both kernels and windows, sinc(x) = sin(x)/x. +PL_API extern const struct pl_filter_function pl_filter_function_sinc; + +// Jinc function: Similar to sinc, but extended to the 2D domain. Widely +// used as the kernel of polar (EWA) filters. Also sometimes called a Sombrero +// function. +PL_API extern const struct pl_filter_function pl_filter_function_jinc; + +// Sphinx function: Similar to sinc and jinc, but extended to the 3D domain. +// The name is derived from "spherical" sinc. Can be used to filter 3D signals +// in theory. +PL_API extern const struct pl_filter_function pl_filter_function_sphinx; + +// B/C-tunable Spline function: This is a family of commonly used spline +// functions with two tunable parameters. Does not need to be windowed. +// Parameter [0]: "B" +// Parameter [1]: "C" +// Some popular variants of this function are: +// B = 1.0, C = 0.0: "base" Cubic (blurry) +// B = 0.0, C = 0.0: Hermite filter (blocky) +// B = 0.0, C = 0.5: Catmull-Rom filter (sharp) +// B = 1/3, C = 1/3: Mitchell-Netravali filter (soft, doesn't ring) +// B ≈ 0.37, C ≈ 0.31: Robidoux filter (used by ImageMagick) +// B ≈ 0.26, C ≈ 0.37: RobidouxSharp filter (sharper variant of Robidoux) +PL_API extern const struct pl_filter_function pl_filter_function_cubic; +PL_API extern const struct pl_filter_function pl_filter_function_hermite; +#define pl_filter_function_bicubic pl_filter_function_cubic +#define pl_filter_function_bcspline pl_filter_function_cubic + +// Cubic splines with 2/3/4 taps. Referred to as "spline16", "spline36", and +// "spline64" mainly for historical reasons, based on the number of pixels in +// their window when using them as 2D orthogonal filters.
Do not need to be +// windowed. +PL_API extern const struct pl_filter_function pl_filter_function_spline16; +PL_API extern const struct pl_filter_function pl_filter_function_spline36; +PL_API extern const struct pl_filter_function pl_filter_function_spline64; + +// Special filter function for the built-in oversampling algorithm. This is an +// opaque filter with no meaningful representation, though it has one tunable +// parameter controlling the threshold at which to switch back to ordinary +// nearest neighbour sampling. (See `pl_shader_sample_oversample`) +PL_API extern const struct pl_filter_function pl_filter_function_oversample; + +// A list of built-in filter functions, terminated by NULL +// +// Note: May contain extra aliases for the above functions. +PL_API extern const struct pl_filter_function * const pl_filter_functions[]; +PL_API extern const int pl_num_filter_functions; // excluding trailing NULL + +// Find the filter function with the given name, or NULL on failure. +PL_API const struct pl_filter_function *pl_find_filter_function(const char *name); + +// Backwards compatibility with the older configuration API. Redundant with +// `pl_filter_function.name`. May be formally deprecated in the future. + +struct pl_filter_function_preset { + const char *name; + const struct pl_filter_function *function; +}; + +// A list of built-in filter function presets, terminated by {0} +PL_API extern const struct pl_filter_function_preset pl_filter_function_presets[]; +PL_API extern const int pl_num_filter_function_presets; // excluding trailing {0} + +// Find the filter function preset with the given name, or NULL on failure.
+PL_API const struct pl_filter_function_preset *pl_find_filter_function_preset(const char *name); + +// Different usage domains for a filter +enum pl_filter_usage { + PL_FILTER_UPSCALING = (1 << 0), + PL_FILTER_DOWNSCALING = (1 << 1), + PL_FILTER_FRAME_MIXING = (1 << 2), + + PL_FILTER_SCALING = PL_FILTER_UPSCALING | PL_FILTER_DOWNSCALING, + PL_FILTER_ALL = PL_FILTER_SCALING | PL_FILTER_FRAME_MIXING, +}; + +// Represents a tuned combination of filter functions, plus parameters +struct pl_filter_config { + // The cosmetic name associated with this filter config. Optional for + // user-provided configs, but always set by built-in configurations. + const char *name; + + // Longer / friendly name. Always set for built-in configurations, + // except for names which are merely aliases of other filters. + const char *description; + + // Allowed and recommended usage domains (respectively) + // + // When it is desired to maintain a simpler user interface, it may be + // recommended to include only scalers whose recommended usage domains + // includes the relevant context in which it will be used. + enum pl_filter_usage allowed; + enum pl_filter_usage recommended; + + // The kernel function and (optionally) windowing function. + const struct pl_filter_function *kernel; + const struct pl_filter_function *window; + + // The radius. Ignored if !kernel->resizable. Optional, defaults to + // kernel->radius if unset. + float radius; + + // Parameters for the respective filter function. Ignored if not tunable. + float params[PL_FILTER_MAX_PARAMS]; + float wparams[PL_FILTER_MAX_PARAMS]; + + // Represents a clamping coefficient for negative weights. A value of 0.0 + // (the default) represents no clamping. A value of 1.0 represents full + // clamping, i.e. all negative weights will be clamped to 0. Values in + // between will be linearly scaled. + float clamp; + + // Additional blur coefficient. 
This effectively stretches the kernel, + // without changing the effective radius of the filter radius. Setting this + // to a value of 0.0 is equivalent to disabling it. Values significantly + // below 1.0 may seriously degrade the visual output, and should be used + // with care. + float blur; + + // Additional taper coefficient. This essentially flattens the function's + // center. The values within [-taper, taper] will return 1.0, with the + // actual function being squished into the remainder of [taper, radius]. + // Defaults to 0.0. + float taper; + + // If true, this filter is intended to be used as a polar/2D filter (EWA) + // instead of a separable/1D filter. Does not affect the actual sampling, + // but provides information about how the results are to be interpreted. + bool polar; + + // Antiringing strength. A value of 0.0 disables antiringing, and a value + // of 1.0 enables full-strength antiringing. Defaults to 0.0 if + // unspecified. + // + // Note: This is only included in `pl_filter_config` for convenience. Does + // not affect the actual filter sampling, but provides information to the + // downstream consumer of the `pl_filter`. + float antiring; +}; + +PL_API bool pl_filter_config_eq(const struct pl_filter_config *a, + const struct pl_filter_config *b); + +// Samples a given filter configuration at a given x coordinate, while +// respecting all parameters of the configuration. +PL_API double pl_filter_sample(const struct pl_filter_config *c, double x); + +// A list of built-in filter configurations. Since they are just combinations +// of the above filter functions, they are not described in much further +// detail. 
+PL_API extern const struct pl_filter_config pl_filter_spline16; // 2 taps +PL_API extern const struct pl_filter_config pl_filter_spline36; // 3 taps +PL_API extern const struct pl_filter_config pl_filter_spline64; // 4 taps +PL_API extern const struct pl_filter_config pl_filter_nearest; +PL_API extern const struct pl_filter_config pl_filter_box; +PL_API extern const struct pl_filter_config pl_filter_bilinear; +PL_API extern const struct pl_filter_config pl_filter_gaussian; +// Sinc family (all configured to 3 taps): +PL_API extern const struct pl_filter_config pl_filter_sinc; // unwindowed +PL_API extern const struct pl_filter_config pl_filter_lanczos; // sinc-sinc +PL_API extern const struct pl_filter_config pl_filter_ginseng; // sinc-jinc +PL_API extern const struct pl_filter_config pl_filter_ewa_jinc; // unwindowed +PL_API extern const struct pl_filter_config pl_filter_ewa_lanczos; // jinc-jinc +PL_API extern const struct pl_filter_config pl_filter_ewa_lanczossharp; +PL_API extern const struct pl_filter_config pl_filter_ewa_lanczos4sharpest; +PL_API extern const struct pl_filter_config pl_filter_ewa_ginseng; // jinc-sinc +PL_API extern const struct pl_filter_config pl_filter_ewa_hann; // jinc-hann +// Spline family +PL_API extern const struct pl_filter_config pl_filter_bicubic; +PL_API extern const struct pl_filter_config pl_filter_hermite; +PL_API extern const struct pl_filter_config pl_filter_catmull_rom; +PL_API extern const struct pl_filter_config pl_filter_mitchell; +PL_API extern const struct pl_filter_config pl_filter_mitchell_clamp; // clamp = 1.0 +PL_API extern const struct pl_filter_config pl_filter_robidoux; +PL_API extern const struct pl_filter_config pl_filter_robidouxsharp; +PL_API extern const struct pl_filter_config pl_filter_ewa_robidoux; +PL_API extern const struct pl_filter_config pl_filter_ewa_robidouxsharp; +// Special/opaque filters +PL_API extern const struct pl_filter_config pl_filter_oversample; + +// Backwards compatibility +#define 
pl_filter_triangle pl_filter_bilinear +#define pl_oversample_frame_mixer pl_filter_oversample + +// A list of built-in filter configs, terminated by NULL +PL_API extern const struct pl_filter_config * const pl_filter_configs[]; +PL_API extern const int pl_num_filter_configs; // excluding trailing NULL + +// Find the filter config with the given name, or NULL on failure. +// `usage` restricts the valid usage (based on `pl_filter_config.allowed`). +PL_API const struct pl_filter_config * +pl_find_filter_config(const char *name, enum pl_filter_usage usage); + +// Backward compatibility with the previous filter configuration API. Redundant +// with pl_filter_config.name/description. May be deprecated in the future. +struct pl_filter_preset { + const char *name; + const struct pl_filter_config *filter; + + // Longer / friendly name, or NULL for aliases + const char *description; +}; + +// A list of built-in filter presets, terminated by {0} +PL_API extern const struct pl_filter_preset pl_filter_presets[]; +PL_API extern const int pl_num_filter_presets; // excluding trailing {0} + +// Find the filter preset with the given name, or NULL on failure. +PL_API const struct pl_filter_preset *pl_find_filter_preset(const char *name); + +// Parameters for filter generation. +struct pl_filter_params { + // The particular filter configuration to be sampled. config.kernel must + // be set to a valid pl_filter_function. + struct pl_filter_config config; + + // The precision of the resulting LUT. A value of 64 should be fine for + // most practical purposes, but higher or lower values may be justified + // depending on the use case. This value must be set to something > 0. + int lut_entries; + + // --- Polar filters only (config.polar) + + // As a micro-optimization, all samples below this cutoff value will be + // ignored when updating the cutoff radius. Setting it to a value of 0.0 + // disables this optimization.
+ float cutoff; + + // --- Separable filters only (!config.polar) + + // Indicates the maximum row size that is supported by the calling code, or + // 0 for no limit. + int max_row_size; + + // Indicates the row stride alignment. For some use cases (e.g. uploading + // the weights as a texture), there are certain alignment requirements for + // each row. The chosen row_size will always be a multiple of this value. + // Specifying 0 indicates no alignment requirements. + int row_stride_align; + + // --- Deprecated options + float filter_scale PL_DEPRECATED; // no effect, use `config.blur` instead +}; + +#define pl_filter_params(...) (&(struct pl_filter_params) { __VA_ARGS__ }) + +// Represents an initialized instance of a particular filter, with a +// precomputed LUT. The interpretation of the LUT depends on the type of the +// filter (polar or separable). +typedef const struct pl_filter_t { + // Deep copy of the parameters, for convenience. + struct pl_filter_params params; + + // Contains the true radius of the computed filter. This may be + // smaller than the configured radius depending on the exact filter + // parameters used. Mainly relevant for polar filters, since + // it affects the value range of *weights. + float radius; + + // Radius of the first zero crossing (main lobe size). + float radius_zero; + + // The computed look-up table (LUT). For polar filters, this is interpreted + // as a 1D array with dimensions [lut_entries] containing the raw filter + // samples on the scale [0, radius]. For separable (non-polar) filters, + // this is interpreted as a 2D array with dimensions + // [lut_entries][row_stride]. The inner rows contain the `row_size` samples + // to convolve with the corresponding input pixels. The outer coordinate is + // used to vary the fractional offset (phase). So for example, if the + // sample position to reconstruct is directly aligned with the source + // texels, you would use the values from weights[0]. 
If the sample position + // to reconstruct is exactly half-way between two source texels (180° out + // of phase), you would use the values from weights[lut_entries/2]. + const float *weights; + + // --- separable filters only (!params.config.polar) + + // The number of source texels to convolve over for each row. This value + // will never exceed the given `max_row_size`. If the filter ends up + // cut off because of this, the bool `insufficient` will be set to true. + int row_size; + bool insufficient; + + // The separation (in *weights) between each row of the filter. Always + // a multiple of params.row_stride_align. + int row_stride; + + // --- deprecated / removed fields + float radius_cutoff PL_DEPRECATED; // identical to `radius` +} *pl_filter; + +// Generate (compute) a filter instance based on a given filter configuration. +// The resulting pl_filter must be freed with `pl_filter_free` when no longer +// needed. Returns NULL if filter generation fails due to invalid parameters +// (i.e. missing a required parameter). +PL_API pl_filter pl_filter_generate(pl_log log, const struct pl_filter_params *params); +PL_API void pl_filter_free(pl_filter *filter); + +PL_API_END + +#endif // LIBPLACEBO_FILTER_KERNELS_H_ diff --git a/src/include/libplacebo/gamut_mapping.h b/src/include/libplacebo/gamut_mapping.h new file mode 100644 index 0000000..a92a73b --- /dev/null +++ b/src/include/libplacebo/gamut_mapping.h @@ -0,0 +1,182 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_GAMUT_MAPPING_H_ +#define LIBPLACEBO_GAMUT_MAPPING_H_ + +#include <libplacebo/common.h> +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +struct pl_gamut_map_params; +struct pl_gamut_map_function { + const char *name; // Identifier + const char *description; // Friendly / longer name + + // The gamut-mapping function itself. Iterates over all values in `lut`, + // and adapts them as needed. + void (*map)(float *lut, const struct pl_gamut_map_params *params); + + // Returns true if `map` supports both stretching and contracting the + // gamut. In this case, `map` is always executed, even if the output gamut + // is larger than the input gamut. + bool bidirectional; + + // Private data. Unused by libplacebo, but may be accessed by `map`. + void *priv; +}; + +struct pl_gamut_map_constants { + // (Relative) chromaticity protection zone for perceptual mapping [0,1] + float perceptual_deadzone; + + // Strength of the perceptual saturation mapping component [0,1] + float perceptual_strength; + + // I vs C curve gamma to use for colorimetric clipping [0,10] + float colorimetric_gamma; + + // Knee point to use for softclipping methods (perceptual, softclip) [0,1] + float softclip_knee; + + // Desaturation strength (for softclip only) [0,1] + float softclip_desat; +}; + +#define PL_GAMUT_MAP_CONSTANTS \ + .colorimetric_gamma = 1.80f, \ + .softclip_knee = 0.70f, \ + .softclip_desat = 0.35f, \ + .perceptual_deadzone = 0.30f, \ + .perceptual_strength = 0.80f, + +struct pl_gamut_map_params { + // If `function` is NULL, defaults to `pl_gamut_map_clip`. + const struct pl_gamut_map_function *function; + + // The desired input/output primaries. This affects the subjective color + // volume in which the desired mapping shall take place. 
+ struct pl_raw_primaries input_gamut; + struct pl_raw_primaries output_gamut; + + // Minimum/maximum luminance (PQ) of the target display. Note that the same + // value applies to both the input and output, since it's assumed that tone + // mapping has already happened by this stage. This effectively defines the + // legal gamut boundary in RGB space. + // + // This also defines the I channel value range, for `pl_gamut_map_generate` + float min_luma; + float max_luma; + + // Common constants, should be initialized to PL_GAMUT_MAP_CONSTANTS if + // not intending to override them further. + struct pl_gamut_map_constants constants; + + // -- LUT generation options (for `pl_gamut_map_generate` only) + + // The size of the resulting LUT, per channel. + // + // Note: For quality, it's generally best to increase h > I > C + int lut_size_I; + int lut_size_C; + int lut_size_h; + + // The stride (in number of floats) between elements in the resulting LUT. + int lut_stride; + + // -- Removed parameters + float chroma_margin PL_DEPRECATED; // non-functional +}; + +#define pl_gamut_map_params(...) (&(struct pl_gamut_map_params) { \ + .constants = { PL_GAMUT_MAP_CONSTANTS }, \ + __VA_ARGS__ \ +}) + +// Note: Only does pointer equality testing on `function` +PL_API bool pl_gamut_map_params_equal(const struct pl_gamut_map_params *a, + const struct pl_gamut_map_params *b); + +// Returns true if the given gamut mapping configuration effectively represents +// a no-op configuration. Gamut mapping can be skipped in this case. +PL_API bool pl_gamut_map_params_noop(const struct pl_gamut_map_params *params); + +// Generate a gamut-mapping LUT for a given configuration. LUT samples are +// stored as IPTPQc4 values, but the LUT itself is indexed by IChPQc4, spanning +// the effective range [min_luma, max_luma] × [0, 0.5] × [-pi, pi]. +// +// This ordering is designed to keep frequently co-occurring values close in +// memory, while permitting simple wrapping of the 'h' component. 
+PL_API void pl_gamut_map_generate(float *out, const struct pl_gamut_map_params *params); + +// Samples a gamut mapping function for a single IPTPQc4 value. The input +// values are updated in-place. +PL_API void pl_gamut_map_sample(float x[3], const struct pl_gamut_map_params *params); + +// Performs no gamut-mapping, just hard clips out-of-range colors per-channel. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_clip; + +// Performs a perceptually balanced (saturation) gamut mapping, using a soft +// knee function to preserve in-gamut colors, followed by a final softclip +// operation. This works bidirectionally, meaning it can both compress and +// expand the gamut. Behaves similarly to a blend of `saturation` and `softclip`. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_perceptual; + +// Performs a perceptually balanced gamut mapping using a soft knee function to +// roll-off clipped regions, and a hue shifting function to preserve saturation. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_softclip; + +// Performs relative colorimetric clipping, while maintaining an exponential +// relationship between brightness and chromaticity. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_relative; + +// Performs simple RGB->RGB saturation mapping. The input R/G/B channels are +// mapped directly onto the output R/G/B channels. Will never clip, but will +// distort all hues and/or result in a faded look. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_saturation; + +// Performs absolute colorimetric clipping. Like pl_gamut_map_relative, but +// does not adapt the white point. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_absolute; + +// Performs constant-luminance colorimetric clipping, desaturating colors +// towards white until they're in-range. 
+PL_API extern const struct pl_gamut_map_function pl_gamut_map_desaturate; + +// Uniformly darkens the input slightly to prevent clipping on blown-out +// highlights, then clamps colorimetrically to the input gamut boundary, +// biased slightly to preserve chromaticity over luminance. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_darken; + +// Performs no gamut mapping, but simply highlights out-of-gamut pixels. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_highlight; + +// Linearly/uniformly desaturates the image in order to bring the entire +// image into the target gamut. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_linear; + +// A list of built-in gamut mapping functions, terminated by NULL +PL_API extern const struct pl_gamut_map_function * const pl_gamut_map_functions[]; +PL_API extern const int pl_num_gamut_map_functions; // excluding trailing NULL + +// Find the gamut mapping function with the given name, or NULL on failure. +PL_API const struct pl_gamut_map_function *pl_find_gamut_map_function(const char *name); + +PL_API_END + +#endif // LIBPLACEBO_GAMUT_MAPPING_H_ diff --git a/src/include/libplacebo/gpu.h b/src/include/libplacebo/gpu.h new file mode 100644 index 0000000..a63fdf7 --- /dev/null +++ b/src/include/libplacebo/gpu.h @@ -0,0 +1,1464 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_GPU_H_ +#define LIBPLACEBO_GPU_H_ + +#include <stddef.h> +#include <stdbool.h> +#include <stdint.h> + +#include <libplacebo/common.h> +#include <libplacebo/cache.h> +#include <libplacebo/log.h> + +PL_API_BEGIN + +// These are not memory managed, and should represent compile-time constants +typedef const char *pl_debug_tag; +#define PL_DEBUG_TAG (__FILE__ ":" PL_TOSTRING(__LINE__)) + +// Type of a shader input descriptor. +enum pl_desc_type { + PL_DESC_INVALID = 0, + PL_DESC_SAMPLED_TEX, // C: pl_tex* GLSL: combined texture sampler + // (`pl_tex->params.sampleable` must be set) + PL_DESC_STORAGE_IMG, // C: pl_tex* GLSL: storage image + // (`pl_tex->params.storable` must be set) + PL_DESC_BUF_UNIFORM, // C: pl_buf* GLSL: uniform buffer + // (`pl_buf->params.uniform` must be set) + PL_DESC_BUF_STORAGE, // C: pl_buf* GLSL: storage buffer + // (`pl_buf->params.storable` must be set) + PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf* GLSL: uniform samplerBuffer + // (`pl_buf->params.uniform` and `format` must be set) + PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf* GLSL: uniform imageBuffer + // (`pl_buf->params.storable` and `format` must be set) + PL_DESC_TYPE_COUNT +}; + +// This file contains the definition of an API which is designed to abstract +// away from platform-specific APIs like the various OpenGL variants, Direct3D +// and Vulkan in a common way. It is a much more limited API than those APIs, +// since it tries targeting a very small common subset of features that is +// needed to implement libplacebo's rendering. +// +// NOTE: Most, but not all, parameter conditions (phrases such as "must" or +// "valid usage") are explicitly tested and result in error messages followed by +// graceful failure. Exceptions are noted where they exist. + +// Structure which wraps metadata describing GLSL capabilities. 
+struct pl_glsl_version { + int version; // GLSL version (e.g. 450), for #version + bool gles; // GLSL ES semantics (ESSL) + bool vulkan; // GL_KHR_vulkan_glsl semantics + + // Compute shader support and limits. If `compute` is false, then all + // of the remaining fields in this section are {0}. + bool compute; + size_t max_shmem_size; // maximum compute shader shared memory size + uint32_t max_group_threads; // maximum number of local threads per work group + uint32_t max_group_size[3]; // maximum work group size per dimension + + // If nonzero, signals availability of shader subgroups. This guarantees + // availability of all of the following extensions: + // - GL_KHR_shader_subgroup_basic + // - GL_KHR_shader_subgroup_vote + // - GL_KHR_shader_subgroup_arithmetic + // - GL_KHR_shader_subgroup_ballot + // - GL_KHR_shader_subgroup_shuffle + uint32_t subgroup_size; + + // Miscellaneous shader limits + int16_t min_gather_offset; // minimum `textureGatherOffset` offset + int16_t max_gather_offset; // maximum `textureGatherOffset` offset +}; + +// Backwards compatibility alias +#define pl_glsl_desc pl_glsl_version + +// Structure defining the physical limits and capabilities of this GPU +// instance. If a limit is given as 0, that means that feature is unsupported. +struct pl_gpu_limits { + // --- pl_gpu + bool thread_safe; // `pl_gpu` calls are thread-safe + bool callbacks; // supports asynchronous GPU callbacks + + // --- pl_buf + size_t max_buf_size; // maximum size of any buffer + size_t max_ubo_size; // maximum size of a `uniform` buffer + size_t max_ssbo_size; // maximum size of a `storable` buffer + size_t max_vbo_size; // maximum size of a `drawable` buffer + size_t max_mapped_size; // maximum size of a `host_mapped` buffer + uint64_t max_buffer_texels; // maximum number of texels in a texel buffer + bool host_cached; // if true, PL_BUF_MEM_HOST buffers are cached + + // Required alignment for PL_HANDLE_HOST_PTR imports. 
This is provided + // merely as a hint to the user. If the host pointer being imported is + // misaligned, libplacebo will internally round (over-map) the region. + size_t align_host_ptr; + + // --- pl_tex + uint32_t max_tex_1d_dim; // maximum width for a 1D texture + uint32_t max_tex_2d_dim; // maximum width/height for a 2D texture (required) + uint32_t max_tex_3d_dim; // maximum width/height/depth for a 3D texture + bool blittable_1d_3d; // supports blittable 1D/3D textures + bool buf_transfer; // supports `pl_tex_transfer_params.buf` + + // These don't represent hard limits but indicate performance hints for + // optimal alignment. For best performance, the corresponding field + // should be aligned to a multiple of these. They will always be a power + // of two. + size_t align_tex_xfer_pitch; // optimal `pl_tex_transfer_params.row_pitch` + size_t align_tex_xfer_offset; // optimal `pl_tex_transfer_params.buf_offset` + + // --- pl_pass + size_t max_variable_comps; // maximum components passed in variables + size_t max_constants; // maximum `pl_pass_params.num_constants` + bool array_size_constants; // push constants can be used to size arrays + size_t max_pushc_size; // maximum `push_constants_size` + size_t align_vertex_stride; // alignment of `pl_pass_params.vertex_stride` + uint32_t max_dispatch[3]; // maximum dispatch size per dimension + + // Note: At least one of `max_variable_comps` or `max_ubo_size` is + // guaranteed to be nonzero. + + // As a performance hint, the GPU may signal the number of command queues + // it has for fragment and compute shaders, respectively. Users may use + // this information to decide the appropriate type of shader to dispatch. 
+ uint32_t fragment_queues; + uint32_t compute_queues; +}; + +// Backwards compatibility aliases +#define max_xfer_size max_buf_size +#define align_tex_xfer_stride align_tex_xfer_pitch + +// Some `pl_gpu` operations allow sharing GPU resources with external APIs - +// examples include interop with other graphics APIs such as CUDA, and also +// various hardware decoding APIs. This defines the mechanism underpinning the +// communication of such an interoperation. +typedef uint64_t pl_handle_caps; +enum pl_handle_type { + PL_HANDLE_FD = (1 << 0), // `int fd` for POSIX-style APIs + PL_HANDLE_WIN32 = (1 << 1), // `HANDLE` for win32 API + PL_HANDLE_WIN32_KMT = (1 << 2), // `HANDLE` for pre-Windows-8 win32 API + PL_HANDLE_DMA_BUF = (1 << 3), // 'int fd' for a dma_buf fd + PL_HANDLE_HOST_PTR = (1 << 4), // `void *` for a host-allocated pointer + PL_HANDLE_MTL_TEX = (1 << 5), // `MTLTexture*` for Apple platforms + PL_HANDLE_IOSURFACE = (1 << 6), // `IOSurfaceRef` for Apple platforms +}; + +struct pl_gpu_handle_caps { + pl_handle_caps tex; // supported handles for `pl_tex` + `pl_shared_mem` + pl_handle_caps buf; // supported handles for `pl_buf` + `pl_shared_mem` + pl_handle_caps sync; // supported handles for `pl_sync` / semaphores +}; + +// Wrapper for the handle used to communicate a shared resource externally. +// This handle is owned by the `pl_gpu` - if a user wishes to use it in a way +// that takes over ownership (e.g. importing into some APIs), they must clone +// the handle before doing so (e.g. using `dup` for fds). It is important to +// read the external API documentation _very_ carefully as different handle +// types may be managed in different ways. (eg: CUDA takes ownership of an fd, +// but does not take ownership of a win32 handle). 
+union pl_handle { + int fd; // PL_HANDLE_FD / PL_HANDLE_DMA_BUF + void *handle; // PL_HANDLE_WIN32 / PL_HANDLE_WIN32_KMT / PL_HANDLE_MTL_TEX / PL_HANDLE_IOSURFACE + void *ptr; // PL_HANDLE_HOST_PTR +}; + +// Structure encapsulating memory that is shared between libplacebo and the +// user. This memory can be imported into external APIs using the handle. +// +// If the object a `pl_shared_mem` belongs to is destroyed (e.g. via +// `pl_buf_destroy`), the handle becomes undefined, as do the contents of the +// memory it points to, as well as any external API objects imported from it. +struct pl_shared_mem { + union pl_handle handle; + size_t size; // the total size of the memory referenced by this handle + size_t offset; // the offset of the object within the referenced memory + + // Note: `size` is optional for some APIs and handle types, in particular + // when importing DMABUFs or D3D11 textures. + + // For PL_HANDLE_DMA_BUF, this specifies the DRM format modifier that + // describes this resource. Note that when importing `pl_buf`, this must + // be DRM_FORMAT_MOD_LINEAR. For importing `pl_tex`, it can be any + // format modifier supported by the implementation. + uint64_t drm_format_mod; + + // When importing a `pl_tex` of type PL_HANDLE_DMA_BUF, this can be used to + // set the image stride (AKA pitch) in memory. If left as 0, defaults to + // the image width/height. + size_t stride_w; + size_t stride_h; + + // When importing a `pl_tex` of type PL_HANDLE_MTL_TEX, this determines + // which plane is imported (0 - 2). + unsigned plane; +}; + +// Structure grouping PCI bus address fields for GPU devices +struct pl_gpu_pci_address { + uint32_t domain; + uint32_t bus; + uint32_t device; + uint32_t function; +}; + +typedef const struct pl_fmt_t *pl_fmt; + +// Abstract device context which wraps an underlying graphics context and can +// be used to dispatch rendering commands. 
+// +// Thread-safety: Depends on `pl_gpu_limits.thread_safe` +typedef const struct pl_gpu_t { + pl_log log; + + struct pl_glsl_version glsl; // GLSL features supported by this GPU + struct pl_gpu_limits limits; // physical device limits and capabilities + + // Fields relevant to external API interop. If the underlying device does + // not support interop with other APIs, these will all be {0}. + struct pl_gpu_handle_caps export_caps; // supported handles for exporting + struct pl_gpu_handle_caps import_caps; // supported handles for importing + uint8_t uuid[16]; // underlying device UUID + + // Supported texture formats, in preference order. (If there are multiple + // similar formats, the "better" ones come first) + pl_fmt *formats; + int num_formats; + + // PCI Bus address of the underlying device, to help with interop. + // This will only be filled in if interop is supported. + struct pl_gpu_pci_address pci; +} *pl_gpu; + +// Attach a pl_cache object to this GPU instance. This cache will be +// used to cache all compiled shaders, as well as several other shader objects +// (e.g. cached 3DLUTs). Calling this with `cache = NULL` disables the cache. +// +// Note: Calling this after shaders have already been compiled will not +// retroactively add those shaders to the cache, so it's recommended to set +// this early, before creating any passes. 
+PL_API void pl_gpu_set_cache(pl_gpu gpu, pl_cache cache); + +enum pl_fmt_type { + PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats + PL_FMT_UNORM, // unsigned, normalized integer format (sampled as float) + PL_FMT_SNORM, // signed, normalized integer format (sampled as float) + PL_FMT_UINT, // unsigned integer format (sampled as integer) + PL_FMT_SINT, // signed integer format (sampled as integer) + PL_FMT_FLOAT, // (signed) float formats, any bit size + PL_FMT_TYPE_COUNT, +}; + +enum pl_fmt_caps { + PL_FMT_CAP_SAMPLEABLE = 1 << 0, // may be sampled from (PL_DESC_SAMPLED_TEX) + PL_FMT_CAP_STORABLE = 1 << 1, // may be used as storage image (PL_DESC_STORAGE_IMG) + PL_FMT_CAP_LINEAR = 1 << 2, // may be linearly sampled from (PL_TEX_SAMPLE_LINEAR) + PL_FMT_CAP_RENDERABLE = 1 << 3, // may be rendered to (pl_pass_params.target_fmt) + PL_FMT_CAP_BLENDABLE = 1 << 4, // may be blended to (pl_pass_params.enable_blend) + PL_FMT_CAP_BLITTABLE = 1 << 5, // may be blitted from/to (pl_tex_blit) + PL_FMT_CAP_VERTEX = 1 << 6, // may be used as a vertex attribute + PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7, // may be used as a texel uniform buffer + PL_FMT_CAP_TEXEL_STORAGE = 1 << 8, // may be used as a texel storage buffer + PL_FMT_CAP_HOST_READABLE = 1 << 9, // may be used with `host_readable` textures + PL_FMT_CAP_READWRITE = 1 << 10, // may be used with PL_DESC_ACCESS_READWRITE + + // Notes: + // - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE + // - PL_FMT_CAP_STORABLE also implies `pl_gpu.glsl.compute` + // - PL_FMT_CAP_BLENDABLE implies PL_FMT_CAP_RENDERABLE + // - PL_FMT_CAP_VERTEX implies that the format is non-opaque + // - PL_FMT_CAP_HOST_READABLE implies that the format is non-opaque +}; + +struct pl_fmt_plane { + // Underlying format of this particular sub-plane. This describes the + // components, texel size and host representation for the purpose of + // e.g. transfers, blits, and sampling. 
+ pl_fmt format; + + // X/Y subsampling shift factor for this plane. + uint8_t shift_x, shift_y; +}; + +// Structure describing a texel/vertex format. +struct pl_fmt_t { + const char *name; // symbolic name for this format (e.g. rgba32f) + uint64_t signature; // unique but stable signature (for pass reusability) + + enum pl_fmt_type type; // the format's data type and interpretation + enum pl_fmt_caps caps; // the features supported by this format + int num_components; // number of components for this format + int component_depth[4]; // meaningful bits per component, texture precision + size_t internal_size; // internal texel size (for blit compatibility) + + // For planar formats, this provides a description of each sub-plane. + // + // Note on planar formats: Planar formats are always opaque and typically + // support only a limited subset of capabilities (or none at all). Access + // should be done via sub-planes. (See `pl_tex.planes`) + struct pl_fmt_plane planes[4]; + int num_planes; // or 0 for non-planar textures + + // This controls the relationship between the data as seen by the host and + // the way it's interpreted by the texture. The host representation is + // always tightly packed (no padding bits in between each component). + // + // This representation assumes little endian ordering, i.e. components + // being ordered from LSB to MSB in memory. Note that for oddly packed + // formats like rgb10a2 or rgb565, this is inconsistent with the naming. + // (That is to say, rgb565 has sample order {2, 1, 0} under this convention + // - because rgb565 treats the R channel as the *most* significant bits) + // + // If `opaque` is true, then there's no meaningful correspondence between + // the two, and all of the remaining fields in this section are unset. 
+ // + // If `emulated` is true, then this format doesn't actually exist on the + // GPU as an uploadable texture format - and any apparent support is being + // emulated (typically using compute shaders in the upload path). + bool opaque; + bool emulated; + size_t texel_size; // total size in bytes per texel + size_t texel_align; // texel alignment requirements (bytes) + int host_bits[4]; // number of meaningful bits in host memory + int sample_order[4]; // sampled index for each component, e.g. + // {2, 1, 0, 3} for BGRA textures + + // For sampleable formats, this bool indicates whether or not the format + // is compatible with `textureGather()` + bool gatherable; + + // If usable as a vertex or texel buffer format, this gives the GLSL type + // corresponding to the data. (e.g. vec4) + const char *glsl_type; + + // If usable as a storage image or texel storage buffer + // (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL + // texel format corresponding to the format (e.g. rgba16ui), if any. This + // field may be NULL, in which case the format modifier may be left + // unspecified. + const char *glsl_format; + + // If available, this gives the fourcc associated with the host + // representation. In particular, this is intended for use with + // PL_HANDLE_DMA_BUF, where this field will match the DRM format from + // <drm_fourcc.h>. May be 0, for formats without matching DRM fourcc. + uint32_t fourcc; + + // If `fourcc` is set, this contains the list of supported drm format + // modifiers for this format. + const uint64_t *modifiers; + int num_modifiers; +}; + +// Returns whether or not a pl_fmt's components are ordered sequentially +// in memory in the order RGBA. +PL_API bool pl_fmt_is_ordered(pl_fmt fmt); + +// Returns whether or not a pl_fmt is sampled as a float (e.g. UNORM) +PL_API bool pl_fmt_is_float(pl_fmt fmt); + +// Returns whether or not a pl_fmt supports a given DRM modifier. 
+PL_API bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier); + +// Helper function to find a format with a given number of components and +// minimum effective precision per component. If `host_bits` is set, then the +// format will always be non-opaque, unpadded, ordered and have exactly this +// bit depth for each component. Finally, all `caps` must be supported. +PL_API pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components, + int min_depth, int host_bits, enum pl_fmt_caps caps); + +// Finds a vertex format for a given configuration. The resulting vertex will +// have a component depth equivalent to the sizeof() the equivalent host type. +// (e.g. PL_FMT_FLOAT will always have sizeof(float)) +PL_API pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components); + +// Find a format based on its name. +PL_API pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name); + +// Find a format based on its fourcc. +PL_API pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc); + +// A generic 'timer query' object. These can be used to measure an +// approximation of the GPU execution time of a given operation. Due to the +// highly asynchronous nature of GPUs, the actual results of any individual +// timer query may be delayed by quite a bit. As such, users should avoid +// trying to pair any particular GPU command with any particular timer query +// result, and only reuse `pl_timer` objects with identical operations. The +// results of timer queries are guaranteed to be in-order, but individual +// queries may be dropped, and some operations might not record timer results +// at all. (For example, if the underlying hardware does not support timer +// queries for a given operation type) +// +// Thread-safety: Unsafe +typedef struct pl_timer_t *pl_timer; + +// Creates a new timer object. 
This may return NULL, for example if the +// implementation does not support timers, but since passing NULL to +// `pl_timer_destroy` and `pl_timer_query` is safe, users generally need not +// concern themselves with handling this. +PL_API pl_timer pl_timer_create(pl_gpu gpu); +PL_API void pl_timer_destroy(pl_gpu gpu, pl_timer *); + +// Queries any results that have been measured since the last execution of +// `pl_timer_query`. There may be more than one result, in which case the user +// should simply call the function again to get the subsequent values. This +// function returns a value of 0 in the event that there are no more +// unprocessed results. +// +// The results are reported in nanoseconds, but the actual precision of the +// timestamp queries may be significantly lower. +// +// Note: Results do not queue up indefinitely. Generally, the implementation +// will only keep track of a small, fixed number of results internally. Make +// sure to include this function as part of your main rendering loop to process +// all of its results, or older results will be overwritten by newer ones. +PL_API uint64_t pl_timer_query(pl_gpu gpu, pl_timer); + +enum pl_buf_mem_type { + PL_BUF_MEM_AUTO = 0, // use whatever seems most appropriate + PL_BUF_MEM_HOST, // try allocating from host memory (RAM) + PL_BUF_MEM_DEVICE, // try allocating from device memory (VRAM) + PL_BUF_MEM_TYPE_COUNT, + + // Note: This distinction only matters for discrete GPUs +}; + +// Structure describing a buffer. +struct pl_buf_params { + size_t size; // size in bytes (must be <= `pl_gpu_limits.max_buf_size`) + bool host_writable; // contents may be updated via pl_buf_write() + bool host_readable; // contents may be read back via pl_buf_read() + bool host_mapped; // create a persistent, RW mapping (pl_buf.data) + + // May be used as PL_DESC_BUF_UNIFORM or PL_DESC_BUF_TEXEL_UNIFORM. 
+ // Requires `size <= pl_gpu_limits.max_ubo_size` + bool uniform; + + // May be used as PL_DESC_BUF_STORAGE or PL_DESC_BUF_TEXEL_STORAGE. + // Requires `size <= pl_gpu_limits.max_ssbo_size` + bool storable; + + // May be used as the source of vertex data for `pl_pass_run`. + bool drawable; + + // Provide a hint for the memory type you want to use when allocating + // this buffer's memory. + // + // Note: Restrictions may apply depending on the usage flags. In + // particular, allocating buffers with `uniform` or `storable` enabled from + // non-device memory will almost surely fail. + enum pl_buf_mem_type memory_type; + + // Setting this to a format with the `PL_FMT_CAP_TEXEL_*` capability allows + // this buffer to be used as a `PL_DESC_BUF_TEXEL_*`, when `uniform` and + // `storable` are respectively also enabled. + pl_fmt format; + + // At most one of `export_handle` and `import_handle` can be set for a + // buffer. + + // Setting this indicates that the memory backing this buffer should be + // shared with external APIs. If so, this must be exactly *one* of + // `pl_gpu.export_caps.buf`. + enum pl_handle_type export_handle; + + // Setting this indicates that the memory backing this buffer will be + // imported from an external API. If so, this must be exactly *one* of + // `pl_gpu.import_caps.buf`. + enum pl_handle_type import_handle; + + // If the shared memory is being imported, the import handle must be + // specified here. Otherwise, this is ignored. + struct pl_shared_mem shared_mem; + + // If non-NULL, the buffer will be created with these contents. Otherwise, + // the initial data is undefined. Using this does *not* require setting + // host_writable. + const void *initial_data; + + // Arbitrary user data. libplacebo does not use this at all. + void *user_data; + + // Arbitrary identifying tag. Used only for debugging purposes. + pl_debug_tag debug_tag; +}; + +#define pl_buf_params(...) 
(&(struct pl_buf_params) { \ + .debug_tag = PL_DEBUG_TAG, \ + __VA_ARGS__ \ + }) + +// A generic buffer, which can be used for multiple purposes (texture transfer, +// storage buffer, uniform buffer, etc.) +// +// Note on efficiency: A pl_buf does not necessarily represent a true "buffer" +// object on the underlying graphics API. It may also refer to a sub-slice of +// a larger buffer, depending on the implementation details of the GPU. The +// bottom line is that users do not need to worry about the efficiency of using +// many small pl_buf objects. Having many small pl_bufs, even lots of few-byte +// vertex buffers, is designed to be completely fine. +// +// Thread-safety: Unsafe +typedef const struct pl_buf_t { + struct pl_buf_params params; + uint8_t *data; // for persistently mapped buffers, points to the first byte + + // If `params.export_handle` is set, this structure references the shared + // memory backing this buffer, via the requested handle type. + // + // While this buffer is not in an "exported" state, the contents of the + // memory are undefined. (See: `pl_buf_export`) + struct pl_shared_mem shared_mem; +} *pl_buf; + +// Create a buffer. The type of buffer depends on the parameters. The buffer +// parameters must adhere to the restrictions imposed by the pl_gpu_limits. +// Returns NULL on failure. +// +// For buffers with shared memory, the buffer is considered to be in an +// "exported" state by default, and may be used directly by the external API +// after being created (until the first libplacebo operation on the buffer). +PL_API pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params); +PL_API void pl_buf_destroy(pl_gpu gpu, pl_buf *buf); + +// This behaves like `pl_buf_create`, but if the buffer already exists and has +// incompatible parameters, it will get destroyed first.
A buffer is considered +// "compatible" if it has the same buffer type and texel format, a size greater +// than or equal to the requested size, and it has a superset of the features +// the user requested. After this operation, the contents of the buffer are +// undefined. +// +// Note: Due to its unpredictability, it's not allowed to use this with +// `params->initial_data` being set. Similarly, it's not allowed on a buffer +// with `params->export_handle`, since this may invalidate the corresponding +// external API's handle. Conversely, it *is* allowed on a buffer with +// `params->host_mapped`, and the corresponding `buf->data` pointer *may* +// change as a result of doing so. +// +// Note: If the `user_data` alone changes, this does not trigger a buffer +// recreation. In theory, this can be used to detect when the buffer ended +// up being recreated. +PL_API bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params); + +// Update the contents of a buffer, starting at a given offset (must be a +// multiple of 4) and up to a given size, with the contents of *data. +// +// This function will block until the buffer is no longer in use. Use +// `pl_buf_poll` to perform non-blocking queries of buffer availability. +// +// Note: This function can incur synchronization overhead, so it shouldn't be +// used in tight loops. If you do need to loop (e.g. to perform a strided +// write), consider using host-mapped buffers, or fixing the memory in RAM, +// before calling this function. +PL_API void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, + const void *data, size_t size); + +// Read back the contents of a buffer, starting at a given offset, storing the +// data into *dest. Returns whether successful. +// +// This function will block until the buffer is no longer in use. Use +// `pl_buf_poll` to perform non-blocking queries of buffer availability.
+PL_API bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, + void *dest, size_t size); + +// Copy `size` bytes from one buffer to another, reading from and writing to +// the respective offsets. +PL_API void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, + pl_buf src, size_t src_offset, size_t size); + +// Initiates a buffer export operation, allowing a buffer to be accessed by an +// external API. This is only valid for buffers with `params.export_handle`. +// Calling this twice in a row is a harmless no-op. Returns whether successful. +// +// There is no corresponding "buffer import" operation; the next libplacebo +// operation that touches the buffer (e.g. pl_tex_upload, but also pl_buf_write +// and pl_buf_read) will implicitly import the buffer back to libplacebo. Users +// must ensure that all pending operations made by the external API are fully +// completed before using it in libplacebo again. (Otherwise, the behaviour +// is undefined) +// +// Please note that this function returning does not mean the memory is +// immediately available as such. In general, it will mark a buffer as "in use" +// in the same way any other buffer operation would, and it is the user's +// responsibility to wait until `pl_buf_poll` returns false before accessing +// the memory from the external API. +// +// In terms of the access performed by this operation, it is not considered a +// "read" or "write" and therefore does not technically conflict with reads or +// writes to the buffer performed by the host (via mapped memory - any use of +// `pl_buf_read` or `pl_buf_write` would defeat the purpose of the export). +// However, restrictions made by the external API may apply that prevent this. +// +// The recommended use pattern is something like this: +// +// while (loop) { +// pl_buf buf = get_free_buffer(); // or block on pl_buf_poll +// // write to the buffer using the external API +// pl_tex_upload(gpu, /* ... buf ...
*/); // implicitly imports +// pl_buf_export(gpu, buf); +// } +// +// i.e. perform an external API operation, then use and immediately export the +// buffer in libplacebo, and finally wait until `pl_buf_poll` is false before +// re-using it in the external API. (Or get a new buffer in the meantime) +PL_API bool pl_buf_export(pl_gpu gpu, pl_buf buf); + +// Returns whether or not a buffer is currently "in use". This can either be +// because of a pending read operation, a pending write operation or a pending +// buffer export operation. Any access to the buffer by external APIs or via +// the host pointer (for host-mapped buffers) is forbidden while a buffer is +// "in use". The only exception to this rule is multiple reads, for example +// reading from a buffer with `pl_tex_upload` while simultaneously reading from +// it using mapped memory. +// +// The `timeout`, specified in nanoseconds, indicates how long to block for +// before returning. If set to 0, this function will never block, and only +// returns the current status of the buffer. The actual precision of the +// timeout may be significantly longer than one nanosecond, and has no upper +// bound. This function does not provide hard latency guarantees. This function +// may also return at any time, even if the buffer is still in use. If the user +// wishes to block until the buffer is definitely no longer in use, the +// recommended usage is: +// +// while (pl_buf_poll(gpu, buf, UINT64_MAX)) +// ; // do nothing +// +// Note: libplacebo operations on buffers are always internally synchronized, +// so this is only needed for host-mapped or externally exported buffers. +// However, it may be used to do non-blocking queries before calling blocking +// functions such as `pl_buf_read`. +// +// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly +// synchronized, meaning it can safely be called on a `pl_buf` that is in use +// by another thread. 
+PL_API bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout); + +enum pl_tex_sample_mode { + PL_TEX_SAMPLE_NEAREST, // nearest neighbour sampling + PL_TEX_SAMPLE_LINEAR, // linear filtering, requires PL_FMT_CAP_LINEAR + PL_TEX_SAMPLE_MODE_COUNT, +}; + +enum pl_tex_address_mode { + PL_TEX_ADDRESS_CLAMP, // clamp to the nearest edge texel + PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture + PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture + PL_TEX_ADDRESS_MODE_COUNT, +}; + +// Structure describing a texture. +struct pl_tex_params { + int w, h, d; // physical dimension; unused dimensions must be 0 + pl_fmt format; + + // The following bools describe what operations can be performed. The + // corresponding pl_fmt capability must be set for every enabled + // operation type. + // + // Note: For planar formats, it is also possible to set capabilities only + // supported by sub-planes. In this case, the corresponding functionality + // will be available for the sub-plane, but not the planar texture itself. + bool sampleable; // usable as a PL_DESC_SAMPLED_TEX + bool renderable; // usable as a render target (pl_pass_run) + // (must only be used with 2D textures) + bool storable; // usable as a storage image (PL_DESC_IMG_*) + bool blit_src; // usable as a blit source + bool blit_dst; // usable as a blit destination + bool host_writable; // may be updated with pl_tex_upload() + bool host_readable; // may be fetched with pl_tex_download() + + // Note: For `blit_src`, `blit_dst`, the texture must either be + // 2-dimensional or `pl_gpu_limits.blittable_1d_3d` must be set. + + // At most one of `export_handle` and `import_handle` can be set for a + // texture. + + // Setting this indicates that the memory backing this texture should be + // shared with external APIs. If so, this must be exactly *one* of + // `pl_gpu.export_caps.tex`.
+ enum pl_handle_type export_handle; + + // Setting this indicates that the memory backing this texture will be + // imported from an external API. If so, this must be exactly *one* of + // `pl_gpu.import_caps.tex`. Mutually exclusive with `initial_data`. + enum pl_handle_type import_handle; + + // If the shared memory is being imported, the import handle must be + // specified here. Otherwise, this is ignored. + struct pl_shared_mem shared_mem; + + // If non-NULL, the texture will be created with these contents (tightly + // packed). Using this does *not* require setting host_writable. Otherwise, + // the initial data is undefined. Mutually exclusive with `import_handle`. + const void *initial_data; + + // Arbitrary user data. libplacebo does not use this at all. + void *user_data; + + // Arbitrary identifying tag. Used only for debugging purposes. + pl_debug_tag debug_tag; +}; + +#define pl_tex_params(...) (&(struct pl_tex_params) { \ + .debug_tag = PL_DEBUG_TAG, \ + __VA_ARGS__ \ + }) + +static inline int pl_tex_params_dimension(const struct pl_tex_params params) +{ + return params.d ? 3 : params.h ? 2 : 1; +} + +enum pl_sampler_type { + PL_SAMPLER_NORMAL, // gsampler2D, gsampler3D etc. + PL_SAMPLER_RECT, // gsampler2DRect + PL_SAMPLER_EXTERNAL, // gsamplerExternalOES + PL_SAMPLER_TYPE_COUNT, +}; + +// Conflates the following typical GPU API concepts: +// - texture itself +// - sampler state +// - staging buffers for texture upload +// - framebuffer objects +// - wrappers for swapchain framebuffers +// - synchronization needed for upload/rendering/etc. +// +// Essentially a pl_tex can be anything ranging from a normal texture, a wrapped +// external/real framebuffer, a framebuffer object + texture pair, a mapped +// texture (via pl_hwdec), or other sorts of things that can be sampled from +// and/or rendered to. 
+// +// Thread-safety: Unsafe +typedef const struct pl_tex_t *pl_tex; +struct pl_tex_t { + struct pl_tex_params params; + + // If `params.format` is a planar format, this contains `pl_tex` handles + // encapsulating individual texture planes. Conversely, if this is a + // sub-plane of a planar texture, `parent` points to the planar texture. + // + // Note: Calling `pl_tex_destroy` on sub-planes is undefined behavior. + pl_tex planes[4]; + pl_tex parent; + + // If `params.export_handle` is set, this structure references the shared + // memory backing this texture, via the requested handle type. + // + // While this texture is not in an "exported" state, the contents of the + // memory are undefined. (See: `pl_tex_export`) + // + // Note: Due to Vulkan driver limitations, `shared_mem.drm_format_mod` will + // currently always be set to DRM_FORMAT_MOD_INVALID. No guarantee can be + // made about the cross-driver compatibility of textures exported this way. + struct pl_shared_mem shared_mem; + + // If `params.sampleable` is true, this indicates the correct sampler type + // to use when sampling from this texture. + enum pl_sampler_type sampler_type; +}; + +// Create a texture (with undefined contents). Returns NULL on failure. This is +// assumed to be an expensive/rare operation, and may need to perform memory +// allocation or framebuffer creation. +PL_API pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params); +PL_API void pl_tex_destroy(pl_gpu gpu, pl_tex *tex); + +// This works like `pl_tex_create`, but if the texture already exists and has +// incompatible texture parameters, it will get destroyed first. A texture is +// considered "compatible" if it has the same texture format and sample/address +// mode and it supports a superset of the features the user requested. +// +// Even if the texture is not recreated, calling this function will still +// invalidate the contents of the texture.
(Note: Because of this, +// `initial_data` may not be used with `pl_tex_recreate`. Doing so is an error) +// +// Note: If the `user_data` alone changes, this does not trigger a texture +// recreation. In theory, this can be used to detect when the texture ended +// up being recreated. +PL_API bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params); + +// Invalidates the contents of a texture. After this, the contents are fully +// undefined. +PL_API void pl_tex_invalidate(pl_gpu gpu, pl_tex tex); + +union pl_clear_color { + float f[4]; + int32_t i[4]; + uint32_t u[4]; +}; + +// Clear the dst texture with the given color (rgba). This is functionally +// identical to a blit operation, which means `dst->params.blit_dst` must be +// set. +PL_API void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color); + +// Wrapper for `pl_tex_clear_ex` which only works for floating point textures. +PL_API void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]); + +struct pl_tex_blit_params { + // The texture to blit from. Must have `params.blit_src` enabled. + pl_tex src; + + // The texture to blit to. Must have `params.blit_dst` enabled, and a + // format that is loosely compatible with `src`. This essentially means + // that they must have the same `internal_size`. Additionally, UINT + // textures can only be blitted to other UINT textures, and SINT textures + // can only be blitted to other SINT textures. + pl_tex dst; + + // The region of the source texture to blit. Must be within the texture + // bounds of `src`. May be flipped. (Optional) + pl_rect3d src_rc; + + // The region of the destination texture to blit into. Must be within the + // texture bounds of `dst`. May be flipped. Areas outside of `dst_rc` in + // `dst` are preserved. (Optional) + pl_rect3d dst_rc; + + // If `src_rc` and `dst_rc` have different sizes, the texture will be + // scaled using the given texture sampling mode. 
+ enum pl_tex_sample_mode sample_mode; +}; + +#define pl_tex_blit_params(...) (&(struct pl_tex_blit_params) { __VA_ARGS__ }) + +// Copy a sub-rectangle from one texture to another. +PL_API void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params); + +// Structure describing a texture transfer operation. +struct pl_tex_transfer_params { + // Texture to transfer to/from. Depending on the type of the operation, + // this must have params.host_writable (uploads) or params.host_readable + // (downloads) set, respectively. + pl_tex tex; + + // Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y + // and z fields of `rc`, as well as the corresponding pitches, are ignored. + // In all other cases, the pitch must be large enough to contain the + // corresponding dimension of `rc`, and the `rc` must be normalized and + // fully contained within the image dimensions. Missing fields in the `rc` + // are inferred from the image size. If unset, the pitch is inferred + // from `rc` (that is, it's assumed that the data is tightly packed in the + // buffer). Otherwise, `row_pitch` *must* be a multiple of + // `tex->params.format->texel_align`, and `depth_pitch` must be a multiple + // of `row_pitch`. + pl_rect3d rc; // region of the texture to transfer + size_t row_pitch; // the number of bytes separating image rows + size_t depth_pitch; // the number of bytes separating image planes + + // An optional timer to report the approximate duration of the texture + // transfer to. Note that this is only an approximation, since the actual + // texture transfer may happen entirely in the background (in particular, + // for implementations with asynchronous transfer capabilities). It's also + // not guaranteed that all GPUs support this. + pl_timer timer; + + // An optional callback to fire after the operation completes. If this is + // specified, then the operation is performed asynchronously. 
Note that + // transfers to/from buffers are always asynchronous, even without this + // field, so it's more useful for `ptr` transfers. (Though it can still be + // helpful to avoid having to manually poll buffers all the time) + // + // When this is *not* specified, uploads from `ptr` are still asynchronous + // but require a host memcpy, while downloads to `ptr` are blocking. As + // such, it's recommended to always try using asynchronous texture + // transfers wherever possible. + // + // Note: Requires `pl_gpu_limits.callbacks` + // + // Note: Callbacks are implicitly synchronized, meaning that callbacks are + // guaranteed to never execute concurrently with other callbacks. However, + // they may execute from any thread that the `pl_gpu` is used on. + void (*callback)(void *priv); + void *priv; // arbitrary user data + + // For the data source/target of a transfer operation, there are two valid + // options: + // + // 1. Transferring to/from a buffer: (requires `pl_gpu_limits.buf_transfer`) + pl_buf buf; // buffer to use + size_t buf_offset; // offset of data within buffer, should be a + // multiple of `tex->params.format->texel_size` + // 2. Transferring to/from host memory directly: + void *ptr; // address of data + bool no_import; // always use memcpy, bypassing host ptr import + + // Note: The contents of the memory region / buffer must exactly match the + // texture format; i.e. there is no explicit conversion between formats. +}; + +#define pl_tex_transfer_params(...) (&(struct pl_tex_transfer_params) { __VA_ARGS__ }) + +// Upload data to a texture. Returns whether successful. +PL_API bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params); + +// Download data from a texture. Returns whether successful. +PL_API bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params); + +// Returns whether or not a texture is currently "in use".
This can either be +// because of a pending read operation, a pending write operation or a pending +// texture export operation. Note that this function's usefulness is extremely +// limited under ordinary circumstances. In practically all cases, textures do +// not need to be directly synchronized by the user, except when interfacing +// with external libraries. This function should NOT, however, be used as a +// crutch to avoid having to implement semaphore-based synchronization. Use +// the API-specific functions such as `pl_vulkan_hold/release` for that. +// +// A good example of a use case in which this function is required is when +// interoperating with external memory management that needs to know when an +// imported texture is safe to free / reclaim internally, in which case +// semaphores are insufficient because memory management is a host operation. +// +// The `timeout`, specified in nanoseconds, indicates how long to block for +// before returning. If set to 0, this function will never block, and only +// returns the current status of the texture. The actual precision of the +// timeout may be significantly longer than one nanosecond, and has no upper +// bound. This function does not provide hard latency guarantees. This function +// may also return at any time, even if the texture is still in use. If the +// user wishes to block until the texture is definitely no longer in use, the +// recommended usage is: +// +// while (pl_tex_poll(gpu, tex, UINT64_MAX)) +// ; // do nothing +// +// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly +// synchronized, meaning it can safely be called on a `pl_tex` that is in use +// by another thread. +PL_API bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout); + +// Data type of a shader input variable (e.g.
uniform, or UBO member) +enum pl_var_type { + PL_VAR_INVALID = 0, + PL_VAR_SINT, // C: int GLSL: int/ivec + PL_VAR_UINT, // C: unsigned int GLSL: uint/uvec + PL_VAR_FLOAT, // C: float GLSL: float/vec/mat + PL_VAR_TYPE_COUNT +}; + +// Returns the host size (in bytes) of a pl_var_type. +PL_API size_t pl_var_type_size(enum pl_var_type type); + +// Represents a shader input variable (concrete data, e.g. vector, matrix) +struct pl_var { + const char *name; // name as used in the shader + enum pl_var_type type; + // The total number of values is given by dim_v * dim_m. For example, a + // vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4 + // and dim_m = 3. + int dim_v; // vector dimension + int dim_m; // matrix dimension (number of columns, see below) + int dim_a; // array dimension +}; + +// Helper functions for constructing the most common pl_vars, with names +// matching their corresponding GLSL built-in types. +PL_API struct pl_var pl_var_float(const char *name); +PL_API struct pl_var pl_var_vec2(const char *name); +PL_API struct pl_var pl_var_vec3(const char *name); +PL_API struct pl_var pl_var_vec4(const char *name); +PL_API struct pl_var pl_var_mat2(const char *name); +PL_API struct pl_var pl_var_mat2x3(const char *name); +PL_API struct pl_var pl_var_mat2x4(const char *name); +PL_API struct pl_var pl_var_mat3(const char *name); +PL_API struct pl_var pl_var_mat3x4(const char *name); +PL_API struct pl_var pl_var_mat4x2(const char *name); +PL_API struct pl_var pl_var_mat4x3(const char *name); +PL_API struct pl_var pl_var_mat4(const char *name); +PL_API struct pl_var pl_var_int(const char *name); +PL_API struct pl_var pl_var_ivec2(const char *name); +PL_API struct pl_var pl_var_ivec3(const char *name); +PL_API struct pl_var pl_var_ivec4(const char *name); +PL_API struct pl_var pl_var_uint(const char *name); +PL_API struct pl_var pl_var_uvec2(const char *name); +PL_API struct pl_var pl_var_uvec3(const char *name); +PL_API struct pl_var
pl_var_uvec4(const char *name); + +struct pl_named_var { + const char *glsl_name; + struct pl_var var; +}; + +// The same list as above, tagged by name and terminated with a {0} entry. +PL_API extern const struct pl_named_var pl_var_glsl_types[]; + +// Efficient helper function for performing a lookup in the above array. +// Returns NULL if the variable is not legal. Note that the array dimension is +// ignored, since it's usually part of the variable name and not the type name. +PL_API const char *pl_var_glsl_type_name(struct pl_var var); + +// Converts a pl_fmt to an "equivalent" pl_var. Equivalent in this sense means +// that the pl_var's type will be the same as the vertex's sampled type (e.g. +// PL_FMT_UNORM gets turned into PL_VAR_FLOAT). +PL_API struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name); + +// Describes the memory layout of a variable, relative to some starting location +// (typically the offset within a uniform/storage/pushconstant buffer) +// +// Note on matrices: All GPUs expect column major matrices, for both buffers and +// input variables. Care needs to be taken to avoid trying to use e.g. a +// pl_matrix3x3 (which is row major) directly as a pl_var_update.data! +// +// In terms of the host layout, a column-major matrix (e.g. matCxR) with C +// columns and R rows is treated like an array vecR[C]. The `stride` here refers +// to the separation between these array elements, i.e. the separation between +// the individual columns. +// +// Visualization of a mat4x3: +// +// 0 1 2 3 <- columns +// 0 [ (A) (D) (G) (J) ] +// 1 [ (B) (E) (H) (K) ] +// 2 [ (C) (F) (I) (L) ] +// ^ rows +// +// Layout in GPU memory: (stride=16, size=60) +// +// [ A B C ] X <- column 0, offset +0 +// [ D E F ] X <- column 1, offset +16 +// [ G H I ] X <- column 2, offset +32 +// [ J K L ] <- column 3, offset +48 +// +// Note the lack of padding on the last column in this example. 
+// In general: size <= stride * dim_m +// +// C representation: (stride=12, size=48) +// +// { { A, B, C }, +// { D, E, F }, +// { G, H, I }, +// { J, K, L } } +// +// Note on arrays: `stride` represents both the stride between elements of a +// matrix, and the stride between elements of an array. That is, there is no +// distinction between the columns of a matrix and the rows of an array. For +// example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride +// would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10. +// +// For non-array/matrix types, `stride` is equal to `size`. + +struct pl_var_layout { + size_t offset; // the starting offset of the first byte + size_t stride; // the delta between two elements of an array/matrix + size_t size; // the total size of the input +}; + +// Returns the host layout of an input variable as required for a +// tightly-packed, byte-aligned C data type, given a starting offset. +PL_API struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var); + +// Returns the GLSL std140 layout of an input variable given a current buffer +// offset, as required for a buffer descriptor of type PL_DESC_BUF_UNIFORM +// +// The normal way to use this function is when calculating the size and offset +// requirements of a uniform buffer in an incremental fashion, to calculate the +// new offset of the next variable in this buffer. +PL_API struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var); + +// Returns the GLSL std430 layout of an input variable given a current buffer +// offset, as required for a buffer descriptor of type PL_DESC_BUF_STORAGE, and +// for push constants. 
+PL_API struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var); + +// Convenience definitions / friendly names for these +#define pl_buf_uniform_layout pl_std140_layout +#define pl_buf_storage_layout pl_std430_layout +#define pl_push_constant_layout pl_std430_layout + +// Like memcpy, but copies bytes from `src` to `dst` in a manner governed by +// the stride and size of `dst_layout` as well as `src_layout`. Also takes +// into account the respective `offset`. +PL_API void memcpy_layout(void *dst, struct pl_var_layout dst_layout, + const void *src, struct pl_var_layout src_layout); + +// Represents a compile-time constant. +struct pl_constant { + enum pl_var_type type; // constant data type + uint32_t id; // GLSL `constant_id` + size_t offset; // byte offset in `constant_data` +}; + +// Represents a vertex attribute. +struct pl_vertex_attrib { + const char *name; // name as used in the shader + pl_fmt fmt; // data format (must have PL_FMT_CAP_VERTEX) + size_t offset; // byte offset into the vertex struct + int location; // vertex location (as used in the shader) +}; + +// Returns an abstract namespace index for a given descriptor type. This will +// always be a value >= 0 and < PL_DESC_TYPE_COUNT. Implementations can use +// this to figure out which descriptors may share the same value of `binding`. +// Bindings must only be unique for all descriptors within the same namespace. +PL_API int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type); + +// Access mode of a shader input descriptor. +enum pl_desc_access { + PL_DESC_ACCESS_READWRITE, + PL_DESC_ACCESS_READONLY, + PL_DESC_ACCESS_WRITEONLY, + PL_DESC_ACCESS_COUNT, +}; + +// Returns the GLSL syntax for a given access mode (e.g. "readonly"). +PL_API const char *pl_desc_access_glsl_name(enum pl_desc_access mode); + +// Represents a shader descriptor (e.g. 
texture or buffer binding) +struct pl_desc { + const char *name; // name as used in the shader + enum pl_desc_type type; + + // The binding of this descriptor, as used in the shader. All bindings + // within a namespace must be unique. (see: pl_desc_namespace) + int binding; + + // For storage images and storage buffers, this can be used to restrict + // the type of access that may be performed on the descriptor. Ignored for + // the other descriptor types (uniform buffers and sampled textures are + // always read-only). + enum pl_desc_access access; +}; + +// Framebuffer blending mode (for raster passes) +enum pl_blend_mode { + PL_BLEND_ZERO, + PL_BLEND_ONE, + PL_BLEND_SRC_ALPHA, + PL_BLEND_ONE_MINUS_SRC_ALPHA, + PL_BLEND_MODE_COUNT, +}; + +struct pl_blend_params { + enum pl_blend_mode src_rgb; + enum pl_blend_mode dst_rgb; + enum pl_blend_mode src_alpha; + enum pl_blend_mode dst_alpha; +}; + +#define pl_blend_params(...) (&(struct pl_blend_params) { __VA_ARGS__ }) + +// Typical alpha compositing +PL_API extern const struct pl_blend_params pl_alpha_overlay; + +enum pl_prim_type { + PL_PRIM_TRIANGLE_LIST, + PL_PRIM_TRIANGLE_STRIP, + PL_PRIM_TYPE_COUNT, +}; + +enum pl_index_format { + PL_INDEX_UINT16 = 0, + PL_INDEX_UINT32, + PL_INDEX_FORMAT_COUNT, +}; + +enum pl_pass_type { + PL_PASS_INVALID = 0, + PL_PASS_RASTER, // vertex+fragment shader + PL_PASS_COMPUTE, // compute shader (requires `pl_gpu.glsl.compute`) + PL_PASS_TYPE_COUNT, +}; + +// Description of a rendering pass. It conflates the following: +// - GLSL shader(s) and its list of inputs +// - target parameters (for raster passes) +struct pl_pass_params { + enum pl_pass_type type; + + // Input variables. + struct pl_var *variables; + int num_variables; + + // Input descriptors. + struct pl_desc *descriptors; + int num_descriptors; + + // Compile-time specialization constants. + struct pl_constant *constants; + int num_constants; + + // Initial data for the specialization constants. Optional. 
If NULL, + // specialization constants receive the values from the shader text. + void *constant_data; + + // Push constant region. Must be a multiple of 4 and <= limits.max_pushc_size + size_t push_constants_size; + + // The shader text in GLSL. For PL_PASS_RASTER, this is interpreted + // as a fragment shader. For PL_PASS_COMPUTE, this is interpreted as + // a compute shader. + const char *glsl_shader; + + // --- type==PL_PASS_RASTER only + + // Describes the interpretation and layout of the vertex data. + enum pl_prim_type vertex_type; + struct pl_vertex_attrib *vertex_attribs; + int num_vertex_attribs; + size_t vertex_stride; // must be a multiple of limits.align_vertex_stride + + // The vertex shader itself. + const char *vertex_shader; + + // Target format. The format must support PL_FMT_CAP_RENDERABLE. The + // resulting pass may only be used on textures that have a format with a + // `pl_fmt.signature` compatible with this format. + pl_fmt target_format; + + // Target blending mode. If this is NULL, blending is disabled. Otherwise, + // the `target_format` must also support PL_FMT_CAP_BLENDABLE. + const struct pl_blend_params *blend_params; + + // If false, the target's existing contents will be discarded before the + // pass is run. (Semantically equivalent to calling pl_tex_invalidate + // before every pl_pass_run, but slightly more efficient) + // + // Specifying `blend_params` requires `load_target` to be true. + bool load_target; + + // --- Deprecated / removed fields. + PL_DEPRECATED const uint8_t *cached_program; // Non-functional + PL_DEPRECATED size_t cached_program_len; +}; + +#define pl_pass_params(...)
(&(struct pl_pass_params) { __VA_ARGS__ }) + +// Conflates the following typical GPU API concepts: +// - various kinds of shaders +// - rendering pipelines +// - descriptor sets, uniforms, other bindings +// - all synchronization necessary +// - the current values of all inputs +// +// Thread-safety: Unsafe +typedef const struct pl_pass_t { + struct pl_pass_params params; +} *pl_pass; + +// Compile a shader and create a render pass. This is a rare/expensive +// operation and may take a significant amount of time, even if a cached +// program is used. Returns NULL on failure. +PL_API pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params); +PL_API void pl_pass_destroy(pl_gpu gpu, pl_pass *pass); + +struct pl_desc_binding { + const void *object; // pl_* object with type corresponding to pl_desc_type + + // For PL_DESC_SAMPLED_TEX, this can be used to configure the sampler. + enum pl_tex_address_mode address_mode; + enum pl_tex_sample_mode sample_mode; +}; + +struct pl_var_update { + int index; // index into params.variables[] + const void *data; // pointer to raw byte data corresponding to pl_var_host_layout() +}; + +struct pl_pass_run_params { + pl_pass pass; + + // If present, the shader will be re-specialized with the new constants + // provided. This is a significantly cheaper operation than recompiling a + // brand new shader, but should still be avoided if possible. + // + // Leaving it as NULL re-uses the existing specialization values. Ignored + // if the shader has no specialization constants. Guaranteed to be a no-op + // if the values have not changed since the last invocation. + void *constant_data; + + // This list only contains descriptors/variables which have changed + // since the previous invocation. All non-mentioned variables implicitly + // preserve their state from the last invocation. + struct pl_var_update *var_updates; + int num_var_updates; + + // This list contains all descriptors used by this pass. 
It must + // always be filled, even if the descriptors haven't changed. The order + // must match that of pass->params.descriptors + struct pl_desc_binding *desc_bindings; + + // The push constants for this invocation. This must always be set and + // fully defined for every invocation if params.push_constants_size > 0. + void *push_constants; + + // An optional timer to report the approximate runtime of this shader pass + // invocation to. Note that this is only an approximation, since shaders + // may overlap their execution times and contend for GPU time. + pl_timer timer; + + // --- pass->params.type==PL_PASS_RASTER only + + // Target must be a 2D texture, `target->params.renderable` must be true, + // and `target->params.format->signature` must match the signature provided + // in `pass->params.target_format`. + // + // If the viewport or scissors are left blank, they are inferred from + // target->params. + // + // WARNING: Rendering to a *target that is being read from by the same + // shader is undefined behavior. In general, trying to bind the same + // resource multiple times to the same shader is undefined behavior. + pl_tex target; + pl_rect2d viewport; // screen space viewport (must be normalized) + pl_rect2d scissors; // target render scissors (must be normalized) + + // Number of vertices to render + int vertex_count; + + // Vertex data may be provided in one of two forms: + // + // 1. Drawing from host memory directly + const void *vertex_data; + // 2. Drawing from a vertex buffer (requires `vertex_buf->params.drawable`) + pl_buf vertex_buf; + size_t buf_offset; + + // (Optional) Index data may be provided in the form given by `index_fmt`. + // These will be used for indexed rendering. Similar to vertex data, this + // can be provided in two forms: + // 1. From host memory + const void *index_data; + enum pl_index_format index_fmt; + // 2.
From an index buffer (requires `index_buf->params.drawable`) + pl_buf index_buf; + size_t index_offset; + // Note: Drawing from an index buffer requires vertex data to also be + // present in buffer form, i.e. it's forbidden to mix `index_buf` with + // `vertex_data` (though vice versa is allowed). + + // --- pass->params.type==PL_PASS_COMPUTE only + + // Number of work groups to dispatch per dimension (X/Y/Z). Must be <= the + // corresponding index of limits.max_dispatch + int compute_groups[3]; +}; + +#define pl_pass_run_params(...) (&(struct pl_pass_run_params) { __VA_ARGS__ }) + +// Execute a render pass. +PL_API void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params); + +// This is semantically a no-op, but it provides a hint that you want to flush +// any partially queued up commands and begin execution. There is normally no +// need to call this, because queued commands will always be implicitly flushed +// whenever necessary to make forward progress on commands like `pl_buf_poll`, +// or when submitting a frame to a swapchain for display. In fact, calling this +// function can negatively impact performance, because some GPUs rely on being +// able to re-order and modify queued commands in order to enable optimizations +// retroactively. +// +// The only time this might be beneficial to call explicitly is if you're doing +// lots of offline processing, i.e. you aren't rendering to a swapchain but to +// textures that you download from again. In that case you should call this +// function after each "work item" to ensure good parallelism between them. +// +// It's worth noting that this function may block if you're over-feeding the +// GPU without waiting for existing results to finish. +PL_API void pl_gpu_flush(pl_gpu gpu); + +// This is like `pl_gpu_flush` but also blocks until the GPU is fully idle +// before returning. Using this in your rendering loop is seriously disadvised, +// and almost never the right solution. 
The intended use case is for deinit +// logic, where users may want to force all pending GPU operations to +// finish so they can clean up their state more easily. +// +// After this operation is called, it's guaranteed that all pending buffer +// operations are complete - i.e. `pl_buf_poll` is guaranteed to return false. +// It's also guaranteed that any outstanding timer query results are available. +// +// Note: If you only care about buffer operations, you can accomplish this more +// easily by using `pl_buf_poll` with the timeout set to `UINT64_MAX`. But if +// you have many buffers it may be more convenient to call this function +// instead. The difference is that this function will also affect e.g. renders +// to a `pl_swapchain`. +PL_API void pl_gpu_finish(pl_gpu gpu); + +// Returns true if the GPU is considered to be in a "failed" state, which +// during normal operation is typically the result of things like the device +// being lost (due to e.g. power management). +// +// If this returns true, users *should* destroy and recreate the `pl_gpu`, +// including all associated resources, via the appropriate mechanism. +PL_API bool pl_gpu_is_failed(pl_gpu gpu); + + +// Deprecated objects and functions: + +// A generic synchronization object intended for use with an external API. This +// is not required when solely using libplacebo API functions, as all required +// synchronisation is done internally. This comes in the form of a pair of +// semaphores - one to synchronize access in each direction. +// +// Thread-safety: Unsafe +typedef const struct pl_sync_t { + enum pl_handle_type handle_type; + + // This handle is signalled by the `pl_gpu`, and waited on by the user. It + // fires when it is safe for the user to access the shared resource. + union pl_handle wait_handle; + + // This handle is signalled by the user, and waited on by the `pl_gpu`. It + // must fire when the user has finished accessing the shared resource.
+ union pl_handle signal_handle; +} *pl_sync; + +// Create a synchronization object. Returns NULL on failure. +// +// `handle_type` must be exactly *one* of `pl_gpu.export_caps.sync`, and +// indicates which type of handle to generate for sharing this sync object. +// +// Deprecated in favor of API-specific semaphore creation operations such as +// `pl_vulkan_sem_create`. +PL_DEPRECATED PL_API pl_sync pl_sync_create(pl_gpu gpu, enum pl_handle_type handle_type); + +// Destroy a `pl_sync`. Note that this invalidates the externally imported +// semaphores. Users should therefore make sure that all operations that +// wait on or signal any of the semaphores have been fully submitted and +// processed by the external API before destroying the `pl_sync`. +// +// Despite this, it's safe to destroy a `pl_sync` if the only pending +// operations that involve it are internal to libplacebo. +PL_DEPRECATED PL_API void pl_sync_destroy(pl_gpu gpu, pl_sync *sync); + +// Initiates a texture export operation, allowing a texture to be accessed by +// an external API. Returns whether successful. After this operation +// successfully returns, it is guaranteed that `sync->wait_handle` will +// eventually be signalled. For APIs where this is relevant, the image layout +// should be specified as "general", e.g. `GL_LAYOUT_GENERAL_EXT` for OpenGL. +// +// There is no corresponding "import" operation - the next operation that uses +// a texture will implicitly import the texture. Valid API usage requires that +// the user *must* submit a semaphore signal operation on `sync->signal_handle` +// before doing so. Not doing so is undefined behavior and may very well +// deadlock the calling process and/or the graphics card! +// +// Note that despite this restriction, it is always valid to call +// `pl_tex_destroy`, even if the texture is in an exported state, without +// having to signal the corresponding sync object first.
+// +// Deprecated in favor of API-specific synchronization mechanisms such as +// `pl_vulkan_hold/release_ex`. +PL_DEPRECATED PL_API bool pl_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync); + + +PL_API_END + +#endif // LIBPLACEBO_GPU_H_ diff --git a/src/include/libplacebo/log.h b/src/include/libplacebo/log.h new file mode 100644 index 0000000..b24c931 --- /dev/null +++ b/src/include/libplacebo/log.h @@ -0,0 +1,113 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_LOG_H_ +#define LIBPLACEBO_LOG_H_ + +#include <libplacebo/config.h> +#include <libplacebo/common.h> + +PL_API_BEGIN + +// The log level associated with a given log message. +enum pl_log_level { + PL_LOG_NONE = 0, + PL_LOG_FATAL, // results in total loss of function of a major component + PL_LOG_ERR, // serious error; may result in degraded function + PL_LOG_WARN, // warning; potentially bad, probably user-relevant + PL_LOG_INFO, // informational message, also potentially harmless errors + PL_LOG_DEBUG, // verbose debug message, informational + PL_LOG_TRACE, // very noisy trace of activity, usually benign + PL_LOG_ALL = PL_LOG_TRACE, +}; + +struct pl_log_params { + // Logging callback. All messages, informational or otherwise, will get + // redirected to this callback.
The logged messages do not include trailing + // newlines. Optional. + void (*log_cb)(void *log_priv, enum pl_log_level level, const char *msg); + void *log_priv; + + // The current log level. Controls the level of message that will be + // redirected to the log callback. Setting this to PL_LOG_ALL means all + // messages will be forwarded, but doing so indiscriminately can result + // in increased CPU usage as it may enable extra debug paths based on the + // configured log level. + enum pl_log_level log_level; +}; + +#define pl_log_params(...) (&(struct pl_log_params) { __VA_ARGS__ }) +PL_API extern const struct pl_log_params pl_log_default_params; + +// Thread-safety: Safe +// +// Note: In any context in which `pl_log` is used, users may also pass NULL +// to disable logging. In other words, NULL is a valid `pl_log`. +typedef const struct pl_log_t { + struct pl_log_params params; +} *pl_log; + +#define pl_log_glue1(x, y) x##y +#define pl_log_glue2(x, y) pl_log_glue1(x, y) +// Force a link error in the case of linking against an incompatible API +// version. +#define pl_log_create pl_log_glue2(pl_log_create_, PL_API_VER) +// Creates a pl_log. `api_ver` is for historical reasons and ignored currently. +// `params` defaults to `&pl_log_default_params` if left as NULL. +// +// Note: As a general rule, any `params` struct used as an argument to a +// function need only live until the corresponding function returns. +PL_API pl_log pl_log_create(int api_ver, const struct pl_log_params *params); + +// Destroy a `pl_log` object. +// +// Note: As a general rule, all `_destroy` functions take the pointer to the +// object to free as their parameter. This pointer is overwritten by NULL +// afterwards. Calling a _destroy function on &{NULL} is valid, but calling it +// on NULL itself is invalid. +PL_API void pl_log_destroy(pl_log *log); + +// Update the parameters of a `pl_log` without destroying it. 
This can be +// used to change the log function, log context or log level retroactively. +// `params` defaults to `&pl_log_default_params` if left as NULL. +// +// Returns the previous params, atomically. +PL_API struct pl_log_params pl_log_update(pl_log log, const struct pl_log_params *params); + +// Like `pl_log_update` but only updates the log level, leaving the log +// callback intact. +// +// Returns the previous log level, atomically. +PL_API enum pl_log_level pl_log_level_update(pl_log log, enum pl_log_level level); + +// Two simple, stream-based loggers. You can use these as the log_cb. If you +// also set log_priv to a FILE* (e.g. stdout or stderr) it will be printed +// there; otherwise, it will be printed to stdout or stderr depending on the +// log level. +// +// The version with colors will use ANSI escape sequences to indicate the log +// level. The version without will use explicit prefixes. +PL_API void pl_log_simple(void *stream, enum pl_log_level level, const char *msg); +PL_API void pl_log_color(void *stream, enum pl_log_level level, const char *msg); + +// Backwards compatibility with older versions of libplacebo +#define pl_context pl_log +#define pl_context_params pl_log_params + +PL_API_END + +#endif // LIBPLACEBO_LOG_H_ diff --git a/src/include/libplacebo/meson.build b/src/include/libplacebo/meson.build new file mode 100644 index 0000000..2f4631e --- /dev/null +++ b/src/include/libplacebo/meson.build @@ -0,0 +1,6 @@ +sources += configure_file( + input: 'config.h.in', + output: 'config.h', + install_dir: get_option('includedir') / meson.project_name(), + configuration: conf_public, +) diff --git a/src/include/libplacebo/opengl.h b/src/include/libplacebo/opengl.h new file mode 100644 index 0000000..46597b2 --- /dev/null +++ b/src/include/libplacebo/opengl.h @@ -0,0 +1,230 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_OPENGL_H_ +#define LIBPLACEBO_OPENGL_H_ + +#include <string.h> + +#include <libplacebo/gpu.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +// Note on thread safety: The thread safety of `pl_opengl` and any associated +// GPU objects follows the same thread safety rules as the underlying OpenGL +// context. In other words, they must only be called from the thread the OpenGL +// context is current on. + +typedef const struct pl_opengl_t { + pl_gpu gpu; + + // Detected GL version + int major, minor; + + // List of GL/EGL extensions, provided for convenience + const char * const *extensions; + int num_extensions; +} *pl_opengl; + +static inline bool pl_opengl_has_ext(pl_opengl gl, const char *ext) +{ + for (int i = 0; i < gl->num_extensions; i++) + if (!strcmp(ext, gl->extensions[i])) + return true; + return false; +} + +typedef void (*pl_voidfunc_t)(void); + +struct pl_opengl_params { + // Main gl*GetProcAddr function. This will be used to load all GL/EGL + // functions. Optional - if unspecified, libplacebo will default to an + // internal loading logic which should work on most platforms. + pl_voidfunc_t (*get_proc_addr_ex)(void *proc_ctx, const char *procname); + void *proc_ctx; + + // Simpler API for backwards compatibility / convenience. 
(This one + // directly matches the signature of most gl*GetProcAddr library functions) + pl_voidfunc_t (*get_proc_addr)(const char *procname); + + // Enable OpenGL debug report callbacks. May have little effect depending + // on whether or not the GL context was initialized with appropriate + // debugging enabled. + bool debug; + + // Allow the use of (suspected) software rasterizers and renderers. These + // can be useful for debugging purposes, but normally, their use is + // undesirable when GPU-accelerated processing is expected. + bool allow_software; + + // Restrict the maximum allowed GLSL version. (Mainly for testing) + int max_glsl_version; + + // Optional. Required when importing/exporting dmabufs as textures. + void *egl_display; + void *egl_context; + + // Optional callbacks to bind/release the OpenGL context on the current + // thread. If these are specified, then the resulting `pl_gpu` will have + // `pl_gpu_limits.thread_safe` enabled, and may therefore be used from any + // thread without first needing to bind the OpenGL context. + // + // If the user is re-using the same OpenGL context in non-libplacebo code, + // then these callbacks should include whatever synchronization is + // necessary to prevent simultaneous use between libplacebo and the user. + bool (*make_current)(void *priv); + void (*release_current)(void *priv); + void *priv; +}; + +// Default/recommended parameters +#define pl_opengl_params(...) (&(struct pl_opengl_params) { __VA_ARGS__ }) +PL_API extern const struct pl_opengl_params pl_opengl_default_params; + +// Creates a new OpenGL renderer based on the given parameters. This will +// internally use whatever platform-defined mechanism (WGL, X11, EGL) is +// appropriate for loading the OpenGL function calls, so the user doesn't need +// to pass in a `getProcAddress` callback. If `params` is left as NULL, it +// defaults to `&pl_opengl_default_params`. 
The context must be active when +// calling this function, and must remain active whenever calling any +// libplacebo function on the resulting `pl_opengl` or `pl_gpu`. +// +// Note that creating multiple `pl_opengl` instances from the same OpenGL +// context is undefined behavior. +PL_API pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params); + +// All resources allocated from the `pl_gpu` contained by this `pl_opengl` must +// be explicitly destroyed by the user before calling `pl_opengl_destroy`. +PL_API void pl_opengl_destroy(pl_opengl *gl); + +// For a `pl_gpu` backed by `pl_opengl`, this function can be used to retrieve +// the underlying `pl_opengl`. Returns NULL for any other type of `gpu`. +PL_API pl_opengl pl_opengl_get(pl_gpu gpu); + +struct pl_opengl_framebuffer { + // ID of the framebuffer, or 0 to use the context's default framebuffer. + int id; + + // If true, then the framebuffer is assumed to be "flipped" relative to + // normal GL semantics, i.e. set this to `true` if the first pixel is the + // top left corner. + bool flipped; +}; + +struct pl_opengl_swapchain_params { + // Set this to the platform-specific function to swap buffers, e.g. + // glXSwapBuffers, eglSwapBuffers etc. This will be called internally by + // `pl_swapchain_swap_buffers`. Required, unless you never call that + // function. + void (*swap_buffers)(void *priv); + + // Initial framebuffer description. This can be changed later on using + // `pl_opengl_swapchain_update_fb`. + struct pl_opengl_framebuffer framebuffer; + + // Attempt forcing a specific latency. If this is nonzero, then + // `pl_swapchain_swap_buffers` will wait until fewer than N frames are "in + // flight" before returning. Setting this to a high number generally + // accomplishes nothing, because the OpenGL driver typically limits the + // number of buffers on its own. But setting it to a low number like 2 or + // even 1 can reduce latency (at the cost of throughput).
+ int max_swapchain_depth; + + // Arbitrary user pointer that gets passed to `swap_buffers` etc. + void *priv; +}; + +#define pl_opengl_swapchain_params(...) (&(struct pl_opengl_swapchain_params) { __VA_ARGS__ }) + +// Creates an instance of `pl_swapchain` tied to the active context. +// Note: Due to OpenGL semantics, users *must* call `pl_swapchain_resize` +// before attempting to use this swapchain, otherwise calls to +// `pl_swapchain_start_frame` will fail. +PL_API pl_swapchain pl_opengl_create_swapchain(pl_opengl gl, + const struct pl_opengl_swapchain_params *params); + +// Update the framebuffer description. After calling this function, users +// *must* call `pl_swapchain_resize` before attempting to use the swapchain +// again, otherwise calls to `pl_swapchain_start_frame` will fail. +PL_API void pl_opengl_swapchain_update_fb(pl_swapchain sw, + const struct pl_opengl_framebuffer *fb); + +struct pl_opengl_wrap_params { + // The GLuint texture object itself. Optional. If no texture is provided, + // then only the opaque framebuffer `fbo` will be wrapped, leaving the + // resulting `pl_tex` object with some operations (such as sampling) being + // unsupported. + unsigned int texture; + + // The GLuint associated framebuffer. Optional. If this is not specified, + // then libplacebo will attempt creating a framebuffer from the provided + // texture object (if possible). + // + // Note: As a special case, if neither a texture nor an FBO are provided, + // this is equivalent to wrapping the OpenGL default framebuffer (id 0). + unsigned int framebuffer; + + // The image's dimensions (unused dimensions must be 0) + int width; + int height; + int depth; + + // Texture-specific fields: + // + // Note: These are only relevant if `texture` is provided. + + // The GLenum for the texture target to use, e.g. GL_TEXTURE_2D. Optional. + // If this is left as 0, the target is inferred from the number of + // dimensions. 
Users may want to set this to something specific like + // GL_TEXTURE_EXTERNAL_OES depending on the nature of the texture. + unsigned int target; + + // The texture's GLint sized internal format (e.g. GL_RGBA16F). Required. + int iformat; +}; + +#define pl_opengl_wrap_params(...) (&(struct pl_opengl_wrap_params) { __VA_ARGS__ }) + +// Wraps an external OpenGL object into a `pl_tex` abstraction. Due to the +// internally synchronized nature of OpenGL, no explicit synchronization +// is needed between libplacebo `pl_tex_` operations, and host accesses to +// the texture. Wrapping the same OpenGL texture multiple times is permitted. +// Note that this function transfers no ownership. +// +// This wrapper can be destroyed by simply calling `pl_tex_destroy` on it, +// which will *not* destroy the user-provided OpenGL texture or framebuffer. +// +// This function may fail, in which case it returns NULL. +PL_API pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params); + +// Analogous to `pl_opengl_wrap`, this function takes any `pl_tex` (including +// ones created by `pl_tex_create`) and unwraps it to expose the underlying +// OpenGL texture to the user. Note that this function transfers no ownership, +// i.e. the texture object and framebuffer shall not be destroyed by the user. +// +// Returns the OpenGL texture. `out_target` and `out_iformat` will be updated +// to hold the target type and internal format, respectively. (Optional) +// +// For renderable/blittable textures, `out_fbo` will be updated to the ID of +// the framebuffer attached to this texture, or 0 if there is none. 
(Optional) +PL_API unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, + int *out_iformat, unsigned int *out_fbo); + +PL_API_END + +#endif // LIBPLACEBO_OPENGL_H_ diff --git a/src/include/libplacebo/options.h b/src/include/libplacebo/options.h new file mode 100644 index 0000000..e40f5e7 --- /dev/null +++ b/src/include/libplacebo/options.h @@ -0,0 +1,201 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_OPTIONS_H_ +#define LIBPLACEBO_OPTIONS_H_ + +#include <libplacebo/renderer.h> + +PL_API_BEGIN + +// High-level heap-managed struct containing storage for all options implied by +// pl_render_params, including a high-level interface for serializing, +// deserializing and interfacing with them in a programmatic way. + +typedef const struct pl_opt_t *pl_opt; +typedef struct pl_options_t { + // Non-NULL `params.*_params` pointers must always point into this struct + struct pl_render_params params; + + // Backing storage for all of the various rendering parameters. Whether + // or not these params are active is determined by whether or not + // `params.*_params` is set to this address or NULL. 
+ struct pl_deband_params deband_params; + struct pl_sigmoid_params sigmoid_params; + struct pl_color_adjustment color_adjustment; + struct pl_peak_detect_params peak_detect_params; + struct pl_color_map_params color_map_params; + struct pl_dither_params dither_params; + struct pl_icc_params icc_params PL_DEPRECATED; + struct pl_cone_params cone_params; + struct pl_blend_params blend_params; + struct pl_deinterlace_params deinterlace_params; + struct pl_distort_params distort_params; + + // Backing storage for "custom" scalers. `params.upscaler` etc. will + // always be a pointer either to a built-in pl_filter_config, or one of + // these structs. `name`, `description` and `allowed` will always be + // valid for the respective type of filter config. + struct pl_filter_config upscaler; + struct pl_filter_config downscaler; + struct pl_filter_config plane_upscaler; + struct pl_filter_config plane_downscaler; + struct pl_filter_config frame_mixer; +} *pl_options; + +// Allocate a new set of render params, with internally backed storage for +// all parameters. Initialized to an "empty" config (PL_RENDER_DEFAULTS), +// equivalent to `&pl_render_fast_params`. To initialize the struct instead to +// the recommended default parameters, use `pl_options_reset` with +// `pl_render_default_params`. +// +// If `log` is provided, errors related to parsing etc. will be logged there. +PL_API pl_options pl_options_alloc(pl_log log); +PL_API void pl_options_free(pl_options *opts); + +// Resets all options to their default values from a given struct. If `preset` +// is NULL, `opts` is instead reset back to the initial "empty" configuration, +// with all options disabled, as if it was freshly allocated. +// +// Note: This function will also reset structs which were not included in +// `preset`, such as any custom upscalers. +PL_API void pl_options_reset(pl_options opts, const struct pl_render_params *preset); + +typedef const struct pl_opt_data_t { + // Original options struct. 
+ pl_options opts; + + // Triggering option for this callback invocation. + pl_opt opt; + + // The raw data associated with this option. Always some pointer into + // `opts`. Note that only PL_OPT_BOOL, PL_OPT_INT and PL_OPT_FLOAT have + // a fixed representation, for other fields its usefulness is dubious. + const void *value; + + // The underlying data, as a formatted, locale-invariant string. Lifetime + // is limited until the return of this callback. + const char *text; +} *pl_opt_data; + +// Query a single option from `opts` by key, or NULL if none was found. +// The resulting pointer is only valid until the next pl_options_* call. +PL_API pl_opt_data pl_options_get(pl_options opts, const char *key); + +// Update an option from a formatted value string (see `pl_opt_data.text`). +// This can be used for all types of options, even non-string ones. In this case, +// `value` will be parsed according to the option type. +// +// Returns whether successful. +PL_API bool pl_options_set_str(pl_options opts, const char *key, const char *value); + +// Programmatically iterate over options set in a `pl_options`, running the +// provided callback on each entry. +PL_API void pl_options_iterate(pl_options opts, + void (*cb)(void *priv, pl_opt_data data), + void *priv); + +// Serialize a `pl_options` struct to a comma-separated key/value string. The +// returned string has a lifetime valid until either the next call to +// `pl_options_save`, or until the `pl_options` is freed. +PL_API const char *pl_options_save(pl_options opts); + +// Parse a `pl_options` struct from a key/value string, in standard syntax +// "key1=value1,key2=value2,...", and updates `opts` with the new values. +// Valid separators include whitespace, commas (,) and (semi)colons (:;). +// +// Returns true if no errors occurred. +PL_API bool pl_options_load(pl_options opts, const char *str); + +// Helpers for interfacing with `opts->params.hooks`.
Note that using any of +// these helpers will overwrite the array by an internally managed pointer, +// so care must be taken when combining them with external management of +// this memory. Negative indices are possible and are counted relative to the +// end of the list. +// +// Note: These hooks are *not* included in pl_options_save() and related. +PL_API void pl_options_add_hook(pl_options opts, const struct pl_hook *hook); +PL_API void pl_options_insert_hook(pl_options opts, const struct pl_hook *hook, int idx); +PL_API void pl_options_remove_hook_at(pl_options opts, int idx); + +// Underlying options system and list +// +// Note: By necessity, this option list does not cover every single field +// present in `pl_render_params`. In particular, fields like `info_callback`, +// `lut` and `hooks` cannot be configured through the options system, as doing +// so would require interop with C code or I/O. (However, see +// `pl_options_add_hook` and related) + +enum pl_option_type { + // Accepts `yes/no`, `on/off`, `true/false` and variants + PL_OPT_BOOL, + + // Parsed as human-readable locale-invariant (C) numbers, scientific + // notation accepted for floats + PL_OPT_INT, + PL_OPT_FLOAT, + + // Parsed as a short string containing only alphanumerics and _-, + // corresponding to some name/identifier. Catch-all bucket for several + // other types of options, such as presets, struct pointers, and functions + // + // Note: These options do not correspond to actual strings in C, the + // underlying type of option will determine the values of `size` and + // corresponding interpretation of pointers. + PL_OPT_STRING, + + PL_OPT_TYPE_COUNT, +}; + +struct pl_opt_t { + // Programmatic key uniquely identifying this option. + const char *key; + + // Longer, human readable friendly name + const char *name; + + // Data type of option, affects how it is parsed. This field is purely + // informative for the user, the actual implementation may vary. 
+ enum pl_option_type type; + + // Minimum/maximum value ranges for numeric options (int / float) + // If both are 0.0, these limits are disabled/ignored. + float min, max; + + // If true, this option is considered deprecated and may be removed + // in the future. + bool deprecated; + + // If true, this option is considered a 'preset' (read-only), which can + // be loaded but not saved. (The equivalent underlying options this preset + // corresponds to will be saved instead) + bool preset; + + // Internal implementation details (for parsing/saving), opaque to user + const void *priv; +}; + +// A list of options, terminated by {0} for convenience +PL_API extern const struct pl_opt_t pl_option_list[]; +PL_API extern const int pl_option_count; // excluding terminating {0} + +// Returns the `pl_option` associated with a given key, or NULL +PL_API pl_opt pl_find_option(const char *key); + +PL_API_END + +#endif // LIBPLACEBO_OPTIONS_H_ diff --git a/src/include/libplacebo/renderer.h b/src/include/libplacebo/renderer.h new file mode 100644 index 0000000..d2e01e4 --- /dev/null +++ b/src/include/libplacebo/renderer.h @@ -0,0 +1,847 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef LIBPLACEBO_RENDERER_H_ +#define LIBPLACEBO_RENDERER_H_ + +#include <libplacebo/config.h> +#include <libplacebo/colorspace.h> +#include <libplacebo/filters.h> +#include <libplacebo/gpu.h> +#include <libplacebo/shaders/colorspace.h> +#include <libplacebo/shaders/deinterlacing.h> +#include <libplacebo/shaders/dithering.h> +#include <libplacebo/shaders/film_grain.h> +#include <libplacebo/shaders/icc.h> +#include <libplacebo/shaders/lut.h> +#include <libplacebo/shaders/sampling.h> +#include <libplacebo/shaders/custom.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +// Thread-safety: Unsafe +typedef struct pl_renderer_t *pl_renderer; + +// Enum values used in `pl_render_errors.errors` as bit masks for error flags +enum pl_render_error { + PL_RENDER_ERR_NONE = 0, + PL_RENDER_ERR_FBO = 1 << 0, + PL_RENDER_ERR_SAMPLING = 1 << 1, + PL_RENDER_ERR_DEBANDING = 1 << 2, + PL_RENDER_ERR_BLENDING = 1 << 3, + PL_RENDER_ERR_OVERLAY = 1 << 4, + PL_RENDER_ERR_PEAK_DETECT = 1 << 5, + PL_RENDER_ERR_FILM_GRAIN = 1 << 6, + PL_RENDER_ERR_FRAME_MIXING = 1 << 7, + PL_RENDER_ERR_DEINTERLACING = 1 << 8, + PL_RENDER_ERR_ERROR_DIFFUSION = 1 << 9, + PL_RENDER_ERR_HOOKS = 1 << 10, + PL_RENDER_ERR_CONTRAST_RECOVERY = 1 << 11, +}; + +// Struct describing current renderer state, including internal processing errors, +// as well as list of signatures of disabled hooks. +struct pl_render_errors { + enum pl_render_error errors; + // List containing signatures of disabled hooks + const uint64_t *disabled_hooks; + int num_disabled_hooks; +}; + +// Creates a new renderer object, which is backed by a GPU context. This is a +// high-level object that takes care of the rendering chain as a whole, from +// the source textures to the finished frame. +PL_API pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu); +PL_API void pl_renderer_destroy(pl_renderer *rr); + +// Returns current renderer state, see pl_render_errors.
+PL_API struct pl_render_errors pl_renderer_get_errors(pl_renderer rr); + +// Clears the error state of the renderer. If `errors` is NULL, all render errors will +// be cleared. Otherwise only selected errors/hooks will be cleared. +// If `PL_RENDER_ERR_HOOKS` is set and `num_disabled_hooks` is 0, clear all hooks. +// Otherwise only selected hooks will be cleared based on the `disabled_hooks` array. +PL_API void pl_renderer_reset_errors(pl_renderer rr, + const struct pl_render_errors *errors); + +enum pl_lut_type { + PL_LUT_UNKNOWN = 0, + PL_LUT_NATIVE, // applied to raw image contents (after fixing bit depth) + PL_LUT_NORMALIZED, // applied to normalized (HDR) RGB values + PL_LUT_CONVERSION, // LUT fully replaces color conversion + + // Note: When using a PL_LUT_CONVERSION to replace the YUV->RGB conversion, + // `pl_render_params.color_adjustment` is no longer applied. Similarly, + // when using a PL_LUT_CONVERSION to replace the image->target color space + // conversion, `pl_render_params.color_map_params` are ignored. + // + // Note: For LUTs attached to the output frame, PL_LUT_CONVERSION should + // instead perform the inverse (RGB->native) conversion. + // + // Note: PL_LUT_UNKNOWN tries inferring the meaning of the LUT from the + // LUT's tagged metadata, and otherwise falls back to PL_LUT_NATIVE. +}; + +enum pl_render_stage { + PL_RENDER_STAGE_FRAME, // full frame redraws, for fresh/uncached frames + PL_RENDER_STAGE_BLEND, // the output blend pass (only for pl_render_image_mix) + PL_RENDER_STAGE_COUNT, +}; + +struct pl_render_info { + const struct pl_dispatch_info *pass; // information about the shader + enum pl_render_stage stage; // the associated render stage + + // This specifies the chronological index of this pass within the frame and + // stage (starting at `index == 0`). + int index; + + // For PL_RENDER_STAGE_BLEND, this specifies the number of frames + // being blended (since that results in a different shader).
+ int count; +}; + +// Represents the options used for rendering. These affect the quality of +// the result. +struct pl_render_params { + // Configures the algorithms used for upscaling and downscaling, + // respectively. If left as NULL, then libplacebo will only use inexpensive + // sampling (bilinear or nearest neighbour depending on the capabilities + // of the hardware / texture). + // + // Note: Setting `downscaler` to NULL also implies `skip_anti_aliasing`, + // since the built-in GPU sampling algorithms can't anti-alias. + // + // Note: If set to the same address as the built-in `pl_filter_bicubic`, + // `pl_filter_nearest` etc.; libplacebo will also use the more efficient + // direct sampling algorithm where possible without quality loss. + const struct pl_filter_config *upscaler; + const struct pl_filter_config *downscaler; + + // If set, this overrides the value of `upscaler`/`downscaler` for + // subsampled (chroma) planes. These scalers are used whenever the size of + // multiple different `pl_plane`s in a single `pl_frame` differ, requiring + // adaptation when converting to/from RGB. Note that a value of NULL simply + // means "no override". To force built-in scaling explicitly, set this to + // `&pl_filter_bilinear`. + const struct pl_filter_config *plane_upscaler; + const struct pl_filter_config *plane_downscaler; + + // The anti-ringing strength to apply to filters. See the equivalent option + // in `pl_sample_filter_params` for more information. + float antiringing_strength; + + // Configures the algorithm used for frame mixing (when using + // `pl_render_image_mix`). Ignored otherwise. As a special requirement, + // this must be a filter config with `polar` set to false, since it's only + // used for 1D mixing and thus only 1D filters are compatible. + // + // If set to NULL, frame mixing is disabled, in which case + // `pl_render_image_mix` will use nearest-neighbour semantics.
(Note that + // this still goes through the redraw cache, unless you also enable + // `skip_caching_single_frame`) + const struct pl_filter_config *frame_mixer; + + // Configures the settings used to deband source textures. Leaving this as + // NULL disables debanding. + // + // Note: The `deband_params.grain` setting is automatically adjusted to + // prevent blowing up on HDR sources. The user need not account for this. + const struct pl_deband_params *deband_params; + + // Configures the settings used to sigmoidize the image before upscaling. + // This is not always used. If NULL, disables sigmoidization. + const struct pl_sigmoid_params *sigmoid_params; + + // Configures the color adjustment parameters used to decode the color. + // This can be used to apply additional artistic settings such as + // desaturation, etc. If NULL, defaults to &pl_color_adjustment_neutral. + const struct pl_color_adjustment *color_adjustment; + + // Configures the settings used to detect the peak of the source content, + // for HDR sources. Has no effect on SDR content. If NULL, peak detection + // is disabled. + const struct pl_peak_detect_params *peak_detect_params; + + // Configures the settings used to tone map from HDR to SDR, or from higher + // gamut to standard gamut content. If NULL, defaults to + // `&pl_color_map_default_params`. + const struct pl_color_map_params *color_map_params; + + // Configures the settings used to dither to the output depth. Leaving this + // as NULL disables dithering. + const struct pl_dither_params *dither_params; + + // Configures the error diffusion kernel to use for error diffusion + // dithering. If set, this will be used instead of `dither_params` whenever + // possible. Leaving this as NULL disables error diffusion. + const struct pl_error_diffusion_kernel *error_diffusion; + + // Configures the settings used to simulate color blindness, if desired. + // If NULL, this feature is disabled. 
+ const struct pl_cone_params *cone_params; + + // Configures output blending. When rendering to the final target, the + // framebuffer contents will be blended using this blend mode. Requires + // that the target format has PL_FMT_CAP_BLENDABLE. NULL disables blending. + const struct pl_blend_params *blend_params; + + // Configures the settings used to deinterlace frames (see + // `pl_frame.field`), if required. If NULL, deinterlacing is "disabled", + // meaning interlaced frames are rendered as weaved frames instead. + // + // Note: As a consequence of how `pl_frame` represents individual fields, + // and especially when using the `pl_queue`, this will still result in + // frames being redundantly rendered twice. As such, it's highly + // recommended to, instead, fully disable deinterlacing by not marking + // source frames as interlaced in the first place. + const struct pl_deinterlace_params *deinterlace_params; + + // If set, applies an extra distortion matrix to the image, after + // scaling and before presenting it to the screen. Can be used for e.g. + // fractional rotation. + // + // Note: The distortion canvas will be set to the size of `target->crop`, + // so this cannot effectively draw outside the specified target area, + // nor change the aspect ratio of the image. + const struct pl_distort_params *distort_params; + + // List of custom user shaders / hooks. + // See <libplacebo/shaders/custom.h> for more information. + const struct pl_hook * const *hooks; + int num_hooks; + + // Color mapping LUT. If present, this will be applied as part of the + // image being rendered, in normalized RGB space. + // + // Note: In this context, PL_LUT_NATIVE means "gamma light" and + // PL_LUT_NORMALIZED means "linear light". For HDR signals, normalized LUTs + // are scaled so 1.0 corresponds to the `pl_color_transfer_nominal_peak`.
+ // + // Note: A PL_LUT_CONVERSION fully replaces the color adaptation from + // `image` to `target`, including any tone-mapping (if necessary) and ICC + // profiles. It has the same representation as PL_LUT_NATIVE, so in this + // case the input and output are (respectively) non-linear light RGB. + const struct pl_custom_lut *lut; + enum pl_lut_type lut_type; + + // If the image being rendered does not span the entire size of the target, + // it will be cleared explicitly using this background color (RGB). To + // disable this logic, set `skip_target_clearing`. + float background_color[3]; + float background_transparency; // 0.0 for opaque, 1.0 for fully transparent + bool skip_target_clearing; + + // If set to a value above 0.0, the output will be rendered with rounded + // corners, as if an alpha transparency mask had been applied. The value + // indicates the relative fraction of the side length to round - a value + // of 1.0 rounds the corners as much as possible. + float corner_rounding; + + // If true, then transparent images will be made opaque by painting them + // against a checkerboard pattern consisting of alternating colors. If both + // colors are left as {0}, they default respectively to 93% and 87% gray. + bool blend_against_tiles; + float tile_colors[2][3]; + int tile_size; + + // --- Performance / quality trade-off options: + // These should generally be left off where quality is desired, as they can + // degrade the result quite noticeably; but may be useful for older or + // slower hardware. Note that libplacebo will automatically disable + // advanced features on hardware where they are unsupported, regardless of + // these settings. So only enable them if you need a performance bump. + + // Disables anti-aliasing on downscaling. This will result in moiré + // artifacts and nasty, jagged pixels when downscaling, except for some + // very limited special cases (e.g. bilinear downsampling to exactly 0.5x).
+ // + // Significantly speeds up downscaling with high downscaling ratios. + bool skip_anti_aliasing; + + // Normally, when the size of the `target` used with `pl_render_image_mix` + // changes, or the render parameters are updated, the internal cache of + // mixed frames must be discarded in order to re-render all required + // frames. Setting this option to `true` will skip the cache invalidation + // and instead re-use the existing frames (with bilinear scaling to the new + // size if necessary), which comes at a quality loss shortly after a + // resize, but should make it much more smooth. + bool preserve_mixing_cache; + + // --- Performance tuning / debugging options + // These may affect performance or may make debugging problems easier, + // but shouldn't have any effect on the quality. + + // Normally, `pl_render_image_mix` will also push single frames through the + // mixer cache, in order to speed up re-draws. Enabling this option + // disables that logic, causing single frames to bypass the cache. (Though + // it will still read from the cache, if they happen to already be cached) + bool skip_caching_single_frame; + + // Disables linearization / sigmoidization before scaling. This might be + // useful when tracking down unexpected image artifacts or excessive + // ringing, but it shouldn't normally be necessary. + bool disable_linear_scaling; + + // Forces the use of the "general" scaling algorithms even when using the + // special-cased built-in presets like `pl_filter_bicubic`. Basically, this + // disables the more efficient implementations in favor of the slower, + // general-purpose ones. + bool disable_builtin_scalers; + + // Forces correction of subpixel offsets (using the configured `upscaler`). + bool correct_subpixel_offsets; + + // Forces the use of dithering, even when rendering to 16-bit FBOs.
This is + // generally pretty pointless because most 16-bit FBOs have high enough + // depth that rounding errors are below the human perception threshold, + // but this can be used to test the dither code. + bool force_dither; + + // Disables the gamma-correct dithering logic which normally applies when + // dithering to low bit depths. No real use, outside of testing. + bool disable_dither_gamma_correction; + + // Completely overrides the use of FBOs, as if there were no renderable + // texture format available. This disables most features. + bool disable_fbos; + + // Use only low-bit-depth FBOs (8 bits). Note that this also implies + // disabling linear scaling and sigmoidization. + bool force_low_bit_depth_fbos; + + // If this is true, all shaders will be generated as "dynamic" shaders, + // with any compile-time constants being replaced by runtime-adjustable + // values. This is generally a performance loss, but has the advantage of + // being able to freely change parameters without triggering shader + // recompilations. + // + // It's a good idea to enable while presenting configurable settings to the + // user, but it should be set to false once those values are "dialed in". + bool dynamic_constants; + + // This callback is invoked for every pass successfully executed in the + // process of rendering a frame. Optional. + // + // Note: `info` is only valid until this function returns. + void (*info_callback)(void *priv, const struct pl_render_info *info); + void *info_priv; + + // --- Deprecated/removed fields + bool allow_delayed_peak_detect PL_DEPRECATED; // moved to pl_peak_detect_params + const struct pl_icc_params *icc_params PL_DEPRECATED; // use pl_frame.icc + bool ignore_icc_profiles PL_DEPRECATED; // non-functional, just set pl_frame.icc to NULL + int lut_entries PL_DEPRECATED; // hard-coded as 256 + float polar_cutoff PL_DEPRECATED; // hard-coded as 1e-3 +}; + +// Bare minimum parameters, with no features enabled. 
This is the fastest +// possible configuration, and should therefore be fine on any system. +#define PL_RENDER_DEFAULTS \ + .color_map_params = &pl_color_map_default_params, \ + .color_adjustment = &pl_color_adjustment_neutral, \ + .tile_colors = {{0.93, 0.93, 0.93}, \ + {0.87, 0.87, 0.87}}, \ + .tile_size = 32, + +#define pl_render_params(...) (&(struct pl_render_params) { PL_RENDER_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_render_params pl_render_fast_params; + +// This contains the default/recommended options for reasonable image quality, +// while also not being too terribly slow. All of the *_params structs are +// defaulted to the corresponding *_default_params, except for deband_params, +// which is disabled by default. +// +// This should be fine on most integrated GPUs, but if it's too slow, +// consider using `pl_render_fast_params` instead. +PL_API extern const struct pl_render_params pl_render_default_params; + +// This contains a higher quality preset for better image quality at the cost +// of quite a bit of performance. In addition to the settings implied by +// `pl_render_default_params`, it enables debanding, sets the upscaler to +// `pl_filter_ewa_lanczossharp`, and uses pl_*_high_quality_params structs where +// available. This should only really be used with a discrete GPU and where +// maximum image quality is desired. +PL_API extern const struct pl_render_params pl_render_high_quality_params; + +#define PL_MAX_PLANES 4 + +// High level description of a single slice of an image. This basically +// represents a single 2D plane, with any number of components +struct pl_plane { + // The texture underlying this plane. The texture must be 2D, and must + // have specific parameters set depending on what the plane is being used + // for (see `pl_render_image`). + pl_tex texture; + + // The preferred behaviour when sampling outside of this texture. Optional, + // since the default (PL_TEX_ADDRESS_CLAMP) is very reasonable. 
+ enum pl_tex_address_mode address_mode; + + // Controls whether or not the `texture` will be considered flipped + // vertically with respect to the overall image dimensions. It's generally + // preferable to flip planes using this setting instead of the crop in + // cases where the flipping is the result of e.g. negative plane strides or + // flipped framebuffers (OpenGL). + // + // Note that any planar padding (due to e.g. size mismatch or misalignment + // of subsampled planes) is always at the physical end of the texture + // (highest y coordinate) - even if this bool is true. However, any + // subsampling shift (`shift_y`) is applied with respect to the flipped + // direction. This ensures the correct interpretation when e.g. vertically + // flipping 4:2:0 sources by flipping all planes. + bool flipped; + + // Describes the number and interpretation of the components in this plane. + // This defines the mapping from component index to the canonical component + // order (RGBA, YCbCrA or XYZA). It's worth pointing out that this is + // completely separate from `texture->format.sample_order`. The latter is + // essentially irrelevant/transparent for the API user, since it just + // determines which order the texture data shows up as inside the GLSL + // shader; whereas this field controls the actual meaning of the component. + // + // Example; if the user has a plane with just {Y} and a plane with just + // {Cb Cr}, and a GPU that only supports bgra formats, you would still + // specify the component mapping as {0} and {1 2} respectively, even though + // the GPU is sampling the data in the order BGRA. Use -1 for "ignored" + // components. + int components; // number of relevant components + int component_mapping[4]; // semantic index of each component + + // Controls the sample offset, relative to the "reference" dimensions. For + // an example of what to set here, see `pl_chroma_location_offset`. Note + // that this is given in unit of reference pixels. 
For a graphical example, + // imagine you have a 2x2 image with a 1x1 (subsampled) plane. Without any + // shift (0.0), the situation looks like this: + // + // X-------X X = reference pixel + // | | P = plane pixel + // | P | + // | | + // X-------X + // + // For 4:2:0 subsampling, this corresponds to PL_CHROMA_CENTER. If the + // shift_x was instead set to -0.5, the `P` pixel would be offset to the + // left by half the separation between the reference (`X` pixels), resulting + // in the following: + // + // X-------X X = reference pixel + // | | P = plane pixel + // P | + // | | + // X-------X + // + // For 4:2:0 subsampling, this corresponds to PL_CHROMA_LEFT. + // + // Note: It's recommended to fill this using `pl_chroma_location_offset` on + // the chroma planes. + float shift_x, shift_y; +}; + +enum pl_overlay_mode { + PL_OVERLAY_NORMAL = 0, // treat the texture as a normal, full-color texture + PL_OVERLAY_MONOCHROME, // treat the texture as a single-component alpha map + PL_OVERLAY_MODE_COUNT, +}; + +enum pl_overlay_coords { + PL_OVERLAY_COORDS_AUTO = 0, // equal to SRC/DST_FRAME, respectively + PL_OVERLAY_COORDS_SRC_FRAME, // relative to the raw src frame + PL_OVERLAY_COORDS_SRC_CROP, // relative to the src frame crop + PL_OVERLAY_COORDS_DST_FRAME, // relative to the raw dst frame + PL_OVERLAY_COORDS_DST_CROP, // relative to the dst frame crop + PL_OVERLAY_COORDS_COUNT, + + // Note on rotations: If there is an end-to-end rotation between `src` and + // `dst`, then any overlays relative to SRC_FRAME or SRC_CROP will be + // rotated alongside the image, while overlays relative to DST_FRAME or + // DST_CROP will not. +}; + +struct pl_overlay_part { + pl_rect2df src; // source coordinate with respect to `pl_overlay.tex` + pl_rect2df dst; // target coordinates with respect to `pl_overlay.coords` + + // If `mode` is PL_OVERLAY_MONOCHROME, then this specifies the color of + // this overlay part. 
The color is multiplied into the sampled texture's + // first channel. + float color[4]; +}; + +// A struct representing an image overlay (e.g. for subtitles or on-screen +// status messages, controls, ...) +struct pl_overlay { + // The texture containing the backing data for overlay parts. Must have + // `params.sampleable` set. + pl_tex tex; + + // This controls the coloring mode of this overlay. + enum pl_overlay_mode mode; + + // Controls which coordinates this overlay is addressed relative to. + enum pl_overlay_coords coords; + + // This controls the colorspace information for this overlay. The contents + // of the texture / the value of `color` are interpreted according to this. + struct pl_color_repr repr; + struct pl_color_space color; + + // The number of parts for this overlay. + const struct pl_overlay_part *parts; + int num_parts; +}; + +// High-level description of a complete frame, including metadata and planes +struct pl_frame { + // Each frame is split up into some number of planes, each of which may + // carry several components and be of any size / offset. + int num_planes; + struct pl_plane planes[PL_MAX_PLANES]; + + // For interlaced frames. If set, this `pl_frame` corresponds to a single + // field of the underlying source textures. `first_field` indicates which + // of these fields is ordered first in time. `prev` and `next` should point + // to the previous/next frames in the file, or NULL if there are none. + // + // Note: Setting these fields on the render target has no meaning and will + // be ignored. + enum pl_field field; + enum pl_field first_field; + const struct pl_frame *prev, *next; + + // If set, will be called immediately before GPU access to this frame. This + // function *may* be used to, for example, perform synchronization with + // external APIs (e.g. `pl_vulkan_hold/release`). If your mapping requires + // a memcpy of some sort (e.g. 
pl_tex_transfer), users *should* instead do + // the memcpy up-front and avoid the use of these callbacks - because they + // might be called multiple times on the same frame. + // + // This function *may* arbitrarily mutate the `pl_frame`, but it *should* + // ideally only update `planes` - in particular, color metadata and so + // forth should be provided up-front as best as possible. Note that changes + // here will not be reflected back to the structs provided in the original + // `pl_render_*` call (e.g. via `pl_frame_mix`). + // + // Note: Unless dealing with interlaced frames, only one frame will ever be + // acquired at a time per `pl_render_*` call. So users *can* safely use + // this with, for example, hwdec mappers that can only map a single frame + // at a time. When using this with, for example, `pl_render_image_mix`, + // each frame to be blended is acquired and released in succession, before + // moving on to the next frame. For interlaced frames, the previous and + // next frames must also be acquired simultaneously. + bool (*acquire)(pl_gpu gpu, struct pl_frame *frame); + + // If set, will be called after a plane is done being used by the GPU, + // *including* after any errors (e.g. `acquire` returning false). + void (*release)(pl_gpu gpu, struct pl_frame *frame); + + // Color representation / encoding / semantics of this frame. + struct pl_color_repr repr; + struct pl_color_space color; + + // Optional ICC profile associated with this frame. + pl_icc_object icc; + + // Alternative to `icc`, this can be used in cases where allocating and + // tracking a pl_icc_object externally may be inconvenient. The resulting + // profile will be managed internally by the pl_renderer. + struct pl_icc_profile profile; + + // Optional LUT associated with this frame. + const struct pl_custom_lut *lut; + enum pl_lut_type lut_type; + + // The logical crop / rectangle containing the valid information, relative + // to the reference plane's dimensions (e.g. luma).
Pixels outside of this + // rectangle will ostensibly be ignored, but note that this is not a hard + // guarantee. In particular, scaler filters may end up sampling outside of + // this crop. This rect may be flipped, and may be partially or wholly + // outside the bounds of the underlying textures. (Optional) + // + // Note that `pl_render_image` will map the input crop directly to the + // output crop, stretching and scaling as needed. If you wish to preserve + // the aspect ratio, use a dedicated function like pl_rect2df_aspect_copy. + pl_rect2df crop; + + // Logical rotation of the image, with respect to the underlying planes. + // For example, if this is PL_ROTATION_90, then the image will be rotated + // to the right by 90° when mapping to `crop`. The actual position on-screen + // is unaffected, so users should ensure that the (rotated) aspect ratio + // matches the source. (Or use a helper like `pl_rect2df_aspect_set_rot`) + // + // Note: For `target` frames, this corresponds to a rotation of the + // display, for `image` frames, this corresponds to a rotation of the + // camera. + // + // So, as an example, target->rotation = PL_ROTATION_90 means the end user + // has rotated the display to the right by 90° (meaning rendering will be + // rotated 90° to the *left* to compensate), and image->rotation = + // PL_ROTATION_90 means the video provider has rotated the camera to the + // right by 90° (so rendering will be rotated 90° to the *right* to + // compensate). + pl_rotation rotation; + + // A list of additional overlays associated with this frame. Note that these will + // be rendered directly onto intermediate/cache frames, so changing any of + // these overlays may require flushing the renderer cache.
+ const struct pl_overlay *overlays; + int num_overlays; + + // Note on subsampling and plane correspondence: All planes belonging to + // the same frame will only be stretched by an integer multiple (or inverse + // thereof) in order to match the reference dimensions of this image. For + // example, suppose you have an 8x4 image. A valid plane scaling would be + // 4x2 -> 8x4 or 4x4 -> 4x4, but not 6x4 -> 8x4. So if a 6x4 plane is + // given, then it would be treated like a cropped 8x4 plane (since 1.0 is + // the closest scaling ratio to the actual ratio of 1.3). + // + // For an explanation of why this makes sense, consider the relatively + // common example of a subsampled, oddly sized (e.g. jpeg) image. In such + // cases, for example a 35x23 image, the 4:2:0 subsampled chroma plane + // would have to end up as 17.5x11.5, which gets rounded up to 18x12 by + // implementations. So in this example, the 18x12 chroma plane would get + // treated by libplacebo as an oversized chroma plane - i.e. the plane + // would get sampled as if it was 17.5 pixels wide and 11.5 pixels large. + + // Associated film grain data (see <libplacebo/shaders/film_grain.h>). + // + // Note: This is ignored for the `target` of `pl_render_image`, since + // un-applying grain makes little sense. + struct pl_film_grain_data film_grain; + + // Ignored by libplacebo. May be useful for users. + void *user_data; +}; + +// Helper function to infer the chroma location offset for each plane in a +// frame. This is equivalent to calling `pl_chroma_location_offset` on all +// subsampled planes' shift_x/shift_y variables. +PL_API void pl_frame_set_chroma_location(struct pl_frame *frame, + enum pl_chroma_location chroma_loc); + +// Fills in a `pl_frame` based on a swapchain frame's FBO and metadata. +PL_API void pl_frame_from_swapchain(struct pl_frame *out_frame, + const struct pl_swapchain_frame *frame); + +// Helper function to determine if a frame is logically cropped or not. 
In +// particular, this is useful in determining whether or not an output frame +// needs to be cleared before rendering or not. +PL_API bool pl_frame_is_cropped(const struct pl_frame *frame); + +// Helper function to reset a frame to a given RGB color. If the frame's +// color representation is something other than RGB, the clear color will +// be adjusted accordingly. `clear_color` should be non-premultiplied. +PL_API void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame, + const float clear_color[4]); + +// Like `pl_frame_clear_rgba` but without an alpha channel. +static inline void pl_frame_clear(pl_gpu gpu, const struct pl_frame *frame, + const float clear_color[3]) +{ + const float clear_color_rgba[4] = { clear_color[0], clear_color[1], clear_color[2], 1.0 }; + pl_frame_clear_rgba(gpu, frame, clear_color_rgba); +} + +// Helper functions to return the fixed/inferred pl_frame parameters used +// for rendering internally. Mutates `image` and `target` in-place to hold +// the modified values, which are what will actually be used for rendering. +// +// This currently includes: +// - Defaulting all missing pl_color_space/repr parameters +// - Coalescing all rotation to the target +// - Rounding and clamping the target crop to pixel boundaries and adjusting the +// image crop correspondingly +// +// Note: This is idempotent and does not generally alter the effects of a +// subsequent `pl_render_image` on the same pl_frame pair. (But see the +// following warning) +// +// Warning: This does *not* call pl_frame.acquire/release, and so the returned +// metadata *may* be incorrect if the acquire callback mutates the pl_frame in +// nontrivial ways, in particular the crop and color space fields. +PL_API void pl_frames_infer(pl_renderer rr, struct pl_frame *image, + struct pl_frame *target); + + +// Render a single image to a target using the given parameters. This is +// fully dynamic, i.e. the params can change at any time. 
libplacebo will +// internally detect and flush whatever caches are invalidated as a result of +// changing colorspace, size etc. +// +// Required plane capabilities: +// - Planes in `image` must be `sampleable` +// - Planes in `target` must be `renderable` +// +// Recommended plane capabilities: (Optional, but good for performance) +// - Planes in `image` should have `sample_mode` PL_TEX_SAMPLE_LINEAR +// - Planes in `target` should be `storable` +// - Planes in `target` should have `blit_dst` +// +// Note on lifetime: Once this call returns, the passed structures may be +// freely overwritten or discarded by the caller, even the referenced +// `pl_tex` objects may be freely reused. +// +// Note: `image` may be NULL, in which case `target.overlays` will still be +// rendered, but nothing else. +PL_API bool pl_render_image(pl_renderer rr, const struct pl_frame *image, + const struct pl_frame *target, + const struct pl_render_params *params); + +// Flushes the internal state of this renderer. This is normally not needed, +// even if the image parameters, colorspace or target configuration change, +// since libplacebo will internally detect such circumstances and recreate +// outdated resources automatically. Doing this explicitly *may* be useful to +// purge some state related to things like HDR peak detection or frame mixing, +// so calling it is a good idea if the content source is expected to change +// dramatically (e.g. when switching to a different file). +PL_API void pl_renderer_flush_cache(pl_renderer rr); + +// Mirrors `pl_get_detected_hdr_metadata`, giving you the current internal peak +// detection HDR metadata (when peak detection is active). Returns false if no +// information is available (e.g. not HDR source, peak detection disabled). +PL_API bool pl_renderer_get_hdr_metadata(pl_renderer rr, + struct pl_hdr_metadata *metadata); + +// Represents a mixture of input frames, distributed temporally. +// +// NOTE: Frames must be sorted by timestamp, i.e. 
`timestamps` must be +// monotonically increasing. +struct pl_frame_mix { + // The number of frames in this mixture. The number of frames should be + // sufficient to meet the needs of the configured frame mixer. See the + // section below for more information. + // + // If the number of frames is 0, this call will be equivalent to + // `pl_render_image` with `image == NULL`. + int num_frames; + + // A list of the frames themselves. The frames can have different + // colorspaces, configurations of planes, or even sizes. + // + // Note: This is a list of pointers, to avoid users having to copy + // around `pl_frame` structs when re-organizing this array. + const struct pl_frame **frames; + + // A list of unique signatures, one for each frame. These are used to + // identify frames across calls to this function, so it's crucial that they + // be both unique per-frame but also stable across invocations of + // `pl_render_image_mix`. + const uint64_t *signatures; + + // A list of relative timestamps for each frame. These are relative to the + // time of the vsync being drawn, i.e. this function will render the frame + // that will be made visible at timestamp 0.0. The values are expected to + // be normalized such that a separation of 1.0 corresponds to roughly one + // nominal source frame duration. So a constant framerate video file will + // always have timestamps like e.g. {-2.3, -1.3, -0.3, 0.7, 1.7, 2.7}, + // using an example radius of 3. + // + // In cases where the framerate is variable (e.g. VFR video), the choice of + // what scale to use can be difficult to answer. A typical choice would + // be either to use the canonical (container-tagged) framerate, or the + // highest momentary framerate, as a reference. If all else fails, you + // could also use the display's framerate. + // + // Note: This function assumes zero-order-hold semantics, i.e.
the frame at + // timestamp 0.7 is intended to remain visible until timestamp 1.7, when + // the next frame replaces it. + const float *timestamps; + + // The duration for which the vsync being drawn will be held, using the + // same scale as `timestamps`. If the display has an unknown or variable + // frame-rate (e.g. Adaptive Sync), then you're probably better off not + // using this function and instead just painting the frames directly using + // `pl_render_image` at the correct PTS. + // + // As an example, if `vsync_duration` is 0.4, then it's assumed that the + // vsync being painted is visible for the period [0.0, 0.4]. + float vsync_duration; + + // Explanation of the frame mixing radius: The algorithm chosen in + // `pl_render_params.frame_mixer` has a canonical radius equal to + // `pl_filter_config.kernel->radius`. This means that the frame mixing + // algorithm will (only) need to consult all of the frames that have a + // distance within the interval [-radius, radius]. As such, the user should + // include all such frames in `frames`, but may prune or omit frames that + // lie outside it. + // + // The built-in frame mixing (`pl_render_params.frame_mixer == NULL`) has + // no concept of radius, it just always needs access to the "current" and + // "next" frames. +}; + +// Helper function to calculate the base frame mixing radius. +// +// Note: When the source FPS exceeds the display FPS, this radius must be +// increased by the corresponding ratio. +static inline float pl_frame_mix_radius(const struct pl_render_params *params) +{ + // For backwards compatibility, allow !frame_mixer->kernel + if (!params->frame_mixer || !params->frame_mixer->kernel) + return 0.0; + + return params->frame_mixer->kernel->radius; +} + +// Find closest frame to current PTS by zero-order hold semantics, or NULL.
+PL_API const struct pl_frame *pl_frame_mix_current(const struct pl_frame_mix *mix); + +// Find closest frame to current PTS by nearest neighbour semantics, or NULL. +PL_API const struct pl_frame *pl_frame_mix_nearest(const struct pl_frame_mix *mix); + +// Render a mixture of images to the target using the given parameters. This +// functions much like a generalization of `pl_render_image`, for when the API +// user has more control over the frame queue / vsync loop, and can provide a +// few frames from the past and future + timestamp information. +// +// This allows libplacebo to perform rudimentary frame mixing / interpolation, +// in order to eliminate judder artifacts typically associated with +// source/display frame rate mismatch. +PL_API bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images, + const struct pl_frame *target, + const struct pl_render_params *params); + +// Analog of `pl_frames_infer` corresponding to `pl_render_image_mix`. This +// function will *not* mutate the frames contained in `mix`, and instead +// return an adjusted copy of the "reference" frame for that image mix in +// `out_ref`, or {0} if the mix is empty. +PL_API void pl_frames_infer_mix(pl_renderer rr, const struct pl_frame_mix *mix, + struct pl_frame *target, struct pl_frame *out_ref); + +// Backwards compatibility with old filters API, may be deprecated. +// Redundant with pl_filter_configs and masking `allowed` for +// PL_FILTER_SCALING and PL_FILTER_FRAME_MIXING respectively. + +// A list of recommended frame mixer presets, terminated by {0} +PL_API extern const struct pl_filter_preset pl_frame_mixers[]; +PL_API extern const int pl_num_frame_mixers; // excluding trailing {0} + +// A list of recommended scaler presets, terminated by {0}. This is almost +// equivalent to `pl_filter_presets` with the exception of including extra +// built-in filters that don't map to the `pl_filter` architecture.
+PL_API extern const struct pl_filter_preset pl_scale_filters[]; +PL_API extern const int pl_num_scale_filters; // excluding trailing {0} + +// Deprecated in favor of `pl_cache_save/pl_cache_load` on the `pl_cache` +// associated with the `pl_gpu` this renderer is using. +PL_DEPRECATED PL_API size_t pl_renderer_save(pl_renderer rr, uint8_t *out_cache); +PL_DEPRECATED PL_API void pl_renderer_load(pl_renderer rr, const uint8_t *cache); + +PL_API_END + +#endif // LIBPLACEBO_RENDERER_H_ diff --git a/src/include/libplacebo/shaders.h b/src/include/libplacebo/shaders.h new file mode 100644 index 0000000..b8046be --- /dev/null +++ b/src/include/libplacebo/shaders.h @@ -0,0 +1,273 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_H_ +#define LIBPLACEBO_SHADERS_H_ + +// This function defines the "direct" interface to libplacebo's GLSL shaders, +// suitable for use in contexts where the user controls GLSL shader compilation +// but wishes to include functions generated by libplacebo as part of their +// own rendering process. This API is normally not used for operation with +// libplacebo's higher-level constructs such as `pl_dispatch` or `pl_renderer`. 
+ +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// Thread-safety: Unsafe +typedef struct pl_shader_t *pl_shader; + +struct pl_shader_params { + // The `id` represents an abstract identifier for the shader, to avoid + // collisions with other shaders being used as part of the same larger, + // overarching shader. This is relevant for users which want to combine + // multiple `pl_shader` objects together, in which case all `pl_shader` + // objects should have a unique `id`. + uint8_t id; + + // If `gpu` is non-NULL, then this `gpu` will be used to create objects + // such as textures and buffers, or check for required capabilities, for + // operations which depend on either of those. This is fully optional, i.e. + // these GLSL primitives are designed to be used without a dependency on + // `gpu` wherever possible - however, some features may not work, and will + // be disabled even if requested. + pl_gpu gpu; + + // The `index` represents an abstract frame index, which shaders may use + // internally to do things like temporal dithering or seeding PRNGs. If the + // user does not care about temporal dithering/debanding, or wants + // deterministic rendering, this may safely be left as 0. Otherwise, it + // should be incremented by 1 on successive frames. + uint8_t index; + + // If `glsl.version` is nonzero, then this structure will be used to + // determine the effective GLSL mode and capabilities. If `gpu` is also + // set, then this overrides `gpu->glsl`. + struct pl_glsl_version glsl; + + // If this is true, all constants in the shader will be replaced by + // dynamic variables. This is mainly useful to avoid recompilation for + // shaders which expect to have their values change constantly. + bool dynamic_constants; +}; + +#define pl_shader_params(...) (&(struct pl_shader_params) { __VA_ARGS__ }) + +// Creates a new, blank, mutable pl_shader object. 
+// +// Note: Rather than allocating and destroying many shaders, users are +// encouraged to reuse them (using `pl_shader_reset`) for efficiency. +PL_API pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params); + +// Frees a pl_shader and all resources associated with it. +PL_API void pl_shader_free(pl_shader *sh); + +// Resets a pl_shader to a blank slate, without releasing internal memory. +// If you're going to be re-generating shaders often, this function will let +// you skip the re-allocation overhead. +PL_API void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params); + +// Returns whether or not a shader is in a "failed" state. Trying to modify a +// shader in illegal ways (e.g. signature mismatch) will result in the shader +// being marked as "failed". Since most pl_shader_ operations have a void +// return type, the user can use this function to figure out whether a specific +// shader operation has failed or not. This function is somewhat redundant +// since `pl_shader_finalize` will also return NULL in this case. +PL_API bool pl_shader_is_failed(const pl_shader sh); + +// Returns whether or not a pl_shader needs to be run as a compute shader. This +// will never be the case unless the `pl_glsl_version` this `pl_shader` was +// created using has `compute` support enabled. +PL_API bool pl_shader_is_compute(const pl_shader sh); + +// Returns whether or not the shader has any particular output size +// requirements. Some shaders, in particular those that sample from other +// textures, have specific output size requirements which need to be respected +// by the caller. If this is false, then the shader is compatible with every +// output size. If true, the size requirements are stored into *w and *h. +PL_API bool pl_shader_output_size(const pl_shader sh, int *w, int *h); + +// Indicates the type of signature that is associated with a shader result. 
+// Every shader result defines a function that may be called by the user, and +// this enum indicates the type of value that this function takes and/or +// returns. +// +// Which signature a shader ends up with depends on the type of operation being +// performed by a shader fragment, as determined by the user's calls. See below +// for more information. +enum pl_shader_sig { + PL_SHADER_SIG_NONE = 0, // no input / void output + PL_SHADER_SIG_COLOR, // vec4 color (normalized so that 1.0 is the ref white) + + // The following are only valid as input signatures: + PL_SHADER_SIG_SAMPLER, // (gsampler* src_tex, vecN tex_coord) pair, + // specifics depend on how the shader was generated +}; + +// Structure encapsulating information about a shader. This is internally +// refcounted, to allow moving it around without having to create deep copies. +typedef const struct pl_shader_info_t { + // A copy of the parameters used to create the shader. + struct pl_shader_params params; + + // A list of friendly names for the semantic operations being performed by + // this shader, e.g. "color decoding" or "debanding". + const char **steps; + int num_steps; + + // As a convenience, this contains a pretty-printed version of the + // above list, with entries tallied and separated by commas + const char *description; +} *pl_shader_info; + +PL_API pl_shader_info pl_shader_info_ref(pl_shader_info info); +PL_API void pl_shader_info_deref(pl_shader_info *info); + +// Represents a finalized shader fragment. This is not a complete shader, but a +// collection of raw shader text together with description of the input +// attributes, variables and vertices it expects to be available. +struct pl_shader_res { + // Descriptive information about the shader. Note that this reference is + // attached to the shader itself - the user does not need to manually ref + // or deref `info` unless they wish to move it elsewhere. + pl_shader_info info; + + // The shader text, as literal GLSL. 
This will always be a function + // definition, such that the function with the indicated name and + // signature may be called by the user. + const char *glsl; + const char *name; + enum pl_shader_sig input; // what the function expects + enum pl_shader_sig output; // what the function returns + + // For compute shaders (pl_shader_is_compute), this indicates the requested + // work group size. Otherwise, both fields are 0. The interpretation of + // these work groups is that they're tiled across the output image. + int compute_group_size[2]; + + // If this pass is a compute shader, this field indicates the shared memory + // size requirements for this shader pass. + size_t compute_shmem; + + // A set of input vertex attributes needed by this shader fragment. + const struct pl_shader_va *vertex_attribs; + int num_vertex_attribs; + + // A set of input variables needed by this shader fragment. + const struct pl_shader_var *variables; + int num_variables; + + // A list of input descriptors needed by this shader fragment, + const struct pl_shader_desc *descriptors; + int num_descriptors; + + // A list of compile-time constants used by this shader fragment. + const struct pl_shader_const *constants; + int num_constants; + + // --- Deprecated fields (see `info`) + struct pl_shader_params params PL_DEPRECATED; + const char **steps PL_DEPRECATED; + int num_steps PL_DEPRECATED; + const char *description PL_DEPRECATED; +}; + +// Represents a vertex attribute.
The four values will be bound to the four +// corner vertices respectively, in row-wise order starting from the top left: +// data[0] data[1] +// data[2] data[3] +struct pl_shader_va { + struct pl_vertex_attrib attr; // VA type, excluding `offset` and `location` + const void *data[4]; +}; + +// Represents a bound shared variable / descriptor +struct pl_shader_var { + struct pl_var var; // the underlying variable description + const void *data; // the raw data (as per `pl_var_host_layout`) + bool dynamic; // if true, the value is expected to change frequently +}; + +struct pl_buffer_var { + struct pl_var var; + struct pl_var_layout layout; +}; + +typedef uint16_t pl_memory_qualifiers; +enum { + PL_MEMORY_COHERENT = 1 << 0, // supports synchronization across shader invocations + PL_MEMORY_VOLATILE = 1 << 1, // all writes are synchronized automatically + + // Note: All descriptors are also implicitly assumed to have the 'restrict' + // memory qualifier. There is currently no way to override this behavior. +}; + +struct pl_shader_desc { + struct pl_desc desc; // descriptor type, excluding `int binding` + struct pl_desc_binding binding; // contents of the descriptor binding + + // For PL_DESC_BUF_UNIFORM/STORAGE, this specifies the layout of the + // variables contained by a buffer. Ignored for the other descriptor types + struct pl_buffer_var *buffer_vars; + int num_buffer_vars; + + // For storage images and buffers, this specifies additional memory + // qualifiers on the descriptor. It's highly recommended to always use + // at least PL_MEMORY_RESTRICT. Ignored for other descriptor types. + pl_memory_qualifiers memory; +}; + +// Represents a compile-time constant. This can be lowered to a specialization +// constant to support cheaper recompilations. 
+struct pl_shader_const { + enum pl_var_type type; + const char *name; + const void *data; + + // If true, this constant *must* be a compile-time constant, which + // basically just overrides `pl_shader_params.dynamic_constants`. Useful + // for constants which will serve as inputs to e.g. array sizes. + bool compile_time; +}; + +// Finalize a pl_shader. It is no longer mutable at this point, and any further +// attempts to modify it result in an error. (Functions which take a `const +// pl_shader` argument do not modify the shader and may be freely +// called on an already-finalized shader) +// +// The returned pl_shader_res is bound to the lifetime of the pl_shader - and +// will only remain valid until the pl_shader is freed or reset. This function +// may be called multiple times, and will produce the same result each time. +// +// This function will return NULL if the shader is considered to be in a +// "failed" state (see pl_shader_is_failed). +PL_API const struct pl_shader_res *pl_shader_finalize(pl_shader sh); + +// Shader objects represent abstract resources that shaders need to manage in +// order to ensure their operation. This could include shader storage buffers, +// generated lookup textures, or other sorts of configured state. The body +// of a shader object is fully opaque; but the user is in charge of cleaning up +// after them and passing them to the right shader passes. +// +// Note: pl_shader_obj objects must be initialized to NULL by the caller. +typedef struct pl_shader_obj_t *pl_shader_obj; + +PL_API void pl_shader_obj_destroy(pl_shader_obj *obj); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_H_ diff --git a/src/include/libplacebo/shaders/colorspace.h b/src/include/libplacebo/shaders/colorspace.h new file mode 100644 index 0000000..ead0958 --- /dev/null +++ b/src/include/libplacebo/shaders/colorspace.h @@ -0,0 +1,381 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_COLORSPACE_H_ +#define LIBPLACEBO_SHADERS_COLORSPACE_H_ + +// Color space transformation shaders. These all input and output a color +// value (PL_SHADER_SIG_COLOR). + +#include <libplacebo/colorspace.h> +#include <libplacebo/gamut_mapping.h> +#include <libplacebo/tone_mapping.h> +#include <libplacebo/shaders.h> + +// For backwards compatibility +#include <libplacebo/shaders/dithering.h> + +PL_API_BEGIN + +// Transform the input color, in its given representation, to ensure +// compatibility with the indicated alpha mode. Mutates `repr` to reflect the +// change. Note that this is a no-op if the input is PL_ALPHA_UNKNOWN. +PL_API void pl_shader_set_alpha(pl_shader sh, struct pl_color_repr *repr, + enum pl_alpha_mode mode); + +// Colorspace reshaping for PL_COLOR_SYSTEM_DOLBYVISION. Note that this is done +// automatically by `pl_shader_decode_color` for PL_COLOR_SYSTEM_DOLBYVISION. +PL_API void pl_shader_dovi_reshape(pl_shader sh, const struct pl_dovi_metadata *data); + +// Decode the color into normalized RGB, given a specified color_repr. This +// also takes care of additional pre- and post-conversions required for the +// "special" color systems (XYZ, BT.2020-C, etc.). If `params` is left as NULL, +// it defaults to &pl_color_adjustment_neutral.
+// +// Note: This function always returns PC-range RGB with independent alpha. +// It mutates the pl_color_repr to reflect the change. +// +// Note: For DCDM XYZ decoding output is linear +PL_API void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr, + const struct pl_color_adjustment *params); + +// Encodes a color from normalized, PC-range, independent alpha RGB into a +// given representation. That is, this performs the inverse operation of +// `pl_shader_decode_color` (sans color adjustments). +// +// Note: For DCDM XYZ encoding input is expected to be linear +PL_API void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr); + +// Linearize (expand) `vec4 color`, given a specified color space. In essence, +// this corresponds to the ITU-R EOTF. +// +// Note: Unlike the ITU-R EOTF, it never includes the OOTF - even for systems +// where the EOTF includes the OOTF (such as HLG). +PL_API void pl_shader_linearize(pl_shader sh, const struct pl_color_space *csp); + +// Delinearize (compress), given a color space as output. This loosely +// corresponds to the inverse EOTF (not the OETF) in ITU-R terminology, again +// assuming a reference monitor. +PL_API void pl_shader_delinearize(pl_shader sh, const struct pl_color_space *csp); + +struct pl_sigmoid_params { + // The center (bias) of the sigmoid curve. Must be between 0.0 and 1.0. + // If left as NULL, defaults to 0.75 + float center; + + // The slope (steepness) of the sigmoid curve. Must be between 1.0 and 20.0. + // If left as NULL, defaults to 6.5. + float slope; +}; + +#define PL_SIGMOID_DEFAULTS \ + .center = 0.75, \ + .slope = 6.50, + +#define pl_sigmoid_params(...) (&(struct pl_sigmoid_params) { PL_SIGMOID_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_sigmoid_params pl_sigmoid_default_params; + +// Applies a sigmoidal color transform to all channels. 
This helps avoid +// ringing artifacts during upscaling by bringing the color information closer +// to neutral and away from the extremes. If `params` is NULL, it defaults to +// &pl_sigmoid_default_params. +// +// Warning: This function clamps the input to the interval [0,1]; and as such +// it should *NOT* be used on already-decoded high-dynamic range content. +PL_API void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params); + +// This performs the inverse operation to `pl_shader_sigmoidize`. +PL_API void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params); + +struct pl_peak_detect_params { + // Smoothing coefficient for the detected values. This controls the time + // parameter (tau) of an IIR low pass filter. In other words, it represents + // the cutoff period (= 1 / cutoff frequency) in frames. Frequencies below + // this length will be suppressed. This helps block out annoying + // "sparkling" or "flickering" due to small variations in frame-to-frame + // brightness. If left as 0.0, this smoothing is completely disabled. + float smoothing_period; + + // In order to avoid reacting sluggishly on scene changes as a result of + // the low-pass filter, we disable it when the difference between the + // current frame brightness and the average frame brightness exceeds a + // given threshold difference. But rather than a single hard cutoff, which + // would lead to weird discontinuities on fades, we gradually disable it + // over a small window of brightness ranges. These parameters control the + // lower and upper bounds of this window, in units of 1% PQ. + // + // Setting either one of these to 0.0 disables this logic. + float scene_threshold_low; + float scene_threshold_high; + + // Which percentile of the input image brightness histogram to consider as + // the true peak of the scene. If this is set to 100 (or 0), the brightest + // pixel is measured.
Otherwise, the top of the frequency distribution is + // progressively cut off. Setting this too low will cause clipping of very + // bright details, but can improve the dynamic brightness range of scenes + // with very bright isolated highlights. + // + // A recommended value is 99.995%, which is very conservative and should + // cause no major issues in typical content. + float percentile; + + // Allows the peak detection result to be delayed by up to a single frame, + // which can sometimes improve throughput, at the cost of introducing the + // possibility of 1-frame flickers on transitions. Disabled by default. + bool allow_delayed; + + // --- Deprecated / removed fields + float overshoot_margin PL_DEPRECATED; + float minimum_peak PL_DEPRECATED; +}; + +#define PL_PEAK_DETECT_DEFAULTS \ + .smoothing_period = 20.0f, \ + .scene_threshold_low = 1.0f, \ + .scene_threshold_high = 3.0f, \ + .percentile = 100.0f, + +#define PL_PEAK_DETECT_HQ_DEFAULTS \ + PL_PEAK_DETECT_DEFAULTS \ + .percentile = 99.995f, + +#define pl_peak_detect_params(...) (&(struct pl_peak_detect_params) { PL_PEAK_DETECT_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_peak_detect_params pl_peak_detect_default_params; +PL_API extern const struct pl_peak_detect_params pl_peak_detect_high_quality_params; + +// This function can be used to measure the CLL and FALL of a video +// source automatically, using a compute shader. The measured values are +// smoothed automatically (depending on the parameters), so to keep track of +// the measured results over time, a tone mapping shader state object is used +// to hold the state. Returns false on failure initializing the tone mapping +// object, or if compute shaders are not supported. +// +// It's important that the same shader object is used for successive frames +// belonging to the same source. If the source changes (e.g.
due to a file +// change or seek), the user should reset it with `pl_reset_detected_peak` (or +// destroy it and use a new state object). +// +// The parameter `csp` holds the representation of the color values that are +// the input to this function. (They must already be in decoded RGB form, i.e. +// alternate color representations are not supported) +PL_API bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp, + pl_shader_obj *state, + const struct pl_peak_detect_params *params); + +// After dispatching the above shader, this function can be used to retrieve +// the detected dynamic HDR10+ metadata parameters. The other fields of +// `metadata` are not written to. Returns whether or not any values were +// written. If not, the values are left untouched, so this can be used to +// safely update `pl_hdr_metadata` values in-place. This function may or may +// not block, depending on the previous setting of `allow_delayed`. +PL_API bool pl_get_detected_hdr_metadata(const pl_shader_obj state, + struct pl_hdr_metadata *metadata); + +// After dispatching the above shader, this function *may* be used to read out +// the detected CLL and FALL directly (in PL_HDR_NORM units). If the shader +// has never been dispatched yet, i.e. no information is available, this will +// return false. +// +// Deprecated in favor of `pl_get_detected_hdr_metadata` +PL_DEPRECATED PL_API bool pl_get_detected_peak(const pl_shader_obj state, + float *out_cll, float *out_fall); + +// Resets the peak detection state in a given tone mapping state object. This +// is not equal to `pl_shader_obj_destroy`, because it does not destroy any +// state used by `pl_shader_tone_map`. +PL_API void pl_reset_detected_peak(pl_shader_obj state); + +// Feature map extraction (for pl_color_map_args.feature_map). The result +// of this shader should be downscaled / low-passed to the indicated kernel +// size before use. 
(This does not happen automatically) +PL_API void pl_shader_extract_features(pl_shader sh, struct pl_color_space csp); + +// Deprecated and unused. Libplacebo now always performs a variant of the old +// hybrid tone-mapping, mixing together the intensity (I) and per-channel (LMS) +// results. +enum pl_tone_map_mode { + PL_TONE_MAP_AUTO PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_RGB PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_MAX PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_HYBRID PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_LUMA PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_MODE_COUNT, +}; + +// Deprecated by <libplacebo/gamut_mapping.h> +enum pl_gamut_mode { + PL_GAMUT_CLIP PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_clip + PL_GAMUT_WARN PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_highlight + PL_GAMUT_DARKEN PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_darken + PL_GAMUT_DESATURATE PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_desaturate + PL_GAMUT_MODE_COUNT, +}; + +struct pl_color_map_params { + // --- Gamut mapping options + + // Gamut mapping function to use to handle out-of-gamut colors, including + // colors which are out-of-gamut as a consequence of tone mapping. + const struct pl_gamut_map_function *gamut_mapping; + + // Gamut mapping constants, for expert tuning. Leave as default otherwise. + struct pl_gamut_map_constants gamut_constants; + + // Gamut mapping 3DLUT size, for channels ICh. Defaults to {48, 32, 256} + int lut3d_size[3]; + + // Use higher quality, but slower, tricubic interpolation for gamut mapping + // 3DLUTs. May substantially improve the 3DLUT gamut mapping accuracy, in + // particular at smaller 3DLUT sizes. Shouldn't have much effect at the + // default size. + bool lut3d_tricubic; + + // If true, allows the gamut mapping function to expand the gamut, in + // cases where the target gamut exceeds that of the source. If false, + // the source gamut will never be enlarged, even when using a gamut + // mapping function capable of bidirectional mapping. 
+ bool gamut_expansion; + + // --- Tone mapping options + + // Tone mapping function to use to handle out-of-range colors. + const struct pl_tone_map_function *tone_mapping_function; + + // Tone mapping constants, for expert tuning. Leave as default otherwise. + struct pl_tone_map_constants tone_constants; + + // If true, and supported by the given tone mapping function, libplacebo + // will perform inverse tone mapping to expand the dynamic range of a + // signal. libplacebo is not liable for any HDR-induced eye damage. + bool inverse_tone_mapping; + + // Data source to use when tone-mapping. Setting this to a specific + // value allows overriding the default metadata preference logic. + enum pl_hdr_metadata_type metadata; + + // Tone mapping LUT size. Defaults to 256. + int lut_size; + + // HDR contrast recovery strength. If set to a value above 0.0, the source + // image will be divided into high-frequency and low-frequency components, + // and a portion of the high-frequency image is added back onto the + // tone-mapped output. May cause excessive ringing artifacts for some HDR + // sources, but can improve the subjective sharpness and detail left over + // in the image after tone-mapping. + float contrast_recovery; + + // Contrast recovery lowpass kernel size. Defaults to 3.5. Increasing + // or decreasing this will affect the visual appearance substantially. + float contrast_smoothness; + + // --- Debugging options + + // Force the use of a full tone-mapping LUT even for functions that have + // faster pure GLSL replacements (e.g. clip, linear, saturation). + bool force_tone_mapping_lut; + + // Visualize the tone-mapping LUT and gamut mapping 3DLUT, in IPT space. + bool visualize_lut; + + // Controls where to draw the visualization, relative to the rendered + // video (dimensions 0-1). Optional, defaults to the full picture. + pl_rect2df visualize_rect; + + // Controls the rotation of the 3DLUT visualization. 
+ float visualize_hue; // useful range [-pi, pi] + float visualize_theta; // useful range [0, pi/2] + + // Graphically highlight hard-clipped pixels during tone-mapping (i.e. + // pixels that exceed the claimed source luminance range). + bool show_clipping; + + // --- Deprecated fields + enum pl_tone_map_mode tone_mapping_mode PL_DEPRECATED; // removed + float tone_mapping_param PL_DEPRECATED; // see `tone_constants` + float tone_mapping_crosstalk PL_DEPRECATED; // now hard-coded as 0.04 + enum pl_rendering_intent intent PL_DEPRECATED; // see `gamut_mapping` + enum pl_gamut_mode gamut_mode PL_DEPRECATED; // see `gamut_mapping` + float hybrid_mix PL_DEPRECATED; // removed +}; + +#define PL_COLOR_MAP_DEFAULTS \ + .gamut_mapping = &pl_gamut_map_perceptual, \ + .tone_mapping_function = &pl_tone_map_spline, \ + .gamut_constants = { PL_GAMUT_MAP_CONSTANTS }, \ + .tone_constants = { PL_TONE_MAP_CONSTANTS }, \ + .metadata = PL_HDR_METADATA_ANY, \ + .lut3d_size = {48, 32, 256}, \ + .lut_size = 256, \ + .visualize_rect = {0, 0, 1, 1}, \ + .contrast_smoothness = 3.5f, + +#define PL_COLOR_MAP_HQ_DEFAULTS \ + PL_COLOR_MAP_DEFAULTS \ + .contrast_recovery = 0.30f, + +#define pl_color_map_params(...) (&(struct pl_color_map_params) { PL_COLOR_MAP_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_color_map_params pl_color_map_default_params; +PL_API extern const struct pl_color_map_params pl_color_map_high_quality_params; + +// Execution arguments for the `pl_shader_color_map_ex` call. Distinct from +// `pl_color_map_params` because it is filled by internally-provided execution +// metadata, instead of user-tunable aesthetic parameters. +struct pl_color_map_args { + // Input/output color space for the mapping. + struct pl_color_space src; + struct pl_color_space dst; + + // If true, the logic will assume the input has already been linearized by + // the caller (e.g. as part of a previous linear light scaling operation). 
+ bool prelinearized; + + // Object to be used to store generated LUTs. Note that this is the same + // state object used by `pl_shader_detect_peak`, and if that function has + // been called on `state` prior to `pl_shader_color_map`, the detected + // values will be used to guide the tone mapping algorithm. If this is not + // provided, tone/gamut mapping are disabled. + pl_shader_obj *state; + + // Low-resolution intensity feature map, as generated by + // `pl_shader_extract_features`. Optional. No effect if + // `params->contrast_recovery` is disabled. + pl_tex feature_map; +}; + +#define pl_color_map_args(...) (&(struct pl_color_map_args) { __VA_ARGS__ }) + +// Maps `vec4 color` from one color space to another color space according +// to the parameters (described in greater depth above). If `params` is left +// as NULL, it defaults to `&pl_color_map_default_params` +PL_API void pl_shader_color_map_ex(pl_shader sh, + const struct pl_color_map_params *params, + const struct pl_color_map_args *args); + +// Backwards compatibility wrapper around `pl_shader_color_map_ex` +PL_API void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params, + struct pl_color_space src, struct pl_color_space dst, + pl_shader_obj *state, bool prelinearized); + +// Applies a set of cone distortion parameters to `vec4 color` in a given color +// space. This can be used to simulate color blindness. See `pl_cone_params` +// for more information. +PL_API void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp, + const struct pl_cone_params *params); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_COLORSPACE_H_ diff --git a/src/include/libplacebo/shaders/custom.h b/src/include/libplacebo/shaders/custom.h new file mode 100644 index 0000000..a4eec69 --- /dev/null +++ b/src/include/libplacebo/shaders/custom.h @@ -0,0 +1,341 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_CUSTOM_H_ +#define LIBPLACEBO_SHADERS_CUSTOM_H_ + +#include <stdlib.h> + +// Functions for writing custom shaders and hooking them into the `pl_renderer` +// pipeline, as well as compatibility functions for parsing shaders in mpv +// format. + +#include <libplacebo/shaders.h> +#include <libplacebo/dispatch.h> +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +// Parameters describing custom shader text to be embedded into a `pl_shader` +// object. All of the strings are optional and can be left as NULL, but without +// a `body` in particular, the shader will do nothing useful on its own. +struct pl_custom_shader { + // The prelude contains text such as extra #defines, #extension pragmas, + // or other parts of the shader that must be placed at the very + // beginning (before input layout declarations etc.) + // + // Note: #extension pragmas do not need to be emitted to enable support for + // resource types already attached to the shader (e.g. SSBOs), compute + // shaders, or GPU capabilities known to libplacebo (e.g. subgroups). + const char *prelude; + + // The header contains text such as helper function definitions, extra + // uniforms, shared memory variables or buffer descriptions. + const char *header; + + // A friendly name for the shader. 
(Optional) + const char *description; + + // The "primary" GLSL code. This will be effectively appended to the "main" + // function. It lives in an environment given by the `input` signature, and + // is expected to return results in a way given by the `output` signature. + // + // Note: In the case of PL_SHADER_SIG_COLOR, the output `vec4 color` is + // allocated by `pl_shader_custom`, the user merely needs to assign to it. + // + // Note: For ease of development it can be useful to have the main logic + // live inside a helper function defined as part of `header`, and specify + // the `body` as a single line that simply calls the helper function. + const char *body; + enum pl_shader_sig input; + enum pl_shader_sig output; + + // Extra descriptors, variables and vertex attributes to attach to the + // resulting `pl_shader_res`. + // + // Note: The names inside these will possibly be replaced by fresh + // identifiers internally, so users should avoid looking for exact string + // matches for the given names inside the `pl_shader_res`. + const struct pl_shader_desc *descriptors; + int num_descriptors; + const struct pl_shader_var *variables; + int num_variables; + const struct pl_shader_va *vertex_attribs; + int num_vertex_attribs; + const struct pl_shader_const *constants; + int num_constants; + + // If true, this shader must be a compute shader. The desired workgroup + // size and shared memory usage can be optionally specified, or 0 if no + // specific work group size or shared memory size restrictions apply. + // + // See also: `pl_shader_res.compute_group_size` + bool compute; + size_t compute_shmem; + int compute_group_size[2]; + + // Fixes the output size requirements of the shader to exact dimensions. + // Optional, if left as 0, means the shader can be dispatched at any size. + int output_w; + int output_h; +}; + +// Append custom shader code, including extra descriptors and variables, to an +// existing `pl_shader` object. Returns whether successful. 
This function may +// fail in the event that e.g. the custom shader requires compute shaders on +// an unsupported GPU, or exceeds the GPU's shared memory capabilities. +PL_API bool pl_shader_custom(pl_shader sh, const struct pl_custom_shader *params); + +// Which "rendering stages" are available for user shader hooking purposes. +// Except where otherwise noted, all stages are "non-resizable", i.e. the +// shaders already have specific output size requirements. +enum pl_hook_stage { + // Hook stages for the untouched planes, as made available by the source. + // These are all resizable, i.e. there are no specific output stage + // requirements. + PL_HOOK_RGB_INPUT = 1 << 0, + PL_HOOK_LUMA_INPUT = 1 << 1, + PL_HOOK_CHROMA_INPUT = 1 << 2, + PL_HOOK_ALPHA_INPUT = 1 << 3, + PL_HOOK_XYZ_INPUT = 1 << 4, + + // Hook stages for the scaled/aligned planes + PL_HOOK_CHROMA_SCALED = 1 << 5, + PL_HOOK_ALPHA_SCALED = 1 << 6, + + PL_HOOK_NATIVE = 1 << 7, // Combined image in its native color space + PL_HOOK_RGB = 1 << 8, // After conversion to RGB (resizable) + PL_HOOK_LINEAR = 1 << 9, // After linearization but before scaling + PL_HOOK_SIGMOID = 1 << 10, // After sigmoidization + PL_HOOK_PRE_KERNEL = 1 << 11, // Immediately before the main scaler kernel + PL_HOOK_POST_KERNEL = 1 << 12, // Immediately after the main scaler kernel + PL_HOOK_SCALED = 1 << 13, // After scaling, before color management + PL_HOOK_PRE_OUTPUT = 1 << 14, // After color management, before blending/rotation + PL_HOOK_OUTPUT = 1 << 15, // After blending/rotation, before dithering +}; + +// Returns true if a given hook stage is resizable +static inline bool pl_hook_stage_resizable(enum pl_hook_stage stage) { + switch (stage) { + case PL_HOOK_RGB_INPUT: + case PL_HOOK_LUMA_INPUT: + case PL_HOOK_CHROMA_INPUT: + case PL_HOOK_ALPHA_INPUT: + case PL_HOOK_XYZ_INPUT: + case PL_HOOK_NATIVE: + case PL_HOOK_RGB: + return true; + + case PL_HOOK_CHROMA_SCALED: + case PL_HOOK_ALPHA_SCALED: + case PL_HOOK_LINEAR: + case 
PL_HOOK_SIGMOID: + case PL_HOOK_PRE_KERNEL: + case PL_HOOK_POST_KERNEL: + case PL_HOOK_SCALED: + case PL_HOOK_PRE_OUTPUT: + case PL_HOOK_OUTPUT: + return false; + } + + abort(); +} + +// The different forms of communicating image data between the renderer and +// the hooks +enum pl_hook_sig { + PL_HOOK_SIG_NONE, // No data is passed, no data is received/returned + PL_HOOK_SIG_COLOR, // `vec4 color` already pre-sampled in a `pl_shader` + PL_HOOK_SIG_TEX, // `pl_tex` containing the image data + PL_HOOK_SIG_COUNT, +}; + +struct pl_hook_params { + // GPU objects associated with the `pl_renderer`, which the user may + // use for their own purposes. + pl_gpu gpu; + pl_dispatch dispatch; + + // Helper function to fetch a new temporary texture, using renderer-backed + // storage. This is guaranteed to have sane image usage requirements and a + // 16-bit or floating point format. The user does not need to free/destroy + // this texture in any way. May return NULL. + pl_tex (*get_tex)(void *priv, int width, int height); + void *priv; + + // Which stage triggered the hook to run. + enum pl_hook_stage stage; + + // For `PL_HOOK_SIG_COLOR`, this contains the existing shader object with + // the color already pre-sampled into `vec4 color`. The user may modify + // this as much as they want, as long as they don't dispatch/finalize/reset + // it. + // + // Note that this shader might have specific output size requirements, + // depending on the exact shader stage hooked by the user, and may already + // be a compute shader. + pl_shader sh; + + // For `PL_HOOK_SIG_TEX`, this contains the texture that the user should + // sample from. + // + // Note: This texture object is owned by the renderer, and users must not + // modify its contents. It will not be touched for the duration of a frame, + // but the contents are lost in between frames. + pl_tex tex; + + // The effective current rectangle of the image we're rendering in this + // shader, i.e. 
the effective rect of the content we're interested in, + // as a crop of either `sh` or `tex` (depending on the signature). + // + // Note: This is still set even for `PL_HOOK_SIG_NONE`! + pl_rect2df rect; + + // The current effective colorspace and representation, of either the + // pre-sampled color (in `sh`), or the contents of `tex`, respectively. + // + // Note: This is still set even for `PL_HOOK_SIG_NONE`! + struct pl_color_repr repr; + struct pl_color_space color; + int components; + + // The representation and colorspace of the original image, for reference. + const struct pl_color_repr *orig_repr; + const struct pl_color_space *orig_color; + + // The (cropped) source and destination rectangles of the overall + // rendering. These are functionally equivalent to `image.crop` and + // `target.crop`, respectively, but `src_rect` in particular may change as + // a result of previous hooks being executed. (e.g. prescalers) + pl_rect2df src_rect; + pl_rect2d dst_rect; +}; + +struct pl_hook_res { + // If true, the hook is assumed to have "failed" or errored in some way, + // and all other fields are ignored. + bool failed; + + // What type of output this hook is returning. + // Note: If this is `PL_HOOK_SIG_NONE`, all other fields are ignored. + enum pl_hook_sig output; + + // For `PL_HOOK_SIG_COLOR`, this *must* be set to a valid `pl_shader` + // object containing the sampled color value (i.e. with an output signature + // of `PL_SHADER_SIG_COLOR`), and *should* be allocated from the given + // `pl_dispatch` object. Ignored otherwise. + pl_shader sh; + + // For `PL_HOOK_SIG_TEX`, this *must* contain the texture object containing + // the result of rendering the hook. This *should* be a texture allocated + // using the given `get_tex` callback, to ensure the format and texture + // usage flags are compatible with what the renderer expects. 
+ pl_tex tex; + + // For shaders that return some sort of output, this contains the + // new/altered versions of the existing "current texture" metadata. + struct pl_color_repr repr; + struct pl_color_space color; + int components; + + // This contains the new effective rect of the contents. This may be + // different from the original `rect` for resizable passes. Ignored for + // non-resizable passes. + pl_rect2df rect; +}; + +enum pl_hook_par_mode { + PL_HOOK_PAR_VARIABLE, // normal shader variable + PL_HOOK_PAR_DYNAMIC, // dynamic shader variable, e.g. per-frame changing + PL_HOOK_PAR_CONSTANT, // fixed at compile time (e.g. for array sizes), + // must be scalar (non-vector/matrix) + PL_HOOK_PAR_DEFINE, // defined in the preprocessor, must be `int` + PL_HOOK_PAR_MODE_COUNT, +}; + +typedef union pl_var_data { + int i; + unsigned u; + float f; +} pl_var_data; + +struct pl_hook_par { + // Name as used in the shader. + const char *name; + + // Type of this shader parameter, and how it's manifested in the shader. + enum pl_var_type type; + enum pl_hook_par_mode mode; + + // Human-readable explanation of this parameter. (Optional) + const char *description; + + // Mutable data pointer to current value of variable. + pl_var_data *data; + + // Default/initial value, and lower/upper bounds. + pl_var_data initial; + pl_var_data minimum; + pl_var_data maximum; + + // Human-readable names for the variants of an integer option. This array + // can be indexed directly by integer values, ranging from `minimum.i` to + // `maximum.i`. May be NULL, in which case options are unnamed. + const char * const *names; +}; + +// Struct describing a hook. +// +// Note: Users may freely create their own instances of this struct, there is +// nothing particularly special about `pl_mpv_user_shader_parse`. 
+struct pl_hook { + enum pl_hook_stage stages; // Which stages to hook on + enum pl_hook_sig input; // Which input signature this hook expects + void *priv; // Arbitrary user context + + // Custom tunable shader parameters exported by this hook. These may be + // updated at any time by the user, to influence the behavior of the hook. + // Contents are arbitrary and subject to the method of hook construction. + const struct pl_hook_par *parameters; + int num_parameters; + + // Called at the beginning of passes, to reset/initialize the hook. (Optional) + void (*reset)(void *priv); + + // The hook function itself. Called by the renderer at any of the indicated + // hook stages. See `pl_hook_res` for more info on the return values. + struct pl_hook_res (*hook)(void *priv, const struct pl_hook_params *params); + + // Unique signature identifying this hook, used to disable misbehaving hooks. + // All hooks with the same signature will be disabled, should they fail to + // execute during run-time. + uint64_t signature; +}; + +// Compatibility layer with `mpv` user shaders. See the mpv man page for more +// information on the format. Will return `NULL` if the shader fails parsing. +// +// The resulting `pl_hook` objects should be destroyed with the corresponding +// destructor when no longer needed. +PL_API const struct pl_hook * +pl_mpv_user_shader_parse(pl_gpu gpu, const char *shader_text, size_t shader_len); + +PL_API void pl_mpv_user_shader_destroy(const struct pl_hook **hook); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_CUSTOM_H_ diff --git a/src/include/libplacebo/shaders/deinterlacing.h b/src/include/libplacebo/shaders/deinterlacing.h new file mode 100644 index 0000000..40e74e8 --- /dev/null +++ b/src/include/libplacebo/shaders/deinterlacing.h @@ -0,0 +1,137 @@ + +/* + * This file is part of libplacebo, which is normally licensed under the terms + * of the LGPL v2.1+. 
However, this file (film_grain.h) is also available under + * the terms of the more permissive MIT license: + * + * Copyright (c) 2018-2019 Niklas Haas + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef LIBPLACEBO_SHADERS_DEINTERLACING_H_ +#define LIBPLACEBO_SHADERS_DEINTERLACING_H_ + +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +enum pl_field { + PL_FIELD_NONE = 0, // no deinterlacing + PL_FIELD_EVEN, // "top" fields, with even y coordinates + PL_FIELD_ODD, // "bottom" fields, with odd y coordinates + + // Convenience aliases + PL_FIELD_TOP = PL_FIELD_EVEN, + PL_FIELD_BOTTOM = PL_FIELD_ODD, +}; + +static inline enum pl_field pl_field_other(enum pl_field field) +{ + switch (field) { + case PL_FIELD_EVEN: return PL_FIELD_ODD; + case PL_FIELD_ODD: return PL_FIELD_EVEN; + default: return field; + } +} + +struct pl_field_pair { + // Top texture. 
If only this is specified, it's assumed to contain both + // fields in an interleaved fashion (MBAFF). + // + // Note: Support for separate fields (PAFF) is currently pending, so this + // is the only way to provide interlaced frames at the moment. + pl_tex top; +}; + +#define pl_field_pair(...) ((struct pl_field_pair) { __VA_ARGS__ }) + +struct pl_deinterlace_source { + // Previous, current and next source (interlaced) frames. `prev` and `next` + // may be NULL, but `cur` is required. If present, they must all have the + // exact same texture dimensions. + // + // Note: `prev` and `next` are only required for PL_DEINTERLACE_YADIF. + struct pl_field_pair prev, cur, next; + + // The parity of the current field to output. This field will be unmodified + // from `cur`, with the corresponding other field interpolated. + // + // If this is `PL_FIELD_NONE`, no deinterlacing is performed, and the + // texture is merely sampled as-is. + enum pl_field field; + + // The parity of the first frame in a stream. Set this to the field that is + // (conceptually) ordered first in time. + // + // If this is `PL_FIELD_NONE`, it will instead default to `PL_FIELD_TOP`. + enum pl_field first_field; + + // Components to deinterlace. Components not specified will be ignored. + // Optional, if left as 0, all components will be deinterlaced. + uint8_t component_mask; +}; + +#define pl_deinterlace_source(...) (&(struct pl_deinterlace_source) { __VA_ARGS__ }) + +enum pl_deinterlace_algorithm { + // No-op deinterlacing, just sample the weaved frame un-touched. + PL_DEINTERLACE_WEAVE = 0, + + // Naive bob deinterlacing. Doubles the field lines vertically. + PL_DEINTERLACE_BOB, + + // "Yet another deinterlacing filter". Deinterlacer with temporal and + // spatial information. Based on FFmpeg's Yadif filter algorithm, but + // adapted slightly for the GPU. 
+ PL_DEINTERLACE_YADIF, + + PL_DEINTERLACE_ALGORITHM_COUNT, +}; + +// Returns whether or not an algorithm requires `prev`/`next` refs to be set. +static inline bool pl_deinterlace_needs_refs(enum pl_deinterlace_algorithm algo) +{ + return algo == PL_DEINTERLACE_YADIF; +} + +struct pl_deinterlace_params { + // Algorithm to use. The recommended default is PL_DEINTERLACE_YADIF, which + // provides a good trade-off of quality and speed. + enum pl_deinterlace_algorithm algo; + + // Skip the spatial interlacing check. (PL_DEINTERLACE_YADIF only) + bool skip_spatial_check; +}; + +#define PL_DEINTERLACE_DEFAULTS \ + .algo = PL_DEINTERLACE_YADIF, + +#define pl_deinterlace_params(...) (&(struct pl_deinterlace_params) { PL_DEINTERLACE_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_deinterlace_params pl_deinterlace_default_params; + +// Deinterlaces a set of interleaved source frames and outputs the result into +// `vec4 color`. If `params` is left as NULL, it defaults to +// `&pl_deinterlace_default_params`. +PL_API void pl_shader_deinterlace(pl_shader sh, const struct pl_deinterlace_source *src, + const struct pl_deinterlace_params *params); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_DEINTERLACING_H_ diff --git a/src/include/libplacebo/shaders/dithering.h b/src/include/libplacebo/shaders/dithering.h new file mode 100644 index 0000000..9146c81 --- /dev/null +++ b/src/include/libplacebo/shaders/dithering.h @@ -0,0 +1,140 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_DITHERING_H_ +#define LIBPLACEBO_SHADERS_DITHERING_H_ + +// Dithering shaders + +#include <libplacebo/colorspace.h> +#include <libplacebo/dither.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +enum pl_dither_method { + // Dither with blue noise. Very high quality, but requires the use of a + // LUT. Warning: Computing a blue noise texture with a large size can be + // very slow, however this only needs to be performed once. Even so, using + // this with a `lut_size` greater than 6 is generally ill-advised. This is + // the preferred/default dither method. + PL_DITHER_BLUE_NOISE, + + // Dither with an ordered (bayer) dither matrix, using a LUT. Low quality, + // and since this also uses a LUT, there's generally no advantage to picking + // this instead of `PL_DITHER_BLUE_NOISE`. It's mainly there for testing. + PL_DITHER_ORDERED_LUT, + + // The same as `PL_DITHER_ORDERED_LUT`, but uses fixed function math instead + // of a LUT. This is faster, but only supports a fixed dither matrix size + // of 16x16 (equal to a `lut_size` of 4). + PL_DITHER_ORDERED_FIXED, + + // Dither with white noise. This does not require a LUT and is fairly cheap + // to compute. Unlike the other modes it doesn't show any repeating + // patterns either spatially or temporally, but the downside is that this + // is visually fairly jarring due to the presence of low frequencies in the + // noise spectrum. + PL_DITHER_WHITE_NOISE, + + PL_DITHER_METHOD_COUNT, +}; + +struct pl_dither_params { + // The source of the dither noise to use. + enum pl_dither_method method; + + // For the dither methods which require the use of a LUT, this controls + // the size of the LUT (base 2). 
If left as 0, this defaults to 6, which + // is equivalent to a 64x64 dither matrix. Must not be larger than 8. + int lut_size; + + // Enables temporal dithering. This reduces the persistence of dithering + // artifacts by perturbing the dithering matrix per frame. + // Warning: This can cause nasty aliasing artifacts on some LCD screens. + bool temporal; + + // Gamma function to use for dither gamma correction. This will only have + // an effect when dithering to low bit depths (<= 4). + enum pl_color_transfer transfer; +}; + +#define PL_DITHER_DEFAULTS \ + .method = PL_DITHER_BLUE_NOISE, \ + .lut_size = 6, \ + /* temporal dithering commonly flickers on LCDs */ \ + .temporal = false, + +#define pl_dither_params(...) (&(struct pl_dither_params) { PL_DITHER_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_dither_params pl_dither_default_params; + +// Dither the colors to a lower depth, given in bits. This can be used on input +// colors of any precision. Basically, this rounds the colors to only linear +// multiples of the stated bit depth. The average intensity of the result +// will not change (i.e., the dither noise is balanced in both directions). +// If `params` is NULL, it defaults to &pl_dither_default_params. +// +// For the dither methods which require the use of a LUT, `dither_state` must +// be set to a valid pointer. To avoid thrashing the resource, users should +// avoid trying to re-use the same LUT for different dither configurations. If +// passed as NULL, libplacebo will automatically fall back to dither algorithms +// that don't require the use of a LUT. +// +// Warning: This dithering algorithm is not gamma-invariant; so using it for +// very low bit depths (below 4 or so) will noticeably increase the brightness +// of the resulting image. When doing low bit depth dithering for aesthetic +// purposes, it's recommended that the user explicitly (de)linearize the colors +// before and after this algorithm. 
+PL_API void pl_shader_dither(pl_shader sh, int new_depth, + pl_shader_obj *dither_state, + const struct pl_dither_params *params); + +struct pl_error_diffusion_params { + // Both the input and output texture must be provided up-front, with the + // same size. The output texture must be storable, and the input texture + // must be sampleable. + pl_tex input_tex; + pl_tex output_tex; + + // Depth to dither to. Required. + int new_depth; + + // Error diffusion kernel to use. Optional. If unspecified, defaults to + // `&pl_error_diffusion_sierra_lite`. + const struct pl_error_diffusion_kernel *kernel; +}; + +#define pl_error_diffusion_params(...) (&(struct pl_error_diffusion_params) { __VA_ARGS__ }) + +// Computes the shared memory requirements for a given error diffusion kernel. +// This can be used to test up-front whether or not error diffusion would be +// supported, before having to initialize textures. +PL_API size_t pl_error_diffusion_shmem_req(const struct pl_error_diffusion_kernel *kernel, + int height); + +// Apply an error diffusion dithering kernel. This is a much more expensive and +// heavy dithering method, and is not generally recommended for realtime usage +// where performance is critical. +// +// Requires compute shader support. Returns false if dithering fails, e.g. as a +// result of shader memory limits being exceeded. The resulting shader must be +// dispatched with a work group count of exactly 1. +PL_API bool pl_shader_error_diffusion(pl_shader sh, const struct pl_error_diffusion_params *params); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_DITHERING_H_ diff --git a/src/include/libplacebo/shaders/film_grain.h b/src/include/libplacebo/shaders/film_grain.h new file mode 100644 index 0000000..8a9c78b --- /dev/null +++ b/src/include/libplacebo/shaders/film_grain.h @@ -0,0 +1,137 @@ +/* + * This file is part of libplacebo, which is normally licensed under the terms + * of the LGPL v2.1+. 
However, this file (film_grain.h) is also available under + * the terms of the more permissive MIT license: + * + * Copyright (c) 2018-2019 Niklas Haas + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef LIBPLACEBO_SHADERS_FILM_GRAIN_H_ +#define LIBPLACEBO_SHADERS_FILM_GRAIN_H_ + +// Film grain synthesis shaders for AV1 / H.274. + +#include <stdint.h> +#include <stdbool.h> + +#include <libplacebo/colorspace.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +enum pl_film_grain_type { + PL_FILM_GRAIN_NONE = 0, + PL_FILM_GRAIN_AV1, + PL_FILM_GRAIN_H274, + PL_FILM_GRAIN_COUNT, +}; + +// AV1 film grain parameters. For the exact meaning of these, see the AV1 +// specification (section 6.8.20). 
+struct pl_av1_grain_data { + int num_points_y; + uint8_t points_y[14][2]; // [n][0] = value, [n][1] = scaling + bool chroma_scaling_from_luma; + int num_points_uv[2]; // should be {0} for grayscale images + uint8_t points_uv[2][10][2]; // like points_y for points_uv[0, 1] = u, v + int scaling_shift; + int ar_coeff_lag; + int8_t ar_coeffs_y[24]; + int8_t ar_coeffs_uv[2][25]; + int ar_coeff_shift; + int grain_scale_shift; + int8_t uv_mult[2]; + int8_t uv_mult_luma[2]; + int16_t uv_offset[2]; // 9-bit value, range [-256, 255] + bool overlap; +}; + +// H.274 film grain parameters. For the exact meaning of these, see the H.274 +// specification (section 8.5). +struct pl_h274_grain_data { + int model_id; + int blending_mode_id; + int log2_scale_factor; + bool component_model_present[3]; + uint16_t num_intensity_intervals[3]; + uint8_t num_model_values[3]; + const uint8_t *intensity_interval_lower_bound[3]; + const uint8_t *intensity_interval_upper_bound[3]; + const int16_t (*comp_model_value[3])[6]; +}; + +// Tagged union for film grain data +struct pl_film_grain_data { + enum pl_film_grain_type type; // film grain type + uint64_t seed; // shared seed value + + union { + // Warning: These values are not sanity-checked at all. Invalid grain + // data results in undefined behavior! + struct pl_av1_grain_data av1; + struct pl_h274_grain_data h274; + } params; +}; + +// Options for the `pl_shader_film_grain` call. 
+struct pl_film_grain_params { + // Required for all film grain types: + struct pl_film_grain_data data; // film grain data + pl_tex tex; // texture to sample from + struct pl_color_repr *repr; // underlying color representation (see notes) + int components; + int component_mapping[4]; // same as `struct pl_plane` + + // Notes for `repr`: + // - repr->bits affects the rounding for grain generation + // - repr->levels affects whether or not we clip to full range or not + // - repr->sys affects the interpretation of channels + // - *repr gets normalized by this shader, which is why it's a pointer + + // Required for PL_FILM_GRAIN_AV1 only: + pl_tex luma_tex; // "luma" texture (see notes) + int luma_comp; // index of luma in `luma_tex` + + // Notes for `luma_tex`: + // - `luma_tex` must be specified if the `tex` does not itself contain the + // "luma-like" component. For XYZ systems, the Y channel is the luma + // component. For RGB systems, the G channel is. +}; + +#define pl_film_grain_params(...) (&(struct pl_film_grain_params) { __VA_ARGS__ }) + +// Test if film grain needs to be applied. This is a helper function that users +// can use to decide whether or not `pl_shader_film_grain` needs to be called, +// based on the given grain metadata. +PL_API bool pl_needs_film_grain(const struct pl_film_grain_params *params); + +// Sample from a texture while applying film grain at the same time. +// `grain_state` must be unique for every plane configuration, as it may +// contain plane-dependent state. +// +// Returns false on any error, or if film grain generation is not supported +// due to GLSL limitations. 
+PL_API bool pl_shader_film_grain(pl_shader sh, pl_shader_obj *grain_state, + const struct pl_film_grain_params *params); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_FILM_GRAIN_H_ diff --git a/src/include/libplacebo/shaders/icc.h b/src/include/libplacebo/shaders/icc.h new file mode 100644 index 0000000..a4003f4 --- /dev/null +++ b/src/include/libplacebo/shaders/icc.h @@ -0,0 +1,135 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_ICC_H_ +#define LIBPLACEBO_SHADERS_ICC_H_ + +// Functions for generating and applying ICC-derived (3D)LUTs + +#include <libplacebo/colorspace.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +struct pl_icc_params { + // The rendering intent to use, for profiles with multiple intents. A + // recommended value is PL_INTENT_RELATIVE_COLORIMETRIC for color-accurate + // video reproduction, or PL_INTENT_PERCEPTUAL for profiles containing + // meaningful perceptual mapping tables for some more suitable color space + // like BT.709. + // + // If this is set to the special value PL_INTENT_AUTO, will use the + // preferred intent provided by the profile header. + enum pl_rendering_intent intent; + + // The size of the 3DLUT to generate. If left as 0, these individually + // default to values appropriate for the profile. 
(Based on internal + // precision heuristics) + // + // Note: Setting this manually is strongly discouraged, as it can result + // in excessively high 3DLUT sizes where a much smaller LUT would have + // sufficed. + int size_r, size_g, size_b; + + // This field can be used to override the detected brightness level of the + // ICC profile. If you set this to the special value 0 (or a negative + // number), libplacebo will attempt reading the brightness value from the + // ICC profile's tagging (if available), falling back to PL_COLOR_SDR_WHITE + // if unavailable. + float max_luma; + + // Force black point compensation. May help avoid crushed or raised black + // points on "improper" profiles containing e.g. colorimetric tables that + // do not round-trip. Should not be required on well-behaved profiles, + // or when using PL_INTENT_PERCEPTUAL, but YMMV. + bool force_bpc; + + // If provided, this pl_cache instance will be used, instead of the + // GPU-internal cache, to cache the generated 3DLUTs. Note that these can + // get large, especially for large values of size_{r,g,b}, so the user may + // wish to split this cache off from the main shader cache. (Optional) + pl_cache cache; + + // Deprecated legacy caching API. Replaced by `cache`. + PL_DEPRECATED void *cache_priv; + PL_DEPRECATED void (*cache_save)(void *priv, uint64_t sig, const uint8_t *cache, size_t size); + PL_DEPRECATED bool (*cache_load)(void *priv, uint64_t sig, uint8_t *cache, size_t size); +}; + +#define PL_ICC_DEFAULTS \ + .intent = PL_INTENT_RELATIVE_COLORIMETRIC, \ + .max_luma = PL_COLOR_SDR_WHITE, + +#define pl_icc_params(...) (&(struct pl_icc_params) { PL_ICC_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_icc_params pl_icc_default_params; + +// This object represents a "parsed" ICC profile. +typedef const struct pl_icc_object_t { + // Provided params, with the `intent` and `size` fields set (as described) + struct pl_icc_params params; + + // Signature of the corresponding ICC profile. 
+ uint64_t signature; + + // Detected color space (or UNKNOWN for profiles which don't contain an + // exact match), with HDR metadata set to the detected gamut and + // white/black value ranges. + struct pl_color_space csp; + + // Best estimate of profile gamma. This only serves as a rough guideline. + float gamma; + + // Smallest containing primary set, always set. + enum pl_color_primaries containing_primaries; +} *pl_icc_object; + +// Attempts opening/parsing the contents of an ICC profile. The resulting +// object is memory managed and may outlive the original profile - access +// to the underlying profile is no longer needed once this returns. +PL_API pl_icc_object pl_icc_open(pl_log log, const struct pl_icc_profile *profile, + const struct pl_icc_params *params); +PL_API void pl_icc_close(pl_icc_object *icc); + +// Update an existing pl_icc_object, which may be NULL, replacing it by the +// new profile and parameters (if incompatible). +// +// Returns success. `obj` is set to the created profile, or NULL on error. +// +// Note: If `profile->signature` matches `(*obj)->signature`, or if `profile` is +// NULL, then the existing profile is directly reused, with only the effective +// parameters changing. In this case, `profile->data` is also *not* read from, +// and may safely be NULL. +PL_API bool pl_icc_update(pl_log log, pl_icc_object *obj, + const struct pl_icc_profile *profile, + const struct pl_icc_params *params); + +// Decode the input from the colorspace determined by the attached ICC profile +// to linear light RGB (in the profile's containing primary set). `lut` must be +// set to a shader object that will store the GPU resources associated with the +// generated LUT. The resulting color space will be written to `out_csp`. 
+PL_API void pl_icc_decode(pl_shader sh, pl_icc_object profile, pl_shader_obj *lut, + struct pl_color_space *out_csp); + +// Encode the input from linear light RGB (in the profile's containing primary +// set) into the colorspace determined by the attached ICC profile. `lut` must +// be set to a shader object that will store the GPU resources associated with +// the generated LUT. +PL_API void pl_icc_encode(pl_shader sh, pl_icc_object profile, pl_shader_obj *lut); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_ICC_H_ diff --git a/src/include/libplacebo/shaders/lut.h b/src/include/libplacebo/shaders/lut.h new file mode 100644 index 0000000..6e30ddc --- /dev/null +++ b/src/include/libplacebo/shaders/lut.h @@ -0,0 +1,78 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_LUT_H_ +#define LIBPLACEBO_SHADERS_LUT_H_ + +// Shaders for loading and applying arbitrary custom 1D/3DLUTs + +#include <libplacebo/colorspace.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +// Struct defining custom LUTs +// +// Note: Users may freely create their own instances of this struct, there is +// nothing particularly special about `pl_lut_parse_cube`. +struct pl_custom_lut { + // Some unique signature identifying this LUT, needed to detect state + // changes (for cache invalidation). 
This should ideally be a hash of the + // file contents. (Which is what `pl_lut_parse_*` will set it to.) + uint64_t signature; + + // Size of each dimension, in the order R, G, B. For 1D LUTs, only the R + // dimension should be specified (the others left as 0). + int size[3]; + + // Raw LUT data itself, in properly scaled floating point format. For 3D + // LUTs, the innermost dimension is the first dimension (R), and the + // outermost dimension is the last dimension (B). Individual color samples + // are in the order R, G, B. + const float *data; + + // Extra input/output shaper matrices. Ignored if equal to {0}. This is + // mostly useful for 1D LUTs, since 3D LUTs can bake the shaper matrix into + // the LUT itself - but it can still help optimize LUT precision. + pl_matrix3x3 shaper_in, shaper_out; + + // Nominal metadata for the input/output of a LUT. Left as {0} if unknown. + // Note: This is purely informative, `pl_shader_custom_lut` ignores it. + struct pl_color_repr repr_in, repr_out; + struct pl_color_space color_in, color_out; +}; + +// Parse a 3DLUT in .cube format. Returns NULL if the file fails parsing. +PL_API struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *str, size_t str_len); + +// Frees a LUT created by `pl_lut_parse_*`. +PL_API void pl_lut_free(struct pl_custom_lut **lut); + +// Apply a `pl_custom_lut`. The user is responsible for ensuring colors going +// into the LUT are in the expected format as informed by the LUT metadata. +// +// `lut_state` must be a pointer to a NULL-initialized shader state object that +// will be used to encapsulate any required GPU state. +// +// Note: `lut` does not have to be allocated by `pl_lut_parse_*`. It can be a +// struct filled out by the user. 
+PL_API void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, + pl_shader_obj *lut_state); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_LUT_H_ diff --git a/src/include/libplacebo/shaders/sampling.h b/src/include/libplacebo/shaders/sampling.h new file mode 100644 index 0000000..5221e44 --- /dev/null +++ b/src/include/libplacebo/shaders/sampling.h @@ -0,0 +1,257 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_SAMPLING_H_ +#define LIBPLACEBO_SHADERS_SAMPLING_H_ + +// Sampling operations. These shaders perform some form of sampling operation +// from a given pl_tex. In order to use these, the pl_shader *must* have been +// created using the same `gpu` as the originating `pl_tex`. Otherwise, this +// is undefined behavior. They require nothing (PL_SHADER_SIG_NONE) and return +// a color (PL_SHADER_SIG_COLOR). + +#include <libplacebo/colorspace.h> +#include <libplacebo/filters.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +// Common parameters for sampling operations +struct pl_sample_src { + // There are two mutually exclusive ways of providing the source to sample + // from: + // + // 1. Provide the texture and sampled region directly. 
This generates + // a shader with input signature `PL_SHADER_SIG_NONE`, which binds the + // texture as a descriptor (and the coordinates as a vertex attribute) + pl_tex tex; // texture to sample + pl_rect2df rect; // sub-rect to sample from (optional) + enum pl_tex_address_mode address_mode; // preferred texture address mode + + // 2. Have the shader take it as an argument. Doing this requires + // specifying the missing metadata of the texture backing the sampler, so + // that the shader generation can generate the correct code. + int tex_w, tex_h; // dimensions of the actual texture + enum pl_fmt_type format; // format of the sampler being accepted + enum pl_sampler_type sampler; // type of the sampler being accepted + enum pl_tex_sample_mode mode; // sample mode of the sampler being accepted + float sampled_w, sampled_h; // dimensions of the sampled region (optional) + + // Common metadata for both sampler input types: + int components; // number of components to sample (optional) + uint8_t component_mask; // bitmask of components to sample (optional) + int new_w, new_h; // dimensions of the resulting output (optional) + float scale; // factor to multiply into sampled signal (optional) + + // Note: `component_mask` and `components` are mutually exclusive, the + // former is preferred if both are specified. +}; + +#define pl_sample_src(...) (&(struct pl_sample_src) { __VA_ARGS__ }) + +struct pl_deband_params { + // The number of debanding steps to perform per sample. Each step reduces a + // bit more banding, but takes time to compute. Note that the strength of + // each step falls off very quickly, so high numbers (>4) are practically + // useless. Defaults to 1. + int iterations; + + // The debanding filter's cut-off threshold. Higher numbers increase the + // debanding strength dramatically, but progressively diminish image + // details. Defaults to 3.0. + float threshold; + + // The debanding filter's initial radius. 
The radius increases linearly + // for each iteration. A higher radius will find more gradients, but a + // lower radius will smooth more aggressively. Defaults to 16.0. + float radius; + + // Add some extra noise to the image. This significantly helps cover up + // remaining quantization artifacts. Higher numbers add more noise. + // Note: When debanding HDR sources, even a small amount of grain can + // result in a very big change to the brightness level. It's recommended to + // either scale this value down or disable it entirely for HDR. + // + // Defaults to 4.0, which is very mild. + float grain; + + // 'Neutral' grain value for each channel being debanded (sorted in order + // from low to high index). Grain application will be modulated to avoid + // disturbing colors close to this value. Set this to a value corresponding + // to black in the relevant colorspace. + float grain_neutral[3]; +}; + +#define PL_DEBAND_DEFAULTS \ + .iterations = 1, \ + .threshold = 3.0, \ + .radius = 16.0, \ + .grain = 4.0, + +#define pl_deband_params(...) (&(struct pl_deband_params) {PL_DEBAND_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_deband_params pl_deband_default_params; + +// Debands a given texture and returns the sampled color in `vec4 color`. If +// `params` is left as NULL, it defaults to &pl_deband_default_params. Note +// that `tex->params.format` must have PL_FMT_CAP_LINEAR. When the given +// `pl_sample_src` implies scaling, this effectively performs bilinear +// sampling on the input (but not the output). +// +// Note: This can also be used as a pure grain function, by setting the number +// of iterations to 0. +PL_API void pl_shader_deband(pl_shader sh, const struct pl_sample_src *src, + const struct pl_deband_params *params); + +// Performs direct / native texture sampling, using whatever texture filter is +// available (linear for linearly sampleable sources, nearest otherwise). 
+// +// Note: This is generally very low quality and should be avoided if possible, +// for both upscaling and downscaling. +PL_API bool pl_shader_sample_direct(pl_shader sh, const struct pl_sample_src *src); + +// Performs hardware-accelerated nearest neighbour sampling. This is similar to +// `pl_shader_sample_direct`, but forces nearest neighbour interpolation. +PL_API bool pl_shader_sample_nearest(pl_shader sh, const struct pl_sample_src *src); + +// Performs hardware-accelerated bilinear sampling. This is similar to +// `pl_shader_sample_direct`, but forces bilinear interpolation. +PL_API bool pl_shader_sample_bilinear(pl_shader sh, const struct pl_sample_src *src); + +// Optimized versions of specific, strictly positive scaler kernels that take +// advantage of linear texture sampling to reduce the number of fetches needed +// by a factor of four. This family of functions performs radius-2 scaling +// with only four texture fetches, which is far more efficient than using +// the generalized 1D scaling method. Only works well for upscaling. +PL_API bool pl_shader_sample_bicubic(pl_shader sh, const struct pl_sample_src *src); +PL_API bool pl_shader_sample_hermite(pl_shader sh, const struct pl_sample_src *src); +PL_API bool pl_shader_sample_gaussian(pl_shader sh, const struct pl_sample_src *src); + +// A sampler that is similar to nearest neighbour sampling, but tries to +// preserve pixel aspect ratios. This is mathematically equivalent to taking an +// idealized image with square pixels, sampling it at an infinite resolution, +// and then downscaling that to the desired resolution. (Hence it being called +// "oversample"). Good for pixel art. +// +// The threshold provides a cutoff threshold below which the contribution of +// pixels should be ignored, trading some amount of aspect ratio distortion for +// a slightly crisper image. A value of `threshold == 0.5` makes this filter +// equivalent to regular nearest neighbour sampling. 
+PL_API bool pl_shader_sample_oversample(pl_shader sh, const struct pl_sample_src *src, + float threshold); + +struct pl_sample_filter_params { + // The filter to use for sampling. + struct pl_filter_config filter; + + // Antiringing strength. A value of 0.0 disables antiringing, and a value + // of 1.0 enables full-strength antiringing. Defaults to 0.0 if + // unspecified. + // + // Note: Ignored if `filter.antiring` is already set to something nonzero. + float antiring; + + // Disable the use of compute shaders (e.g. if rendering to non-storable tex) + bool no_compute; + // Disable the use of filter widening / anti-aliasing (for downscaling) + bool no_widening; + + // This shader object is used to store the LUT, and will be recreated + // if necessary. To avoid thrashing the resource, users should avoid trying + // to re-use the same LUT for different filter configurations or scaling + // ratios. Must be set to a valid pointer, and the target NULL-initialized. + pl_shader_obj *lut; + + // Deprecated / removed fields + int lut_entries PL_DEPRECATED; // hard-coded as 256 + float cutoff PL_DEPRECATED; // hard-coded as 1e-3 +}; + +#define pl_sample_filter_params(...) (&(struct pl_sample_filter_params) { __VA_ARGS__ }) + +// Performs polar sampling. This internally chooses between an optimized compute +// shader, and various fragment shaders, depending on the supported GLSL version +// and GPU features. Returns whether or not it was successful. +// +// Note: `params->filter.polar` must be true to use this function. +PL_API bool pl_shader_sample_polar(pl_shader sh, const struct pl_sample_src *src, + const struct pl_sample_filter_params *params); + +// Performs orthogonal (1D) sampling. Using this twice in a row (once vertical +// and once horizontal) effectively performs a 2D upscale. This is lower +// quality than polar sampling, but significantly faster, and therefore the +// recommended default. Returns whether or not it was successful. 
+// +// `src` must represent a scaling operation that only scales in one direction, +// i.e. either only X or only Y. The other direction must be left unscaled. +// +// Note: Due to internal limitations, this may currently only be used on 2D +// textures - even though the basic principle would work for 1D and 3D textures +// as well. +PL_API bool pl_shader_sample_ortho2(pl_shader sh, const struct pl_sample_src *src, + const struct pl_sample_filter_params *params); + +struct pl_distort_params { + // An arbitrary 2x2 affine transformation to apply to the input image. + // For simplicity, the input image is explicitly centered and scaled such + // that the longer dimension is in [-1,1], before applying this. + pl_transform2x2 transform; + + // If true, the texture is placed inside the center of the canvas without + // scaling. If false, it is effectively stretched to the canvas size. + bool unscaled; + + // If true, the transformation is automatically scaled down and shifted to + // ensure that the resulting image fits inside the output canvas. + bool constrain; + + // If true, use bicubic interpolation rather than faster bilinear + // interpolation. Higher quality but slower. + bool bicubic; + + // Specifies the texture address mode to use when sampling out of bounds. + enum pl_tex_address_mode address_mode; + + // If set, all out-of-bounds accesses will instead be treated as + // transparent, according to the given alpha mode. (Which should match the + // alpha mode of the texture) + // + // Note: `address_mode` has no effect when this is specified. + enum pl_alpha_mode alpha_mode; +}; + +#define PL_DISTORT_DEFAULTS \ + .transform.mat.m = {{ 1, 0 }, {0, 1}}, + +#define pl_distort_params(...) (&(struct pl_distort_params) {PL_DISTORT_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_distort_params pl_distort_default_params; + +// Distorts the input image using a given set of transformation parameters. 
+// `out_w` and `out_h` determine the size of the effective canvas inside which +// the distorted result may be rendered. Areas outside of this canvas will +// be implicitly cut off. +PL_API void pl_shader_distort(pl_shader sh, pl_tex tex, int out_w, int out_h, + const struct pl_distort_params *params); + +enum PL_DEPRECATED { // for `int pass` + PL_SEP_VERT = 0, + PL_SEP_HORIZ, + PL_SEP_PASSES +}; + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_SAMPLING_H_ diff --git a/src/include/libplacebo/swapchain.h b/src/include/libplacebo/swapchain.h new file mode 100644 index 0000000..b53aa5c --- /dev/null +++ b/src/include/libplacebo/swapchain.h @@ -0,0 +1,171 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SWAPCHAIN_H_ +#define LIBPLACEBO_SWAPCHAIN_H_ + +#include <libplacebo/common.h> +#include <libplacebo/colorspace.h> +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// This abstraction represents a low-level interface to visible surfaces +// exposed by a graphics API (and accompanying GPU instance), allowing users to +// directly present frames to the screen (or window, typically). This is a +// sister API to gpu.h and follows the same convention w.r.t undefined behavior. 
+// +// Thread-safety: Safe +typedef const struct pl_swapchain_t { + pl_log log; + pl_gpu gpu; +} *pl_swapchain; + +// Destroys this swapchain. May be used at any time, and may block until the +// completion of all outstanding rendering commands. The swapchain and any +// resources retrieved from it must not be used afterwards. +PL_API void pl_swapchain_destroy(pl_swapchain *sw); + +// Returns the approximate current swapchain latency in vsyncs, or 0 if +// unknown. A latency of 1 means that `submit_frame` followed by `swap_buffers` +// will block until the just-submitted frame has finished rendering. Typical +// values are 2 or 3, which enable better pipelining by allowing the GPU to be +// processing one or two frames at the same time as the user is preparing the +// next for submission. +PL_API int pl_swapchain_latency(pl_swapchain sw); + +// Update/query the swapchain size. This function performs both roles: it tries +// setting the swapchain size to the values requested by the user, and returns +// in the same variables what width/height the swapchain was actually set to - +// which may be (substantially) different from the values requested by the +// user. A value of 0 means "unknown/none" (in which case, libplacebo won't try +// updating the size - it will simply return the current state of the +// swapchain). It's also possible for libplacebo to return values of 0, such as +// in the case that the swapchain doesn't exist yet. +// +// Returns false on significant errors (e.g. dead surface). This function can +// effectively be used to probe if creating a swapchain works. +PL_API bool pl_swapchain_resize(pl_swapchain sw, int *width, int *height); + +// Backwards compatibility +#define pl_swapchain_colors pl_color_space + +// Inform the swapchain about the input color space. 
This API deliberately +// provides no feedback, because the swapchain can internally decide what to do +// with this information, including ignoring it entirely, or applying it +// asynchronously. Users must still base their rendering on the value of +// `pl_swapchain_frame.color_space`. +// +// Note: Calling this function a second time completely overrides any +// previously specified hint. So calling this on {0} or NULL resets the +// swapchain back to its initial/preferred colorspace. +// +// Note: If `csp->transfer` is a HDR transfer curve but HDR metadata is left +// unspecified, the HDR metadata defaults to `pl_hdr_metadata_hdr10`. +// Conversely, if the HDR metadata is non-empty but `csp->transfer` is left as +// PL_COLOR_TRC_UNKNOWN, then it instead defaults to PL_COLOR_TRC_PQ. +PL_API void pl_swapchain_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp); + +// The struct used to hold the results of `pl_swapchain_start_frame` +struct pl_swapchain_frame { + // A texture representing the framebuffer users should use for rendering. + // It's guaranteed that `fbo->params.renderable` and `fbo->params.blit_dst` + // will be true, but no other guarantees are made - not even that + // `fbo->params.format` is a real format. + pl_tex fbo; + + // If true, the user should assume that this framebuffer will be flipped + // as a result of presenting it on-screen. If false, nothing special needs + // to be done - but if true, users should flip the coordinate system of + // the `pl_pass` that is rendering to this framebuffer. + // + // Note: Normally, libplacebo follows the convention that (0,0) represents + // the top left of the image/screen. So when flipped is true, this means + // (0,0) on this framebuffer gets displayed as the bottom left of the image. 
+ bool flipped; + + // Indicates the color representation this framebuffer will be interpreted + // as by the host system / compositor / display, including the bit depth + // and alpha handling (where available). + struct pl_color_repr color_repr; + struct pl_color_space color_space; +}; + +// Retrieve a new frame from the swapchain. Returns whether successful. It's +// worth noting that this function can fail sporadically for benign reasons, +// for example the window being invisible or inaccessible. This function may +// block until an image is available, which may be the case if the GPU is +// rendering frames significantly faster than the display can output them. It +// may also be non-blocking, so users shouldn't rely on this call alone in +// order to meter rendering speed. (Specifics depend on the underlying graphics +// API) +PL_API bool pl_swapchain_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame); + +// Submits the previously started frame. Non-blocking. This must be issued in +// lockstep with pl_swapchain_start_frame - there is no way to start multiple +// frames and submit them out-of-order. The frames submitted this way will +// generally be made visible in a first-in first-out fashion, although +// specifics depend on the mechanism used to create the pl_swapchain. (See the +// platform-specific APIs for more info). +// +// Returns whether successful. This should normally never fail, unless the +// GPU/surface has been lost or some other critical error has occurred. The +// "started" frame is consumed even in the event of failure. +// +// Note that `start_frame` and `submit_frame` form a lock pair, i.e. trying to +// call e.g. `pl_swapchain_resize` from another thread will block until +// `pl_swapchain_submit_frame` is finished. +PL_API bool pl_swapchain_submit_frame(pl_swapchain sw); + +// Performs a "buffer swap", or some generalization of the concept. 
In layman's +// terms, this blocks until the execution of the Nth previously submitted frame +// has been "made complete" in some sense. (The N derives from the swapchain's +// built-in latency. See `pl_swapchain_latency` for more information). +// +// Users should include this call in their rendering loops in order to make +// sure they aren't submitting rendering commands faster than the GPU can +// process them, which would potentially lead to a queue overrun or exhaust +// memory. +// +// An example loop might look like this: +// +// while (rendering) { +// struct pl_swapchain_frame frame; +// bool ok = pl_swapchain_start_frame(swapchain, &frame); +// if (!ok) { +// /* wait some time, or decide to stop rendering */ +// continue; +// } +// +// /* do some rendering with frame.fbo */ +// +// ok = pl_swapchain_submit_frame(swapchain); +// if (!ok) +// break; +// +// pl_swapchain_swap_buffers(swapchain); +// } +// +// The duration this function blocks for, if at all, may be very inconsistent +// and should not be used as an authoritative source of vsync timing +// information without sufficient smoothing/filtering (and if so, the time that +// `start_frame` blocked for should also be included). +PL_API void pl_swapchain_swap_buffers(pl_swapchain sw); + +PL_API_END + +#endif // LIBPLACEBO_SWAPCHAIN_H_ diff --git a/src/include/libplacebo/tone_mapping.h b/src/include/libplacebo/tone_mapping.h new file mode 100644 index 0000000..48f1eb7 --- /dev/null +++ b/src/include/libplacebo/tone_mapping.h @@ -0,0 +1,268 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_TONE_MAPPING_H_ +#define LIBPLACEBO_TONE_MAPPING_H_ + +#include <stddef.h> +#include <stdbool.h> + +#include <libplacebo/common.h> +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +struct pl_tone_map_params; +struct pl_tone_map_function { + const char *name; // Identifier + const char *description; // Friendly / longer name + + // This controls the type of values input/output to/from `map` + enum pl_hdr_scaling scaling; + + // The tone-mapping function itself. Iterates over all values in `lut`, and + // adapts them as needed. + // + // Note that the `params` struct fed into this function is guaranteed to + // satisfy `params->input_scaling == params->output_scaling == scaling`, + // and also obeys `params->input_max >= params->output_max`. + void (*map)(float *lut, const struct pl_tone_map_params *params); + + // Inverse tone mapping function. Optional. If absent, this tone mapping + // curve only works in the forwards direction. + // + // For this function, `params->input_max <= params->output_max`. + void (*map_inverse)(float *lut, const struct pl_tone_map_params *params); + + // Private data. Unused by libplacebo, but may be accessed by `map`. + void *priv; + + // --- Deprecated fields + const char *param_desc PL_DEPRECATED; + float param_min PL_DEPRECATED; + float param_def PL_DEPRECATED; + float param_max PL_DEPRECATED; +}; + +struct pl_tone_map_constants { + // Configures the knee point, as a ratio between the source average and + // target average (in PQ space). 
An adaptation of 1.0 always adapts the + // source scene average brightness to the (scaled) target average, + // while a value of 0.0 never modifies scene brightness. [0,1] + // + // Affects all methods that use the ST2094 knee point determination + // (currently ST2094-40, ST2094-10 and spline) + float knee_adaptation; + + // Configures the knee point minimum and maximum, respectively, as + // a fraction of the PQ luminance range. Provides a hard limit on the + // knee point chosen by `knee_adaptation`. + float knee_minimum; // (0, 0.5) + float knee_maximum; // (0.5, 1.0) + + // Default knee point to use in the absence of source scene average + // metadata. Normally, this is ignored in favor of picking the knee + // point as the (relative) source scene average brightness level. + float knee_default; // [knee_minimum, knee_maximum] + + // Knee point offset (for BT.2390 only). Note that a value of 0.5 is + // the spec-defined default behavior, which differs from the libplacebo + // default of 1.0. [0.5, 2] + float knee_offset; + + // For the single-pivot polynomial (spline) function, this controls the + // coefficients used to tune the slope of the curve. This tuning is designed + // to make the slope closer to 1.0 when the difference in peaks is low, + // and closer to linear when the difference between peaks is high. + float slope_tuning; // [0,10] + float slope_offset; // [0,1] + + // Contrast setting for the spline function. Higher values make the curve + // steeper (closer to `clip`), preserving midtones at the cost of losing + // shadow/highlight details, while lower values make the curve shallower + // (closer to `linear`), preserving highlights at the cost of losing midtone + // contrast. Values above 1.0 are possible, resulting in an output with more + // contrast than the input. + float spline_contrast; // [0,1.5] + + // For the reinhard function, this specifies the local contrast coefficient + // at the display peak.
Essentially, a value of 0.5 implies that the + // reference white will be about half as bright as when clipping. (0,1) + float reinhard_contrast; + + // For legacy functions (mobius, gamma) which operate on linear light, this + // directly sets the corresponding knee point. (0,1) + float linear_knee; + + // For linear methods (linear, linearlight), this controls the linear + // exposure/gain applied to the image. (0,10] + float exposure; +}; + +#define PL_TONE_MAP_CONSTANTS \ + .knee_adaptation = 0.4f, \ + .knee_minimum = 0.1f, \ + .knee_maximum = 0.8f, \ + .knee_default = 0.4f, \ + .knee_offset = 1.0f, \ + .slope_tuning = 1.5f, \ + .slope_offset = 0.2f, \ + .spline_contrast = 0.5f, \ + .reinhard_contrast = 0.5f, \ + .linear_knee = 0.3f, \ + .exposure = 1.0f, + +struct pl_tone_map_params { + // If `function` is NULL, defaults to `pl_tone_map_clip`. + const struct pl_tone_map_function *function; + + // Common constants, should be initialized to PL_TONE_MAP_CONSTANTS if + // not intending to override them further. + struct pl_tone_map_constants constants; + + // The desired input/output scaling of the tone map. If this differs from + // `function->scaling`, any required conversion will be performed. + // + // Note that to maximize LUT efficiency, it's *highly* recommended to use + // either PL_HDR_PQ or PL_HDR_SQRT as the input scaling, except when + // using `pl_tone_map_sample`. + enum pl_hdr_scaling input_scaling; + enum pl_hdr_scaling output_scaling; + + // The size of the resulting LUT. (For `pl_tone_map_generate` only) + size_t lut_size; + + // The characteristics of the input, in `input_scaling` units. + float input_min; + float input_max; + float input_avg; // or 0 if unknown + + // The desired characteristics of the output, in `output_scaling` units. + float output_min; + float output_max; + + // The input HDR metadata. Only used by a select few tone-mapping + // functions, currently only SMPTE ST2094. 
(Optional) + struct pl_hdr_metadata hdr; + + // --- Deprecated fields + float param PL_DEPRECATED; // see `constants` +}; + +// Expands to a pointer to a `struct pl_tone_map_params` compound literal +// initialized from the given fields. Deliberately has no trailing semicolon, +// so it can be used anywhere an expression is allowed (e.g. as an argument). +#define pl_tone_map_params(...) (&(struct pl_tone_map_params) { __VA_ARGS__ }) + +// Note: Only does pointer equality testing on `function` +PL_API bool pl_tone_map_params_equal(const struct pl_tone_map_params *a, + const struct pl_tone_map_params *b); + +// Clamps/defaults the parameters, including input/output maximum. +PL_API void pl_tone_map_params_infer(struct pl_tone_map_params *params); + +// Returns true if the given tone mapping configuration effectively represents +// a no-op configuration. Tone mapping can be skipped in this case (although +// strictly speaking, the LUT would still clip illegal input values) +PL_API bool pl_tone_map_params_noop(const struct pl_tone_map_params *params); + +// Generate a tone-mapping LUT for a given configuration. This will always +// span the entire input range, as given by `input_min` and `input_max`. +PL_API void pl_tone_map_generate(float *out, const struct pl_tone_map_params *params); + +// Samples a tone mapping function at a single position. Note that this is less +// efficient than `pl_tone_map_generate` for generating multiple values. +// +// Ignores `params->lut_size`. +PL_API float pl_tone_map_sample(float x, const struct pl_tone_map_params *params); + +// Performs no tone-mapping, just clips out-of-range colors. Retains perfect +// color accuracy for in-range colors but completely destroys out-of-range +// information. Does not perform any black point adaptation. +PL_API extern const struct pl_tone_map_function pl_tone_map_clip; + +// EETF from SMPTE ST 2094-40 Annex B, which uses the provided OOTF based on +// Bezier curves to perform tone-mapping. The OOTF used is adjusted based on +// the ratio between the targeted and actual display peak luminances. In the +// absence of HDR10+ metadata, falls back to a simple constant bezier curve.
+PL_API extern const struct pl_tone_map_function pl_tone_map_st2094_40; + +// EETF from SMPTE ST 2094-10 Annex B.2, which takes into account the input +// signal average luminance in addition to the maximum/minimum. +// +// Note: This does *not* currently include the subjective gain/offset/gamma +// controls defined in Annex B.3. (Open an issue with a valid sample file if +// you want such parameters to be respected.) +PL_API extern const struct pl_tone_map_function pl_tone_map_st2094_10; + +// EETF from the ITU-R Report BT.2390, a hermite spline roll-off with linear +// segment. +PL_API extern const struct pl_tone_map_function pl_tone_map_bt2390; + +// EETF from ITU-R Report BT.2446, method A. Can be used for both forward +// and inverse tone mapping. +PL_API extern const struct pl_tone_map_function pl_tone_map_bt2446a; + +// Simple spline consisting of two polynomials, joined by a single pivot point, +// which is tuned based on the source scene average brightness (taking into +// account dynamic metadata if available). This function can be used +// for both forward and inverse tone mapping. +PL_API extern const struct pl_tone_map_function pl_tone_map_spline; + +// Very simple non-linear curve. Named after Erik Reinhard. +PL_API extern const struct pl_tone_map_function pl_tone_map_reinhard; + +// Generalization of the reinhard tone mapping algorithm to support an +// additional linear slope near black. The name is derived from its function +// shape (ax+b)/(cx+d), which is known as a Möbius transformation. +PL_API extern const struct pl_tone_map_function pl_tone_map_mobius; + +// Piece-wise, filmic tone-mapping algorithm developed by John Hable for use in +// Uncharted 2, inspired by a similar tone-mapping algorithm used by Kodak. +// Popularized by its use in video games with HDR rendering. Preserves both +// dark and bright details very well, but comes with the drawback of changing +// the average brightness quite significantly. 
This is sort of similar to +// pl_tone_map_reinhard with `reinhard_contrast=0.24`. +PL_API extern const struct pl_tone_map_function pl_tone_map_hable; + +// Fits a gamma (power) function to transfer between the source and target +// color spaces, effectively resulting in a perceptual hard-knee joining two +// roughly linear sections. This preserves details at all scales, but can result +// in an image with a muted or dull appearance. +PL_API extern const struct pl_tone_map_function pl_tone_map_gamma; + +// Linearly stretches the input range to the output range, in PQ space. This +// will preserve all details accurately, but results in a significantly +// different average brightness. Can be used for inverse tone-mapping in +// addition to regular tone-mapping. +PL_API extern const struct pl_tone_map_function pl_tone_map_linear; + +// Like `pl_tone_map_linear`, but in linear light (instead of PQ). Works well +// for small range adjustments but may cause severe darkening when +// downconverting from e.g. 10k nits to SDR. +PL_API extern const struct pl_tone_map_function pl_tone_map_linear_light; + +// A list of built-in tone mapping functions, terminated by NULL +PL_API extern const struct pl_tone_map_function * const pl_tone_map_functions[]; +PL_API extern const int pl_num_tone_map_functions; // excluding trailing NULL + +// Find the tone mapping function with the given name, or NULL on failure. +PL_API const struct pl_tone_map_function *pl_find_tone_map_function(const char *name); + +// Deprecated alias, do not use +#define pl_tone_map_auto pl_tone_map_spline + +PL_API_END + +#endif // LIBPLACEBO_TONE_MAPPING_H_ diff --git a/src/include/libplacebo/utils/dav1d.h b/src/include/libplacebo/utils/dav1d.h new file mode 100644 index 0000000..ece97c5 --- /dev/null +++ b/src/include/libplacebo/utils/dav1d.h @@ -0,0 +1,129 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DAV1D_H_ +#define LIBPLACEBO_DAV1D_H_ + +#include <libplacebo/gpu.h> +#include <libplacebo/utils/upload.h> +#include <dav1d/dav1d.h> + +#if defined(__cplusplus) && !defined(PL_DAV1D_IMPLEMENTATION) +# define PL_DAV1D_API +# define PL_DAV1D_IMPLEMENTATION 0 +# warning Remember to include this file with a PL_DAV1D_IMPLEMENTATION set to 1 in \ + C translation unit to provide implementation. Suppress this warning by \ + defining PL_DAV1D_IMPLEMENTATION to 0 in C++ files. +#elif !defined(PL_DAV1D_IMPLEMENTATION) +# define PL_DAV1D_API static inline +# define PL_DAV1D_IMPLEMENTATION 1 +#else +# define PL_DAV1D_API +#endif + +PL_API_BEGIN + +// Fill in the details of a `pl_frame` from a Dav1dPicture. This function will +// explicitly clear `out_frame`, setting all extra fields to 0. After this +// function returns, the only missing data is information related to the plane +// texture itself (`planes[N].texture`). +// +// Note: This will include all possible metadata, including HDR metadata and +// AV1 film grain data. Users should explicitly clear this out if undesired. +PL_DAV1D_API void pl_frame_from_dav1dpicture(struct pl_frame *out_frame, + const Dav1dPicture *picture); + +// Helper function to generate a `pl_color_space` struct from a Dav1dPicture. 
+// Useful to update the swapchain colorspace mode dynamically (e.g. for HDR). +PL_DAV1D_API void pl_swapchain_colors_from_dav1dpicture(struct pl_color_space *out_colors, + const Dav1dPicture *picture); + +struct pl_dav1d_upload_params { + // The picture to upload. Not modified unless `asynchronous` is true. + Dav1dPicture *picture; + + // If true, film grain present in `picture` will be exported to the + // `pl_frame` as well. This should be set to false unless the user has + // disabled `Dav1dSettings.apply_grain`. + bool film_grain; + + // If true, libplacebo will probe for the allocation metadata set by + // `pl_allocate_dav1dpicture`, and directly import the attached buffers + // (saving a memcpy in some cases). Has no effect if the Dav1dPicture was + // not allocated using `pl_allocate_dav1dpicture`. + // + // Note: When this is the case, `asynchronous` has no further effect - + // uploads from attached buffers are already asynchronous. + bool gpu_allocated; + + // If true, `picture` will be asynchronously uploaded and unref'd + // internally by libplacebo, and the struct passed by the user cleared to + // {0}. This is needed to avoid `memcpy` in some cases, so setting it to + // true is highly recommended wherever possible. + // + // Note: If `pl_upload_dav1dpicture` returns false, `picture` does not get + // unref'd. + bool asynchronous; +}; + +#define pl_dav1d_upload_params(...) (&(struct pl_dav1d_upload_params) { __VA_ARGS__ }) + +// Very high level helper function to take a `Dav1dPicture` and upload it to +// the GPU. Similar in spirit to `pl_upload_plane`, and the same notes apply. +// `tex` must be an array of 3 pointers of type `pl_tex`, each +// either pointing to a valid texture, or NULL. Returns whether successful. +PL_DAV1D_API bool pl_upload_dav1dpicture(pl_gpu gpu, + struct pl_frame *out_frame, pl_tex tex[3], + const struct pl_dav1d_upload_params *params); + +// Allocate a Dav1dPicture from persistently mapped buffers. 
This can be more +// efficient than regular Dav1dPictures, especially when using the synchronous +// `pl_upload_dav1dpicture`, or on platforms that don't support importing +// PL_HANDLE_HOST_PTR as buffers. Returns 0 or a negative DAV1D_ERR value. +// +// Note: These may only be used directly as a Dav1dPicAllocator if the `gpu` +// passed as the value of `cookie` is `pl_gpu.limits.thread_safe`. Otherwise, +// the user must manually synchronize this to ensure it runs on the correct +// thread. +PL_DAV1D_API int pl_allocate_dav1dpicture(Dav1dPicture *picture, void *gpu); +PL_DAV1D_API void pl_release_dav1dpicture(Dav1dPicture *picture, void *gpu); + +// Mapping functions for the various Dav1dColor* enums. Note that these are not +// quite 1:1, and even for values that exist in both, the semantics sometimes +// differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in +// libplacebo and libdav1d, respectively. +PL_DAV1D_API enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc); +PL_DAV1D_API enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys); +PL_DAV1D_API enum pl_color_levels pl_levels_from_dav1d(int color_range); +PL_DAV1D_API int pl_levels_to_dav1d(enum pl_color_levels levels); +PL_DAV1D_API enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim); +PL_DAV1D_API enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim); +PL_DAV1D_API enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc); +PL_DAV1D_API enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc); +PL_DAV1D_API enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc); +PL_DAV1D_API enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc); + + +// Actual implementation, included as part of this header to avoid having +// a compile-time dependency on libdav1d. 
+#if PL_DAV1D_IMPLEMENTATION +# include <libplacebo/utils/dav1d_internal.h> +#endif + +PL_API_END + +#endif // LIBPLACEBO_DAV1D_H_ diff --git a/src/include/libplacebo/utils/dav1d_internal.h b/src/include/libplacebo/utils/dav1d_internal.h new file mode 100644 index 0000000..2e0512a --- /dev/null +++ b/src/include/libplacebo/utils/dav1d_internal.h @@ -0,0 +1,613 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef LIBPLACEBO_DAV1D_H_ +#error This header should be included as part of <libplacebo/utils/dav1d.h> +#elif defined(__cplusplus) +#error This header cannot be included from C++ define PL_DAV1D_IMPLEMENTATION appropriately +#else + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +PL_DAV1D_API enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc) +{ + switch (mc) { + case DAV1D_MC_IDENTITY: return PL_COLOR_SYSTEM_RGB; // or XYZ (unlikely) + case DAV1D_MC_BT709: return PL_COLOR_SYSTEM_BT_709; + case DAV1D_MC_UNKNOWN: return PL_COLOR_SYSTEM_UNKNOWN; + case DAV1D_MC_FCC: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case DAV1D_MC_BT470BG: return PL_COLOR_SYSTEM_BT_601; + case DAV1D_MC_BT601: return PL_COLOR_SYSTEM_BT_601; + case DAV1D_MC_SMPTE240: return PL_COLOR_SYSTEM_SMPTE_240M; + case DAV1D_MC_SMPTE_YCGCO: return PL_COLOR_SYSTEM_YCGCO; + case DAV1D_MC_BT2020_NCL: return PL_COLOR_SYSTEM_BT_2020_NC; + case DAV1D_MC_BT2020_CL: return PL_COLOR_SYSTEM_BT_2020_C; + case DAV1D_MC_SMPTE2085: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case DAV1D_MC_CHROMAT_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case DAV1D_MC_CHROMAT_CL: return PL_COLOR_SYSTEM_UNKNOWN; // missing + // Note: this colorspace is confused between PQ and HLG, which dav1d + // requires inferring from other sources, but libplacebo makes + // explicit. Default to PQ as it's the more common scenario. 
+ case DAV1D_MC_ICTCP: return PL_COLOR_SYSTEM_BT_2100_PQ; + case DAV1D_MC_RESERVED: abort(); + } + + return PL_COLOR_SYSTEM_UNKNOWN; +} + +PL_DAV1D_API enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys) +{ + switch (sys) { + case PL_COLOR_SYSTEM_UNKNOWN: return DAV1D_MC_UNKNOWN; + case PL_COLOR_SYSTEM_BT_601: return DAV1D_MC_BT601; + case PL_COLOR_SYSTEM_BT_709: return DAV1D_MC_BT709; + case PL_COLOR_SYSTEM_SMPTE_240M: return DAV1D_MC_SMPTE240; + case PL_COLOR_SYSTEM_BT_2020_NC: return DAV1D_MC_BT2020_NCL; + case PL_COLOR_SYSTEM_BT_2020_C: return DAV1D_MC_BT2020_CL; + case PL_COLOR_SYSTEM_BT_2100_PQ: return DAV1D_MC_ICTCP; + case PL_COLOR_SYSTEM_BT_2100_HLG: return DAV1D_MC_ICTCP; + case PL_COLOR_SYSTEM_DOLBYVISION: return DAV1D_MC_UNKNOWN; // missing + case PL_COLOR_SYSTEM_YCGCO: return DAV1D_MC_SMPTE_YCGCO; + case PL_COLOR_SYSTEM_RGB: return DAV1D_MC_IDENTITY; + case PL_COLOR_SYSTEM_XYZ: return DAV1D_MC_IDENTITY; + case PL_COLOR_SYSTEM_COUNT: abort(); + } + + return DAV1D_MC_UNKNOWN; +} + +PL_DAV1D_API enum pl_color_levels pl_levels_from_dav1d(int color_range) +{ + return color_range ? 
PL_COLOR_LEVELS_FULL : PL_COLOR_LEVELS_LIMITED; +} + +PL_DAV1D_API int pl_levels_to_dav1d(enum pl_color_levels levels) +{ + return levels == PL_COLOR_LEVELS_FULL; +} + +PL_DAV1D_API enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim) +{ + switch (prim) { + case DAV1D_COLOR_PRI_BT709: return PL_COLOR_PRIM_BT_709; + case DAV1D_COLOR_PRI_UNKNOWN: return PL_COLOR_PRIM_UNKNOWN; + case DAV1D_COLOR_PRI_RESERVED: return PL_COLOR_PRIM_UNKNOWN; + case DAV1D_COLOR_PRI_BT470M: return PL_COLOR_PRIM_BT_470M; + case DAV1D_COLOR_PRI_BT470BG: return PL_COLOR_PRIM_BT_601_625; + case DAV1D_COLOR_PRI_BT601: return PL_COLOR_PRIM_BT_601_525; + case DAV1D_COLOR_PRI_SMPTE240: return PL_COLOR_PRIM_BT_601_525; + case DAV1D_COLOR_PRI_FILM: return PL_COLOR_PRIM_FILM_C; + case DAV1D_COLOR_PRI_BT2020: return PL_COLOR_PRIM_BT_2020; + case DAV1D_COLOR_PRI_XYZ: return PL_COLOR_PRIM_UNKNOWN; + case DAV1D_COLOR_PRI_SMPTE431: return PL_COLOR_PRIM_DCI_P3; + case DAV1D_COLOR_PRI_SMPTE432: return PL_COLOR_PRIM_DISPLAY_P3; + case DAV1D_COLOR_PRI_EBU3213: return PL_COLOR_PRIM_EBU_3213; + } + + return PL_COLOR_PRIM_UNKNOWN; +} + +PL_DAV1D_API enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim) +{ + switch (prim) { + case PL_COLOR_PRIM_UNKNOWN: return DAV1D_COLOR_PRI_UNKNOWN; + case PL_COLOR_PRIM_BT_601_525: return DAV1D_COLOR_PRI_BT601; + case PL_COLOR_PRIM_BT_601_625: return DAV1D_COLOR_PRI_BT470BG; + case PL_COLOR_PRIM_BT_709: return DAV1D_COLOR_PRI_BT709; + case PL_COLOR_PRIM_BT_470M: return DAV1D_COLOR_PRI_BT470M; + case PL_COLOR_PRIM_EBU_3213: return DAV1D_COLOR_PRI_EBU3213; + case PL_COLOR_PRIM_BT_2020: return DAV1D_COLOR_PRI_BT2020; + case PL_COLOR_PRIM_APPLE: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_ADOBE: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_PRO_PHOTO: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_CIE_1931: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_DCI_P3: 
return DAV1D_COLOR_PRI_SMPTE431; + case PL_COLOR_PRIM_DISPLAY_P3: return DAV1D_COLOR_PRI_SMPTE432; + case PL_COLOR_PRIM_V_GAMUT: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_S_GAMUT: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_FILM_C: return DAV1D_COLOR_PRI_FILM; + case PL_COLOR_PRIM_ACES_AP0: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_ACES_AP1: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_COUNT: abort(); + } + + return DAV1D_COLOR_PRI_UNKNOWN; +} + +// Maps a dav1d transfer characteristics value to the corresponding +// `pl_color_transfer`. Values without a libplacebo equivalent, including the +// reserved code point, map to PL_COLOR_TRC_UNKNOWN rather than aborting, +// since `trc` typically originates from the bitstream. +PL_DAV1D_API enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc) +{ + switch (trc) { + case DAV1D_TRC_BT709: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case DAV1D_TRC_UNKNOWN: return PL_COLOR_TRC_UNKNOWN; + case DAV1D_TRC_BT470M: return PL_COLOR_TRC_GAMMA22; + case DAV1D_TRC_BT470BG: return PL_COLOR_TRC_GAMMA28; + case DAV1D_TRC_BT601: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case DAV1D_TRC_SMPTE240: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case DAV1D_TRC_LINEAR: return PL_COLOR_TRC_LINEAR; + case DAV1D_TRC_LOG100: return PL_COLOR_TRC_UNKNOWN; // missing + case DAV1D_TRC_LOG100_SQRT10: return PL_COLOR_TRC_UNKNOWN; // missing + case DAV1D_TRC_IEC61966: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case DAV1D_TRC_BT1361: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case DAV1D_TRC_SRGB: return PL_COLOR_TRC_SRGB; + case DAV1D_TRC_BT2020_10BIT: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case DAV1D_TRC_BT2020_12BIT: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case DAV1D_TRC_SMPTE2084: return PL_COLOR_TRC_PQ; + case DAV1D_TRC_SMPTE428: return PL_COLOR_TRC_ST428; + case DAV1D_TRC_HLG: return PL_COLOR_TRC_HLG; + case DAV1D_TRC_RESERVED: return PL_COLOR_TRC_UNKNOWN; // reserved code point + } + + return PL_COLOR_TRC_UNKNOWN; +} + +PL_DAV1D_API enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc) +{ + switch (trc) { + case PL_COLOR_TRC_UNKNOWN: return DAV1D_TRC_UNKNOWN; + case PL_COLOR_TRC_BT_1886: return
DAV1D_TRC_BT709; // EOTF != OETF + case PL_COLOR_TRC_SRGB: return DAV1D_TRC_SRGB; + case PL_COLOR_TRC_LINEAR: return DAV1D_TRC_LINEAR; + case PL_COLOR_TRC_GAMMA18: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA20: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA22: return DAV1D_TRC_BT470M; + case PL_COLOR_TRC_GAMMA24: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA26: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA28: return DAV1D_TRC_BT470BG; + case PL_COLOR_TRC_ST428: return DAV1D_TRC_SMPTE428; + case PL_COLOR_TRC_PRO_PHOTO: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_PQ: return DAV1D_TRC_SMPTE2084; + case PL_COLOR_TRC_HLG: return DAV1D_TRC_HLG; + case PL_COLOR_TRC_V_LOG: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_S_LOG1: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_S_LOG2: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_COUNT: abort(); + } + + return DAV1D_TRC_UNKNOWN; +} + +PL_DAV1D_API enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc) +{ + switch (loc) { + case DAV1D_CHR_UNKNOWN: return PL_CHROMA_UNKNOWN; + case DAV1D_CHR_VERTICAL: return PL_CHROMA_LEFT; + case DAV1D_CHR_COLOCATED: return PL_CHROMA_TOP_LEFT; + } + + return PL_CHROMA_UNKNOWN; +} + +PL_DAV1D_API enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc) +{ + switch (loc) { + case PL_CHROMA_UNKNOWN: return DAV1D_CHR_UNKNOWN; + case PL_CHROMA_LEFT: return DAV1D_CHR_VERTICAL; + case PL_CHROMA_CENTER: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_TOP_LEFT: return DAV1D_CHR_COLOCATED; + case PL_CHROMA_TOP_CENTER: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_BOTTOM_LEFT: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_BOTTOM_CENTER: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_COUNT: abort(); + } + + return DAV1D_CHR_UNKNOWN; +} + +static inline float pl_fixed24_8(uint32_t n) +{ + return (float) n / (1 << 8); +} + 
+static inline float pl_fixed18_14(uint32_t n) +{ + return (float) n / (1 << 14); +} + +static inline float pl_fixed0_16(uint16_t n) +{ + return (float) n / (1 << 16); +} + +// Align to a power of 2 +#define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) + +PL_DAV1D_API void pl_frame_from_dav1dpicture(struct pl_frame *out, + const Dav1dPicture *picture) +{ + const Dav1dSequenceHeader *seq_hdr = picture->seq_hdr; + int num_planes; + switch (picture->p.layout) { + case DAV1D_PIXEL_LAYOUT_I400: + num_planes = 1; + break; + case DAV1D_PIXEL_LAYOUT_I420: + case DAV1D_PIXEL_LAYOUT_I422: + case DAV1D_PIXEL_LAYOUT_I444: + num_planes = 3; + break; + default: abort(); + } + + *out = (struct pl_frame) { + .num_planes = num_planes, + .planes = { + // Components are always in order, which makes things easy + { + .components = 1, + .component_mapping = {0}, + }, { + .components = 1, + .component_mapping = {1}, + }, { + .components = 1, + .component_mapping = {2}, + }, + }, + .crop = { + 0, 0, picture->p.w, picture->p.h, + }, + .color = { + .primaries = pl_primaries_from_dav1d(seq_hdr->pri), + .transfer = pl_transfer_from_dav1d(seq_hdr->trc), + }, + .repr = { + .sys = pl_system_from_dav1d(seq_hdr->mtrx), + .levels = pl_levels_from_dav1d(seq_hdr->color_range), + .bits = { + .sample_depth = PL_ALIGN2(picture->p.bpc, 8), + .color_depth = picture->p.bpc, + }, + }, + }; + + if (seq_hdr->mtrx == DAV1D_MC_ICTCP && seq_hdr->trc == DAV1D_TRC_HLG) { + + // dav1d makes no distinction between PQ and HLG ICtCp, so we need + // to manually fix it in the case that we have HLG ICtCp data. + out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG; + + } else if (seq_hdr->mtrx == DAV1D_MC_IDENTITY && + seq_hdr->pri == DAV1D_COLOR_PRI_XYZ) + { + + // dav1d handles this as a special case, but doesn't provide an + // explicit flag for it either, so we have to resort to this ugly hack, + // even though CIE 1931 RGB *is* a valid thing in principle! 
+ out->repr.sys= PL_COLOR_SYSTEM_XYZ; + + } else if (!out->repr.sys) { + + // PL_COLOR_SYSTEM_UNKNOWN maps to RGB, so hard-code this one + out->repr.sys = pl_color_system_guess_ycbcr(picture->p.w, picture->p.h); + } + + const Dav1dContentLightLevel *cll = picture->content_light; + if (cll) { + out->color.hdr.max_cll = cll->max_content_light_level; + out->color.hdr.max_fall = cll->max_frame_average_light_level; + } + + // This overrides the CLL values above, if both are present + const Dav1dMasteringDisplay *md = picture->mastering_display; + if (md) { + out->color.hdr.max_luma = pl_fixed24_8(md->max_luminance); + out->color.hdr.min_luma = pl_fixed18_14(md->min_luminance); + out->color.hdr.prim = (struct pl_raw_primaries) { + .red.x = pl_fixed0_16(md->primaries[0][0]), + .red.y = pl_fixed0_16(md->primaries[0][1]), + .green.x = pl_fixed0_16(md->primaries[1][0]), + .green.y = pl_fixed0_16(md->primaries[1][1]), + .blue.x = pl_fixed0_16(md->primaries[2][0]), + .blue.y = pl_fixed0_16(md->primaries[2][1]), + .white.x = pl_fixed0_16(md->white_point[0]), + .white.y = pl_fixed0_16(md->white_point[1]), + }; + } + + if (picture->frame_hdr->film_grain.present) { + const Dav1dFilmGrainData *fg = &picture->frame_hdr->film_grain.data; + out->film_grain = (struct pl_film_grain_data) { + .type = PL_FILM_GRAIN_AV1, + .seed = fg->seed, + .params.av1 = { + .num_points_y = fg->num_y_points, + .chroma_scaling_from_luma = fg->chroma_scaling_from_luma, + .num_points_uv = { fg->num_uv_points[0], fg->num_uv_points[1] }, + .scaling_shift = fg->scaling_shift, + .ar_coeff_lag = fg->ar_coeff_lag, + .ar_coeff_shift = (int) fg->ar_coeff_shift, + .grain_scale_shift = fg->grain_scale_shift, + .uv_mult = { fg->uv_mult[0], fg->uv_mult[1] }, + .uv_mult_luma = { fg->uv_luma_mult[0], fg->uv_luma_mult[1] }, + .uv_offset = { fg->uv_offset[0], fg->uv_offset[1] }, + .overlap = fg->overlap_flag, + }, + }; + + struct pl_av1_grain_data *av1 = &out->film_grain.params.av1; + memcpy(av1->points_y, fg->y_points, 
sizeof(av1->points_y)); + memcpy(av1->points_uv, fg->uv_points, sizeof(av1->points_uv)); + memcpy(av1->ar_coeffs_y, fg->ar_coeffs_y, sizeof(av1->ar_coeffs_y)); + memcpy(av1->ar_coeffs_uv[0], fg->ar_coeffs_uv[0], sizeof(av1->ar_coeffs_uv[0])); + memcpy(av1->ar_coeffs_uv[1], fg->ar_coeffs_uv[1], sizeof(av1->ar_coeffs_uv[1])); + } + + switch (picture->p.layout) { + case DAV1D_PIXEL_LAYOUT_I400: + case DAV1D_PIXEL_LAYOUT_I444: + break; + case DAV1D_PIXEL_LAYOUT_I420: + case DAV1D_PIXEL_LAYOUT_I422: + // Only set the chroma location for definitely subsampled images + pl_frame_set_chroma_location(out, pl_chroma_from_dav1d(seq_hdr->chr)); + break; + } +} + +// Fills `out_colors` with the primaries and transfer derived from `picture`, +// plus the content light level and mastering display metadata when the +// picture carries them. The parameter type is spelled `struct pl_color_space` +// to match the prototype in <libplacebo/utils/dav1d.h>. +PL_DAV1D_API void pl_swapchain_colors_from_dav1dpicture(struct pl_color_space *out_colors, + const Dav1dPicture *picture) +{ + struct pl_frame frame; + pl_frame_from_dav1dpicture(&frame, picture); + + *out_colors = (struct pl_color_space) { + .primaries = frame.color.primaries, + .transfer = frame.color.transfer, + }; + + const Dav1dContentLightLevel *cll = picture->content_light; + if (cll) { + out_colors->hdr.max_cll = cll->max_content_light_level; + out_colors->hdr.max_fall = cll->max_frame_average_light_level; + } + + const Dav1dMasteringDisplay *md = picture->mastering_display; + if (md) { + out_colors->hdr.min_luma = pl_fixed18_14(md->min_luminance); + out_colors->hdr.max_luma = pl_fixed24_8(md->max_luminance); + out_colors->hdr.prim.red.x = pl_fixed0_16(md->primaries[0][0]); + out_colors->hdr.prim.red.y = pl_fixed0_16(md->primaries[0][1]); + out_colors->hdr.prim.green.x = pl_fixed0_16(md->primaries[1][0]); + out_colors->hdr.prim.green.y = pl_fixed0_16(md->primaries[1][1]); + out_colors->hdr.prim.blue.x = pl_fixed0_16(md->primaries[2][0]); + out_colors->hdr.prim.blue.y = pl_fixed0_16(md->primaries[2][1]); + out_colors->hdr.prim.white.x = pl_fixed0_16(md->white_point[0]); + out_colors->hdr.prim.white.y = pl_fixed0_16(md->white_point[1]); + } +} + +#define PL_MAGIC0 0x2c2a1269 +#define PL_MAGIC1
0xc6d02577 + +struct pl_dav1dalloc { + uint32_t magic[2]; + pl_gpu gpu; + pl_buf buf; +}; + +struct pl_dav1dref { + Dav1dPicture pic; + uint8_t count; +}; + +static void pl_dav1dpicture_unref(void *priv) +{ + struct pl_dav1dref *ref = priv; + if (--ref->count == 0) { + dav1d_picture_unref(&ref->pic); + free(ref); + } +} + +PL_DAV1D_API bool pl_upload_dav1dpicture(pl_gpu gpu, + struct pl_frame *out, + pl_tex tex[3], + const struct pl_dav1d_upload_params *params) +{ + Dav1dPicture *pic = params->picture; + pl_frame_from_dav1dpicture(out, pic); + if (!params->film_grain) + out->film_grain.type = PL_FILM_GRAIN_NONE; + + const int bytes = (pic->p.bpc + 7) / 8; // rounded up + int sub_x = 0, sub_y = 0; + switch (pic->p.layout) { + case DAV1D_PIXEL_LAYOUT_I400: + case DAV1D_PIXEL_LAYOUT_I444: + break; + case DAV1D_PIXEL_LAYOUT_I420: + sub_x = sub_y = 1; + break; + case DAV1D_PIXEL_LAYOUT_I422: + sub_x = 1; + break; + } + + struct pl_plane_data data[3] = { + { + // Y plane + .type = PL_FMT_UNORM, + .width = pic->p.w, + .height = pic->p.h, + .pixel_stride = bytes, + .component_size = {bytes * 8}, + .component_map = {0}, + }, { + // U plane + .type = PL_FMT_UNORM, + .width = pic->p.w >> sub_x, + .height = pic->p.h >> sub_y, + .pixel_stride = bytes, + .component_size = {bytes * 8}, + .component_map = {1}, + }, { + // V plane + .type = PL_FMT_UNORM, + .width = pic->p.w >> sub_x, + .height = pic->p.h >> sub_y, + .pixel_stride = bytes, + .component_size = {bytes * 8}, + .component_map = {2}, + }, + }; + + pl_buf buf = NULL; + struct pl_dav1dalloc *alloc = params->gpu_allocated ? 
pic->allocator_data : NULL; + struct pl_dav1dref *ref = NULL; + + if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) { + // Re-use pre-allocated buffers directly + assert(alloc->gpu == gpu); + buf = alloc->buf; + } else if (params->asynchronous && gpu->limits.callbacks) { + ref = malloc(sizeof(*ref)); + if (!ref) + return false; + memcpy(&ref->pic, pic, sizeof(Dav1dPicture)); + ref->count = out->num_planes; + } + + for (int p = 0; p < out->num_planes; p++) { + ptrdiff_t stride = p > 0 ? pic->stride[1] : pic->stride[0]; + if (stride < 0) { + data[p].pixels = (uint8_t *) pic->data[p] + stride * (data[p].height - 1); + data[p].row_stride = -stride; + out->planes[p].flipped = true; + } else { + data[p].pixels = pic->data[p]; + data[p].row_stride = stride; + } + + if (buf) { + data[p].buf = buf; + data[p].buf_offset = (uintptr_t) data[p].pixels - (uintptr_t) buf->data; + data[p].pixels = NULL; + } else if (ref) { + data[p].priv = ref; + data[p].callback = pl_dav1dpicture_unref; + } + + if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) { + free(ref); + return false; + } + } + + if (params->asynchronous) { + if (ref) { + *pic = (Dav1dPicture) {0}; + } else { + dav1d_picture_unref(pic); + } + } + + return true; +} + +PL_DAV1D_API int pl_allocate_dav1dpicture(Dav1dPicture *p, void *cookie) +{ + pl_gpu gpu = cookie; + if (!gpu->limits.max_mapped_size || !gpu->limits.host_cached || + !gpu->limits.buf_transfer) + { + return DAV1D_ERR(ENOTSUP); + } + + // Copied from dav1d_default_picture_alloc + const int hbd = p->p.bpc > 8; + const int aligned_w = PL_ALIGN2(p->p.w, 128); + const int aligned_h = PL_ALIGN2(p->p.h, 128); + const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400; + const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420; + const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444; + p->stride[0] = aligned_w << hbd; + p->stride[1] = has_chroma ? 
(aligned_w >> ss_hor) << hbd : 0; + + // Align strides up to multiples of the GPU performance hints + p->stride[0] = PL_ALIGN2(p->stride[0], gpu->limits.align_tex_xfer_pitch); + p->stride[1] = PL_ALIGN2(p->stride[1], gpu->limits.align_tex_xfer_pitch); + + // Aligning offsets to 4 also implicitly aligns to the texel alignment (1 or 2) + size_t off_align = PL_ALIGN2(gpu->limits.align_tex_xfer_offset, 4); + const size_t y_sz = PL_ALIGN2(p->stride[0] * aligned_h, off_align); + const size_t uv_sz = PL_ALIGN2(p->stride[1] * (aligned_h >> ss_ver), off_align); + + // The extra DAV1D_PICTURE_ALIGNMENTs are to brute force plane alignment, + // even in the case that the driver gives us insane alignments + const size_t pic_size = y_sz + 2 * uv_sz; + const size_t total_size = pic_size + DAV1D_PICTURE_ALIGNMENT * 4; + + // Validate size limitations + if (total_size > gpu->limits.max_mapped_size) + return DAV1D_ERR(ENOMEM); + + pl_buf buf = pl_buf_create(gpu, pl_buf_params( + .size = total_size, + .host_mapped = true, + .memory_type = PL_BUF_MEM_HOST, + )); + + if (!buf) + return DAV1D_ERR(ENOMEM); + + struct pl_dav1dalloc *alloc = malloc(sizeof(struct pl_dav1dalloc)); + if (!alloc) { + pl_buf_destroy(gpu, &buf); + return DAV1D_ERR(ENOMEM); + } + + *alloc = (struct pl_dav1dalloc) { + .magic = { PL_MAGIC0, PL_MAGIC1 }, + .gpu = gpu, + .buf = buf, + }; + + assert(buf->data); + uintptr_t base = (uintptr_t) buf->data, data[3]; + data[0] = PL_ALIGN2(base, DAV1D_PICTURE_ALIGNMENT); + data[1] = PL_ALIGN2(data[0] + y_sz, DAV1D_PICTURE_ALIGNMENT); + data[2] = PL_ALIGN2(data[1] + uv_sz, DAV1D_PICTURE_ALIGNMENT); + + p->allocator_data = alloc; + p->data[0] = (void *) data[0]; + p->data[1] = (void *) data[1]; + p->data[2] = (void *) data[2]; + return 0; +} + +PL_DAV1D_API void pl_release_dav1dpicture(Dav1dPicture *p, void *cookie) +{ + struct pl_dav1dalloc *alloc = p->allocator_data; + if (!alloc) + return; + + assert(alloc->magic[0] == PL_MAGIC0); + assert(alloc->magic[1] == PL_MAGIC1); + 
assert(alloc->gpu == cookie); + pl_buf_destroy(alloc->gpu, &alloc->buf); + free(alloc); + + p->data[0] = p->data[1] = p->data[2] = p->allocator_data = NULL; +} + +#undef PL_ALIGN2 +#undef PL_MAGIC0 +#undef PL_MAGIC1 + +#endif // LIBPLACEBO_DAV1D_H_ diff --git a/src/include/libplacebo/utils/dolbyvision.h b/src/include/libplacebo/utils/dolbyvision.h new file mode 100644 index 0000000..6d4d72e --- /dev/null +++ b/src/include/libplacebo/utils/dolbyvision.h @@ -0,0 +1,34 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DOLBYVISION_H_ +#define LIBPLACEBO_DOLBYVISION_H_ + +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +// Parses the Dolby Vision RPU, and sets the `pl_hdr_metadata` dynamic +// brightness metadata fields accordingly. +// +// Note: requires `PL_HAVE_LIBDOVI` to be defined, no-op otherwise. +PL_API void pl_hdr_metadata_from_dovi_rpu(struct pl_hdr_metadata *out, + const uint8_t *buf, size_t size); + +PL_API_END + +#endif // LIBPLACEBO_DOLBYVISION_H_ diff --git a/src/include/libplacebo/utils/frame_queue.h b/src/include/libplacebo/utils/frame_queue.h new file mode 100644 index 0000000..2a9c90c --- /dev/null +++ b/src/include/libplacebo/utils/frame_queue.h @@ -0,0 +1,230 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_FRAME_QUEUE_H +#define LIBPLACEBO_FRAME_QUEUE_H + +#include <libplacebo/renderer.h> +#include <libplacebo/shaders/deinterlacing.h> + +PL_API_BEGIN + +// An abstraction layer for automatically turning a conceptual stream of +// (frame, pts) pairs, as emitted by a decoder or filter graph, into a +// `pl_frame_mix` suitable for `pl_render_image_mix`. +// +// This API ensures that minimal work is performed (e.g. only mapping frames +// that are actually required), while also satisfying the requirements +// of any configured frame mixer. +// +// Thread-safety: Safe +typedef struct pl_queue_t *pl_queue; + +enum pl_queue_status { + PL_QUEUE_OK, // success + PL_QUEUE_EOF, // no more frames are available + PL_QUEUE_MORE, // more frames needed, but not (yet) available + PL_QUEUE_ERR = -1, // some unknown error occurred while retrieving frames +}; + +struct pl_source_frame { + // The frame's presentation timestamp, in seconds relative to the first + // frame. These must be monotonically increasing for subsequent frames. + // To implement a discontinuous jump, users must explicitly reset the + // frame queue with `pl_queue_reset` and restart from PTS 0.0. + double pts; + + // The frame's duration. 
This is not needed in normal scenarios, as the + // FPS can be inferred from the `pts` values themselves. Providing it + // only helps initialize the value for initial frames, which can smooth + // out the interpolation weights. Its use is also highly recommended + // when displaying interlaced frames. (Optional) + float duration; + + // If set to something other than PL_FIELD_NONE, this source frame is + // marked as interlaced. It will be split up into two separate frames + // internally, and exported to the resulting `pl_frame_mix` as a pair of + // fields, referencing the corresponding previous and next frames. The + // first field will have the same PTS as `pts`, and the second field will + // be inserted at the timestamp `pts + duration/2`. + // + // Note: As a result of FPS estimates being unreliable around streams with + // mixed FPS (or when mixing interlaced and progressive frames), it's + // highly recommended to always specify a valid `duration` for interlaced + // frames. + enum pl_field first_field; + + // Abstract frame data itself. To allow mapping frames only when they're + // actually needed, frames use a lazy representation. The provided + // callbacks will be invoked to interface with it. + void *frame_data; + + // This will be called to map the frame to the GPU, only if needed. + // + // `tex` is a pointer to an array of 4 texture objects (or NULL), which + // *may* serve as backing storage for the texture being mapped. These are + // intended to be recreated by `map`, e.g. using `pl_tex_recreate` or + // `pl_upload_plane` as appropriate. They will be managed internally by + // `pl_queue` and destroyed at some unspecified future point in time. + // + // Note: If `map` fails, it will not be retried, nor will `discard` be run. + // The user should clean up state in this case. 
+ bool (*map)(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, + struct pl_frame *out_frame); + + // If present, this will be called on frames that are done being used by + // `pl_queue`. This may be useful to e.g. unmap textures backed by external + // APIs such as hardware decoders. (Optional) + void (*unmap)(pl_gpu gpu, struct pl_frame *frame, const struct pl_source_frame *src); + + // This function will be called for frames that are deemed unnecessary + // (e.g. never became visible) and should instead be cleanly freed. + // (Optional) + void (*discard)(const struct pl_source_frame *src); +}; + +// Create a new, empty frame queue. +// +// It's highly recommended to fully render a single frame with `pts == 0.0`, +// and flush the GPU pipeline with `pl_gpu_finish`, prior to starting the timed +// playback loop. +PL_API pl_queue pl_queue_create(pl_gpu gpu); +PL_API void pl_queue_destroy(pl_queue *queue); + +// Explicitly clear the queue. This is essentially equivalent to destroying +// and recreating the queue, but preserves any internal memory allocations. +// +// Note: Calling `pl_queue_reset` may block, if another thread is currently +// blocked on a different `pl_queue_*` call. +PL_API void pl_queue_reset(pl_queue queue); + +// Explicitly push a frame. This is an alternative way to feed the frame queue +// with incoming frames, the other method being the asynchronous callback +// specified as `pl_queue_params.get_frame`. Both methods may be used +// simultaneously, although providing `get_frame` is recommended since it +// avoids the risk of the queue underrunning. +// +// When no more frames are available, call this function with `frame == NULL` +// to indicate EOF and begin draining the frame queue. +PL_API void pl_queue_push(pl_queue queue, const struct pl_source_frame *frame); + +// Variant of `pl_queue_push` that blocks while the queue is judged +// (internally) to be "too full". 
This is useful for asynchronous decoder loops +// in order to prevent the queue from exhausting available RAM if frames are +// decoded significantly faster than they're displayed. +// +// The given `timeout` parameter specifies how long to wait before giving up, +// in nanoseconds. Returns false if this timeout was reached. +PL_API bool pl_queue_push_block(pl_queue queue, uint64_t timeout, + const struct pl_source_frame *frame); + +struct pl_queue_params { + // The PTS of the frame that will be rendered. This should be set to the + // timestamp (in seconds) of the next vsync, relative to the initial frame. + // + // These must be monotonically increasing. To implement a discontinuous + // jump, users must explicitly reset the frame queue with `pl_queue_reset` + // and restart from PTS 0.0. + double pts; + + // The radius of the configured mixer. This should be set to the value + // as returned by `pl_frame_mix_radius`. + float radius; + + // The estimated duration of a vsync, in seconds. This will only be used as + // a hint, the true value will be estimated by comparing `pts` timestamps + // between calls to `pl_queue_update`. (Optional) + float vsync_duration; + + // If the difference between the (estimated) vsync duration and the + // (measured) frame duration is smaller than this threshold, silently + // disable interpolation and switch to ZOH semantics instead. + // + // For example, a value of 0.01 allows the FPS to differ by up to 1% + // without being interpolated. Note that this will result in a continuous + // phase drift unless also compensated for by the user, which will + // eventually result in a dropped or duplicated frame. (Though this can + // be preferable to seeing that same phase drift result in a temporally + // smeared image) + float interpolation_threshold; + + // Specifies how long `pl_queue_update` will wait for frames to become + // available, in nanoseconds, before giving up and returning with + // PL_QUEUE_MORE. 
+ // + // If `get_frame` is provided, this value is ignored by `pl_queue` and + // should instead be interpreted by the provided callback. + uint64_t timeout; + + // This callback will be used to pull new frames from the decoder. It may + // block if needed. The user is responsible for setting appropriate time + // limits and/or returning and interpreting QUEUE_MORE as sensible. + // + // Providing this callback is entirely optional. Users can instead choose + // to manually feed the frame queue with new frames using `pl_queue_push`. + enum pl_queue_status (*get_frame)(struct pl_source_frame *out_frame, + const struct pl_queue_params *params); + void *priv; +}; + +#define pl_queue_params(...) (&(struct pl_queue_params) { __VA_ARGS__ }) + +// Advance the frame queue's internal state to the target timestamp. Any frames +// which are no longer needed (i.e. too far in the past) are automatically +// unmapped and evicted. Any future frames which are needed to fill the queue +// must either have been pushed in advance, or will be requested using the +// provided `get_frame` callback. If you call this on `out_mix == NULL`, the +// queue state will advance, but no frames will be mapped. +// +// This function may return with PL_QUEUE_MORE, in which case the user may wish +// to ensure more frames are available and then re-run this function with the +// same parameters. In this case, `out_mix` is still written to, but it may be +// incomplete (or even contain no frames at all). Additionally, when the source +// contains interlaced frames (see `pl_source_frame.first_field`), this +// function may return with PL_QUEUE_MORE if a frame is missing references to +// a future frame. +// +// The resulting mix of frames in `out_mix` will represent the neighbourhood of +// the target timestamp, and can be passed to `pl_render_image_mix` as-is. +// +// Note: `out_mix` will only remain valid until the next call to +// `pl_queue_update` or `pl_queue_reset`. 
+PL_API enum pl_queue_status pl_queue_update(pl_queue queue, struct pl_frame_mix *out_mix, + const struct pl_queue_params *params); + +// Returns a pl_queue's internal estimates for FPS and VPS (vsyncs per second). +// Returns 0.0 if no estimate is available. +PL_API float pl_queue_estimate_fps(pl_queue queue); +PL_API float pl_queue_estimate_vps(pl_queue queue); + +// Returns the number of frames currently contained in a pl_queue. +PL_API int pl_queue_num_frames(pl_queue queue); + +// Inspect the contents of the Nth queued frame. Returns false if `idx` is +// out of range. +// +// Warning: No guarantee is made to ensure validity of `out->frame_data` +// after this call. In particular, pl_queue_* calls made from another thread +// may call `discard()` on the frame in question. The user bears responsibility +// to avoid accessing `out->frame_data` in a multi-threaded scenario unless +// an external guarantee can be made that the frame won't be dequeued until +// it is done being used by the user. +PL_API bool pl_queue_peek(pl_queue queue, int idx, struct pl_source_frame *out); + +PL_API_END + +#endif // LIBPLACEBO_FRAME_QUEUE_H diff --git a/src/include/libplacebo/utils/libav.h b/src/include/libplacebo/utils/libav.h new file mode 100644 index 0000000..91f3dd8 --- /dev/null +++ b/src/include/libplacebo/utils/libav.h @@ -0,0 +1,284 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_LIBAV_H_ +#define LIBPLACEBO_LIBAV_H_ + +#include <libplacebo/config.h> +#include <libplacebo/gpu.h> +#include <libplacebo/shaders/deinterlacing.h> +#include <libplacebo/utils/upload.h> + +#if defined(__cplusplus) && !defined(PL_LIBAV_IMPLEMENTATION) +# define PL_LIBAV_API +# define PL_LIBAV_IMPLEMENTATION 0 +# warning Remember to include this file with a PL_LIBAV_IMPLEMENTATION set to 1 in \ + C translation unit to provide implementation. Suppress this warning by \ + defining PL_LIBAV_IMPLEMENTATION to 0 in C++ files. +#elif !defined(PL_LIBAV_IMPLEMENTATION) +# define PL_LIBAV_API static inline +# define PL_LIBAV_IMPLEMENTATION 1 +#else +# define PL_LIBAV_API +#endif + +PL_API_BEGIN + +#include <libavformat/avformat.h> +#include <libavutil/frame.h> +#include <libavutil/version.h> +#include <libavcodec/avcodec.h> + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100) && defined(PL_HAVE_DOVI) +# define PL_HAVE_LAV_DOLBY_VISION +# include <libavutil/dovi_meta.h> +#endif + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 61, 100) +# define PL_HAVE_LAV_FILM_GRAIN +# include <libavutil/film_grain_params.h> +#endif + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 25, 100) +# define PL_HAVE_LAV_HDR +# include <libavutil/hdr_dynamic_metadata.h> +# include <libavutil/mastering_display_metadata.h> +#endif + +//------------------------------------------------------------------------ +// Important note: For support for AVVkFrame, which depends on <vulkan.h>, +// users *SHOULD* include <vulkan/vulkan.h> manually before this header. +//------------------------------------------------------------------------ + + +// Fill in the details of a `pl_frame` from an AVFrame. This function will +// explicitly clear `out_frame`, setting all extra fields to 0. 
After this +// function returns, the only missing data is information related to the plane +// texture itself (`planes[N].texture`), as well as any overlays (e.g. +// subtitles). +// +// Note: If the AVFrame contains an embedded ICC profile or H.274 film grain +// metadata, the resulting `out_frame->profile` will reference this pointer, +// meaning that in general, the `pl_frame` is only guaranteed to be valid as +// long as the AVFrame is not freed. +// +// Note: This will ignore Dolby Vision metadata by default (to avoid leaking +// memory), either switch to pl_map_avframe_ex or do it manually using +// pl_map_dovi_metadata. +PL_LIBAV_API void pl_frame_from_avframe(struct pl_frame *out_frame, const AVFrame *frame); + +// Deprecated aliases for backwards compatibility +#define pl_image_from_avframe pl_frame_from_avframe +#define pl_target_from_avframe pl_frame_from_avframe + +// Copy extra metadata from an AVStream to a pl_frame. This should be called +// after `pl_frame_from_avframe` or `pl_map_avframe` (respectively), and sets +// metadata associated with stream-level side data. This is needed because +// FFmpeg rather annoyingly does not propagate stream-level metadata to frames. +PL_LIBAV_API void pl_frame_copy_stream_props(struct pl_frame *out_frame, + const AVStream *stream); + +#ifdef PL_HAVE_LAV_HDR +struct pl_av_hdr_metadata { + // All fields are optional and may be passed as `NULL`. + const AVMasteringDisplayMetadata *mdm; + const AVContentLightMetadata *clm; + const AVDynamicHDRPlus *dhp; +}; + +// Helper function to update a `pl_hdr_metadata` struct from HDR10/HDR10+ +// metadata in the FFmpeg format. Unspecified/invalid elements will be left +// uninitialized in `out`. +PL_LIBAV_API void pl_map_hdr_metadata(struct pl_hdr_metadata *out, + const struct pl_av_hdr_metadata *metadata); +#endif + +#ifdef PL_HAVE_LAV_DOLBY_VISION +// Helper function to map Dolby Vision metadata from the FFmpeg format. 
+PL_LIBAV_API void pl_map_dovi_metadata(struct pl_dovi_metadata *out, + const AVDOVIMetadata *metadata); + +// Helper function to map Dolby Vision metadata from the FFmpeg format +// to `pl_dovi_metadata`, and adds it to the `pl_frame`. +// The `pl_frame` colorspace fields and HDR struct are also updated with +// values from the `AVDOVIMetadata`. +// +// Note: The `pl_dovi_metadata` must be allocated externally. +// Also, currently the metadata is only used if the `AVDOVIRpuDataHeader` +// `disable_residual_flag` field is not zero and can be checked before allocating. +PL_LIBAV_API void pl_frame_map_avdovi_metadata(struct pl_frame *out_frame, + struct pl_dovi_metadata *dovi, + const AVDOVIMetadata *metadata); +#endif + +// Helper function to test if a pixfmt would be supported by the GPU. +// Essentially, this can be used to check if `pl_map_avframe` would work for a +// given AVPixelFormat, without actually uploading or allocating anything. +PL_LIBAV_API bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt); + +// Variant of `pl_test_pixfmt` that also tests for the given capabilities +// being present. Note that in the presence of hardware accelerated frames, +// this cannot be tested without frame-specific information (i.e. swformat), +// but in practice this should be a non-issue as GPU-native hwformats will +// probably be fully supported. +PL_LIBAV_API bool pl_test_pixfmt_caps(pl_gpu gpu, enum AVPixelFormat pixfmt, + enum pl_fmt_caps caps); + +// Like `pl_frame_from_avframe`, but the texture pointers are also initialized +// to ensure they have the correct size and format to match the AVframe. +// Similar in spirit to `pl_recreate_plane`, and the same notes apply. `tex` +// must be an array of 4 pointers of type `pl_tex`, each either +// pointing to a valid texture, or NULL. Returns whether successful. 
+PL_LIBAV_API bool pl_frame_recreate_from_avframe(pl_gpu gpu, struct pl_frame *out_frame, + pl_tex tex[4], const AVFrame *frame); + +struct pl_avframe_params { + // The AVFrame to map. Required. + const AVFrame *frame; + + // Backing textures for frame data. Required for all non-hwdec formats. + // This must point to an array of four valid textures (or NULL entries). + // + // Note: Not cleaned up by `pl_unmap_avframe`. The intent is for users to + // re-use this texture array for subsequent frames, to avoid texture + // creation/destruction overhead. + pl_tex *tex; + + // Also map Dolby Vision metadata (if supported). Note that this also + // overrides the colorimetry metadata (forces BT.2020+PQ). + bool map_dovi; +}; + +#define PL_AVFRAME_DEFAULTS \ + .map_dovi = true, + +#define pl_avframe_params(...) (&(struct pl_avframe_params) { PL_AVFRAME_DEFAULTS __VA_ARGS__ }) + +// Very high level helper function to take an `AVFrame` and map it to the GPU. +// The resulting `pl_frame` remains valid until `pl_unmap_avframe` is called, +// which must be called at some point to clean up state. The `AVFrame` is +// automatically ref'd and unref'd if needed. Returns whether successful. +// +// Note: `out_frame->user_data` points to a privately managed opaque struct +// and must not be touched by the user. +PL_LIBAV_API bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out_frame, + const struct pl_avframe_params *params); +PL_LIBAV_API void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame); + +// Backwards compatibility with previous versions of this API. +PL_LIBAV_API bool pl_map_avframe(pl_gpu gpu, struct pl_frame *out_frame, + pl_tex tex[4], const AVFrame *avframe); + +// Return the AVFrame* that a pl_frame was mapped from (via pl_map_avframe_ex) +// Note: This reference is attached to the `pl_frame` and will get freed by +// pl_unmap_avframe. 
+PL_LIBAV_API AVFrame *pl_get_mapped_avframe(const struct pl_frame *frame); + +// Download the texture contents of a `pl_frame` back to a corresponding +// AVFrame. Blocks until completion. +// +// Note: This function performs minimal verification, so incorrect usage will +// likely result in broken frames. Use `pl_frame_recreate_from_avframe` to +// ensure matching formats. +PL_LIBAV_API bool pl_download_avframe(pl_gpu gpu, + const struct pl_frame *frame, + AVFrame *out_frame); + +// Helper functions to update the colorimetry data in an AVFrame based on +// the values specified in the given color space / color repr / profile. +// +// Note: These functions can and will allocate AVFrame side data if needed, +// in particular to encode HDR metadata in `space.hdr`. +PL_LIBAV_API void pl_avframe_set_color(AVFrame *frame, struct pl_color_space space); +PL_LIBAV_API void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr); +PL_LIBAV_API void pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile); + +// Map an AVPixelFormat to an array of pl_plane_data structs. The array must +// have at least `av_pix_fmt_count_planes(fmt)` elements, but never more than +// 4. This function leaves `width`, `height` and `row_stride`, as well as the +// data pointers, uninitialized. +// +// If `bits` is non-NULL, this function will attempt aligning the resulting +// `pl_plane_data` struct for optimal compatibility, placing the resulting +// `pl_bit_depth` metadata into `bits`. +// +// Returns the number of plane structs written to, or 0 on error. +// +// Note: This function is usually clumsier to use than the higher-level +// functions above, but it might have some fringe use cases, for example if +// the user wants to replace the data buffers by `pl_buf` references in the +// `pl_plane_data` before uploading it to the GPU. 
+PL_LIBAV_API int pl_plane_data_from_pixfmt(struct pl_plane_data data[4], + struct pl_bit_encoding *bits, + enum AVPixelFormat pix_fmt); + +// Callback for AVCodecContext.get_buffer2 that allocates memory from +// persistently mapped buffers. This can be more efficient than regular +// system memory, especially on platforms that don't support importing +// PL_HANDLE_HOST_PTR as buffers. +// +// Note: `avctx->opaque` must be a pointer that *points* to the GPU instance. +// That is, it should have type `pl_gpu *`. +PL_LIBAV_API int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags); + +// Mapping functions for the various libavutil enums. Note that these are not +// quite 1:1, and even for values that exist in both, the semantics sometimes +// differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in +// libplacebo and libavutil, respectively. +// +// Because of this, it's generally recommended to avoid these and instead use +// helpers like `pl_frame_from_avframe`, which contain extra logic to patch +// through all of the special cases. 
+PL_LIBAV_API enum pl_color_system pl_system_from_av(enum AVColorSpace spc); +PL_LIBAV_API enum AVColorSpace pl_system_to_av(enum pl_color_system sys); +PL_LIBAV_API enum pl_color_levels pl_levels_from_av(enum AVColorRange range); +PL_LIBAV_API enum AVColorRange pl_levels_to_av(enum pl_color_levels levels); +PL_LIBAV_API enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim); +PL_LIBAV_API enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim); +PL_LIBAV_API enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc); +PL_LIBAV_API enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc); +PL_LIBAV_API enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc); +PL_LIBAV_API enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc); + +// Helper function to generate a `pl_color_space` struct from an AVFrame. +PL_LIBAV_API void pl_color_space_from_avframe(struct pl_color_space *out_csp, + const AVFrame *frame); + +// Helper function to pick the right `pl_field` value for an AVFrame. +PL_LIBAV_API enum pl_field pl_field_from_avframe(const AVFrame *frame); + +#ifdef PL_HAVE_LAV_FILM_GRAIN +// Fill in film grain parameters from an AVFilmGrainParams. +// +// Note: The resulting struct will only remain valid as long as the +// `AVFilmGrainParams` remains valid. +PL_LIBAV_API void pl_film_grain_from_av(struct pl_film_grain_data *out_data, + const AVFilmGrainParams *fgp); +#endif + +// Deprecated alias for backwards compatibility +#define pl_swapchain_colors_from_avframe pl_color_space_from_avframe + +// Actual implementation, included as part of this header to avoid having +// a compile-time dependency on libavutil. 
+#if PL_LIBAV_IMPLEMENTATION +# include <libplacebo/utils/libav_internal.h> +#endif + +PL_API_END + +#endif // LIBPLACEBO_LIBAV_H_ diff --git a/src/include/libplacebo/utils/libav_internal.h b/src/include/libplacebo/utils/libav_internal.h new file mode 100644 index 0000000..4c269e5 --- /dev/null +++ b/src/include/libplacebo/utils/libav_internal.h @@ -0,0 +1,1482 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef LIBPLACEBO_LIBAV_H_ +#error This header should be included as part of <libplacebo/utils/libav.h> +#elif defined(__cplusplus) +#error This header cannot be included from C++ define PL_LIBAV_IMPLEMENTATION appropriately +#else + +#include <assert.h> + +#include <libplacebo/utils/dolbyvision.h> + +#include <libavutil/hwcontext.h> +#include <libavutil/hwcontext_drm.h> +#include <libavutil/imgutils.h> +#include <libavutil/pixdesc.h> +#include <libavutil/display.h> +#include <libavcodec/version.h> + +// Try importing <vulkan.h> dynamically if it wasn't already +#if !defined(VK_API_VERSION_1_2) && defined(__has_include) +# if __has_include(<vulkan/vulkan.h>) +# include <vulkan/vulkan.h> +# endif +#endif + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 8, 100) && \ + defined(PL_HAVE_VULKAN) && defined(VK_API_VERSION_1_2) +# define PL_HAVE_LAV_VULKAN +# include <libavutil/hwcontext_vulkan.h> +# include <libplacebo/vulkan.h> +# if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 11, 100) +# define PL_HAVE_LAV_VULKAN_V2 +# endif +#endif + +PL_LIBAV_API enum pl_color_system pl_system_from_av(enum AVColorSpace spc) +{ + switch (spc) { + case AVCOL_SPC_RGB: return PL_COLOR_SYSTEM_RGB; + case AVCOL_SPC_BT709: return PL_COLOR_SYSTEM_BT_709; + case AVCOL_SPC_UNSPECIFIED: return PL_COLOR_SYSTEM_UNKNOWN; + case AVCOL_SPC_RESERVED: return PL_COLOR_SYSTEM_UNKNOWN; + case AVCOL_SPC_FCC: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case AVCOL_SPC_BT470BG: return PL_COLOR_SYSTEM_BT_601; + case AVCOL_SPC_SMPTE170M: return PL_COLOR_SYSTEM_BT_601; + case AVCOL_SPC_SMPTE240M: return PL_COLOR_SYSTEM_SMPTE_240M; + case AVCOL_SPC_YCGCO: return PL_COLOR_SYSTEM_YCGCO; + case AVCOL_SPC_BT2020_NCL: return PL_COLOR_SYSTEM_BT_2020_NC; + case AVCOL_SPC_BT2020_CL: return PL_COLOR_SYSTEM_BT_2020_C; + case AVCOL_SPC_SMPTE2085: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case AVCOL_SPC_CHROMA_DERIVED_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case AVCOL_SPC_CHROMA_DERIVED_CL: return 
PL_COLOR_SYSTEM_UNKNOWN; // missing + // Note: this colorspace is confused between PQ and HLG, which libav* + // requires inferring from other sources, but libplacebo makes explicit. + // Default to PQ as it's the more common scenario. + case AVCOL_SPC_ICTCP: return PL_COLOR_SYSTEM_BT_2100_PQ; + case AVCOL_SPC_NB: return PL_COLOR_SYSTEM_COUNT; + } + + return PL_COLOR_SYSTEM_UNKNOWN; +} + +PL_LIBAV_API enum AVColorSpace pl_system_to_av(enum pl_color_system sys) +{ + switch (sys) { + case PL_COLOR_SYSTEM_UNKNOWN: return AVCOL_SPC_UNSPECIFIED; + case PL_COLOR_SYSTEM_BT_601: return AVCOL_SPC_SMPTE170M; + case PL_COLOR_SYSTEM_BT_709: return AVCOL_SPC_BT709; + case PL_COLOR_SYSTEM_SMPTE_240M: return AVCOL_SPC_SMPTE240M; + case PL_COLOR_SYSTEM_BT_2020_NC: return AVCOL_SPC_BT2020_NCL; + case PL_COLOR_SYSTEM_BT_2020_C: return AVCOL_SPC_BT2020_CL; + case PL_COLOR_SYSTEM_BT_2100_PQ: return AVCOL_SPC_ICTCP; + case PL_COLOR_SYSTEM_BT_2100_HLG: return AVCOL_SPC_ICTCP; + case PL_COLOR_SYSTEM_DOLBYVISION: return AVCOL_SPC_UNSPECIFIED; // missing + case PL_COLOR_SYSTEM_YCGCO: return AVCOL_SPC_YCGCO; + case PL_COLOR_SYSTEM_RGB: return AVCOL_SPC_RGB; + case PL_COLOR_SYSTEM_XYZ: return AVCOL_SPC_UNSPECIFIED; // handled differently + case PL_COLOR_SYSTEM_COUNT: return AVCOL_SPC_NB; + } + + return AVCOL_SPC_UNSPECIFIED; +} + +PL_LIBAV_API enum pl_color_levels pl_levels_from_av(enum AVColorRange range) +{ + switch (range) { + case AVCOL_RANGE_UNSPECIFIED: return PL_COLOR_LEVELS_UNKNOWN; + case AVCOL_RANGE_MPEG: return PL_COLOR_LEVELS_LIMITED; + case AVCOL_RANGE_JPEG: return PL_COLOR_LEVELS_FULL; + case AVCOL_RANGE_NB: return PL_COLOR_LEVELS_COUNT; + } + + return PL_COLOR_LEVELS_UNKNOWN; +} + +PL_LIBAV_API enum AVColorRange pl_levels_to_av(enum pl_color_levels levels) +{ + switch (levels) { + case PL_COLOR_LEVELS_UNKNOWN: return AVCOL_RANGE_UNSPECIFIED; + case PL_COLOR_LEVELS_LIMITED: return AVCOL_RANGE_MPEG; + case PL_COLOR_LEVELS_FULL: return AVCOL_RANGE_JPEG; + case 
PL_COLOR_LEVELS_COUNT: return AVCOL_RANGE_NB; + } + + return AVCOL_RANGE_UNSPECIFIED; +} + +PL_LIBAV_API enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim) +{ + switch (prim) { + case AVCOL_PRI_RESERVED0: return PL_COLOR_PRIM_UNKNOWN; + case AVCOL_PRI_BT709: return PL_COLOR_PRIM_BT_709; + case AVCOL_PRI_UNSPECIFIED: return PL_COLOR_PRIM_UNKNOWN; + case AVCOL_PRI_RESERVED: return PL_COLOR_PRIM_UNKNOWN; + case AVCOL_PRI_BT470M: return PL_COLOR_PRIM_BT_470M; + case AVCOL_PRI_BT470BG: return PL_COLOR_PRIM_BT_601_625; + case AVCOL_PRI_SMPTE170M: return PL_COLOR_PRIM_BT_601_525; + case AVCOL_PRI_SMPTE240M: return PL_COLOR_PRIM_BT_601_525; + case AVCOL_PRI_FILM: return PL_COLOR_PRIM_FILM_C; + case AVCOL_PRI_BT2020: return PL_COLOR_PRIM_BT_2020; + case AVCOL_PRI_SMPTE428: return PL_COLOR_PRIM_CIE_1931; + case AVCOL_PRI_SMPTE431: return PL_COLOR_PRIM_DCI_P3; + case AVCOL_PRI_SMPTE432: return PL_COLOR_PRIM_DISPLAY_P3; + case AVCOL_PRI_JEDEC_P22: return PL_COLOR_PRIM_EBU_3213; + case AVCOL_PRI_NB: return PL_COLOR_PRIM_COUNT; + } + + return PL_COLOR_PRIM_UNKNOWN; +} + +PL_LIBAV_API enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim) +{ + switch (prim) { + case PL_COLOR_PRIM_UNKNOWN: return AVCOL_PRI_UNSPECIFIED; + case PL_COLOR_PRIM_BT_601_525: return AVCOL_PRI_SMPTE170M; + case PL_COLOR_PRIM_BT_601_625: return AVCOL_PRI_BT470BG; + case PL_COLOR_PRIM_BT_709: return AVCOL_PRI_BT709; + case PL_COLOR_PRIM_BT_470M: return AVCOL_PRI_BT470M; + case PL_COLOR_PRIM_EBU_3213: return AVCOL_PRI_JEDEC_P22; + case PL_COLOR_PRIM_BT_2020: return AVCOL_PRI_BT2020; + case PL_COLOR_PRIM_APPLE: return AVCOL_PRI_UNSPECIFIED; // missing + case PL_COLOR_PRIM_ADOBE: return AVCOL_PRI_UNSPECIFIED; // missing + case PL_COLOR_PRIM_PRO_PHOTO: return AVCOL_PRI_UNSPECIFIED; // missing + case PL_COLOR_PRIM_CIE_1931: return AVCOL_PRI_SMPTE428; + case PL_COLOR_PRIM_DCI_P3: return AVCOL_PRI_SMPTE431; + case PL_COLOR_PRIM_DISPLAY_P3: return AVCOL_PRI_SMPTE432; + case 
PL_COLOR_PRIM_V_GAMUT: return AVCOL_PRI_UNSPECIFIED; // missing + case PL_COLOR_PRIM_S_GAMUT: return AVCOL_PRI_UNSPECIFIED; // missing + case PL_COLOR_PRIM_FILM_C: return AVCOL_PRI_FILM; + case PL_COLOR_PRIM_ACES_AP0: return AVCOL_PRI_UNSPECIFIED; // missing + case PL_COLOR_PRIM_ACES_AP1: return AVCOL_PRI_UNSPECIFIED; // missing + case PL_COLOR_PRIM_COUNT: return AVCOL_PRI_NB; + } + + return AVCOL_PRI_UNSPECIFIED; +} + +// Translate a libav* transfer characteristic into a `pl_color_transfer`. +// Values marked `missing`, reserved/unspecified values and anything not +// listed in the switch yield PL_COLOR_TRC_UNKNOWN. +PL_LIBAV_API enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc) +{ + switch (trc) { + case AVCOL_TRC_RESERVED0: return PL_COLOR_TRC_UNKNOWN; + case AVCOL_TRC_BT709: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case AVCOL_TRC_UNSPECIFIED: return PL_COLOR_TRC_UNKNOWN; + case AVCOL_TRC_RESERVED: return PL_COLOR_TRC_UNKNOWN; + case AVCOL_TRC_GAMMA22: return PL_COLOR_TRC_GAMMA22; + case AVCOL_TRC_GAMMA28: return PL_COLOR_TRC_GAMMA28; + case AVCOL_TRC_SMPTE170M: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case AVCOL_TRC_SMPTE240M: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case AVCOL_TRC_LINEAR: return PL_COLOR_TRC_LINEAR; + case AVCOL_TRC_LOG: return PL_COLOR_TRC_UNKNOWN; // missing + case AVCOL_TRC_LOG_SQRT: return PL_COLOR_TRC_UNKNOWN; // missing + case AVCOL_TRC_IEC61966_2_4: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case AVCOL_TRC_BT1361_ECG: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case AVCOL_TRC_IEC61966_2_1: return PL_COLOR_TRC_SRGB; + case AVCOL_TRC_BT2020_10: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case AVCOL_TRC_BT2020_12: return PL_COLOR_TRC_BT_1886; // EOTF != OETF + case AVCOL_TRC_SMPTE2084: return PL_COLOR_TRC_PQ; + case AVCOL_TRC_SMPTE428: return PL_COLOR_TRC_ST428; + case AVCOL_TRC_ARIB_STD_B67: return PL_COLOR_TRC_HLG; + case AVCOL_TRC_NB: return PL_COLOR_TRC_COUNT; + } + + return PL_COLOR_TRC_UNKNOWN; +} + +PL_LIBAV_API enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc) +{ + switch (trc) { + case PL_COLOR_TRC_UNKNOWN: return AVCOL_TRC_UNSPECIFIED; +
case PL_COLOR_TRC_BT_1886: return AVCOL_TRC_BT709; // EOTF != OETF + case PL_COLOR_TRC_SRGB: return AVCOL_TRC_IEC61966_2_1; + case PL_COLOR_TRC_LINEAR: return AVCOL_TRC_LINEAR; + case PL_COLOR_TRC_GAMMA18: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_GAMMA20: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_GAMMA22: return AVCOL_TRC_GAMMA22; + case PL_COLOR_TRC_GAMMA24: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_GAMMA26: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_GAMMA28: return AVCOL_TRC_GAMMA28; + case PL_COLOR_TRC_ST428: return AVCOL_TRC_SMPTE428; + case PL_COLOR_TRC_PRO_PHOTO: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_PQ: return AVCOL_TRC_SMPTE2084; + case PL_COLOR_TRC_HLG: return AVCOL_TRC_ARIB_STD_B67; + case PL_COLOR_TRC_V_LOG: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_S_LOG1: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_S_LOG2: return AVCOL_TRC_UNSPECIFIED; // missing + case PL_COLOR_TRC_COUNT: return AVCOL_TRC_NB; + } + + return AVCOL_TRC_UNSPECIFIED; +} + +PL_LIBAV_API enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc) +{ + switch (loc) { + case AVCHROMA_LOC_UNSPECIFIED: return PL_CHROMA_UNKNOWN; + case AVCHROMA_LOC_LEFT: return PL_CHROMA_LEFT; + case AVCHROMA_LOC_CENTER: return PL_CHROMA_CENTER; + case AVCHROMA_LOC_TOPLEFT: return PL_CHROMA_TOP_LEFT; + case AVCHROMA_LOC_TOP: return PL_CHROMA_TOP_CENTER; + case AVCHROMA_LOC_BOTTOMLEFT: return PL_CHROMA_BOTTOM_LEFT; + case AVCHROMA_LOC_BOTTOM: return PL_CHROMA_BOTTOM_CENTER; + case AVCHROMA_LOC_NB: return PL_CHROMA_COUNT; + } + + return PL_CHROMA_UNKNOWN; +} + +PL_LIBAV_API enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc) +{ + switch (loc) { + case PL_CHROMA_UNKNOWN: return AVCHROMA_LOC_UNSPECIFIED; + case PL_CHROMA_LEFT: return AVCHROMA_LOC_LEFT; + case PL_CHROMA_CENTER: return AVCHROMA_LOC_CENTER; + case PL_CHROMA_TOP_LEFT: return AVCHROMA_LOC_TOPLEFT; 
+ case PL_CHROMA_TOP_CENTER: return AVCHROMA_LOC_TOP; + case PL_CHROMA_BOTTOM_LEFT: return AVCHROMA_LOC_BOTTOMLEFT; + case PL_CHROMA_BOTTOM_CENTER: return AVCHROMA_LOC_BOTTOM; + case PL_CHROMA_COUNT: return AVCHROMA_LOC_NB; + } + + return AVCHROMA_LOC_UNSPECIFIED; +} + +#ifdef PL_HAVE_LAV_HDR +PL_LIBAV_API void pl_map_hdr_metadata(struct pl_hdr_metadata *out, + const struct pl_av_hdr_metadata *data) +{ + if (data->mdm) { + if (data->mdm->has_luminance) { + out->max_luma = av_q2d(data->mdm->max_luminance); + out->min_luma = av_q2d(data->mdm->min_luminance); + if (out->max_luma < 10.0 || out->min_luma >= out->max_luma) + out->max_luma = out->min_luma = 0; /* sanity */ + } + if (data->mdm->has_primaries) { + out->prim = (struct pl_raw_primaries) { + .red.x = av_q2d(data->mdm->display_primaries[0][0]), + .red.y = av_q2d(data->mdm->display_primaries[0][1]), + .green.x = av_q2d(data->mdm->display_primaries[1][0]), + .green.y = av_q2d(data->mdm->display_primaries[1][1]), + .blue.x = av_q2d(data->mdm->display_primaries[2][0]), + .blue.y = av_q2d(data->mdm->display_primaries[2][1]), + .white.x = av_q2d(data->mdm->white_point[0]), + .white.y = av_q2d(data->mdm->white_point[1]), + }; + } + } + + if (data->clm) { + out->max_cll = data->clm->MaxCLL; + out->max_fall = data->clm->MaxFALL; + } + + if (data->dhp && data->dhp->application_version < 2) { + float hist_max = 0; + const AVHDRPlusColorTransformParams *pars = &data->dhp->params[0]; + assert(data->dhp->num_windows > 0); + out->scene_max[0] = 10000 * av_q2d(pars->maxscl[0]); + out->scene_max[1] = 10000 * av_q2d(pars->maxscl[1]); + out->scene_max[2] = 10000 * av_q2d(pars->maxscl[2]); + out->scene_avg = 10000 * av_q2d(pars->average_maxrgb); + + // Calculate largest value from histogram to use as fallback for clips + // with missing MaxSCL information. Note that this may end up picking + // the "reserved" value at the 5% percentile, which in practice appears + // to track the brightest pixel in the scene. 
+ for (int i = 0; i < pars->num_distribution_maxrgb_percentiles; i++) { + float hist_val = av_q2d(pars->distribution_maxrgb[i].percentile); + if (hist_val > hist_max) + hist_max = hist_val; + } + hist_max *= 10000; + if (!out->scene_max[0]) + out->scene_max[0] = hist_max; + if (!out->scene_max[1]) + out->scene_max[1] = hist_max; + if (!out->scene_max[2]) + out->scene_max[2] = hist_max; + + if (pars->tone_mapping_flag == 1) { + out->ootf.target_luma = av_q2d(data->dhp->targeted_system_display_maximum_luminance); + out->ootf.knee_x = av_q2d(pars->knee_point_x); + out->ootf.knee_y = av_q2d(pars->knee_point_y); + assert(pars->num_bezier_curve_anchors < 16); + for (int i = 0; i < pars->num_bezier_curve_anchors; i++) + out->ootf.anchors[i] = av_q2d(pars->bezier_curve_anchors[i]); + out->ootf.num_anchors = pars->num_bezier_curve_anchors; + } + } +} +#endif // PL_HAVE_LAV_HDR + +static inline void *pl_get_side_data_raw(const AVFrame *frame, + enum AVFrameSideDataType type) +{ + const AVFrameSideData *sd = av_frame_get_side_data(frame, type); + return sd ? (void *) sd->data : NULL; +} + +PL_LIBAV_API void pl_color_space_from_avframe(struct pl_color_space *out_csp, + const AVFrame *frame) +{ + *out_csp = (struct pl_color_space) { + .primaries = pl_primaries_from_av(frame->color_primaries), + .transfer = pl_transfer_from_av(frame->color_trc), + }; + +#ifdef PL_HAVE_LAV_HDR + pl_map_hdr_metadata(&out_csp->hdr, &(struct pl_av_hdr_metadata) { + .mdm = pl_get_side_data_raw(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA), + .clm = pl_get_side_data_raw(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL), + .dhp = pl_get_side_data_raw(frame, AV_FRAME_DATA_DYNAMIC_HDR_PLUS), + }); +#endif +} + +PL_LIBAV_API enum pl_field pl_field_from_avframe(const AVFrame *frame) +{ +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 7, 100) + if (!frame || !(frame->flags & AV_FRAME_FLAG_INTERLACED)) + return PL_FIELD_NONE; + return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST) + ? 
PL_FIELD_TOP : PL_FIELD_BOTTOM; +#else + if (!frame || !frame->interlaced_frame) + return PL_FIELD_NONE; + return frame->top_field_first ? PL_FIELD_TOP : PL_FIELD_BOTTOM; +#endif +} + +#ifdef PL_HAVE_LAV_FILM_GRAIN +PL_LIBAV_API void pl_film_grain_from_av(struct pl_film_grain_data *out_data, + const AVFilmGrainParams *fgp) +{ + out_data->seed = fgp->seed; + + switch (fgp->type) { + case AV_FILM_GRAIN_PARAMS_NONE: break; + case AV_FILM_GRAIN_PARAMS_AV1: { + const AVFilmGrainAOMParams *src = &fgp->codec.aom; + struct pl_av1_grain_data *dst = &out_data->params.av1; + out_data->type = PL_FILM_GRAIN_AV1; + *dst = (struct pl_av1_grain_data) { + .num_points_y = src->num_y_points, + .chroma_scaling_from_luma = src->chroma_scaling_from_luma, + .num_points_uv = { src->num_uv_points[0], src->num_uv_points[1] }, + .scaling_shift = src->scaling_shift, + .ar_coeff_lag = src->ar_coeff_lag, + .ar_coeff_shift = src->ar_coeff_shift, + .grain_scale_shift = src->grain_scale_shift, + .uv_mult = { src->uv_mult[0], src->uv_mult[1] }, + .uv_mult_luma = { src->uv_mult_luma[0], src->uv_mult_luma[1] }, + .uv_offset = { src->uv_offset[0], src->uv_offset[1] }, + .overlap = src->overlap_flag, + }; + + assert(sizeof(dst->ar_coeffs_uv) == sizeof(src->ar_coeffs_uv)); + memcpy(dst->points_y, src->y_points, sizeof(dst->points_y)); + memcpy(dst->points_uv, src->uv_points, sizeof(dst->points_uv)); + memcpy(dst->ar_coeffs_y, src->ar_coeffs_y, sizeof(dst->ar_coeffs_y)); + memcpy(dst->ar_coeffs_uv, src->ar_coeffs_uv, sizeof(dst->ar_coeffs_uv)); + break; + } +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 2, 100) + case AV_FILM_GRAIN_PARAMS_H274: { + const AVFilmGrainH274Params *src = &fgp->codec.h274; + struct pl_h274_grain_data *dst = &out_data->params.h274; + out_data->type = PL_FILM_GRAIN_H274; + *dst = (struct pl_h274_grain_data) { + .model_id = src->model_id, + .blending_mode_id = src->blending_mode_id, + .log2_scale_factor = src->log2_scale_factor, + .component_model_present = { + 
src->component_model_present[0], + src->component_model_present[1], + src->component_model_present[2], + }, + .intensity_interval_lower_bound = { + src->intensity_interval_lower_bound[0], + src->intensity_interval_lower_bound[1], + src->intensity_interval_lower_bound[2], + }, + .intensity_interval_upper_bound = { + src->intensity_interval_upper_bound[0], + src->intensity_interval_upper_bound[1], + src->intensity_interval_upper_bound[2], + }, + .comp_model_value = { + src->comp_model_value[0], + src->comp_model_value[1], + src->comp_model_value[2], + }, + }; + memcpy(dst->num_intensity_intervals, src->num_intensity_intervals, + sizeof(dst->num_intensity_intervals)); + memcpy(dst->num_model_values, src->num_model_values, + sizeof(dst->num_model_values)); + break; + } +#endif + } +} +#endif // PL_HAVE_LAV_FILM_GRAIN + +static inline int pl_plane_data_num_comps(const struct pl_plane_data *data) +{ + for (int i = 0; i < 4; i++) { + if (data->component_size[i] == 0) + return i; + } + + return 4; +} + +PL_LIBAV_API int pl_plane_data_from_pixfmt(struct pl_plane_data out_data[4], + struct pl_bit_encoding *out_bits, + enum AVPixelFormat pix_fmt) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt); + int planes = av_pix_fmt_count_planes(pix_fmt); + struct pl_plane_data aligned_data[4]; + struct pl_bit_encoding bits; + bool first; + if (!desc || planes < 0) // e.g. AV_PIX_FMT_NONE + return 0; + + if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) { + // Bitstream formats will most likely never be supported + return 0; + } + + if (desc->flags & AV_PIX_FMT_FLAG_PAL) { + // Palette formats are (currently) not supported + return 0; + } + + if (desc->flags & AV_PIX_FMT_FLAG_BAYER) { + // Bayer format don't have valid `desc->offset` values, so we can't + // use `pl_plane_data_from_mask` on them. + return 0; + } + + if (desc->nb_components == 0 || desc->nb_components > 4) { + // Bogus components, possibly fake/virtual/hwaccel format? 
+ return 0; + } + + if (planes > 4) + return 0; // This shouldn't ever happen + + // Fill in the details for each plane + for (int p = 0; p < planes; p++) { + struct pl_plane_data *data = &out_data[p]; + int size[4] = {0}; + int shift[4] = {0}; + data->swapped = desc->flags & AV_PIX_FMT_FLAG_BE; + data->type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT) + ? PL_FMT_FLOAT + : PL_FMT_UNORM; + + data->pixel_stride = 0; + + for (int c = 0; c < desc->nb_components; c++) { + const AVComponentDescriptor *comp = &desc->comp[c]; + if (comp->plane != p) + continue; + if (data->swapped && comp->shift) { + // We cannot naively handle packed big endian formats because + // swapping the words also swaps the component order, so just + // exit out as a stupid safety measure + return 0; + } + + size[c] = comp->depth; + shift[c] = comp->shift + comp->offset * 8; + + if (data->pixel_stride && (int) data->pixel_stride != comp->step) { + // Pixel format contains components with different pixel stride + // (e.g. packed YUYV), this is currently not supported + return 0; + } + data->pixel_stride = comp->step; + } + + pl_plane_data_from_comps(data, size, shift); + } + + if (!out_bits) + return planes; + + // Attempt aligning all of the planes for optimum compatibility + first = true; + for (int p = 0; p < planes; p++) { + aligned_data[p] = out_data[p]; + + // Planes with only an alpha component should be ignored + if (pl_plane_data_num_comps(&aligned_data[p]) == 1 && + aligned_data[p].component_map[0] == PL_CHANNEL_A) + { + continue; + } + + if (!pl_plane_data_align(&aligned_data[p], &bits)) + goto misaligned; + + if (first) { + *out_bits = bits; + first = false; + } else { + if (!pl_bit_encoding_equal(&bits, out_bits)) + goto misaligned; + } + } + + // Overwrite the planes by their aligned versions + for (int p = 0; p < planes; p++) + out_data[p] = aligned_data[p]; + + return planes; + +misaligned: + *out_bits = (struct pl_bit_encoding) {0}; + return planes; +} + +PL_LIBAV_API bool 
pl_test_pixfmt_caps(pl_gpu gpu, enum AVPixelFormat pixfmt, + enum pl_fmt_caps caps) +{ + struct pl_bit_encoding bits; + struct pl_plane_data data[4]; + pl_fmt fmt; + int planes; + + switch (pixfmt) { + case AV_PIX_FMT_DRM_PRIME: + case AV_PIX_FMT_VAAPI: + return gpu->import_caps.tex & PL_HANDLE_DMA_BUF; + +#ifdef PL_HAVE_LAV_VULKAN + case AV_PIX_FMT_VULKAN: + return pl_vulkan_get(gpu); +#endif + + default: break; + } + + planes = pl_plane_data_from_pixfmt(data, &bits, pixfmt); + if (!planes) + return false; + + for (int i = 0; i < planes; i++) { + data[i].row_stride = 0; + fmt = pl_plane_find_fmt(gpu, NULL, &data[i]); + if (!fmt || (fmt->caps & caps) != caps) + return false; + + } + + return true; +} + +PL_LIBAV_API bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt) +{ + return pl_test_pixfmt_caps(gpu, pixfmt, 0); +} + +PL_LIBAV_API void pl_avframe_set_color(AVFrame *frame, struct pl_color_space csp) +{ + const AVFrameSideData *sd; + (void) sd; + + frame->color_primaries = pl_primaries_to_av(csp.primaries); + frame->color_trc = pl_transfer_to_av(csp.transfer); + +#ifdef PL_HAVE_LAV_HDR + if (csp.hdr.max_cll) { + sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); + if (!sd) { + sd = av_frame_new_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, + sizeof(AVContentLightMetadata)); + } + + if (sd) { + AVContentLightMetadata *clm = (AVContentLightMetadata *) sd->data; + *clm = (AVContentLightMetadata) { + .MaxCLL = csp.hdr.max_cll, + .MaxFALL = csp.hdr.max_fall, + }; + } + } + + if (csp.hdr.max_luma || csp.hdr.prim.red.x) { + sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); + if (!sd) { + sd = av_frame_new_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, + sizeof(AVMasteringDisplayMetadata)); + } + + if (sd) { + AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *) sd->data; + *mdm = (AVMasteringDisplayMetadata) { + .max_luminance = av_d2q(csp.hdr.max_luma, 1000000), + .min_luminance = 
av_d2q(csp.hdr.min_luma, 1000000), + .has_luminance = !!csp.hdr.max_luma, + .display_primaries = { + { + av_d2q(csp.hdr.prim.red.x, 1000000), + av_d2q(csp.hdr.prim.red.y, 1000000), + }, { + av_d2q(csp.hdr.prim.green.x, 1000000), + av_d2q(csp.hdr.prim.green.y, 1000000), + }, { + av_d2q(csp.hdr.prim.blue.x, 1000000), + av_d2q(csp.hdr.prim.blue.y, 1000000), + } + }, + .white_point = { + av_d2q(csp.hdr.prim.white.x, 1000000), + av_d2q(csp.hdr.prim.white.y, 1000000), + }, + .has_primaries = !!csp.hdr.prim.red.x, + }; + } + } +#endif // PL_HAVE_LAV_HDR +} + +PL_LIBAV_API void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr) +{ + frame->colorspace = pl_system_to_av(repr.sys); + frame->color_range = pl_levels_to_av(repr.levels); + + // No real way to map repr.bits, the image format already has to match +} + +// Replace any ICC profile side data on `frame` with a copy of `profile`. +// An empty profile (`profile.len == 0`) only removes the existing side data. +PL_LIBAV_API void pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile) +{ + const AVFrameSideData *sd; + av_frame_remove_side_data(frame, AV_FRAME_DATA_ICC_PROFILE); + + if (!profile.len) + return; + + sd = av_frame_new_side_data(frame, AV_FRAME_DATA_ICC_PROFILE, profile.len); + // Side data allocation can fail; leave the frame without a profile + // rather than writing through a NULL pointer. + if (!sd) + return; + + memcpy(sd->data, profile.data, profile.len); +} + +PL_LIBAV_API void pl_frame_from_avframe(struct pl_frame *out, + const AVFrame *frame) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + int planes = av_pix_fmt_count_planes(frame->format); + const AVFrameSideData *sd; + assert(desc); + + if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { + const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; + desc = av_pix_fmt_desc_get(hwfc->sw_format); + planes = av_pix_fmt_count_planes(hwfc->sw_format); + } + + // This should never fail, and there's nothing really useful we can do in + // this failure case anyway, since this is a `void` function.
+ assert(planes <= 4); + + *out = (struct pl_frame) { + .num_planes = planes, + .crop = { + .x0 = frame->crop_left, + .y0 = frame->crop_top, + .x1 = frame->width - frame->crop_right, + .y1 = frame->height - frame->crop_bottom, + }, + .repr = { + .sys = pl_system_from_av(frame->colorspace), + .levels = pl_levels_from_av(frame->color_range), + .alpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA) + ? PL_ALPHA_INDEPENDENT + : PL_ALPHA_UNKNOWN, + + // For sake of simplicity, just use the first component's depth as + // the authoritative color depth for the whole image. Usually, this + // will be overwritten by more specific information when using e.g. + // `pl_map_avframe`, but for the sake of e.g. users wishing to map + // hwaccel frames manually, this is a good default. + .bits.color_depth = desc->comp[0].depth, + }, + }; + + pl_color_space_from_avframe(&out->color, frame); + + if (frame->colorspace == AVCOL_SPC_ICTCP && + frame->color_trc == AVCOL_TRC_ARIB_STD_B67) + { + // libav* makes no distinction between PQ and HLG ICtCp, so we need + // to manually fix it in the case that we have HLG ICtCp data. + out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG; + + } else if (strncmp(desc->name, "xyz", 3) == 0) { + + // libav* handles this as a special case, but doesn't provide an + // explicit flag for it either, so we have to resort to this ugly + // hack... + out->repr.sys = PL_COLOR_SYSTEM_XYZ; + + } else if (desc->flags & AV_PIX_FMT_FLAG_RGB) { + + out->repr.sys = PL_COLOR_SYSTEM_RGB; + out->repr.levels = PL_COLOR_LEVELS_FULL; // libav* ignores levels for RGB + + } else if (!pl_color_system_is_ycbcr_like(out->repr.sys)) { + // libav* likes leaving this as UNKNOWN (or even RGB) for YCbCr frames, + // which confuses libplacebo since we infer UNKNOWN as RGB. To get + // around this, explicitly infer a suitable colorspace. 
+ out->repr.sys = pl_color_system_guess_ycbcr(frame->width, frame->height); + } + + if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ICC_PROFILE))) { + out->profile = (struct pl_icc_profile) { + .data = sd->data, + .len = sd->size, + }; + + // Needed to ensure profile uniqueness + pl_icc_profile_compute_signature(&out->profile); + } + + if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX))) { + double rot = av_display_rotation_get((const int32_t *) sd->data); + out->rotation = pl_rotation_normalize(4.5 - rot / 90.0); + } + +#ifdef PL_HAVE_LAV_FILM_GRAIN + if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_FILM_GRAIN_PARAMS))) + pl_film_grain_from_av(&out->film_grain, (AVFilmGrainParams *) sd->data); +#endif // HAVE_LAV_FILM_GRAIN + + for (int p = 0; p < out->num_planes; p++) { + struct pl_plane *plane = &out->planes[p]; + + // Fill in the component mapping array + for (int c = 0; c < desc->nb_components; c++) { + if (desc->comp[c].plane == p) + plane->component_mapping[plane->components++] = c; + } + + // Clear the superfluous components + for (int c = plane->components; c < 4; c++) + plane->component_mapping[c] = PL_CHANNEL_NONE; + } + + // Only set the chroma location for definitely subsampled images, makes no + // sense otherwise + if (desc->log2_chroma_w || desc->log2_chroma_h) { + enum pl_chroma_location loc = pl_chroma_from_av(frame->chroma_location); + pl_frame_set_chroma_location(out, loc); + } +} + +#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(60, 15, 100) +PL_LIBAV_API const uint8_t *pl_av_stream_get_side_data(const AVStream *st, + enum AVPacketSideDataType type) +{ + const AVPacketSideData *sd; + sd = av_packet_side_data_get(st->codecpar->coded_side_data, + st->codecpar->nb_coded_side_data, + type); + return sd ? 
sd->data : NULL; +} +#else +# define pl_av_stream_get_side_data(st, type) av_stream_get_side_data(st, type, NULL) +#endif + +PL_LIBAV_API void pl_frame_copy_stream_props(struct pl_frame *out, + const AVStream *stream) +{ + const uint8_t *sd; + if ((sd = pl_av_stream_get_side_data(stream, AV_PKT_DATA_DISPLAYMATRIX))) { + double rot = av_display_rotation_get((const int32_t *) sd); + out->rotation = pl_rotation_normalize(4.5 - rot / 90.0); + } + +#ifdef PL_HAVE_LAV_HDR + pl_map_hdr_metadata(&out->color.hdr, &(struct pl_av_hdr_metadata) { + .mdm = (void *) pl_av_stream_get_side_data(stream, + AV_PKT_DATA_MASTERING_DISPLAY_METADATA), + .clm = (void *) pl_av_stream_get_side_data(stream, + AV_PKT_DATA_CONTENT_LIGHT_LEVEL), +# if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 2, 100) + .dhp = (void *) pl_av_stream_get_side_data(stream, + AV_PKT_DATA_DYNAMIC_HDR10_PLUS), +# endif + }); +#endif +} + +#undef pl_av_stream_get_side_data + +#ifdef PL_HAVE_LAV_DOLBY_VISION +PL_LIBAV_API void pl_map_dovi_metadata(struct pl_dovi_metadata *out, + const AVDOVIMetadata *data) +{ + const AVDOVIRpuDataHeader *header; + const AVDOVIDataMapping *mapping; + const AVDOVIColorMetadata *color; + if (!data) + return; + + header = av_dovi_get_header(data); + mapping = av_dovi_get_mapping(data); + color = av_dovi_get_color(data); + + for (int i = 0; i < 3; i++) + out->nonlinear_offset[i] = av_q2d(color->ycc_to_rgb_offset[i]); + for (int i = 0; i < 9; i++) { + float *nonlinear = &out->nonlinear.m[0][0]; + float *linear = &out->linear.m[0][0]; + nonlinear[i] = av_q2d(color->ycc_to_rgb_matrix[i]); + linear[i] = av_q2d(color->rgb_to_lms_matrix[i]); + } + for (int c = 0; c < 3; c++) { + const AVDOVIReshapingCurve *csrc = &mapping->curves[c]; + struct pl_reshape_data *cdst = &out->comp[c]; + cdst->num_pivots = csrc->num_pivots; + for (int i = 0; i < csrc->num_pivots; i++) { + const float scale = 1.0f / ((1 << header->bl_bit_depth) - 1); + cdst->pivots[i] = scale * csrc->pivots[i]; + } + for (int i = 0; 
i < csrc->num_pivots - 1; i++) { + const float scale = 1.0f / (1 << header->coef_log2_denom); + cdst->method[i] = csrc->mapping_idc[i]; + switch (csrc->mapping_idc[i]) { + case AV_DOVI_MAPPING_POLYNOMIAL: + for (int k = 0; k < 3; k++) { + cdst->poly_coeffs[i][k] = (k <= csrc->poly_order[i]) + ? scale * csrc->poly_coef[i][k] + : 0.0f; + } + break; + case AV_DOVI_MAPPING_MMR: + cdst->mmr_order[i] = csrc->mmr_order[i]; + cdst->mmr_constant[i] = scale * csrc->mmr_constant[i]; + for (int j = 0; j < csrc->mmr_order[i]; j++) { + for (int k = 0; k < 7; k++) + cdst->mmr_coeffs[i][j][k] = scale * csrc->mmr_coef[i][j][k]; + } + break; + } + } + } +} + +PL_LIBAV_API void pl_frame_map_avdovi_metadata(struct pl_frame *out_frame, + struct pl_dovi_metadata *dovi, + const AVDOVIMetadata *metadata) +{ + const AVDOVIRpuDataHeader *header; + const AVDOVIColorMetadata *color; + if (!dovi || !metadata) + return; + + header = av_dovi_get_header(metadata); + color = av_dovi_get_color(metadata); + if (header->disable_residual_flag) { + pl_map_dovi_metadata(dovi, metadata); + + out_frame->repr.dovi = dovi; + out_frame->repr.sys = PL_COLOR_SYSTEM_DOLBYVISION; + out_frame->color.primaries = PL_COLOR_PRIM_BT_2020; + out_frame->color.transfer = PL_COLOR_TRC_PQ; + out_frame->color.hdr.min_luma = + pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, color->source_min_pq / 4095.0f); + out_frame->color.hdr.max_luma = + pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, color->source_max_pq / 4095.0f); + } +} +#endif // PL_HAVE_LAV_DOLBY_VISION + +PL_LIBAV_API bool pl_frame_recreate_from_avframe(pl_gpu gpu, + struct pl_frame *out, + pl_tex tex[4], + const AVFrame *frame) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + struct pl_plane_data data[4] = {0}; + int planes; + + pl_frame_from_avframe(out, frame); + planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format); + if (!planes) + return false; + + for (int p = 0; p < planes; p++) { + bool is_chroma = p == 1 || p == 2; // matches 
lavu logic + data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0); + data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0); + + if (!pl_recreate_plane(gpu, &out->planes[p], &tex[p], &data[p])) + return false; + } + + return true; +} + +static void pl_avframe_free_cb(void *priv) +{ + AVFrame *frame = priv; + av_frame_free(&frame); +} + +#define PL_MAGIC0 0xfb5b3b8b +#define PL_MAGIC1 0xee659f6d + +struct pl_avalloc { + uint32_t magic[2]; + pl_gpu gpu; + pl_buf buf; +}; + +// Attached to `pl_frame.user_data` for mapped AVFrames +struct pl_avframe_priv { + AVFrame *avframe; + struct pl_dovi_metadata dovi; // backing storage for per-frame dovi metadata + pl_tex planar; // for planar vulkan textures +}; + +static void pl_fix_hwframe_sample_depth(struct pl_frame *out, const AVFrame *frame) +{ + const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; + pl_fmt fmt = out->planes[0].texture->params.format; + struct pl_bit_encoding *bits = &out->repr.bits; + + bits->sample_depth = fmt->component_depth[0]; + + switch (hwfc->sw_format) { + case AV_PIX_FMT_P010: bits->bit_shift = 6; break; + default: break; + } +} + +static bool pl_map_avframe_drm(pl_gpu gpu, struct pl_frame *out, + const AVFrame *frame) +{ + const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + const AVDRMFrameDescriptor *drm = (AVDRMFrameDescriptor *) frame->data[0]; + assert(frame->format == AV_PIX_FMT_DRM_PRIME); + if (!(gpu->import_caps.tex & PL_HANDLE_DMA_BUF)) + return false; + + assert(drm->nb_layers >= out->num_planes); + for (int n = 0; n < out->num_planes; n++) { + const AVDRMLayerDescriptor *layer = &drm->layers[n]; + const AVDRMPlaneDescriptor *plane = &layer->planes[0]; + const AVDRMObjectDescriptor *object = &drm->objects[plane->object_index]; + pl_fmt fmt = pl_find_fourcc(gpu, layer->format); + bool is_chroma = 
n == 1 || n == 2; + if (!fmt || !pl_fmt_has_modifier(fmt, object->format_modifier)) + return false; + + assert(layer->nb_planes == 1); // we only support planar formats + assert(plane->pitch >= 0); // definitely requires special handling + out->planes[n].texture = pl_tex_create(gpu, pl_tex_params( + .w = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0), + .h = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0), + .format = fmt, + .sampleable = true, + .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, + .import_handle = PL_HANDLE_DMA_BUF, + .shared_mem = { + .handle.fd = object->fd, + .size = object->size, + .offset = plane->offset, + .drm_format_mod = object->format_modifier, + .stride_w = plane->pitch, + }, + )); + if (!out->planes[n].texture) + return false; + } + + pl_fix_hwframe_sample_depth(out, frame); + return true; +} + +// Derive a DMABUF from any other hwaccel format, and map that instead +static bool pl_map_avframe_derived(pl_gpu gpu, struct pl_frame *out, + const AVFrame *frame) +{ + const int flags = AV_HWFRAME_MAP_READ | AV_HWFRAME_MAP_DIRECT; + struct pl_avframe_priv *priv = out->user_data; + AVFrame *derived = av_frame_alloc(); + derived->width = frame->width; + derived->height = frame->height; + derived->format = AV_PIX_FMT_DRM_PRIME; + derived->hw_frames_ctx = av_buffer_ref(frame->hw_frames_ctx); + if (av_hwframe_map(derived, frame, flags) < 0) + goto error; + if (av_frame_copy_props(derived, frame) < 0) + goto error; + if (!pl_map_avframe_drm(gpu, out, derived)) + goto error; + + av_frame_free(&priv->avframe); + priv->avframe = derived; + return true; + +error: + av_frame_free(&derived); + return false; +} + +#ifdef PL_HAVE_LAV_VULKAN +static bool pl_acquire_avframe(pl_gpu gpu, struct pl_frame *frame) +{ + const struct pl_avframe_priv *priv = frame->user_data; + AVHWFramesContext *hwfc = (void *) priv->avframe->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *) 
priv->avframe->data[0]; + +#ifdef PL_HAVE_LAV_VULKAN_V2 + vkfc->lock_frame(hwfc, vkf); +#else + (void) vkfc; +#endif + + for (int n = 0; n < frame->num_planes; n++) { + pl_vulkan_release_ex(gpu, pl_vulkan_release_params( + .tex = priv->planar ? priv->planar : frame->planes[n].texture, + .layout = vkf->layout[n], + .qf = VK_QUEUE_FAMILY_IGNORED, + .semaphore = { + .sem = vkf->sem[n], + .value = vkf->sem_value[n], + }, + )); + if (priv->planar) + break; + } + + return true; +} + +static void pl_release_avframe(pl_gpu gpu, struct pl_frame *frame) +{ + const struct pl_avframe_priv *priv = frame->user_data; + AVHWFramesContext *hwfc = (void *) priv->avframe->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *) priv->avframe->data[0]; + + for (int n = 0; n < frame->num_planes; n++) { + int ok = pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params( + .tex = priv->planar ? priv->planar : frame->planes[n].texture, + .out_layout = &vkf->layout[n], + .qf = VK_QUEUE_FAMILY_IGNORED, + .semaphore = { + .sem = vkf->sem[n], + .value = vkf->sem_value[n] + 1, + }, + )); + + vkf->access[n] = 0; + vkf->sem_value[n] += !!ok; + if (priv->planar) + break; + } + +#ifdef PL_HAVE_LAV_VULKAN_V2 + vkfc->unlock_frame(hwfc, vkf); +#else + (void) vkfc; +#endif +} + +static bool pl_map_avframe_vulkan(pl_gpu gpu, struct pl_frame *out, + const AVFrame *frame) +{ + const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + const AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *) frame->data[0]; + struct pl_avframe_priv *priv = out->user_data; + pl_vulkan vk = pl_vulkan_get(gpu); + +#ifdef PL_HAVE_LAV_VULKAN_V2 + const VkFormat *vk_fmt = vkfc->format; +#else + const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format); +#endif + + assert(frame->format == AV_PIX_FMT_VULKAN); + priv->planar = NULL; + if (!vk) + return false; + + for (int n 
= 0; n < out->num_planes; n++) { + struct pl_plane *plane = &out->planes[n]; + bool chroma = n == 1 || n == 2; + int num_subplanes; + assert(vk_fmt[n]); + + plane->texture = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params( + .image = vkf->img[n], + .width = AV_CEIL_RSHIFT(hwfc->width, chroma ? desc->log2_chroma_w : 0), + .height = AV_CEIL_RSHIFT(hwfc->height, chroma ? desc->log2_chroma_h : 0), + .format = vk_fmt[n], + .usage = vkfc->usage, + )); + if (!plane->texture) + return false; + + num_subplanes = plane->texture->params.format->num_planes; + if (num_subplanes) { + assert(num_subplanes == out->num_planes); + priv->planar = plane->texture; + for (int i = 0; i < num_subplanes; i++) + out->planes[i].texture = priv->planar->planes[i]; + break; + } + } + + out->acquire = pl_acquire_avframe; + out->release = pl_release_avframe; + pl_fix_hwframe_sample_depth(out, frame); + return true; +} + +static void pl_unmap_avframe_vulkan(pl_gpu gpu, struct pl_frame *frame) +{ + struct pl_avframe_priv *priv = frame->user_data; + if (priv->planar) { + pl_tex_destroy(gpu, &priv->planar); + for (int n = 0; n < frame->num_planes; n++) + frame->planes[n].texture = NULL; + } +} +#endif + +PL_LIBAV_API bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out, + const struct pl_avframe_params *params) +{ + const AVFrame *frame = params->frame; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + struct pl_plane_data data[4] = {0}; + pl_tex *tex = params->tex; + int planes; + + struct pl_avframe_priv *priv = malloc(sizeof(*priv)); + if (!priv) + goto error; + + pl_frame_from_avframe(out, frame); + priv->avframe = av_frame_clone(frame); + out->user_data = priv; + +#ifdef PL_HAVE_LAV_DOLBY_VISION + if (params->map_dovi) { + AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_METADATA); + if (sd) { + const AVDOVIMetadata *metadata = (AVDOVIMetadata *) sd->data; + const AVDOVIRpuDataHeader *header = av_dovi_get_header(metadata); + // Only automatically map 
DoVi RPUs that don't require an EL + if (header->disable_residual_flag) + pl_frame_map_avdovi_metadata(out, &priv->dovi, metadata); + } + +#ifdef PL_HAVE_LIBDOVI + sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_RPU_BUFFER); + if (sd) + pl_hdr_metadata_from_dovi_rpu(&out->color.hdr, sd->buf->data, sd->buf->size); +#endif // PL_HAVE_LIBDOVI + } + +#endif // PL_HAVE_LAV_DOLBY_VISION + + switch (frame->format) { + case AV_PIX_FMT_DRM_PRIME: + if (!pl_map_avframe_drm(gpu, out, frame)) + goto error; + return true; + + case AV_PIX_FMT_VAAPI: + if (!pl_map_avframe_derived(gpu, out, frame)) + goto error; + return true; + +#ifdef PL_HAVE_LAV_VULKAN + case AV_PIX_FMT_VULKAN: + if (!pl_map_avframe_vulkan(gpu, out, frame)) + goto error; + return true; +#endif + + default: break; + } + + // Backing textures are required from this point onwards + if (!tex) + goto error; + + planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format); + if (!planes) + goto error; + + for (int p = 0; p < planes; p++) { + AVBufferRef *buf = av_frame_get_plane_buffer((AVFrame *) frame, p); + struct pl_avalloc *alloc = buf ? av_buffer_get_opaque(buf) : NULL; + bool is_chroma = p == 1 || p == 2; // matches lavu logic + + data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0); + data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? 
desc->log2_chroma_h : 0); + if (frame->linesize[p] < 0) { + data[p].pixels = frame->data[p] + frame->linesize[p] * (data[p].height - 1); + data[p].row_stride = -frame->linesize[p]; + out->planes[p].flipped = true; + } else { + data[p].pixels = frame->data[p]; + data[p].row_stride = frame->linesize[p]; + } + + // Probe for frames allocated by pl_get_buffer2 + if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) { + data[p].buf = alloc->buf; + data[p].buf_offset = (uintptr_t) data[p].pixels - (uintptr_t) alloc->buf->data; + data[p].pixels = NULL; + } else if (gpu->limits.callbacks) { + // Use asynchronous upload if possible + data[p].callback = pl_avframe_free_cb; + data[p].priv = av_frame_clone(frame); + } + + if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) { + av_frame_free((AVFrame **) &data[p].priv); + goto error; + } + + out->planes[p].texture = tex[p]; + } + + return true; + +error: + pl_unmap_avframe(gpu, out); + return false; +} + +// Backwards compatibility with previous versions of this API. 
+PL_LIBAV_API bool pl_map_avframe(pl_gpu gpu, struct pl_frame *out_frame, + pl_tex tex[4], const AVFrame *avframe) +{ + return pl_map_avframe_ex(gpu, out_frame, &(struct pl_avframe_params) { + .frame = avframe, + .tex = tex, + }); +} + +PL_LIBAV_API void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame) +{ + struct pl_avframe_priv *priv = frame->user_data; + const AVPixFmtDescriptor *desc; + if (!priv) + goto done; + +#ifdef PL_HAVE_LAV_VULKAN + if (priv->avframe->format == AV_PIX_FMT_VULKAN) + pl_unmap_avframe_vulkan(gpu, frame); +#endif + + desc = av_pix_fmt_desc_get(priv->avframe->format); + if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { + for (int i = 0; i < 4; i++) + pl_tex_destroy(gpu, &frame->planes[i].texture); + } + + av_frame_free(&priv->avframe); + free(priv); + +done: + memset(frame, 0, sizeof(*frame)); // sanity +} + +PL_LIBAV_API AVFrame *pl_get_mapped_avframe(const struct pl_frame *frame) +{ + struct pl_avframe_priv *priv = frame->user_data; + return priv->avframe; +} + +static void pl_done_cb(void *priv) +{ + bool *status = priv; + *status = true; +} + +PL_LIBAV_API bool pl_download_avframe(pl_gpu gpu, + const struct pl_frame *frame, + AVFrame *out_frame) +{ + bool done[4] = {0}; + if (frame->num_planes != av_pix_fmt_count_planes(out_frame->format)) + return false; + + for (int p = 0; p < frame->num_planes; p++) { + bool ok = pl_tex_download(gpu, pl_tex_transfer_params( + .tex = frame->planes[p].texture, + .row_pitch = out_frame->linesize[p], + .ptr = out_frame->data[p], + // Use synchronous transfer for the last plane + .callback = (p+1) < frame->num_planes ? pl_done_cb : NULL, + .priv = &done[p], + )); + + if (!ok) + return false; + } + + for (int p = 0; p < frame->num_planes - 1; p++) { + while (!done[p]) + pl_tex_poll(gpu, frame->planes[p].texture, UINT64_MAX); + } + + return true; +} + +#define PL_DIV_UP(x, y) (((x) + (y) - 1) / (y)) +#define PL_ALIGN(x, align) ((align) ? PL_DIV_UP(x, align) * (align) : (x)) +#define PL_MAX(x, y) ((x) > (y) ? 
(x) : (y)) +#define PL_LCM(x, y) ((x) * ((y) / av_gcd(x, y))) + +static inline void pl_avalloc_free(void *opaque, uint8_t *data) +{ + struct pl_avalloc *alloc = opaque; + assert(alloc->magic[0] == PL_MAGIC0); + assert(alloc->magic[1] == PL_MAGIC1); + assert(alloc->buf->data == data); + pl_buf_destroy(alloc->gpu, &alloc->buf); + free(alloc); +} + +PL_LIBAV_API int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags) +{ + int alignment[AV_NUM_DATA_POINTERS]; + int width = pic->width; + int height = pic->height; + size_t planesize[4]; + int ret = 0; + + pl_gpu *pgpu = avctx->opaque; + pl_gpu gpu = pgpu ? *pgpu : NULL; + struct pl_plane_data data[4]; + struct pl_avalloc *alloc; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format); + int planes = pl_plane_data_from_pixfmt(data, NULL, pic->format); + + // Sanitize frame structs + memset(pic->data, 0, sizeof(pic->data)); + memset(pic->linesize, 0, sizeof(pic->linesize)); + memset(pic->buf, 0, sizeof(pic->buf)); + pic->extended_data = pic->data; + pic->extended_buf = NULL; + + if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1) || !planes) + goto fallback; + if (!gpu || !gpu->limits.thread_safe || !gpu->limits.max_mapped_size || + !gpu->limits.host_cached) + { + goto fallback; + } + + avcodec_align_dimensions2(avctx, &width, &height, alignment); + if ((ret = av_image_fill_linesizes(pic->linesize, pic->format, width))) + return ret; + + for (int p = 0; p < planes; p++) { + alignment[p] = PL_LCM(alignment[p], gpu->limits.align_tex_xfer_pitch); + alignment[p] = PL_LCM(alignment[p], gpu->limits.align_tex_xfer_offset); + alignment[p] = PL_LCM(alignment[p], data[p].pixel_stride); + pic->linesize[p] = PL_ALIGN(pic->linesize[p], alignment[p]); + } + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100) + ret = av_image_fill_plane_sizes(planesize, pic->format, height, (ptrdiff_t[4]) { + pic->linesize[0], pic->linesize[1], pic->linesize[2], pic->linesize[3], + }); + if (ret < 0) + return ret; +#else + 
uint8_t *ptrs[4], * const base = (uint8_t *) 0x10000; + ret = av_image_fill_pointers(ptrs, pic->format, height, base, pic->linesize); + if (ret < 0) + return ret; + for (int p = 0; p < 4; p++) + planesize[p] = (uintptr_t) ptrs[p] - (uintptr_t) base; +#endif + + for (int p = 0; p < planes; p++) { + const size_t buf_size = planesize[p] + alignment[p]; + if (buf_size > gpu->limits.max_mapped_size) { + av_frame_unref(pic); + goto fallback; + } + + alloc = malloc(sizeof(*alloc)); + if (!alloc) { + av_frame_unref(pic); + return AVERROR(ENOMEM); + } + + *alloc = (struct pl_avalloc) { + .magic = { PL_MAGIC0, PL_MAGIC1 }, + .gpu = gpu, + .buf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .memory_type = PL_BUF_MEM_HOST, + .host_mapped = true, + .storable = desc->flags & AV_PIX_FMT_FLAG_BE, + )), + }; + + if (!alloc->buf) { + free(alloc); + av_frame_unref(pic); + return AVERROR(ENOMEM); + } + + pic->data[p] = (uint8_t *) PL_ALIGN((uintptr_t) alloc->buf->data, alignment[p]); + pic->buf[p] = av_buffer_create(alloc->buf->data, buf_size, pl_avalloc_free, alloc, 0); + if (!pic->buf[p]) { + pl_buf_destroy(gpu, &alloc->buf); + free(alloc); + av_frame_unref(pic); + return AVERROR(ENOMEM); + } + } + + return 0; + +fallback: + return avcodec_default_get_buffer2(avctx, pic, flags); +} + +#undef PL_MAGIC0 +#undef PL_MAGIC1 +#undef PL_ALIGN +#undef PL_MAX + +#endif // LIBPLACEBO_LIBAV_H_ diff --git a/src/include/libplacebo/utils/upload.h b/src/include/libplacebo/utils/upload.h new file mode 100644 index 0000000..9e8d436 --- /dev/null +++ b/src/include/libplacebo/utils/upload.h @@ -0,0 +1,153 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_UPLOAD_H_ +#define LIBPLACEBO_UPLOAD_H_ + +#include <stdint.h> + +#include <libplacebo/gpu.h> +#include <libplacebo/renderer.h> + +PL_API_BEGIN + +// This file contains a utility function to assist in uploading data from host +// memory to a texture. In particular, the texture will be suitable for use as +// a `pl_plane`. + +// Description of the host representation of an image plane +struct pl_plane_data { + enum pl_fmt_type type; // meaning of the data (must not be UINT or SINT) + int width, height; // dimensions of the plane + int component_size[4]; // size in bits of each coordinate + int component_pad[4]; // ignored bits preceding each component + int component_map[4]; // semantic meaning of each component (pixel order) + size_t pixel_stride; // offset in bytes between pixels (required) + size_t row_stride; // offset in bytes between rows (optional) + bool swapped; // pixel data is endian-swapped (non-native) + + // Similar to `pl_tex_transfer_params`, you can either upload from a raw + // pointer address, or a buffer + offset. Again, the use of these two + // mechanisms is mutually exclusive. + // + // 1. Uploading from host memory + const void *pixels; // the actual data underlying this plane + + // 2. 
Uploading from a buffer (requires `pl_gpu_limits.buf_transfer`) + pl_buf buf; // the buffer to use + size_t buf_offset; // offset of data within buffer, must be a + // multiple of `pixel_stride` as well as of 4 + + // Similar to `pl_tex_transfer_params.callback`, this allows turning the + // upload of a plane into an asynchronous upload. The same notes apply. + void (*callback)(void *priv); + void *priv; + + // Note: When using this together with `pl_frame`, there is some amount of + // overlap between `component_pad` and `pl_color_repr.bits`. Some key + // differences between the two: + // + // - the bits from `component_pad` are ignored; whereas the superfluous bits + // in a `pl_color_repr` must be 0. + // - the `component_pad` exists to align the component size and placement + // with the capabilities of GPUs; the `pl_color_repr` exists to control + // the semantics of the color samples on a finer granularity. + // - the `pl_color_repr` applies to the color sample as a whole, and + // therefore applies to all planes; the `component_pad` can be different + // for each plane. + // - `component_pad` interacts with float textures by moving the actual + // float in memory. `pl_color_repr` interacts with float data as if + // the float was converted from an integer under full range semantics. + // + // To help establish the motivating difference, a typical example of a use + // case would be yuv420p10. Since 10-bit GPU texture support is limited, + // and working with non-byte-aligned pixels is awkward in general, the + // convention is to represent yuv420p10 as 16-bit samples with either the + // high or low bits set to 0. In this scenario, the `component_size` of the + // `pl_plane_data` and `pl_bit_encoding.sample_depth` would be 16, while + // the `pl_bit_encoding.color_depth` would be 10 (and additionally, the + // `pl_bit_encoding.bit_shift` would be either 0 or 6, depending on + // whether the low or the high bits are used). 
+ // + // On the contrary, something like a packed, 8-bit XBGR format (where the + // X bits are ignored and may contain garbage) would set `component_pad[0]` + // to 8, and the component_size[0:2] (respectively) to 8 as well. + // + // As a general rule of thumb, for maximum compatibility, you should try + // and align component_size/component_pad to multiples of 8 and explicitly + // clear any remaining superfluous bits (+ use `pl_color_repr.bits` to + // ensure they're decoded correctly). You should also try to align the + // `pixel_stride` to a power of two. +}; + +// Fills in the `component_size`, `component_pad` and `component_map` fields +// based on the supplied mask for each component (in semantic order, i.e. +// RGBA). Each element of `mask` must have a contiguous range of set bits. +PL_API void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]); + +// Fills in the `component_size`, `component_pad` and `component_map` fields +// based on the supplied sizes (in bits) and shift of each component (in +// semantic order). +// +// Similar to `pl_plane_data_from_mask` but not limited to 64-bit pixels. +PL_API void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4], + int shift[4]); + +// Helper function to take a `pl_plane_data` struct and try and improve its +// alignment to make it more likely to correspond to a real `pl_fmt`. It does +// this by attempting to round each component up to the nearest byte boundary. +// This relies on the assumption (true in practice) that superfluous bits of +// byte-misaligned formats are explicitly set to 0. +// +// The resulting shift must be consistent across all components, in which case +// it's returned in `out_bits`. If no alignment was possible, `out_bits` is set +// to {0}, and this function returns false. 
+PL_API bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits); + +// Helper function to find a suitable `pl_fmt` based on a pl_plane_data's +// requirements. This is called internally by `pl_upload_plane`, but it's +// exposed to users both as a convenience and so they may pre-emptively check +// if a format would be supported without actually having to attempt the upload. +PL_API pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data); + +// Upload an image plane to a texture, and output the resulting `pl_plane` +// struct to `out_plane` (optional). `tex` must be a valid pointer to a texture +// (or NULL), which will be destroyed and reinitialized if it does not already +// exist or is incompatible. Returns whether successful. +// +// The resulting texture is guaranteed to be `sampleable`, and it will also try +// and maximize compatibility with the other `pl_renderer` requirements +// (blittable, linear filterable, etc.). +// +// Note: `out_plane->shift_x/y` and `out_plane->flipped` are left +// uninitialized, and should be set explicitly by the user. +PL_API bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane, + pl_tex *tex, const struct pl_plane_data *data); + +// Like `pl_upload_plane`, but only creates an uninitialized texture object +// rather than actually performing an upload. This can be useful to, for +// example, prepare textures to be used as the target of rendering. +// +// The resulting texture is guaranteed to be `renderable`, and it will also try +// to maximize compatibility with the other `pl_renderer` requirements +// (blittable, storable, etc.). 
+PL_API bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane, + pl_tex *tex, const struct pl_plane_data *data); + +PL_API_END + +#endif // LIBPLACEBO_UPLOAD_H_ diff --git a/src/include/libplacebo/vulkan.h b/src/include/libplacebo/vulkan.h new file mode 100644 index 0000000..4e5db95 --- /dev/null +++ b/src/include/libplacebo/vulkan.h @@ -0,0 +1,638 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_VULKAN_H_ +#define LIBPLACEBO_VULKAN_H_ + +#include <vulkan/vulkan.h> +#include <libplacebo/gpu.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +#define PL_VK_MIN_VERSION VK_API_VERSION_1_2 + +// Structure representing a VkInstance. Using this is not required. +typedef const struct pl_vk_inst_t { + VkInstance instance; + + // The Vulkan API version supported by this VkInstance. + uint32_t api_version; + + // The associated vkGetInstanceProcAddr pointer. + PFN_vkGetInstanceProcAddr get_proc_addr; + + // The instance extensions that were successfully enabled, including + // extensions enabled by libplacebo internally. May contain duplicates. + const char * const *extensions; + int num_extensions; + + // The instance layers that were successfully enabled, including + // layers enabled by libplacebo internally. May contain duplicates. 
+ const char * const *layers; + int num_layers; +} *pl_vk_inst; + +struct pl_vk_inst_params { + // If set, enable the debugging and validation layers. These should + // generally be lightweight and relatively harmless to enable. + bool debug; + + // If set, also enable GPU-assisted verification and best practices + // layers. (Note: May cause substantial slowdown and/or result in lots of + // false positive spam) + bool debug_extra; + + // If nonzero, restricts the Vulkan API version to be at most this. This + // is only really useful for explicitly testing backwards compatibility. + uint32_t max_api_version; + + // Pointer to a user-provided `vkGetInstanceProcAddr`. If this is NULL, + // libplacebo will use the directly linked version (if available). + PFN_vkGetInstanceProcAddr get_proc_addr; + + // Enables extra instance extensions. Instance creation will fail if these + // extensions are not all supported. The user may use this to enable e.g. + // windowing system integration. + const char * const *extensions; + int num_extensions; + + // Enables extra optional instance extensions. These are opportunistically + // enabled if supported by the device, but otherwise skipped. + const char * const *opt_extensions; + int num_opt_extensions; + + // Enables extra layers. Instance creation will fail if these layers are + // not all supported. + // + // NOTE: Layers needed for required/optional extensions are automatically + // enabled. The user does not specifically need to enable layers related + // to extension support. + const char * const *layers; + int num_layers; + + // Enables extra optional layers. These are opportunistically enabled if + // supported by the platform, but otherwise skipped. + const char * const *opt_layers; + int num_opt_layers; +}; + +#define pl_vk_inst_params(...) (&(struct pl_vk_inst_params) { __VA_ARGS__ }) +PL_API extern const struct pl_vk_inst_params pl_vk_inst_default_params; + +// Helper function to simplify instance creation. 
The user could also bypass +// these helpers and do it manually, but this function is provided as a +// convenience. It also sets up a debug callback which forwards all vulkan +// messages to the `pl_log` callback. +PL_API pl_vk_inst pl_vk_inst_create(pl_log log, const struct pl_vk_inst_params *params); +PL_API void pl_vk_inst_destroy(pl_vk_inst *inst); + +struct pl_vulkan_queue { + uint32_t index; // Queue family index + uint32_t count; // Queue family count +}; + +// Structure representing the actual vulkan device and associated GPU instance +typedef const struct pl_vulkan_t *pl_vulkan; +struct pl_vulkan_t { + pl_gpu gpu; + + // The vulkan objects in use. The user may use this for their own purposes, + // but please note that the lifetime is tied to the lifetime of the + // pl_vulkan object, and must not be destroyed by the user. Note that the + // created vulkan device may have any number of queues and queue family + // assignments; so using it for queue submission commands is ill-advised. + VkInstance instance; + VkPhysicalDevice phys_device; + VkDevice device; + + // The associated vkGetInstanceProcAddr pointer. + PFN_vkGetInstanceProcAddr get_proc_addr; + + // The Vulkan API version supported by this VkPhysicalDevice. + uint32_t api_version; + + // The device extensions that were successfully enabled, including + // extensions enabled by libplacebo internally. May contain duplicates. + const char * const *extensions; + int num_extensions; + + // The device features that were enabled at device creation time. + // + // Note: Whenever a feature flag is ambiguous between several alternative + // locations, for completeness' sake, we include both. + const VkPhysicalDeviceFeatures2 *features; + + // The explicit queue families we are using to provide a given capability. 
+ struct pl_vulkan_queue queue_graphics; // provides VK_QUEUE_GRAPHICS_BIT + struct pl_vulkan_queue queue_compute; // provides VK_QUEUE_COMPUTE_BIT + struct pl_vulkan_queue queue_transfer; // provides VK_QUEUE_TRANSFER_BIT + + // Functions for locking a queue. These must be used to lock VkQueues for + // submission or other related operations when sharing the VkDevice between + // multiple threads. Using this on queue families or indices not contained + // in `queues` is undefined behavior. + void (*lock_queue)(pl_vulkan vk, uint32_t qf, uint32_t qidx); + void (*unlock_queue)(pl_vulkan vk, uint32_t qf, uint32_t qidx); + + // --- Deprecated fields + + // These are the same active queue families and their queue counts in list + // form. This list does not contain duplicates, nor any extra queues + // enabled at device creation time. Deprecated in favor of querying + // `vkGetPhysicalDeviceQueueFamilyProperties` directly. + const struct pl_vulkan_queue *queues PL_DEPRECATED; + int num_queues PL_DEPRECATED; +}; + +struct pl_vulkan_params { + // The vulkan instance. Optional, if NULL then libplacebo will internally + // create a VkInstance with the settings from `instance_params`. + // + // Note: The VkInstance provided by the user *MUST* be created with a + // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. + VkInstance instance; + + // Pointer to `vkGetInstanceProcAddr`. If this is NULL, libplacebo will + // use the directly linked version (if available). + // + // Note: This overwrites the same value from `instance_params`. + PFN_vkGetInstanceProcAddr get_proc_addr; + + // Configures the settings used for creating an internal vulkan instance. + // May be NULL. Ignored if `instance` is set. + const struct pl_vk_inst_params *instance_params; + + // When choosing the device, rule out all devices that don't support + // presenting to this surface. When creating a device, enable all extensions + // needed to ensure we can present to this surface. Optional. 
Only legal + // when specifying an existing VkInstance to use. + VkSurfaceKHR surface; + + // --- Physical device selection options + + // The vulkan physical device. May be set by the caller to indicate the + // physical device to use. Otherwise, libplacebo will pick the "best" + // available GPU, based on the advertised device type. (i.e., it will + // prefer discrete GPUs over integrated GPUs). Only legal when specifying + // an existing VkInstance to use. + VkPhysicalDevice device; + + // When choosing the device, only choose a device with this exact name. + // This overrides `allow_software`. No effect if `device` is set. Note: A + // list of devices and their names are logged at level PL_LOG_INFO. + const char *device_name; + + // When choosing the device, only choose a device with this exact UUID. + // This overrides `allow_software` and `device_name`. No effect if `device` + // is set. + uint8_t device_uuid[16]; + + // When choosing the device, controls whether or not to also allow software + // GPUs. No effect if `device` or `device_name` are set. + bool allow_software; + + // --- Logical device creation options + + // Controls whether or not to allow asynchronous transfers, using transfer + // queue families, if supported by the device. This can be significantly + // faster and more power efficient, and also allows streaming uploads in + // parallel with rendering commands. Enabled by default. + bool async_transfer; + + // Controls whether or not to allow asynchronous compute, using dedicated + // compute queue families, if supported by the device. On some devices, + // these can allow the GPU to schedule compute shaders in parallel with + // fragment shaders. Enabled by default. + bool async_compute; + + // Limits the number of queues to use. If left as 0, libplacebo will use as + // many queues as the device supports. 
Multiple queues can result in + // improved efficiency when submitting multiple commands that can entirely + // or partially execute in parallel. Defaults to 1, since using more queues + // can actually decrease performance. + // + // Note: libplacebo will always *create* logical devices with all available + // queues for a given QF enabled, regardless of this setting. + int queue_count; + + // Bitmask of extra queue families to enable. If set, then *all* queue + // families matching *any* of these flags will be enabled at device + // creation time. Setting this to VK_QUEUE_FLAG_BITS_MAX_ENUM effectively + // enables all queue families supported by the device. + VkQueueFlags extra_queues; + + // Enables extra device extensions. Device creation will fail if these + // extensions are not all supported. The user may use this to enable e.g. + // interop extensions. + const char * const *extensions; + int num_extensions; + + // Enables extra optional device extensions. These are opportunistically + // enabled if supported by the device, but otherwise skipped. + const char * const *opt_extensions; + int num_opt_extensions; + + // Optional extra features to enable at device creation time. These are + // opportunistically enabled if supported by the physical device, but + // otherwise kept disabled. + const VkPhysicalDeviceFeatures2 *features; + + // --- Misc/debugging options + + // Restrict specific features to e.g. work around driver bugs, or simply + // for testing purposes + int max_glsl_version; // limit the maximum GLSL version + uint32_t max_api_version; // limit the maximum vulkan API version +}; + +// Default/recommended parameters. Should generally be safe and efficient. +#define PL_VULKAN_DEFAULTS \ + .async_transfer = true, \ + .async_compute = true, \ + /* enabling multiple queues often decreases perf */ \ + .queue_count = 1, + +#define pl_vulkan_params(...) 
(&(struct pl_vulkan_params) { PL_VULKAN_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_vulkan_params pl_vulkan_default_params; + +// Creates a new vulkan device based on the given parameters and initializes +// a new GPU. If `params` is left as NULL, it defaults to +// &pl_vulkan_default_params. +// +// Thread-safety: Safe +PL_API pl_vulkan pl_vulkan_create(pl_log log, const struct pl_vulkan_params *params); + +// Destroys the vulkan device and all associated objects, except for the +// VkInstance provided by the user. +// +// Note that all resources allocated from this vulkan object (e.g. via the +// `vk->ra` or using `pl_vulkan_create_swapchain`) *must* be explicitly +// destroyed by the user before calling this. +// +// Also note that this function will block until all in-flight GPU commands are +// finished processing. You can avoid this by manually calling `pl_gpu_finish` +// before `pl_vulkan_destroy`. +PL_API void pl_vulkan_destroy(pl_vulkan *vk); + +// For a `pl_gpu` backed by `pl_vulkan`, this function can be used to retrieve +// the underlying `pl_vulkan`. Returns NULL for any other type of `gpu`. +PL_API pl_vulkan pl_vulkan_get(pl_gpu gpu); + +struct pl_vulkan_device_params { + // The instance to use. Required! + // + // Note: The VkInstance provided by the user *must* be created with a + // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. + VkInstance instance; + + // Mirrored from `pl_vulkan_params`. All of these fields are optional. + PFN_vkGetInstanceProcAddr get_proc_addr; + VkSurfaceKHR surface; + const char *device_name; + uint8_t device_uuid[16]; + bool allow_software; +}; + +#define pl_vulkan_device_params(...) (&(struct pl_vulkan_device_params) { __VA_ARGS__ }) + +// Helper function to choose the best VkPhysicalDevice, given a VkInstance. +// This uses the same logic as `pl_vulkan_create` uses internally. If no +// matching device was found, this returns VK_NULL_HANDLE. 
+PL_API VkPhysicalDevice pl_vulkan_choose_device(pl_log log, + const struct pl_vulkan_device_params *params); + +struct pl_vulkan_swapchain_params { + // The surface to use for rendering. Required, the user is in charge of + // creating this. Must belong to the same VkInstance as `vk->instance`. + VkSurfaceKHR surface; + + // The preferred presentation mode. See the vulkan documentation for more + // information about these. If the device/surface combination does not + // support this mode, libplacebo will fall back to VK_PRESENT_MODE_FIFO_KHR. + // + // Warning: Leaving this zero-initialized is the same as having specified + // VK_PRESENT_MODE_IMMEDIATE_KHR, which is probably not what the user + // wants! + VkPresentModeKHR present_mode; + + // Allow up to N in-flight frames. This essentially controls how many + // rendering commands may be queued up at the same time. See the + // documentation for `pl_swapchain_get_latency` for more information. For + // vulkan specifically, we are only able to wait until the GPU has finished + // rendering a frame - we are unable to wait until the display has actually + // finished displaying it. So this only provides a rough guideline. + // Optional, defaults to 3. + int swapchain_depth; + + // This suppresses automatic recreation of the swapchain when any call + // returns VK_SUBOPTIMAL_KHR. Normally, libplacebo will recreate the + // swapchain internally on the next `pl_swapchain_start_frame`. If enabled, + // clients are assumed to take care of swapchain recreations themselves, by + // calling `pl_swapchain_resize` as appropriate. libplacebo will tolerate + // the "suboptimal" status indefinitely. + bool allow_suboptimal; + + // Disable high-bit (10 or more) SDR formats. May help work around buggy + // drivers which don't dither properly when outputting high bit depth + // SDR backbuffers to 8-bit screens. + bool disable_10bit_sdr; +}; + +#define pl_vulkan_swapchain_params(...) 
(&(struct pl_vulkan_swapchain_params) { __VA_ARGS__ }) + +// Creates a new vulkan swapchain based on an existing VkSurfaceKHR. Using this +// function requires that the vulkan device was created with the +// VK_KHR_swapchain extension. The easiest way of accomplishing this is to set +// the `pl_vulkan_params.surface` explicitly at creation time. +PL_API pl_swapchain pl_vulkan_create_swapchain(pl_vulkan vk, + const struct pl_vulkan_swapchain_params *params); + +// This will return true if the vulkan swapchain is internally detected +// as being suboptimal (VK_SUBOPTIMAL_KHR). This might be of use to clients +// who have `params->allow_suboptimal` enabled. +PL_API bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw); + +// Vulkan interop API, for sharing a single VkDevice (and associated vulkan +// resources) directly with the API user. The use of this API is a bit sketchy +// and requires careful communication of Vulkan API state. + +struct pl_vulkan_import_params { + // The vulkan instance. Required. + // + // Note: The VkInstance provided by the user *must* be created with a + // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. + VkInstance instance; + + // Pointer to `vkGetInstanceProcAddr`. If this is NULL, libplacebo will + // use the directly linked version (if available). + PFN_vkGetInstanceProcAddr get_proc_addr; + + // The physical device selected by the user. Required. + VkPhysicalDevice phys_device; + + // The logical device created by the user. Required. + VkDevice device; + + // --- Logical device parameters + + // List of all device-level extensions that were enabled. (Instance-level + // extensions need not be re-specified here, since it's guaranteed that any + // instance-level extensions that device-level extensions depend on were + // enabled at the instance level) + const char * const *extensions; + int num_extensions; + + // Enabled queue families. At least `queue_graphics` is required. 
+ // + // It's okay for multiple queue families to be specified with the same + // index, e.g. in the event that a dedicated compute queue also happens to + // be the dedicated transfer queue. + // + // It's also okay to leave the queue struct as {0} in the event that no + // dedicated queue exists for a given operation type. libplacebo will + // automatically fall back to using e.g. the graphics queue instead. + struct pl_vulkan_queue queue_graphics; // must support VK_QUEUE_GRAPHICS_BIT + struct pl_vulkan_queue queue_compute; // must support VK_QUEUE_COMPUTE_BIT + struct pl_vulkan_queue queue_transfer; // must support VK_QUEUE_TRANSFER_BIT + + // Enabled VkPhysicalDeviceFeatures. The device *must* be created with + // all of the features in `pl_vulkan_required_features` enabled. + const VkPhysicalDeviceFeatures2 *features; + + // Functions for locking a queue. If set, these will be used instead of + // libplacebo's internal functions for `pl_vulkan.(un)lock_queue`. + void (*lock_queue)(void *ctx, uint32_t qf, uint32_t qidx); + void (*unlock_queue)(void *ctx, uint32_t qf, uint32_t qidx); + void *queue_ctx; + + // --- Misc/debugging options + + // Restrict specific features to e.g. work around driver bugs, or simply + // for testing purposes. See `pl_vulkan_params` for a description of these. + int max_glsl_version; + uint32_t max_api_version; +}; + +#define pl_vulkan_import_params(...) (&(struct pl_vulkan_import_params) { __VA_ARGS__ }) + +// For purely informative reasons, this contains a list of extensions and +// device features that libplacebo *can* make use of. These are all strictly +// optional, but provide a hint to the API user as to what might be worth +// enabling at device creation time. +// +// Note: This also includes physical device features provided by extensions. +// They are all provided using extension-specific features structs, rather +// than the more general purpose VkPhysicalDeviceVulkan11Features etc. 
+PL_API extern const char * const pl_vulkan_recommended_extensions[]; +PL_API extern const int pl_vulkan_num_recommended_extensions; +PL_API extern const VkPhysicalDeviceFeatures2 pl_vulkan_recommended_features; + +// A list of device features that are required by libplacebo. These +// *must* be provided by imported Vulkan devices. +// +// Note: `pl_vulkan_recommended_features` does not include this list. +PL_API extern const VkPhysicalDeviceFeatures2 pl_vulkan_required_features; + +// Import an existing VkDevice instead of creating a new one, and wrap it into +// a `pl_vulkan` abstraction. It's safe to `pl_vulkan_destroy` this, which will +// destroy application state related to libplacebo but leave the underlying +// VkDevice intact. +PL_API pl_vulkan pl_vulkan_import(pl_log log, const struct pl_vulkan_import_params *params); + +struct pl_vulkan_wrap_params { + // The image itself. It *must* be usable concurrently by all of the queue + // family indices listed in `pl_vulkan->queues`. Note that this requires + // the use of VK_SHARING_MODE_CONCURRENT if `pl_vulkan->num_queues` is + // greater than 1. If this is difficult to achieve for the user, then + // `async_transfer` / `async_compute` should be turned off, which + // guarantees the use of only one queue family. + VkImage image; + + // Which aspect of `image` to wrap. Only useful for wrapping individual + // sub-planes of planar images. If left as 0, it defaults to the entire + // image (i.e. the union of VK_IMAGE_ASPECT_PLANE_N_BIT for planar formats, + // and VK_IMAGE_ASPECT_COLOR_BIT otherwise). + VkImageAspectFlags aspect; + + // The image's dimensions (unused dimensions must be 0) + int width; + int height; + int depth; + + // The image's format. libplacebo will try to map this to an equivalent + // pl_fmt. If no compatible pl_fmt is found, wrapping will fail. + VkFormat format; + + // The usage flags the image was created with. 
libplacebo will set the + // pl_tex capabilities to include whatever it can, as determined by the set + // of enabled usage flags. + VkImageUsageFlags usage; + + // See `pl_tex_params` + void *user_data; + pl_debug_tag debug_tag; +}; + +#define pl_vulkan_wrap_params(...) (&(struct pl_vulkan_wrap_params) { \ + .debug_tag = PL_DEBUG_TAG, \ + __VA_ARGS__ \ + }) + +// Wraps an external VkImage into a pl_tex abstraction. By default, the image +// is considered "held" by the user and must be released before calling any +// pl_tex_* API calls on it (see `pl_vulkan_release`). +// +// This wrapper can be destroyed by simply calling `pl_tex_destroy` on it, +// which will not destroy the underlying VkImage. If a pl_tex wrapper is +// destroyed while an image is not currently being held by the user, that +// image is left in an undefined state. +// +// Wrapping the same VkImage multiple times is undefined behavior, as is trying +// to wrap an image belonging to a different VkDevice than the one in use by +// `gpu`. +// +// This function may fail, in which case it returns NULL. +PL_API pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params); + +// Analogous to `pl_vulkan_wrap`, this function takes any `pl_tex` (including +// ones created by `pl_tex_create`) and unwraps it to expose the underlying +// VkImage to the user. Unlike `pl_vulkan_wrap`, this `pl_tex` is *not* +// considered held after calling this function - the user must explicitly +// `pl_vulkan_hold` before accessing the VkImage. +// +// `out_format` and `out_flags` will be updated to hold the VkImage's +// format and usage flags. (Optional) +PL_API VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex, + VkFormat *out_format, VkImageUsageFlags *out_flags); + +// Represents a vulkan semaphore/value pair (for compatibility with timeline +// semaphores). When using normal, binary semaphores, `value` may be ignored. 
+typedef struct pl_vulkan_sem { + VkSemaphore sem; + uint64_t value; +} pl_vulkan_sem; + +struct pl_vulkan_hold_params { + // The Vulkan image to hold. It will be marked as held. Attempting to + // perform any pl_tex_* operation (except pl_tex_destroy) on a held image + // is undefined behavior. + pl_tex tex; + + // The layout to transition the image to when holding. Alternatively, a + // pointer to receive the current image layout. If `out_layout` is + // provided, `layout` is ignored. + VkImageLayout layout; + VkImageLayout *out_layout; + + // The queue family index to transition the image to. This can be used with + // VK_QUEUE_FAMILY_EXTERNAL to transition the image to an external API. As + // a special case, if set to VK_QUEUE_FAMILY_IGNORED, libplacebo will not + // transition the image, even if this image was not set up for concurrent + // usage. Ignored for concurrent images. + uint32_t qf; + + // The semaphore to fire when the image is available for use. (Required) + pl_vulkan_sem semaphore; +}; + +#define pl_vulkan_hold_params(...) (&(struct pl_vulkan_hold_params) { __VA_ARGS__ }) + +// "Hold" a shared image, transferring control over the image to the user. +// Returns whether successful. +PL_API bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params); + +struct pl_vulkan_release_params { + // The image to be released. It must be marked as "held". Performing any + // operation on the VkImage underlying this `pl_tex` while it is not being + // held by the user is undefined behavior. + pl_tex tex; + + // The current layout of the image at the point in time when `semaphore` + // fires, or if no semaphore is specified, at the time of call. + VkImageLayout layout; + + // The queue family index to transition the image to. This can be used with + // VK_QUEUE_FAMILY_EXTERNAL to transition the image from an external API. 
As + // a special case, if set to VK_QUEUE_FAMILY_IGNORED, libplacebo will not + // transition the image, even if this image was not set up for concurrent + // usage. Ignored for concurrent images. + uint32_t qf; + + // The semaphore to wait on before libplacebo will actually use or modify + // the image. (Optional) + // + // Note: the lifetime of `semaphore` is indeterminate, and destroying it + // while the texture is still depending on that semaphore is undefined + // behavior. + // + // Technically, the only way to be sure that it's safe to free is to use + // `pl_gpu_finish()` or similar (e.g. `pl_vulkan_destroy` or + // `vkDeviceWaitIdle`) after another operation involving `tex` has been + // emitted (or the texture has been destroyed). + // + // + // Warning: If `tex` is a planar image (`pl_fmt.num_planes > 0`), and + // `semaphore` is specified, it *must* be a timeline semaphore! Failure to + // respect this will result in undefined behavior. This warning does not + // apply to individual planes (as exposed by `pl_tex.planes`). + pl_vulkan_sem semaphore; +}; + +#define pl_vulkan_release_params(...) (&(struct pl_vulkan_release_params) { __VA_ARGS__ }) + +// "Release" a shared image, transferring control to libplacebo. +PL_API void pl_vulkan_release_ex(pl_gpu gpu, const struct pl_vulkan_release_params *params); + +struct pl_vulkan_sem_params { + // The type of semaphore to create. + VkSemaphoreType type; + + // For VK_SEMAPHORE_TYPE_TIMELINE, sets the initial timeline value. + uint64_t initial_value; + + // If set, exports this VkSemaphore to the handle given in `out_handle`. + // The user takes over ownership, and should manually close it before + // destroying this VkSemaphore (via `pl_vulkan_sem_destroy`). + enum pl_handle_type export_handle; + union pl_handle *out_handle; + + // Optional debug tag to identify this semaphore. + pl_debug_tag debug_tag; +}; + +#define pl_vulkan_sem_params(...) 
(&(struct pl_vulkan_sem_params) { \ + .debug_tag = PL_DEBUG_TAG, \ + __VA_ARGS__ \ + }) + +// Helper functions to create and destroy vulkan semaphores. Returns +// VK_NULL_HANDLE on failure. +PL_API VkSemaphore pl_vulkan_sem_create(pl_gpu gpu, const struct pl_vulkan_sem_params *params); +PL_API void pl_vulkan_sem_destroy(pl_gpu gpu, VkSemaphore *semaphore); + +// Backwards-compatibility wrappers for older versions of the API. +PL_DEPRECATED PL_API bool pl_vulkan_hold(pl_gpu gpu, pl_tex tex, VkImageLayout layout, + pl_vulkan_sem sem_out); +PL_DEPRECATED PL_API bool pl_vulkan_hold_raw(pl_gpu gpu, pl_tex tex, VkImageLayout *out_layout, + pl_vulkan_sem sem_out); +PL_DEPRECATED PL_API void pl_vulkan_release(pl_gpu gpu, pl_tex tex, VkImageLayout layout, + pl_vulkan_sem sem_in); + +PL_API_END + +#endif // LIBPLACEBO_VULKAN_H_ |