diff options
Diffstat (limited to 'src/include')
35 files changed, 11304 insertions, 0 deletions
diff --git a/src/include/libplacebo/cache.h b/src/include/libplacebo/cache.h new file mode 100644 index 0000000..5897ac8 --- /dev/null +++ b/src/include/libplacebo/cache.h @@ -0,0 +1,200 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_CACHE_H_ +#define LIBPLACEBO_CACHE_H_ + +#include <stddef.h> +#include <stdint.h> +#include <stdio.h> + +#include <libplacebo/config.h> +#include <libplacebo/common.h> +#include <libplacebo/log.h> + +PL_API_BEGIN + +typedef struct pl_cache_obj { + // Cache object key. This will uniquely identify this cached object. + uint64_t key; + + // Cache data pointer and length. 0-length cached objects are invalid + // and will be silently dropped. You can explicitly remove a cached + // object by overwriting it with a length 0 object. + void *data; + size_t size; + + // Free callback, to free memory associated with `data`. (Optional) + // Will be called when the object is either explicitly deleted, culled + // due to hitting size limits, or on pl_cache_destroy(). + void (*free)(void *data); +} pl_cache_obj; + +struct pl_cache_params { + // Optional `pl_log` that is used for logging internal events related + // to the cache, such as insertions, saving and loading. + pl_log log; + + // Size limits. If 0, no limit is imposed. 
+ // + // Note: libplacebo will never detect or invalidate stale cache entries, so + // setting an upper size limit is strongly recommended + size_t max_object_size; + size_t max_total_size; + + // Optional external callback to call after a cached object is modified + // (including deletion and (re-)insertion). Note that this is not called on + // objects which are merely pruned from the cache due to `max_total_size`, + // so users must rely on some external mechanism to prune stale entries or + // enforce size limits. + // + // Note: `pl_cache_load` does not trigger this callback. + // Note: Ownership of `obj` does *not* pass to the caller. + // Note: This function must be thread safe. + void (*set)(void *priv, pl_cache_obj obj); + + // Optional external callback to call on a cache miss. Ownership of the + // returned object passes to the `pl_cache`. Objects returned by this + // callback *should* have a valid `free` callback, unless lifetime can be + // externally managed and guaranteed to outlive the `pl_cache`. + // + // Note: This function must be thread safe. + pl_cache_obj (*get)(void *priv, uint64_t key); + + // External context for insert/lookup. + void *priv; +}; + +#define pl_cache_params(...) (&(struct pl_cache_params) { __VA_ARGS__ }) +PL_API extern const struct pl_cache_params pl_cache_default_params; + +// Thread-safety: Safe +// +// Note: In any context in which `pl_cache` is used, users may also pass NULL +// to disable caching. In other words, NULL is a valid `pl_cache`. +typedef const struct pl_cache_t { + struct pl_cache_params params; +} *pl_cache; + +// Create a new cache. This function will never fail. +PL_API pl_cache pl_cache_create(const struct pl_cache_params *params); + +// Destroy a `pl_cache` object, including all underlying objects. +PL_API void pl_cache_destroy(pl_cache *cache); + +// Explicitly clear all objects in the cache without destroying it. This is +// similar to `pl_cache_destroy`, but the cache remains valid afterwards. 
+// +// Note: Objects destroyed in this way are *not* propagated to the `set` callback. +PL_API void pl_cache_reset(pl_cache cache); + +// Return the current internal number of objects and total size (bytes) +PL_API int pl_cache_objects(pl_cache cache); +PL_API size_t pl_cache_size(pl_cache cache); + +// --- Cache saving and loading APIs + +// Serialize the internal state of a `pl_cache` into an abstract cache +// object that can be e.g. saved to disk and loaded again later. Returns the +// number of objects saved. +// +// Note: Using `save/load` is largely redundant with using `insert/lookup` +// callbacks, and the user should decide whether to use the explicit API or the +// callback-based API. +PL_API int pl_cache_save_ex(pl_cache cache, + void (*write)(void *priv, size_t size, const void *ptr), + void *priv); + +// Load the result of a previous `pl_cache_save` call. Any duplicate entries in +// the `pl_cache` will be overwritten. Returns the number of objects loaded, or +// a negative number on serious error (e.g. corrupt header) +// +// Note: This does not trigger the `set` callback. +PL_API int pl_cache_load_ex(pl_cache cache, + bool (*read)(void *priv, size_t size, void *ptr), + void *priv); + +// --- Convenience wrappers around pl_cache_save/load_ex + +// Writes data directly to a pointer. Returns the number of bytes that *would* +// have been written, so this can be used on a size 0 buffer to get the required +// total size. +PL_API size_t pl_cache_save(pl_cache cache, uint8_t *data, size_t size); + +// Reads data directly from a pointer. This still reads from `data`, so it does +// not avoid a copy. +PL_API int pl_cache_load(pl_cache cache, const uint8_t *data, size_t size); + +// Writes/loads data to/from a FILE stream at the current position. 
+#define pl_cache_save_file(c, file) pl_cache_save_ex(c, pl_write_file_cb, file) +#define pl_cache_load_file(c, file) pl_cache_load_ex(c, pl_read_file_cb, file) + +static inline void pl_write_file_cb(void *priv, size_t size, const void *ptr) +{ + (void) fwrite(ptr, 1, size, (FILE *) priv); +} + +static inline bool pl_read_file_cb(void *priv, size_t size, void *ptr) +{ + return fread(ptr, 1, size, (FILE *) priv) == size; +} + +// --- Object modification API. Mostly intended for internal use. + +// Insert a new cached object into a `pl_cache`. Returns whether successful. +// Overwrites any existing cached object with that signature, so this can be +// used to e.g. delete objects as well (set their size to 0). On success, +// ownership of `obj` passes to the `pl_cache`. +// +// Note: If `object.free` is NULL, this will perform an internal memdup. To +// bypass this (e.g. when directly adding externally managed memory), you can +// set the `free` callback to an explicit noop function. +// +// Note: `obj->data/free` will be reset to NULL on successful insertion. +PL_API bool pl_cache_try_set(pl_cache cache, pl_cache_obj *obj); + +// Variant of `pl_cache_try_set` that simply frees `obj` on failure. +PL_API void pl_cache_set(pl_cache cache, pl_cache_obj *obj); + +// Looks up `obj->key` in the object cache. If successful, `obj->data` is +// set to memory owned by the caller, which must be either explicitly +// re-inserted, or explicitly freed (using obj->free). +// +// Note: On failure, `obj->data/size/free` are reset to NULL. +PL_API bool pl_cache_get(pl_cache cache, pl_cache_obj *obj); + +// Run a callback on every object currently stored in `cache`. +// +// Note: Running any `pl_cache_*` function on `cache` from this callback is +// undefined behavior. 
+PL_API void pl_cache_iterate(pl_cache cache, + void (*cb)(void *priv, pl_cache_obj obj), + void *priv); + +// Utility wrapper to free a `pl_cache_obj` if necessary (and sanitize it) +static inline void pl_cache_obj_free(pl_cache_obj *obj) +{ + if (obj->free) + obj->free(obj->data); + obj->data = NULL; + obj->free = NULL; + obj->size = 0; +} + +PL_API_END + +#endif // LIBPLACEBO_CACHE_H_ diff --git a/src/include/libplacebo/colorspace.h b/src/include/libplacebo/colorspace.h new file mode 100644 index 0000000..6663019 --- /dev/null +++ b/src/include/libplacebo/colorspace.h @@ -0,0 +1,719 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_COLORSPACE_H_ +#define LIBPLACEBO_COLORSPACE_H_ + +#include <stdbool.h> +#include <stddef.h> +#include <stdint.h> + +#include <libplacebo/common.h> + +PL_API_BEGIN + +// The underlying color representation (e.g. RGB, XYZ or YCbCr) +enum pl_color_system { + PL_COLOR_SYSTEM_UNKNOWN = 0, + // YCbCr-like color systems: + PL_COLOR_SYSTEM_BT_601, // ITU-R Rec. BT.601 (SD) + PL_COLOR_SYSTEM_BT_709, // ITU-R Rec. BT.709 (HD) + PL_COLOR_SYSTEM_SMPTE_240M, // SMPTE-240M + PL_COLOR_SYSTEM_BT_2020_NC, // ITU-R Rec. BT.2020 (non-constant luminance) + PL_COLOR_SYSTEM_BT_2020_C, // ITU-R Rec. BT.2020 (constant luminance) + PL_COLOR_SYSTEM_BT_2100_PQ, // ITU-R Rec. 
BT.2100 ICtCp PQ variant + PL_COLOR_SYSTEM_BT_2100_HLG, // ITU-R Rec. BT.2100 ICtCp HLG variant + PL_COLOR_SYSTEM_DOLBYVISION, // Dolby Vision (see pl_dovi_metadata) + PL_COLOR_SYSTEM_YCGCO, // YCgCo (derived from RGB) + // Other color systems: + PL_COLOR_SYSTEM_RGB, // Red, Green and Blue + PL_COLOR_SYSTEM_XYZ, // Digital Cinema Distribution Master (XYZ) + PL_COLOR_SYSTEM_COUNT +}; + +PL_API bool pl_color_system_is_ycbcr_like(enum pl_color_system sys); + +// Returns true for color systems that are linear transformations of the RGB +// equivalent, i.e. are simple matrix multiplications. For color systems with +// this property, `pl_color_repr_decode` is sufficient for conversion to RGB. +PL_API bool pl_color_system_is_linear(enum pl_color_system sys); + +// Guesses the best YCbCr-like colorspace based on a given image resolution. +// This only picks conservative values. (In particular, BT.2020 is never +// auto-guessed, even for 4K resolution content) +PL_API enum pl_color_system pl_color_system_guess_ycbcr(int width, int height); + +// Friendly names for the canonical channel names and order. +enum pl_channel { + PL_CHANNEL_NONE = -1, + PL_CHANNEL_A = 3, // alpha + // RGB system + PL_CHANNEL_R = 0, + PL_CHANNEL_G = 1, + PL_CHANNEL_B = 2, + // YCbCr-like systems + PL_CHANNEL_Y = 0, + PL_CHANNEL_CB = 1, + PL_CHANNEL_CR = 2, + // Aliases for Cb/Cr + PL_CHANNEL_U = 1, + PL_CHANNEL_V = 2 + // There are deliberately no names for the XYZ system to avoid + // confusion due to PL_CHANNEL_Y. +}; + +// The numerical range of the representation (where applicable). +enum pl_color_levels { + PL_COLOR_LEVELS_UNKNOWN = 0, + PL_COLOR_LEVELS_LIMITED, // Limited/TV range, e.g. 16-235 + PL_COLOR_LEVELS_FULL, // Full/PC range, e.g. 0-255 + PL_COLOR_LEVELS_COUNT, + + // Compatibility aliases + PL_COLOR_LEVELS_TV = PL_COLOR_LEVELS_LIMITED, + PL_COLOR_LEVELS_PC = PL_COLOR_LEVELS_FULL, +}; + +// The alpha representation mode. 
+enum pl_alpha_mode { + PL_ALPHA_UNKNOWN = 0, // or no alpha channel present + PL_ALPHA_INDEPENDENT, // alpha channel is separate from the video + PL_ALPHA_PREMULTIPLIED, // alpha channel is multiplied into the colors + PL_ALPHA_MODE_COUNT, +}; + +// The underlying bit-wise representation of a color sample. For example, +// a 10-bit TV-range YCbCr value uploaded to a 16 bit texture would have +// sample_depth=16 color_depth=10 bit_shift=0. +// +// For another example, a 12-bit XYZ full range sample shifted to 16-bits with +// the lower 4 bits all set to 0 would have sample_depth=16 color_depth=12 +// bit_shift=4. (libavcodec likes outputting this type of `xyz12`) +// +// To explain the meaning of `sample_depth` further; the consideration factor +// here is the fact that GPU sampling will normalize the sampled color to the +// range 0.0 - 1.0 in a manner dependent on the number of bits in the texture +// format. So if you upload a 10-bit YCbCr value unpadded as 16-bit color +// samples, all of the sampled values will be extremely close to 0.0. In such a +// case, `pl_color_repr_normalize` would return a high scaling factor, which +// would pull the color up to their 16-bit range. +struct pl_bit_encoding { + int sample_depth; // the number of bits the color is stored/sampled as + int color_depth; // the effective number of bits of the color information + int bit_shift; // a representational bit shift applied to the color +}; + +// Returns whether two bit encodings are exactly identical. 
+PL_API bool pl_bit_encoding_equal(const struct pl_bit_encoding *b1, + const struct pl_bit_encoding *b2); + +// Parsed metadata from the Dolby Vision RPU +struct pl_dovi_metadata { + // Colorspace transformation metadata + float nonlinear_offset[3]; // input offset ("ycc_to_rgb_offset") + pl_matrix3x3 nonlinear; // before PQ, also called "ycc_to_rgb" + pl_matrix3x3 linear; // after PQ, also called "rgb_to_lms" + + // Reshape data, grouped by component + struct pl_reshape_data { + uint8_t num_pivots; + float pivots[9]; // normalized to [0.0, 1.0] based on BL bit depth + uint8_t method[8]; // 0 = polynomial, 1 = MMR + // Note: these must be normalized (divide by coefficient_log2_denom) + float poly_coeffs[8][3]; // x^0, x^1, x^2, unused must be 0 + uint8_t mmr_order[8]; // 1, 2 or 3 + float mmr_constant[8]; + float mmr_coeffs[8][3 /* order */][7]; + } comp[3]; +}; + +// Struct describing the underlying color system and representation. This +// information is needed to convert an encoded color to a normalized RGB triple +// in the range 0-1. +struct pl_color_repr { + enum pl_color_system sys; + enum pl_color_levels levels; + enum pl_alpha_mode alpha; + struct pl_bit_encoding bits; // or {0} if unknown + + // Metadata for PL_COLOR_SYSTEM_DOLBYVISION. Note that, for the sake of + // efficiency, this is treated purely as an opaque reference - functions + // like pl_color_repr_equal will merely do a pointer equality test. + // + // The only functions that actually dereference it in any way are + // pl_color_repr_decode, pl_shader_decode_color and pl_render_image(_mix). + const struct pl_dovi_metadata *dovi; +}; + +// Some common color representations. 
It's worth pointing out that all of these +// presets leave `alpha` and `bits` as unknown - that is, only the system and +// levels are predefined +PL_API extern const struct pl_color_repr pl_color_repr_unknown; +PL_API extern const struct pl_color_repr pl_color_repr_rgb; +PL_API extern const struct pl_color_repr pl_color_repr_sdtv; +PL_API extern const struct pl_color_repr pl_color_repr_hdtv; // also Blu-ray +PL_API extern const struct pl_color_repr pl_color_repr_uhdtv; // SDR, NCL system +PL_API extern const struct pl_color_repr pl_color_repr_jpeg; + +// Returns whether two colorspace representations are exactly identical. +PL_API bool pl_color_repr_equal(const struct pl_color_repr *c1, + const struct pl_color_repr *c2); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_color_repr_merge(struct pl_color_repr *orig, + const struct pl_color_repr *update); + +// This function normalizes the color representation such that +// color_depth=sample_depth and bit_shift=0; and returns the scaling factor +// that must be multiplied into the color value to accomplish this, assuming +// it has already been sampled by the GPU. If unknown, the color and sample +// depth will both be inferred as 8 bits for the purposes of this conversion. +PL_API float pl_color_repr_normalize(struct pl_color_repr *repr); + +// Guesses the best color levels based on the specified color levels and +// falling back to using the color system instead. YCbCr-like systems are +// assumed to be TV range, otherwise this defaults to PC range. +PL_API enum pl_color_levels pl_color_levels_guess(const struct pl_color_repr *repr); + +// The colorspace's primaries (gamut) +enum pl_color_primaries { + PL_COLOR_PRIM_UNKNOWN = 0, + // Standard gamut: + PL_COLOR_PRIM_BT_601_525, // ITU-R Rec. BT.601 (525-line = NTSC, SMPTE-C) + PL_COLOR_PRIM_BT_601_625, // ITU-R Rec. BT.601 (625-line = PAL, SECAM) + PL_COLOR_PRIM_BT_709, // ITU-R Rec. 
BT.709 (HD), also sRGB + PL_COLOR_PRIM_BT_470M, // ITU-R Rec. BT.470 M + PL_COLOR_PRIM_EBU_3213, // EBU Tech. 3213-E / JEDEC P22 phosphors + // Wide gamut: + PL_COLOR_PRIM_BT_2020, // ITU-R Rec. BT.2020 (UltraHD) + PL_COLOR_PRIM_APPLE, // Apple RGB + PL_COLOR_PRIM_ADOBE, // Adobe RGB (1998) + PL_COLOR_PRIM_PRO_PHOTO, // ProPhoto RGB (ROMM) + PL_COLOR_PRIM_CIE_1931, // CIE 1931 RGB primaries + PL_COLOR_PRIM_DCI_P3, // DCI-P3 (Digital Cinema) + PL_COLOR_PRIM_DISPLAY_P3, // DCI-P3 (Digital Cinema) with D65 white point + PL_COLOR_PRIM_V_GAMUT, // Panasonic V-Gamut (VARICAM) + PL_COLOR_PRIM_S_GAMUT, // Sony S-Gamut + PL_COLOR_PRIM_FILM_C, // Traditional film primaries with Illuminant C + PL_COLOR_PRIM_ACES_AP0, // ACES Primaries #0 (ultra wide) + PL_COLOR_PRIM_ACES_AP1, // ACES Primaries #1 + PL_COLOR_PRIM_COUNT +}; + +PL_API bool pl_color_primaries_is_wide_gamut(enum pl_color_primaries prim); + +// Guesses the best primaries based on a resolution. This always guesses +// conservatively, i.e. it will never return a wide gamut color space even if +// the resolution is 4K. +PL_API enum pl_color_primaries pl_color_primaries_guess(int width, int height); + +// The colorspace's transfer function (gamma / EOTF) +enum pl_color_transfer { + PL_COLOR_TRC_UNKNOWN = 0, + // Standard dynamic range: + PL_COLOR_TRC_BT_1886, // ITU-R Rec. 
BT.1886 (CRT emulation + OOTF) + PL_COLOR_TRC_SRGB, // IEC 61966-2-4 sRGB (CRT emulation) + PL_COLOR_TRC_LINEAR, // Linear light content + PL_COLOR_TRC_GAMMA18, // Pure power gamma 1.8 + PL_COLOR_TRC_GAMMA20, // Pure power gamma 2.0 + PL_COLOR_TRC_GAMMA22, // Pure power gamma 2.2 + PL_COLOR_TRC_GAMMA24, // Pure power gamma 2.4 + PL_COLOR_TRC_GAMMA26, // Pure power gamma 2.6 + PL_COLOR_TRC_GAMMA28, // Pure power gamma 2.8 + PL_COLOR_TRC_PRO_PHOTO, // ProPhoto RGB (ROMM) + PL_COLOR_TRC_ST428, // Digital Cinema Distribution Master (XYZ) + // High dynamic range: + PL_COLOR_TRC_PQ, // ITU-R BT.2100 PQ (perceptual quantizer), aka SMPTE ST2048 + PL_COLOR_TRC_HLG, // ITU-R BT.2100 HLG (hybrid log-gamma), aka ARIB STD-B67 + PL_COLOR_TRC_V_LOG, // Panasonic V-Log (VARICAM) + PL_COLOR_TRC_S_LOG1, // Sony S-Log1 + PL_COLOR_TRC_S_LOG2, // Sony S-Log2 + PL_COLOR_TRC_COUNT +}; + +// Returns the nominal peak of a given transfer function, relative to the +// reference white. This refers to the highest encodable signal level. +// Always equal to 1.0 for SDR curves. +// +// Note: For HLG in particular, which is scene-referred, this returns the +// highest nominal peak in scene-referred space (3.77), which may be different +// from the actual peak in display space after application of the HLG OOTF. +PL_API float pl_color_transfer_nominal_peak(enum pl_color_transfer trc); + +static inline bool pl_color_transfer_is_hdr(enum pl_color_transfer trc) +{ + return pl_color_transfer_nominal_peak(trc) > 1.0; +} + +// This defines the display-space standard reference white level (in cd/m^2) +// that is assumed for SDR content, for use when mapping between HDR and SDR in +// display space. See ITU-R Report BT.2408 for more information. +#define PL_COLOR_SDR_WHITE 203.0f + +// This defines the assumed contrast level of an unknown SDR display. 
This +// will be used to determine the black point in the absence of any tagged +// minimum luminance, relative to the tagged maximum luminance (or +// PL_COLOR_SDR_WHITE in the absence of all tagging) +#define PL_COLOR_SDR_CONTRAST 1000.0f + +// This defines the default black point assumed for "infinite contrast" HDR +// displays. This is not exactly 0.0 because a value of 0.0 is interpreted +// as "unknown / missing metadata" inside struct pl_hdr_metadata, and also +// to avoid numerical issues in a variety of tone mapping functions. +// Essentially, a black level below this number is functionally meaningless +// inside libplacebo, and will be clamped to this value regardless. +// +// The value used here (1e-6) is about one 13-bit PQ step above absolute zero, +// which is a small fraction of the human JND at this brightness level, and also +// about 3 bits above the floating point machine epsilon. +#define PL_COLOR_HDR_BLACK 1e-6f + +// This defines the assumed peak brightness of a HLG display with no HDR10 +// metadata. This is set to the brightness of a "nominal" HLG reference display. +#define PL_COLOR_HLG_PEAK 1000.0f + +// Represents a single CIE xy coordinate (e.g. CIE Yxy with Y = 1.0) +struct pl_cie_xy { + float x, y; +}; + +// Creates a pl_cie_xy from raw XYZ values +static inline struct pl_cie_xy pl_cie_from_XYZ(float X, float Y, float Z) +{ + float k = 1.0f / (X + Y + Z); + struct pl_cie_xy xy = { k * X, k * Y }; + return xy; +} + +// Recovers (X / Y) from a CIE xy value. +static inline float pl_cie_X(struct pl_cie_xy xy) +{ + return xy.x / xy.y; +} + +// Recovers (Z / Y) from a CIE xy value. +static inline float pl_cie_Z(struct pl_cie_xy xy) +{ + return (1 - xy.x - xy.y) / xy.y; +} + +static inline bool pl_cie_xy_equal(const struct pl_cie_xy *a, + const struct pl_cie_xy *b) +{ + return a->x == b->x && a->y == b->y; +} + +// Computes the CIE xy chromaticity coordinates of a CIE D-series illuminant +// with the given correlated color temperature. 
+// +// `temperature` must be between 2500 K and 25000 K, inclusive. +PL_API struct pl_cie_xy pl_white_from_temp(float temperature); + +// Represents the raw physical primaries corresponding to a color space. +struct pl_raw_primaries { + struct pl_cie_xy red, green, blue, white; +}; + +// Returns whether two raw primaries are exactly identical. +PL_API bool pl_raw_primaries_equal(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Returns whether two raw primaries are approximately equal +PL_API bool pl_raw_primaries_similar(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_raw_primaries_merge(struct pl_raw_primaries *orig, + const struct pl_raw_primaries *update); + +// Returns the raw primaries for a given color space. +PL_API const struct pl_raw_primaries *pl_raw_primaries_get(enum pl_color_primaries prim); + +enum pl_hdr_scaling { + PL_HDR_NORM = 0, // 0.0 is absolute black, 1.0 is PL_COLOR_SDR_WHITE + PL_HDR_SQRT, // sqrt() of PL_HDR_NORM values + PL_HDR_NITS, // absolute brightness in raw cd/m² + PL_HDR_PQ, // absolute brightness in PQ (0.0 to 1.0) + PL_HDR_SCALING_COUNT, +}; + +// Generic helper for performing HDR scale conversions. 
+PL_API float pl_hdr_rescale(enum pl_hdr_scaling from, enum pl_hdr_scaling to, float x); + +enum pl_hdr_metadata_type { + PL_HDR_METADATA_ANY = 0, + PL_HDR_METADATA_NONE, + PL_HDR_METADATA_HDR10, // HDR10 static mastering display metadata + PL_HDR_METADATA_HDR10PLUS, // HDR10+ dynamic metadata + PL_HDR_METADATA_CIE_Y, // CIE Y derived dynamic luminance metadata + PL_HDR_METADATA_TYPE_COUNT, +}; + +// Bezier curve for HDR metadata +struct pl_hdr_bezier { + float target_luma; // target luminance (cd/m²) for this OOTF + float knee_x, knee_y; // cross-over knee point (0-1) + float anchors[15]; // intermediate bezier curve control points (0-1) + uint8_t num_anchors; +}; + +// Represents raw HDR metadata as defined by SMPTE 2086 / CTA 861.3, which is +// often attached to HDR sources and can be forwarded to HDR-capable displays, +// or used to guide the libplacebo built-in tone mapping. Values left as 0 +// are treated as unknown by libplacebo. +// +// Note: This means that a value of `min_luma == 0.0` gets treated as "minimum +// luminance not known", which in practice may end up inferring a default +// contrast of 1000:1 for SDR transfer functions. To avoid this, the user should +// set these fields to a low positive value, e.g. PL_COLOR_HDR_BLACK, to signal +// a "zero" black point (i.e. infinite contrast display). +struct pl_hdr_metadata { + // --- PL_HDR_METADATA_HDR10 + // Mastering display metadata. + struct pl_raw_primaries prim; // mastering display primaries + float min_luma, max_luma; // min/max luminance (in cd/m²) + + // Content light level. 
(Note: this is ignored by libplacebo itself) + float max_cll; // max content light level (in cd/m²) + float max_fall; // max frame average light level (in cd/m²) + + // --- PL_HDR_METADATA_HDR10PLUS + float scene_max[3]; // maxSCL in cd/m² per component (RGB) + float scene_avg; // average of maxRGB in cd/m² + struct pl_hdr_bezier ootf; // reference OOTF (optional) + + // --- PL_HDR_METADATA_CIE_Y + float max_pq_y; // maximum PQ luminance (in PQ, 0-1) + float avg_pq_y; // averaged PQ luminance (in PQ, 0-1) +}; + +PL_API extern const struct pl_hdr_metadata pl_hdr_metadata_empty; // equal to {0} +PL_API extern const struct pl_hdr_metadata pl_hdr_metadata_hdr10; // generic HDR10 display + +// Returns whether two sets of HDR metadata are exactly identical. +PL_API bool pl_hdr_metadata_equal(const struct pl_hdr_metadata *a, + const struct pl_hdr_metadata *b); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_hdr_metadata_merge(struct pl_hdr_metadata *orig, + const struct pl_hdr_metadata *update); + +// Returns `true` if `data` contains a complete set of a given metadata type. +// Note: for PL_HDR_METADATA_HDR10, only `min_luma` and `max_luma` are +// considered - CLL/FALL and primaries are irrelevant for HDR tone-mapping. +PL_API bool pl_hdr_metadata_contains(const struct pl_hdr_metadata *data, + enum pl_hdr_metadata_type type); + +// Rendering intent for colorspace transformations. These constants match the +// ICC specification (Table 23) +enum pl_rendering_intent { + PL_INTENT_AUTO = -1, // not a valid ICC intent, but used to auto-infer + PL_INTENT_PERCEPTUAL = 0, + PL_INTENT_RELATIVE_COLORIMETRIC = 1, + PL_INTENT_SATURATION = 2, + PL_INTENT_ABSOLUTE_COLORIMETRIC = 3 +}; + +// Struct describing a physical color space. This information is needed to +// turn a normalized RGB triple into its physical meaning, as well as to convert +// between color spaces. 
+struct pl_color_space { + enum pl_color_primaries primaries; + enum pl_color_transfer transfer; + + // HDR metadata for this color space, if present. (Optional) + struct pl_hdr_metadata hdr; +}; + +#define pl_color_space(...) (&(struct pl_color_space) { __VA_ARGS__ }) + +// Returns whether or not a color space is considered as effectively HDR. +// This is true when the effective signal peak is greater than the SDR +// reference white (1.0), taking into account `csp->hdr`. +PL_API bool pl_color_space_is_hdr(const struct pl_color_space *csp); + +// Returns whether or not a color space is "black scaled", in which case 0.0 is +// the true black point. This is true for SDR signals other than BT.1886, as +// well as for HLG. +PL_API bool pl_color_space_is_black_scaled(const struct pl_color_space *csp); + +struct pl_nominal_luma_params { + // The color space to infer luminance from + const struct pl_color_space *color; + + // Which type of metadata to draw values from + enum pl_hdr_metadata_type metadata; + + // This field controls the scaling of `out_*` + enum pl_hdr_scaling scaling; + + // Fields to write the detected nominal luminance to. (Optional) + // + // For SDR displays, this will default to a contrast level of 1000:1 unless + // indicated otherwise in the `min/max_luma` static HDR10 metadata fields. + float *out_min; + float *out_max; + + // Field to write the detected average luminance to, or 0.0 in the absence + // of dynamic metadata. (Optional) + float *out_avg; +}; + +#define pl_nominal_luma_params(...) \ + (&(struct pl_nominal_luma_params) { __VA_ARGS__ }) + +// Returns the effective luminance described by a pl_color_space. 
+PL_API void pl_color_space_nominal_luma_ex(const struct pl_nominal_luma_params *params); + +// Backwards compatibility wrapper for `pl_color_space_nominal_luma_ex` +PL_DEPRECATED PL_API void pl_color_space_nominal_luma(const struct pl_color_space *csp, + float *out_min, float *out_max); + +// Replaces unknown values in the first struct by those of the second struct. +PL_API void pl_color_space_merge(struct pl_color_space *orig, + const struct pl_color_space *update); + +// Returns whether two colorspaces are exactly identical. +PL_API bool pl_color_space_equal(const struct pl_color_space *c1, + const struct pl_color_space *c2); + +// Go through a color-space and explicitly default all unknown fields to +// reasonable values. After this function is called, none of the values will be +// PL_COLOR_*_UNKNOWN or 0.0, except for the dynamic HDR metadata fields. +PL_API void pl_color_space_infer(struct pl_color_space *space); + +// Like `pl_color_space_infer`, but takes default values from the reference +// color space (excluding certain special cases like HDR or wide gamut). +PL_API void pl_color_space_infer_ref(struct pl_color_space *space, + const struct pl_color_space *ref); + +// Infer both the source and destination gamut simultaneously, and also adjust +// values for optimal display. This is mostly the same as +// `pl_color_space_infer(src)` followed by `pl_color_space_infer_ref`, but also +// takes into account the SDR contrast levels and PQ black points. This is +// basically the logic used by `pl_shader_color_map` and `pl_renderer` to +// decide the output color space in a conservative way and compute the final +// end-to-end color transformation that needs to be done. +PL_API void pl_color_space_infer_map(struct pl_color_space *src, + struct pl_color_space *dst); + +// Some common color spaces. Note: These don't necessarily have all fields +// filled, in particular `hdr` is left unset. 
+PL_API extern const struct pl_color_space pl_color_space_unknown; +PL_API extern const struct pl_color_space pl_color_space_srgb; +PL_API extern const struct pl_color_space pl_color_space_bt709; +PL_API extern const struct pl_color_space pl_color_space_hdr10; +PL_API extern const struct pl_color_space pl_color_space_bt2020_hlg; +PL_API extern const struct pl_color_space pl_color_space_monitor; // typical display + +// This represents metadata about extra operations to perform during colorspace +// conversion, which correspond to artistic adjustments of the color. +struct pl_color_adjustment { + // Brightness boost. 0.0 = neutral, 1.0 = solid white, -1.0 = solid black + float brightness; + // Contrast boost. 1.0 = neutral, 0.0 = solid black + float contrast; + // Saturation gain. 1.0 = neutral, 0.0 = grayscale + float saturation; + // Hue shift, corresponding to a rotation around the [U, V] subvector, in + // radians. 0.0 = neutral + float hue; + // Gamma adjustment. 1.0 = neutral, 0.0 = solid black + float gamma; + // Color temperature shift. 0.0 = 6500 K, -1.0 = 3000 K, 1.0 = 10000 K + float temperature; +}; + +#define PL_COLOR_ADJUSTMENT_NEUTRAL \ + .contrast = 1.0, \ + .saturation = 1.0, \ + .gamma = 1.0, + +#define pl_color_adjustment(...) (&(struct pl_color_adjustment) { PL_COLOR_ADJUSTMENT_NEUTRAL __VA_ARGS__ }) +PL_API extern const struct pl_color_adjustment pl_color_adjustment_neutral; + +// Represents the chroma placement with respect to the luma samples. This is +// only relevant for YCbCr-like colorspaces with chroma subsampling. +enum pl_chroma_location { + PL_CHROMA_UNKNOWN = 0, + PL_CHROMA_LEFT, // MPEG2/4, H.264 + PL_CHROMA_CENTER, // MPEG1, JPEG + PL_CHROMA_TOP_LEFT, + PL_CHROMA_TOP_CENTER, + PL_CHROMA_BOTTOM_LEFT, + PL_CHROMA_BOTTOM_CENTER, + PL_CHROMA_COUNT, +}; + +// Fills *x and *y with the offset in luma pixels corresponding to a given +// chroma location. 
+// +// Note: PL_CHROMA_UNKNOWN defaults to PL_CHROMA_LEFT +PL_API void pl_chroma_location_offset(enum pl_chroma_location loc, float *x, float *y); + +// Returns an RGB->XYZ conversion matrix for a given set of primaries. +// Multiplying this into the RGB color transforms it to CIE XYZ, centered +// around the color space's white point. +PL_API pl_matrix3x3 pl_get_rgb2xyz_matrix(const struct pl_raw_primaries *prim); + +// Similar to pl_get_rgb2xyz_matrix, but gives the inverse transformation. +PL_API pl_matrix3x3 pl_get_xyz2rgb_matrix(const struct pl_raw_primaries *prim); + +// Returns a primary adaptation matrix, which converts from one set of +// primaries to another. This is an RGB->RGB transformation. For rendering +// intents other than PL_INTENT_ABSOLUTE_COLORIMETRIC, the white point is +// adapted using the Bradford matrix. +PL_API pl_matrix3x3 pl_get_color_mapping_matrix(const struct pl_raw_primaries *src, + const struct pl_raw_primaries *dst, + enum pl_rendering_intent intent); + +// Return a chromatic adaptation matrix, which converts from one white point to +// another, using the Bradford matrix. This is an RGB->RGB transformation. +PL_API pl_matrix3x3 pl_get_adaptation_matrix(struct pl_cie_xy src, struct pl_cie_xy dst); + +// Returns true if 'b' is entirely contained in 'a'. Useful for figuring out if +// colorimetric clipping will occur or not. +PL_API bool pl_primaries_superset(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Returns true if `prim` forms a nominally valid set of primaries. This does +// not check whether or not these primaries are actually physically realisable, +// merely that they satisfy the requirements for colorspace math (to avoid NaN). +PL_API bool pl_primaries_valid(const struct pl_raw_primaries *prim); + +// Returns true if two primaries are 'compatible', which is the case if +// they preserve the relationship between primaries (red=red, green=green, +// blue=blue). 
In other words, this is false for synthetic primaries that have +// channels misordered from the convention (e.g. for some test ICC profiles). +PL_API bool pl_primaries_compatible(const struct pl_raw_primaries *a, + const struct pl_raw_primaries *b); + +// Clip points in the first gamut (src) to be fully contained inside the second +// gamut (dst). Only works on compatible primaries (pl_primaries_compatible). +PL_API struct pl_raw_primaries +pl_primaries_clip(const struct pl_raw_primaries *src, + const struct pl_raw_primaries *dst); + +// Primary-dependent RGB->LMS matrix for the IPTPQc4 color system. This is +// derived from the HPE XYZ->LMS matrix with 4% crosstalk added. +PL_API pl_matrix3x3 pl_ipt_rgb2lms(const struct pl_raw_primaries *prim); +PL_API pl_matrix3x3 pl_ipt_lms2rgb(const struct pl_raw_primaries *prim); + +// Primary-independent L'M'S' -> IPT matrix for the IPTPQc4 color system, and +// its inverse. This is identical to the Ebner & Fairchild (1998) IPT matrix. +PL_API extern const pl_matrix3x3 pl_ipt_lms2ipt; +PL_API extern const pl_matrix3x3 pl_ipt_ipt2lms; + +// Cone types involved in human vision +enum pl_cone { + PL_CONE_L = 1 << 0, + PL_CONE_M = 1 << 1, + PL_CONE_S = 1 << 2, + + // Convenience aliases + PL_CONE_NONE = 0, + PL_CONE_LM = PL_CONE_L | PL_CONE_M, + PL_CONE_MS = PL_CONE_M | PL_CONE_S, + PL_CONE_LS = PL_CONE_L | PL_CONE_S, + PL_CONE_LMS = PL_CONE_L | PL_CONE_M | PL_CONE_S, +}; + +// Structure describing parameters for simulating color blindness +struct pl_cone_params { + enum pl_cone cones; // Which cones are *affected* by the vision model + float strength; // Coefficient for how strong the defect is + // (1.0 = Unaffected, 0.0 = Full blindness) +}; + +#define pl_cone_params(...) 
(&(struct pl_cone_params) { __VA_ARGS__ }) + +// Built-in color blindness models +PL_API extern const struct pl_cone_params pl_vision_normal; // No distortion (92%) +PL_API extern const struct pl_cone_params pl_vision_protanomaly; // Red deficiency (0.66%) +PL_API extern const struct pl_cone_params pl_vision_protanopia; // Red absence (0.59%) +PL_API extern const struct pl_cone_params pl_vision_deuteranomaly; // Green deficiency (2.7%) +PL_API extern const struct pl_cone_params pl_vision_deuteranopia; // Green absence (0.56%) +PL_API extern const struct pl_cone_params pl_vision_tritanomaly; // Blue deficiency (0.01%) +PL_API extern const struct pl_cone_params pl_vision_tritanopia; // Blue absence (0.016%) +PL_API extern const struct pl_cone_params pl_vision_monochromacy; // Blue cones only (<0.001%) +PL_API extern const struct pl_cone_params pl_vision_achromatopsia; // Rods only (<0.0001%) + +// Returns a cone adaptation matrix. Applying this to an RGB color in the given +// color space will apply the given cone adaptation coefficients for simulating +// a type of color blindness. +// +// For the color blindness models which don't entail complete loss of a cone, +// you can partially counteract the effect by using a similar model with the +// `strength` set to its inverse. For example, to partially counteract +// deuteranomaly, you could generate a cone matrix for PL_CONE_M with the +// strength 2.0 (or some other number above 1.0). +PL_API pl_matrix3x3 pl_get_cone_matrix(const struct pl_cone_params *params, + const struct pl_raw_primaries *prim); + +// Returns a color decoding matrix for a given combination of source color +// representation and adjustment parameters. This mutates `repr` to reflect the +// change. If `params` is NULL, it defaults to &pl_color_adjustment_neutral. +// +// This function always performs a conversion to RGB. To convert to other +// colorspaces (e.g. 
between YUV systems), obtain a second YUV->RGB matrix
+// and invert it using `pl_transform3x3_invert`.
+//
+// Note: For BT.2020 constant-luminance, this outputs chroma information in the
+// range [-0.5, 0.5]. Since the CL system conversion is non-linear, further
+// processing must be done by the caller. The channel order is CrYCb.
+//
+// Note: For BT.2100 ICtCp, this outputs in the color space L'M'S'. Further
+// non-linear processing must be done by the caller.
+//
+// Note: XYZ system is expected to be in DCDM X'Y'Z' encoding (ST 428-1), in
+// practice this means normalizing by (48.0 / 52.37) factor and applying 2.6 gamma
+PL_API pl_transform3x3 pl_color_repr_decode(struct pl_color_repr *repr,
+                                            const struct pl_color_adjustment *params);
+
+// Common struct to describe an ICC profile
+struct pl_icc_profile {
+    // Points to the in-memory representation of the ICC profile. This is
+    // allowed to be NULL, in which case the `pl_icc_profile` represents "no
+    // profile".
+    const void *data;
+    size_t len;
+
+    // If a profile is set, this signature must uniquely identify it (including
+    // across restarts, for caching), ideally using a checksum of the profile
+    // contents. The user is free to choose the method of determining this
+    // signature, but note the existence of the
+    // `pl_icc_profile_compute_signature` helper.
+    uint64_t signature;
+};
+
+#define pl_icc_profile(...) &(struct pl_icc_profile) { __VA_ARGS__ }
+
+// This doesn't do a comparison of the actual contents, only of the signature.
+PL_API bool pl_icc_profile_equal(const struct pl_icc_profile *p1,
+                                 const struct pl_icc_profile *p2);
+
+// Sets `signature` to a hash of `profile->data`, if non-NULL. Provided as a
+// convenience function for the sake of users ingesting arbitrary ICC profiles
+// from sources where they can't reliably detect profile changes. 
+// +// Note: This is based on a very fast hash, and will compute a signature for +// even large (10 MB) ICC profiles in, typically, a fraction of a millisecond. +PL_API void pl_icc_profile_compute_signature(struct pl_icc_profile *profile); + +PL_API_END + +#endif // LIBPLACEBO_COLORSPACE_H_ diff --git a/src/include/libplacebo/common.h b/src/include/libplacebo/common.h new file mode 100644 index 0000000..806730c --- /dev/null +++ b/src/include/libplacebo/common.h @@ -0,0 +1,244 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_COMMON_H_ +#define LIBPLACEBO_COMMON_H_ + +#include <stdbool.h> + +#include <libplacebo/config.h> + +PL_API_BEGIN + +// Some common utility types. These are overloaded to support 2D, 3D and +// integer/float variants. +typedef struct pl_rect2d { + int x0, y0; + int x1, y1; +} pl_rect2d; + +typedef struct pl_rect3d { + int x0, y0, z0; + int x1, y1, z1; +} pl_rect3d; + +typedef struct pl_rect2df { + float x0, y0; + float x1, y1; +} pl_rect2df; + +typedef struct pl_rect3df { + float x0, y0, z0; + float x1, y1, z1; +} pl_rect3df; + +// These macros will work for any of the above pl_rect variants (with enough +// dimensions). 
Careful: double-evaluation hazard
+#define pl_rect_w(r) ((r).x1 - (r).x0)
+#define pl_rect_h(r) ((r).y1 - (r).y0)
+#define pl_rect_d(r) ((r).z1 - (r).z0)
+
+#define pl_rect2d_eq(a, b) \
+    ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \
+     (a).y0 == (b).y0 && (a).y1 == (b).y1)
+
+#define pl_rect3d_eq(a, b) \
+    ((a).x0 == (b).x0 && (a).x1 == (b).x1 && \
+     (a).y0 == (b).y0 && (a).y1 == (b).y1 && \
+     (a).z0 == (b).z0 && (a).z1 == (b).z1)
+
+// "Normalize" a rectangle: This ensures d1 >= d0 for all dimensions.
+PL_API void pl_rect2d_normalize(pl_rect2d *rc);
+PL_API void pl_rect3d_normalize(pl_rect3d *rc);
+
+PL_API void pl_rect2df_normalize(pl_rect2df *rc);
+PL_API void pl_rect3df_normalize(pl_rect3df *rc);
+
+// Return the rounded form of a rect.
+PL_API pl_rect2d pl_rect2df_round(const pl_rect2df *rc);
+PL_API pl_rect3d pl_rect3df_round(const pl_rect3df *rc);
+
+// Represents a row-major matrix, i.e. the following matrix
+//     [ a11 a12 a13 ]
+//     [ a21 a22 a23 ]
+//     [ a31 a32 a33 ]
+// is represented in C like this:
+//   { { a11, a12, a13 },
+//     { a21, a22, a23 },
+//     { a31, a32, a33 } };
+typedef struct pl_matrix3x3 {
+    float m[3][3];
+} pl_matrix3x3;
+
+PL_API extern const pl_matrix3x3 pl_matrix3x3_identity;
+
+// Applies a matrix to a float vector in-place.
+PL_API void pl_matrix3x3_apply(const pl_matrix3x3 *mat, float vec[3]);
+
+// Applies a matrix to a pl_rect3df
+PL_API void pl_matrix3x3_apply_rc(const pl_matrix3x3 *mat, pl_rect3df *rc);
+
+// Scales a color matrix by a linear factor.
+PL_API void pl_matrix3x3_scale(pl_matrix3x3 *mat, float scale);
+
+// Inverts a matrix. Only use where precision is not that important.
+PL_API void pl_matrix3x3_invert(pl_matrix3x3 *mat);
+
+// Composes/multiplies two matrices. Multiplies B into A, i.e.
+// A := A * B
+PL_API void pl_matrix3x3_mul(pl_matrix3x3 *a, const pl_matrix3x3 *b);
+
+// Flipped version of `pl_matrix3x3_mul`. 
+// B := A * B +PL_API void pl_matrix3x3_rmul(const pl_matrix3x3 *a, pl_matrix3x3 *b); + +// Represents an affine transformation, which is basically a 3x3 matrix +// together with a column vector to add onto the output. +typedef struct pl_transform3x3 { + pl_matrix3x3 mat; + float c[3]; +} pl_transform3x3; + +PL_API extern const pl_transform3x3 pl_transform3x3_identity; + +// Applies a transform to a float vector in-place. +PL_API void pl_transform3x3_apply(const pl_transform3x3 *t, float vec[3]); + +// Applies a transform to a pl_rect3df +PL_API void pl_transform3x3_apply_rc(const pl_transform3x3 *t, pl_rect3df *rc); + +// Scales the output of a transform by a linear factor. Since an affine +// transformation is non-linear, this does not commute. If you want to scale +// the *input* of a transform, use pl_matrix3x3_scale on `t.mat`. +PL_API void pl_transform3x3_scale(pl_transform3x3 *t, float scale); + +// Inverts a transform. Only use where precision is not that important. +PL_API void pl_transform3x3_invert(pl_transform3x3 *t); + +// 2D analog of the above structs. Since these are featured less prominently, +// we omit some of the other helper functions. 
+typedef struct pl_matrix2x2 { + float m[2][2]; +} pl_matrix2x2; + +PL_API extern const pl_matrix2x2 pl_matrix2x2_identity; +PL_API pl_matrix2x2 pl_matrix2x2_rotation(float angle); + +PL_API void pl_matrix2x2_apply(const pl_matrix2x2 *mat, float vec[2]); +PL_API void pl_matrix2x2_apply_rc(const pl_matrix2x2 *mat, pl_rect2df *rc); + +PL_API void pl_matrix2x2_mul(pl_matrix2x2 *a, const pl_matrix2x2 *b); +PL_API void pl_matrix2x2_rmul(const pl_matrix2x2 *a, pl_matrix2x2 *b); + +PL_API void pl_matrix2x2_scale(pl_matrix2x2 *mat, float scale); +PL_API void pl_matrix2x2_invert(pl_matrix2x2 *mat); + +typedef struct pl_transform2x2 { + pl_matrix2x2 mat; + float c[2]; +} pl_transform2x2; + +PL_API extern const pl_transform2x2 pl_transform2x2_identity; + +PL_API void pl_transform2x2_apply(const pl_transform2x2 *t, float vec[2]); +PL_API void pl_transform2x2_apply_rc(const pl_transform2x2 *t, pl_rect2df *rc); + +PL_API void pl_transform2x2_mul(pl_transform2x2 *a, const pl_transform2x2 *b); +PL_API void pl_transform2x2_rmul(const pl_transform2x2 *a, pl_transform2x2 *b); + +PL_API void pl_transform2x2_scale(pl_transform2x2 *t, float scale); +PL_API void pl_transform2x2_invert(pl_transform2x2 *t); + +// Compute new bounding box of a transformation (as applied to a given rect). +PL_API pl_rect2df pl_transform2x2_bounds(const pl_transform2x2 *t, + const pl_rect2df *rc); + +// Helper functions for dealing with aspect ratios and stretched/scaled rects. + +// Return the (absolute) aspect ratio (width/height) of a given pl_rect2df. +// This will always be a positive number, even if `rc` is flipped. +PL_API float pl_rect2df_aspect(const pl_rect2df *rc); + +// Set the aspect of a `rc` to a given aspect ratio with an extra 'panscan' +// factor choosing the balance between shrinking and growing the `rc` to meet +// this aspect ratio. +// +// Notes: +// - If `panscan` is 0.0, this function will only ever shrink the `rc`. +// - If `panscan` is 1.0, this function will only ever grow the `rc`. 
+// - If `panscan` is 0.5, this function is area-preserving. +PL_API void pl_rect2df_aspect_set(pl_rect2df *rc, float aspect, float panscan); + +// Set one rect's aspect to that of another +#define pl_rect2df_aspect_copy(rc, src, panscan) \ + pl_rect2df_aspect_set((rc), pl_rect2df_aspect(src), (panscan)) + +// 'Fit' one rect inside another. `rc` will be set to the same size and aspect +// ratio as `src`, but with the size limited to fit inside the original `rc`. +// Like `pl_rect2df_aspect_set`, `panscan` controls the pan&scan factor. +PL_API void pl_rect2df_aspect_fit(pl_rect2df *rc, const pl_rect2df *src, float panscan); + +// Scale rect in each direction while keeping it centered. +PL_API void pl_rect2df_stretch(pl_rect2df *rc, float stretch_x, float stretch_y); + +// Offset rect by an arbitrary offset factor. If the corresponding dimension +// of a rect is flipped, so too is the applied offset. +PL_API void pl_rect2df_offset(pl_rect2df *rc, float offset_x, float offset_y); + +// Scale a rect uniformly in both dimensions. +#define pl_rect2df_zoom(rc, zoom) pl_rect2df_stretch((rc), (zoom), (zoom)) + +// Rotation in degrees clockwise +typedef int pl_rotation; +enum { + PL_ROTATION_0 = 0, + PL_ROTATION_90 = 1, + PL_ROTATION_180 = 2, + PL_ROTATION_270 = 3, + PL_ROTATION_360 = 4, // equivalent to PL_ROTATION_0 + + // Note: Values outside the range [0,4) are legal, including negatives. +}; + +// Constrains to the interval [PL_ROTATION_0, PL_ROTATION_360). +static inline pl_rotation pl_rotation_normalize(pl_rotation rot) +{ + return (rot % PL_ROTATION_360 + PL_ROTATION_360) % PL_ROTATION_360; +} + +// Rotates the coordinate system of a `pl_rect2d(f)` in a certain direction. +// For example, calling this with PL_ROTATION_90 will correspond to rotating +// the coordinate system 90° to the right (so the x axis becomes the y axis). +// +// The resulting rect is re-normalized in the same coordinate system. 
+PL_API void pl_rect2df_rotate(pl_rect2df *rc, pl_rotation rot); + +// Returns the aspect ratio in a rotated frame of reference. +static inline float pl_aspect_rotate(float aspect, pl_rotation rot) +{ + return (rot % PL_ROTATION_180) ? 1.0 / aspect : aspect; +} + +#define pl_rect2df_aspect_set_rot(rc, aspect, rot, panscan) \ + pl_rect2df_aspect_set((rc), pl_aspect_rotate((aspect), (rot)), (panscan)) + +#define pl_rect2df_aspect_copy_rot(rc, src, panscan, rot) \ + pl_rect2df_aspect_set_rot((rc), pl_rect2df_aspect(src), (rot), (panscan)) + +PL_API_END + +#endif // LIBPLACEBO_COMMON_H_ diff --git a/src/include/libplacebo/config.h.in b/src/include/libplacebo/config.h.in new file mode 100644 index 0000000..2ed6290 --- /dev/null +++ b/src/include/libplacebo/config.h.in @@ -0,0 +1,102 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_CONFIG_H_ +#define LIBPLACEBO_CONFIG_H_ + +// Increased any time the library changes in a fundamental/major way. +#define PL_MAJOR_VER @majorver@ + +// Increased any time the API changes. (Note: Does not reset when PL_MAJOR_VER +// is increased) +#define PL_API_VER @apiver@ + +// Increased any time a fix is made to a given API version. 
+#define PL_FIX_VER (pl_fix_ver()) + +// Friendly name (`git describe`) for the overall version of the library +#define PL_VERSION (pl_version()) + +// Feature tests. These aren't described in further detail, but may be useful +// for programmers wanting to programmatically check for feature support +// in their compiled libplacebo versions. +@extra_defs@ + +// Extra compiler-specific stuff +#ifndef PL_DEPRECATED +# if defined(_MSC_VER) +# define PL_DEPRECATED +# else +# define PL_DEPRECATED __attribute__((deprecated)) +# endif +#endif + +#ifndef __has_feature +#define __has_feature(x) 0 +#endif + +#ifndef PL_DEPRECATED_ENUMERATOR +# if (defined(__GNUC__) && (__GNUC__ >= 6)) || __has_feature(enumerator_attributes) +# define PL_DEPRECATED_ENUMERATOR PL_DEPRECATED +# else +# define PL_DEPRECATED_ENUMERATOR +# endif +#endif + +#if defined(_WIN32) || defined(__CYGWIN__) +# ifdef PL_EXPORT +# define PL_API __declspec(dllexport) +# else +# ifndef PL_STATIC +# define PL_API __declspec(dllimport) +# else +# define PL_API +# endif +# endif +#else +# define PL_API __attribute__ ((visibility ("default"))) +#endif + +// C++ compatibility +#ifdef __cplusplus +# define PL_API_BEGIN extern "C" { +# define PL_API_END } +#else +# define PL_API_BEGIN +# define PL_API_END +#endif + +#ifndef __cplusplus +// Disable this warning because libplacebo's params macros override fields +# pragma GCC diagnostic ignored "-Woverride-init" +#endif + +// Extra helper macros +#define PL_TOSTRING_INNER(x) #x +#define PL_TOSTRING(x) PL_TOSTRING_INNER(x) + +// Deprecated macro for back-compatibility +#define PL_STRUCT(name) struct name##_t + +PL_API_BEGIN + +PL_API int pl_fix_ver(void); +PL_API const char *pl_version(void); + +PL_API_END + +#endif // LIBPLACEBO_CONFIG_H_ diff --git a/src/include/libplacebo/d3d11.h b/src/include/libplacebo/d3d11.h new file mode 100644 index 0000000..8ecba30 --- /dev/null +++ b/src/include/libplacebo/d3d11.h @@ -0,0 +1,248 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_D3D11_H_ +#define LIBPLACEBO_D3D11_H_ + +#include <windows.h> +#include <d3d11.h> +#include <dxgi1_2.h> +#include <libplacebo/gpu.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +// Structure representing the actual D3D11 device and associated GPU instance +typedef const struct pl_d3d11_t { + pl_gpu gpu; + + // The D3D11 device in use. The user is free to use this for their own + // purposes, including taking a reference to the device (with AddRef) and + // using it beyond the lifetime of the pl_d3d11 that created it (though if + // this is done with debug enabled, it will confuse the leak checker.) + ID3D11Device *device; + + // True if the device is using a software (WARP) adapter + bool software; +} *pl_d3d11; + +struct pl_d3d11_params { + // The Direct3D 11 device to use. Optional, if NULL then libplacebo will + // create its own ID3D11Device using the options below. If set, all the + // options below will be ignored. + ID3D11Device *device; + + // --- Adapter selection options + + // The adapter to use. This overrides adapter_luid. + IDXGIAdapter *adapter; + + // The LUID of the adapter to use. If adapter and adapter_luid are unset, + // the default adapter will be used instead. 
+ LUID adapter_luid; + + // Allow a software (WARP) adapter when selecting the adapter automatically. + // Note that sometimes the default adapter will be a software adapter. This + // is because, on Windows 8 and up, if there are no hardware adapters, + // Windows will pretend the WARP adapter is the default hardware adapter. + bool allow_software; + + // Always use a software adapter. This is mainly for testing purposes. + bool force_software; + + // --- Device creation options + + // Enable the debug layer (D3D11_CREATE_DEVICE_DEBUG) + // Also logs IDXGIInfoQueue messages + bool debug; + + // Extra flags to pass to D3D11CreateDevice (D3D11_CREATE_DEVICE_FLAG). + // libplacebo should be compatible with any flags passed here. + UINT flags; + + // The minimum and maximum allowable feature levels for the created device. + // libplacebo will attempt to create a device with the highest feature level + // between min_feature_level and max_feature_level (inclusive.) If there are + // no supported feature levels in this range, `pl_d3d11_create` will either + // return NULL or fall back to the software adapter, depending on whether + // `allow_software` is set. + // + // Normally there is no reason to set `max_feature_level` other than to test + // if a program works at lower feature levels. + // + // Note that D3D_FEATURE_LEVEL_9_3 and below (known as 10level9) are highly + // restrictive. These feature levels are supported on a best-effort basis. + // They represent very old DirectX 9 compatible PC and laptop hardware + // (2001-2007, GeForce FX, 6, 7, ATI R300-R500, GMA 950-X3000) and some + // less-old mobile devices (Surface RT, Surface 2.) Basic video rendering + // should work, but the full pl_gpu API will not be available and advanced + // shaders will probably fail. The hardware is probably too slow for these + // anyway. 
+ // + // Known restrictions of 10level9 devices include: + // D3D_FEATURE_LEVEL_9_3 and below: + // - `pl_pass_run_params->index_buf` will not work (but `index_data` will) + // - Dimensions of 3D textures must be powers of two + // - Shaders cannot use gl_FragCoord + // - Shaders cannot use texelFetch + // D3D_FEATURE_LEVEL_9_2 and below: + // - Fragment shaders have no dynamic flow control and very strict limits + // on the number of constants, temporary registers and instructions. + // Whether a shader meets the requirements will depend on how it's + // compiled and optimized, but it's likely that only simple shaders will + // work. + // D3D_FEATURE_LEVEL_9_1: + // - No high-bit-depth formats with PL_FMT_CAP_RENDERABLE or + // PL_FMT_CAP_LINEAR + // + // If these restrictions are undesirable and you don't need to support + // ancient hardware, set `min_feature_level` to D3D_FEATURE_LEVEL_10_0. + int min_feature_level; // Defaults to D3D_FEATURE_LEVEL_9_1 if unset + int max_feature_level; // Defaults to D3D_FEATURE_LEVEL_12_1 if unset + + // Allow up to N in-flight frames. Similar to swapchain_depth for Vulkan and + // OpenGL, though with DXGI this is a device-wide setting that affects all + // swapchains (except for waitable swapchains.) See the documentation for + // `pl_swapchain_latency` for more information. + int max_frame_latency; +}; + +// Default/recommended parameters. Should generally be safe and efficient. +#define PL_D3D11_DEFAULTS \ + .allow_software = true, + +#define pl_d3d11_params(...) (&(struct pl_d3d11_params) { PL_D3D11_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_d3d11_params pl_d3d11_default_params; + +// Creates a new Direct3D 11 device based on the given parameters, or wraps an +// existing device, and initializes a new GPU instance. If params is left as +// NULL, it defaults to &pl_d3d11_default_params. 
If an existing device is +// provided in params->device, `pl_d3d11_create` will take a reference to it +// that will be released in `pl_d3d11_destroy`. +PL_API pl_d3d11 pl_d3d11_create(pl_log log, const struct pl_d3d11_params *params); + +// Release the D3D11 device. +// +// Note that all libplacebo objects allocated from this pl_d3d11 object (e.g. +// via `d3d11->gpu` or using `pl_d3d11_create_swapchain`) *must* be explicitly +// destroyed by the user before calling this. +PL_API void pl_d3d11_destroy(pl_d3d11 *d3d11); + +// For a `pl_gpu` backed by `pl_d3d11`, this function can be used to retrieve +// the underlying `pl_d3d11`. Returns NULL for any other type of `gpu`. +PL_API pl_d3d11 pl_d3d11_get(pl_gpu gpu); + +struct pl_d3d11_swapchain_params { + // The Direct3D 11 swapchain to wrap. Optional. If NULL, libplacebo will + // create its own swapchain using the options below. If set, all the + // swapchain creation options will be ignored. + // + // The provided swapchain must have been created by the same device used + // by `gpu` and must not have multisampled backbuffers. + IDXGISwapChain *swapchain; + + // --- Swapchain creation options + + // Initial framebuffer width and height. If both width and height are set to + // 0 and window is non-NULL, the client area of the window is used instead. + // For convenience, if either component would be 0, it is set to 1 instead. + // This is because Windows can have 0-sized windows, but not 0-sized + // swapchains. + int width; + int height; + + // The handle of the output window. In Windows 8 and up this is optional + // because you can output to a CoreWindow or create a composition swapchain + // instead. + HWND window; + + // A pointer to the CoreWindow to output to. If both this and `window` are + // NULL, CreateSwapChainForComposition will be used to create the swapchain. 
+ IUnknown *core_window; + + // If set, libplacebo will create a swapchain that uses the legacy bitblt + // presentation model (with the DXGI_SWAP_EFFECT_DISCARD swap effect.) This + // tends to give worse performance and frame pacing in windowed mode and it + // prevents borderless fullscreen optimizations, but it might be necessary + // to work around buggy drivers, especially with DXGI 1.2 in the Platform + // Update for Windows 7. When unset, libplacebo will try to use the flip + // presentation model and only fall back to bitblt if flip is unavailable. + bool blit; + + // additional swapchain flags + // No validation on these flags is being performed, and swapchain creation + // may fail if an unsupported combination is requested. + UINT flags; + + // --- Swapchain usage behavior options + + // Disable using a 10-bit swapchain format for SDR output + bool disable_10bit_sdr; +}; + +#define pl_d3d11_swapchain_params(...) (&(struct pl_d3d11_swapchain_params) { __VA_ARGS__ }) + +// Creates a new Direct3D 11 swapchain, or wraps an existing one. If an existing +// swapchain is provided in params->swapchain, `pl_d3d11_create_swapchain` will +// take a reference to it that will be released in `pl_swapchain_destroy`. +PL_API pl_swapchain pl_d3d11_create_swapchain(pl_d3d11 d3d11, + const struct pl_d3d11_swapchain_params *params); + +// Takes a `pl_swapchain` created by pl_d3d11_create_swapchain and returns a +// reference to the underlying IDXGISwapChain. This increments the refcount, so +// call IDXGISwapChain::Release when finished with it. +PL_API IDXGISwapChain *pl_d3d11_swapchain_unwrap(pl_swapchain sw); + +struct pl_d3d11_wrap_params { + // The D3D11 texture to wrap, or a texture array containing the texture to + // wrap. Must be a ID3D11Texture1D, ID3D11Texture2D or ID3D11Texture3D + // created by the same device used by `gpu`, must have D3D11_USAGE_DEFAULT, + // and must not be mipmapped or multisampled. 
+ ID3D11Resource *tex; + + // If tex is a texture array, this is the array member to use as the pl_tex. + int array_slice; + + // If tex is a video resource (eg. DXGI_FORMAT_AYUV, DXGI_FORMAT_NV12, + // DXGI_FORMAT_P010, etc.,) it can be wrapped as a pl_tex by specifying the + // type and size of the shader view. For planar video formats, the plane + // that is wrapped depends on the chosen format. + // + // If tex is not a video resource, these fields are unnecessary. The correct + // format will be determined automatically. If tex is not 2D, these fields + // are ignored. + // + // For a list of supported video formats and their corresponding view + // formats and sizes, see: + // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#VideoViews + DXGI_FORMAT fmt; + int w; + int h; +}; + +#define pl_d3d11_wrap_params(...) (&(struct pl_d3d11_wrap_params) { __VA_ARGS__ }) + +// Wraps an external texture into a pl_tex abstraction. `pl_d3d11_wrap` takes a +// reference to the texture, which is released when `pl_tex_destroy` is called. +// +// This function may fail due to incompatible formats, incompatible flags or +// other reasons, in which case it will return NULL. +PL_API pl_tex pl_d3d11_wrap(pl_gpu gpu, const struct pl_d3d11_wrap_params *params); + +PL_API_END + +#endif // LIBPLACEBO_D3D11_H_ diff --git a/src/include/libplacebo/dispatch.h b/src/include/libplacebo/dispatch.h new file mode 100644 index 0000000..7d43794 --- /dev/null +++ b/src/include/libplacebo/dispatch.h @@ -0,0 +1,239 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DISPATCH_H_ +#define LIBPLACEBO_DISPATCH_H_ + +#include <libplacebo/shaders.h> +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// Thread-safety: Safe +typedef struct pl_dispatch_t *pl_dispatch; + +// Creates a new shader dispatch object. This object provides a translation +// layer between generated shaders (pl_shader) and the ra context such that it +// can be used to execute shaders. This dispatch object will also provide +// shader caching (for efficient re-use). +PL_API pl_dispatch pl_dispatch_create(pl_log log, pl_gpu gpu); +PL_API void pl_dispatch_destroy(pl_dispatch *dp); + +// Reset/increments the internal counters of the pl_dispatch. This must be +// called whenever the user is going to begin with a new frame, in order to +// perform garbage collection and advance the state of the internal PRNG. +// +// Note that shaders generated by `pl_dispatch` are therefore entirely +// deterministic, as long as the sequence of calls (and inputs to the shader) +// are the same. +PL_API void pl_dispatch_reset_frame(pl_dispatch dp); + +// Returns a blank pl_shader object, suitable for recording rendering commands. +// For more information, see the header documentation in `shaders/*.h`. +PL_API pl_shader pl_dispatch_begin(pl_dispatch dp); + +// Struct passed to `info_callback`. Only valid until that function returns. +struct pl_dispatch_info { + // Information about the shader for this shader execution, as well as a + // 64-bit signature uniquely identifying it. 
+ pl_shader_info shader; + uint64_t signature; + + // A list of execution times for this pass, in nanoseconds. May be empty. + uint64_t samples[256]; + int num_samples; + + // As a convenience, this contains the last, average and peak of the above + // list of samples. If `num_samples` is 0, these values are also 0. + uint64_t last; + uint64_t peak; + uint64_t average; +}; + +// Helper function to make a copy of `pl_dispatch_info`, while overriding +// (and dereferencing) whatever was previously stored there. +static inline void pl_dispatch_info_move(struct pl_dispatch_info *dst, + const struct pl_dispatch_info *src) +{ + pl_shader_info_deref(&dst->shader); + *dst = *src; + dst->shader = pl_shader_info_ref(src->shader); +} + +// Set up a dispatch callback for this `pl_dispatch` object. The given callback +// will be run for every successfully dispatched shader. Call this again with +// `cb == NULL` to disable. +PL_API void pl_dispatch_callback(pl_dispatch dp, void *priv, + void (*cb)(void *priv, + const struct pl_dispatch_info *)); + +struct pl_dispatch_params { + // The shader to execute. The pl_dispatch will take over ownership + // of this shader, and return it back to the internal pool. + // + // This shader must have a compatible signature, i.e. inputs + // `PL_SHADER_SIG_NONE` and outputs `PL_SHADER_SIG_COLOR`. + pl_shader *shader; + + // The texture to render to. This must have params compatible with the + // shader, i.e. `target->params.renderable` for fragment shaders and + // `target->params.storable` for compute shaders. + // + // Note: Even when not using compute shaders, users are advised to always + // set `target->params.storable` if permitted by the `pl_fmt`, since this + // allows the use of compute shaders instead of full-screen quads, which is + // faster on some platforms. + pl_tex target; + + // The target rect to render to. Optional, if left as {0}, then the + // entire texture will be rendered to. 
+ pl_rect2d rect; + + // If set, enables and controls the blending for this pass. Optional. When + // using this with fragment shaders, `target->params.fmt->caps` must + // include `PL_FMT_CAP_BLENDABLE`. + const struct pl_blend_params *blend_params; + + // If set, records the execution time of this dispatch into the given + // timer object. Optional. + // + // Note: If this is set, `pl_dispatch` cannot internally measure the + // execution time of the shader, which means `pl_dispatch_info.samples` may + // be empty as a result. + pl_timer timer; +}; + +#define pl_dispatch_params(...) (&(struct pl_dispatch_params) { __VA_ARGS__ }) + +// Dispatch a generated shader (via the pl_shader mechanism). Returns whether +// or not the dispatch was successful. +PL_API bool pl_dispatch_finish(pl_dispatch dp, const struct pl_dispatch_params *params); + +struct pl_dispatch_compute_params { + // The shader to execute. This must be a compute shader with the input + // set to PL_SHADER_SIG_NONE. The output, if it has any, is ignored. + pl_shader *shader; + + // The number of work groups to dispatch in each dimension. If this is left + // as {0} and `width/height` are both set, the number of work groups will + // be inferred from the shader's `compute_group_sizes`. + int dispatch_size[3]; + + // If set, simulate vertex attributes (similar to `pl_dispatch_finish`) + // according to the given dimensions. The first two components of the + // thread's global ID will be interpreted as the X and Y locations. + // + // Optional, ignored if either component is left as 0. + int width, height; + + // If set, records the execution time of this dispatch into the given + // timer object. Optional. + // + // Note: If this is set, `pl_dispatch` cannot internally measure the + // execution time of the shader, which means `pl_dispatch_info.samples` may + // be empty as a result. + pl_timer timer; +}; + +#define pl_dispatch_compute_params(...) 
(&(struct pl_dispatch_compute_params) { __VA_ARGS__ }) + +// A variant of `pl_dispatch_finish`, this one only dispatches a compute shader +// while ignoring its output (if it has one). It's only useful for shaders +// which have otherwise observable side effects (such as updating state +// objects). +PL_API bool pl_dispatch_compute(pl_dispatch dp, const struct pl_dispatch_compute_params *params); + +enum pl_vertex_coords { + PL_COORDS_ABSOLUTE, // Absolute/integer `target` coordinates + PL_COORDS_RELATIVE, // Relative `target` coordinates in range [0, 1] + PL_COORDS_NORMALIZED, // GL-normalized coordinates in range [-1, 1] +}; + +struct pl_dispatch_vertex_params { + // The shader to execute. This must be a raster shader with the input set + // to `PL_SHADER_SIG_NONE` and the output set to `PL_SHADER_SIG_COLOR`. + // + // Additionally, the shader must not have any attached vertex attributes. + pl_shader *shader; + + // The texture to render to. Requires `target->params.renderable`. + pl_tex target; + + // The target rect to clip the rendering to. (Optional) + pl_rect2d scissors; + + // If set, enables and controls the blending for this pass. Optional. When + // enabled, `target->params.fmt->caps` must include `PL_FMT_CAP_BLENDABLE`. + const struct pl_blend_params *blend_params; + + // The description of the vertex format, including offsets. + // + // Note: `location` is ignored and can safely be left unset. + const struct pl_vertex_attrib *vertex_attribs; + int num_vertex_attribs; + size_t vertex_stride; + + // The index of the vertex position in `vertex_attribs`, as well as the + // interpretation of its contents. + int vertex_position_idx; + enum pl_vertex_coords vertex_coords; + bool vertex_flipped; // flip all vertex y coordinates + + // Type and number of vertices to render. + enum pl_prim_type vertex_type; + int vertex_count; + + // Vertex data. See `pl_pass_run_params.vertex_data`. 
+ const void *vertex_data; + pl_buf vertex_buf; + size_t buf_offset; + + // Index data. See `pl_pass_run_params.index_data`. Optional. + const void *index_data; + enum pl_index_format index_fmt; + pl_buf index_buf; + size_t index_offset; + + // If set, records the execution time of this dispatch into the given + // timer object. Optional. + // + // Note: If this is set, `pl_dispatch` cannot internally measure the + // execution time of the shader, which means `pl_dispatch_info.samples` may + // be empty as a result. + pl_timer timer; +}; + +#define pl_dispatch_vertex_params(...) (&(struct pl_dispatch_vertex_params) { __VA_ARGS__ }) + +// Dispatch a generated shader using custom vertices, rather than using a quad +// generated by the dispatch. This allows the use of e.g. custom fragment +// shaders for things like rendering custom UI elements, or possibly doing +// advanced things like sampling from a cube map or spherical video. +PL_API bool pl_dispatch_vertex(pl_dispatch dp, const struct pl_dispatch_vertex_params *params); + +// Cancel an active shader without submitting anything. Useful, for example, +// if the shader was instead merged into a different shader. +PL_API void pl_dispatch_abort(pl_dispatch dp, pl_shader *sh); + +// Deprecated in favor of `pl_cache_save/pl_cache_load` on the `pl_cache` +// associated with the `pl_gpu` this dispatch is using. +PL_DEPRECATED PL_API size_t pl_dispatch_save(pl_dispatch dp, uint8_t *out_cache); +PL_DEPRECATED PL_API void pl_dispatch_load(pl_dispatch dp, const uint8_t *cache); + +PL_API_END + +#endif // LIBPLACEBO_DISPATCH_H diff --git a/src/include/libplacebo/dither.h b/src/include/libplacebo/dither.h new file mode 100644 index 0000000..84f17c7 --- /dev/null +++ b/src/include/libplacebo/dither.h @@ -0,0 +1,82 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DITHER_H_ +#define LIBPLACEBO_DITHER_H_ + +#include <libplacebo/common.h> + +PL_API_BEGIN + +// Generates a deterministic NxN bayer (ordered) dither matrix, storing the +// result in `data`. `size` must be a power of two. The resulting matrix will +// be roughly uniformly distributed within the range [0,1). +PL_API void pl_generate_bayer_matrix(float *data, int size); + +// Generates a random NxN blue noise texture. storing the result in `data`. +// `size` must be a positive power of two no larger than 256. The resulting +// texture will be roughly uniformly distributed within the range [0,1). +// +// Note: This function is very, *very* slow for large sizes. Generating a +// dither matrix with size 256 can take several seconds on a modern processor. +PL_API void pl_generate_blue_noise(float *data, int size); + +// Defines the border of all error diffusion kernels +#define PL_EDF_MIN_DX (-2) +#define PL_EDF_MAX_DX (2) +#define PL_EDF_MAX_DY (2) + +struct pl_error_diffusion_kernel { + const char *name; // Short and concise identifier + const char *description; // Longer / friendly name + + // The minimum value such that a (y, x) -> (y, x + y * shift) mapping will + // make all error pushing operations affect next column (and after it) + // only. 
+ // + // Higher shift values are significantly more computationally intensive. + int shift; + + // The diffusion factor for (y, x) is pattern[y][x - PL_EDF_MIN_DX] / divisor. + int pattern[PL_EDF_MAX_DY + 1][PL_EDF_MAX_DX - PL_EDF_MIN_DX + 1]; + int divisor; +}; + +// Algorithms with shift=1: +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_simple; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_false_fs; +// Algorithms with shift=2: +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra_lite; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_floyd_steinberg; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_atkinson; +// Algorithms with shift=3, probably too heavy for low end GPUs: +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_jarvis_judice_ninke; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_stucki; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_burkes; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra2; +PL_API extern const struct pl_error_diffusion_kernel pl_error_diffusion_sierra3; + +// A list of built-in error diffusion kernels, terminated by NULL +PL_API extern const struct pl_error_diffusion_kernel * const pl_error_diffusion_kernels[]; +PL_API extern const int pl_num_error_diffusion_kernels; // excluding trailing NULL + +// Find the error diffusion kernel with the given name, or NULL on failure. +PL_API const struct pl_error_diffusion_kernel *pl_find_error_diffusion_kernel(const char *name); + +PL_API_END + +#endif // LIBPLACEBO_DITHER_H_ diff --git a/src/include/libplacebo/dummy.h b/src/include/libplacebo/dummy.h new file mode 100644 index 0000000..c298438 --- /dev/null +++ b/src/include/libplacebo/dummy.h @@ -0,0 +1,131 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DUMMY_H_ +#define LIBPLACEBO_DUMMY_H_ + +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// The functions in this file allow creating and manipulating "dummy" contexts. +// A dummy context isn't actually mapped by the GPU, all data exists purely on +// the CPU. It also isn't capable of compiling or executing any shaders, any +// attempts to do so will simply fail. +// +// The main use case for this dummy context is for users who want to generate +// advanced shaders that depend on specific GLSL features or support for +// certain types of GPU resources (e.g. LUTs). This dummy context allows such +// shaders to be generated, with all of the referenced shader objects and +// textures simply containing their data in a host-accessible way. + +struct pl_gpu_dummy_params { + // These GPU parameters correspond to their equivalents in `pl_gpu`, and + // must obey the same rules as documented there. The values from + // `pl_gpu_dummy_default_params` are set to support pretty much everything + // and are set for GLSL version 450. + // + // Individual fields such as `glsl.compute` or `glsl.version` description + // can and should be overridden by the user based on their requirements. + // Individual limits should ideally be set based on the corresponding + // `glGet` queries etc. 
+ struct pl_glsl_version glsl; + struct pl_gpu_limits limits; +}; + +#define PL_GPU_DUMMY_DEFAULTS \ + .glsl = { \ + .version = 450, \ + .gles = false, \ + .vulkan = false, \ + .compute = true, \ + .max_shmem_size = SIZE_MAX, \ + .max_group_threads = 1024, \ + .max_group_size = { 1024, 1024, 1024 }, \ + .subgroup_size = 32, \ + .min_gather_offset = INT16_MIN, \ + .max_gather_offset = INT16_MAX, \ + }, \ + .limits = { \ + /* pl_gpu */ \ + .callbacks = false, \ + .thread_safe = true, \ + /* pl_buf */ \ + .max_buf_size = SIZE_MAX, \ + .max_ubo_size = SIZE_MAX, \ + .max_ssbo_size = SIZE_MAX, \ + .max_vbo_size = SIZE_MAX, \ + .max_mapped_size = SIZE_MAX, \ + .max_buffer_texels = UINT64_MAX, \ + /* pl_tex */ \ + .max_tex_1d_dim = UINT32_MAX, \ + .max_tex_2d_dim = UINT32_MAX, \ + .max_tex_3d_dim = UINT32_MAX, \ + .buf_transfer = true, \ + .align_tex_xfer_pitch = 1, \ + .align_tex_xfer_offset = 1, \ + /* pl_pass */ \ + .max_variable_comps = SIZE_MAX, \ + .max_constants = SIZE_MAX, \ + .max_pushc_size = SIZE_MAX, \ + .max_dispatch = { UINT32_MAX, UINT32_MAX, UINT32_MAX }, \ + .fragment_queues = 0, \ + .compute_queues = 0, \ + }, + +#define pl_gpu_dummy_params(...) (&(struct pl_gpu_dummy_params) { PL_GPU_DUMMY_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_gpu_dummy_params pl_gpu_dummy_default_params; + +// Create a dummy GPU context based on the given parameters. This GPU will have +// a format for each host-representable type (i.e. intN_t, floats and doubles), +// in the canonical channel order RGBA. These formats will have every possible +// capability activated, respectively. +// +// If `params` is left as NULL, it defaults to `&pl_gpu_dummy_params`. +PL_API pl_gpu pl_gpu_dummy_create(pl_log log, const struct pl_gpu_dummy_params *params); +PL_API void pl_gpu_dummy_destroy(pl_gpu *gpu); + +// Back-doors into the `pl_tex` and `pl_buf` representations. These allow you +// to access the raw data backing this object. 
Textures are always laid out in +// a tightly packed manner. +// +// For "placeholder" dummy textures, this always returns NULL. +PL_API uint8_t *pl_buf_dummy_data(pl_buf buf); +PL_API uint8_t *pl_tex_dummy_data(pl_tex tex); + +// Skeleton of `pl_tex_params` containing only the fields relevant to +// `pl_tex_dummy_create`, plus the extra `sampler_type` field. +struct pl_tex_dummy_params { + int w, h, d; + pl_fmt format; + enum pl_sampler_type sampler_type; + void *user_data; +}; + +#define pl_tex_dummy_params(...) (&(struct pl_tex_dummy_params) { __VA_ARGS__ }) + +// Allows creating a "placeholder" dummy texture. This is basically a texture +// that isn't even backed by anything. All `pl_tex_*` operations (other than +// `pl_tex_destroy`) performed on it will simply fail. +// +// All of the permissions will be set to `false`, except `sampleable`, which is +// set to `true`. (So you can use it as an input to shader sampling functions) +PL_API pl_tex pl_tex_dummy_create(pl_gpu gpu, const struct pl_tex_dummy_params *params); + +PL_API_END + +#endif // LIBPLACEBO_DUMMY_H_ diff --git a/src/include/libplacebo/filters.h b/src/include/libplacebo/filters.h new file mode 100644 index 0000000..a95649d --- /dev/null +++ b/src/include/libplacebo/filters.h @@ -0,0 +1,415 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef LIBPLACEBO_FILTER_KERNELS_H_ +#define LIBPLACEBO_FILTER_KERNELS_H_ + +#include <stdbool.h> +#include <libplacebo/log.h> + +PL_API_BEGIN + +#define PL_FILTER_MAX_PARAMS 2 + +// Invocation parameters for a given kernel +struct pl_filter_ctx { + float radius; + float params[PL_FILTER_MAX_PARAMS]; +}; + +// Represents a single filter function, i.e. kernel or windowing function. +struct pl_filter_function { + // The cosmetic name associated with this filter function. + const char *name; + + // The radius of the filter function. For resizable filters, this gives + // the radius needed to represent a single filter lobe (tap). + float radius; + + // If true, the filter function is resizable (see pl_filter_config.radius) + bool resizable; + + // If true, the filter function is tunable (see pl_filter_config.params) + bool tunable[PL_FILTER_MAX_PARAMS]; + + // If the relevant parameter is tunable, this contains the default values. + float params[PL_FILTER_MAX_PARAMS]; + + // The underlying filter function itself: Computes the weight as a function + // of the offset. All filter functions must be normalized such that x=0 is + // the center point, and in particular weight(0) = 1.0. The functions may + // be undefined for values of x outside [0, radius]. + double (*weight)(const struct pl_filter_ctx *f, double x); + + // If true, this filter represents an opaque placeholder for a more + // sophisticated filter function which does not fit into the pl_filter + // framework. `weight()` will always return 0.0. + bool opaque; +}; + +// Deprecated function, merely checks a->weight == b->weight +PL_DEPRECATED PL_API bool +pl_filter_function_eq(const struct pl_filter_function *a, + const struct pl_filter_function *b); + +// Box filter: Entirely 1.0 within the radius, entirely 0.0 outside of it. 
+// This is also sometimes called a Dirichlet window +PL_API extern const struct pl_filter_function pl_filter_function_box; + +// Triangle filter: Linear transitions from 1.0 at x=0 to 0.0 at x=radius. +// This is also sometimes called a Bartlett window. +PL_API extern const struct pl_filter_function pl_filter_function_triangle; + +// Cosine filter: Ordinary cosine function, single lobe. +PL_API extern const struct pl_filter_function pl_filter_function_cosine; + +// Hann function: Cosine filter named after Julius von Hann. Also commonly +// mislabeled as a "Hanning" function, due to its similarity to the Hamming +// function. +PL_API extern const struct pl_filter_function pl_filter_function_hann; + +// Hamming function: Cosine filter named after Richard Hamming. +PL_API extern const struct pl_filter_function pl_filter_function_hamming; + +// Welch filter: Polynomial function consisting of a single parabolic section. +PL_API extern const struct pl_filter_function pl_filter_function_welch; + +// Kaiser filter: Approximation of the DPSS window using Bessel functions. +// Also sometimes called a Kaiser-Bessel window. +// Parameter [0]: Shape (alpha). Determines the trade-off between the main lobe +// and the side lobes. +PL_API extern const struct pl_filter_function pl_filter_function_kaiser; + +// Blackman filter: Cosine filter named after Ralph Beebe Blackman. +// Parameter [0]: Scale (alpha). Influences the shape. The defaults result in +// zeros at the third and fourth sidelobes. +PL_API extern const struct pl_filter_function pl_filter_function_blackman; + +// Bohman filter: 2nd order Cosine filter. +PL_API extern const struct pl_filter_function pl_filter_function_bohman; + +// Gaussian function: Similar to the Gaussian distribution, this defines a +// bell curve function. +// Parameter [0]: Scale (t), increasing makes the result blurrier. 
+PL_API extern const struct pl_filter_function pl_filter_function_gaussian; + +// Quadratic function: 2nd order approximation of the gaussian function. Also +// sometimes called a "quadric" window. +PL_API extern const struct pl_filter_function pl_filter_function_quadratic; + +// Sinc function: Widely used for both kernels and windows, sinc(x) = sin(x)/x. +PL_API extern const struct pl_filter_function pl_filter_function_sinc; + +// Jinc function: Similar to sinc, but extended to the 2D domain. Widely +// used as the kernel of polar (EWA) filters. Also sometimes called a Sombrero +// function. +PL_API extern const struct pl_filter_function pl_filter_function_jinc; + +// Sphinx function: Similar to sinc and jinc, but extended to the 3D domain. +// The name is derived from "spherical" sinc. Can be used to filter 3D signals +// in theory. +PL_API extern const struct pl_filter_function pl_filter_function_sphinx; + +// B/C-tunable Spline function: This is a family of commonly used spline +// functions with two tunable parameters. Does not need to be windowed. +// Parameter [0]: "B" +// Parameter [1]: "C" +// Some popular variants of this function are: +// B = 1.0, C = 0.0: "base" Cubic (blurry) +// B = 0.0, C = 0.0: Hermite filter (blocky) +// B = 0.0, C = 0.5: Catmull-Rom filter (sharp) +// B = 1/3, C = 1/3: Mitchell-Netravali filter (soft, doesn't ring) +// B ≈ 0.37, C ≈ 0.31: Robidoux filter (used by ImageMagick) +// B ≈ 0.26, C ≈ 0.37: RobidouxSharp filter (sharper variant of Robidoux) +PL_API extern const struct pl_filter_function pl_filter_function_cubic; +PL_API extern const struct pl_filter_function pl_filter_function_hermite; +#define pl_filter_function_bicubic pl_filter_function_cubic +#define pl_filter_function_bcspline pl_filter_function_cubic + +// Cubic splines with 2/3/4 taps. Referred to as "spline16", "spline36", and +// "spline64" mainly for historical reasons, based on the number of pixels in +// their window when using them as 2D orthogonal filters. 
Do not need to be +// windowed. +PL_API extern const struct pl_filter_function pl_filter_function_spline16; +PL_API extern const struct pl_filter_function pl_filter_function_spline36; +PL_API extern const struct pl_filter_function pl_filter_function_spline64; + +// Special filter function for the built-in oversampling algorithm. This is an +// opaque filter with no meaningful representation. though it has one tunable +// parameter controlling the threshold at which to switch back to ordinary +// nearest neighbour sampling. (See `pl_shader_sample_oversample`) +PL_API extern const struct pl_filter_function pl_filter_function_oversample; + +// A list of built-in filter functions, terminated by NULL +// +// Note: May contain extra aliases for the above functions. +PL_API extern const struct pl_filter_function * const pl_filter_functions[]; +PL_API extern const int pl_num_filter_functions; // excluding trailing NULL + +// Find the filter function with the given name, or NULL on failure. +PL_API const struct pl_filter_function *pl_find_filter_function(const char *name); + +// Backwards compatibility with the older configuration API. Redundant with +// `pl_filter_function.name`. May be formally deprecated in the future. + +struct pl_filter_function_preset { + const char *name; + const struct pl_filter_function *function; +}; + +// A list of built-in filter function presets, terminated by {0} +PL_API extern const struct pl_filter_function_preset pl_filter_function_presets[]; +PL_API extern const int pl_num_filter_function_presets; // excluding trailing {0} + +// Find the filter function preset with the given name, or NULL on failure. 
+PL_API const struct pl_filter_function_preset *pl_find_filter_function_preset(const char *name); + +// Different usage domains for a filter +enum pl_filter_usage { + PL_FILTER_UPSCALING = (1 << 0), + PL_FILTER_DOWNSCALING = (1 << 1), + PL_FILTER_FRAME_MIXING = (1 << 2), + + PL_FILTER_SCALING = PL_FILTER_UPSCALING | PL_FILTER_DOWNSCALING, + PL_FILTER_ALL = PL_FILTER_SCALING | PL_FILTER_FRAME_MIXING, +}; + +// Represents a tuned combination of filter functions, plus parameters +struct pl_filter_config { + // The cosmetic name associated with this filter config. Optional for + // user-provided configs, but always set by built-in configurations. + const char *name; + + // Longer / friendly name. Always set for built-in configurations, + // except for names which are merely aliases of other filters. + const char *description; + + // Allowed and recommended usage domains (respectively) + // + // When it is desired to maintain a simpler user interface, it may be + // recommended to include only scalers whose recommended usage domains + // includes the relevant context in which it will be used. + enum pl_filter_usage allowed; + enum pl_filter_usage recommended; + + // The kernel function and (optionally) windowing function. + const struct pl_filter_function *kernel; + const struct pl_filter_function *window; + + // The radius. Ignored if !kernel->resizable. Optional, defaults to + // kernel->radius if unset. + float radius; + + // Parameters for the respective filter function. Ignored if not tunable. + float params[PL_FILTER_MAX_PARAMS]; + float wparams[PL_FILTER_MAX_PARAMS]; + + // Represents a clamping coefficient for negative weights. A value of 0.0 + // (the default) represents no clamping. A value of 1.0 represents full + // clamping, i.e. all negative weights will be clamped to 0. Values in + // between will be linearly scaled. + float clamp; + + // Additional blur coefficient. 
This effectively stretches the kernel, + // without changing the effective radius of the filter radius. Setting this + // to a value of 0.0 is equivalent to disabling it. Values significantly + // below 1.0 may seriously degrade the visual output, and should be used + // with care. + float blur; + + // Additional taper coefficient. This essentially flattens the function's + // center. The values within [-taper, taper] will return 1.0, with the + // actual function being squished into the remainder of [taper, radius]. + // Defaults to 0.0. + float taper; + + // If true, this filter is intended to be used as a polar/2D filter (EWA) + // instead of a separable/1D filter. Does not affect the actual sampling, + // but provides information about how the results are to be interpreted. + bool polar; + + // Antiringing strength. A value of 0.0 disables antiringing, and a value + // of 1.0 enables full-strength antiringing. Defaults to 0.0 if + // unspecified. + // + // Note: This is only included in `pl_filter_config` for convenience. Does + // not affect the actual filter sampling, but provides information to the + // downstream consumer of the `pl_filter`. + float antiring; +}; + +PL_API bool pl_filter_config_eq(const struct pl_filter_config *a, + const struct pl_filter_config *b); + +// Samples a given filter configuration at a given x coordinate, while +// respecting all parameters of the configuration. +PL_API double pl_filter_sample(const struct pl_filter_config *c, double x); + +// A list of built-in filter configurations. Since they are just combinations +// of the above filter functions, they are not described in much further +// detail. 
+PL_API extern const struct pl_filter_config pl_filter_spline16; // 2 taps +PL_API extern const struct pl_filter_config pl_filter_spline36; // 3 taps +PL_API extern const struct pl_filter_config pl_filter_spline64; // 4 taps +PL_API extern const struct pl_filter_config pl_filter_nearest; +PL_API extern const struct pl_filter_config pl_filter_box; +PL_API extern const struct pl_filter_config pl_filter_bilinear; +PL_API extern const struct pl_filter_config pl_filter_gaussian; +// Sinc family (all configured to 3 taps): +PL_API extern const struct pl_filter_config pl_filter_sinc; // unwindowed +PL_API extern const struct pl_filter_config pl_filter_lanczos; // sinc-sinc +PL_API extern const struct pl_filter_config pl_filter_ginseng; // sinc-jinc +PL_API extern const struct pl_filter_config pl_filter_ewa_jinc; // unwindowed +PL_API extern const struct pl_filter_config pl_filter_ewa_lanczos; // jinc-jinc +PL_API extern const struct pl_filter_config pl_filter_ewa_lanczossharp; +PL_API extern const struct pl_filter_config pl_filter_ewa_lanczos4sharpest; +PL_API extern const struct pl_filter_config pl_filter_ewa_ginseng; // jinc-sinc +PL_API extern const struct pl_filter_config pl_filter_ewa_hann; // jinc-hann +// Spline family +PL_API extern const struct pl_filter_config pl_filter_bicubic; +PL_API extern const struct pl_filter_config pl_filter_hermite; +PL_API extern const struct pl_filter_config pl_filter_catmull_rom; +PL_API extern const struct pl_filter_config pl_filter_mitchell; +PL_API extern const struct pl_filter_config pl_filter_mitchell_clamp; // clamp = 1.0 +PL_API extern const struct pl_filter_config pl_filter_robidoux; +PL_API extern const struct pl_filter_config pl_filter_robidouxsharp; +PL_API extern const struct pl_filter_config pl_filter_ewa_robidoux; +PL_API extern const struct pl_filter_config pl_filter_ewa_robidouxsharp; +// Special/opaque filters +PL_API extern const struct pl_filter_config pl_filter_oversample; + +// Backwards compatibility +#define 
pl_filter_triangle pl_filter_bilinear +#define pl_oversample_frame_mixer pl_filter_oversample + +// A list of built-in filter configs, terminated by NULL +PL_API extern const struct pl_filter_config * const pl_filter_configs[]; +PL_API extern const int pl_num_filter_configs; // excluding trailing NULL + +// Find the filter config with the given name, or NULL on failure. +// `usage` restricts the valid usage (based on `pl_filter_config.allowed`). +PL_API const struct pl_filter_config * +pl_find_filter_config(const char *name, enum pl_filter_usage usage); + +// Backward compatibility with the previous filter configuration API. Redundant +// with pl_filter_config.name/description. May be deprecated in the future. +struct pl_filter_preset { + const char *name; + const struct pl_filter_config *filter; + + // Longer / friendly name, or NULL for aliases + const char *description; +}; + +// A list of built-in filter presets, terminated by {0} +PL_API extern const struct pl_filter_preset pl_filter_presets[]; +PL_API extern const int pl_num_filter_presets; // excluding trailing {0} + +// Find the filter preset with the given name, or NULL on failure. +PL_API const struct pl_filter_preset *pl_find_filter_preset(const char *name); + +// Parameters for filter generation. +struct pl_filter_params { + // The particular filter configuration to be sampled. config.kernel must + // be set to a valid pl_filter_function. + struct pl_filter_config config; + + // The precision of the resulting LUT. A value of 64 should be fine for + // most practical purposes, but higher or lower values may be justified + // depending on the use case. This value must be set to something > 0. + int lut_entries; + + // --- Polar filers only (config.polar) + + // As a micro-optimization, all samples below this cutoff value will be + // ignored when updating the cutoff radius. Setting it to a value of 0.0 + // disables this optimization. 
+ float cutoff; + + // --- Separable filters only (!config.polar) + + // Indicates the maximum row size that is supported by the calling code, or + // 0 for no limit. + int max_row_size; + + // Indicates the row stride alignment. For some use cases (e.g. uploading + // the weights as a texture), there are certain alignment requirements for + // each row. The chosen row_size will always be a multiple of this value. + // Specifying 0 indicates no alignment requirements. + int row_stride_align; + + // --- Deprecated options + float filter_scale PL_DEPRECATED; // no effect, use `config.blur` instead +}; + +#define pl_filter_params(...) (&(struct pl_filter_params) { __VA_ARGS__ }) + +// Represents an initialized instance of a particular filter, with a +// precomputed LUT. The interpretation of the LUT depends on the type of the +// filter (polar or separable). +typedef const struct pl_filter_t { + // Deep copy of the parameters, for convenience. + struct pl_filter_params params; + + // Contains the true radius of the computed filter. This may be + // smaller than the configured radius depending on the exact filter + // parameters used. Mainly relevant for polar filters, since + // it affects the value range of *weights. + float radius; + + // Radius of the first zero crossing (main lobe size). + float radius_zero; + + // The computed look-up table (LUT). For polar filters, this is interpreted + // as a 1D array with dimensions [lut_entries] containing the raw filter + // samples on the scale [0, radius]. For separable (non-polar) filters, + // this is interpreted as a 2D array with dimensions + // [lut_entries][row_stride]. The inner rows contain the `row_size` samples + // to convolve with the corresponding input pixels. The outer coordinate is + // used to very the fractional offset (phase). So for example, if the + // sample position to reconstruct is directly aligned with the source + // texels, you would use the values from weights[0]. 
If the sample position + // to reconstruct is exactly half-way between two source texels (180° out + // of phase), you would use the values from weights[lut_entries/2]. + const float *weights; + + // --- separable filters only (!params.config.polar) + + // The number of source texels to convolve over for each row. This value + // will never exceed the given `max_row_size`. If the filter ends up + // cut off because of this, the bool `insufficient` will be set to true. + int row_size; + bool insufficient; + + // The separation (in *weights) between each row of the filter. Always + // a multiple of params.row_stride_align. + int row_stride; + + // --- deprecated / removed fields + float radius_cutoff PL_DEPRECATED; // identical to `radius` +} *pl_filter; + +// Generate (compute) a filter instance based on a given filter configuration. +// The resulting pl_filter must be freed with `pl_filter_free` when no longer +// needed. Returns NULL if filter generation fails due to invalid parameters +// (i.e. missing a required parameter). +PL_API pl_filter pl_filter_generate(pl_log log, const struct pl_filter_params *params); +PL_API void pl_filter_free(pl_filter *filter); + +PL_API_END + +#endif // LIBPLACEBO_FILTER_KERNELS_H_ diff --git a/src/include/libplacebo/gamut_mapping.h b/src/include/libplacebo/gamut_mapping.h new file mode 100644 index 0000000..a92a73b --- /dev/null +++ b/src/include/libplacebo/gamut_mapping.h @@ -0,0 +1,182 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_GAMUT_MAPPING_H_ +#define LIBPLACEBO_GAMUT_MAPPING_H_ + +#include <libplacebo/common.h> +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +struct pl_gamut_map_params; +struct pl_gamut_map_function { + const char *name; // Identifier + const char *description; // Friendly / longer name + + // The gamut-mapping function itself. Iterates over all values in `lut`, + // and adapts them as needed. + void (*map)(float *lut, const struct pl_gamut_map_params *params); + + // Returns true if `map` supports both stretching and contracting the + // gamut. In this case, `map` is always executed, even if the output gamut + // is larger than the input gamut. + bool bidirectional; + + // Private data. Unused by libplacebo, but may be accessed by `map`. + void *priv; +}; + +struct pl_gamut_map_constants { + // (Relative) chromaticity protection zone for perceptual mapping [0,1] + float perceptual_deadzone; + + // Strength of the perceptual saturation mapping component [0,1] + float perceptual_strength; + + // I vs C curve gamma to use for colorimetric clipping [0,10] + float colorimetric_gamma; + + // Knee point to use for softclipping methods (perceptual, softclip) [0,1] + float softclip_knee; + + // Desaturation strength (for softclip only) [0,1] + float softclip_desat; +}; + +#define PL_GAMUT_MAP_CONSTANTS \ + .colorimetric_gamma = 1.80f, \ + .softclip_knee = 0.70f, \ + .softclip_desat = 0.35f, \ + .perceptual_deadzone = 0.30f, \ + .perceptual_strength = 0.80f, + +struct pl_gamut_map_params { + // If `function` is NULL, defaults to `pl_gamut_map_clip`. + const struct pl_gamut_map_function *function; + + // The desired input/output primaries. This affects the subjective color + // volume in which the desired mapping shall take place. 
+ struct pl_raw_primaries input_gamut; + struct pl_raw_primaries output_gamut; + + // Minimum/maximum luminance (PQ) of the target display. Note that the same + // value applies to both the input and output, since it's assumed that tone + // mapping has already happened by this stage. This effectively defines the + // legal gamut boundary in RGB space. + // + // This also defines the I channel value range, for `pl_gamut_map_generate` + float min_luma; + float max_luma; + + // Common constants, should be initialized to PL_GAMUT_MAP_CONSTANTS if + // not intending to override them further. + struct pl_gamut_map_constants constants; + + // -- LUT generation options (for `pl_gamut_map_generate` only) + + // The size of the resulting LUT, per channel. + // + // Note: For quality, it's generally best to increase h > I > C + int lut_size_I; + int lut_size_C; + int lut_size_h; + + // The stride (in number of floats) between elements in the resulting LUT. + int lut_stride; + + // -- Removed parameters + float chroma_margin PL_DEPRECATED; // non-functional +}; + +#define pl_gamut_map_params(...) (&(struct pl_gamut_map_params) { \ + .constants = { PL_GAMUT_MAP_CONSTANTS }, \ + __VA_ARGS__ \ +}) + +// Note: Only does pointer equality testing on `function` +PL_API bool pl_gamut_map_params_equal(const struct pl_gamut_map_params *a, + const struct pl_gamut_map_params *b); + +// Returns true if the given gamut mapping configuration effectively represents +// a no-op configuration. Gamut mapping can be skipped in this case. +PL_API bool pl_gamut_map_params_noop(const struct pl_gamut_map_params *params); + +// Generate a gamut-mapping LUT for a given configuration. LUT samples are +// stored as IPTPQc4 values, but the LUT itself is indexed by IChPQc4,spanning +// the effective range [min_luma, max_luma] × [0, 0.5] × [-pi,pi]. +// +// This ordering is designed to keep frequently co-occurring values close in +// memory, while permitting simple wrapping of the 'h' component. 
+PL_API void pl_gamut_map_generate(float *out, const struct pl_gamut_map_params *params); + +// Samples a gamut mapping function for a single IPTPQc4 value. The input +// values are updated in-place. +PL_API void pl_gamut_map_sample(float x[3], const struct pl_gamut_map_params *params); + +// Performs no gamut-mapping, just hard clips out-of-range colors per-channel. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_clip; + +// Performs a perceptually balanced (saturation) gamut mapping, using a soft +// knee function to preserve in-gamut colors, followed by a final softclip +// operation. This works bidirectionally, meaning it can both compress and +// expand the gamut. Behaves similar to a blend of `saturation` and `softclip`. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_perceptual; + +// Performs a perceptually balanced gamut mapping using a soft knee function to +// roll-off clipped regions, and a hue shifting function to preserve saturation. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_softclip; + +// Performs relative colorimetric clipping, while maintaining an exponential +// relationship between brightness and chromaticity. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_relative; + +// Performs simple RGB->RGB saturation mapping. The input R/G/B channels are +// mapped directly onto the output R/G/B channels. Will never clip, but will +// distort all hues and/or result in a faded look. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_saturation; + +// Performs absolute colorimetric clipping. Like pl_gamut_map_relative, but +// does not adapt the white point. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_absolute; + +// Performs constant-luminance colorimetric clipping, desaturing colors +// towards white until they're in-range. 
+PL_API extern const struct pl_gamut_map_function pl_gamut_map_desaturate; + +// Uniformly darkens the input slightly to prevent clipping on blown-out +// highlights, then clamps colorimetrically to the input gamut boundary, +// biased slightly to preserve chromaticity over luminance. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_darken; + +// Performs no gamut mapping, but simply highlights out-of-gamut pixels. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_highlight; + +// Linearly/uniformly desaturates the image in order to bring the entire +// image into the target gamut. +PL_API extern const struct pl_gamut_map_function pl_gamut_map_linear; + +// A list of built-in gamut mapping functions, terminated by NULL +PL_API extern const struct pl_gamut_map_function * const pl_gamut_map_functions[]; +PL_API extern const int pl_num_gamut_map_functions; // excluding trailing NULL + +// Find the gamut mapping function with the given name, or NULL on failure. +PL_API const struct pl_gamut_map_function *pl_find_gamut_map_function(const char *name); + +PL_API_END + +#endif // LIBPLACEBO_GAMUT_MAPPING_H_ diff --git a/src/include/libplacebo/gpu.h b/src/include/libplacebo/gpu.h new file mode 100644 index 0000000..a63fdf7 --- /dev/null +++ b/src/include/libplacebo/gpu.h @@ -0,0 +1,1464 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_GPU_H_ +#define LIBPLACEBO_GPU_H_ + +#include <stddef.h> +#include <stdbool.h> +#include <stdint.h> + +#include <libplacebo/common.h> +#include <libplacebo/cache.h> +#include <libplacebo/log.h> + +PL_API_BEGIN + +// These are not memory managed, and should represent compile-time constants +typedef const char *pl_debug_tag; +#define PL_DEBUG_TAG (__FILE__ ":" PL_TOSTRING(__LINE__)) + +// Type of a shader input descriptor. +enum pl_desc_type { + PL_DESC_INVALID = 0, + PL_DESC_SAMPLED_TEX, // C: pl_tex* GLSL: combined texture sampler + // (`pl_tex->params.sampleable` must be set) + PL_DESC_STORAGE_IMG, // C: pl_tex* GLSL: storage image + // (`pl_tex->params.storable` must be set) + PL_DESC_BUF_UNIFORM, // C: pl_buf* GLSL: uniform buffer + // (`pl_buf->params.uniform` must be set) + PL_DESC_BUF_STORAGE, // C: pl_buf* GLSL: storage buffer + // (`pl_buf->params.storable` must be set) + PL_DESC_BUF_TEXEL_UNIFORM,// C: pl_buf* GLSL: uniform samplerBuffer + // (`pl_buf->params.uniform` and `format` must be set) + PL_DESC_BUF_TEXEL_STORAGE,// C: pl_buf* GLSL: uniform imageBuffer + // (`pl_buf->params.uniform` and `format` must be set) + PL_DESC_TYPE_COUNT +}; + +// This file contains the definition of an API which is designed to abstract +// away from platform-specific APIs like the various OpenGL variants, Direct3D +// and Vulkan in a common way. It is a much more limited API than those APIs, +// since it tries targeting a very small common subset of features that is +// needed to implement libplacebo's rendering. +// +// NOTE: Most, but not all, parameter conditions (phrases such as "must" or +// "valid usage" are explicitly tested and result in error messages followed by +// graceful failure. Exceptions are noted where they exist. + +// Structure which wraps metadata describing GLSL capabilities. 
+struct pl_glsl_version { + int version; // GLSL version (e.g. 450), for #version + bool gles; // GLSL ES semantics (ESSL) + bool vulkan; // GL_KHR_vulkan_glsl semantics + + // Compute shader support and limits. If `compute` is false, then all + // of the remaining fields in this section are {0}. + bool compute; + size_t max_shmem_size; // maximum compute shader shared memory size + uint32_t max_group_threads; // maximum number of local threads per work group + uint32_t max_group_size[3]; // maximum work group size per dimension + + // If nonzero, signals availability of shader subgroups. This guarantess + // availability of all of the following extensions: + // - GL_KHR_shader_subgroup_basic + // - GL_KHR_shader_subgroup_vote + // - GL_KHR_shader_subgroup_arithmetic + // - GL_KHR_shader_subgroup_ballot + // - GL_KHR_shader_subgroup_shuffle + uint32_t subgroup_size; + + // Miscellaneous shader limits + int16_t min_gather_offset; // minimum `textureGatherOffset` offset + int16_t max_gather_offset; // maximum `textureGatherOffset` offset +}; + +// Backwards compatibility alias +#define pl_glsl_desc pl_glsl_version + +// Structure defining the physical limits and capabilities of this GPU +// instance. If a limit is given as 0, that means that feature is unsupported. +struct pl_gpu_limits { + // --- pl_gpu + bool thread_safe; // `pl_gpu` calls are thread-safe + bool callbacks; // supports asynchronous GPU callbacks + + // --- pl_buf + size_t max_buf_size; // maximum size of any buffer + size_t max_ubo_size; // maximum size of a `uniform` buffer + size_t max_ssbo_size; // maximum size of a `storable` buffer + size_t max_vbo_size; // maximum size of a `drawable` buffer + size_t max_mapped_size; // maximum size of a `host_mapped` buffer + uint64_t max_buffer_texels; // maximum number of texels in a texel buffer + bool host_cached; // if true, PL_BUF_MEM_HOST buffers are cached + + // Required alignment for PL_HANDLE_HOST_PTR imports. 
This is provided + // merely as a hint to the user. If the host pointer being imported is + // misaligned, libplacebo will internally round (over-map) the region. + size_t align_host_ptr; + + // --- pl_tex + uint32_t max_tex_1d_dim; // maximum width for a 1D texture + uint32_t max_tex_2d_dim; // maximum width/height for a 2D texture (required) + uint32_t max_tex_3d_dim; // maximum width/height/depth for a 3D texture + bool blittable_1d_3d; // supports blittable 1D/3D textures + bool buf_transfer; // supports `pl_tex_transfer_params.buf` + + // These don't represent hard limits but indicate performance hints for + // optimal alignment. For best performance, the corresponding field + // should be aligned to a multiple of these. They will always be a power + // of two. + size_t align_tex_xfer_pitch; // optimal `pl_tex_transfer_params.row_pitch` + size_t align_tex_xfer_offset; // optimal `pl_tex_transfer_params.buf_offset` + + // --- pl_pass + size_t max_variable_comps; // maximum components passed in variables + size_t max_constants; // maximum `pl_pass_params.num_constants` + bool array_size_constants; // push constants can be used to size arrays + size_t max_pushc_size; // maximum `push_constants_size` + size_t align_vertex_stride; // alignment of `pl_pass_params.vertex_stride` + uint32_t max_dispatch[3]; // maximum dispatch size per dimension + + // Note: At least one of `max_variable_comps` or `max_ubo_size` is + // guaranteed to be nonzero. + + // As a performance hint, the GPU may signal the number of command queues + // it has for fragment and compute shaders, respectively. Users may use + // this information to decide the appropriate type of shader to dispatch. 
+ uint32_t fragment_queues; + uint32_t compute_queues; +}; + +// Backwards compatibility aliases +#define max_xfer_size max_buf_size +#define align_tex_xfer_stride align_tex_xfer_pitch + +// Some `pl_gpu` operations allow sharing GPU resources with external APIs - +// examples include interop with other graphics APIs such as CUDA, and also +// various hardware decoding APIs. This defines the mechanism underpinning the +// communication of such an interoperation. +typedef uint64_t pl_handle_caps; +enum pl_handle_type { + PL_HANDLE_FD = (1 << 0), // `int fd` for POSIX-style APIs + PL_HANDLE_WIN32 = (1 << 1), // `HANDLE` for win32 API + PL_HANDLE_WIN32_KMT = (1 << 2), // `HANDLE` for pre-Windows-8 win32 API + PL_HANDLE_DMA_BUF = (1 << 3), // 'int fd' for a dma_buf fd + PL_HANDLE_HOST_PTR = (1 << 4), // `void *` for a host-allocated pointer + PL_HANDLE_MTL_TEX = (1 << 5), // `MTLTexture*` for Apple platforms + PL_HANDLE_IOSURFACE = (1 << 6), // `IOSurfaceRef` for Apple platforms +}; + +struct pl_gpu_handle_caps { + pl_handle_caps tex; // supported handles for `pl_tex` + `pl_shared_mem` + pl_handle_caps buf; // supported handles for `pl_buf` + `pl_shared_mem` + pl_handle_caps sync; // supported handles for `pl_sync` / semaphores +}; + +// Wrapper for the handle used to communicate a shared resource externally. +// This handle is owned by the `pl_gpu` - if a user wishes to use it in a way +// that takes over ownership (e.g. importing into some APIs), they must clone +// the handle before doing so (e.g. using `dup` for fds). It is important to +// read the external API documentation _very_ carefully as different handle +// types may be managed in different ways. (eg: CUDA takes ownership of an fd, +// but does not take ownership of a win32 handle). 
+union pl_handle { + int fd; // PL_HANDLE_FD / PL_HANDLE_DMA_BUF + void *handle; // PL_HANDLE_WIN32 / PL_HANDLE_WIN32_KMT / PL_HANDLE_MTL_TEX / PL_HANDLE_IOSURFACE + void *ptr; // PL_HANDLE_HOST_PTR +}; + +// Structure encapsulating memory that is shared between libplacebo and the +// user. This memory can be imported into external APIs using the handle. +// +// If the object a `pl_shared_mem` belongs to is destroyed (e.g. via +// `pl_buf_destroy`), the handle becomes undefined, as do the contents of the +// memory it points to, as well as any external API objects imported from it. +struct pl_shared_mem { + union pl_handle handle; + size_t size; // the total size of the memory referenced by this handle + size_t offset; // the offset of the object within the referenced memory + + // Note: `size` is optional for some APIs and handle types, in particular + // when importing DMABUFs or D3D11 textures. + + // For PL_HANDLE_DMA_BUF, this specifies the DRM format modifier that + // describes this resource. Note that when importing `pl_buf`, this must + // be DRM_FORMAT_MOD_LINEAR. For importing `pl_tex`, it can be any + // format modifier supported by the implementation. + uint64_t drm_format_mod; + + // When importing a `pl_tex` of type PL_HANDLE_DMA_BUF, this can be used to + // set the image stride (AKA pitch) in memory. If left as 0, defaults to + // the image width/height. + size_t stride_w; + size_t stride_h; + + // When importing a `pl_tex` of type PL_HANDLE_MTL_TEX, this determines + // which plane is imported (0 - 2). + unsigned plane; +}; + +// Structure grouping PCI bus address fields for GPU devices +struct pl_gpu_pci_address { + uint32_t domain; + uint32_t bus; + uint32_t device; + uint32_t function; +}; + +typedef const struct pl_fmt_t *pl_fmt; + +// Abstract device context which wraps an underlying graphics context and can +// be used to dispatch rendering commands. 
+// +// Thread-safety: Depends on `pl_gpu_limits.thread_safe` +typedef const struct pl_gpu_t { + pl_log log; + + struct pl_glsl_version glsl; // GLSL features supported by this GPU + struct pl_gpu_limits limits; // physical device limits and capabilities + + // Fields relevant to external API interop. If the underlying device does + // not support interop with other APIs, these will all be {0}. + struct pl_gpu_handle_caps export_caps; // supported handles for exporting + struct pl_gpu_handle_caps import_caps; // supported handles for importing + uint8_t uuid[16]; // underlying device UUID + + // Supported texture formats, in preference order. (If there are multiple + // similar formats, the "better" ones come first) + pl_fmt *formats; + int num_formats; + + // PCI Bus address of the underlying device, to help with interop. + // This will only be filled in if interop is supported. + struct pl_gpu_pci_address pci; +} *pl_gpu; + +// Attach a pl_cache object to this GPU instance. This cache will be +// used to cache all compiled shaders, as well as several other shader objects +// (e.g. cached 3DLUTs). Calling this with `cache = NULL` disables the cache. +// +// Note: Calling this after shaders have already been compiled will not +// retroactively add those shaders to the cache, so it's recommended to set +// this early, before creating any passes. 
+PL_API void pl_gpu_set_cache(pl_gpu gpu, pl_cache cache); + +enum pl_fmt_type { + PL_FMT_UNKNOWN = 0, // also used for inconsistent multi-component formats + PL_FMT_UNORM, // unsigned, normalized integer format (sampled as float) + PL_FMT_SNORM, // signed, normalized integer format (sampled as float) + PL_FMT_UINT, // unsigned integer format (sampled as integer) + PL_FMT_SINT, // signed integer format (sampled as integer) + PL_FMT_FLOAT, // (signed) float formats, any bit size + PL_FMT_TYPE_COUNT, +}; + +enum pl_fmt_caps { + PL_FMT_CAP_SAMPLEABLE = 1 << 0, // may be sampled from (PL_DESC_SAMPLED_TEX) + PL_FMT_CAP_STORABLE = 1 << 1, // may be used as storage image (PL_DESC_STORAGE_IMG) + PL_FMT_CAP_LINEAR = 1 << 2, // may be linearly samplied from (PL_TEX_SAMPLE_LINEAR) + PL_FMT_CAP_RENDERABLE = 1 << 3, // may be rendered to (pl_pass_params.target_fmt) + PL_FMT_CAP_BLENDABLE = 1 << 4, // may be blended to (pl_pass_params.enable_blend) + PL_FMT_CAP_BLITTABLE = 1 << 5, // may be blitted from/to (pl_tex_blit) + PL_FMT_CAP_VERTEX = 1 << 6, // may be used as a vertex attribute + PL_FMT_CAP_TEXEL_UNIFORM = 1 << 7, // may be used as a texel uniform buffer + PL_FMT_CAP_TEXEL_STORAGE = 1 << 8, // may be used as a texel storage buffer + PL_FMT_CAP_HOST_READABLE = 1 << 9, // may be used with `host_readable` textures + PL_FMT_CAP_READWRITE = 1 << 10, // may be used with PL_DESC_ACCESS_READWRITE + + // Notes: + // - PL_FMT_CAP_LINEAR also implies PL_FMT_CAP_SAMPLEABLE + // - PL_FMT_CAP_STORABLE also implies `pl_gpu.glsl.compute` + // - PL_FMT_CAP_BLENDABLE implies PL_FMT_CAP_RENDERABLE + // - PL_FMT_CAP_VERTEX implies that the format is non-opaque + // - PL_FMT_CAP_HOST_READABLE implies that the format is non-opaque +}; + +struct pl_fmt_plane { + // Underlying format of this particular sub-plane. This describes the + // components, texel size and host representation for the purpose of + // e.g. transfers, blits, and sampling. 
+ pl_fmt format; + + // X/Y subsampling shift factor for this plane. + uint8_t shift_x, shift_y; +}; + +// Structure describing a texel/vertex format. +struct pl_fmt_t { + const char *name; // symbolic name for this format (e.g. rgba32f) + uint64_t signature; // unique but stable signature (for pass reusability) + + enum pl_fmt_type type; // the format's data type and interpretation + enum pl_fmt_caps caps; // the features supported by this format + int num_components; // number of components for this format + int component_depth[4]; // meaningful bits per component, texture precision + size_t internal_size; // internal texel size (for blit compatibility) + + // For planar formats, this provides a description of each sub-plane. + // + // Note on planar formats: Planar formats are always opaque and typically + // support only a limit subset of capabilities (or none at all). Access + // should be done via sub-planes. (See `pl_tex.planes`) + struct pl_fmt_plane planes[4]; + int num_planes; // or 0 for non-planar textures + + // This controls the relationship between the data as seen by the host and + // the way it's interpreted by the texture. The host representation is + // always tightly packed (no padding bits in between each component). + // + // This representation assumes little endian ordering, i.e. components + // being ordered from LSB to MSB in memory. Note that for oddly packed + // formats like rgb10a2 or rgb565, this is inconsistent with the naming. + // (That is to say, rgb565 has sample order {2, 1, 0} under this convention + // - because rgb565 treats the R channel as the *most* significant bits) + // + // If `opaque` is true, then there's no meaningful correspondence between + // the two, and all of the remaining fields in this section are unset. 
+ // + // If `emulated` is true, then this format doesn't actually exist on the + // GPU as an uploadable texture format - and any apparent support is being + // emulated (typically using compute shaders in the upload path). + bool opaque; + bool emulated; + size_t texel_size; // total size in bytes per texel + size_t texel_align; // texel alignment requirements (bytes) + int host_bits[4]; // number of meaningful bits in host memory + int sample_order[4]; // sampled index for each component, e.g. + // {2, 1, 0, 3} for BGRA textures + + // For sampleable formats, this bool indicates whether or not the format + // is compatible with `textureGather()` + bool gatherable; + + // If usable as a vertex or texel buffer format, this gives the GLSL type + // corresponding to the data. (e.g. vec4) + const char *glsl_type; + + // If usable as a storage image or texel storage buffer + // (PL_FMT_CAP_STORABLE / PL_FMT_CAP_TEXEL_STORAGE), this gives the GLSL + // texel format corresponding to the format (e.g. rgba16ui), if any. This + // field may be NULL, in which case the format modifier may be left + // unspecified. + const char *glsl_format; + + // If available, this gives the fourcc associated with the host + // representation. In particular, this is intended for use with + // PL_HANDLE_DMA_BUF, where this field will match the DRM format from + // <drm_fourcc.h>. May be 0, for formats without matching DRM fourcc. + uint32_t fourcc; + + // If `fourcc` is set, this contains the list of supported drm format + // modifiers for this format. + const uint64_t *modifiers; + int num_modifiers; +}; + +// Returns whether or not a pl_fmt's components are ordered sequentially +// in memory in the order RGBA. +PL_API bool pl_fmt_is_ordered(pl_fmt fmt); + +// Returns whether or not a pl_fmt is sampled as a float (e.g. UNORM) +PL_API bool pl_fmt_is_float(pl_fmt fmt); + +// Returns whether or not a pl_fmt supports a given DRM modifier. 
+PL_API bool pl_fmt_has_modifier(pl_fmt fmt, uint64_t modifier); + +// Helper function to find a format with a given number of components and +// minimum effective precision per component. If `host_bits` is set, then the +// format will always be non-opaque, unpadded, ordered and have exactly this +// bit depth for each component. Finally, all `caps` must be supported. +PL_API pl_fmt pl_find_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components, + int min_depth, int host_bits, enum pl_fmt_caps caps); + +// Finds a vertex format for a given configuration. The resulting vertex will +// have a component depth equivalent to the sizeof() the equivalent host type. +// (e.g. PL_FMT_FLOAT will always have sizeof(float)) +PL_API pl_fmt pl_find_vertex_fmt(pl_gpu gpu, enum pl_fmt_type type, int num_components); + +// Find a format based on its name. +PL_API pl_fmt pl_find_named_fmt(pl_gpu gpu, const char *name); + +// Find a format based on its fourcc. +PL_API pl_fmt pl_find_fourcc(pl_gpu gpu, uint32_t fourcc); + +// A generic 'timer query' object. These can be used to measure an +// approximation of the GPU execution time of a given operation. Due to the +// highly asynchronous nature of GPUs, the actual results of any individual +// timer query may be delayed by quite a bit. As such, users should avoid +// trying to pair any particular GPU command with any particular timer query +// result, and only reuse `pl_timer` objects with identical operations. The +// results of timer queries are guaranteed to be in-order, but individual +// queries may be dropped, and some operations might not record timer results +// at all. (For example, if the underlying hardware does not support timer +// queries for a given operation type) +// +// Thread-safety: Unsafe +typedef struct pl_timer_t *pl_timer; + +// Creates a new timer object. 
This may return NULL, for example if the +// implementation does not support timers, but since passing NULL to +// `pl_timer_destroy` and `pl_timer_query` is safe, users generally need not +// concern themselves with handling this. +PL_API pl_timer pl_timer_create(pl_gpu gpu); +PL_API void pl_timer_destroy(pl_gpu gpu, pl_timer *); + +// Queries any results that have been measured since the last execution of +// `pl_timer_query`. There may be more than one result, in which case the user +// should simply call the function again to get the subsequent values. This +// function returns a value of 0 in the event that there are no more +// unprocessed results. +// +// The results are reported in nanoseconds, but the actual precision of the +// timestamp queries may be significantly lower. +// +// Note: Results do not queue up indefinitely. Generally, the implementation +// will only keep track of a small, fixed number of results internally. Make +// sure to include this function as part of your main rendering loop to process +// all of its results, or older results will be overwritten by newer ones. +PL_API uint64_t pl_timer_query(pl_gpu gpu, pl_timer); + +enum pl_buf_mem_type { + PL_BUF_MEM_AUTO = 0, // use whatever seems most appropriate + PL_BUF_MEM_HOST, // try allocating from host memory (RAM) + PL_BUF_MEM_DEVICE, // try allocating from device memory (VRAM) + PL_BUF_MEM_TYPE_COUNT, + + // Note: This distinction only matters for discrete GPUs +}; + +// Structure describing a buffer. +struct pl_buf_params { + size_t size; // size in bytes (must be <= `pl_gpu_limits.max_buf_size`) + bool host_writable; // contents may be updated via pl_buf_write() + bool host_readable; // contents may be read back via pl_buf_read() + bool host_mapped; // create a persistent, RW mapping (pl_buf.data) + + // May be used as PL_DESC_BUF_UNIFORM or PL_DESC_BUF_TEXEL_UNIFORM. 
+ // Requires `size <= pl_gpu_limits.max_ubo_size` + bool uniform; + + // May be used as PL_DESC_BUF_STORAGE or PL_DESC_BUF_TEXEL_STORAGE. + // Requires `size <= pl_gpu_limits.max_ssbo_size` + bool storable; + + // May be used as the source of vertex data for `pl_pass_run`. + bool drawable; + + // Provide a hint for the memory type you want to use when allocating + // this buffer's memory. + // + // Note: Restrictions may apply depending on the usage flags. In + // particular, allocating buffers with `uniform` or `storable` enabled from + // non-device memory will almost surely fail. + enum pl_buf_mem_type memory_type; + + // Setting this to a format with the `PL_FMT_CAP_TEXEL_*` capability allows + // this buffer to be used as a `PL_DESC_BUF_TEXEL_*`, when `uniform` and + // `storage` are respectively also enabled. + pl_fmt format; + + // At most one of `export_handle` and `import_handle` can be set for a + // buffer. + + // Setting this indicates that the memory backing this buffer should be + // shared with external APIs, If so, this must be exactly *one* of + // `pl_gpu.export_caps.buf`. + enum pl_handle_type export_handle; + + // Setting this indicates that the memory backing this buffer will be + // imported from an external API. If so, this must be exactly *one* of + // `pl_gpu.import_caps.buf`. + enum pl_handle_type import_handle; + + // If the shared memory is being imported, the import handle must be + // specified here. Otherwise, this is ignored. + struct pl_shared_mem shared_mem; + + // If non-NULL, the buffer will be created with these contents. Otherwise, + // the initial data is undefined. Using this does *not* require setting + // host_writable. + const void *initial_data; + + // Arbitrary user data. libplacebo does not use this at all. + void *user_data; + + // Arbitrary identifying tag. Used only for debugging purposes. + pl_debug_tag debug_tag; +}; + +#define pl_buf_params(...) 
(&(struct pl_buf_params) { \ + .debug_tag = PL_DEBUG_TAG, \ + __VA_ARGS__ \ + }) + +// A generic buffer, which can be used for multiple purposes (texture transfer, +// storage buffer, uniform buffer, etc.) +// +// Note on efficiency: A pl_buf does not necessarily represent a true "buffer" +// object on the underlying graphics API. It may also refer to a sub-slice of +// a larger buffer, depending on the implementation details of the GPU. The +// bottom line is that users do not need to worry about the efficiency of using +// many small pl_buf objects. Having many small pl_bufs, even lots of few-byte +// vertex buffers, is designed to be completely fine. +// +// Thread-safety: Unsafe +typedef const struct pl_buf_t { + struct pl_buf_params params; + uint8_t *data; // for persistently mapped buffers, points to the first byte + + // If `params.handle_type` is set, this structure references the shared + // memory backing this buffer, via the requested handle type. + // + // While this buffer is not in an "exported" state, the contents of the + // memory are undefined. (See: `pl_buf_export`) + struct pl_shared_mem shared_mem; +} *pl_buf; + +// Create a buffer. The type of buffer depends on the parameters. The buffer +// parameters must adhere to the restrictions imposed by the pl_gpu_limits. +// Returns NULL on failure. +// +// For buffers with shared memory, the buffer is considered to be in an +// "exported" state by default, and may be used directly by the external API +// after being created (until the first libplacebo operation on the buffer). +PL_API pl_buf pl_buf_create(pl_gpu gpu, const struct pl_buf_params *params); +PL_API void pl_buf_destroy(pl_gpu gpu, pl_buf *buf); + +// This behaves like `pl_buf_create`, but if the buffer already exists and has +// incompatible parameters, it will get destroyed first. 
A buffer is considered +// "compatible" if it has the same buffer type and texel format, a size greater +// than or equal to the requested size, and it has a superset of the features +// the user requested. After this operation, the contents of the buffer are +// undefined. +// +// Note: Due to its unpredictability, it's not allowed to use this with +// `params->initial_data` being set. Similarly, it's not allowed on a buffer +// with `params->export_handle`. since this may invalidate the corresponding +// external API's handle. Conversely, it *is* allowed on a buffer with +// `params->host_mapped`, and the corresponding `buf->data` pointer *may* +// change as a result of doing so. +// +// Note: If the `user_data` alone changes, this does not trigger a buffer +// recreation. In theory, this can be used to detect when the buffer ended +// up being recreated. +PL_API bool pl_buf_recreate(pl_gpu gpu, pl_buf *buf, const struct pl_buf_params *params); + +// Update the contents of a buffer, starting at a given offset (must be a +// multiple of 4) and up to a given size, with the contents of *data. +// +// This function will block until the buffer is no longer in use. Use +// `pl_buf_poll` to perform non-blocking queries of buffer availability. +// +// Note: This function can incur synchronization overhead, so it shouldn't be +// used in tight loops. If you do need to loop (e.g. to perform a strided +// write), consider using host-mapped buffers, or fixing the memory in RAM, +// before calling this function. +PL_API void pl_buf_write(pl_gpu gpu, pl_buf buf, size_t buf_offset, + const void *data, size_t size); + +// Read back the contents of a buffer, starting at a given offset, storing the +// data into *dest. Returns whether successful. +// +// This function will block until the buffer is no longer in use. Use +// `pl_buf_poll` to perform non-blocking queries of buffer availability. 
+PL_API bool pl_buf_read(pl_gpu gpu, pl_buf buf, size_t buf_offset, + void *dest, size_t size); + +// Copy `size` bytes from one buffer to another, reading from and writing to +// the respective offsets. +PL_API void pl_buf_copy(pl_gpu gpu, pl_buf dst, size_t dst_offset, + pl_buf src, size_t src_offset, size_t size); + +// Initiates a buffer export operation, allowing a buffer to be accessed by an +// external API. This is only valid for buffers with `params.handle_type`. +// Calling this twice in a row is a harmless no-op. Returns whether successful. +// +// There is no corresponding "buffer import" operation, the next libplacebo +// operation that touches the buffer (e.g. pl_tex_upload, but also pl_buf_write +// and pl_buf_read) will implicitly import the buffer back to libplacebo. Users +// must ensure that all pending operations made by the external API are fully +// completed before using it in libplacebo again. (Otherwise, the behaviour +// is undefined) +// +// Please note that this function returning does not mean the memory is +// immediately available as such. In general, it will mark a buffer as "in use" +// in the same way any other buffer operation would, and it is the user's +// responsibility to wait until `pl_buf_poll` returns false before accessing +// the memory from the external API. +// +// In terms of the access performed by this operation, it is not considered a +// "read" or "write" and therefore does not technically conflict with reads or +// writes to the buffer performed by the host (via mapped memory - any use of +// `pl_buf_read` or `pl_buf_write` would defeat the purpose of the export). +// However, restrictions made by the external API may apply that prevent this. +// +// The recommended use pattern is something like this: +// +// while (loop) { +// pl_buf buf = get_free_buffer(); // or block on pl_buf_poll +// // write to the buffer using the external API +// pl_tex_upload(gpu, /* ... buf ... 
*/); // implicitly imports +// pl_buf_export(gpu, buf); +// } +// +// i.e. perform an external API operation, then use and immediately export the +// buffer in libplacebo, and finally wait until `pl_buf_poll` is false before +// re-using it in the external API. (Or get a new buffer in the meantime) +PL_API bool pl_buf_export(pl_gpu gpu, pl_buf buf); + +// Returns whether or not a buffer is currently "in use". This can either be +// because of a pending read operation, a pending write operation or a pending +// buffer export operation. Any access to the buffer by external APIs or via +// the host pointer (for host-mapped buffers) is forbidden while a buffer is +// "in use". The only exception to this rule is multiple reads, for example +// reading from a buffer with `pl_tex_upload` while simultaneously reading from +// it using mapped memory. +// +// The `timeout`, specified in nanoseconds, indicates how long to block for +// before returning. If set to 0, this function will never block, and only +// returns the current status of the buffer. The actual precision of the +// timeout may be significantly longer than one nanosecond, and has no upper +// bound. This function does not provide hard latency guarantees. This function +// may also return at any time, even if the buffer is still in use. If the user +// wishes to block until the buffer is definitely no longer in use, the +// recommended usage is: +// +// while (pl_buf_poll(gpu, buf, UINT64_MAX)) +// ; // do nothing +// +// Note: libplacebo operations on buffers are always internally synchronized, +// so this is only needed for host-mapped or externally exported buffers. +// However, it may be used to do non-blocking queries before calling blocking +// functions such as `pl_buf_read`. +// +// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly +// synchronized, meaning it can safely be called on a `pl_buf` that is in use +// by another thread. 
+PL_API bool pl_buf_poll(pl_gpu gpu, pl_buf buf, uint64_t timeout); + +enum pl_tex_sample_mode { + PL_TEX_SAMPLE_NEAREST, // nearest neighbour sampling + PL_TEX_SAMPLE_LINEAR, // linear filtering, requires PL_FMT_CAP_LINEAR + PL_TEX_SAMPLE_MODE_COUNT, +}; + +enum pl_tex_address_mode { + PL_TEX_ADDRESS_CLAMP, // clamp the nearest edge texel + PL_TEX_ADDRESS_REPEAT, // repeat (tile) the texture + PL_TEX_ADDRESS_MIRROR, // repeat (mirror) the texture + PL_TEX_ADDRESS_MODE_COUNT, +}; + +// Structure describing a texture. +struct pl_tex_params { + int w, h, d; // physical dimension; unused dimensions must be 0 + pl_fmt format; + + // The following bools describe what operations can be performed. The + // corresponding pl_fmt capability must be set for every enabled + // operation type. + // + // Note: For planar formats, it is also possible to set capabilities only + // supported by sub-planes. In this case, the corresponding functionality + // will be available for the sub-plane, but not the planar texture itself. + bool sampleable; // usable as a PL_DESC_SAMPLED_TEX + bool renderable; // usable as a render target (pl_pass_run) + // (must only be used with 2D textures) + bool storable; // usable as a storage image (PL_DESC_IMG_*) + bool blit_src; // usable as a blit source + bool blit_dst; // usable as a blit destination + bool host_writable; // may be updated with pl_tex_upload() + bool host_readable; // may be fetched with pl_tex_download() + + // Note: For `blit_src`, `blit_dst`, the texture must either be + // 2-dimensional or `pl_gpu_limits.blittable_1d_3d` must be set. + + // At most one of `export_handle` and `import_handle` can be set for a + // texture. + + // Setting this indicates that the memory backing this texture should be + // shared with external APIs, If so, this must be exactly *one* of + // `pl_gpu.export_caps.tex`. 
+ enum pl_handle_type export_handle; + + // Setting this indicates that the memory backing this texture will be + // imported from an external API. If so, this must be exactly *one* of + // `pl_gpu.import_caps.tex`. Mutually exclusive with `initial_data`. + enum pl_handle_type import_handle; + + // If the shared memory is being imported, the import handle must be + // specified here. Otherwise, this is ignored. + struct pl_shared_mem shared_mem; + + // If non-NULL, the texture will be created with these contents (tightly + // packed). Using this does *not* require setting host_writable. Otherwise, + // the initial data is undefined. Mutually exclusive with `import_handle`. + const void *initial_data; + + // Arbitrary user data. libplacebo does not use this at all. + void *user_data; + + // Arbitrary identifying tag. Used only for debugging purposes. + pl_debug_tag debug_tag; +}; + +#define pl_tex_params(...) (&(struct pl_tex_params) { \ + .debug_tag = PL_DEBUG_TAG, \ + __VA_ARGS__ \ + }) + +static inline int pl_tex_params_dimension(const struct pl_tex_params params) +{ + return params.d ? 3 : params.h ? 2 : 1; +} + +enum pl_sampler_type { + PL_SAMPLER_NORMAL, // gsampler2D, gsampler3D etc. + PL_SAMPLER_RECT, // gsampler2DRect + PL_SAMPLER_EXTERNAL, // gsamplerExternalOES + PL_SAMPLER_TYPE_COUNT, +}; + +// Conflates the following typical GPU API concepts: +// - texture itself +// - sampler state +// - staging buffers for texture upload +// - framebuffer objects +// - wrappers for swapchain framebuffers +// - synchronization needed for upload/rendering/etc. +// +// Essentially a pl_tex can be anything ranging from a normal texture, a wrapped +// external/real framebuffer, a framebuffer object + texture pair, a mapped +// texture (via pl_hwdec), or other sorts of things that can be sampled from +// and/or rendered to. 
+// +// Thread-safety: Unsafe +typedef const struct pl_tex_t *pl_tex; +struct pl_tex_t { + struct pl_tex_params params; + + // If `params.format` is a planar format, this contains `pl_tex` handles + // encapsulating individual texture planes. Conversely, if this is a + // sub-plane of a planar texture, `parent` points to the planar texture. + // + // Note: Calling `pl_tex_destroy` on sub-planes is undefined behavior. + pl_tex planes[4]; + pl_tex parent; + + // If `params.export_handle` is set, this structure references the shared + // memory backing this texture, via the requested handle type. + // + // While this texture is not in an "exported" state, the contents of the + // memory are undefined. (See: `pl_tex_export`) + // + // Note: Due to vulkan driver limitations, `shared_mem.drm_format_mod` will + // currently always be set to DRM_FORMAT_MOD_INVALID. No guarantee can be + // made about the cross-driver compatibility of textures exported this way. + struct pl_shared_mem shared_mem; + + // If `params.sampleable` is true, this indicates the correct sampler type + // to use when sampling from this texture. + enum pl_sampler_type sampler_type; +}; + +// Create a texture (with undefined contents). Returns NULL on failure. This is +// assumed to be an expensive/rare operation, and may need to perform memory +// allocation or framebuffer creation. +PL_API pl_tex pl_tex_create(pl_gpu gpu, const struct pl_tex_params *params); +PL_API void pl_tex_destroy(pl_gpu gpu, pl_tex *tex); + +// This works like `pl_tex_create`, but if the texture already exists and has +// incompatible texture parameters, it will get destroyed first. A texture is +// considered "compatible" if it has the same texture format and sample/address +// mode and it supports a superset of the features the user requested. + +// Even if the texture is not recreated, calling this function will still +// invalidate the contents of the texture. 
(Note: Because of this, +// `initial_data` may not be used with `pl_tex_recreate`. Doing so is an error) +// +// Note: If the `user_data` alone changes, this does not trigger a texture +// recreation. In theory, this can be used to detect when the texture ended +// up being recreated. +PL_API bool pl_tex_recreate(pl_gpu gpu, pl_tex *tex, const struct pl_tex_params *params); + +// Invalidates the contents of a texture. After this, the contents are fully +// undefined. +PL_API void pl_tex_invalidate(pl_gpu gpu, pl_tex tex); + +union pl_clear_color { + float f[4]; + int32_t i[4]; + uint32_t u[4]; +}; + +// Clear the dst texture with the given color (rgba). This is functionally +// identical to a blit operation, which means `dst->params.blit_dst` must be +// set. +PL_API void pl_tex_clear_ex(pl_gpu gpu, pl_tex dst, const union pl_clear_color color); + +// Wrapper for `pl_tex_clear_ex` which only works for floating point textures. +PL_API void pl_tex_clear(pl_gpu gpu, pl_tex dst, const float color[4]); + +struct pl_tex_blit_params { + // The texture to blit from. Must have `params.blit_src` enabled. + pl_tex src; + + // The texture to blit to. Must have `params.blit_dst` enabled, and a + // format that is loosely compatible with `src`. This essentially means + // that they must have the same `internal_size`. Additionally, UINT + // textures can only be blitted to other UINT textures, and SINT textures + // can only be blitted to other SINT textures. + pl_tex dst; + + // The region of the source texture to blit. Must be within the texture + // bounds of `src`. May be flipped. (Optional) + pl_rect3d src_rc; + + // The region of the destination texture to blit into. Must be within the + // texture bounds of `dst`. May be flipped. Areas outside of `dst_rc` in + // `dst` are preserved. (Optional) + pl_rect3d dst_rc; + + // If `src_rc` and `dst_rc` have different sizes, the texture will be + // scaled using the given texture sampling mode. 
+ enum pl_tex_sample_mode sample_mode; +}; + +#define pl_tex_blit_params(...) (&(struct pl_tex_blit_params) { __VA_ARGS__ }) + +// Copy a sub-rectangle from one texture to another. +PL_API void pl_tex_blit(pl_gpu gpu, const struct pl_tex_blit_params *params); + +// Structure describing a texture transfer operation. +struct pl_tex_transfer_params { + // Texture to transfer to/from. Depending on the type of the operation, + // this must have params.host_writable (uploads) or params.host_readable + // (downloads) set, respectively. + pl_tex tex; + + // Note: Superfluous parameters are ignored, i.e. for a 1D texture, the y + // and z fields of `rc`, as well as the corresponding pitches, are ignored. + // In all other cases, the pitch must be large enough to contain the + // corresponding dimension of `rc`, and the `rc` must be normalized and + // fully contained within the image dimensions. Missing fields in the `rc` + // are inferred from the image size. If unset, the pitch is inferred + // from `rc` (that is, it's assumed that the data is tightly packed in the + // buffer). Otherwise, `row_pitch` *must* be a multiple of + // `tex->params.format->texel_align`, and `depth_pitch` must be a multiple + // of `row_pitch`. + pl_rect3d rc; // region of the texture to transfer + size_t row_pitch; // the number of bytes separating image rows + size_t depth_pitch; // the number of bytes separating image planes + + // An optional timer to report the approximate duration of the texture + // transfer to. Note that this is only an approximation, since the actual + // texture transfer may happen entirely in the background (in particular, + // for implementations with asynchronous transfer capabilities). It's also + // not guaranteed that all GPUs support this. + pl_timer timer; + + // An optional callback to fire after the operation completes. If this is + // specified, then the operation is performed asynchronously. 
Note that + // transfers to/from buffers are always asynchronous, even without, this + // field, so it's more useful for `ptr` transfers. (Though it can still be + // helpful to avoid having to manually poll buffers all the time) + // + // When this is *not* specified, uploads from `ptr` are still asynchronous + // but require a host memcpy, while downloads from `ptr` are blocking. As + // such, it's recommended to always try using asynchronous texture + // transfers wherever possible. + // + // Note: Requires `pl_gpu_limits.callbacks` + // + // Note: Callbacks are implicitly synchronized, meaning that callbacks are + // guaranteed to never execute concurrently with other callbacks. However, + // they may execute from any thread that the `pl_gpu` is used on. + void (*callback)(void *priv); + void *priv; // arbitrary user data + + // For the data source/target of a transfer operation, there are two valid + // options: + // + // 1. Transferring to/from a buffer: (requires `pl_gpu_limits.buf_transfer`) + pl_buf buf; // buffer to use + size_t buf_offset; // offset of data within buffer, should be a + // multiple of `tex->params.format->texel_size` + // 2. Transferring to/from host memory directly: + void *ptr; // address of data + bool no_import; // always use memcpy, bypassing host ptr import + + // Note: The contents of the memory region / buffer must exactly match the + // texture format; i.e. there is no explicit conversion between formats. +}; + +#define pl_tex_transfer_params(...) (&(struct pl_tex_transfer_params) { __VA_ARGS__ }) + +// Upload data to a texture. Returns whether successful. +PL_API bool pl_tex_upload(pl_gpu gpu, const struct pl_tex_transfer_params *params); + +// Download data from a texture. Returns whether successful. +PL_API bool pl_tex_download(pl_gpu gpu, const struct pl_tex_transfer_params *params); + +// Returns whether or not a texture is currently "in use". 
This can either be +// because of a pending read operation, a pending write operation or a pending +// texture export operation. Note that this function's usefulness is extremely +// limited under ordinary circumstances. In practically all cases, textures do +// not need to be directly synchronized by the user, except when interfacing +// with external libraries. This function should NOT, however, be used as a +// crutch to avoid having to implement semaphore-based synchronization. Use +// the API-specific functions such as `pl_vulkan_hold/release` for that. +// +// A good example of a use case in which this function is required is when +// interoperating with external memory management that needs to know when an +// imported texture is safe to free / reclaim internally, in which case +// semaphores are insufficient because memory management is a host operation. +// +// The `timeout`, specified in nanoseconds, indicates how long to block for +// before returning. If set to 0, this function will never block, and only +// returns the current status of the texture. The actual precision of the +// timeout may be significantly longer than one nanosecond, and has no upper +// bound. This function does not provide hard latency guarantees. This function +// may also return at any time, even if the texture is still in use. If the +// user wishes to block until the texture is definitely no longer in use, the +// recommended usage is: +// +// while (pl_tex_poll(gpu, tex, UINT64_MAX)) +// ; // do nothing +// +// Note: If `pl_gpu_limits.thread_safe` is set, this function is implicitly +// synchronized, meaning it can safely be called on a `pl_tex` that is in use +// by another thread. +PL_API bool pl_tex_poll(pl_gpu gpu, pl_tex tex, uint64_t timeout); + +// Data type of a shader input variable (e.g. 
uniform, or UBO member) +enum pl_var_type { + PL_VAR_INVALID = 0, + PL_VAR_SINT, // C: int GLSL: int/ivec + PL_VAR_UINT, // C: unsigned int GLSL: uint/uvec + PL_VAR_FLOAT, // C: float GLSL: float/vec/mat + PL_VAR_TYPE_COUNT +}; + +// Returns the host size (in bytes) of a pl_var_type. +PL_API size_t pl_var_type_size(enum pl_var_type type); + +// Represents a shader input variable (concrete data, e.g. vector, matrix) +struct pl_var { + const char *name; // name as used in the shader + enum pl_var_type type; + // The total number of values is given by dim_v * dim_m. For example, a + // vec2 would have dim_v = 2 and dim_m = 1. A mat3x4 would have dim_v = 4 + // and dim_m = 3. + int dim_v; // vector dimension + int dim_m; // matrix dimension (number of columns, see below) + int dim_a; // array dimension +}; + +// Helper functions for constructing the most common pl_vars, with names +// corresponding to their corresponding GLSL built-in types. +PL_API struct pl_var pl_var_float(const char *name); +PL_API struct pl_var pl_var_vec2(const char *name); +PL_API struct pl_var pl_var_vec3(const char *name); +PL_API struct pl_var pl_var_vec4(const char *name); +PL_API struct pl_var pl_var_mat2(const char *name); +PL_API struct pl_var pl_var_mat2x3(const char *name); +PL_API struct pl_var pl_var_mat2x4(const char *name); +PL_API struct pl_var pl_var_mat3(const char *name); +PL_API struct pl_var pl_var_mat3x4(const char *name); +PL_API struct pl_var pl_var_mat4x2(const char *name); +PL_API struct pl_var pl_var_mat4x3(const char *name); +PL_API struct pl_var pl_var_mat4(const char *name); +PL_API struct pl_var pl_var_int(const char *name); +PL_API struct pl_var pl_var_ivec2(const char *name); +PL_API struct pl_var pl_var_ivec3(const char *name); +PL_API struct pl_var pl_var_ivec4(const char *name); +PL_API struct pl_var pl_var_uint(const char *name); +PL_API struct pl_var pl_var_uvec2(const char *name); +PL_API struct pl_var pl_var_uvec3(const char *name); +PL_API struct pl_var 
pl_var_uvec4(const char *name); + +struct pl_named_var { + const char *glsl_name; + struct pl_var var; +}; + +// The same list as above, tagged by name and terminated with a {0} entry. +PL_API extern const struct pl_named_var pl_var_glsl_types[]; + +// Efficient helper function for performing a lookup in the above array. +// Returns NULL if the variable is not legal. Note that the array dimension is +// ignored, since it's usually part of the variable name and not the type name. +PL_API const char *pl_var_glsl_type_name(struct pl_var var); + +// Converts a pl_fmt to an "equivalent" pl_var. Equivalent in this sense means +// that the pl_var's type will be the same as the vertex's sampled type (e.g. +// PL_FMT_UNORM gets turned into PL_VAR_FLOAT). +PL_API struct pl_var pl_var_from_fmt(pl_fmt fmt, const char *name); + +// Describes the memory layout of a variable, relative to some starting location +// (typically the offset within a uniform/storage/pushconstant buffer) +// +// Note on matrices: All GPUs expect column major matrices, for both buffers and +// input variables. Care needs to be taken to avoid trying to use e.g. a +// pl_matrix3x3 (which is row major) directly as a pl_var_update.data! +// +// In terms of the host layout, a column-major matrix (e.g. matCxR) with C +// columns and R rows is treated like an array vecR[C]. The `stride` here refers +// to the separation between these array elements, i.e. the separation between +// the individual columns. +// +// Visualization of a mat4x3: +// +// 0 1 2 3 <- columns +// 0 [ (A) (D) (G) (J) ] +// 1 [ (B) (E) (H) (K) ] +// 2 [ (C) (F) (I) (L) ] +// ^ rows +// +// Layout in GPU memory: (stride=16, size=60) +// +// [ A B C ] X <- column 0, offset +0 +// [ D E F ] X <- column 1, offset +16 +// [ G H I ] X <- column 2, offset +32 +// [ J K L ] <- column 3, offset +48 +// +// Note the lack of padding on the last column in this example. 
+// In general: size <= stride * dim_m +// +// C representation: (stride=12, size=48) +// +// { { A, B, C }, +// { D, E, F }, +// { G, H, I }, +// { J, K, L } } +// +// Note on arrays: `stride` represents both the stride between elements of a +// matrix, and the stride between elements of an array. That is, there is no +// distinction between the columns of a matrix and the rows of an array. For +// example, a mat2[10] and a vec2[20] share the same pl_var_layout - the stride +// would be sizeof(vec2) and the size would be sizeof(vec2) * 2 * 10. +// +// For non-array/matrix types, `stride` is equal to `size`. + +struct pl_var_layout { + size_t offset; // the starting offset of the first byte + size_t stride; // the delta between two elements of an array/matrix + size_t size; // the total size of the input +}; + +// Returns the host layout of an input variable as required for a +// tightly-packed, byte-aligned C data type, given a starting offset. +PL_API struct pl_var_layout pl_var_host_layout(size_t offset, const struct pl_var *var); + +// Returns the GLSL std140 layout of an input variable given a current buffer +// offset, as required for a buffer descriptor of type PL_DESC_BUF_UNIFORM +// +// The normal way to use this function is when calculating the size and offset +// requirements of a uniform buffer in an incremental fashion, to calculate the +// new offset of the next variable in this buffer. +PL_API struct pl_var_layout pl_std140_layout(size_t offset, const struct pl_var *var); + +// Returns the GLSL std430 layout of an input variable given a current buffer +// offset, as required for a buffer descriptor of type PL_DESC_BUF_STORAGE, and +// for push constants. 
+PL_API struct pl_var_layout pl_std430_layout(size_t offset, const struct pl_var *var); + +// Convenience definitions / friendly names for these +#define pl_buf_uniform_layout pl_std140_layout +#define pl_buf_storage_layout pl_std430_layout +#define pl_push_constant_layout pl_std430_layout + +// Like memcpy, but copies bytes from `src` to `dst` in a manner governed by +// the stride and size of `dst_layout` as well as `src_layout`. Also takes +// into account the respective `offset`. +PL_API void memcpy_layout(void *dst, struct pl_var_layout dst_layout, + const void *src, struct pl_var_layout src_layout); + +// Represents a compile-time constant. +struct pl_constant { + enum pl_var_type type; // constant data type + uint32_t id; // GLSL `constant_id` + size_t offset; // byte offset in `constant_data` +}; + +// Represents a vertex attribute. +struct pl_vertex_attrib { + const char *name; // name as used in the shader + pl_fmt fmt; // data format (must have PL_FMT_CAP_VERTEX) + size_t offset; // byte offset into the vertex struct + int location; // vertex location (as used in the shader) +}; + +// Returns an abstract namespace index for a given descriptor type. This will +// always be a value >= 0 and < PL_DESC_TYPE_COUNT. Implementations can use +// this to figure out which descriptors may share the same value of `binding`. +// Bindings must only be unique for all descriptors within the same namespace. +PL_API int pl_desc_namespace(pl_gpu gpu, enum pl_desc_type type); + +// Access mode of a shader input descriptor. +enum pl_desc_access { + PL_DESC_ACCESS_READWRITE, + PL_DESC_ACCESS_READONLY, + PL_DESC_ACCESS_WRITEONLY, + PL_DESC_ACCESS_COUNT, +}; + +// Returns the GLSL syntax for a given access mode (e.g. "readonly"). +PL_API const char *pl_desc_access_glsl_name(enum pl_desc_access mode); + +// Represents a shader descriptor (e.g. 
texture or buffer binding) +struct pl_desc { + const char *name; // name as used in the shader + enum pl_desc_type type; + + // The binding of this descriptor, as used in the shader. All bindings + // within a namespace must be unique. (see: pl_desc_namespace) + int binding; + + // For storage images and storage buffers, this can be used to restrict + // the type of access that may be performed on the descriptor. Ignored for + // the other descriptor types (uniform buffers and sampled textures are + // always read-only). + enum pl_desc_access access; +}; + +// Framebuffer blending mode (for raster passes) +enum pl_blend_mode { + PL_BLEND_ZERO, + PL_BLEND_ONE, + PL_BLEND_SRC_ALPHA, + PL_BLEND_ONE_MINUS_SRC_ALPHA, + PL_BLEND_MODE_COUNT, +}; + +struct pl_blend_params { + enum pl_blend_mode src_rgb; + enum pl_blend_mode dst_rgb; + enum pl_blend_mode src_alpha; + enum pl_blend_mode dst_alpha; +}; + +#define pl_blend_params(...) (&(struct pl_blend_params) { __VA_ARGS__ }) + +// Typical alpha compositing +PL_API extern const struct pl_blend_params pl_alpha_overlay; + +enum pl_prim_type { + PL_PRIM_TRIANGLE_LIST, + PL_PRIM_TRIANGLE_STRIP, + PL_PRIM_TYPE_COUNT, +}; + +enum pl_index_format { + PL_INDEX_UINT16 = 0, + PL_INDEX_UINT32, + PL_INDEX_FORMAT_COUNT, +}; + +enum pl_pass_type { + PL_PASS_INVALID = 0, + PL_PASS_RASTER, // vertex+fragment shader + PL_PASS_COMPUTE, // compute shader (requires `pl_gpu.glsl.compute`) + PL_PASS_TYPE_COUNT, +}; + +// Description of a rendering pass. It conflates the following: +// - GLSL shader(s) and its list of inputs +// - target parameters (for raster passes) +struct pl_pass_params { + enum pl_pass_type type; + + // Input variables. + struct pl_var *variables; + int num_variables; + + // Input descriptors. + struct pl_desc *descriptors; + int num_descriptors; + + // Compile-time specialization constants. + struct pl_constant *constants; + int num_constants; + + // Initial data for the specialization constants. Optional. 
If NULL, + // specialization constants receive the values from the shader text. + void *constant_data; + + // Push constant region. Must be a multiple of 4 <= limits.max_pushc_size + size_t push_constants_size; + + // The shader text in GLSL. For PL_PASS_RASTER, this is interpreted + // as a fragment shader. For PL_PASS_COMPUTE, this is interpreted as + // a compute shader. + const char *glsl_shader; + + // --- type==PL_PASS_RASTER only + + // Describes the interpretation and layout of the vertex data. + enum pl_prim_type vertex_type; + struct pl_vertex_attrib *vertex_attribs; + int num_vertex_attribs; + size_t vertex_stride; // must be a multiple of limits.align_vertex_stride + + // The vertex shader itself. + const char *vertex_shader; + + // Target format. The format must support PL_FMT_CAP_RENDERABLE. The + // resulting pass may only be used on textures that have a format with a + // `pl_fmt.signature` compatible to this format. + pl_fmt target_format; + + // Target blending mode. If this is NULL, blending is disabled. Otherwise, + // the `target_format` must also support PL_FMT_CAP_BLENDABLE. + const struct pl_blend_params *blend_params; + + // If false, the target's existing contents will be discarded before the + // pass is run. (Semantically equivalent to calling pl_tex_invalidate + // before every pl_pass_run, but slightly more efficient) + // + // Specifying `blend_params` requires `load_target` to be true. + bool load_target; + + // --- Deprecated / removed fields. + PL_DEPRECATED const uint8_t *cached_program; // Non-functional + PL_DEPRECATED size_t cached_program_len; +}; + +#define pl_pass_params(...) 
(&(struct pl_pass_params) { __VA_ARGS__ }) + +// Conflates the following typical GPU API concepts: +// - various kinds of shaders +// - rendering pipelines +// - descriptor sets, uniforms, other bindings +// - all synchronization necessary +// - the current values of all inputs +// +// Thread-safety: Unsafe +typedef const struct pl_pass_t { + struct pl_pass_params params; +} *pl_pass; + +// Compile a shader and create a render pass. This is a rare/expensive +// operation and may take a significant amount of time, even if a cached +// program is used. Returns NULL on failure. +PL_API pl_pass pl_pass_create(pl_gpu gpu, const struct pl_pass_params *params); +PL_API void pl_pass_destroy(pl_gpu gpu, pl_pass *pass); + +struct pl_desc_binding { + const void *object; // pl_* object with type corresponding to pl_desc_type + + // For PL_DESC_SAMPLED_TEX, this can be used to configure the sampler. + enum pl_tex_address_mode address_mode; + enum pl_tex_sample_mode sample_mode; +}; + +struct pl_var_update { + int index; // index into params.variables[] + const void *data; // pointer to raw byte data corresponding to pl_var_host_layout() +}; + +struct pl_pass_run_params { + pl_pass pass; + + // If present, the shader will be re-specialized with the new constants + // provided. This is a significantly cheaper operation than recompiling a + // brand new shader, but should still be avoided if possible. + // + // Leaving it as NULL re-uses the existing specialization values. Ignored + // if the shader has no specialization constants. Guaranteed to be a no-op + // if the values have not changed since the last invocation. + void *constant_data; + + // This list only contains descriptors/variables which have changed + // since the previous invocation. All non-mentioned variables implicitly + // preserve their state from the last invocation. + struct pl_var_update *var_updates; + int num_var_updates; + + // This list contains all descriptors used by this pass. 
It must + // always be filled, even if the descriptors haven't changed. The order + // must match that of pass->params.descriptors + struct pl_desc_binding *desc_bindings; + + // The push constants for this invocation. This must always be set and + // fully defined for every invocation if params.push_constants_size > 0. + void *push_constants; + + // An optional timer to report the approximate runtime of this shader pass + // invocation to. Note that this is only an approximation, since shaders + // may overlap their execution times and contend for GPU time. + pl_timer timer; + + // --- pass->params.type==PL_PASS_RASTER only + + // Target must be a 2D texture, `target->params.renderable` must be true, + // and `target->params.format->signature` must match the signature provided + // in `pass->params.target_format`. + // + // If the viewport or scissors are left blank, they are inferred from + // target->params. + // + // WARNING: Rendering to a *target that is being read from by the same + // shader is undefined behavior. In general, trying to bind the same + // resource multiple times to the same shader is undefined behavior. + pl_tex target; + pl_rect2d viewport; // screen space viewport (must be normalized) + pl_rect2d scissors; // target render scissors (must be normalized) + + // Number of vertices to render + int vertex_count; + + // Vertex data may be provided in one of two forms: + // + // 1. Drawing from host memory directly + const void *vertex_data; + // 2. Drawing from a vertex buffer (requires `vertex_buf->params.drawable`) + pl_buf vertex_buf; + size_t buf_offset; + + // (Optional) Index data may be provided in the form given by `index_fmt`. + // These will be used for instanced rendering. Similar to vertex data, this + // can be provided in two forms: + // 1. From host memory + const void *index_data; + enum pl_index_format index_fmt; + // 2. 
From an index buffer (requires `index_buf->params.drawable`) + pl_buf index_buf; + size_t index_offset; + // Note: Drawing from an index buffer requires vertex data to also be + // present in buffer form, i.e. it's forbidden to mix `index_buf` with + // `vertex_data` (though vice versa is allowed). + + // --- pass->params.type==PL_PASS_COMPUTE only + + // Number of work groups to dispatch per dimension (X/Y/Z). Must be <= the + // corresponding index of limits.max_dispatch + int compute_groups[3]; +}; + +#define pl_pass_run_params(...) (&(struct pl_pass_run_params) { __VA_ARGS__ }) + +// Execute a render pass. +PL_API void pl_pass_run(pl_gpu gpu, const struct pl_pass_run_params *params); + +// This is semantically a no-op, but it provides a hint that you want to flush +// any partially queued up commands and begin execution. There is normally no +// need to call this, because queued commands will always be implicitly flushed +// whenever necessary to make forward progress on commands like `pl_buf_poll`, +// or when submitting a frame to a swapchain for display. In fact, calling this +// function can negatively impact performance, because some GPUs rely on being +// able to re-order and modify queued commands in order to enable optimizations +// retroactively. +// +// The only time this might be beneficial to call explicitly is if you're doing +// lots of offline processing, i.e. you aren't rendering to a swapchain but to +// textures that you download from again. In that case you should call this +// function after each "work item" to ensure good parallelism between them. +// +// It's worth noting that this function may block if you're over-feeding the +// GPU without waiting for existing results to finish. +PL_API void pl_gpu_flush(pl_gpu gpu); + +// This is like `pl_gpu_flush` but also blocks until the GPU is fully idle +// before returning. Using this in your rendering loop is seriously disadvised, +// and almost never the right solution. 
The intended use case is for deinit +// logic, where users may want to force the all pending GPU operations to +// finish so they can clean up their state more easily. +// +// After this operation is called, it's guaranteed that all pending buffer +// operations are complete - i.e. `pl_buf_poll` is guaranteed to return false. +// It's also guaranteed that any outstanding timer query results are available. +// +// Note: If you only care about buffer operations, you can accomplish this more +// easily by using `pl_buf_poll` with the timeout set to `UINT64_MAX`. But if +// you have many buffers it may be more convenient to call this function +// instead. The difference is that this function will also affect e.g. renders +// to a `pl_swapchain`. +PL_API void pl_gpu_finish(pl_gpu gpu); + +// Returns true if the GPU is considered to be in a "failed" state, which +// during normal operation is typically the result of things like the device +// being lost (due to e.g. power management). +// +// If this returns true, users *should* destroy and recreate the `pl_gpu`, +// including all associated resources, via the appropriate mechanism. +PL_API bool pl_gpu_is_failed(pl_gpu gpu); + + +// Deprecated objects and functions: + +// A generic synchronization object intended for use with an external API. This +// is not required when solely using libplacebo API functions, as all required +// synchronisation is done internally. This comes in the form of a pair of +// semaphores - one to synchronize access in each direction. +// +// Thread-safety: Unsafe +typedef const struct pl_sync_t { + enum pl_handle_type handle_type; + + // This handle is signalled by the `pl_gpu`, and waited on by the user. It + // fires when it is safe for the user to access the shared resource. + union pl_handle wait_handle; + + // This handle is signalled by the user, and waited on by the `pl_gpu`. It + // must fire when the user has finished accessing the shared resource. 
+ union pl_handle signal_handle; +} *pl_sync; + +// Create a synchronization object. Returns NULL on failure. +// +// `handle_type` must be exactly *one* of `pl_gpu.export_caps.sync`, and +// indicates which type of handle to generate for sharing this sync object. +// +// Deprecated in favor of API-specific semaphore creation operations such as +// `pl_vulkan_sem_create`. +PL_DEPRECATED PL_API pl_sync pl_sync_create(pl_gpu gpu, enum pl_handle_type handle_type); + +// Destroy a `pl_sync`. Note that this invalidates the externally imported +// semaphores. Users should therefore make sure that all operations that +// wait on or signal any of the semaphore have been fully submitted and +// processed by the external API before destroying the `pl_sync`. +// +// Despite this, it's safe to destroy a `pl_sync` if the only pending +// operations that involve it are internal to libplacebo. +PL_DEPRECATED PL_API void pl_sync_destroy(pl_gpu gpu, pl_sync *sync); + +// Initiates a texture export operation, allowing a texture to be accessed by +// an external API. Returns whether successful. After this operation +// successfully returns, it is guaranteed that `sync->wait_handle` will +// eventually be signalled. For APIs where this is relevant, the image layout +// should be specified as "general", e.g. `GL_LAYOUT_GENERAL_EXT` for OpenGL. +// +// There is no corresponding "import" operation - the next operation that uses +// a texture will implicitly import the texture. Valid API usage requires that +// the user *must* submit a semaphore signal operation on `sync->signal_handle` +// before doing so. Not doing so is undefined behavior and may very well +// deadlock the calling process and/or the graphics card! +// +// Note that despite this restriction, it is always valid to call +// `pl_tex_destroy`, even if the texture is in an exported state, without +// having to signal the corresponding sync object first. 
+// +// Deprecated in favor of API-specific synchronization mechanisms such as +// `pl_vulkan_hold/release_ex`. +PL_DEPRECATED PL_API bool pl_tex_export(pl_gpu gpu, pl_tex tex, pl_sync sync); + + +PL_API_END + +#endif // LIBPLACEBO_GPU_H_ diff --git a/src/include/libplacebo/log.h b/src/include/libplacebo/log.h new file mode 100644 index 0000000..b24c931 --- /dev/null +++ b/src/include/libplacebo/log.h @@ -0,0 +1,113 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_LOG_H_ +#define LIBPLACEBO_LOG_H_ + +#include <libplacebo/config.h> +#include <libplacebo/common.h> + +PL_API_BEGIN + +// The log level associated with a given log message. +enum pl_log_level { + PL_LOG_NONE = 0, + PL_LOG_FATAL, // results in total loss of function of a major component + PL_LOG_ERR, // serious error; may result in degraded function + PL_LOG_WARN, // warning; potentially bad, probably user-relevant + PL_LOG_INFO, // informational message, also potentially harmless errors + PL_LOG_DEBUG, // verbose debug message, informational + PL_LOG_TRACE, // very noisy trace of activity,, usually benign + PL_LOG_ALL = PL_LOG_TRACE, +}; + +struct pl_log_params { + // Logging callback. All messages, informational or otherwise, will get + // redirected to this callback. 
The logged messages do not include trailing + // newlines. Optional. + void (*log_cb)(void *log_priv, enum pl_log_level level, const char *msg); + void *log_priv; + + // The current log level. Controls the level of message that will be + // redirected to the log callback. Setting this to PL_LOG_ALL means all + // messages will be forwarded, but doing so indiscriminately can result + // in increased CPU usage as it may enable extra debug paths based on the + // configured log level. + enum pl_log_level log_level; +}; + +#define pl_log_params(...) (&(struct pl_log_params) { __VA_ARGS__ }) +PL_API extern const struct pl_log_params pl_log_default_params; + +// Thread-safety: Safe +// +// Note: In any context in which `pl_log` is used, users may also pass NULL +// to disable logging. In other words, NULL is a valid `pl_log`. +typedef const struct pl_log_t { + struct pl_log_params params; +} *pl_log; + +#define pl_log_glue1(x, y) x##y +#define pl_log_glue2(x, y) pl_log_glue1(x, y) +// Force a link error in the case of linking against an incompatible API +// version. +#define pl_log_create pl_log_glue2(pl_log_create_, PL_API_VER) +// Creates a pl_log. `api_ver` is for historical reasons and ignored currently. +// `params` defaults to `&pl_log_default_params` if left as NULL. +// +// Note: As a general rule, any `params` struct used as an argument to a +// function need only live until the corresponding function returns. +PL_API pl_log pl_log_create(int api_ver, const struct pl_log_params *params); + +// Destroy a `pl_log` object. +// +// Note: As a general rule, all `_destroy` functions take the pointer to the +// object to free as their parameter. This pointer is overwritten by NULL +// afterwards. Calling a _destroy function on &{NULL} is valid, but calling it +// on NULL itself is invalid. +PL_API void pl_log_destroy(pl_log *log); + +// Update the parameters of a `pl_log` without destroying it. 
This can be +// used to change the log function, log context or log level retroactively. +// `params` defaults to `&pl_log_default_params` if left as NULL. +// +// Returns the previous params, atomically. +PL_API struct pl_log_params pl_log_update(pl_log log, const struct pl_log_params *params); + +// Like `pl_log_update` but only updates the log level, leaving the log +// callback intact. +// +// Returns the previous log level, atomically. +PL_API enum pl_log_level pl_log_level_update(pl_log log, enum pl_log_level level); + +// Two simple, stream-based loggers. You can use these as the log_cb. If you +// also set log_priv to a FILE* (e.g. stdout or stderr) it will be printed +// there; otherwise, it will be printed to stdout or stderr depending on the +// log level. +// +// The version with colors will use ANSI escape sequences to indicate the log +// level. The version without will use explicit prefixes. +PL_API void pl_log_simple(void *stream, enum pl_log_level level, const char *msg); +PL_API void pl_log_color(void *stream, enum pl_log_level level, const char *msg); + +// Backwards compatibility with older versions of libplacebo +#define pl_context pl_log +#define pl_context_params pl_log_params + +PL_API_END + +#endif // LIBPLACEBO_LOG_H_ diff --git a/src/include/libplacebo/meson.build b/src/include/libplacebo/meson.build new file mode 100644 index 0000000..2f4631e --- /dev/null +++ b/src/include/libplacebo/meson.build @@ -0,0 +1,6 @@ +sources += configure_file( + input: 'config.h.in', + output: 'config.h', + install_dir: get_option('includedir') / meson.project_name(), + configuration: conf_public, +) diff --git a/src/include/libplacebo/opengl.h b/src/include/libplacebo/opengl.h new file mode 100644 index 0000000..46597b2 --- /dev/null +++ b/src/include/libplacebo/opengl.h @@ -0,0 +1,230 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_OPENGL_H_ +#define LIBPLACEBO_OPENGL_H_ + +#include <string.h> + +#include <libplacebo/gpu.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +// Note on thread safety: The thread safety of `pl_opengl` and any associated +// GPU objects follows the same thread safety rules as the underlying OpenGL +// context. In other words, they must only be called from the thread the OpenGL +// context is current on. + +typedef const struct pl_opengl_t { + pl_gpu gpu; + + // Detected GL version + int major, minor; + + // List of GL/EGL extensions, provided for convenience + const char * const *extensions; + int num_extensions; +} *pl_opengl; + +static inline bool pl_opengl_has_ext(pl_opengl gl, const char *ext) +{ + for (int i = 0; i < gl->num_extensions; i++) + if (!strcmp(ext, gl->extensions[i])) + return true; + return false; +} + +typedef void (*pl_voidfunc_t)(void); + +struct pl_opengl_params { + // Main gl*GetProcAddr function. This will be used to load all GL/EGL + // functions. Optional - if unspecified, libplacebo will default to an + // internal loading logic which should work on most platforms. + pl_voidfunc_t (*get_proc_addr_ex)(void *proc_ctx, const char *procname); + void *proc_ctx; + + // Simpler API for backwards compatibility / convenience. 
(This one + // directly matches the signature of most gl*GetProcAddr library functions) + pl_voidfunc_t (*get_proc_addr)(const char *procname); + + // Enable OpenGL debug report callbacks. May have little effect depending + // on whether or not the GL context was initialized with appropriate + // debugging enabled. + bool debug; + + // Allow the use of (suspected) software rasterizers and renderers. These + // can be useful for debugging purposes, but normally, their use is + // undesirable when GPU-accelerated processing is expected. + bool allow_software; + + // Restrict the maximum allowed GLSL version. (Mainly for testing) + int max_glsl_version; + + // Optional. Required when importing/exporting dmabufs as textures. + void *egl_display; + void *egl_context; + + // Optional callbacks to bind/release the OpenGL context on the current + // thread. If these are specified, then the resulting `pl_gpu` will have + // `pl_gpu_limits.thread_safe` enabled, and may therefore be used from any + // thread without first needing to bind the OpenGL context. + // + // If the user is re-using the same OpenGL context in non-libplacebo code, + // then these callbacks should include whatever synchronization is + // necessary to prevent simultaneous use between libplacebo and the user. + bool (*make_current)(void *priv); + void (*release_current)(void *priv); + void *priv; +}; + +// Default/recommended parameters +#define pl_opengl_params(...) (&(struct pl_opengl_params) { __VA_ARGS__ }) +PL_API extern const struct pl_opengl_params pl_opengl_default_params; + +// Creates a new OpenGL renderer based on the given parameters. This will +// internally use whatever platform-defined mechanism (WGL, X11, EGL) is +// appropriate for loading the OpenGL function calls, so the user doesn't need +// to pass in a `getProcAddress` callback. If `params` is left as NULL, it +// defaults to `&pl_opengl_default_params`. 
The context must be active when +// calling this function, and must remain active whenever calling any +// libplacebo function on the resulting `pl_opengl` or `pl_gpu`. +// +// Note that creating multiple `pl_opengl` instances from the same OpenGL +// context is undefined behavior. +PL_API pl_opengl pl_opengl_create(pl_log log, const struct pl_opengl_params *params); + +// All resources allocated from the `pl_gpu` contained by this `pl_opengl` must +// be explicitly destroyed by the user before calling `pl_opengl_destroy`. +PL_API void pl_opengl_destroy(pl_opengl *gl); + +// For a `pl_gpu` backed by `pl_opengl`, this function can be used to retrieve +// the underlying `pl_opengl`. Returns NULL for any other type of `gpu`. +PL_API pl_opengl pl_opengl_get(pl_gpu gpu); + +struct pl_opengl_framebuffer { + // ID of the framebuffer, or 0 to use the context's default framebuffer. + int id; + + // If true, then the framebuffer is assumed to be "flipped" relative to + // normal GL semantics, i.e. set this to `true` if the first pixel is the + // top left corner. + bool flipped; +}; + +struct pl_opengl_swapchain_params { + // Set this to the platform-specific function to swap buffers, e.g. + // glXSwapBuffers, eglSwapBuffers etc. This will be called internally by + // `pl_swapchain_swap_buffers`. Required, unless you never call that + // function. + void (*swap_buffers)(void *priv); + + // Initial framebuffer description. This can be changed later on using + // `pl_opengl_swapchain_update_fb`. + struct pl_opengl_framebuffer framebuffer; + + // Attempt forcing a specific latency. If this is nonzero, then + // `pl_swapchain_swap_buffers` will wait until fewer than N frames are "in + // flight" before returning. Setting this to a high number generally + // accomplished nothing, because the OpenGL driver typically limits the + // number of buffers on its own. But setting it to a low number like 2 or + // even 1 can reduce latency (at the cost of throughput). 
+ int max_swapchain_depth; + + // Arbitrary user pointer that gets passed to `swap_buffers` etc. + void *priv; +}; + +#define pl_opengl_swapchain_params(...) (&(struct pl_opengl_swapchain_params) { __VA_ARGS__ }) + +// Creates an instance of `pl_swapchain` tied to the active context. +// Note: Due to OpenGL semantics, users *must* call `pl_swapchain_resize` +// before attempting to use this swapchain, otherwise calls to +// `pl_swapchain_start_frame` will fail. +PL_API pl_swapchain pl_opengl_create_swapchain(pl_opengl gl, + const struct pl_opengl_swapchain_params *params); + +// Update the framebuffer description. After calling this function, users +// *must* call `pl_swapchain_resize` before attempting to use the swapchain +// again, otherwise calls to `pl_swapchain_start_frame` will fail. +PL_API void pl_opengl_swapchain_update_fb(pl_swapchain sw, + const struct pl_opengl_framebuffer *fb); + +struct pl_opengl_wrap_params { + // The GLuint texture object itself. Optional. If no texture is provided, + // then only the opaque framebuffer `fbo` will be wrapped, leaving the + // resulting `pl_tex` object with some operations (such as sampling) being + // unsupported. + unsigned int texture; + + // The GLuint associated framebuffer. Optional. If this is not specified, + // then libplacebo will attempt creating a framebuffer from the provided + // texture object (if possible). + // + // Note: As a special case, if neither a texture nor an FBO are provided, + // this is equivalent to wrapping the OpenGL default framebuffer (id 0). + unsigned int framebuffer; + + // The image's dimensions (unused dimensions must be 0) + int width; + int height; + int depth; + + // Texture-specific fields: + // + // Note: These are only relevant if `texture` is provided. + + // The GLenum for the texture target to use, e.g. GL_TEXTURE_2D. Optional. + // If this is left as 0, the target is inferred from the number of + // dimensions. 
Users may want to set this to something specific like + // GL_TEXTURE_EXTERNAL_OES depending on the nature of the texture. + unsigned int target; + + // The texture's GLint sized internal format (e.g. GL_RGBA16F). Required. + int iformat; +}; + +#define pl_opengl_wrap_params(...) (&(struct pl_opengl_wrap_params) { __VA_ARGS__ }) + +// Wraps an external OpenGL object into a `pl_tex` abstraction. Due to the +// internally synchronized nature of OpenGL, no explicit synchronization +// is needed between libplacebo `pl_tex_` operations, and host accesses to +// the texture. Wrapping the same OpenGL texture multiple times is permitted. +// Note that this function transfers no ownership. +// +// This wrapper can be destroyed by simply calling `pl_tex_destroy` on it, +// which will *not* destroy the user-provided OpenGL texture or framebuffer. +// +// This function may fail, in which case it returns NULL. +PL_API pl_tex pl_opengl_wrap(pl_gpu gpu, const struct pl_opengl_wrap_params *params); + +// Analogous to `pl_opengl_wrap`, this function takes any `pl_tex` (including +// ones created by `pl_tex_create`) and unwraps it to expose the underlying +// OpenGL texture to the user. Note that this function transfers no ownership, +// i.e. the texture object and framebuffer shall not be destroyed by the user. +// +// Returns the OpenGL texture. `out_target` and `out_iformat` will be updated +// to hold the target type and internal format, respectively. (Optional) +// +// For renderable/blittable textures, `out_fbo` will be updated to the ID of +// the framebuffer attached to this texture, or 0 if there is none. 
(Optional) +PL_API unsigned int pl_opengl_unwrap(pl_gpu gpu, pl_tex tex, unsigned int *out_target, + int *out_iformat, unsigned int *out_fbo); + +PL_API_END + +#endif // LIBPLACEBO_OPENGL_H_ diff --git a/src/include/libplacebo/options.h b/src/include/libplacebo/options.h new file mode 100644 index 0000000..e40f5e7 --- /dev/null +++ b/src/include/libplacebo/options.h @@ -0,0 +1,201 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_OPTIONS_H_ +#define LIBPLACEBO_OPTIONS_H_ + +#include <libplacebo/renderer.h> + +PL_API_BEGIN + +// High-level heap-managed struct containing storage for all options implied by +// pl_render_params, including a high-level interface for serializing, +// deserializing and interfacing with them in a programmatic way. + +typedef const struct pl_opt_t *pl_opt; +typedef struct pl_options_t { + // Non-NULL `params.*_params` pointers must always point into this struct + struct pl_render_params params; + + // Backing storage for all of the various rendering parameters. Whether + // or not these params are active is determined by whether or not + // `params.*_params` is set to this address or NULL. 
+ struct pl_deband_params deband_params; + struct pl_sigmoid_params sigmoid_params; + struct pl_color_adjustment color_adjustment; + struct pl_peak_detect_params peak_detect_params; + struct pl_color_map_params color_map_params; + struct pl_dither_params dither_params; + struct pl_icc_params icc_params PL_DEPRECATED; + struct pl_cone_params cone_params; + struct pl_blend_params blend_params; + struct pl_deinterlace_params deinterlace_params; + struct pl_distort_params distort_params; + + // Backing storage for "custom" scalers. `params.upscaler` etc. will + // always be a pointer either to a built-in pl_filter_config, or one of + // these structs. `name`, `description` and `allowed` will always be + // valid for the respective type of filter config. + struct pl_filter_config upscaler; + struct pl_filter_config downscaler; + struct pl_filter_config plane_upscaler; + struct pl_filter_config plane_downscaler; + struct pl_filter_config frame_mixer; +} *pl_options; + +// Allocate a new set of render params, with internally backed storage for +// all parameters. Initialized to an "empty" config (PL_RENDER_DEFAULTS), +// equivalent to `&pl_render_fast_params`. To initialize the struct instead to +// the recommended default parameters, use `pl_options_reset` with +// `pl_render_default_params`. +// +// If `log` is provided, errors related to parsing etc. will be logged there. +PL_API pl_options pl_options_alloc(pl_log log); +PL_API void pl_options_free(pl_options *opts); + +// Resets all options to their default values from a given struct. If `preset` +// is NULL, `opts` is instead reset back to the initial "empty" configuration, +// with all options disabled, as if it was freshly allocated. +// +// Note: This function will also reset structs which were not included in +// `preset`, such as any custom upscalers. +PL_API void pl_options_reset(pl_options opts, const struct pl_render_params *preset); + +typedef const struct pl_opt_data_t { + // Original options struct. 
+ pl_options opts; + + // Triggering option for this callback invocation. + pl_opt opt; + + // The raw data associated with this option. Always some pointer into + // `opts`. Note that only PL_OPT_BOOL, PL_OPT_INT and PL_OPT_FLOAT have + // a fixed representation, for other fields its usefulness is dubious. + const void *value; + + // The underlying data, as a formatted, locale-invariant string. Lifetime + // is limited until the return of this callback. + const char *text; +} *pl_opt_data; + +// Query a single option from `opts` by key, or NULL if none was found. +// The resulting pointer is only valid until the next pl_options_* call. +PL_API pl_opt_data pl_options_get(pl_options opts, const char *key); + +// Update an option from a formatted value string (see `pl_opt_data.text`). +// This can be used for all type of options, even non-string ones. In this case, +// `value` will be parsed according to the option type. +// +// Returns whether successful. +PL_API bool pl_options_set_str(pl_options opts, const char *key, const char *value); + +// Programmatically iterate over options set in a `pl_options`, running the +// provided callback on each entry. +PL_API void pl_options_iterate(pl_options opts, + void (*cb)(void *priv, pl_opt_data data), + void *priv); + +// Serialize a `pl_options` structs to a comma-separated key/value string. The +// returned string has a lifetime valid until either the next call to +// `pl_options_save`, or until the `pl_options` is freed. +PL_API const char *pl_options_save(pl_options opts); + +// Parse a `pl_options` struct from a key/value string, in standard syntax +// "key1=value1,key2=value2,...", and updates `opts` with the new values. +// Valid separators include whitespace, commas (,) and (semi)colons (:;). +// +// Returns true if no errors occurred. +PL_API bool pl_options_load(pl_options opts, const char *str); + +// Helpers for interfacing with `opts->params.hooks`. 
Note that using any of +// these helpers will overwrite the array by an internally managed pointer, +// so care must be taken when combining them with external management of +// this memory. Negative indices are possible and are counted relative to the +// end of the list. +// +// Note: These hooks are *not* included in pl_options_save() and related. +PL_API void pl_options_add_hook(pl_options opts, const struct pl_hook *hook); +PL_API void pl_options_insert_hook(pl_options opts, const struct pl_hook *hook, int idx); +PL_API void pl_options_remove_hook_at(pl_options opts, int idx); + +// Underlying options system and list +// +// Note: By necessity, this option list does not cover every single field +// present in `pl_render_params`. In particular, fields like `info_callback`, +// `lut` and `hooks` cannot be configured through the options system, as doing +// so would require interop with C code or I/O. (However, see +// `pl_options_add_hook` and related) + +enum pl_option_type { + // Accepts `yes/no`, `on/off`, `true/false` and variants + PL_OPT_BOOL, + + // Parsed as human-readable locale-invariant (C) numbers, scientific + // notation accepted for floats + PL_OPT_INT, + PL_OPT_FLOAT, + + // Parsed as a short string containing only alphanumerics and _-, + // corresponding to some name/identifier. Catch-all bucket for several + // other types of options, such as presets, struct pointers, and functions + // + // Note: These options do not correspond to actual strings in C, the + // underlying type of option will determine the values of `size` and + // corresponding interpretation of pointers. + PL_OPT_STRING, + + PL_OPT_TYPE_COUNT, +}; + +struct pl_opt_t { + // Programmatic key uniquely identifying this option. + const char *key; + + // Longer, human readable friendly name + const char *name; + + // Data type of option, affects how it is parsed. This field is purely + // informative for the user, the actual implementation may vary. 
+ enum pl_option_type type; + + // Minimum/maximum value ranges for numeric options (int / float) + // If both are 0.0, these limits are disabled/ignored. + float min, max; + + // If true, this option is considered deprecated and may be removed + // in the future. + bool deprecated; + + // If true, this option is considered a 'preset' (read-only), which can + // be loaded but not saved. (The equivalent underlying options this preset + // corresponds to will be saved instead) + bool preset; + + // Internal implementation details (for parsing/saving), opaque to user + const void *priv; +}; + +// A list of options, terminated by {0} for convenience +PL_API extern const struct pl_opt_t pl_option_list[]; +PL_API extern const int pl_option_count; // excluding terminating {0} + +// Returns the `pl_option` associated with a given key, or NULL +PL_API pl_opt pl_find_option(const char *key); + +PL_API_END + +#endif // LIBPLACEBO_OPTIONS_H_ diff --git a/src/include/libplacebo/renderer.h b/src/include/libplacebo/renderer.h new file mode 100644 index 0000000..d2e01e4 --- /dev/null +++ b/src/include/libplacebo/renderer.h @@ -0,0 +1,847 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef LIBPLACEBO_RENDERER_H_ +#define LIBPLACEBO_RENDERER_H_ + +#include <libplacebo/config.h> +#include <libplacebo/colorspace.h> +#include <libplacebo/filters.h> +#include <libplacebo/gpu.h> +#include <libplacebo/shaders/colorspace.h> +#include <libplacebo/shaders/deinterlacing.h> +#include <libplacebo/shaders/dithering.h> +#include <libplacebo/shaders/film_grain.h> +#include <libplacebo/shaders/icc.h> +#include <libplacebo/shaders/lut.h> +#include <libplacebo/shaders/sampling.h> +#include <libplacebo/shaders/custom.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +// Thread-safety: Unsafe +typedef struct pl_renderer_t *pl_renderer; + +// Enum values used in pl_renderer_errors_t as a bit positions for error flags +enum pl_render_error { + PL_RENDER_ERR_NONE = 0, + PL_RENDER_ERR_FBO = 1 << 0, + PL_RENDER_ERR_SAMPLING = 1 << 1, + PL_RENDER_ERR_DEBANDING = 1 << 2, + PL_RENDER_ERR_BLENDING = 1 << 3, + PL_RENDER_ERR_OVERLAY = 1 << 4, + PL_RENDER_ERR_PEAK_DETECT = 1 << 5, + PL_RENDER_ERR_FILM_GRAIN = 1 << 6, + PL_RENDER_ERR_FRAME_MIXING = 1 << 7, + PL_RENDER_ERR_DEINTERLACING = 1 << 8, + PL_RENDER_ERR_ERROR_DIFFUSION = 1 << 9, + PL_RENDER_ERR_HOOKS = 1 << 10, + PL_RENDER_ERR_CONTRAST_RECOVERY = 1 << 11, +}; + +// Struct describing current renderer state, including internal processing errors, +// as well as list of signatures of disabled hooks. +struct pl_render_errors { + enum pl_render_error errors; + // List containing signatures of disabled hooks + const uint64_t *disabled_hooks; + int num_disabled_hooks; +}; + +// Creates a new renderer object, which is backed by a GPU context. This is a +// high-level object that takes care of the rendering chain as a whole, from +// the source textures to the finished frame. +PL_API pl_renderer pl_renderer_create(pl_log log, pl_gpu gpu); +PL_API void pl_renderer_destroy(pl_renderer *rr); + +// Returns current renderer state, see pl_render_errors. 
+PL_API struct pl_render_errors pl_renderer_get_errors(pl_renderer rr);
+
+// Clears the error state of the renderer. If `errors` is NULL, all render errors will
+// be cleared. Otherwise only selected errors/hooks will be cleared.
+// If `PL_RENDER_ERR_HOOKS` is set and `num_disabled_hooks` is 0, clear all hooks.
+// Otherwise only selected hooks will be cleared based on `disabled_hooks` array.
+PL_API void pl_renderer_reset_errors(pl_renderer rr,
+ const struct pl_render_errors *errors);
+
+enum pl_lut_type {
+ PL_LUT_UNKNOWN = 0,
+ PL_LUT_NATIVE, // applied to raw image contents (after fixing bit depth)
+ PL_LUT_NORMALIZED, // applied to normalized (HDR) RGB values
+ PL_LUT_CONVERSION, // LUT fully replaces color conversion
+
+ // Note: When using a PL_LUT_CONVERSION to replace the YUV->RGB conversion,
+ // `pl_render_params.color_adjustment` is no longer applied. Similarly,
+ // when using a PL_LUT_CONVERSION to replace the image->target color space
+ // conversion, `pl_render_params.color_map_params` are ignored.
+ //
+ // Note: For LUTs attached to the output frame, PL_LUT_CONVERSION should
+ // instead perform the inverse (RGB->native) conversion.
+ //
+ // Note: PL_LUT_UNKNOWN tries inferring the meaning of the LUT from the
+ // LUT's tagged metadata, and otherwise falls back to PL_LUT_NATIVE.
+};
+
+enum pl_render_stage {
+ PL_RENDER_STAGE_FRAME, // full frame redraws, for fresh/uncached frames
+ PL_RENDER_STAGE_BLEND, // the output blend pass (only for pl_render_image_mix)
+ PL_RENDER_STAGE_COUNT,
+};
+
+struct pl_render_info {
+ const struct pl_dispatch_info *pass; // information about the shader
+ enum pl_render_stage stage; // the associated render stage
+
+ // This specifies the chronological index of this pass within the frame and
+ // stage (starting at `index == 0`).
+ int index;
+
+ // For PL_RENDER_STAGE_BLEND, this specifies the number of frames
+ // being blended (since that results in a different shader).
+ int count; +}; + +// Represents the options used for rendering. These affect the quality of +// the result. +struct pl_render_params { + // Configures the algorithms used for upscaling and downscaling, + // respectively. If left as NULL, then libplacebo will only use inexpensive + // sampling (bilinear or nearest neighbour depending on the capabilities + // of the hardware / texture). + // + // Note: Setting `downscaler` to NULL also implies `skip_anti_aliasing`, + // since the built-in GPU sampling algorithms can't anti-alias. + // + // Note: If set to the same address as the built-in `pl_filter_bicubic`, + // `pl_filter_nearest` etc.; libplacebo will also use the more efficient + // direct sampling algorithm where possible without quality loss. + const struct pl_filter_config *upscaler; + const struct pl_filter_config *downscaler; + + // If set, this overrides the value of `upscaler`/`downscaling` for + // subsampled (chroma) planes. These scalers are used whenever the size of + // multiple different `pl_plane`s in a single `pl_frame` differ, requiring + // adaptation when converting to/from RGB. Note that a value of NULL simply + // means "no override". To force built-in scaling explicitly, set this to + // `&pl_filter_bilinear`. + const struct pl_filter_config *plane_upscaler; + const struct pl_filter_config *plane_downscaler; + + // The anti-ringing strength to apply to filters. See the equivalent option + // in `pl_sample_filter_params` for more information. + float antiringing_strength; + + // Configures the algorithm used for frame mixing (when using + // `pl_render_image_mix`). Ignored otherwise. As a special requirement, + // this must be a filter config with `polar` set to false, since it's only + // used for 1D mixing and thus only 1D filters are compatible. + // + // If set to NULL, frame mixing is disabled, in which case + // `pl_render_image_mix` will use nearest-neighbour semantics. 
(Note that + // this still goes through the redraw cache, unless you also enable + // `skip_caching_single_frame`) + const struct pl_filter_config *frame_mixer; + + // Configures the settings used to deband source textures. Leaving this as + // NULL disables debanding. + // + // Note: The `deband_params.grain` setting is automatically adjusted to + // prevent blowing up on HDR sources. The user need not account for this. + const struct pl_deband_params *deband_params; + + // Configures the settings used to sigmoidize the image before upscaling. + // This is not always used. If NULL, disables sigmoidization. + const struct pl_sigmoid_params *sigmoid_params; + + // Configures the color adjustment parameters used to decode the color. + // This can be used to apply additional artistic settings such as + // desaturation, etc. If NULL, defaults to &pl_color_adjustment_neutral. + const struct pl_color_adjustment *color_adjustment; + + // Configures the settings used to detect the peak of the source content, + // for HDR sources. Has no effect on SDR content. If NULL, peak detection + // is disabled. + const struct pl_peak_detect_params *peak_detect_params; + + // Configures the settings used to tone map from HDR to SDR, or from higher + // gamut to standard gamut content. If NULL, defaults to + // `&pl_color_map_default_params`. + const struct pl_color_map_params *color_map_params; + + // Configures the settings used to dither to the output depth. Leaving this + // as NULL disables dithering. + const struct pl_dither_params *dither_params; + + // Configures the error diffusion kernel to use for error diffusion + // dithering. If set, this will be used instead of `dither_params` whenever + // possible. Leaving this as NULL disables error diffusion. + const struct pl_error_diffusion_kernel *error_diffusion; + + // Configures the settings used to simulate color blindness, if desired. + // If NULL, this feature is disabled. 
+ const struct pl_cone_params *cone_params; + + // Configures output blending. When rendering to the final target, the + // framebuffer contents will be blended using this blend mode. Requires + // that the target format has PL_FMT_CAP_BLENDABLE. NULL disables blending. + const struct pl_blend_params *blend_params; + + // Configures the settings used to deinterlace frames (see + // `pl_frame.field`), if required.. If NULL, deinterlacing is "disabled", + // meaning interlaced frames are rendered as weaved frames instead. + // + // Note: As a consequence of how `pl_frame` represents individual fields, + // and especially when using the `pl_queue`, this will still result in + // frames being redundantly rendered twice. As such, it's highly + // recommended to, instead, fully disable deinterlacing by not marking + // source frames as interlaced in the first place. + const struct pl_deinterlace_params *deinterlace_params; + + // If set, applies an extra distortion matrix to the image, after + // scaling and before presenting it to the screen. Can be used for e.g. + // fractional rotation. + // + // Note: The distortion canvas will be set to the size of `target->crop`, + // so this cannot effectively draw outside the specified target area, + // nor change the aspect ratio of the image. + const struct pl_distort_params *distort_params; + + // List of custom user shaders / hooks. + // See <libplacebo/shaders/custom.h> for more information. + const struct pl_hook * const *hooks; + int num_hooks; + + // Color mapping LUT. If present, this will be applied as part of the + // image being rendered, in normalized RGB space. + // + // Note: In this context, PL_LUT_NATIVE means "gamma light" and + // PL_LUT_NORMALIZED means "linear light". For HDR signals, normalized LUTs + // are scaled so 1.0 corresponds to the `pl_color_transfer_nominal_peak`. 
+ //
+ // Note: A PL_LUT_CONVERSION fully replaces the color adaptation from
+ // `image` to `target`, including any tone-mapping (if necessary) and ICC
+ // profiles. It has the same representation as PL_LUT_NATIVE, so in this
+ // case the input and output are (respectively) non-linear light RGB.
+ const struct pl_custom_lut *lut;
+ enum pl_lut_type lut_type;
+
+ // If the image being rendered does not span the entire size of the target,
+ // it will be cleared explicitly using this background color (RGB). To
+ // disable this logic, set `skip_target_clearing`.
+ float background_color[3];
+ float background_transparency; // 0.0 for opaque, 1.0 for fully transparent
+ bool skip_target_clearing;
+
+ // If set to a value above 0.0, the output will be rendered with rounded
+ // corners, as if an alpha transparency mask had been applied. The value
+ // indicates the relative fraction of the side length to round - a value
+ // of 1.0 rounds the corners as much as possible.
+ float corner_rounding;
+
+ // If true, then transparent images will be made opaque by painting them
+ // against a checkerboard pattern consisting of alternating colors. If both
+ // colors are left as {0}, they default respectively to 93% and 87% gray.
+ bool blend_against_tiles;
+ float tile_colors[2][3];
+ int tile_size;
+
+ // --- Performance / quality trade-off options:
+ // These should generally be left off where quality is desired, as they can
+ // degrade the result quite noticeably; but may be useful for older or
+ // slower hardware. Note that libplacebo will automatically disable
+ // advanced features on hardware where they are unsupported, regardless of
+ // these settings. So only enable them if you need a performance bump.
+
+ // Disables anti-aliasing on downscaling. This will result in moiré
+ // artifacts and nasty, jagged pixels when downscaling, except for some
+ // very limited special cases (e.g. bilinear downsampling to exactly 0.5x).
+ //
+ // Significantly speeds up downscaling with high downscaling ratios.
+ bool skip_anti_aliasing;
+
+ // Normally, when the size of the `target` used with `pl_render_image_mix`
+ // changes, or the render parameters are updated, the internal cache of
+ // mixed frames must be discarded in order to re-render all required
+ // frames. Setting this option to `true` will skip the cache invalidation
+ // and instead re-use the existing frames (with bilinear scaling to the new
+ // size if necessary), which comes at a quality loss shortly after a
+ // resize, but should make it much more smooth.
+ bool preserve_mixing_cache;
+
+ // --- Performance tuning / debugging options
+ // These may affect performance or may make debugging problems easier,
+ // but shouldn't have any effect on the quality.
+
+ // Normally, `pl_render_image_mix` will also push single frames through the
+ // mixer cache, in order to speed up re-draws. Enabling this option
+ // disables that logic, causing single frames to bypass the cache. (Though
+ // it will still read from it, if they happen to already be cached)
+ bool skip_caching_single_frame;
+
+ // Disables linearization / sigmoidization before scaling. This might be
+ // useful when tracking down unexpected image artifacts or excessive
+ // ringing, but it shouldn't normally be necessary.
+ bool disable_linear_scaling;
+
+ // Forces the use of the "general" scaling algorithms even when using the
+ // special-cased built-in presets like `pl_filter_bicubic`. Basically, this
+ // disables the more efficient implementations in favor of the slower,
+ // general-purpose ones.
+ bool disable_builtin_scalers;
+
+ // Forces correction of subpixel offsets (using the configured `upscaler`).
+ bool correct_subpixel_offsets;
+
+ // Forces the use of dithering, even when rendering to 16-bit FBOs.
This is + // generally pretty pointless because most 16-bit FBOs have high enough + // depth that rounding errors are below the human perception threshold, + // but this can be used to test the dither code. + bool force_dither; + + // Disables the gamma-correct dithering logic which normally applies when + // dithering to low bit depths. No real use, outside of testing. + bool disable_dither_gamma_correction; + + // Completely overrides the use of FBOs, as if there were no renderable + // texture format available. This disables most features. + bool disable_fbos; + + // Use only low-bit-depth FBOs (8 bits). Note that this also implies + // disabling linear scaling and sigmoidization. + bool force_low_bit_depth_fbos; + + // If this is true, all shaders will be generated as "dynamic" shaders, + // with any compile-time constants being replaced by runtime-adjustable + // values. This is generally a performance loss, but has the advantage of + // being able to freely change parameters without triggering shader + // recompilations. + // + // It's a good idea to enable while presenting configurable settings to the + // user, but it should be set to false once those values are "dialed in". + bool dynamic_constants; + + // This callback is invoked for every pass successfully executed in the + // process of rendering a frame. Optional. + // + // Note: `info` is only valid until this function returns. + void (*info_callback)(void *priv, const struct pl_render_info *info); + void *info_priv; + + // --- Deprecated/removed fields + bool allow_delayed_peak_detect PL_DEPRECATED; // moved to pl_peak_detect_params + const struct pl_icc_params *icc_params PL_DEPRECATED; // use pl_frame.icc + bool ignore_icc_profiles PL_DEPRECATED; // non-functional, just set pl_frame.icc to NULL + int lut_entries PL_DEPRECATED; // hard-coded as 256 + float polar_cutoff PL_DEPRECATED; // hard-coded as 1e-3 +}; + +// Bare minimum parameters, with no features enabled. 
This is the fastest +// possible configuration, and should therefore be fine on any system. +#define PL_RENDER_DEFAULTS \ + .color_map_params = &pl_color_map_default_params, \ + .color_adjustment = &pl_color_adjustment_neutral, \ + .tile_colors = {{0.93, 0.93, 0.93}, \ + {0.87, 0.87, 0.87}}, \ + .tile_size = 32, + +#define pl_render_params(...) (&(struct pl_render_params) { PL_RENDER_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_render_params pl_render_fast_params; + +// This contains the default/recommended options for reasonable image quality, +// while also not being too terribly slow. All of the *_params structs are +// defaulted to the corresponding *_default_params, except for deband_params, +// which is disabled by default. +// +// This should be fine on most integrated GPUs, but if it's too slow, +// consider using `pl_render_fast_params` instead. +PL_API extern const struct pl_render_params pl_render_default_params; + +// This contains a higher quality preset for better image quality at the cost +// of quite a bit of performance. In addition to the settings implied by +// `pl_render_default_params`, it enables debanding, sets the upscaler to +// `pl_filter_ewa_lanczossharp`, and uses pl_*_high_quality_params structs where +// available. This should only really be used with a discrete GPU and where +// maximum image quality is desired. +PL_API extern const struct pl_render_params pl_render_high_quality_params; + +#define PL_MAX_PLANES 4 + +// High level description of a single slice of an image. This basically +// represents a single 2D plane, with any number of components +struct pl_plane { + // The texture underlying this plane. The texture must be 2D, and must + // have specific parameters set depending on what the plane is being used + // for (see `pl_render_image`). + pl_tex texture; + + // The preferred behaviour when sampling outside of this texture. Optional, + // since the default (PL_TEX_ADDRESS_CLAMP) is very reasonable. 
+ enum pl_tex_address_mode address_mode; + + // Controls whether or not the `texture` will be considered flipped + // vertically with respect to the overall image dimensions. It's generally + // preferable to flip planes using this setting instead of the crop in + // cases where the flipping is the result of e.g. negative plane strides or + // flipped framebuffers (OpenGL). + // + // Note that any planar padding (due to e.g. size mismatch or misalignment + // of subsampled planes) is always at the physical end of the texture + // (highest y coordinate) - even if this bool is true. However, any + // subsampling shift (`shift_y`) is applied with respect to the flipped + // direction. This ensures the correct interpretation when e.g. vertically + // flipping 4:2:0 sources by flipping all planes. + bool flipped; + + // Describes the number and interpretation of the components in this plane. + // This defines the mapping from component index to the canonical component + // order (RGBA, YCbCrA or XYZA). It's worth pointing out that this is + // completely separate from `texture->format.sample_order`. The latter is + // essentially irrelevant/transparent for the API user, since it just + // determines which order the texture data shows up as inside the GLSL + // shader; whereas this field controls the actual meaning of the component. + // + // Example; if the user has a plane with just {Y} and a plane with just + // {Cb Cr}, and a GPU that only supports bgra formats, you would still + // specify the component mapping as {0} and {1 2} respectively, even though + // the GPU is sampling the data in the order BGRA. Use -1 for "ignored" + // components. + int components; // number of relevant components + int component_mapping[4]; // semantic index of each component + + // Controls the sample offset, relative to the "reference" dimensions. For + // an example of what to set here, see `pl_chroma_location_offset`. Note + // that this is given in unit of reference pixels. 
For a graphical example, + // imagine you have a 2x2 image with a 1x1 (subsampled) plane. Without any + // shift (0.0), the situation looks like this: + // + // X-------X X = reference pixel + // | | P = plane pixel + // | P | + // | | + // X-------X + // + // For 4:2:0 subsampling, this corresponds to PL_CHROMA_CENTER. If the + // shift_x was instead set to -0.5, the `P` pixel would be offset to the + // left by half the separation between the reference (`X` pixels), resulting + // in the following: + // + // X-------X X = reference pixel + // | | P = plane pixel + // P | + // | | + // X-------X + // + // For 4:2:0 subsampling, this corresponds to PL_CHROMA_LEFT. + // + // Note: It's recommended to fill this using `pl_chroma_location_offset` on + // the chroma planes. + float shift_x, shift_y; +}; + +enum pl_overlay_mode { + PL_OVERLAY_NORMAL = 0, // treat the texture as a normal, full-color texture + PL_OVERLAY_MONOCHROME, // treat the texture as a single-component alpha map + PL_OVERLAY_MODE_COUNT, +}; + +enum pl_overlay_coords { + PL_OVERLAY_COORDS_AUTO = 0, // equal to SRC/DST_FRAME, respectively + PL_OVERLAY_COORDS_SRC_FRAME, // relative to the raw src frame + PL_OVERLAY_COORDS_SRC_CROP, // relative to the src frame crop + PL_OVERLAY_COORDS_DST_FRAME, // relative to the raw dst frame + PL_OVERLAY_COORDS_DST_CROP, // relative to the dst frame crop + PL_OVERLAY_COORDS_COUNT, + + // Note on rotations: If there is an end-to-end rotation between `src` and + // `dst`, then any overlays relative to SRC_FRAME or SRC_CROP will be + // rotated alongside the image, while overlays relative to DST_FRAME or + // DST_CROP will not. +}; + +struct pl_overlay_part { + pl_rect2df src; // source coordinate with respect to `pl_overlay.tex` + pl_rect2df dst; // target coordinates with respect to `pl_overlay.coords` + + // If `mode` is PL_OVERLAY_MONOCHROME, then this specifies the color of + // this overlay part. 
The color is multiplied into the sampled texture's + // first channel. + float color[4]; +}; + +// A struct representing an image overlay (e.g. for subtitles or on-screen +// status messages, controls, ...) +struct pl_overlay { + // The texture containing the backing data for overlay parts. Must have + // `params.sampleable` set. + pl_tex tex; + + // This controls the coloring mode of this overlay. + enum pl_overlay_mode mode; + + // Controls which coordinates this overlay is addressed relative to. + enum pl_overlay_coords coords; + + // This controls the colorspace information for this overlay. The contents + // of the texture / the value of `color` are interpreted according to this. + struct pl_color_repr repr; + struct pl_color_space color; + + // The number of parts for this overlay. + const struct pl_overlay_part *parts; + int num_parts; +}; + +// High-level description of a complete frame, including metadata and planes +struct pl_frame { + // Each frame is split up into some number of planes, each of which may + // carry several components and be of any size / offset. + int num_planes; + struct pl_plane planes[PL_MAX_PLANES]; + + // For interlaced frames. If set, this `pl_frame` corresponds to a single + // field of the underlying source textures. `first_field` indicates which + // of these fields is ordered first in time. `prev` and `next` should point + // to the previous/next frames in the file, or NULL if there are none. + // + // Note: Setting these fields on the render target has no meaning and will + // be ignored. + enum pl_field field; + enum pl_field first_field; + const struct pl_frame *prev, *next; + + // If set, will be called immediately before GPU access to this frame. This + // function *may* be used to, for example, perform synchronization with + // external APIs (e.g. `pl_vulkan_hold/release`). If your mapping requires + // a memcpy of some sort (e.g. 
pl_tex_transfer), users *should* instead do
+ // the memcpy up-front and avoid the use of these callbacks - because they
+ // might be called multiple times on the same frame.
+ //
+ // This function *may* arbitrarily mutate the `pl_frame`, but it *should*
+ // ideally only update `planes` - in particular, color metadata and so
+ // forth should be provided up-front as best as possible. Note that changes
+ // here will not be reflected back to the structs provided in the original
+ // `pl_render_*` call (e.g. via `pl_frame_mix`).
+ //
+ // Note: Unless dealing with interlaced frames, only one frame will ever be
+ // acquired at a time per `pl_render_*` call. So users *can* safely use
+ // this with, for example, hwdec mappers that can only map a single frame
+ // at a time. When using this with, for example, `pl_render_image_mix`,
+ // each frame to be blended is acquired and released in succession, before
+ // moving on to the next frame. For interlaced frames, the previous and
+ // next frames must also be acquired simultaneously.
+ bool (*acquire)(pl_gpu gpu, struct pl_frame *frame);
+
+ // If set, will be called after a plane is done being used by the GPU,
+ // *including* after any errors (e.g. `acquire` returning false).
+ void (*release)(pl_gpu gpu, struct pl_frame *frame);
+
+ // Color representation / encoding / semantics of this frame.
+ struct pl_color_repr repr;
+ struct pl_color_space color;
+
+ // Optional ICC profile associated with this frame.
+ pl_icc_object icc;
+
+ // Alternative to `icc`, this can be used in cases where allocating and
+ // tracking a pl_icc_object externally may be inconvenient. The resulting
+ // profile will be managed internally by the pl_renderer.
+ struct pl_icc_profile profile;
+
+ // Optional LUT associated with this frame.
+ const struct pl_custom_lut *lut;
+ enum pl_lut_type lut_type;
+
+ // The logical crop / rectangle containing the valid information, relative
+ // to the reference plane's dimensions (e.g. luma).
Pixels outside of this
+ // rectangle will ostensibly be ignored, but note that this is not a hard
+ // guarantee. In particular, scaler filters may end up sampling outside of
+ // this crop. This rect may be flipped, and may be partially or wholly
+ // outside the bounds of the underlying textures. (Optional)
+ //
+ // Note that `pl_render_image` will map the input crop directly to the
+ // output crop, stretching and scaling as needed. If you wish to preserve
+ // the aspect ratio, use a dedicated function like pl_rect2df_aspect_copy.
+ pl_rect2df crop;
+
+ // Logical rotation of the image, with respect to the underlying planes.
+ // For example, if this is PL_ROTATION_90, then the image will be rotated
+ // to the right by 90° when mapping to `crop`. The actual position on-screen
+ // is unaffected, so users should ensure that the (rotated) aspect ratio
+ // matches the source. (Or use a helper like `pl_rect2df_aspect_set_rot`)
+ //
+ // Note: For `target` frames, this corresponds to a rotation of the
+ // display, for `image` frames, this corresponds to a rotation of the
+ // camera.
+ //
+ // So, as an example, target->rotation = PL_ROTATE_90 means the end user
+ // has rotated the display to the right by 90° (meaning rendering will be
+ // rotated 90° to the *left* to compensate), and image->rotation =
+ // PL_ROTATE_90 means the video provider has rotated the camera to the
+ // right by 90° (so rendering will be rotated 90° to the *right* to
+ // compensate).
+ pl_rotation rotation;
+
+ // A list of additional overlays associated with this frame. Note that these will
+ // be rendered directly onto intermediate/cache frames, so changing any of
+ // these overlays may require flushing the renderer cache.
+ const struct pl_overlay *overlays; + int num_overlays; + + // Note on subsampling and plane correspondence: All planes belonging to + // the same frame will only be stretched by an integer multiple (or inverse + // thereof) in order to match the reference dimensions of this image. For + // example, suppose you have an 8x4 image. A valid plane scaling would be + // 4x2 -> 8x4 or 4x4 -> 4x4, but not 6x4 -> 8x4. So if a 6x4 plane is + // given, then it would be treated like a cropped 8x4 plane (since 1.0 is + // the closest scaling ratio to the actual ratio of 1.3). + // + // For an explanation of why this makes sense, consider the relatively + // common example of a subsampled, oddly sized (e.g. jpeg) image. In such + // cases, for example a 35x23 image, the 4:2:0 subsampled chroma plane + // would have to end up as 17.5x11.5, which gets rounded up to 18x12 by + // implementations. So in this example, the 18x12 chroma plane would get + // treated by libplacebo as an oversized chroma plane - i.e. the plane + // would get sampled as if it was 17.5 pixels wide and 11.5 pixels large. + + // Associated film grain data (see <libplacebo/shaders/film_grain.h>). + // + // Note: This is ignored for the `target` of `pl_render_image`, since + // un-applying grain makes little sense. + struct pl_film_grain_data film_grain; + + // Ignored by libplacebo. May be useful for users. + void *user_data; +}; + +// Helper function to infer the chroma location offset for each plane in a +// frame. This is equivalent to calling `pl_chroma_location_offset` on all +// subsampled planes' shift_x/shift_y variables. +PL_API void pl_frame_set_chroma_location(struct pl_frame *frame, + enum pl_chroma_location chroma_loc); + +// Fills in a `pl_frame` based on a swapchain frame's FBO and metadata. +PL_API void pl_frame_from_swapchain(struct pl_frame *out_frame, + const struct pl_swapchain_frame *frame); + +// Helper function to determine if a frame is logically cropped or not. 
In +// particular, this is useful in determining whether or not an output frame +// needs to be cleared before rendering or not. +PL_API bool pl_frame_is_cropped(const struct pl_frame *frame); + +// Helper function to reset a frame to a given RGB color. If the frame's +// color representation is something other than RGB, the clear color will +// be adjusted accordingly. `clear_color` should be non-premultiplied. +PL_API void pl_frame_clear_rgba(pl_gpu gpu, const struct pl_frame *frame, + const float clear_color[4]); + +// Like `pl_frame_clear_rgba` but without an alpha channel. +static inline void pl_frame_clear(pl_gpu gpu, const struct pl_frame *frame, + const float clear_color[3]) +{ + const float clear_color_rgba[4] = { clear_color[0], clear_color[1], clear_color[2], 1.0 }; + pl_frame_clear_rgba(gpu, frame, clear_color_rgba); +} + +// Helper functions to return the fixed/inferred pl_frame parameters used +// for rendering internally. Mutates `image` and `target` in-place to hold +// the modified values, which are what will actually be used for rendering. +// +// This currently includes: +// - Defaulting all missing pl_color_space/repr parameters +// - Coalescing all rotation to the target +// - Rounding and clamping the target crop to pixel boundaries and adjusting the +// image crop correspondingly +// +// Note: This is idempotent and does not generally alter the effects of a +// subsequent `pl_render_image` on the same pl_frame pair. (But see the +// following warning) +// +// Warning: This does *not* call pl_frame.acquire/release, and so the returned +// metadata *may* be incorrect if the acquire callback mutates the pl_frame in +// nontrivial ways, in particular the crop and color space fields. +PL_API void pl_frames_infer(pl_renderer rr, struct pl_frame *image, + struct pl_frame *target); + + +// Render a single image to a target using the given parameters. This is +// fully dynamic, i.e. the params can change at any time. 
libplacebo will +// internally detect and flush whatever caches are invalidated as a result of +// changing colorspace, size etc. +// +// Required plane capabilities: +// - Planes in `image` must be `sampleable` +// - Planes in `target` must be `renderable` +// +// Recommended plane capabilities: (Optional, but good for performance) +// - Planes in `image` should have `sample_mode` PL_TEX_SAMPLE_LINEAR +// - Planes in `target` should be `storable` +// - Planes in `target` should have `blit_dst` +// +// Note on lifetime: Once this call returns, the passed structures may be +// freely overwritten or discarded by the caller, even the referenced +// `pl_tex` objects may be freely reused. +// +// Note: `image` may be NULL, in which case `target.overlays` will still be +// rendered, but nothing else. +PL_API bool pl_render_image(pl_renderer rr, const struct pl_frame *image, + const struct pl_frame *target, + const struct pl_render_params *params); + +// Flushes the internal state of this renderer. This is normally not needed, +// even if the image parameters, colorspace or target configuration change, +// since libplacebo will internally detect such circumstances and recreate +// outdated resources automatically. Doing this explicitly *may* be useful to +// purge some state related to things like HDR peak detection or frame mixing, +// so calling it is a good idea if the content source is expected to change +// dramatically (e.g. when switching to a different file). +PL_API void pl_renderer_flush_cache(pl_renderer rr); + +// Mirrors `pl_get_detected_hdr_metadata`, giving you the current internal peak +// detection HDR metadata (when peak detection is active). Returns false if no +// information is available (e.g. not HDR source, peak detection disabled). +PL_API bool pl_renderer_get_hdr_metadata(pl_renderer rr, + struct pl_hdr_metadata *metadata); + +// Represents a mixture of input frames, distributed temporally. +// +// NOTE: Frames must be sorted by timestamp, i.e. 
`timestamps` must be +// monotonically increasing. +struct pl_frame_mix { + // The number of frames in this mixture. The number of frames should be + // sufficient to meet the needs of the configured frame mixer. See the + // section below for more information. + // + // If the number of frames is 0, this call will be equivalent to + // `pl_render_image` with `image == NULL`. + int num_frames; + + // A list of the frames themselves. The frames can have different + // colorspaces, configurations of planes, or even sizes. + // + // Note: This is a list of pointers, to avoid users having to copy + // around `pl_frame` structs when re-organizing this array. + const struct pl_frame **frames; + + // A list of unique signatures, one for each frame. These are used to + // identify frames across calls to this function, so it's crucial that they + // be both unique per-frame but also stable across invocations of + // `pl_render_frame_mix`. + const uint64_t *signatures; + + // A list of relative timestamps for each frame. These are relative to the + // time of the vsync being drawn, i.e. this function will render the frame + // that will be made visible at timestamp 0.0. The values are expected to + // be normalized such that a separation of 1.0 corresponds to roughly one + // nominal source frame duration. So a constant framerate video file will + // always have timestamps like e.g. {-2.3, -1.3, -0.3, 0.7, 1.7, 2.7}, + // using an example radius of 3. + // + // In cases where the framerate is variable (e.g. VFR video), the choice of + // what to scale to use can be difficult to answer. A typical choice would + // be either to use the canonical (container-tagged) framerate, or the + // highest momentary framerate, as a reference. If all else fails, you + // could also use the display's framerate. + // + // Note: This function assumes zero-order-hold semantics, i.e. 
the frame at + // timestamp 0.7 is intended to remain visible until timestamp 1.7, when + // the next frame replaces it. + const float *timestamps; + + // The duration for which the vsync being drawn will be held, using the + // same scale as `timestamps`. If the display has an unknown or variable + // frame-rate (e.g. Adaptive Sync), then you're probably better off not + // using this function and instead just painting the frames directly using + // `pl_render_frame` at the correct PTS. + // + // As an example, if `vsync_duration` is 0.4, then it's assumed that the + // vsync being painted is visible for the period [0.0, 0.4]. + float vsync_duration; + + // Explanation of the frame mixing radius: The algorithm chosen in + // `pl_render_params.frame_mixer` has a canonical radius equal to + // `pl_filter_config.kernel->radius`. This means that the frame mixing + // algorithm will (only) need to consult all of the frames that have a + // distance within the interval [-radius, radius]. As such, the user should + // include all such frames in `frames`, but may prune or omit frames that + // lie outside it. + // + // The built-in frame mixing (`pl_render_params.frame_mixer == NULL`) has + // no concept of radius, it just always needs access to the "current" and + // "next" frames. +}; + +// Helper function to calculate the base frame mixing radius. +// +// Note: When the source FPS exceeds the display FPS, this radius must be +// increased by the corresponding ratio. +static inline float pl_frame_mix_radius(const struct pl_render_params *params) +{ + // For backwards compatibility, allow !frame_mixer->kernel + if (!params->frame_mixer || !params->frame_mixer->kernel) + return 0.0; + + return params->frame_mixer->kernel->radius; +} + +// Find closest frame to current PTS by zero-order hold semantics, or NULL. 
+PL_API const struct pl_frame *pl_frame_mix_current(const struct pl_frame_mix *mix); + +// Find closest frame to current PTS by nearest neighbour semantics, or NULL. +PL_API const struct pl_frame *pl_frame_mix_nearest(const struct pl_frame_mix *mix); + +// Render a mixture of images to the target using the given parameters. This +// functions much like a generalization of `pl_render_image`, for when the API +// user has more control over the frame queue / vsync loop, and can provide a +// few frames from the past and future + timestamp information. +// +// This allows libplacebo to perform rudimentary frame mixing / interpolation, +// in order to eliminate judder artifacts typically associated with +// source/display frame rate mismatch. +PL_API bool pl_render_image_mix(pl_renderer rr, const struct pl_frame_mix *images, + const struct pl_frame *target, + const struct pl_render_params *params); + +// Analog of `pl_frame_infer` corresponding to `pl_render_image_mix`. This +// function will *not* mutate the frames contained in `mix`, and instead +// return an adjusted copy of the "reference" frame for that image mix in +// `out_refimage`, or {0} if the mix is empty. +PL_API void pl_frames_infer_mix(pl_renderer rr, const struct pl_frame_mix *mix, + struct pl_frame *target, struct pl_frame *out_ref); + +// Backwards compatibility with old filters API, may be deprecated. +// Redundant with pl_filter_configs and masking `allowed` for +// PL_FILTER_SCALING and PL_FILTER_FRAME_MIXING respectively. + +// A list of recommended frame mixer presets, terminated by {0} +PL_API extern const struct pl_filter_preset pl_frame_mixers[]; +PL_API extern const int pl_num_frame_mixers; // excluding trailing {0} + +// A list of recommended scaler presets, terminated by {0}. This is almost +// equivalent to `pl_filter_presets` with the exception of including extra +// built-in filters that don't map to the `pl_filter` architecture. 
+PL_API extern const struct pl_filter_preset pl_scale_filters[]; +PL_API extern const int pl_num_scale_filters; // excluding trailing {0} + +// Deprecated in favor of `pl_cache_save/pl_cache_load` on the `pl_cache` +// associated with the `pl_gpu` this renderer is using. +PL_DEPRECATED PL_API size_t pl_renderer_save(pl_renderer rr, uint8_t *out_cache); +PL_DEPRECATED PL_API void pl_renderer_load(pl_renderer rr, const uint8_t *cache); + +PL_API_END + +#endif // LIBPLACEBO_RENDERER_H_ diff --git a/src/include/libplacebo/shaders.h b/src/include/libplacebo/shaders.h new file mode 100644 index 0000000..b8046be --- /dev/null +++ b/src/include/libplacebo/shaders.h @@ -0,0 +1,273 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_H_ +#define LIBPLACEBO_SHADERS_H_ + +// This function defines the "direct" interface to libplacebo's GLSL shaders, +// suitable for use in contexts where the user controls GLSL shader compilation +// but wishes to include functions generated by libplacebo as part of their +// own rendering process. This API is normally not used for operation with +// libplacebo's higher-level constructs such as `pl_dispatch` or `pl_renderer`. 
+ +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// Thread-safety: Unsafe +typedef struct pl_shader_t *pl_shader; + +struct pl_shader_params { + // The `id` represents an abstract identifier for the shader, to avoid + // collisions with other shaders being used as part of the same larger, + // overarching shader. This is relevant for users which want to combine + // multiple `pl_shader` objects together, in which case all `pl_shader` + // objects should have a unique `id`. + uint8_t id; + + // If `gpu` is non-NULL, then this `gpu` will be used to create objects + // such as textures and buffers, or check for required capabilities, for + // operations which depend on either of those. This is fully optional, i.e. + // these GLSL primitives are designed to be used without a dependency on + // `gpu` wherever possible - however, some features may not work, and will + // be disabled even if requested. + pl_gpu gpu; + + // The `index` represents an abstract frame index, which shaders may use + // internally to do things like temporal dithering or seeding PRNGs. If the + // user does not care about temporal dithering/debanding, or wants + // deterministic rendering, this may safely be left as 0. Otherwise, it + // should be incremented by 1 on successive frames. + uint8_t index; + + // If `glsl.version` is nonzero, then this structure will be used to + // determine the effective GLSL mode and capabilities. If `gpu` is also + // set, then this overrides `gpu->glsl`. + struct pl_glsl_version glsl; + + // If this is true, all constants in the shader will be replaced by + // dynamic variables. This is mainly useful to avoid recompilation for + // shaders which expect to have their values change constantly. + bool dynamic_constants; +}; + +#define pl_shader_params(...) (&(struct pl_shader_params) { __VA_ARGS__ }) + +// Creates a new, blank, mutable pl_shader object. 
+// +// Note: Rather than allocating and destroying many shaders, users are +// encouraged to reuse them (using `pl_shader_reset`) for efficiency. +PL_API pl_shader pl_shader_alloc(pl_log log, const struct pl_shader_params *params); + +// Frees a pl_shader and all resources associated with it. +PL_API void pl_shader_free(pl_shader *sh); + +// Resets a pl_shader to a blank slate, without releasing internal memory. +// If you're going to be re-generating shaders often, this function will let +// you skip the re-allocation overhead. +PL_API void pl_shader_reset(pl_shader sh, const struct pl_shader_params *params); + +// Returns whether or not a shader is in a "failed" state. Trying to modify a +// shader in illegal ways (e.g. signature mismatch) will result in the shader +// being marked as "failed". Since most pl_shader_ operations have a void +// return type, the user can use this function to figure out whether a specific +// shader operation has failed or not. This function is somewhat redundant +// since `pl_shader_finalize` will also return NULL in this case. +PL_API bool pl_shader_is_failed(const pl_shader sh); + +// Returns whether or not a pl_shader needs to be run as a compute shader. This +// will never be the case unless the `pl_glsl_version` this `pl_shader` was +// created using has `compute` support enabled. +PL_API bool pl_shader_is_compute(const pl_shader sh); + +// Returns whether or not the shader has any particular output size +// requirements. Some shaders, in particular those that sample from other +// textures, have specific output size requirements which need to be respected +// by the caller. If this is false, then the shader is compatible with every +// output size. If true, the size requirements are stored into *w and *h. +PL_API bool pl_shader_output_size(const pl_shader sh, int *w, int *h); + +// Indicates the type of signature that is associated with a shader result. 
// Every shader result defines a function that may be called by the user, and
// this enum indicates the type of value that this function takes and/or
// returns.
//
// Which signature a shader ends up with depends on the type of operation being
// performed by a shader fragment, as determined by the user's calls. See below
// for more information.
enum pl_shader_sig {
    PL_SHADER_SIG_NONE = 0, // no input / void output
    PL_SHADER_SIG_COLOR,    // vec4 color (normalized so that 1.0 is the ref white)

    // The following are only valid as input signatures:
    PL_SHADER_SIG_SAMPLER,  // (gsampler* src_tex, vecN tex_coord) pair,
                            // specifics depend on how the shader was generated
};

// Structure encapsulating information about a shader. This is internally
// refcounted, to allow moving it around without having to create deep copies.
typedef const struct pl_shader_info_t {
    // A copy of the parameters used to create the shader.
    struct pl_shader_params params;

    // A list of friendly names for the semantic operations being performed by
    // this shader, e.g. "color decoding" or "debanding".
    const char **steps;
    int num_steps;

    // As a convenience, this contains a pretty-printed version of the
    // above list, with entries tallied and separated by commas
    const char *description;
} *pl_shader_info;

PL_API pl_shader_info pl_shader_info_ref(pl_shader_info info);
PL_API void pl_shader_info_deref(pl_shader_info *info);

// Represents a finalized shader fragment. This is not a complete shader, but a
// collection of raw shader text together with description of the input
// attributes, variables and vertices it expects to be available.
struct pl_shader_res {
    // Descriptive information about the shader. Note that this reference is
    // attached to the shader itself - the user does not need to manually ref
    // or deref `info` unless they wish to move it elsewhere.
    pl_shader_info info;

    // The shader text, as literal GLSL. This will always be a function
    // definition, such that the function with the indicated name and
    // signature may be called by the user.
    const char *glsl;
    const char *name;
    enum pl_shader_sig input;  // what the function expects
    enum pl_shader_sig output; // what the function returns

    // For compute shaders (pl_shader_is_compute), this indicates the requested
    // work group size. Otherwise, both fields are 0. The interpretation of
    // these work groups is that they're tiled across the output image.
    int compute_group_size[2];

    // If this pass is a compute shader, this field indicates the shared memory
    // size requirements for this shader pass.
    size_t compute_shmem;

    // A set of input vertex attributes needed by this shader fragment.
    const struct pl_shader_va *vertex_attribs;
    int num_vertex_attribs;

    // A set of input variables needed by this shader fragment.
    const struct pl_shader_var *variables;
    int num_variables;

    // A list of input descriptors needed by this shader fragment.
    const struct pl_shader_desc *descriptors;
    int num_descriptors;

    // A list of compile-time constants used by this shader fragment.
    const struct pl_shader_const *constants;
    int num_constants;

    // --- Deprecated fields (see `info`)
    struct pl_shader_params params PL_DEPRECATED;
    const char **steps PL_DEPRECATED;
    int num_steps PL_DEPRECATED;
    const char *description PL_DEPRECATED;
};

// Represents a vertex attribute.
// The four values will be bound to the four
// corner vertices respectively, in row-wise order starting from the top left:
//   data[0] data[1]
//   data[2] data[3]
struct pl_shader_va {
    struct pl_vertex_attrib attr; // VA type, excluding `offset` and `location`
    const void *data[4];
};

// Represents a bound shared variable / descriptor
struct pl_shader_var {
    struct pl_var var;  // the underlying variable description
    const void *data;   // the raw data (as per `pl_var_host_layout`)
    bool dynamic;       // if true, the value is expected to change frequently
};

struct pl_buffer_var {
    struct pl_var var;
    struct pl_var_layout layout;
};

typedef uint16_t pl_memory_qualifiers;
enum {
    PL_MEMORY_COHERENT = 1 << 0,  // supports synchronization across shader invocations
    PL_MEMORY_VOLATILE = 1 << 1,  // all writes are synchronized automatically

    // Note: All descriptors are also implicitly assumed to have the 'restrict'
    // memory qualifier. There is currently no way to override this behavior.
};

struct pl_shader_desc {
    struct pl_desc desc;            // descriptor type, excluding `int binding`
    struct pl_desc_binding binding; // contents of the descriptor binding

    // For PL_DESC_BUF_UNIFORM/STORAGE, this specifies the layout of the
    // variables contained by a buffer. Ignored for the other descriptor types
    struct pl_buffer_var *buffer_vars;
    int num_buffer_vars;

    // For storage images and buffers, this specifies additional memory
    // qualifiers on the descriptor. Note that the 'restrict' qualifier is
    // always applied implicitly (see the note on `pl_memory_qualifiers`
    // above), so no corresponding flag exists here. Ignored for other
    // descriptor types.
    pl_memory_qualifiers memory;
};

// Represents a compile-time constant. This can be lowered to a specialization
// constant to support cheaper recompilations.
+struct pl_shader_const { + enum pl_var_type type; + const char *name; + const void *data; + + // If true, this constant *must* be a compile-time constant, which + // basically just overrides `pl_shader_params.dynamic_constants`. Useful + // for constants which will serve as inputs to e.g. array sizes. + bool compile_time; +}; + +// Finalize a pl_shader. It is no longer mutable at this point, and any further +// attempts to modify it result in an error. (Functions which take a `const +// pl_shader` argument do not modify the shader and may be freely +// called on an already-finalized shader) +// +// The returned pl_shader_res is bound to the lifetime of the pl_shader - and +// will only remain valid until the pl_shader is freed or reset. This function +// may be called multiple times, and will produce the same result each time. +// +// This function will return NULL if the shader is considered to be in a +// "failed" state (see pl_shader_is_failed). +PL_API const struct pl_shader_res *pl_shader_finalize(pl_shader sh); + +// Shader objects represent abstract resources that shaders need to manage in +// order to ensure their operation. This could include shader storage buffers, +// generated lookup textures, or other sorts of configured state. The body +// of a shader object is fully opaque; but the user is in charge of cleaning up +// after them and passing them to the right shader passes. +// +// Note: pl_shader_obj objects must be initialized to NULL by the caller. +typedef struct pl_shader_obj_t *pl_shader_obj; + +PL_API void pl_shader_obj_destroy(pl_shader_obj *obj); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_H_ diff --git a/src/include/libplacebo/shaders/colorspace.h b/src/include/libplacebo/shaders/colorspace.h new file mode 100644 index 0000000..ead0958 --- /dev/null +++ b/src/include/libplacebo/shaders/colorspace.h @@ -0,0 +1,381 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_COLORSPACE_H_ +#define LIBPLACEBO_SHADERS_COLORSPACE_H_ + +// Color space transformation shaders. These all input and output a color +// value (PL_SHADER_SIG_COLOR). + +#include <libplacebo/colorspace.h> +#include <libplacebo/gamut_mapping.h> +#include <libplacebo/tone_mapping.h> +#include <libplacebo/shaders.h> + +// For backwards compatibility +#include <libplacebo/shaders/dithering.h> + +PL_API_BEGIN + +// Transform the input color, in its given representation, to ensure +// compatibility with the indicated alpha mode. Mutates `repr` to reflect the +// change. Note that this is a no-op if the input is PL_ALPHA_UNKNOWN. +PL_API void pl_shader_set_alpha(pl_shader sh, struct pl_color_repr *repr, + enum pl_alpha_mode mode); + +// Colorspace reshaping for PL_COLOR_SYSTEM_DOLBYVISION. Note that this is done +// automatically by `pl_shader_decode_color` for PL_COLOR_SYSTEM_DOLBYVISION. +PL_API void pl_shader_dovi_reshape(pl_shader sh, const struct pl_dovi_metadata *data); + +// Decode the color into normalized RGB, given a specified color_repr. This +// also takes care of additional pre- and post-conversions requires for the +// "special" color systems (XYZ, BT.2020-C, etc.). If `params` is left as NULL, +// it defaults to &pl_color_adjustment_neutral. 
+// +// Note: This function always returns PC-range RGB with independent alpha. +// It mutates the pl_color_repr to reflect the change. +// +// Note: For DCDM XYZ decoding output is linear +PL_API void pl_shader_decode_color(pl_shader sh, struct pl_color_repr *repr, + const struct pl_color_adjustment *params); + +// Encodes a color from normalized, PC-range, independent alpha RGB into a +// given representation. That is, this performs the inverse operation of +// `pl_shader_decode_color` (sans color adjustments). +// +// Note: For DCDM XYZ encoding input is expected to be linear +PL_API void pl_shader_encode_color(pl_shader sh, const struct pl_color_repr *repr); + +// Linearize (expand) `vec4 color`, given a specified color space. In essence, +// this corresponds to the ITU-R EOTF. +// +// Note: Unlike the ITU-R EOTF, it never includes the OOTF - even for systems +// where the EOTF includes the OOTF (such as HLG). +PL_API void pl_shader_linearize(pl_shader sh, const struct pl_color_space *csp); + +// Delinearize (compress), given a color space as output. This loosely +// corresponds to the inverse EOTF (not the OETF) in ITU-R terminology, again +// assuming a reference monitor. +PL_API void pl_shader_delinearize(pl_shader sh, const struct pl_color_space *csp); + +struct pl_sigmoid_params { + // The center (bias) of the sigmoid curve. Must be between 0.0 and 1.0. + // If left as NULL, defaults to 0.75 + float center; + + // The slope (steepness) of the sigmoid curve. Must be between 1.0 and 20.0. + // If left as NULL, defaults to 6.5. + float slope; +}; + +#define PL_SIGMOID_DEFAULTS \ + .center = 0.75, \ + .slope = 6.50, + +#define pl_sigmoid_params(...) (&(struct pl_sigmoid_params) { PL_SIGMOID_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_sigmoid_params pl_sigmoid_default_params; + +// Applies a sigmoidal color transform to all channels. 
// This helps avoid
// ringing artifacts during upscaling by bringing the color information closer
// to neutral and away from the extremes. If `params` is NULL, it defaults to
// &pl_sigmoid_default_params.
//
// Warning: This function clamps the input to the interval [0,1]; and as such
// it should *NOT* be used on already-decoded high-dynamic range content.
PL_API void pl_shader_sigmoidize(pl_shader sh, const struct pl_sigmoid_params *params);

// This performs the inverse operation to `pl_shader_sigmoidize`.
PL_API void pl_shader_unsigmoidize(pl_shader sh, const struct pl_sigmoid_params *params);

struct pl_peak_detect_params {
    // Smoothing coefficient for the detected values. This controls the time
    // parameter (tau) of an IIR low pass filter. In other words, it represents
    // the cutoff period (= 1 / cutoff frequency) in frames. Frequencies below
    // this length will be suppressed. This helps block out annoying
    // "sparkling" or "flickering" due to small variations in frame-to-frame
    // brightness. If left as 0.0, this smoothing is completely disabled.
    float smoothing_period;

    // In order to avoid reacting sluggishly on scene changes as a result of
    // the low-pass filter, we disable it when the difference between the
    // current frame brightness and the average frame brightness exceeds a
    // given threshold difference. But rather than a single hard cutoff, which
    // would lead to weird discontinuities on fades, we gradually disable it
    // over a small window of brightness ranges. These parameters control the
    // lower and upper bounds of this window, in units of 1% PQ.
    //
    // Setting either one of these to 0.0 disables this logic.
    float scene_threshold_low;
    float scene_threshold_high;

    // Which percentile of the input image brightness histogram to consider as
    // the true peak of the scene. If this is set to 100 (or 0), the brightest
    // pixel is measured. Otherwise, the top of the frequency distribution is
    // progressively cut off. Setting this too low will cause clipping of very
    // bright details, but can improve the dynamic brightness range of scenes
    // with very bright isolated highlights.
    //
    // A recommended value is 99.995%, which is very conservative and should
    // cause no major issues in typical content.
    float percentile;

    // Allows the peak detection result to be delayed by up to a single frame,
    // which can sometimes improve throughput, at the cost of introducing the
    // possibility of 1-frame flickers on transitions. Disabled by default.
    bool allow_delayed;

    // --- Deprecated / removed fields
    float overshoot_margin PL_DEPRECATED;
    float minimum_peak PL_DEPRECATED;
};

#define PL_PEAK_DETECT_DEFAULTS \
    .smoothing_period = 20.0f, \
    .scene_threshold_low = 1.0f, \
    .scene_threshold_high = 3.0f, \
    .percentile = 100.0f,

#define PL_PEAK_DETECT_HQ_DEFAULTS \
    PL_PEAK_DETECT_DEFAULTS \
    .percentile = 99.995f,

#define pl_peak_detect_params(...) (&(struct pl_peak_detect_params) { PL_PEAK_DETECT_DEFAULTS __VA_ARGS__ })
PL_API extern const struct pl_peak_detect_params pl_peak_detect_default_params;
PL_API extern const struct pl_peak_detect_params pl_peak_detect_high_quality_params;

// This function can be used to measure the CLL and FALL of a video
// source automatically, using a compute shader. The measured values are
// smoothed automatically (depending on the parameters), so to keep track of
// the measured results over time, a tone mapping shader state object is used
// to hold the state. Returns false on failure initializing the tone mapping
// object, or if compute shaders are not supported.
//
// It's important that the same shader object is used for successive frames
// belonging to the same source. If the source changes (e.g.
due to a file +// change or seek), the user should reset it with `pl_reset_detected_peak` (or +// destroy it and use a new state object). +// +// The parameter `csp` holds the representation of the color values that are +// the input to this function. (They must already be in decoded RGB form, i.e. +// alternate color representations are not supported) +PL_API bool pl_shader_detect_peak(pl_shader sh, struct pl_color_space csp, + pl_shader_obj *state, + const struct pl_peak_detect_params *params); + +// After dispatching the above shader, this function can be used to retrieve +// the detected dynamic HDR10+ metadata parameters. The other fields of +// `metadata` are not written to. Returns whether or not any values were +// written. If not, the values are left untouched, so this can be used to +// safely update `pl_hdr_metadata` values in-place. This function may or may +// not block, depending on the previous setting of `allow_delayed`. +PL_API bool pl_get_detected_hdr_metadata(const pl_shader_obj state, + struct pl_hdr_metadata *metadata); + +// After dispatching the above shader, this function *may* be used to read out +// the detected CLL and FALL directly (in PL_HDR_NORM units). If the shader +// has never been dispatched yet, i.e. no information is available, this will +// return false. +// +// Deprecated in favor of `pl_get_detected_hdr_metadata` +PL_DEPRECATED PL_API bool pl_get_detected_peak(const pl_shader_obj state, + float *out_cll, float *out_fall); + +// Resets the peak detection state in a given tone mapping state object. This +// is not equal to `pl_shader_obj_destroy`, because it does not destroy any +// state used by `pl_shader_tone_map`. +PL_API void pl_reset_detected_peak(pl_shader_obj state); + +// Feature map extraction (for pl_color_map_args.feature_map). The result +// of this shader should be downscaled / low-passed to the indicated kernel +// size before use. 
(This does not happen automatically) +PL_API void pl_shader_extract_features(pl_shader sh, struct pl_color_space csp); + +// Deprecated and unused. Libplacebo now always performs a variant of the old +// hybrid tone-mapping, mixing together the intensity (I) and per-channel (LMS) +// results. +enum pl_tone_map_mode { + PL_TONE_MAP_AUTO PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_RGB PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_MAX PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_HYBRID PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_LUMA PL_DEPRECATED_ENUMERATOR, + PL_TONE_MAP_MODE_COUNT, +}; + +// Deprecated by <libplacebo/gamut_mapping.h> +enum pl_gamut_mode { + PL_GAMUT_CLIP PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_clip + PL_GAMUT_WARN PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_highlight + PL_GAMUT_DARKEN PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_darken + PL_GAMUT_DESATURATE PL_DEPRECATED_ENUMERATOR, // pl_gamut_map_desaturate + PL_GAMUT_MODE_COUNT, +}; + +struct pl_color_map_params { + // --- Gamut mapping options + + // Gamut mapping function to use to handle out-of-gamut colors, including + // colors which are out-of-gamut as a consequence of tone mapping. + const struct pl_gamut_map_function *gamut_mapping; + + // Gamut mapping constants, for expert tuning. Leave as default otherwise. + struct pl_gamut_map_constants gamut_constants; + + // Gamut mapping 3DLUT size, for channels ICh. Defaults to {48, 32, 256} + int lut3d_size[3]; + + // Use higher quality, but slower, tricubic interpolation for gamut mapping + // 3DLUTs. May substantially improve the 3DLUT gamut mapping accuracy, in + // particular at smaller 3DLUT sizes. Shouldn't have much effect at the + // default size. + bool lut3d_tricubic; + + // If true, allows the gamut mapping function to expand the gamut, in + // cases where the target gamut exceeds that of the source. If false, + // the source gamut will never be enlarged, even when using a gamut + // mapping function capable of bidirectional mapping. 
+ bool gamut_expansion; + + // --- Tone mapping options + + // Tone mapping function to use to handle out-of-range colors. + const struct pl_tone_map_function *tone_mapping_function; + + // Tone mapping constants, for expert tuning. Leave as default otherwise. + struct pl_tone_map_constants tone_constants; + + // If true, and supported by the given tone mapping function, libplacebo + // will perform inverse tone mapping to expand the dynamic range of a + // signal. libplacebo is not liable for any HDR-induced eye damage. + bool inverse_tone_mapping; + + // Data source to use when tone-mapping. Setting this to a specific + // value allows overriding the default metadata preference logic. + enum pl_hdr_metadata_type metadata; + + // Tone mapping LUT size. Defaults to 256. + int lut_size; + + // HDR contrast recovery strength. If set to a value above 0.0, the source + // image will be divided into high-frequency and low-frequency components, + // and a portion of the high-frequency image is added back onto the + // tone-mapped output. May cause excessive ringing artifacts for some HDR + // sources, but can improve the subjective sharpness and detail left over + // in the image after tone-mapping. + float contrast_recovery; + + // Contrast recovery lowpass kernel size. Defaults to 3.5. Increasing + // or decreasing this will affect the visual appearance substantially. + float contrast_smoothness; + + // --- Debugging options + + // Force the use of a full tone-mapping LUT even for functions that have + // faster pure GLSL replacements (e.g. clip, linear, saturation). + bool force_tone_mapping_lut; + + // Visualize the tone-mapping LUT and gamut mapping 3DLUT, in IPT space. + bool visualize_lut; + + // Controls where to draw the visualization, relative to the rendered + // video (dimensions 0-1). Optional, defaults to the full picture. + pl_rect2df visualize_rect; + + // Controls the rotation of the 3DLUT visualization. 
+ float visualize_hue; // useful range [-pi, pi] + float visualize_theta; // useful range [0, pi/2] + + // Graphically highlight hard-clipped pixels during tone-mapping (i.e. + // pixels that exceed the claimed source luminance range). + bool show_clipping; + + // --- Deprecated fields + enum pl_tone_map_mode tone_mapping_mode PL_DEPRECATED; // removed + float tone_mapping_param PL_DEPRECATED; // see `tone_constants` + float tone_mapping_crosstalk PL_DEPRECATED; // now hard-coded as 0.04 + enum pl_rendering_intent intent PL_DEPRECATED; // see `gamut_mapping` + enum pl_gamut_mode gamut_mode PL_DEPRECATED; // see `gamut_mapping` + float hybrid_mix PL_DEPRECATED; // removed +}; + +#define PL_COLOR_MAP_DEFAULTS \ + .gamut_mapping = &pl_gamut_map_perceptual, \ + .tone_mapping_function = &pl_tone_map_spline, \ + .gamut_constants = { PL_GAMUT_MAP_CONSTANTS }, \ + .tone_constants = { PL_TONE_MAP_CONSTANTS }, \ + .metadata = PL_HDR_METADATA_ANY, \ + .lut3d_size = {48, 32, 256}, \ + .lut_size = 256, \ + .visualize_rect = {0, 0, 1, 1}, \ + .contrast_smoothness = 3.5f, + +#define PL_COLOR_MAP_HQ_DEFAULTS \ + PL_COLOR_MAP_DEFAULTS \ + .contrast_recovery = 0.30f, + +#define pl_color_map_params(...) (&(struct pl_color_map_params) { PL_COLOR_MAP_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_color_map_params pl_color_map_default_params; +PL_API extern const struct pl_color_map_params pl_color_map_high_quality_params; + +// Execution arguments for the `pl_shader_color_map_ex` call. Distinct from +// `pl_color_map_params` because it is filled by internally-provided execution +// metadata, instead of user-tunable aesthetic parameters. +struct pl_color_map_args { + // Input/output color space for the mapping. + struct pl_color_space src; + struct pl_color_space dst; + + // If true, the logic will assume the input has already been linearized by + // the caller (e.g. as part of a previous linear light scaling operation). 
+ bool prelinearized; + + // Object to be used to store generated LUTs. Note that this is the same + // state object used by `pl_shader_detect_peak`, and if that function has + // been called on `state` prior to `pl_shader_color_map`, the detected + // values will be used to guide the tone mapping algorithm. If this is not + // provided, tone/gamut mapping are disabled. + pl_shader_obj *state; + + // Low-resolution intensity feature map, as generated by + // `pl_shader_extract_features`. Optional. No effect if + // `params->contrast_recovery` is disabled. + pl_tex feature_map; +}; + +#define pl_color_map_args(...) (&(struct pl_color_map_args) { __VA_ARGS__ }) + +// Maps `vec4 color` from one color space to another color space according +// to the parameters (described in greater depth above). If `params` is left +// as NULL, it defaults to `&pl_color_map_default_params` +PL_API void pl_shader_color_map_ex(pl_shader sh, + const struct pl_color_map_params *params, + const struct pl_color_map_args *args); + +// Backwards compatibility wrapper around `pl_shader_color_map_ex` +PL_API void pl_shader_color_map(pl_shader sh, const struct pl_color_map_params *params, + struct pl_color_space src, struct pl_color_space dst, + pl_shader_obj *state, bool prelinearized); + +// Applies a set of cone distortion parameters to `vec4 color` in a given color +// space. This can be used to simulate color blindness. See `pl_cone_params` +// for more information. +PL_API void pl_shader_cone_distort(pl_shader sh, struct pl_color_space csp, + const struct pl_cone_params *params); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_COLORSPACE_H_ diff --git a/src/include/libplacebo/shaders/custom.h b/src/include/libplacebo/shaders/custom.h new file mode 100644 index 0000000..a4eec69 --- /dev/null +++ b/src/include/libplacebo/shaders/custom.h @@ -0,0 +1,341 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_CUSTOM_H_ +#define LIBPLACEBO_SHADERS_CUSTOM_H_ + +#include <stdlib.h> + +// Functions for writing custom shaders and hooking them into the `pl_renderer` +// pipeline, as well as compatibility functions for parsing shaders in mpv +// format. + +#include <libplacebo/shaders.h> +#include <libplacebo/dispatch.h> +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +// Parameters describing custom shader text to be embedded into a `pl_shader` +// object. All of the strings are optional and can be left as NULL, but without +// a `body` in particular, the shader will do nothing useful on its own. +struct pl_custom_shader { + // The prelude contains text such as extra #defines, #extension pragmas, + // or other parts of the shader that must be placed at the very + // beginning (before input layout declarations etc.) + // + // Note: #extension pragmas do not need to be emitted to enable support for + // resource types already attached to the shader (e.g. SSBOs), compute + // shaders, or GPU capabilities known to libplacebo (e.g. subgroups). + const char *prelude; + + // The header contains text such as helper function definitions, extra + // uniforms, shared memory variables or buffer descriptions. + const char *header; + + // A friendly name for the shader. 
(Optional) + const char *description; + + // The "primary" GLSL code. This will be effectively appended to the "main" + // function. It lives in an environment given by the `input` signature, and + // is expected to return results in a way given by the `output` signature. + // + // Note: In the case of PL_SHADER_SIG_COLOR, the output `vec4 color` is + // allocated by `pl_shader_custom`, the user merely needs to assign to it. + // + // Note: For ease of development it can be useful to have the main logic + // live inside a helper function defined as part of `header`, and specify + // the `body` as a single line that simply calls the helper function. + const char *body; + enum pl_shader_sig input; + enum pl_shader_sig output; + + // Extra descriptors, variables and vertex attributes to attach to the + // resulting `pl_shader_res`. + // + // Note: The names inside these will possibly be replaced by fresh + // identifiers internally, so users should avoid looking for exact string + // matches for the given names inside the `pl_shader_res`. + const struct pl_shader_desc *descriptors; + int num_descriptors; + const struct pl_shader_var *variables; + int num_variables; + const struct pl_shader_va *vertex_attribs; + int num_vertex_attribs; + const struct pl_shader_const *constants; + int num_constants; + + // If true, this shader must be a compute shader. The desired workgroup + // size and shared memory usage can be optionally specified, or 0 if no + // specific work group size or shared memory size restrictions apply. + // + // See also: `pl_shader_res.compute_group_size` + bool compute; + size_t compute_shmem; + int compute_group_size[2]; + + // Fixes the output size requirements of the shader to exact dimensions. + // Optional, if left as 0, means the shader can be dispatched at any size. + int output_w; + int output_h; +}; + +// Append custom shader code, including extra descriptors and variables, to an +// existing `pl_shader` object. Returns whether successful. 
This function may +// fail in the event that e.g. the custom shader requires compute shaders on +// an unsupported GPU, or exceeds the GPU's shared memory capabilities. +PL_API bool pl_shader_custom(pl_shader sh, const struct pl_custom_shader *params); + +// Which "rendering stages" are available for user shader hooking purposes. +// Except where otherwise noted, all stages are "non-resizable", i.e. the +// shaders already have specific output size requirements. +enum pl_hook_stage { + // Hook stages for the untouched planes, as made available by the source. + // These are all resizable, i.e. there are no specific output stage + // requirements. + PL_HOOK_RGB_INPUT = 1 << 0, + PL_HOOK_LUMA_INPUT = 1 << 1, + PL_HOOK_CHROMA_INPUT = 1 << 2, + PL_HOOK_ALPHA_INPUT = 1 << 3, + PL_HOOK_XYZ_INPUT = 1 << 4, + + // Hook stages for the scaled/aligned planes + PL_HOOK_CHROMA_SCALED = 1 << 5, + PL_HOOK_ALPHA_SCALED = 1 << 6, + + PL_HOOK_NATIVE = 1 << 7, // Combined image in its native color space + PL_HOOK_RGB = 1 << 8, // After conversion to RGB (resizable) + PL_HOOK_LINEAR = 1 << 9, // After linearization but before scaling + PL_HOOK_SIGMOID = 1 << 10, // After sigmoidization + PL_HOOK_PRE_KERNEL = 1 << 11, // Immediately before the main scaler kernel + PL_HOOK_POST_KERNEL = 1 << 12, // Immediately after the main scaler kernel + PL_HOOK_SCALED = 1 << 13, // After scaling, before color management + PL_HOOK_PRE_OUTPUT = 1 << 14, // After color management, before blending/rotation + PL_HOOK_OUTPUT = 1 << 15, // After blending/rotation, before dithering +}; + +// Returns true if a given hook stage is resizable +static inline bool pl_hook_stage_resizable(enum pl_hook_stage stage) { + switch (stage) { + case PL_HOOK_RGB_INPUT: + case PL_HOOK_LUMA_INPUT: + case PL_HOOK_CHROMA_INPUT: + case PL_HOOK_ALPHA_INPUT: + case PL_HOOK_XYZ_INPUT: + case PL_HOOK_NATIVE: + case PL_HOOK_RGB: + return true; + + case PL_HOOK_CHROMA_SCALED: + case PL_HOOK_ALPHA_SCALED: + case PL_HOOK_LINEAR: + case 
PL_HOOK_SIGMOID: + case PL_HOOK_PRE_KERNEL: + case PL_HOOK_POST_KERNEL: + case PL_HOOK_SCALED: + case PL_HOOK_PRE_OUTPUT: + case PL_HOOK_OUTPUT: + return false; + } + + abort(); +} + +// The different forms of communicating image data between the renderer and +// the hooks +enum pl_hook_sig { + PL_HOOK_SIG_NONE, // No data is passed, no data is received/returned + PL_HOOK_SIG_COLOR, // `vec4 color` already pre-sampled in a `pl_shader` + PL_HOOK_SIG_TEX, // `pl_tex` containing the image data + PL_HOOK_SIG_COUNT, +}; + +struct pl_hook_params { + // GPU objects associated with the `pl_renderer`, which the user may + // use for their own purposes. + pl_gpu gpu; + pl_dispatch dispatch; + + // Helper function to fetch a new temporary texture, using renderer-backed + // storage. This is guaranteed to have sane image usage requirements and a + // 16-bit or floating point format. The user does not need to free/destroy + // this texture in any way. May return NULL. + pl_tex (*get_tex)(void *priv, int width, int height); + void *priv; + + // Which stage triggered the hook to run. + enum pl_hook_stage stage; + + // For `PL_HOOK_SIG_COLOR`, this contains the existing shader object with + // the color already pre-sampled into `vec4 color`. The user may modify + // this as much as they want, as long as they don't dispatch/finalize/reset + // it. + // + // Note that this shader might have specific output size requirements, + // depending on the exact shader stage hooked by the user, and may already + // be a compute shader. + pl_shader sh; + + // For `PL_HOOK_SIG_TEX`, this contains the texture that the user should + // sample from. + // + // Note: This texture object is owned by the renderer, and users must not + // modify its contents. It will not be touched for the duration of a frame, + // but the contents are lost in between frames. + pl_tex tex; + + // The effective current rectangle of the image we're rendering in this + // shader, i.e. 
the effective rect of the content we're interested in,
+    // as a crop of either `sh` or `tex` (depending on the signature).
+    //
+    // Note: This is still set even for `PL_HOOK_SIG_NONE`!
+    pl_rect2df rect;
+
+    // The current effective colorspace and representation, of either the
+    // pre-sampled color (in `sh`), or the contents of `tex`, respectively.
+    //
+    // Note: This is still set even for `PL_HOOK_SIG_NONE`!
+    struct pl_color_repr repr;
+    struct pl_color_space color;
+    int components;
+
+    // The representation and colorspace of the original image, for reference.
+    const struct pl_color_repr *orig_repr;
+    const struct pl_color_space *orig_color;
+
+    // The (cropped) source and destination rectangles of the overall
+    // rendering. These are functionally equivalent to `image.crop` and
+    // `target.crop`, respectively, but `src_rect` in particular may change as
+    // a result of previous hooks being executed. (e.g. prescalers)
+    pl_rect2df src_rect;
+    pl_rect2d dst_rect;
+};
+
+struct pl_hook_res {
+    // If true, the hook is assumed to have "failed" or errored in some way,
+    // and all other fields are ignored.
+    bool failed;
+
+    // What type of output this hook is returning.
+    // Note: If this is `PL_HOOK_SIG_NONE`, all other fields are ignored.
+    enum pl_hook_sig output;
+
+    // For `PL_HOOK_SIG_COLOR`, this *must* be set to a valid `pl_shader`
+    // object containing the sampled color value (i.e. with an output signature
+    // of `PL_SHADER_SIG_COLOR`), and *should* be allocated from the given
+    // `pl_dispatch` object. Ignored otherwise.
+    pl_shader sh;
+
+    // For `PL_HOOK_SIG_TEX`, this *must* contain the texture object containing
+    // the result of rendering the hook. This *should* be a texture allocated
+    // using the given `get_tex` callback, to ensure the format and texture
+    // usage flags are compatible with what the renderer expects. 
+ pl_tex tex; + + // For shaders that return some sort of output, this contains the + // new/altered versions of the existing "current texture" metadata. + struct pl_color_repr repr; + struct pl_color_space color; + int components; + + // This contains the new effective rect of the contents. This may be + // different from the original `rect` for resizable passes. Ignored for + // non-resizable passes. + pl_rect2df rect; +}; + +enum pl_hook_par_mode { + PL_HOOK_PAR_VARIABLE, // normal shader variable + PL_HOOK_PAR_DYNAMIC, // dynamic shader variable, e.g. per-frame changing + PL_HOOK_PAR_CONSTANT, // fixed at compile time (e.g. for array sizes), + // must be scalar (non-vector/matrix) + PL_HOOK_PAR_DEFINE, // defined in the preprocessor, must be `int` + PL_HOOK_PAR_MODE_COUNT, +}; + +typedef union pl_var_data { + int i; + unsigned u; + float f; +} pl_var_data; + +struct pl_hook_par { + // Name as used in the shader. + const char *name; + + // Type of this shader parameter, and how it's manifested in the shader. + enum pl_var_type type; + enum pl_hook_par_mode mode; + + // Human-readable explanation of this parameter. (Optional) + const char *description; + + // Mutable data pointer to current value of variable. + pl_var_data *data; + + // Default/initial value, and lower/upper bounds. + pl_var_data initial; + pl_var_data minimum; + pl_var_data maximum; + + // Human-readable names for the variants of an integer option. This array + // can be indexed directly by integer values, ranging from `minimum.i` to + // `maximum.i`. May be NULL, in which case options are unnamed. + const char * const *names; +}; + +// Struct describing a hook. +// +// Note: Users may freely create their own instances of this struct, there is +// nothing particularly special about `pl_mpv_user_shader_parse`. 
+struct pl_hook { + enum pl_hook_stage stages; // Which stages to hook on + enum pl_hook_sig input; // Which input signature this hook expects + void *priv; // Arbitrary user context + + // Custom tunable shader parameters exported by this hook. These may be + // updated at any time by the user, to influence the behavior of the hook. + // Contents are arbitrary and subject to the method of hook construction. + const struct pl_hook_par *parameters; + int num_parameters; + + // Called at the beginning of passes, to reset/initialize the hook. (Optional) + void (*reset)(void *priv); + + // The hook function itself. Called by the renderer at any of the indicated + // hook stages. See `pl_hook_res` for more info on the return values. + struct pl_hook_res (*hook)(void *priv, const struct pl_hook_params *params); + + // Unique signature identifying this hook, used to disable misbehaving hooks. + // All hooks with the same signature will be disabled, should they fail to + // execute during run-time. + uint64_t signature; +}; + +// Compatibility layer with `mpv` user shaders. See the mpv man page for more +// information on the format. Will return `NULL` if the shader fails parsing. +// +// The resulting `pl_hook` objects should be destroyed with the corresponding +// destructor when no longer needed. +PL_API const struct pl_hook * +pl_mpv_user_shader_parse(pl_gpu gpu, const char *shader_text, size_t shader_len); + +PL_API void pl_mpv_user_shader_destroy(const struct pl_hook **hook); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_CUSTOM_H_ diff --git a/src/include/libplacebo/shaders/deinterlacing.h b/src/include/libplacebo/shaders/deinterlacing.h new file mode 100644 index 0000000..40e74e8 --- /dev/null +++ b/src/include/libplacebo/shaders/deinterlacing.h @@ -0,0 +1,137 @@ + +/* + * This file is part of libplacebo, which is normally licensed under the terms + * of the LGPL v2.1+. 
However, this file (deinterlacing.h) is also available under
+ * the terms of the more permissive MIT license:
+ *
+ * Copyright (c) 2018-2019 Niklas Haas
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to deal
+ * in the Software without restriction, including without limitation the rights
+ * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
+ * copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef LIBPLACEBO_SHADERS_DEINTERLACING_H_
+#define LIBPLACEBO_SHADERS_DEINTERLACING_H_
+
+#include <libplacebo/shaders.h>
+
+PL_API_BEGIN
+
+enum pl_field {
+    PL_FIELD_NONE = 0, // no deinterlacing
+    PL_FIELD_EVEN,     // "top" fields, with even y coordinates
+    PL_FIELD_ODD,      // "bottom" fields, with odd y coordinates
+
+    // Convenience aliases
+    PL_FIELD_TOP = PL_FIELD_EVEN,
+    PL_FIELD_BOTTOM = PL_FIELD_ODD,
+};
+
+static inline enum pl_field pl_field_other(enum pl_field field)
+{
+    switch (field) {
+    case PL_FIELD_EVEN: return PL_FIELD_ODD;
+    case PL_FIELD_ODD:  return PL_FIELD_EVEN;
+    default: return field;
+    }
+}
+
+struct pl_field_pair {
+    // Top texture. 
If only this is specified, it's assumed to contain both
+    // fields in an interleaved fashion (MBAFF).
+    //
+    // Note: Support for separate fields (PAFF) is currently pending, so this
+    // is the only way to provide interlaced frames at the moment.
+    pl_tex top;
+};
+
+#define pl_field_pair(...) ((struct pl_field_pair) { __VA_ARGS__ })
+
+struct pl_deinterlace_source {
+    // Previous, current and next source (interlaced) frames. `prev` and `next`
+    // may be NULL, but `cur` is required. If present, they must all have the
+    // exact same texture dimensions.
+    //
+    // Note: `prev` and `next` are only required for PL_DEINTERLACE_YADIF.
+    struct pl_field_pair prev, cur, next;
+
+    // The parity of the current field to output. This field will be unmodified
+    // from `cur`, with the corresponding other field interpolated.
+    //
+    // If this is `PL_FIELD_NONE`, no deinterlacing is performed, and the
+    // texture is merely sampled as-is.
+    enum pl_field field;
+
+    // The parity of the first frame in a stream. Set this to the field that is
+    // (conceptually) ordered first in time.
+    //
+    // If this is `PL_FIELD_NONE`, it will instead default to `PL_FIELD_TOP`.
+    enum pl_field first_field;
+
+    // Components to deinterlace. Components not specified will be ignored.
+    // Optional, if left as 0, all components will be deinterlaced.
+    uint8_t component_mask;
+};
+
+#define pl_deinterlace_source(...) (&(struct pl_deinterlace_source) { __VA_ARGS__ })
+
+enum pl_deinterlace_algorithm {
+    // No-op deinterlacing, just sample the weaved frame un-touched.
+    PL_DEINTERLACE_WEAVE = 0,
+
+    // Naive bob deinterlacing. Doubles the field lines vertically.
+    PL_DEINTERLACE_BOB,
+
+    // "Yet another deinterlacing filter". Deinterlacer with temporal and
+    // spatial information. Based on FFmpeg's Yadif filter algorithm, but
+    // adapted slightly for the GPU. 
+ PL_DEINTERLACE_YADIF, + + PL_DEINTERLACE_ALGORITHM_COUNT, +}; + +// Returns whether or not an algorithm requires `prev`/`next` refs to be set. +static inline bool pl_deinterlace_needs_refs(enum pl_deinterlace_algorithm algo) +{ + return algo == PL_DEINTERLACE_YADIF; +} + +struct pl_deinterlace_params { + // Algorithm to use. The recommended default is PL_DEINTERLACE_YADIF, which + // provides a good trade-off of quality and speed. + enum pl_deinterlace_algorithm algo; + + // Skip the spatial interlacing check. (PL_DEINTERLACE_YADIF only) + bool skip_spatial_check; +}; + +#define PL_DEINTERLACE_DEFAULTS \ + .algo = PL_DEINTERLACE_YADIF, + +#define pl_deinterlace_params(...) (&(struct pl_deinterlace_params) { PL_DEINTERLACE_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_deinterlace_params pl_deinterlace_default_params; + +// Deinterlaces a set of interleaved source frames and outputs the result into +// `vec4 color`. If `params` is left as NULL, it defaults to +// `&pl_deinterlace_default_params`. +PL_API void pl_shader_deinterlace(pl_shader sh, const struct pl_deinterlace_source *src, + const struct pl_deinterlace_params *params); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_DEINTERLACING_H_ diff --git a/src/include/libplacebo/shaders/dithering.h b/src/include/libplacebo/shaders/dithering.h new file mode 100644 index 0000000..9146c81 --- /dev/null +++ b/src/include/libplacebo/shaders/dithering.h @@ -0,0 +1,140 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_DITHERING_H_ +#define LIBPLACEBO_SHADERS_DITHERING_H_ + +// Dithering shaders + +#include <libplacebo/colorspace.h> +#include <libplacebo/dither.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +enum pl_dither_method { + // Dither with blue noise. Very high quality, but requires the use of a + // LUT. Warning: Computing a blue noise texture with a large size can be + // very slow, however this only needs to be performed once. Even so, using + // this with a `lut_size` greater than 6 is generally ill-advised. This is + // the preferred/default dither method. + PL_DITHER_BLUE_NOISE, + + // Dither with an ordered (bayer) dither matrix, using a LUT. Low quality, + // and since this also uses a LUT, there's generally no advantage to picking + // this instead of `PL_DITHER_BLUE_NOISE`. It's mainly there for testing. + PL_DITHER_ORDERED_LUT, + + // The same as `PL_DITHER_ORDERED_LUT`, but uses fixed function math instead + // of a LUT. This is faster, but only supports a fixed dither matrix size + // of 16x16 (equal to a `lut_size` of 4). + PL_DITHER_ORDERED_FIXED, + + // Dither with white noise. This does not require a LUT and is fairly cheap + // to compute. Unlike the other modes it doesn't show any repeating + // patterns either spatially or temporally, but the downside is that this + // is visually fairly jarring due to the presence of low frequencies in the + // noise spectrum. + PL_DITHER_WHITE_NOISE, + + PL_DITHER_METHOD_COUNT, +}; + +struct pl_dither_params { + // The source of the dither noise to use. + enum pl_dither_method method; + + // For the dither methods which require the use of a LUT, this controls + // the size of the LUT (base 2). 
If left as NULL, this defaults to 6, which + // is equivalent to a 64x64 dither matrix. Must not be larger than 8. + int lut_size; + + // Enables temporal dithering. This reduces the persistence of dithering + // artifacts by perturbing the dithering matrix per frame. + // Warning: This can cause nasty aliasing artifacts on some LCD screens. + bool temporal; + + // Gamma function to use for dither gamma correction. This will only have + // an effect when dithering to low bit depths (<= 4). + enum pl_color_transfer transfer; +}; + +#define PL_DITHER_DEFAULTS \ + .method = PL_DITHER_BLUE_NOISE, \ + .lut_size = 6, \ + /* temporal dithering commonly flickers on LCDs */ \ + .temporal = false, + +#define pl_dither_params(...) (&(struct pl_dither_params) { PL_DITHER_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_dither_params pl_dither_default_params; + +// Dither the colors to a lower depth, given in bits. This can be used on input +// colors of any precision. Basically, this rounds the colors to only linear +// multiples of the stated bit depth. The average intensity of the result +// will not change (i.e., the dither noise is balanced in both directions). +// If `params` is NULL, it defaults to &pl_dither_default_params. +// +// For the dither methods which require the use of a LUT, `dither_state` must +// be set to a valid pointer. To avoid thrashing the resource, users should +// avoid trying to re-use the same LUT for different dither configurations. If +// passed as NULL, libplacebo will automatically fall back to dither algorithms +// that don't require the use of a LUT. +// +// Warning: This dithering algorithm is not gamma-invariant; so using it for +// very low bit depths (below 4 or so) will noticeably increase the brightness +// of the resulting image. When doing low bit depth dithering for aesthetic +// purposes, it's recommended that the user explicitly (de)linearize the colors +// before and after this algorithm. 
+PL_API void pl_shader_dither(pl_shader sh, int new_depth,
+                             pl_shader_obj *dither_state,
+                             const struct pl_dither_params *params);
+
+struct pl_error_diffusion_params {
+    // Both the input and output texture must be provided up-front, with the
+    // same size. The output texture must be storable, and the input texture
+    // must be sampleable.
+    pl_tex input_tex;
+    pl_tex output_tex;
+
+    // Depth to dither to. Required.
+    int new_depth;
+
+    // Error diffusion kernel to use. Optional. If unspecified, defaults to
+    // `&pl_error_diffusion_sierra_lite`.
+    const struct pl_error_diffusion_kernel *kernel;
+};
+
+#define pl_error_diffusion_params(...) (&(struct pl_error_diffusion_params) { __VA_ARGS__ })
+
+// Computes the shared memory requirements for a given error diffusion kernel.
+// This can be used to test up-front whether or not error diffusion would be
+// supported or not, before having to initialize textures.
+PL_API size_t pl_error_diffusion_shmem_req(const struct pl_error_diffusion_kernel *kernel,
+                                           int height);
+
+// Apply an error diffusion dithering kernel. This is a much more expensive and
+// heavy dithering method, and is not generally recommended for realtime usage
+// where performance is critical.
+//
+// Requires compute shader support. Returns false if dithering fails, e.g. as a
+// result of shader memory limits being exceeded. The resulting shader must be
+// dispatched with a work group count of exactly 1.
+PL_API bool pl_shader_error_diffusion(pl_shader sh, const struct pl_error_diffusion_params *params);
+
+PL_API_END
+
+#endif // LIBPLACEBO_SHADERS_DITHERING_H_
diff --git a/src/include/libplacebo/shaders/film_grain.h b/src/include/libplacebo/shaders/film_grain.h
new file mode 100644
index 0000000..8a9c78b
--- /dev/null
+++ b/src/include/libplacebo/shaders/film_grain.h
@@ -0,0 +1,137 @@
+/*
+ * This file is part of libplacebo, which is normally licensed under the terms
+ * of the LGPL v2.1+. 
However, this file (film_grain.h) is also available under + * the terms of the more permissive MIT license: + * + * Copyright (c) 2018-2019 Niklas Haas + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef LIBPLACEBO_SHADERS_FILM_GRAIN_H_ +#define LIBPLACEBO_SHADERS_FILM_GRAIN_H_ + +// Film grain synthesis shaders for AV1 / H.274. + +#include <stdint.h> +#include <stdbool.h> + +#include <libplacebo/colorspace.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +enum pl_film_grain_type { + PL_FILM_GRAIN_NONE = 0, + PL_FILM_GRAIN_AV1, + PL_FILM_GRAIN_H274, + PL_FILM_GRAIN_COUNT, +}; + +// AV1 film grain parameters. For the exact meaning of these, see the AV1 +// specification (section 6.8.20). 
+struct pl_av1_grain_data {
+    int num_points_y;
+    uint8_t points_y[14][2]; // [n][0] = value, [n][1] = scaling
+    bool chroma_scaling_from_luma;
+    int num_points_uv[2]; // should be {0} for grayscale images
+    uint8_t points_uv[2][10][2]; // like points_y for points_uv[0, 1] = u, v
+    int scaling_shift;
+    int ar_coeff_lag;
+    int8_t ar_coeffs_y[24];
+    int8_t ar_coeffs_uv[2][25];
+    int ar_coeff_shift;
+    int grain_scale_shift;
+    int8_t uv_mult[2];
+    int8_t uv_mult_luma[2];
+    int16_t uv_offset[2]; // 9-bit value, range [-256, 255]
+    bool overlap;
+};
+
+// H.274 film grain parameters. For the exact meaning of these, see the H.274
+// specification (section 8.5).
+struct pl_h274_grain_data {
+    int model_id;
+    int blending_mode_id;
+    int log2_scale_factor;
+    bool component_model_present[3];
+    uint16_t num_intensity_intervals[3];
+    uint8_t num_model_values[3];
+    const uint8_t *intensity_interval_lower_bound[3];
+    const uint8_t *intensity_interval_upper_bound[3];
+    const int16_t (*comp_model_value[3])[6];
+};
+
+// Tagged union for film grain data
+struct pl_film_grain_data {
+    enum pl_film_grain_type type; // film grain type
+    uint64_t seed; // shared seed value
+
+    union {
+        // Warning: These values are not sanity-checked at all. Invalid grain
+        // data results in undefined behavior!
+        struct pl_av1_grain_data av1;
+        struct pl_h274_grain_data h274;
+    } params;
+};
+
+// Options for the `pl_shader_film_grain` call. 
+struct pl_film_grain_params { + // Required for all film grain types: + struct pl_film_grain_data data; // film grain data + pl_tex tex; // texture to sample from + struct pl_color_repr *repr; // underlying color representation (see notes) + int components; + int component_mapping[4]; // same as `struct pl_plane` + + // Notes for `repr`: + // - repr->bits affects the rounding for grain generation + // - repr->levels affects whether or not we clip to full range or not + // - repr->sys affects the interpretation of channels + // - *repr gets normalized by this shader, which is why it's a pointer + + // Required for PL_FILM_GRAIN_AV1 only: + pl_tex luma_tex; // "luma" texture (see notes) + int luma_comp; // index of luma in `luma_tex` + + // Notes for `luma_tex`: + // - `luma_tex` must be specified if the `tex` does not itself contain the + // "luma-like" component. For XYZ systems, the Y channel is the luma + // component. For RGB systems, the G channel is. +}; + +#define pl_film_grain_params(...) (&(struct pl_film_grain_params) { __VA_ARGS__ }) + +// Test if film grain needs to be applied. This is a helper function that users +// can use to decide whether or not `pl_shader_film_grain` needs to be called, +// based on the given grain metadata. +PL_API bool pl_needs_film_grain(const struct pl_film_grain_params *params); + +// Sample from a texture while applying film grain at the same time. +// `grain_state` must be unique for every plane configuration, as it may +// contain plane-dependent state. +// +// Returns false on any error, or if film grain generation is not supported +// due to GLSL limitations. 
+PL_API bool pl_shader_film_grain(pl_shader sh, pl_shader_obj *grain_state, + const struct pl_film_grain_params *params); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_FILM_GRAIN_H_ diff --git a/src/include/libplacebo/shaders/icc.h b/src/include/libplacebo/shaders/icc.h new file mode 100644 index 0000000..a4003f4 --- /dev/null +++ b/src/include/libplacebo/shaders/icc.h @@ -0,0 +1,135 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_ICC_H_ +#define LIBPLACEBO_SHADERS_ICC_H_ + +// Functions for generating and applying ICC-derived (3D)LUTs + +#include <libplacebo/colorspace.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +struct pl_icc_params { + // The rendering intent to use, for profiles with multiple intents. A + // recommended value is PL_INTENT_RELATIVE_COLORIMETRIC for color-accurate + // video reproduction, or PL_INTENT_PERCEPTUAL for profiles containing + // meaningful perceptual mapping tables for some more suitable color space + // like BT.709. + // + // If this is set to the special value PL_INTENT_AUTO, will use the + // preferred intent provided by the profile header. + enum pl_rendering_intent intent; + + // The size of the 3DLUT to generate. If left as NULL, these individually + // default to values appropriate for the profile. 
(Based on internal + // precision heuristics) + // + // Note: Setting this manually is strongly discouraged, as it can result + // in excessively high 3DLUT sizes where a much smaller LUT would have + // sufficed. + int size_r, size_g, size_b; + + // This field can be used to override the detected brightness level of the + // ICC profile. If you set this to the special value 0 (or a negative + // number), libplacebo will attempt reading the brightness value from the + // ICC profile's tagging (if available), falling back to PL_COLOR_SDR_WHITE + // if unavailable. + float max_luma; + + // Force black point compensation. May help avoid crushed or raised black + // points on "improper" profiles containing e.g. colorimetric tables that + // do not round-trip. Should not be required on well-behaved profiles, + // or when using PL_INTENT_PERCEPTUAL, but YMMV. + bool force_bpc; + + // If provided, this pl_cache instance will be used, instead of the + // GPU-internal cache, to cache the generated 3DLUTs. Note that these can + // get large, especially for large values of size_{r,g,b}, so the user may + // wish to split this cache off from the main shader cache. (Optional) + pl_cache cache; + + // Deprecated legacy caching API. Replaced by `cache`. + PL_DEPRECATED void *cache_priv; + PL_DEPRECATED void (*cache_save)(void *priv, uint64_t sig, const uint8_t *cache, size_t size); + PL_DEPRECATED bool (*cache_load)(void *priv, uint64_t sig, uint8_t *cache, size_t size); +}; + +#define PL_ICC_DEFAULTS \ + .intent = PL_INTENT_RELATIVE_COLORIMETRIC, \ + .max_luma = PL_COLOR_SDR_WHITE, + +#define pl_icc_params(...) (&(struct pl_icc_params) { PL_ICC_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_icc_params pl_icc_default_params; + +// This object represents a "parsed" ICC profile. +typedef const struct pl_icc_object_t { + // Provided params, with the `intent` and `size` fields set (as described) + struct pl_icc_params params; + + // Signature of the corresponding ICC profile. 
+    uint64_t signature;
+
+    // Detected color space (or UNKNOWN for profiles which don't contain an
+    // exact match), with HDR metadata set to the detected gamut and
+    // white/black value ranges.
+    struct pl_color_space csp;
+
+    // Best estimate of profile gamma. This only serves as a rough guideline.
+    float gamma;
+
+    // Smallest containing primary set, always set.
+    enum pl_color_primaries containing_primaries;
+} *pl_icc_object;
+
+// Attempts opening/parsing the contents of an ICC profile. The resulting
+// object is memory managed and may outlive the original profile - access
+// to the underlying profile is no longer needed once this returns.
+PL_API pl_icc_object pl_icc_open(pl_log log, const struct pl_icc_profile *profile,
+                                 const struct pl_icc_params *params);
+PL_API void pl_icc_close(pl_icc_object *icc);
+
+// Update an existing pl_icc_object, which may be NULL, replacing it by the
+// new profile and parameters (if incompatible).
+//
+// Returns success. `obj` is set to the created profile, or NULL on error.
+//
+// Note: If `profile->signature` matches `(*obj)->signature`, or if `profile` is
+// NULL, then the existing profile is directly reused, with only the effective
+// parameters changing. In this case, `profile->data` is also *not* read from,
+// and may safely be NULL.
+PL_API bool pl_icc_update(pl_log log, pl_icc_object *obj,
+                          const struct pl_icc_profile *profile,
+                          const struct pl_icc_params *params);
+
+// Decode the input from the colorspace determined by the attached ICC profile
+// to linear light RGB (in the profile's containing primary set). `lut` must be
+// set to a shader object that will store the GPU resources associated with the
+// generated LUT. The resulting color space will be written to `out_csp`. 
+PL_API void pl_icc_decode(pl_shader sh, pl_icc_object profile, pl_shader_obj *lut, + struct pl_color_space *out_csp); + +// Encode the input from linear light RGB (in the profile's containing primary +// set) into the colorspace determined by the attached ICC profile. `lut` must +// be set to a shader object that will store the GPU resources associated with +// the generated LUT. +PL_API void pl_icc_encode(pl_shader sh, pl_icc_object profile, pl_shader_obj *lut); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_ICC_H_ diff --git a/src/include/libplacebo/shaders/lut.h b/src/include/libplacebo/shaders/lut.h new file mode 100644 index 0000000..6e30ddc --- /dev/null +++ b/src/include/libplacebo/shaders/lut.h @@ -0,0 +1,78 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_LUT_H_ +#define LIBPLACEBO_SHADERS_LUT_H_ + +// Shaders for loading and applying arbitrary custom 1D/3DLUTs + +#include <libplacebo/colorspace.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +// Struct defining custom LUTs +// +// Note: Users may freely create their own instances of this struct, there is +// nothing particularly special about `pl_lut_parse_cube`. +struct pl_custom_lut { + // Some unique signature identifying this LUT, needed to detect state + // changes (for cache invalidation). 
This should ideally be a hash of the + // file contents. (Which is what `pl_lut_parse_*` will set it to.) + uint64_t signature; + + // Size of each dimension, in the order R, G, B. For 1D LUTs, only the R + // dimension should be specified (the others left as 0). + int size[3]; + + // Raw LUT data itself, in properly scaled floating point format. For 3D + // LUTs, the innermost dimension is the first dimension (R), and the + // outermost dimension is the last dimension (B). Individual color samples + // are in the order R, G, B. + const float *data; + + // Extra input/output shaper matrices. Ignored if equal to {0}. This is + // mostly useful for 1D LUTs, since 3D LUTs can bake the shaper matrix into + // the LUT itself - but it can still help optimize LUT precision. + pl_matrix3x3 shaper_in, shaper_out; + + // Nominal metadata for the input/output of a LUT. Left as {0} if unknown. + // Note: This is purely informative, `pl_shader_custom_lut` ignores it. + struct pl_color_repr repr_in, repr_out; + struct pl_color_space color_in, color_out; +}; + +// Parse a 3DLUT in .cube format. Returns NULL if the file fails parsing. +PL_API struct pl_custom_lut *pl_lut_parse_cube(pl_log log, const char *str, size_t str_len); + +// Frees a LUT created by `pl_lut_parse_*`. +PL_API void pl_lut_free(struct pl_custom_lut **lut); + +// Apply a `pl_custom_lut`. The user is responsible for ensuring colors going +// into the LUT are in the expected format as informed by the LUT metadata. +// +// `lut_state` must be a pointer to a NULL-initialized shader state object that +// will be used to encapsulate any required GPU state. +// +// Note: `lut` does not have to be allocated by `pl_lut_parse_*`. It can be a +// struct filled out by the user. 
+PL_API void pl_shader_custom_lut(pl_shader sh, const struct pl_custom_lut *lut, + pl_shader_obj *lut_state); + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_LUT_H_ diff --git a/src/include/libplacebo/shaders/sampling.h b/src/include/libplacebo/shaders/sampling.h new file mode 100644 index 0000000..5221e44 --- /dev/null +++ b/src/include/libplacebo/shaders/sampling.h @@ -0,0 +1,257 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SHADERS_SAMPLING_H_ +#define LIBPLACEBO_SHADERS_SAMPLING_H_ + +// Sampling operations. These shaders perform some form of sampling operation +// from a given pl_tex. In order to use these, the pl_shader *must* have been +// created using the same `gpu` as the originating `pl_tex`. Otherwise, this +// is undefined behavior. They require nothing (PL_SHADER_SIG_NONE) and return +// a color (PL_SHADER_SIG_COLOR). + +#include <libplacebo/colorspace.h> +#include <libplacebo/filters.h> +#include <libplacebo/shaders.h> + +PL_API_BEGIN + +// Common parameters for sampling operations +struct pl_sample_src { + // There are two mutually exclusive ways of providing the source to sample + // from: + // + // 1. Provide the texture and sampled region directly. 
This generates + // a shader with input signature `PL_SHADER_SIG_NONE`, which binds the + // texture as a descriptor (and the coordinates as a vertex attribute) + pl_tex tex; // texture to sample + pl_rect2df rect; // sub-rect to sample from (optional) + enum pl_tex_address_mode address_mode; // preferred texture address mode + + // 2. Have the shader take it as an argument. Doing this requires + // specifying the missing metadata of the texture backing the sampler, so + // that the shader generation can generate the correct code. + int tex_w, tex_h; // dimensions of the actual texture + enum pl_fmt_type format; // format of the sampler being accepted + enum pl_sampler_type sampler; // type of the sampler being accepted + enum pl_tex_sample_mode mode; // sample mode of the sampler being accepted + float sampled_w, sampled_h; // dimensions of the sampled region (optional) + + // Common metadata for both sampler input types: + int components; // number of components to sample (optional) + uint8_t component_mask; // bitmask of components to sample (optional) + int new_w, new_h; // dimensions of the resulting output (optional) + float scale; // factor to multiply into sampled signal (optional) + + // Note: `component_mask` and `components` are mutually exclusive, the + // former is preferred if both are specified. +}; + +#define pl_sample_src(...) (&(struct pl_sample_src) { __VA_ARGS__ }) + +struct pl_deband_params { + // The number of debanding steps to perform per sample. Each step reduces a + // bit more banding, but takes time to compute. Note that the strength of + // each step falls off very quickly, so high numbers (>4) are practically + // useless. Defaults to 1. + int iterations; + + // The debanding filter's cut-off threshold. Higher numbers increase the + // debanding strength dramatically, but progressively diminish image + // details. Defaults to 3.0. + float threshold; + + // The debanding filter's initial radius. 
The radius increases linearly + // for each iteration. A higher radius will find more gradients, but a + // lower radius will smooth more aggressively. Defaults to 16.0. + float radius; + + // Add some extra noise to the image. This significantly helps cover up + // remaining quantization artifacts. Higher numbers add more noise. + // Note: When debanding HDR sources, even a small amount of grain can + // result in a very big change to the brightness level. It's recommended to + // either scale this value down or disable it entirely for HDR. + // + // Defaults to 4.0, which is very mild. + float grain; + + // 'Neutral' grain value for each channel being debanded (sorted in order + // from low to high index). Grain application will be modulated to avoid + // disturbing colors close to this value. Set this to a value corresponding + // to black in the relevant colorspace. + float grain_neutral[3]; +}; + +#define PL_DEBAND_DEFAULTS \ + .iterations = 1, \ + .threshold = 3.0, \ + .radius = 16.0, \ + .grain = 4.0, + +#define pl_deband_params(...) (&(struct pl_deband_params) {PL_DEBAND_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_deband_params pl_deband_default_params; + +// Debands a given texture and returns the sampled color in `vec4 color`. If +// `params` is left as NULL, it defaults to &pl_deband_default_params. Note +// that `tex->params.format` must have PL_FMT_CAP_LINEAR. When the given +// `pl_sample_src` implies scaling, this effectively performs bilinear +// sampling on the input (but not the output). +// +// Note: This can also be used as a pure grain function, by setting the number +// of iterations to 0. +PL_API void pl_shader_deband(pl_shader sh, const struct pl_sample_src *src, + const struct pl_deband_params *params); + +// Performs direct / native texture sampling, using whatever texture filter is +// available (linear for linearly sampleable sources, nearest otherwise). 
+//
+// Note: This is generally very low quality and should be avoided if possible,
+// for both upscaling and downscaling.
+PL_API bool pl_shader_sample_direct(pl_shader sh, const struct pl_sample_src *src);
+
+// Performs hardware-accelerated nearest neighbour sampling. This is similar to
+// `pl_shader_sample_direct`, but forces nearest neighbour interpolation.
+PL_API bool pl_shader_sample_nearest(pl_shader sh, const struct pl_sample_src *src);
+
+// Performs hardware-accelerated bilinear sampling. This is similar to
+// `pl_shader_sample_direct`, but forces bilinear interpolation.
+PL_API bool pl_shader_sample_bilinear(pl_shader sh, const struct pl_sample_src *src);
+
+// Optimized versions of specific, strictly positive scaler kernels that take
+// advantage of linear texture sampling to reduce the number of fetches needed
+// by a factor of four. This family of functions performs radius-2 scaling
+// with only four texture fetches, which is far more efficient than using
+// the generalized 1D scaling method. Only works well for upscaling.
+PL_API bool pl_shader_sample_bicubic(pl_shader sh, const struct pl_sample_src *src);
+PL_API bool pl_shader_sample_hermite(pl_shader sh, const struct pl_sample_src *src);
+PL_API bool pl_shader_sample_gaussian(pl_shader sh, const struct pl_sample_src *src);
+
+// A sampler that is similar to nearest neighbour sampling, but tries to
+// preserve pixel aspect ratios. This is mathematically equivalent to taking an
+// idealized image with square pixels, sampling it at an infinite resolution,
+// and then downscaling that to the desired resolution. (Hence it being called
+// "oversample"). Good for pixel art.
+//
+// The threshold provides a cutoff threshold below which the contribution of
+// pixels should be ignored, trading some amount of aspect ratio distortion for
+// a slightly crisper image. A value of `threshold == 0.5` makes this filter
+// equivalent to regular nearest neighbour sampling. 
+PL_API bool pl_shader_sample_oversample(pl_shader sh, const struct pl_sample_src *src, + float threshold); + +struct pl_sample_filter_params { + // The filter to use for sampling. + struct pl_filter_config filter; + + // Antiringing strength. A value of 0.0 disables antiringing, and a value + // of 1.0 enables full-strength antiringing. Defaults to 0.0 if + // unspecified. + // + // Note: Ignored if `filter.antiring` is already set to something nonzero. + float antiring; + + // Disable the use of compute shaders (e.g. if rendering to non-storable tex) + bool no_compute; + // Disable the use of filter widening / anti-aliasing (for downscaling) + bool no_widening; + + // This shader object is used to store the LUT, and will be recreated + // if necessary. To avoid thrashing the resource, users should avoid trying + // to re-use the same LUT for different filter configurations or scaling + // ratios. Must be set to a valid pointer, and the target NULL-initialized. + pl_shader_obj *lut; + + // Deprecated / removed fields + int lut_entries PL_DEPRECATED; // hard-coded as 256 + float cutoff PL_DEPRECATED; // hard-coded as 1e-3 +}; + +#define pl_sample_filter_params(...) (&(struct pl_sample_filter_params) { __VA_ARGS__ }) + +// Performs polar sampling. This internally chooses between an optimized compute +// shader, and various fragment shaders, depending on the supported GLSL version +// and GPU features. Returns whether or not it was successful. +// +// Note: `params->filter.polar` must be true to use this function. +PL_API bool pl_shader_sample_polar(pl_shader sh, const struct pl_sample_src *src, + const struct pl_sample_filter_params *params); + +// Performs orthogonal (1D) sampling. Using this twice in a row (once vertical +// and once horizontal) effectively performs a 2D upscale. This is lower +// quality than polar sampling, but significantly faster, and therefore the +// recommended default. Returns whether or not it was successful. 
+// +// `src` must represent a scaling operation that only scales in one direction, +// i.e. either only X or only Y. The other direction must be left unscaled. +// +// Note: Due to internal limitations, this may currently only be used on 2D +// textures - even though the basic principle would work for 1D and 3D textures +// as well. +PL_API bool pl_shader_sample_ortho2(pl_shader sh, const struct pl_sample_src *src, + const struct pl_sample_filter_params *params); + +struct pl_distort_params { + // An arbitrary 2x2 affine transformation to apply to the input image. + // For simplicity, the input image is explicitly centered and scaled such + // that the longer dimension is in [-1,1], before applying this. + pl_transform2x2 transform; + + // If true, the texture is placed inside the center of the canvas without + // scaling. If false, it is effectively stretched to the canvas size. + bool unscaled; + + // If true, the transformation is automatically scaled down and shifted to + // ensure that the resulting image fits inside the output canvas. + bool constrain; + + // If true, use bicubic interpolation rather than faster bilinear + // interpolation. Higher quality but slower. + bool bicubic; + + // Specifies the texture address mode to use when sampling out of bounds. + enum pl_tex_address_mode address_mode; + + // If set, all out-of-bounds accesses will instead be treated as + // transparent, according to the given alpha mode. (Which should match the + // alpha mode of the texture) + // + // Note: `address_mode` has no effect when this is specified. + enum pl_alpha_mode alpha_mode; +}; + +#define PL_DISTORT_DEFAULTS \ + .transform.mat.m = {{ 1, 0 }, {0, 1}}, + +#define pl_distort_params(...) (&(struct pl_distort_params) {PL_DISTORT_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_distort_params pl_distort_default_params; + +// Distorts the input image using a given set of transformation parameters. 
+// `out_w` and `out_h` determine the size of the effective canvas inside which +// the distorted result may be rendered. Areas outside of this canvas will +// be implicitly cut off. +PL_API void pl_shader_distort(pl_shader sh, pl_tex tex, int out_w, int out_h, + const struct pl_distort_params *params); + +enum PL_DEPRECATED { // for `int pass` + PL_SEP_VERT = 0, + PL_SEP_HORIZ, + PL_SEP_PASSES +}; + +PL_API_END + +#endif // LIBPLACEBO_SHADERS_SAMPLING_H_ diff --git a/src/include/libplacebo/swapchain.h b/src/include/libplacebo/swapchain.h new file mode 100644 index 0000000..b53aa5c --- /dev/null +++ b/src/include/libplacebo/swapchain.h @@ -0,0 +1,171 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_SWAPCHAIN_H_ +#define LIBPLACEBO_SWAPCHAIN_H_ + +#include <libplacebo/common.h> +#include <libplacebo/colorspace.h> +#include <libplacebo/gpu.h> + +PL_API_BEGIN + +// This abstraction represents a low-level interface to visible surfaces +// exposed by a graphics API (and accompanying GPU instance), allowing users to +// directly present frames to the screen (or window, typically). This is a +// sister API to gpu.h and follows the same convention w.r.t undefined behavior. 
+// +// Thread-safety: Safe +typedef const struct pl_swapchain_t { + pl_log log; + pl_gpu gpu; +} *pl_swapchain; + +// Destroys this swapchain. May be used at any time, and may block until the +// completion of all outstanding rendering commands. The swapchain and any +// resources retrieved from it must not be used afterwards. +PL_API void pl_swapchain_destroy(pl_swapchain *sw); + +// Returns the approximate current swapchain latency in vsyncs, or 0 if +// unknown. A latency of 1 means that `submit_frame` followed by `swap_buffers` +// will block until the just-submitted frame has finished rendering. Typical +// values are 2 or 3, which enable better pipelining by allowing the GPU to be +// processing one or two frames at the same time as the user is preparing the +// next for submission. +PL_API int pl_swapchain_latency(pl_swapchain sw); + +// Update/query the swapchain size. This function performs both roles: it tries +// setting the swapchain size to the values requested by the user, and returns +// in the same variables what width/height the swapchain was actually set to - +// which may be (substantially) different from the values requested by the +// user. A value of 0 means "unknown/none" (in which case, libplacebo won't try +// updating the size - it will simply return the current state of the +// swapchain). It's also possible for libplacebo to return values of 0, such as +// in the case that the swapchain doesn't exist yet. +// +// Returns false on significant errors (e.g. dead surface). This function can +// effectively be used to probe if creating a swapchain works. +PL_API bool pl_swapchain_resize(pl_swapchain sw, int *width, int *height); + +// Backwards compatibility +#define pl_swapchain_colors pl_color_space + +// Inform the swapchain about the input color space. 
This API deliberately +// provides no feedback, because the swapchain can internally decide what to do +// with this information, including ignoring it entirely, or applying it +// asynchronously. Users must still base their rendering on the value of +// `pl_swapchain_frame.color_space`. +// +// Note: Calling this function a second time completely overrides any +// previously specified hint. So calling this on {0} or NULL resets the +// swapchain back to its initial/preferred colorspace. +// +// Note: If `csp->transfer` is a HDR transfer curve but HDR metadata is left +// unspecified, the HDR metadata defaults to `pl_hdr_metadata_hdr10`. +// Conversely, if the HDR metadata is non-empty but `csp->transfer` is left as +// PL_COLOR_TRC_UNKNOWN, then it instead defaults to PL_COLOR_TRC_PQ. +PL_API void pl_swapchain_colorspace_hint(pl_swapchain sw, const struct pl_color_space *csp); + +// The struct used to hold the results of `pl_swapchain_start_frame` +struct pl_swapchain_frame { + // A texture representing the framebuffer users should use for rendering. + // It's guaranteed that `fbo->params.renderable` and `fbo->params.blit_dst` + // will be true, but no other guarantees are made - not even that + // `fbo->params.format` is a real format. + pl_tex fbo; + + // If true, the user should assume that this framebuffer will be flipped + // as a result of presenting it on-screen. If false, nothing special needs + // to be done - but if true, users should flip the coordinate system of + // the `pl_pass` that is rendering to this framebuffer. + // + // Note: Normally, libplacebo follows the convention that (0,0) represents + // the top left of the image/screen. So when flipped is true, this means + // (0,0) on this framebuffer gets displayed as the bottom left of the image. 
+ bool flipped; + + // Indicates the color representation this framebuffer will be interpreted + // as by the host system / compositor / display, including the bit depth + // and alpha handling (where available). + struct pl_color_repr color_repr; + struct pl_color_space color_space; +}; + +// Retrieve a new frame from the swapchain. Returns whether successful. It's +// worth noting that this function can fail sporadically for benign reasons, +// for example the window being invisible or inaccessible. This function may +// block until an image is available, which may be the case if the GPU is +// rendering frames significantly faster than the display can output them. It +// may also be non-blocking, so users shouldn't rely on this call alone in +// order to meter rendering speed. (Specifics depend on the underlying graphics +// API) +PL_API bool pl_swapchain_start_frame(pl_swapchain sw, struct pl_swapchain_frame *out_frame); + +// Submits the previously started frame. Non-blocking. This must be issued in +// lockstep with pl_swapchain_start_frame - there is no way to start multiple +// frames and submit them out-of-order. The frames submitted this way will +// generally be made visible in a first-in first-out fashion, although +// specifics depend on the mechanism used to create the pl_swapchain. (See the +// platform-specific APIs for more info). +// +// Returns whether successful. This should normally never fail, unless the +// GPU/surface has been lost or some other critical error has occurred. The +// "started" frame is consumed even in the event of failure. +// +// Note that `start_frame` and `submit_frame` form a lock pair, i.e. trying to +// call e.g. `pl_swapchain_resize` from another thread will block until +// `pl_swapchain_submit_frame` is finished. +PL_API bool pl_swapchain_submit_frame(pl_swapchain sw); + +// Performs a "buffer swap", or some generalization of the concept. 
In layman's +// terms, this blocks until the execution of the Nth previously submitted frame +// has been "made complete" in some sense. (The N derives from the swapchain's +// built-in latency. See `pl_swapchain_latency` for more information). +// +// Users should include this call in their rendering loops in order to make +// sure they aren't submitting rendering commands faster than the GPU can +// process them, which would potentially lead to a queue overrun or exhaust +// memory. +// +// An example loop might look like this: +// +// while (rendering) { +// struct pl_swapchain_frame frame; +// bool ok = pl_swapchain_start_frame(swapchain, &frame); +// if (!ok) { +// /* wait some time, or decide to stop rendering */ +// continue; +// } +// +// /* do some rendering with frame.fbo */ +// +// ok = pl_swapchain_submit_frame(swapchain); +// if (!ok) +// break; +// +// pl_swapchain_swap_buffers(swapchain); +// } +// +// The duration this function blocks for, if at all, may be very inconsistent +// and should not be used as an authoritative source of vsync timing +// information without sufficient smoothing/filtering (and if so, the time that +// `start_frame` blocked for should also be included). +PL_API void pl_swapchain_swap_buffers(pl_swapchain sw); + +PL_API_END + +#endif // LIBPLACEBO_SWAPCHAIN_H_ diff --git a/src/include/libplacebo/tone_mapping.h b/src/include/libplacebo/tone_mapping.h new file mode 100644 index 0000000..48f1eb7 --- /dev/null +++ b/src/include/libplacebo/tone_mapping.h @@ -0,0 +1,268 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_TONE_MAPPING_H_ +#define LIBPLACEBO_TONE_MAPPING_H_ + +#include <stddef.h> +#include <stdbool.h> + +#include <libplacebo/common.h> +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +struct pl_tone_map_params; +struct pl_tone_map_function { + const char *name; // Identifier + const char *description; // Friendly / longer name + + // This controls the type of values input/output to/from `map` + enum pl_hdr_scaling scaling; + + // The tone-mapping function itself. Iterates over all values in `lut`, and + // adapts them as needed. + // + // Note that the `params` struct fed into this function is guaranteed to + // satisfy `params->input_scaling == params->output_scaling == scaling`, + // and also obeys `params->input_max >= params->output_max`. + void (*map)(float *lut, const struct pl_tone_map_params *params); + + // Inverse tone mapping function. Optional. If absent, this tone mapping + // curve only works in the forwards direction. + // + // For this function, `params->input_max <= params->output_max`. + void (*map_inverse)(float *lut, const struct pl_tone_map_params *params); + + // Private data. Unused by libplacebo, but may be accessed by `map`. + void *priv; + + // --- Deprecated fields + const char *param_desc PL_DEPRECATED; + float param_min PL_DEPRECATED; + float param_def PL_DEPRECATED; + float param_max PL_DEPRECATED; +}; + +struct pl_tone_map_constants { + // Configures the knee point, as a ratio between the source average and + // target average (in PQ space). 
An adaptation of 1.0 always adapts the
+    // source scene average brightness to the (scaled) target average,
+    // while a value of 0.0 never modifies scene brightness. [0,1]
+    //
+    // Affects all methods that use the ST2094 knee point determination
+    // (currently ST2094-40, ST2094-10 and spline)
+    float knee_adaptation;
+
+    // Configures the knee point minimum and maximum, respectively, as
+    // a percentage of the PQ luminance range. Provides a hard limit on the
+    // knee point chosen by `knee_adaptation`.
+    float knee_minimum;  // (0, 0.5)
+    float knee_maximum;  // (0.5, 1.0)
+
+    // Default knee point to use in the absence of source scene average
+    // metadata. Normally, this is ignored in favor of picking the knee
+    // point as the (relative) source scene average brightness level.
+    float knee_default;  // [knee_minimum, knee_maximum]
+
+    // Knee point offset (for BT.2390 only). Note that a value of 0.5 is
+    // the spec-defined default behavior, which differs from the libplacebo
+    // default of 1.0. [0.5, 2]
+    float knee_offset;
+
+    // For the single-pivot polynomial (spline) function, this controls the
+    // coefficients used to tune the slope of the curve. This tuning is designed
+    // to make the slope closer to 1.0 when the difference in peaks is low,
+    // and closer to linear when the difference between peaks is high.
+    float slope_tuning;  // [0,10]
+    float slope_offset;  // [0,1]
+
+    // Contrast setting for the spline function. Higher values make the curve
+    // steeper (closer to `clip`), preserving midtones at the cost of losing
+    // shadow/highlight details, while lower values make the curve shallower
+    // (closer to `linear`), preserving highlights at the cost of losing midtone
+    // contrast. Values above 1.0 are possible, resulting in an output with more
+    // contrast than the input.
+    float spline_contrast;  // [0,1.5]
+
+    // For the reinhard function, this specifies the local contrast coefficient
+    // at the display peak. 
Essentially, a value of 0.5 implies that the + // reference white will be about half as bright as when clipping. (0,1) + float reinhard_contrast; + + // For legacy functions (mobius, gamma) which operate on linear light, this + // directly sets the corresponding knee point. (0,1) + float linear_knee; + + // For linear methods (linear, linearlight), this controls the linear + // exposure/gain applied to the image. (0,10] + float exposure; +}; + +#define PL_TONE_MAP_CONSTANTS \ + .knee_adaptation = 0.4f, \ + .knee_minimum = 0.1f, \ + .knee_maximum = 0.8f, \ + .knee_default = 0.4f, \ + .knee_offset = 1.0f, \ + .slope_tuning = 1.5f, \ + .slope_offset = 0.2f, \ + .spline_contrast = 0.5f, \ + .reinhard_contrast = 0.5f, \ + .linear_knee = 0.3f, \ + .exposure = 1.0f, + +struct pl_tone_map_params { + // If `function` is NULL, defaults to `pl_tone_map_clip`. + const struct pl_tone_map_function *function; + + // Common constants, should be initialized to PL_TONE_MAP_CONSTANTS if + // not intending to override them further. + struct pl_tone_map_constants constants; + + // The desired input/output scaling of the tone map. If this differs from + // `function->scaling`, any required conversion will be performed. + // + // Note that to maximize LUT efficiency, it's *highly* recommended to use + // either PL_HDR_PQ or PL_HDR_SQRT as the input scaling, except when + // using `pl_tone_map_sample`. + enum pl_hdr_scaling input_scaling; + enum pl_hdr_scaling output_scaling; + + // The size of the resulting LUT. (For `pl_tone_map_generate` only) + size_t lut_size; + + // The characteristics of the input, in `input_scaling` units. + float input_min; + float input_max; + float input_avg; // or 0 if unknown + + // The desired characteristics of the output, in `output_scaling` units. + float output_min; + float output_max; + + // The input HDR metadata. Only used by a select few tone-mapping + // functions, currently only SMPTE ST2094. 
(Optional) + struct pl_hdr_metadata hdr; + + // --- Deprecated fields + float param PL_DEPRECATED; // see `constants` +}; + +#define pl_tone_map_params(...) (&(struct pl_tone_map_params) { __VA_ARGS__ }); + +// Note: Only does pointer equality testing on `function` +PL_API bool pl_tone_map_params_equal(const struct pl_tone_map_params *a, + const struct pl_tone_map_params *b); + +// Clamps/defaults the parameters, including input/output maximum. +PL_API void pl_tone_map_params_infer(struct pl_tone_map_params *params); + +// Returns true if the given tone mapping configuration effectively represents +// a no-op configuration. Tone mapping can be skipped in this case (although +// strictly speaking, the LUT would still clip illegal input values) +PL_API bool pl_tone_map_params_noop(const struct pl_tone_map_params *params); + +// Generate a tone-mapping LUT for a given configuration. This will always +// span the entire input range, as given by `input_min` and `input_max`. +PL_API void pl_tone_map_generate(float *out, const struct pl_tone_map_params *params); + +// Samples a tone mapping function at a single position. Note that this is less +// efficient than `pl_tone_map_generate` for generating multiple values. +// +// Ignores `params->lut_size`. +PL_API float pl_tone_map_sample(float x, const struct pl_tone_map_params *params); + +// Performs no tone-mapping, just clips out-of-range colors. Retains perfect +// color accuracy for in-range colors but completely destroys out-of-range +// information. Does not perform any black point adaptation. +PL_API extern const struct pl_tone_map_function pl_tone_map_clip; + +// EETF from SMPTE ST 2094-40 Annex B, which uses the provided OOTF based on +// Bezier curves to perform tone-mapping. The OOTF used is adjusted based on +// the ratio between the targeted and actual display peak luminances. In the +// absence of HDR10+ metadata, falls back to a simple constant bezier curve. 
+PL_API extern const struct pl_tone_map_function pl_tone_map_st2094_40; + +// EETF from SMPTE ST 2094-10 Annex B.2, which takes into account the input +// signal average luminance in addition to the maximum/minimum. +// +// Note: This does *not* currently include the subjective gain/offset/gamma +// controls defined in Annex B.3. (Open an issue with a valid sample file if +// you want such parameters to be respected.) +PL_API extern const struct pl_tone_map_function pl_tone_map_st2094_10; + +// EETF from the ITU-R Report BT.2390, a hermite spline roll-off with linear +// segment. +PL_API extern const struct pl_tone_map_function pl_tone_map_bt2390; + +// EETF from ITU-R Report BT.2446, method A. Can be used for both forward +// and inverse tone mapping. +PL_API extern const struct pl_tone_map_function pl_tone_map_bt2446a; + +// Simple spline consisting of two polynomials, joined by a single pivot point, +// which is tuned based on the source scene average brightness (taking into +// account dynamic metadata if available). This function can be used +// for both forward and inverse tone mapping. +PL_API extern const struct pl_tone_map_function pl_tone_map_spline; + +// Very simple non-linear curve. Named after Erik Reinhard. +PL_API extern const struct pl_tone_map_function pl_tone_map_reinhard; + +// Generalization of the reinhard tone mapping algorithm to support an +// additional linear slope near black. The name is derived from its function +// shape (ax+b)/(cx+d), which is known as a Möbius transformation. +PL_API extern const struct pl_tone_map_function pl_tone_map_mobius; + +// Piece-wise, filmic tone-mapping algorithm developed by John Hable for use in +// Uncharted 2, inspired by a similar tone-mapping algorithm used by Kodak. +// Popularized by its use in video games with HDR rendering. Preserves both +// dark and bright details very well, but comes with the drawback of changing +// the average brightness quite significantly. 
This is sort of similar to +// pl_tone_map_reinhard with `reinhard_contrast=0.24`. +PL_API extern const struct pl_tone_map_function pl_tone_map_hable; + +// Fits a gamma (power) function to transfer between the source and target +// color spaces, effectively resulting in a perceptual hard-knee joining two +// roughly linear sections. This preserves details at all scales, but can result +// in an image with a muted or dull appearance. +PL_API extern const struct pl_tone_map_function pl_tone_map_gamma; + +// Linearly stretches the input range to the output range, in PQ space. This +// will preserve all details accurately, but results in a significantly +// different average brightness. Can be used for inverse tone-mapping in +// addition to regular tone-mapping. +PL_API extern const struct pl_tone_map_function pl_tone_map_linear; + +// Like `pl_tone_map_linear`, but in linear light (instead of PQ). Works well +// for small range adjustments but may cause severe darkening when +// downconverting from e.g. 10k nits to SDR. +PL_API extern const struct pl_tone_map_function pl_tone_map_linear_light; + +// A list of built-in tone mapping functions, terminated by NULL +PL_API extern const struct pl_tone_map_function * const pl_tone_map_functions[]; +PL_API extern const int pl_num_tone_map_functions; // excluding trailing NULL + +// Find the tone mapping function with the given name, or NULL on failure. +PL_API const struct pl_tone_map_function *pl_find_tone_map_function(const char *name); + +// Deprecated alias, do not use +#define pl_tone_map_auto pl_tone_map_spline + +PL_API_END + +#endif // LIBPLACEBO_TONE_MAPPING_H_ diff --git a/src/include/libplacebo/utils/dav1d.h b/src/include/libplacebo/utils/dav1d.h new file mode 100644 index 0000000..ece97c5 --- /dev/null +++ b/src/include/libplacebo/utils/dav1d.h @@ -0,0 +1,129 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DAV1D_H_ +#define LIBPLACEBO_DAV1D_H_ + +#include <libplacebo/gpu.h> +#include <libplacebo/utils/upload.h> +#include <dav1d/dav1d.h> + +#if defined(__cplusplus) && !defined(PL_DAV1D_IMPLEMENTATION) +# define PL_DAV1D_API +# define PL_DAV1D_IMPLEMENTATION 0 +# warning Remember to include this file with a PL_DAV1D_IMPLEMENTATION set to 1 in \ + C translation unit to provide implementation. Suppress this warning by \ + defining PL_DAV1D_IMPLEMENTATION to 0 in C++ files. +#elif !defined(PL_DAV1D_IMPLEMENTATION) +# define PL_DAV1D_API static inline +# define PL_DAV1D_IMPLEMENTATION 1 +#else +# define PL_DAV1D_API +#endif + +PL_API_BEGIN + +// Fill in the details of a `pl_frame` from a Dav1dPicture. This function will +// explicitly clear `out_frame`, setting all extra fields to 0. After this +// function returns, the only missing data is information related to the plane +// texture itself (`planes[N].texture`). +// +// Note: This will include all possible metadata, including HDR metadata and +// AV1 film grain data. Users should explicitly clear this out if undesired. +PL_DAV1D_API void pl_frame_from_dav1dpicture(struct pl_frame *out_frame, + const Dav1dPicture *picture); + +// Helper function to generate a `pl_color_space` struct from a Dav1dPicture. 
+// Useful to update the swapchain colorspace mode dynamically (e.g. for HDR). +PL_DAV1D_API void pl_swapchain_colors_from_dav1dpicture(struct pl_color_space *out_colors, + const Dav1dPicture *picture); + +struct pl_dav1d_upload_params { + // The picture to upload. Not modified unless `asynchronous` is true. + Dav1dPicture *picture; + + // If true, film grain present in `picture` will be exported to the + // `pl_frame` as well. This should be set to false unless the user has + // disabled `Dav1dSettings.apply_grain`. + bool film_grain; + + // If true, libplacebo will probe for the allocation metadata set by + // `pl_allocate_dav1dpicture`, and directly import the attached buffers + // (saving a memcpy in some cases). Has no effect if the Dav1dPicture was + // not allocated using `pl_allocate_dav1dpicture`. + // + // Note: When this is the case, `asynchronous` has no further effect - + // uploads from attached buffers are already asynchronous. + bool gpu_allocated; + + // If true, `picture` will be asynchronously uploaded and unref'd + // internally by libplacebo, and the struct passed by the user cleared to + // {0}. This is needed to avoid `memcpy` in some cases, so setting it to + // true is highly recommended wherever possible. + // + // Note: If `pl_upload_dav1dpicture` returns false, `picture` does not get + // unref'd. + bool asynchronous; +}; + +#define pl_dav1d_upload_params(...) (&(struct pl_dav1d_upload_params) { __VA_ARGS__ }) + +// Very high level helper function to take a `Dav1dPicture` and upload it to +// the GPU. Similar in spirit to `pl_upload_plane`, and the same notes apply. +// `tex` must be an array of 3 pointers of type `pl_tex`, each +// either pointing to a valid texture, or NULL. Returns whether successful. +PL_DAV1D_API bool pl_upload_dav1dpicture(pl_gpu gpu, + struct pl_frame *out_frame, pl_tex tex[3], + const struct pl_dav1d_upload_params *params); + +// Allocate a Dav1dPicture from persistently mapped buffers. 
This can be more +// efficient than regular Dav1dPictures, especially when using the synchronous +// `pl_upload_dav1dpicture`, or on platforms that don't support importing +// PL_HANDLE_HOST_PTR as buffers. Returns 0 or a negative DAV1D_ERR value. +// +// Note: These may only be used directly as a Dav1dPicAllocator if the `gpu` +// passed as the value of `cookie` is `pl_gpu.limits.thread_safe`. Otherwise, +// the user must manually synchronize this to ensure it runs on the correct +// thread. +PL_DAV1D_API int pl_allocate_dav1dpicture(Dav1dPicture *picture, void *gpu); +PL_DAV1D_API void pl_release_dav1dpicture(Dav1dPicture *picture, void *gpu); + +// Mapping functions for the various Dav1dColor* enums. Note that these are not +// quite 1:1, and even for values that exist in both, the semantics sometimes +// differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in +// libplacebo and libdav1d, respectively. +PL_DAV1D_API enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc); +PL_DAV1D_API enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys); +PL_DAV1D_API enum pl_color_levels pl_levels_from_dav1d(int color_range); +PL_DAV1D_API int pl_levels_to_dav1d(enum pl_color_levels levels); +PL_DAV1D_API enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim); +PL_DAV1D_API enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim); +PL_DAV1D_API enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc); +PL_DAV1D_API enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc); +PL_DAV1D_API enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc); +PL_DAV1D_API enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc); + + +// Actual implementation, included as part of this header to avoid having +// a compile-time dependency on libdav1d. 
+#if PL_DAV1D_IMPLEMENTATION +# include <libplacebo/utils/dav1d_internal.h> +#endif + +PL_API_END + +#endif // LIBPLACEBO_DAV1D_H_ diff --git a/src/include/libplacebo/utils/dav1d_internal.h b/src/include/libplacebo/utils/dav1d_internal.h new file mode 100644 index 0000000..2e0512a --- /dev/null +++ b/src/include/libplacebo/utils/dav1d_internal.h @@ -0,0 +1,613 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
+ */ + +#ifndef LIBPLACEBO_DAV1D_H_ +#error This header should be included as part of <libplacebo/utils/dav1d.h> +#elif defined(__cplusplus) +#error This header cannot be included from C++ define PL_DAV1D_IMPLEMENTATION appropriately +#else + +#include <assert.h> +#include <stdlib.h> +#include <string.h> + +PL_DAV1D_API enum pl_color_system pl_system_from_dav1d(enum Dav1dMatrixCoefficients mc) +{ + switch (mc) { + case DAV1D_MC_IDENTITY: return PL_COLOR_SYSTEM_RGB; // or XYZ (unlikely) + case DAV1D_MC_BT709: return PL_COLOR_SYSTEM_BT_709; + case DAV1D_MC_UNKNOWN: return PL_COLOR_SYSTEM_UNKNOWN; + case DAV1D_MC_FCC: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case DAV1D_MC_BT470BG: return PL_COLOR_SYSTEM_BT_601; + case DAV1D_MC_BT601: return PL_COLOR_SYSTEM_BT_601; + case DAV1D_MC_SMPTE240: return PL_COLOR_SYSTEM_SMPTE_240M; + case DAV1D_MC_SMPTE_YCGCO: return PL_COLOR_SYSTEM_YCGCO; + case DAV1D_MC_BT2020_NCL: return PL_COLOR_SYSTEM_BT_2020_NC; + case DAV1D_MC_BT2020_CL: return PL_COLOR_SYSTEM_BT_2020_C; + case DAV1D_MC_SMPTE2085: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case DAV1D_MC_CHROMAT_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing + case DAV1D_MC_CHROMAT_CL: return PL_COLOR_SYSTEM_UNKNOWN; // missing + // Note: this colorspace is confused between PQ and HLG, which dav1d + // requires inferring from other sources, but libplacebo makes + // explicit. Default to PQ as it's the more common scenario. 
+ case DAV1D_MC_ICTCP: return PL_COLOR_SYSTEM_BT_2100_PQ; + case DAV1D_MC_RESERVED: abort(); + } + + return PL_COLOR_SYSTEM_UNKNOWN; +} + +PL_DAV1D_API enum Dav1dMatrixCoefficients pl_system_to_dav1d(enum pl_color_system sys) +{ + switch (sys) { + case PL_COLOR_SYSTEM_UNKNOWN: return DAV1D_MC_UNKNOWN; + case PL_COLOR_SYSTEM_BT_601: return DAV1D_MC_BT601; + case PL_COLOR_SYSTEM_BT_709: return DAV1D_MC_BT709; + case PL_COLOR_SYSTEM_SMPTE_240M: return DAV1D_MC_SMPTE240; + case PL_COLOR_SYSTEM_BT_2020_NC: return DAV1D_MC_BT2020_NCL; + case PL_COLOR_SYSTEM_BT_2020_C: return DAV1D_MC_BT2020_CL; + case PL_COLOR_SYSTEM_BT_2100_PQ: return DAV1D_MC_ICTCP; + case PL_COLOR_SYSTEM_BT_2100_HLG: return DAV1D_MC_ICTCP; + case PL_COLOR_SYSTEM_DOLBYVISION: return DAV1D_MC_UNKNOWN; // missing + case PL_COLOR_SYSTEM_YCGCO: return DAV1D_MC_SMPTE_YCGCO; + case PL_COLOR_SYSTEM_RGB: return DAV1D_MC_IDENTITY; + case PL_COLOR_SYSTEM_XYZ: return DAV1D_MC_IDENTITY; + case PL_COLOR_SYSTEM_COUNT: abort(); + } + + return DAV1D_MC_UNKNOWN; +} + +PL_DAV1D_API enum pl_color_levels pl_levels_from_dav1d(int color_range) +{ + return color_range ? 
PL_COLOR_LEVELS_FULL : PL_COLOR_LEVELS_LIMITED; +} + +PL_DAV1D_API int pl_levels_to_dav1d(enum pl_color_levels levels) +{ + return levels == PL_COLOR_LEVELS_FULL; +} + +PL_DAV1D_API enum pl_color_primaries pl_primaries_from_dav1d(enum Dav1dColorPrimaries prim) +{ + switch (prim) { + case DAV1D_COLOR_PRI_BT709: return PL_COLOR_PRIM_BT_709; + case DAV1D_COLOR_PRI_UNKNOWN: return PL_COLOR_PRIM_UNKNOWN; + case DAV1D_COLOR_PRI_RESERVED: return PL_COLOR_PRIM_UNKNOWN; + case DAV1D_COLOR_PRI_BT470M: return PL_COLOR_PRIM_BT_470M; + case DAV1D_COLOR_PRI_BT470BG: return PL_COLOR_PRIM_BT_601_625; + case DAV1D_COLOR_PRI_BT601: return PL_COLOR_PRIM_BT_601_525; + case DAV1D_COLOR_PRI_SMPTE240: return PL_COLOR_PRIM_BT_601_525; + case DAV1D_COLOR_PRI_FILM: return PL_COLOR_PRIM_FILM_C; + case DAV1D_COLOR_PRI_BT2020: return PL_COLOR_PRIM_BT_2020; + case DAV1D_COLOR_PRI_XYZ: return PL_COLOR_PRIM_UNKNOWN; + case DAV1D_COLOR_PRI_SMPTE431: return PL_COLOR_PRIM_DCI_P3; + case DAV1D_COLOR_PRI_SMPTE432: return PL_COLOR_PRIM_DISPLAY_P3; + case DAV1D_COLOR_PRI_EBU3213: return PL_COLOR_PRIM_EBU_3213; + } + + return PL_COLOR_PRIM_UNKNOWN; +} + +PL_DAV1D_API enum Dav1dColorPrimaries pl_primaries_to_dav1d(enum pl_color_primaries prim) +{ + switch (prim) { + case PL_COLOR_PRIM_UNKNOWN: return DAV1D_COLOR_PRI_UNKNOWN; + case PL_COLOR_PRIM_BT_601_525: return DAV1D_COLOR_PRI_BT601; + case PL_COLOR_PRIM_BT_601_625: return DAV1D_COLOR_PRI_BT470BG; + case PL_COLOR_PRIM_BT_709: return DAV1D_COLOR_PRI_BT709; + case PL_COLOR_PRIM_BT_470M: return DAV1D_COLOR_PRI_BT470M; + case PL_COLOR_PRIM_EBU_3213: return DAV1D_COLOR_PRI_EBU3213; + case PL_COLOR_PRIM_BT_2020: return DAV1D_COLOR_PRI_BT2020; + case PL_COLOR_PRIM_APPLE: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_ADOBE: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_PRO_PHOTO: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_CIE_1931: return DAV1D_COLOR_PRI_UNKNOWN; // missing + case PL_COLOR_PRIM_DCI_P3: 
return DAV1D_COLOR_PRI_SMPTE431;
+    case PL_COLOR_PRIM_DISPLAY_P3:  return DAV1D_COLOR_PRI_SMPTE432;
+    case PL_COLOR_PRIM_V_GAMUT:     return DAV1D_COLOR_PRI_UNKNOWN; // missing
+    case PL_COLOR_PRIM_S_GAMUT:     return DAV1D_COLOR_PRI_UNKNOWN; // missing
+    case PL_COLOR_PRIM_FILM_C:      return DAV1D_COLOR_PRI_FILM;
+    case PL_COLOR_PRIM_ACES_AP0:    return DAV1D_COLOR_PRI_UNKNOWN; // missing
+    case PL_COLOR_PRIM_ACES_AP1:    return DAV1D_COLOR_PRI_UNKNOWN; // missing
+    case PL_COLOR_PRIM_COUNT: abort();
+    }
+
+    return DAV1D_COLOR_PRI_UNKNOWN;
+}
+
+// Map a dav1d transfer characteristic onto the nearest libplacebo transfer.
+// Pure OETF curves with no direct equivalent map to the BT.1886 EOTF.
+PL_DAV1D_API enum pl_color_transfer pl_transfer_from_dav1d(enum Dav1dTransferCharacteristics trc)
+{
+    switch (trc) {
+    case DAV1D_TRC_BT709:         return PL_COLOR_TRC_BT_1886; // EOTF != OETF
+    case DAV1D_TRC_UNKNOWN:       return PL_COLOR_TRC_UNKNOWN;
+    case DAV1D_TRC_BT470M:        return PL_COLOR_TRC_GAMMA22;
+    case DAV1D_TRC_BT470BG:       return PL_COLOR_TRC_GAMMA28;
+    case DAV1D_TRC_BT601:         return PL_COLOR_TRC_BT_1886; // EOTF != OETF
+    case DAV1D_TRC_SMPTE240:      return PL_COLOR_TRC_BT_1886; // EOTF != OETF
+    case DAV1D_TRC_LINEAR:        return PL_COLOR_TRC_LINEAR;
+    case DAV1D_TRC_LOG100:        return PL_COLOR_TRC_UNKNOWN; // missing
+    case DAV1D_TRC_LOG100_SQRT10: return PL_COLOR_TRC_UNKNOWN; // missing
+    case DAV1D_TRC_IEC61966:      return PL_COLOR_TRC_BT_1886; // EOTF != OETF
+    case DAV1D_TRC_BT1361:        return PL_COLOR_TRC_BT_1886; // EOTF != OETF
+    case DAV1D_TRC_SRGB:          return PL_COLOR_TRC_SRGB;
+    case DAV1D_TRC_BT2020_10BIT:  return PL_COLOR_TRC_BT_1886; // EOTF != OETF
+    case DAV1D_TRC_BT2020_12BIT:  return PL_COLOR_TRC_BT_1886; // EOTF != OETF
+    case DAV1D_TRC_SMPTE2084:     return PL_COLOR_TRC_PQ;
+    case DAV1D_TRC_SMPTE428:      return PL_COLOR_TRC_ST428;
+    case DAV1D_TRC_HLG:           return PL_COLOR_TRC_HLG;
+    case DAV1D_TRC_RESERVED: abort();
+    }
+
+    return PL_COLOR_TRC_UNKNOWN;
+}
+
+// Inverse of the above: libplacebo transfer -> dav1d transfer characteristic.
+PL_DAV1D_API enum Dav1dTransferCharacteristics pl_transfer_to_dav1d(enum pl_color_transfer trc)
+{
+    switch (trc) {
+    case PL_COLOR_TRC_UNKNOWN:    return DAV1D_TRC_UNKNOWN;
+    case PL_COLOR_TRC_BT_1886:    return 
DAV1D_TRC_BT709; // EOTF != OETF + case PL_COLOR_TRC_SRGB: return DAV1D_TRC_SRGB; + case PL_COLOR_TRC_LINEAR: return DAV1D_TRC_LINEAR; + case PL_COLOR_TRC_GAMMA18: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA20: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA22: return DAV1D_TRC_BT470M; + case PL_COLOR_TRC_GAMMA24: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA26: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_GAMMA28: return DAV1D_TRC_BT470BG; + case PL_COLOR_TRC_ST428: return DAV1D_TRC_SMPTE428; + case PL_COLOR_TRC_PRO_PHOTO: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_PQ: return DAV1D_TRC_SMPTE2084; + case PL_COLOR_TRC_HLG: return DAV1D_TRC_HLG; + case PL_COLOR_TRC_V_LOG: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_S_LOG1: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_S_LOG2: return DAV1D_TRC_UNKNOWN; // missing + case PL_COLOR_TRC_COUNT: abort(); + } + + return DAV1D_TRC_UNKNOWN; +} + +PL_DAV1D_API enum pl_chroma_location pl_chroma_from_dav1d(enum Dav1dChromaSamplePosition loc) +{ + switch (loc) { + case DAV1D_CHR_UNKNOWN: return PL_CHROMA_UNKNOWN; + case DAV1D_CHR_VERTICAL: return PL_CHROMA_LEFT; + case DAV1D_CHR_COLOCATED: return PL_CHROMA_TOP_LEFT; + } + + return PL_CHROMA_UNKNOWN; +} + +PL_DAV1D_API enum Dav1dChromaSamplePosition pl_chroma_to_dav1d(enum pl_chroma_location loc) +{ + switch (loc) { + case PL_CHROMA_UNKNOWN: return DAV1D_CHR_UNKNOWN; + case PL_CHROMA_LEFT: return DAV1D_CHR_VERTICAL; + case PL_CHROMA_CENTER: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_TOP_LEFT: return DAV1D_CHR_COLOCATED; + case PL_CHROMA_TOP_CENTER: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_BOTTOM_LEFT: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_BOTTOM_CENTER: return DAV1D_CHR_UNKNOWN; // missing + case PL_CHROMA_COUNT: abort(); + } + + return DAV1D_CHR_UNKNOWN; +} + +static inline float pl_fixed24_8(uint32_t n) +{ + return (float) n / (1 << 8); +} + 
+static inline float pl_fixed18_14(uint32_t n) +{ + return (float) n / (1 << 14); +} + +static inline float pl_fixed0_16(uint16_t n) +{ + return (float) n / (1 << 16); +} + +// Align to a power of 2 +#define PL_ALIGN2(x, align) (((x) + (align) - 1) & ~((align) - 1)) + +PL_DAV1D_API void pl_frame_from_dav1dpicture(struct pl_frame *out, + const Dav1dPicture *picture) +{ + const Dav1dSequenceHeader *seq_hdr = picture->seq_hdr; + int num_planes; + switch (picture->p.layout) { + case DAV1D_PIXEL_LAYOUT_I400: + num_planes = 1; + break; + case DAV1D_PIXEL_LAYOUT_I420: + case DAV1D_PIXEL_LAYOUT_I422: + case DAV1D_PIXEL_LAYOUT_I444: + num_planes = 3; + break; + default: abort(); + } + + *out = (struct pl_frame) { + .num_planes = num_planes, + .planes = { + // Components are always in order, which makes things easy + { + .components = 1, + .component_mapping = {0}, + }, { + .components = 1, + .component_mapping = {1}, + }, { + .components = 1, + .component_mapping = {2}, + }, + }, + .crop = { + 0, 0, picture->p.w, picture->p.h, + }, + .color = { + .primaries = pl_primaries_from_dav1d(seq_hdr->pri), + .transfer = pl_transfer_from_dav1d(seq_hdr->trc), + }, + .repr = { + .sys = pl_system_from_dav1d(seq_hdr->mtrx), + .levels = pl_levels_from_dav1d(seq_hdr->color_range), + .bits = { + .sample_depth = PL_ALIGN2(picture->p.bpc, 8), + .color_depth = picture->p.bpc, + }, + }, + }; + + if (seq_hdr->mtrx == DAV1D_MC_ICTCP && seq_hdr->trc == DAV1D_TRC_HLG) { + + // dav1d makes no distinction between PQ and HLG ICtCp, so we need + // to manually fix it in the case that we have HLG ICtCp data. + out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG; + + } else if (seq_hdr->mtrx == DAV1D_MC_IDENTITY && + seq_hdr->pri == DAV1D_COLOR_PRI_XYZ) + { + + // dav1d handles this as a special case, but doesn't provide an + // explicit flag for it either, so we have to resort to this ugly hack, + // even though CIE 1931 RGB *is* a valid thing in principle! 
+ out->repr.sys= PL_COLOR_SYSTEM_XYZ; + + } else if (!out->repr.sys) { + + // PL_COLOR_SYSTEM_UNKNOWN maps to RGB, so hard-code this one + out->repr.sys = pl_color_system_guess_ycbcr(picture->p.w, picture->p.h); + } + + const Dav1dContentLightLevel *cll = picture->content_light; + if (cll) { + out->color.hdr.max_cll = cll->max_content_light_level; + out->color.hdr.max_fall = cll->max_frame_average_light_level; + } + + // This overrides the CLL values above, if both are present + const Dav1dMasteringDisplay *md = picture->mastering_display; + if (md) { + out->color.hdr.max_luma = pl_fixed24_8(md->max_luminance); + out->color.hdr.min_luma = pl_fixed18_14(md->min_luminance); + out->color.hdr.prim = (struct pl_raw_primaries) { + .red.x = pl_fixed0_16(md->primaries[0][0]), + .red.y = pl_fixed0_16(md->primaries[0][1]), + .green.x = pl_fixed0_16(md->primaries[1][0]), + .green.y = pl_fixed0_16(md->primaries[1][1]), + .blue.x = pl_fixed0_16(md->primaries[2][0]), + .blue.y = pl_fixed0_16(md->primaries[2][1]), + .white.x = pl_fixed0_16(md->white_point[0]), + .white.y = pl_fixed0_16(md->white_point[1]), + }; + } + + if (picture->frame_hdr->film_grain.present) { + const Dav1dFilmGrainData *fg = &picture->frame_hdr->film_grain.data; + out->film_grain = (struct pl_film_grain_data) { + .type = PL_FILM_GRAIN_AV1, + .seed = fg->seed, + .params.av1 = { + .num_points_y = fg->num_y_points, + .chroma_scaling_from_luma = fg->chroma_scaling_from_luma, + .num_points_uv = { fg->num_uv_points[0], fg->num_uv_points[1] }, + .scaling_shift = fg->scaling_shift, + .ar_coeff_lag = fg->ar_coeff_lag, + .ar_coeff_shift = (int) fg->ar_coeff_shift, + .grain_scale_shift = fg->grain_scale_shift, + .uv_mult = { fg->uv_mult[0], fg->uv_mult[1] }, + .uv_mult_luma = { fg->uv_luma_mult[0], fg->uv_luma_mult[1] }, + .uv_offset = { fg->uv_offset[0], fg->uv_offset[1] }, + .overlap = fg->overlap_flag, + }, + }; + + struct pl_av1_grain_data *av1 = &out->film_grain.params.av1; + memcpy(av1->points_y, fg->y_points, 
sizeof(av1->points_y)); + memcpy(av1->points_uv, fg->uv_points, sizeof(av1->points_uv)); + memcpy(av1->ar_coeffs_y, fg->ar_coeffs_y, sizeof(av1->ar_coeffs_y)); + memcpy(av1->ar_coeffs_uv[0], fg->ar_coeffs_uv[0], sizeof(av1->ar_coeffs_uv[0])); + memcpy(av1->ar_coeffs_uv[1], fg->ar_coeffs_uv[1], sizeof(av1->ar_coeffs_uv[1])); + } + + switch (picture->p.layout) { + case DAV1D_PIXEL_LAYOUT_I400: + case DAV1D_PIXEL_LAYOUT_I444: + break; + case DAV1D_PIXEL_LAYOUT_I420: + case DAV1D_PIXEL_LAYOUT_I422: + // Only set the chroma location for definitely subsampled images + pl_frame_set_chroma_location(out, pl_chroma_from_dav1d(seq_hdr->chr)); + break; + } +} + +PL_DAV1D_API void pl_swapchain_colors_from_dav1dpicture(struct pl_swapchain_colors *out_colors, + const Dav1dPicture *picture) +{ + struct pl_frame frame; + pl_frame_from_dav1dpicture(&frame, picture); + + *out_colors = (struct pl_swapchain_colors) { + .primaries = frame.color.primaries, + .transfer = frame.color.transfer, + }; + + const Dav1dContentLightLevel *cll = picture->content_light; + if (cll) { + out_colors->hdr.max_cll = cll->max_content_light_level; + out_colors->hdr.max_fall = cll->max_frame_average_light_level; + } + + const Dav1dMasteringDisplay *md = picture->mastering_display; + if (md) { + out_colors->hdr.min_luma = pl_fixed18_14(md->min_luminance); + out_colors->hdr.max_luma = pl_fixed24_8(md->max_luminance); + out_colors->hdr.prim.red.x = pl_fixed0_16(md->primaries[0][0]); + out_colors->hdr.prim.red.y = pl_fixed0_16(md->primaries[0][1]); + out_colors->hdr.prim.green.x = pl_fixed0_16(md->primaries[1][0]); + out_colors->hdr.prim.green.y = pl_fixed0_16(md->primaries[1][1]); + out_colors->hdr.prim.blue.x = pl_fixed0_16(md->primaries[2][0]); + out_colors->hdr.prim.blue.y = pl_fixed0_16(md->primaries[2][1]); + out_colors->hdr.prim.white.x = pl_fixed0_16(md->white_point[0]); + out_colors->hdr.prim.white.y = pl_fixed0_16(md->white_point[1]); + } +} + +#define PL_MAGIC0 0x2c2a1269 +#define PL_MAGIC1 
0xc6d02577 + +struct pl_dav1dalloc { + uint32_t magic[2]; + pl_gpu gpu; + pl_buf buf; +}; + +struct pl_dav1dref { + Dav1dPicture pic; + uint8_t count; +}; + +static void pl_dav1dpicture_unref(void *priv) +{ + struct pl_dav1dref *ref = priv; + if (--ref->count == 0) { + dav1d_picture_unref(&ref->pic); + free(ref); + } +} + +PL_DAV1D_API bool pl_upload_dav1dpicture(pl_gpu gpu, + struct pl_frame *out, + pl_tex tex[3], + const struct pl_dav1d_upload_params *params) +{ + Dav1dPicture *pic = params->picture; + pl_frame_from_dav1dpicture(out, pic); + if (!params->film_grain) + out->film_grain.type = PL_FILM_GRAIN_NONE; + + const int bytes = (pic->p.bpc + 7) / 8; // rounded up + int sub_x = 0, sub_y = 0; + switch (pic->p.layout) { + case DAV1D_PIXEL_LAYOUT_I400: + case DAV1D_PIXEL_LAYOUT_I444: + break; + case DAV1D_PIXEL_LAYOUT_I420: + sub_x = sub_y = 1; + break; + case DAV1D_PIXEL_LAYOUT_I422: + sub_x = 1; + break; + } + + struct pl_plane_data data[3] = { + { + // Y plane + .type = PL_FMT_UNORM, + .width = pic->p.w, + .height = pic->p.h, + .pixel_stride = bytes, + .component_size = {bytes * 8}, + .component_map = {0}, + }, { + // U plane + .type = PL_FMT_UNORM, + .width = pic->p.w >> sub_x, + .height = pic->p.h >> sub_y, + .pixel_stride = bytes, + .component_size = {bytes * 8}, + .component_map = {1}, + }, { + // V plane + .type = PL_FMT_UNORM, + .width = pic->p.w >> sub_x, + .height = pic->p.h >> sub_y, + .pixel_stride = bytes, + .component_size = {bytes * 8}, + .component_map = {2}, + }, + }; + + pl_buf buf = NULL; + struct pl_dav1dalloc *alloc = params->gpu_allocated ? 
pic->allocator_data : NULL; + struct pl_dav1dref *ref = NULL; + + if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) { + // Re-use pre-allocated buffers directly + assert(alloc->gpu == gpu); + buf = alloc->buf; + } else if (params->asynchronous && gpu->limits.callbacks) { + ref = malloc(sizeof(*ref)); + if (!ref) + return false; + memcpy(&ref->pic, pic, sizeof(Dav1dPicture)); + ref->count = out->num_planes; + } + + for (int p = 0; p < out->num_planes; p++) { + ptrdiff_t stride = p > 0 ? pic->stride[1] : pic->stride[0]; + if (stride < 0) { + data[p].pixels = (uint8_t *) pic->data[p] + stride * (data[p].height - 1); + data[p].row_stride = -stride; + out->planes[p].flipped = true; + } else { + data[p].pixels = pic->data[p]; + data[p].row_stride = stride; + } + + if (buf) { + data[p].buf = buf; + data[p].buf_offset = (uintptr_t) data[p].pixels - (uintptr_t) buf->data; + data[p].pixels = NULL; + } else if (ref) { + data[p].priv = ref; + data[p].callback = pl_dav1dpicture_unref; + } + + if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) { + free(ref); + return false; + } + } + + if (params->asynchronous) { + if (ref) { + *pic = (Dav1dPicture) {0}; + } else { + dav1d_picture_unref(pic); + } + } + + return true; +} + +PL_DAV1D_API int pl_allocate_dav1dpicture(Dav1dPicture *p, void *cookie) +{ + pl_gpu gpu = cookie; + if (!gpu->limits.max_mapped_size || !gpu->limits.host_cached || + !gpu->limits.buf_transfer) + { + return DAV1D_ERR(ENOTSUP); + } + + // Copied from dav1d_default_picture_alloc + const int hbd = p->p.bpc > 8; + const int aligned_w = PL_ALIGN2(p->p.w, 128); + const int aligned_h = PL_ALIGN2(p->p.h, 128); + const int has_chroma = p->p.layout != DAV1D_PIXEL_LAYOUT_I400; + const int ss_ver = p->p.layout == DAV1D_PIXEL_LAYOUT_I420; + const int ss_hor = p->p.layout != DAV1D_PIXEL_LAYOUT_I444; + p->stride[0] = aligned_w << hbd; + p->stride[1] = has_chroma ? 
(aligned_w >> ss_hor) << hbd : 0; + + // Align strides up to multiples of the GPU performance hints + p->stride[0] = PL_ALIGN2(p->stride[0], gpu->limits.align_tex_xfer_pitch); + p->stride[1] = PL_ALIGN2(p->stride[1], gpu->limits.align_tex_xfer_pitch); + + // Aligning offsets to 4 also implicitly aligns to the texel alignment (1 or 2) + size_t off_align = PL_ALIGN2(gpu->limits.align_tex_xfer_offset, 4); + const size_t y_sz = PL_ALIGN2(p->stride[0] * aligned_h, off_align); + const size_t uv_sz = PL_ALIGN2(p->stride[1] * (aligned_h >> ss_ver), off_align); + + // The extra DAV1D_PICTURE_ALIGNMENTs are to brute force plane alignment, + // even in the case that the driver gives us insane alignments + const size_t pic_size = y_sz + 2 * uv_sz; + const size_t total_size = pic_size + DAV1D_PICTURE_ALIGNMENT * 4; + + // Validate size limitations + if (total_size > gpu->limits.max_mapped_size) + return DAV1D_ERR(ENOMEM); + + pl_buf buf = pl_buf_create(gpu, pl_buf_params( + .size = total_size, + .host_mapped = true, + .memory_type = PL_BUF_MEM_HOST, + )); + + if (!buf) + return DAV1D_ERR(ENOMEM); + + struct pl_dav1dalloc *alloc = malloc(sizeof(struct pl_dav1dalloc)); + if (!alloc) { + pl_buf_destroy(gpu, &buf); + return DAV1D_ERR(ENOMEM); + } + + *alloc = (struct pl_dav1dalloc) { + .magic = { PL_MAGIC0, PL_MAGIC1 }, + .gpu = gpu, + .buf = buf, + }; + + assert(buf->data); + uintptr_t base = (uintptr_t) buf->data, data[3]; + data[0] = PL_ALIGN2(base, DAV1D_PICTURE_ALIGNMENT); + data[1] = PL_ALIGN2(data[0] + y_sz, DAV1D_PICTURE_ALIGNMENT); + data[2] = PL_ALIGN2(data[1] + uv_sz, DAV1D_PICTURE_ALIGNMENT); + + p->allocator_data = alloc; + p->data[0] = (void *) data[0]; + p->data[1] = (void *) data[1]; + p->data[2] = (void *) data[2]; + return 0; +} + +PL_DAV1D_API void pl_release_dav1dpicture(Dav1dPicture *p, void *cookie) +{ + struct pl_dav1dalloc *alloc = p->allocator_data; + if (!alloc) + return; + + assert(alloc->magic[0] == PL_MAGIC0); + assert(alloc->magic[1] == PL_MAGIC1); + 
assert(alloc->gpu == cookie); + pl_buf_destroy(alloc->gpu, &alloc->buf); + free(alloc); + + p->data[0] = p->data[1] = p->data[2] = p->allocator_data = NULL; +} + +#undef PL_ALIGN2 +#undef PL_MAGIC0 +#undef PL_MAGIC1 + +#endif // LIBPLACEBO_DAV1D_H_ diff --git a/src/include/libplacebo/utils/dolbyvision.h b/src/include/libplacebo/utils/dolbyvision.h new file mode 100644 index 0000000..6d4d72e --- /dev/null +++ b/src/include/libplacebo/utils/dolbyvision.h @@ -0,0 +1,34 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_DOLBYVISION_H_ +#define LIBPLACEBO_DOLBYVISION_H_ + +#include <libplacebo/colorspace.h> + +PL_API_BEGIN + +// Parses the Dolby Vision RPU, and sets the `pl_hdr_metadata` dynamic +// brightness metadata fields accordingly. +// +// Note: requires `PL_HAVE_LIBDOVI` to be defined, no-op otherwise. +PL_API void pl_hdr_metadata_from_dovi_rpu(struct pl_hdr_metadata *out, + const uint8_t *buf, size_t size); + +PL_API_END + +#endif // LIBPLACEBO_DOLBYVISION_H_ diff --git a/src/include/libplacebo/utils/frame_queue.h b/src/include/libplacebo/utils/frame_queue.h new file mode 100644 index 0000000..2a9c90c --- /dev/null +++ b/src/include/libplacebo/utils/frame_queue.h @@ -0,0 +1,230 @@ +/* + * This file is part of libplacebo. 
+ * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_FRAME_QUEUE_H +#define LIBPLACEBO_FRAME_QUEUE_H + +#include <libplacebo/renderer.h> +#include <libplacebo/shaders/deinterlacing.h> + +PL_API_BEGIN + +// An abstraction layer for automatically turning a conceptual stream of +// (frame, pts) pairs, as emitted by a decoder or filter graph, into a +// `pl_frame_mix` suitable for `pl_render_image_mix`. +// +// This API ensures that minimal work is performed (e.g. only mapping frames +// that are actually required), while also satisfying the requirements +// of any configured frame mixer. +// +// Thread-safety: Safe +typedef struct pl_queue_t *pl_queue; + +enum pl_queue_status { + PL_QUEUE_OK, // success + PL_QUEUE_EOF, // no more frames are available + PL_QUEUE_MORE, // more frames needed, but not (yet) available + PL_QUEUE_ERR = -1, // some unknown error occurred while retrieving frames +}; + +struct pl_source_frame { + // The frame's presentation timestamp, in seconds relative to the first + // frame. These must be monotonically increasing for subsequent frames. + // To implement a discontinuous jump, users must explicitly reset the + // frame queue with `pl_queue_reset` and restart from PTS 0.0. + double pts; + + // The frame's duration. 
This is not needed in normal scenarios, as the + // FPS can be inferred from the `pts` values themselves. Providing it + // only helps initialize the value for initial frames, which can smooth + // out the interpolation weights. Its use is also highly recommended + // when displaying interlaced frames. (Optional) + float duration; + + // If set to something other than PL_FIELD_NONE, this source frame is + // marked as interlaced. It will be split up into two separate frames + // internally, and exported to the resulting `pl_frame_mix` as a pair of + // fields, referencing the corresponding previous and next frames. The + // first field will have the same PTS as `pts`, and the second field will + // be inserted at the timestamp `pts + duration/2`. + // + // Note: As a result of FPS estimates being unreliable around streams with + // mixed FPS (or when mixing interlaced and progressive frames), it's + // highly recommended to always specify a valid `duration` for interlaced + // frames. + enum pl_field first_field; + + // Abstract frame data itself. To allow mapping frames only when they're + // actually needed, frames use a lazy representation. The provided + // callbacks will be invoked to interface with it. + void *frame_data; + + // This will be called to map the frame to the GPU, only if needed. + // + // `tex` is a pointer to an array of 4 texture objects (or NULL), which + // *may* serve as backing storage for the texture being mapped. These are + // intended to be recreated by `map`, e.g. using `pl_tex_recreate` or + // `pl_upload_plane` as appropriate. They will be managed internally by + // `pl_queue` and destroyed at some unspecified future point in time. + // + // Note: If `map` fails, it will not be retried, nor will `discard` be run. + // The user should clean up state in this case. 
+ bool (*map)(pl_gpu gpu, pl_tex *tex, const struct pl_source_frame *src, + struct pl_frame *out_frame); + + // If present, this will be called on frames that are done being used by + // `pl_queue`. This may be useful to e.g. unmap textures backed by external + // APIs such as hardware decoders. (Optional) + void (*unmap)(pl_gpu gpu, struct pl_frame *frame, const struct pl_source_frame *src); + + // This function will be called for frames that are deemed unnecessary + // (e.g. never became visible) and should instead be cleanly freed. + // (Optional) + void (*discard)(const struct pl_source_frame *src); +}; + +// Create a new, empty frame queue. +// +// It's highly recommended to fully render a single frame with `pts == 0.0`, +// and flush the GPU pipeline with `pl_gpu_finish`, prior to starting the timed +// playback loop. +PL_API pl_queue pl_queue_create(pl_gpu gpu); +PL_API void pl_queue_destroy(pl_queue *queue); + +// Explicitly clear the queue. This is essentially equivalent to destroying +// and recreating the queue, but preserves any internal memory allocations. +// +// Note: Calling `pl_queue_reset` may block, if another thread is currently +// blocked on a different `pl_queue_*` call. +PL_API void pl_queue_reset(pl_queue queue); + +// Explicitly push a frame. This is an alternative way to feed the frame queue +// with incoming frames, the other method being the asynchronous callback +// specified as `pl_queue_params.get_frame`. Both methods may be used +// simultaneously, although providing `get_frame` is recommended since it +// avoids the risk of the queue underrunning. +// +// When no more frames are available, call this function with `frame == NULL` +// to indicate EOF and begin draining the frame queue. +PL_API void pl_queue_push(pl_queue queue, const struct pl_source_frame *frame); + +// Variant of `pl_queue_push` that blocks while the queue is judged +// (internally) to be "too full". 
This is useful for asynchronous decoder loops +// in order to prevent the queue from exhausting available RAM if frames are +// decoded significantly faster than they're displayed. +// +// The given `timeout` parameter specifies how long to wait before giving up, +// in nanoseconds. Returns false if this timeout was reached. +PL_API bool pl_queue_push_block(pl_queue queue, uint64_t timeout, + const struct pl_source_frame *frame); + +struct pl_queue_params { + // The PTS of the frame that will be rendered. This should be set to the + // timestamp (in seconds) of the next vsync, relative to the initial frame. + // + // These must be monotonically increasing. To implement a discontinuous + // jump, users must explicitly reset the frame queue with `pl_queue_reset` + // and restart from PTS 0.0. + double pts; + + // The radius of the configured mixer. This should be set to the value + // as returned by `pl_frame_mix_radius`. + float radius; + + // The estimated duration of a vsync, in seconds. This will only be used as + // a hint, the true value will be estimated by comparing `pts` timestamps + // between calls to `pl_queue_update`. (Optional) + float vsync_duration; + + // If the difference between the (estimated) vsync duration and the + // (measured) frame duration is smaller than this threshold, silently + // disable interpolation and switch to ZOH semantics instead. + // + // For example, a value of 0.01 allows the FPS to differ by up to 1% + // without being interpolated. Note that this will result in a continuous + // phase drift unless also compensated for by the user, which will + // eventually resulted in a dropped or duplicated frame. (Though this can + // be preferable to seeing that same phase drift result in a temporally + // smeared image) + float interpolation_threshold; + + // Specifies how long `pl_queue_update` will wait for frames to become + // available, in nanoseconds, before giving up and returning with + // QUEUE_MORE. 
+ // + // If `get_frame` is provided, this value is ignored by `pl_queue` and + // should instead be interpreted by the provided callback. + uint64_t timeout; + + // This callback will be used to pull new frames from the decoder. It may + // block if needed. The user is responsible for setting appropriate time + // limits and/or returning and interpreting QUEUE_MORE as sensible. + // + // Providing this callback is entirely optional. Users can instead choose + // to manually feed the frame queue with new frames using `pl_queue_push`. + enum pl_queue_status (*get_frame)(struct pl_source_frame *out_frame, + const struct pl_queue_params *params); + void *priv; +}; + +#define pl_queue_params(...) (&(struct pl_queue_params) { __VA_ARGS__ }) + +// Advance the frame queue's internal state to the target timestamp. Any frames +// which are no longer needed (i.e. too far in the past) are automatically +// unmapped and evicted. Any future frames which are needed to fill the queue +// must either have been pushed in advance, or will be requested using the +// provided `get_frame` callback. If you call this on `out_mix == NULL`, the +// queue state will advance, but no frames will be mapped. +// +// This function may return with PL_QUEUE_MORE, in which case the user may wish +// to ensure more frames are available and then re-run this function with the +// same parameters. In this case, `out_mix` is still written to, but it may be +// incomplete (or even contain no frames at all). Additionally, when the source +// contains interlaced frames (see `pl_source_frame.first_field`), this +// function may return with PL_QUEUE_MORE if a frame is missing references to +// a future frame. +// +// The resulting mix of frames in `out_mix` will represent the neighbourhood of +// the target timestamp, and can be passed to `pl_render_image_mix` as-is. +// +// Note: `out_mix` will only remain valid until the next call to +// `pl_queue_update` or `pl_queue_reset`. 
+PL_API enum pl_queue_status pl_queue_update(pl_queue queue, struct pl_frame_mix *out_mix, + const struct pl_queue_params *params); + +// Returns a pl_queue's internal estimates for FPS and VPS (vsyncs per second). +// Returns 0.0 if no estimate is available. +PL_API float pl_queue_estimate_fps(pl_queue queue); +PL_API float pl_queue_estimate_vps(pl_queue queue); + +// Returns the number of frames currently contained in a pl_queue. +PL_API int pl_queue_num_frames(pl_queue queue); + +// Inspect the contents of the Nth queued frame. Returns false if `idx` is +// out of range. +// +// Warning: No guarantee is made to ensure validity of `out->frame_data` +// after this call. In particular, pl_queue_* calls made from another thread +// may call `discard()` on the frame in question. The user bears responsibility +// to avoid accessing `out->frame_data` in a multi-threaded scenario unless +// an external guarantee can be made that the frame won't be dequeued until +// it is done being used by the user. +PL_API bool pl_queue_peek(pl_queue queue, int idx, struct pl_source_frame *out); + +PL_API_END + +#endif // LIBPLACEBO_FRAME_QUEUE_H diff --git a/src/include/libplacebo/utils/libav.h b/src/include/libplacebo/utils/libav.h new file mode 100644 index 0000000..91f3dd8 --- /dev/null +++ b/src/include/libplacebo/utils/libav.h @@ -0,0 +1,284 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. 
+ * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_LIBAV_H_ +#define LIBPLACEBO_LIBAV_H_ + +#include <libplacebo/config.h> +#include <libplacebo/gpu.h> +#include <libplacebo/shaders/deinterlacing.h> +#include <libplacebo/utils/upload.h> + +#if defined(__cplusplus) && !defined(PL_LIBAV_IMPLEMENTATION) +# define PL_LIBAV_API +# define PL_LIBAV_IMPLEMENTATION 0 +# warning Remember to include this file with a PL_LIBAV_IMPLEMENTATION set to 1 in \ + C translation unit to provide implementation. Suppress this warning by \ + defining PL_LIBAV_IMPLEMENTATION to 0 in C++ files. +#elif !defined(PL_LIBAV_IMPLEMENTATION) +# define PL_LIBAV_API static inline +# define PL_LIBAV_IMPLEMENTATION 1 +#else +# define PL_LIBAV_API +#endif + +PL_API_BEGIN + +#include <libavformat/avformat.h> +#include <libavutil/frame.h> +#include <libavutil/version.h> +#include <libavcodec/avcodec.h> + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 16, 100) && defined(PL_HAVE_DOVI) +# define PL_HAVE_LAV_DOLBY_VISION +# include <libavutil/dovi_meta.h> +#endif + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 61, 100) +# define PL_HAVE_LAV_FILM_GRAIN +# include <libavutil/film_grain_params.h> +#endif + +#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 25, 100) +# define PL_HAVE_LAV_HDR +# include <libavutil/hdr_dynamic_metadata.h> +# include <libavutil/mastering_display_metadata.h> +#endif + +//------------------------------------------------------------------------ +// Important note: For support for AVVkFrame, which depends on <vulkan.h>, +// users *SHOULD* include <vulkan/vulkan.h> manually before this header. +//------------------------------------------------------------------------ + + +// Fill in the details of a `pl_frame` from an AVFrame. This function will +// explicitly clear `out_frame`, setting all extra fields to 0. 
After this +// function returns, the only missing data is information related to the plane +// texture itself (`planes[N].texture`), as well as any overlays (e.g. +// subtitles). +// +// Note: If the AVFrame contains an embedded ICC profile or H.274 film grain +// metadata, the resulting `out_image->profile` will reference this pointer, +// meaning that in general, the `pl_frame` is only guaranteed to be valid as +// long as the AVFrame is not freed. +// +// Note: This will ignore Dolby Vision metadata by default (to avoid leaking +// memory), either switch to pl_map_avframe_ex or do it manually using +// pl_map_dovi_metadata. +PL_LIBAV_API void pl_frame_from_avframe(struct pl_frame *out_frame, const AVFrame *frame); + +// Deprecated aliases for backwards compatibility +#define pl_image_from_avframe pl_frame_from_avframe +#define pl_target_from_avframe pl_frame_from_avframe + +// Copy extra metadata from an AVStream to a pl_frame. This should be called +// after `pl_frame_from_avframe` or `pl_map_avframe` (respectively), and sets +// metadata associated with stream-level side data. This is needed because +// FFmpeg rather annoyingly does not propagate stream-level metadata to frames. +PL_LIBAV_API void pl_frame_copy_stream_props(struct pl_frame *out_frame, + const AVStream *stream); + +#ifdef PL_HAVE_LAV_HDR +struct pl_av_hdr_metadata { + // All fields are optional and may be passed as `NULL`. + const AVMasteringDisplayMetadata *mdm; + const AVContentLightMetadata *clm; + const AVDynamicHDRPlus *dhp; +}; + +// Helper function to update a `pl_hdr_metadata` struct from HDR10/HDR10+ +// metadata in the FFmpeg format. Unspecified/invalid elements will be left +// uninitialized in `out`. +PL_LIBAV_API void pl_map_hdr_metadata(struct pl_hdr_metadata *out, + const struct pl_av_hdr_metadata *metadata); +#endif + +#ifdef PL_HAVE_LAV_DOLBY_VISION +// Helper function to map Dolby Vision metadata from the FFmpeg format. 
+PL_LIBAV_API void pl_map_dovi_metadata(struct pl_dovi_metadata *out, + const AVDOVIMetadata *metadata); + +// Helper function to map Dolby Vision metadata from the FFmpeg format +// to `pl_dovi_metadata`, and adds it to the `pl_frame`. +// The `pl_frame` colorspace fields and HDR struct are also updated with +// values from the `AVDOVIMetadata`. +// +// Note: The `pl_dovi_metadata` must be allocated externally. +// Also, currently the metadata is only used if the `AVDOVIRpuDataHeader` +// `disable_residual_flag` field is not zero and can be checked before allocating. +PL_LIBAV_API void pl_frame_map_avdovi_metadata(struct pl_frame *out_frame, + struct pl_dovi_metadata *dovi, + const AVDOVIMetadata *metadata); +#endif + +// Helper function to test if a pixfmt would be supported by the GPU. +// Essentially, this can be used to check if `pl_map_avframe` would work for a +// given AVPixelFormat, without actually uploading or allocating anything. +PL_LIBAV_API bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt); + +// Variant of `pl_test_pixfmt` that also tests for the given capabilities +// being present. Note that in the presence of hardware accelerated frames, +// this cannot be tested without frame-specific information (i.e. swformat), +// but in practice this should be a non-issue as GPU-native hwformats will +// probably be fully supported. +PL_LIBAV_API bool pl_test_pixfmt_caps(pl_gpu gpu, enum AVPixelFormat pixfmt, + enum pl_fmt_caps caps); + +// Like `pl_frame_from_avframe`, but the texture pointers are also initialized +// to ensure they have the correct size and format to match the AVframe. +// Similar in spirit to `pl_recreate_plane`, and the same notes apply. `tex` +// must be an array of 4 pointers of type `pl_tex`, each either +// pointing to a valid texture, or NULL. Returns whether successful. 
+PL_LIBAV_API bool pl_frame_recreate_from_avframe(pl_gpu gpu, struct pl_frame *out_frame, + pl_tex tex[4], const AVFrame *frame); + +struct pl_avframe_params { + // The AVFrame to map. Required. + const AVFrame *frame; + + // Backing textures for frame data. Required for all non-hwdec formats. + // This must point to an array of four valid textures (or NULL entries). + // + // Note: Not cleaned up by `pl_unmap_avframe`. The intent is for users to + // re-use this texture array for subsequent frames, to avoid texture + // creation/destruction overhead. + pl_tex *tex; + + // Also map Dolby Vision metadata (if supported). Note that this also + // overrides the colorimetry metadata (forces BT.2020+PQ). + bool map_dovi; +}; + +#define PL_AVFRAME_DEFAULTS \ + .map_dovi = true, + +#define pl_avframe_params(...) (&(struct pl_avframe_params) { PL_AVFRAME_DEFAULTS __VA_ARGS__ }) + +// Very high level helper function to take an `AVFrame` and map it to the GPU. +// The resulting `pl_frame` remains valid until `pl_unmap_avframe` is called, +// which must be called at some point to clean up state. The `AVFrame` is +// automatically ref'd and unref'd if needed. Returns whether successful. +// +// Note: `out_frame->user_data` points to a privately managed opaque struct +// and must not be touched by the user. +PL_LIBAV_API bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out_frame, + const struct pl_avframe_params *params); +PL_LIBAV_API void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame); + +// Backwards compatibility with previous versions of this API. +PL_LIBAV_API bool pl_map_avframe(pl_gpu gpu, struct pl_frame *out_frame, + pl_tex tex[4], const AVFrame *avframe); + +// Return the AVFrame* that a pl_frame was mapped from (via pl_map_avframe_ex) +// Note: This reference is attached to the `pl_frame` and will get freed by +// pl_unmap_avframe. 
+PL_LIBAV_API AVFrame *pl_get_mapped_avframe(const struct pl_frame *frame); + +// Download the texture contents of a `pl_frame` back to a corresponding +// AVFrame. Blocks until completion. +// +// Note: This function performs minimal verification, so incorrect usage will +// likely result in broken frames. Use `pl_frame_recreate_from_avframe` to +// ensure matching formats. +PL_LIBAV_API bool pl_download_avframe(pl_gpu gpu, + const struct pl_frame *frame, + AVFrame *out_frame); + +// Helper functions to update the colorimetry data in an AVFrame based on +// the values specified in the given color space / color repr / profile. +// +// Note: These functions can and will allocate AVFrame side data if needed, +// in particular to encode HDR metadata in `space.hdr`. +PL_LIBAV_API void pl_avframe_set_color(AVFrame *frame, struct pl_color_space space); +PL_LIBAV_API void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr); +PL_LIBAV_API void pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile); + +// Map an AVPixelFormat to an array of pl_plane_data structs. The array must +// have at least `av_pix_fmt_count_planes(fmt)` elements, but never more than +// 4. This function leaves `width`, `height` and `row_stride`, as well as the +// data pointers, uninitialized. +// +// If `bits` is non-NULL, this function will attempt aligning the resulting +// `pl_plane_data` struct for optimal compatibility, placing the resulting +// `pl_bit_depth` metadata into `bits`. +// +// Returns the number of plane structs written to, or 0 on error. +// +// Note: This function is usually clumsier to use than the higher-level +// functions above, but it might have some fringe use cases, for example if +// the user wants to replace the data buffers by `pl_buf` references in the +// `pl_plane_data` before uploading it to the GPU. 
+PL_LIBAV_API int pl_plane_data_from_pixfmt(struct pl_plane_data data[4], + struct pl_bit_encoding *bits, + enum AVPixelFormat pix_fmt); + +// Callback for AVCodecContext.get_buffer2 that allocates memory from +// persistently mapped buffers. This can be more efficient than regular +// system memory, especially on platforms that don't support importing +// PL_HANDLE_HOST_PTR as buffers. +// +// Note: `avctx->opaque` must be a pointer that *points* to the GPU instance. +// That is, it should have type `pl_gpu *`. +PL_LIBAV_API int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags); + +// Mapping functions for the various libavutil enums. Note that these are not +// quite 1:1, and even for values that exist in both, the semantics sometimes +// differ. Some special cases (e.g. ICtCp, or XYZ) are handled differently in +// libplacebo and libavutil, respectively. +// +// Because of this, it's generally recommended to avoid these and instead use +// helpers like `pl_frame_from_avframe`, which contain extra logic to patch +// through all of the special cases. 
+PL_LIBAV_API enum pl_color_system pl_system_from_av(enum AVColorSpace spc); +PL_LIBAV_API enum AVColorSpace pl_system_to_av(enum pl_color_system sys); +PL_LIBAV_API enum pl_color_levels pl_levels_from_av(enum AVColorRange range); +PL_LIBAV_API enum AVColorRange pl_levels_to_av(enum pl_color_levels levels); +PL_LIBAV_API enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim); +PL_LIBAV_API enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim); +PL_LIBAV_API enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc); +PL_LIBAV_API enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc); +PL_LIBAV_API enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc); +PL_LIBAV_API enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc); + +// Helper function to generate a `pl_color_space` struct from an AVFrame. +PL_LIBAV_API void pl_color_space_from_avframe(struct pl_color_space *out_csp, + const AVFrame *frame); + +// Helper function to pick the right `pl_field` value for an AVFrame. +PL_LIBAV_API enum pl_field pl_field_from_avframe(const AVFrame *frame); + +#ifdef PL_HAVE_LAV_FILM_GRAIN +// Fill in film grain parameters from an AVFilmGrainParams. +// +// Note: The resulting struct will only remain valid as long as the +// `AVFilmGrainParams` remains valid. +PL_LIBAV_API void pl_film_grain_from_av(struct pl_film_grain_data *out_data, + const AVFilmGrainParams *fgp); +#endif + +// Deprecated alias for backwards compatibility +#define pl_swapchain_colors_from_avframe pl_color_space_from_avframe + +// Actual implementation, included as part of this header to avoid having +// a compile-time dependency on libavutil. 
+#if PL_LIBAV_IMPLEMENTATION +# include <libplacebo/utils/libav_internal.h> +#endif + +PL_API_END + +#endif // LIBPLACEBO_LIBAV_H_ diff --git a/src/include/libplacebo/utils/libav_internal.h b/src/include/libplacebo/utils/libav_internal.h new file mode 100644 index 0000000..4c269e5 --- /dev/null +++ b/src/include/libplacebo/utils/libav_internal.h @@ -0,0 +1,1482 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. 
 */

// This file must only ever be pulled in through <libplacebo/utils/libav.h>,
// and never from C++ (the implementation below relies on C-only constructs).
#ifndef LIBPLACEBO_LIBAV_H_
#error This header should be included as part of <libplacebo/utils/libav.h>
#elif defined(__cplusplus)
#error This header cannot be included from C++ define PL_LIBAV_IMPLEMENTATION appropriately
#else

#include <assert.h>

#include <libplacebo/utils/dolbyvision.h>

#include <libavutil/hwcontext.h>
#include <libavutil/hwcontext_drm.h>
#include <libavutil/imgutils.h>
#include <libavutil/pixdesc.h>
#include <libavutil/display.h>
#include <libavcodec/version.h>

// Try importing <vulkan.h> dynamically if it wasn't already
#if !defined(VK_API_VERSION_1_2) && defined(__has_include)
# if __has_include(<vulkan/vulkan.h>)
#  include <vulkan/vulkan.h>
# endif
#endif

// Vulkan interop requires both libplacebo's Vulkan support and a libavutil
// new enough (>= 57.8.100) to provide the hwcontext_vulkan API used below.
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 8, 100) && \
    defined(PL_HAVE_VULKAN) && defined(VK_API_VERSION_1_2)
# define PL_HAVE_LAV_VULKAN
# include <libavutil/hwcontext_vulkan.h>
# include <libplacebo/vulkan.h>
# if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 11, 100)
#  define PL_HAVE_LAV_VULKAN_V2
# endif
#endif

// Translate an AVColorSpace to the equivalent pl_color_system. Values with
// no libplacebo equivalent map to PL_COLOR_SYSTEM_UNKNOWN.
PL_LIBAV_API enum pl_color_system pl_system_from_av(enum AVColorSpace spc)
{
    switch (spc) {
    case AVCOL_SPC_RGB:                return PL_COLOR_SYSTEM_RGB;
    case AVCOL_SPC_BT709:              return PL_COLOR_SYSTEM_BT_709;
    case AVCOL_SPC_UNSPECIFIED:        return PL_COLOR_SYSTEM_UNKNOWN;
    case AVCOL_SPC_RESERVED:           return PL_COLOR_SYSTEM_UNKNOWN;
    case AVCOL_SPC_FCC:                return PL_COLOR_SYSTEM_UNKNOWN; // missing
    case AVCOL_SPC_BT470BG:            return PL_COLOR_SYSTEM_BT_601;
    case AVCOL_SPC_SMPTE170M:          return PL_COLOR_SYSTEM_BT_601;
    case AVCOL_SPC_SMPTE240M:          return PL_COLOR_SYSTEM_SMPTE_240M;
    case AVCOL_SPC_YCGCO:              return PL_COLOR_SYSTEM_YCGCO;
    case AVCOL_SPC_BT2020_NCL:         return PL_COLOR_SYSTEM_BT_2020_NC;
    case AVCOL_SPC_BT2020_CL:          return PL_COLOR_SYSTEM_BT_2020_C;
    case AVCOL_SPC_SMPTE2085:          return PL_COLOR_SYSTEM_UNKNOWN; // missing
    case AVCOL_SPC_CHROMA_DERIVED_NCL: return PL_COLOR_SYSTEM_UNKNOWN; // missing
    case AVCOL_SPC_CHROMA_DERIVED_CL:  return PL_COLOR_SYSTEM_UNKNOWN; // missing
    // Note: this colorspace is confused between PQ and HLG, which libav*
    // requires inferring from other sources, but libplacebo makes explicit.
    // Default to PQ as it's the more common scenario.
    case AVCOL_SPC_ICTCP:              return PL_COLOR_SYSTEM_BT_2100_PQ;
    case AVCOL_SPC_NB:                 return PL_COLOR_SYSTEM_COUNT;
    }

    return PL_COLOR_SYSTEM_UNKNOWN;
}

// Inverse of pl_system_from_av; systems libav* cannot represent map to
// AVCOL_SPC_UNSPECIFIED.
PL_LIBAV_API enum AVColorSpace pl_system_to_av(enum pl_color_system sys)
{
    switch (sys) {
    case PL_COLOR_SYSTEM_UNKNOWN:     return AVCOL_SPC_UNSPECIFIED;
    case PL_COLOR_SYSTEM_BT_601:      return AVCOL_SPC_SMPTE170M;
    case PL_COLOR_SYSTEM_BT_709:      return AVCOL_SPC_BT709;
    case PL_COLOR_SYSTEM_SMPTE_240M:  return AVCOL_SPC_SMPTE240M;
    case PL_COLOR_SYSTEM_BT_2020_NC:  return AVCOL_SPC_BT2020_NCL;
    case PL_COLOR_SYSTEM_BT_2020_C:   return AVCOL_SPC_BT2020_CL;
    case PL_COLOR_SYSTEM_BT_2100_PQ:  return AVCOL_SPC_ICTCP;
    case PL_COLOR_SYSTEM_BT_2100_HLG: return AVCOL_SPC_ICTCP;
    case PL_COLOR_SYSTEM_DOLBYVISION: return AVCOL_SPC_UNSPECIFIED; // missing
    case PL_COLOR_SYSTEM_YCGCO:       return AVCOL_SPC_YCGCO;
    case PL_COLOR_SYSTEM_RGB:         return AVCOL_SPC_RGB;
    case PL_COLOR_SYSTEM_XYZ:         return AVCOL_SPC_UNSPECIFIED; // handled differently
    case PL_COLOR_SYSTEM_COUNT:       return AVCOL_SPC_NB;
    }

    return AVCOL_SPC_UNSPECIFIED;
}

// Translate an AVColorRange to the equivalent pl_color_levels.
PL_LIBAV_API enum pl_color_levels pl_levels_from_av(enum AVColorRange range)
{
    switch (range) {
    case AVCOL_RANGE_UNSPECIFIED: return PL_COLOR_LEVELS_UNKNOWN;
    case AVCOL_RANGE_MPEG:        return PL_COLOR_LEVELS_LIMITED;
    case AVCOL_RANGE_JPEG:        return PL_COLOR_LEVELS_FULL;
    case AVCOL_RANGE_NB:          return PL_COLOR_LEVELS_COUNT;
    }

    return PL_COLOR_LEVELS_UNKNOWN;
}

// Inverse of pl_levels_from_av.
PL_LIBAV_API enum AVColorRange pl_levels_to_av(enum pl_color_levels levels)
{
    switch (levels) {
    case PL_COLOR_LEVELS_UNKNOWN: return AVCOL_RANGE_UNSPECIFIED;
    case PL_COLOR_LEVELS_LIMITED: return AVCOL_RANGE_MPEG;
    case PL_COLOR_LEVELS_FULL:    return AVCOL_RANGE_JPEG;
    case
PL_COLOR_LEVELS_COUNT: return AVCOL_RANGE_NB;
    }

    return AVCOL_RANGE_UNSPECIFIED;
}

// Translate an AVColorPrimaries to the equivalent pl_color_primaries.
PL_LIBAV_API enum pl_color_primaries pl_primaries_from_av(enum AVColorPrimaries prim)
{
    switch (prim) {
    case AVCOL_PRI_RESERVED0:   return PL_COLOR_PRIM_UNKNOWN;
    case AVCOL_PRI_BT709:       return PL_COLOR_PRIM_BT_709;
    case AVCOL_PRI_UNSPECIFIED: return PL_COLOR_PRIM_UNKNOWN;
    case AVCOL_PRI_RESERVED:    return PL_COLOR_PRIM_UNKNOWN;
    case AVCOL_PRI_BT470M:      return PL_COLOR_PRIM_BT_470M;
    case AVCOL_PRI_BT470BG:     return PL_COLOR_PRIM_BT_601_625;
    case AVCOL_PRI_SMPTE170M:   return PL_COLOR_PRIM_BT_601_525;
    case AVCOL_PRI_SMPTE240M:   return PL_COLOR_PRIM_BT_601_525;
    case AVCOL_PRI_FILM:        return PL_COLOR_PRIM_FILM_C;
    case AVCOL_PRI_BT2020:      return PL_COLOR_PRIM_BT_2020;
    case AVCOL_PRI_SMPTE428:    return PL_COLOR_PRIM_CIE_1931;
    case AVCOL_PRI_SMPTE431:    return PL_COLOR_PRIM_DCI_P3;
    case AVCOL_PRI_SMPTE432:    return PL_COLOR_PRIM_DISPLAY_P3;
    case AVCOL_PRI_JEDEC_P22:   return PL_COLOR_PRIM_EBU_3213;
    case AVCOL_PRI_NB:          return PL_COLOR_PRIM_COUNT;
    }

    return PL_COLOR_PRIM_UNKNOWN;
}

// Inverse of pl_primaries_from_av; primaries libav* cannot represent map to
// AVCOL_PRI_UNSPECIFIED.
PL_LIBAV_API enum AVColorPrimaries pl_primaries_to_av(enum pl_color_primaries prim)
{
    switch (prim) {
    case PL_COLOR_PRIM_UNKNOWN:    return AVCOL_PRI_UNSPECIFIED;
    case PL_COLOR_PRIM_BT_601_525: return AVCOL_PRI_SMPTE170M;
    case PL_COLOR_PRIM_BT_601_625: return AVCOL_PRI_BT470BG;
    case PL_COLOR_PRIM_BT_709:     return AVCOL_PRI_BT709;
    case PL_COLOR_PRIM_BT_470M:    return AVCOL_PRI_BT470M;
    case PL_COLOR_PRIM_EBU_3213:   return AVCOL_PRI_JEDEC_P22;
    case PL_COLOR_PRIM_BT_2020:    return AVCOL_PRI_BT2020;
    case PL_COLOR_PRIM_APPLE:      return AVCOL_PRI_UNSPECIFIED; // missing
    case PL_COLOR_PRIM_ADOBE:      return AVCOL_PRI_UNSPECIFIED; // missing
    case PL_COLOR_PRIM_PRO_PHOTO:  return AVCOL_PRI_UNSPECIFIED; // missing
    case PL_COLOR_PRIM_CIE_1931:   return AVCOL_PRI_SMPTE428;
    case PL_COLOR_PRIM_DCI_P3:     return AVCOL_PRI_SMPTE431;
    case PL_COLOR_PRIM_DISPLAY_P3: return AVCOL_PRI_SMPTE432;
    case PL_COLOR_PRIM_V_GAMUT:    return AVCOL_PRI_UNSPECIFIED; // missing
    case PL_COLOR_PRIM_S_GAMUT:    return AVCOL_PRI_UNSPECIFIED; // missing
    case PL_COLOR_PRIM_FILM_C:     return AVCOL_PRI_FILM;
    case PL_COLOR_PRIM_ACES_AP0:   return AVCOL_PRI_UNSPECIFIED; // missing
    case PL_COLOR_PRIM_ACES_AP1:   return AVCOL_PRI_UNSPECIFIED; // missing
    case PL_COLOR_PRIM_COUNT:      return AVCOL_PRI_NB;
    }

    return AVCOL_PRI_UNSPECIFIED;
}

// Translate an AVColorTransferCharacteristic to the equivalent
// pl_color_transfer. Several OETF-defined AV values map to BT.1886 because
// libplacebo describes the display EOTF rather than the camera OETF.
PL_LIBAV_API enum pl_color_transfer pl_transfer_from_av(enum AVColorTransferCharacteristic trc)
{
    switch (trc) {
    case AVCOL_TRC_RESERVED0:    return PL_COLOR_TRC_UNKNOWN;
    case AVCOL_TRC_BT709:        return PL_COLOR_TRC_BT_1886; // EOTF != OETF
    case AVCOL_TRC_UNSPECIFIED:  return PL_COLOR_TRC_UNKNOWN;
    case AVCOL_TRC_RESERVED:     return PL_COLOR_TRC_UNKNOWN;
    case AVCOL_TRC_GAMMA22:      return PL_COLOR_TRC_GAMMA22;
    case AVCOL_TRC_GAMMA28:      return PL_COLOR_TRC_GAMMA28;
    case AVCOL_TRC_SMPTE170M:    return PL_COLOR_TRC_BT_1886; // EOTF != OETF
    case AVCOL_TRC_SMPTE240M:    return PL_COLOR_TRC_BT_1886; // EOTF != OETF
    case AVCOL_TRC_LINEAR:       return PL_COLOR_TRC_LINEAR;
    case AVCOL_TRC_LOG:          return PL_COLOR_TRC_UNKNOWN; // missing
    case AVCOL_TRC_LOG_SQRT:     return PL_COLOR_TRC_UNKNOWN; // missing
    case AVCOL_TRC_IEC61966_2_4: return PL_COLOR_TRC_BT_1886; // EOTF != OETF
    case AVCOL_TRC_BT1361_ECG:   return PL_COLOR_TRC_BT_1886; // EOTF != OETF
    case AVCOL_TRC_IEC61966_2_1: return PL_COLOR_TRC_SRGB;
    case AVCOL_TRC_BT2020_10:    return PL_COLOR_TRC_BT_1886; // EOTF != OETF
    case AVCOL_TRC_BT2020_12:    return PL_COLOR_TRC_BT_1886; // EOTF != OETF
    case AVCOL_TRC_SMPTE2084:    return PL_COLOR_TRC_PQ;
    case AVCOL_TRC_SMPTE428:     return PL_COLOR_TRC_ST428;
    case AVCOL_TRC_ARIB_STD_B67: return PL_COLOR_TRC_HLG;
    case AVCOL_TRC_NB:           return PL_COLOR_TRC_COUNT;
    }

    return PL_COLOR_TRC_UNKNOWN;
}

// Inverse of pl_transfer_from_av; transfers libav* cannot represent map to
// AVCOL_TRC_UNSPECIFIED.
PL_LIBAV_API enum AVColorTransferCharacteristic pl_transfer_to_av(enum pl_color_transfer trc)
{
    switch (trc) {
    case PL_COLOR_TRC_UNKNOWN:   return AVCOL_TRC_UNSPECIFIED;
case PL_COLOR_TRC_BT_1886:   return AVCOL_TRC_BT709; // EOTF != OETF
    case PL_COLOR_TRC_SRGB:      return AVCOL_TRC_IEC61966_2_1;
    case PL_COLOR_TRC_LINEAR:    return AVCOL_TRC_LINEAR;
    case PL_COLOR_TRC_GAMMA18:   return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_GAMMA20:   return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_GAMMA22:   return AVCOL_TRC_GAMMA22;
    case PL_COLOR_TRC_GAMMA24:   return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_GAMMA26:   return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_GAMMA28:   return AVCOL_TRC_GAMMA28;
    case PL_COLOR_TRC_ST428:     return AVCOL_TRC_SMPTE428;
    case PL_COLOR_TRC_PRO_PHOTO: return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_PQ:        return AVCOL_TRC_SMPTE2084;
    case PL_COLOR_TRC_HLG:       return AVCOL_TRC_ARIB_STD_B67;
    case PL_COLOR_TRC_V_LOG:     return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_S_LOG1:    return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_S_LOG2:    return AVCOL_TRC_UNSPECIFIED; // missing
    case PL_COLOR_TRC_COUNT:     return AVCOL_TRC_NB;
    }

    return AVCOL_TRC_UNSPECIFIED;
}

// Translate an AVChromaLocation to the equivalent pl_chroma_location.
PL_LIBAV_API enum pl_chroma_location pl_chroma_from_av(enum AVChromaLocation loc)
{
    switch (loc) {
    case AVCHROMA_LOC_UNSPECIFIED: return PL_CHROMA_UNKNOWN;
    case AVCHROMA_LOC_LEFT:        return PL_CHROMA_LEFT;
    case AVCHROMA_LOC_CENTER:      return PL_CHROMA_CENTER;
    case AVCHROMA_LOC_TOPLEFT:     return PL_CHROMA_TOP_LEFT;
    case AVCHROMA_LOC_TOP:         return PL_CHROMA_TOP_CENTER;
    case AVCHROMA_LOC_BOTTOMLEFT:  return PL_CHROMA_BOTTOM_LEFT;
    case AVCHROMA_LOC_BOTTOM:      return PL_CHROMA_BOTTOM_CENTER;
    case AVCHROMA_LOC_NB:          return PL_CHROMA_COUNT;
    }

    return PL_CHROMA_UNKNOWN;
}

// Inverse of pl_chroma_from_av.
PL_LIBAV_API enum AVChromaLocation pl_chroma_to_av(enum pl_chroma_location loc)
{
    switch (loc) {
    case PL_CHROMA_UNKNOWN:       return AVCHROMA_LOC_UNSPECIFIED;
    case PL_CHROMA_LEFT:          return AVCHROMA_LOC_LEFT;
    case PL_CHROMA_CENTER:        return AVCHROMA_LOC_CENTER;
    case PL_CHROMA_TOP_LEFT:      return AVCHROMA_LOC_TOPLEFT;
    case PL_CHROMA_TOP_CENTER:    return AVCHROMA_LOC_TOP;
    case PL_CHROMA_BOTTOM_LEFT:   return AVCHROMA_LOC_BOTTOMLEFT;
    case PL_CHROMA_BOTTOM_CENTER: return AVCHROMA_LOC_BOTTOM;
    case PL_CHROMA_COUNT:         return AVCHROMA_LOC_NB;
    }

    return AVCHROMA_LOC_UNSPECIFIED;
}

#ifdef PL_HAVE_LAV_HDR
// Fill `out` with HDR metadata unpacked from the (all-optional) AV-side
// payloads in `data`. Absent payloads leave the respective fields untouched.
PL_LIBAV_API void pl_map_hdr_metadata(struct pl_hdr_metadata *out,
                                      const struct pl_av_hdr_metadata *data)
{
    if (data->mdm) {
        if (data->mdm->has_luminance) {
            out->max_luma = av_q2d(data->mdm->max_luminance);
            out->min_luma = av_q2d(data->mdm->min_luminance);
            if (out->max_luma < 10.0 || out->min_luma >= out->max_luma)
                out->max_luma = out->min_luma = 0; /* sanity */
        }
        if (data->mdm->has_primaries) {
            out->prim = (struct pl_raw_primaries) {
                .red.x   = av_q2d(data->mdm->display_primaries[0][0]),
                .red.y   = av_q2d(data->mdm->display_primaries[0][1]),
                .green.x = av_q2d(data->mdm->display_primaries[1][0]),
                .green.y = av_q2d(data->mdm->display_primaries[1][1]),
                .blue.x  = av_q2d(data->mdm->display_primaries[2][0]),
                .blue.y  = av_q2d(data->mdm->display_primaries[2][1]),
                .white.x = av_q2d(data->mdm->white_point[0]),
                .white.y = av_q2d(data->mdm->white_point[1]),
            };
        }
    }

    if (data->clm) {
        out->max_cll = data->clm->MaxCLL;
        out->max_fall = data->clm->MaxFALL;
    }

    // HDR10+ dynamic metadata; values are scaled by 10000 to convert from
    // normalized [0,1] fractions to nits (cd/m^2)
    if (data->dhp && data->dhp->application_version < 2) {
        float hist_max = 0;
        const AVHDRPlusColorTransformParams *pars = &data->dhp->params[0];
        assert(data->dhp->num_windows > 0);
        out->scene_max[0] = 10000 * av_q2d(pars->maxscl[0]);
        out->scene_max[1] = 10000 * av_q2d(pars->maxscl[1]);
        out->scene_max[2] = 10000 * av_q2d(pars->maxscl[2]);
        out->scene_avg = 10000 * av_q2d(pars->average_maxrgb);

        // Calculate largest value from histogram to use as fallback for clips
        // with missing MaxSCL information. Note that this may end up picking
        // the "reserved" value at the 5% percentile, which in practice appears
        // to track the brightest pixel in the scene.
for (int i = 0; i < pars->num_distribution_maxrgb_percentiles; i++) {
            float hist_val = av_q2d(pars->distribution_maxrgb[i].percentile);
            if (hist_val > hist_max)
                hist_max = hist_val;
        }
        hist_max *= 10000;
        if (!out->scene_max[0])
            out->scene_max[0] = hist_max;
        if (!out->scene_max[1])
            out->scene_max[1] = hist_max;
        if (!out->scene_max[2])
            out->scene_max[2] = hist_max;

        if (pars->tone_mapping_flag == 1) {
            out->ootf.target_luma = av_q2d(data->dhp->targeted_system_display_maximum_luminance);
            out->ootf.knee_x = av_q2d(pars->knee_point_x);
            out->ootf.knee_y = av_q2d(pars->knee_point_y);
            assert(pars->num_bezier_curve_anchors < 16);
            for (int i = 0; i < pars->num_bezier_curve_anchors; i++)
                out->ootf.anchors[i] = av_q2d(pars->bezier_curve_anchors[i]);
            out->ootf.num_anchors = pars->num_bezier_curve_anchors;
        }
    }
}
#endif // PL_HAVE_LAV_HDR

// Return the raw payload of the given frame side data type, or NULL if absent.
static inline void *pl_get_side_data_raw(const AVFrame *frame,
                                         enum AVFrameSideDataType type)
{
    const AVFrameSideData *sd = av_frame_get_side_data(frame, type);
    return sd ? (void *) sd->data : NULL;
}

// Generate a pl_color_space from an AVFrame's tagged primaries/transfer,
// plus any attached HDR metadata side data (when compiled in).
PL_LIBAV_API void pl_color_space_from_avframe(struct pl_color_space *out_csp,
                                              const AVFrame *frame)
{
    *out_csp = (struct pl_color_space) {
        .primaries = pl_primaries_from_av(frame->color_primaries),
        .transfer = pl_transfer_from_av(frame->color_trc),
    };

#ifdef PL_HAVE_LAV_HDR
    pl_map_hdr_metadata(&out_csp->hdr, &(struct pl_av_hdr_metadata) {
        .mdm = pl_get_side_data_raw(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA),
        .clm = pl_get_side_data_raw(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL),
        .dhp = pl_get_side_data_raw(frame, AV_FRAME_DATA_DYNAMIC_HDR_PLUS),
    });
#endif
}

// Pick the pl_field corresponding to an AVFrame's interlacing flags. The
// flag names moved into AVFrame.flags in libavutil 58.7.100.
PL_LIBAV_API enum pl_field pl_field_from_avframe(const AVFrame *frame)
{
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(58, 7, 100)
    if (!frame || !(frame->flags & AV_FRAME_FLAG_INTERLACED))
        return PL_FIELD_NONE;
    return (frame->flags & AV_FRAME_FLAG_TOP_FIELD_FIRST)
                ? PL_FIELD_TOP : PL_FIELD_BOTTOM;
#else
    if (!frame || !frame->interlaced_frame)
        return PL_FIELD_NONE;
    return frame->top_field_first ? PL_FIELD_TOP : PL_FIELD_BOTTOM;
#endif
}

#ifdef PL_HAVE_LAV_FILM_GRAIN
// Translate an AVFilmGrainParams into pl_film_grain_data (AV1 or H.274).
PL_LIBAV_API void pl_film_grain_from_av(struct pl_film_grain_data *out_data,
                                        const AVFilmGrainParams *fgp)
{
    out_data->seed = fgp->seed;

    switch (fgp->type) {
    case AV_FILM_GRAIN_PARAMS_NONE: break;
    case AV_FILM_GRAIN_PARAMS_AV1: {
        const AVFilmGrainAOMParams *src = &fgp->codec.aom;
        struct pl_av1_grain_data *dst = &out_data->params.av1;
        out_data->type = PL_FILM_GRAIN_AV1;
        *dst = (struct pl_av1_grain_data) {
            .num_points_y = src->num_y_points,
            .chroma_scaling_from_luma = src->chroma_scaling_from_luma,
            .num_points_uv = { src->num_uv_points[0], src->num_uv_points[1] },
            .scaling_shift = src->scaling_shift,
            .ar_coeff_lag = src->ar_coeff_lag,
            .ar_coeff_shift = src->ar_coeff_shift,
            .grain_scale_shift = src->grain_scale_shift,
            .uv_mult = { src->uv_mult[0], src->uv_mult[1] },
            .uv_mult_luma = { src->uv_mult_luma[0], src->uv_mult_luma[1] },
            .uv_offset = { src->uv_offset[0], src->uv_offset[1] },
            .overlap = src->overlap_flag,
        };

        // Sanity check that the array layouts agree before bulk-copying
        assert(sizeof(dst->ar_coeffs_uv) == sizeof(src->ar_coeffs_uv));
        memcpy(dst->points_y, src->y_points, sizeof(dst->points_y));
        memcpy(dst->points_uv, src->uv_points, sizeof(dst->points_uv));
        memcpy(dst->ar_coeffs_y, src->ar_coeffs_y, sizeof(dst->ar_coeffs_y));
        memcpy(dst->ar_coeffs_uv, src->ar_coeffs_uv, sizeof(dst->ar_coeffs_uv));
        break;
    }
#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(57, 2, 100)
    case AV_FILM_GRAIN_PARAMS_H274: {
        const AVFilmGrainH274Params *src = &fgp->codec.h274;
        struct pl_h274_grain_data *dst = &out_data->params.h274;
        out_data->type = PL_FILM_GRAIN_H274;
        *dst = (struct pl_h274_grain_data) {
            .model_id = src->model_id,
            .blending_mode_id = src->blending_mode_id,
            .log2_scale_factor = src->log2_scale_factor,
            .component_model_present = {
src->component_model_present[0],
                src->component_model_present[1],
                src->component_model_present[2],
            },
            .intensity_interval_lower_bound = {
                src->intensity_interval_lower_bound[0],
                src->intensity_interval_lower_bound[1],
                src->intensity_interval_lower_bound[2],
            },
            .intensity_interval_upper_bound = {
                src->intensity_interval_upper_bound[0],
                src->intensity_interval_upper_bound[1],
                src->intensity_interval_upper_bound[2],
            },
            .comp_model_value = {
                src->comp_model_value[0],
                src->comp_model_value[1],
                src->comp_model_value[2],
            },
        };
        memcpy(dst->num_intensity_intervals, src->num_intensity_intervals,
               sizeof(dst->num_intensity_intervals));
        memcpy(dst->num_model_values, src->num_model_values,
               sizeof(dst->num_model_values));
        break;
    }
#endif
    }
}
#endif // PL_HAVE_LAV_FILM_GRAIN

// Number of leading non-empty components described by `data` (0 to 4).
static inline int pl_plane_data_num_comps(const struct pl_plane_data *data)
{
    for (int i = 0; i < 4; i++) {
        if (data->component_size[i] == 0)
            return i;
    }

    return 4;
}

// Translate an AVPixelFormat into up to 4 pl_plane_data descriptions.
// Returns the number of planes, or 0 for unsupported formats (bitstream,
// palette, bayer, hwaccel, mismatched strides, ...). If `out_bits` is
// non-NULL, additionally tries to find one aligned bit encoding shared by
// all (non-alpha) planes, zeroing `*out_bits` if that is impossible.
PL_LIBAV_API int pl_plane_data_from_pixfmt(struct pl_plane_data out_data[4],
                                           struct pl_bit_encoding *out_bits,
                                           enum AVPixelFormat pix_fmt)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pix_fmt);
    int planes = av_pix_fmt_count_planes(pix_fmt);
    struct pl_plane_data aligned_data[4];
    struct pl_bit_encoding bits;
    bool first;
    if (!desc || planes < 0) // e.g. AV_PIX_FMT_NONE
        return 0;

    if (desc->flags & AV_PIX_FMT_FLAG_BITSTREAM) {
        // Bitstream formats will most likely never be supported
        return 0;
    }

    if (desc->flags & AV_PIX_FMT_FLAG_PAL) {
        // Palette formats are (currently) not supported
        return 0;
    }

    if (desc->flags & AV_PIX_FMT_FLAG_BAYER) {
        // Bayer format don't have valid `desc->offset` values, so we can't
        // use `pl_plane_data_from_mask` on them.
        return 0;
    }

    if (desc->nb_components == 0 || desc->nb_components > 4) {
        // Bogus components, possibly fake/virtual/hwaccel format?
        return 0;
    }

    if (planes > 4)
        return 0; // This shouldn't ever happen

    // Fill in the details for each plane
    for (int p = 0; p < planes; p++) {
        struct pl_plane_data *data = &out_data[p];
        int size[4] = {0};
        int shift[4] = {0};
        data->swapped = desc->flags & AV_PIX_FMT_FLAG_BE;
        data->type = (desc->flags & AV_PIX_FMT_FLAG_FLOAT)
                        ? PL_FMT_FLOAT
                        : PL_FMT_UNORM;

        data->pixel_stride = 0;

        for (int c = 0; c < desc->nb_components; c++) {
            const AVComponentDescriptor *comp = &desc->comp[c];
            if (comp->plane != p)
                continue;
            if (data->swapped && comp->shift) {
                // We cannot naively handle packed big endian formats because
                // swapping the words also swaps the component order, so just
                // exit out as a stupid safety measure
                return 0;
            }

            size[c] = comp->depth;
            shift[c] = comp->shift + comp->offset * 8;

            if (data->pixel_stride && (int) data->pixel_stride != comp->step) {
                // Pixel format contains components with different pixel stride
                // (e.g. packed YUYV), this is currently not supported
                return 0;
            }
            data->pixel_stride = comp->step;
        }

        pl_plane_data_from_comps(data, size, shift);
    }

    if (!out_bits)
        return planes;

    // Attempt aligning all of the planes for optimum compatibility
    first = true;
    for (int p = 0; p < planes; p++) {
        aligned_data[p] = out_data[p];

        // Planes with only an alpha component should be ignored
        if (pl_plane_data_num_comps(&aligned_data[p]) == 1 &&
            aligned_data[p].component_map[0] == PL_CHANNEL_A)
        {
            continue;
        }

        if (!pl_plane_data_align(&aligned_data[p], &bits))
            goto misaligned;

        if (first) {
            *out_bits = bits;
            first = false;
        } else {
            if (!pl_bit_encoding_equal(&bits, out_bits))
                goto misaligned;
        }
    }

    // Overwrite the planes by their aligned versions
    for (int p = 0; p < planes; p++)
        out_data[p] = aligned_data[p];

    return planes;

misaligned:
    *out_bits = (struct pl_bit_encoding) {0};
    return planes;
}

PL_LIBAV_API bool
pl_test_pixfmt_caps(pl_gpu gpu, enum AVPixelFormat pixfmt, + enum pl_fmt_caps caps) +{ + struct pl_bit_encoding bits; + struct pl_plane_data data[4]; + pl_fmt fmt; + int planes; + + switch (pixfmt) { + case AV_PIX_FMT_DRM_PRIME: + case AV_PIX_FMT_VAAPI: + return gpu->import_caps.tex & PL_HANDLE_DMA_BUF; + +#ifdef PL_HAVE_LAV_VULKAN + case AV_PIX_FMT_VULKAN: + return pl_vulkan_get(gpu); +#endif + + default: break; + } + + planes = pl_plane_data_from_pixfmt(data, &bits, pixfmt); + if (!planes) + return false; + + for (int i = 0; i < planes; i++) { + data[i].row_stride = 0; + fmt = pl_plane_find_fmt(gpu, NULL, &data[i]); + if (!fmt || (fmt->caps & caps) != caps) + return false; + + } + + return true; +} + +PL_LIBAV_API bool pl_test_pixfmt(pl_gpu gpu, enum AVPixelFormat pixfmt) +{ + return pl_test_pixfmt_caps(gpu, pixfmt, 0); +} + +PL_LIBAV_API void pl_avframe_set_color(AVFrame *frame, struct pl_color_space csp) +{ + const AVFrameSideData *sd; + (void) sd; + + frame->color_primaries = pl_primaries_to_av(csp.primaries); + frame->color_trc = pl_transfer_to_av(csp.transfer); + +#ifdef PL_HAVE_LAV_HDR + if (csp.hdr.max_cll) { + sd = av_frame_get_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL); + if (!sd) { + sd = av_frame_new_side_data(frame, AV_FRAME_DATA_CONTENT_LIGHT_LEVEL, + sizeof(AVContentLightMetadata)); + } + + if (sd) { + AVContentLightMetadata *clm = (AVContentLightMetadata *) sd->data; + *clm = (AVContentLightMetadata) { + .MaxCLL = csp.hdr.max_cll, + .MaxFALL = csp.hdr.max_fall, + }; + } + } + + if (csp.hdr.max_luma || csp.hdr.prim.red.x) { + sd = av_frame_get_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA); + if (!sd) { + sd = av_frame_new_side_data(frame, AV_FRAME_DATA_MASTERING_DISPLAY_METADATA, + sizeof(AVMasteringDisplayMetadata)); + } + + if (sd) { + AVMasteringDisplayMetadata *mdm = (AVMasteringDisplayMetadata *) sd->data; + *mdm = (AVMasteringDisplayMetadata) { + .max_luminance = av_d2q(csp.hdr.max_luma, 1000000), + .min_luminance = 
av_d2q(csp.hdr.min_luma, 1000000), + .has_luminance = !!csp.hdr.max_luma, + .display_primaries = { + { + av_d2q(csp.hdr.prim.red.x, 1000000), + av_d2q(csp.hdr.prim.red.y, 1000000), + }, { + av_d2q(csp.hdr.prim.green.x, 1000000), + av_d2q(csp.hdr.prim.green.y, 1000000), + }, { + av_d2q(csp.hdr.prim.blue.x, 1000000), + av_d2q(csp.hdr.prim.blue.y, 1000000), + } + }, + .white_point = { + av_d2q(csp.hdr.prim.white.x, 1000000), + av_d2q(csp.hdr.prim.white.y, 1000000), + }, + .has_primaries = !!csp.hdr.prim.red.x, + }; + } + } +#endif // PL_HAVE_LAV_HDR +} + +PL_LIBAV_API void pl_avframe_set_repr(AVFrame *frame, struct pl_color_repr repr) +{ + frame->colorspace = pl_system_to_av(repr.sys); + frame->color_range = pl_levels_to_av(repr.levels); + + // No real way to map repr.bits, the image format already has to match +} + +PL_LIBAV_API void pl_avframe_set_profile(AVFrame *frame, struct pl_icc_profile profile) +{ + const AVFrameSideData *sd; + av_frame_remove_side_data(frame, AV_FRAME_DATA_ICC_PROFILE); + + if (!profile.len) + return; + + sd = av_frame_new_side_data(frame, AV_FRAME_DATA_ICC_PROFILE, profile.len); + memcpy(sd->data, profile.data, profile.len); +} + +PL_LIBAV_API void pl_frame_from_avframe(struct pl_frame *out, + const AVFrame *frame) +{ + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format); + int planes = av_pix_fmt_count_planes(frame->format); + const AVFrameSideData *sd; + assert(desc); + + if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) { + const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; + desc = av_pix_fmt_desc_get(hwfc->sw_format); + planes = av_pix_fmt_count_planes(hwfc->sw_format); + } + + // This should never fail, and there's nothing really useful we can do in + // this failure case anyway, since this is a `void` function. 
assert(planes <= 4);

    *out = (struct pl_frame) {
        .num_planes = planes,
        .crop = {
            .x0 = frame->crop_left,
            .y0 = frame->crop_top,
            .x1 = frame->width - frame->crop_right,
            .y1 = frame->height - frame->crop_bottom,
        },
        .repr = {
            .sys = pl_system_from_av(frame->colorspace),
            .levels = pl_levels_from_av(frame->color_range),
            .alpha = (desc->flags & AV_PIX_FMT_FLAG_ALPHA)
                        ? PL_ALPHA_INDEPENDENT
                        : PL_ALPHA_UNKNOWN,

            // For sake of simplicity, just use the first component's depth as
            // the authoritative color depth for the whole image. Usually, this
            // will be overwritten by more specific information when using e.g.
            // `pl_map_avframe`, but for the sake of e.g. users wishing to map
            // hwaccel frames manually, this is a good default.
            .bits.color_depth = desc->comp[0].depth,
        },
    };

    pl_color_space_from_avframe(&out->color, frame);

    if (frame->colorspace == AVCOL_SPC_ICTCP &&
        frame->color_trc == AVCOL_TRC_ARIB_STD_B67)
    {
        // libav* makes no distinction between PQ and HLG ICtCp, so we need
        // to manually fix it in the case that we have HLG ICtCp data.
        out->repr.sys = PL_COLOR_SYSTEM_BT_2100_HLG;

    } else if (strncmp(desc->name, "xyz", 3) == 0) {

        // libav* handles this as a special case, but doesn't provide an
        // explicit flag for it either, so we have to resort to this ugly
        // hack...
        out->repr.sys = PL_COLOR_SYSTEM_XYZ;

    } else if (desc->flags & AV_PIX_FMT_FLAG_RGB) {

        out->repr.sys = PL_COLOR_SYSTEM_RGB;
        out->repr.levels = PL_COLOR_LEVELS_FULL; // libav* ignores levels for RGB

    } else if (!pl_color_system_is_ycbcr_like(out->repr.sys)) {
        // libav* likes leaving this as UNKNOWN (or even RGB) for YCbCr frames,
        // which confuses libplacebo since we infer UNKNOWN as RGB. To get
        // around this, explicitly infer a suitable colorspace.
        out->repr.sys = pl_color_system_guess_ycbcr(frame->width, frame->height);
    }

    if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_ICC_PROFILE))) {
        out->profile = (struct pl_icc_profile) {
            .data = sd->data,
            .len = sd->size,
        };

        // Needed to ensure profile uniqueness
        pl_icc_profile_compute_signature(&out->profile);
    }

    if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DISPLAYMATRIX))) {
        double rot = av_display_rotation_get((const int32_t *) sd->data);
        out->rotation = pl_rotation_normalize(4.5 - rot / 90.0);
    }

#ifdef PL_HAVE_LAV_FILM_GRAIN
    if ((sd = av_frame_get_side_data(frame, AV_FRAME_DATA_FILM_GRAIN_PARAMS)))
        pl_film_grain_from_av(&out->film_grain, (AVFilmGrainParams *) sd->data);
#endif // PL_HAVE_LAV_FILM_GRAIN

    for (int p = 0; p < out->num_planes; p++) {
        struct pl_plane *plane = &out->planes[p];

        // Fill in the component mapping array
        for (int c = 0; c < desc->nb_components; c++) {
            if (desc->comp[c].plane == p)
                plane->component_mapping[plane->components++] = c;
        }

        // Clear the superfluous components
        for (int c = plane->components; c < 4; c++)
            plane->component_mapping[c] = PL_CHANNEL_NONE;
    }

    // Only set the chroma location for definitely subsampled images, makes no
    // sense otherwise
    if (desc->log2_chroma_w || desc->log2_chroma_h) {
        enum pl_chroma_location loc = pl_chroma_from_av(frame->chroma_location);
        pl_frame_set_chroma_location(out, loc);
    }
}

#if LIBAVFORMAT_VERSION_INT >= AV_VERSION_INT(60, 15, 100)
// Look up coded-stream packet side data of the given type; NULL if absent.
PL_LIBAV_API const uint8_t *pl_av_stream_get_side_data(const AVStream *st,
                                                       enum AVPacketSideDataType type)
{
    const AVPacketSideData *sd;
    sd = av_packet_side_data_get(st->codecpar->coded_side_data,
                                 st->codecpar->nb_coded_side_data,
                                 type);
    return sd ?
sd->data : NULL;
}
#else
# define pl_av_stream_get_side_data(st, type) av_stream_get_side_data(st, type, NULL)
#endif

// Copy stream-level properties (rotation, static HDR metadata) onto `out`.
PL_LIBAV_API void pl_frame_copy_stream_props(struct pl_frame *out,
                                             const AVStream *stream)
{
    const uint8_t *sd;
    if ((sd = pl_av_stream_get_side_data(stream, AV_PKT_DATA_DISPLAYMATRIX))) {
        double rot = av_display_rotation_get((const int32_t *) sd);
        out->rotation = pl_rotation_normalize(4.5 - rot / 90.0);
    }

#ifdef PL_HAVE_LAV_HDR
    pl_map_hdr_metadata(&out->color.hdr, &(struct pl_av_hdr_metadata) {
        .mdm = (void *) pl_av_stream_get_side_data(stream,
                            AV_PKT_DATA_MASTERING_DISPLAY_METADATA),
        .clm = (void *) pl_av_stream_get_side_data(stream,
                            AV_PKT_DATA_CONTENT_LIGHT_LEVEL),
# if LIBAVCODEC_VERSION_INT >= AV_VERSION_INT(59, 2, 100)
        .dhp = (void *) pl_av_stream_get_side_data(stream,
                            AV_PKT_DATA_DYNAMIC_HDR10_PLUS),
# endif
    });
#endif
}

#undef pl_av_stream_get_side_data

#ifdef PL_HAVE_LAV_DOLBY_VISION
// Unpack an AVDOVIMetadata into pl_dovi_metadata, converting the fixed-point
// RPU coefficients to floats. No-op if `data` is NULL.
PL_LIBAV_API void pl_map_dovi_metadata(struct pl_dovi_metadata *out,
                                       const AVDOVIMetadata *data)
{
    const AVDOVIRpuDataHeader *header;
    const AVDOVIDataMapping *mapping;
    const AVDOVIColorMetadata *color;
    if (!data)
        return;

    header = av_dovi_get_header(data);
    mapping = av_dovi_get_mapping(data);
    color = av_dovi_get_color(data);

    for (int i = 0; i < 3; i++)
        out->nonlinear_offset[i] = av_q2d(color->ycc_to_rgb_offset[i]);
    for (int i = 0; i < 9; i++) {
        float *nonlinear = &out->nonlinear.m[0][0];
        float *linear = &out->linear.m[0][0];
        nonlinear[i] = av_q2d(color->ycc_to_rgb_matrix[i]);
        linear[i] = av_q2d(color->rgb_to_lms_matrix[i]);
    }
    // Per-component reshaping curves (polynomial or MMR segments)
    for (int c = 0; c < 3; c++) {
        const AVDOVIReshapingCurve *csrc = &mapping->curves[c];
        struct pl_reshape_data *cdst = &out->comp[c];
        cdst->num_pivots = csrc->num_pivots;
        for (int i = 0; i < csrc->num_pivots; i++) {
            const float scale = 1.0f / ((1 << header->bl_bit_depth) - 1);
            cdst->pivots[i] = scale * csrc->pivots[i];
        }
        for (int i = 0; i < csrc->num_pivots - 1; i++) {
            const float scale = 1.0f / (1 << header->coef_log2_denom);
            cdst->method[i] = csrc->mapping_idc[i];
            switch (csrc->mapping_idc[i]) {
            case AV_DOVI_MAPPING_POLYNOMIAL:
                for (int k = 0; k < 3; k++) {
                    cdst->poly_coeffs[i][k] = (k <= csrc->poly_order[i])
                                                ? scale * csrc->poly_coef[i][k]
                                                : 0.0f;
                }
                break;
            case AV_DOVI_MAPPING_MMR:
                cdst->mmr_order[i] = csrc->mmr_order[i];
                cdst->mmr_constant[i] = scale * csrc->mmr_constant[i];
                for (int j = 0; j < csrc->mmr_order[i]; j++) {
                    for (int k = 0; k < 7; k++)
                        cdst->mmr_coeffs[i][j][k] = scale * csrc->mmr_coef[i][j][k];
                }
                break;
            }
        }
    }
}

// Apply Dolby Vision metadata to a pl_frame, using `dovi` as the backing
// storage for the unpacked data. Only base-layer-only streams (i.e. with
// disable_residual_flag set) are mapped; others are ignored.
PL_LIBAV_API void pl_frame_map_avdovi_metadata(struct pl_frame *out_frame,
                                               struct pl_dovi_metadata *dovi,
                                               const AVDOVIMetadata *metadata)
{
    const AVDOVIRpuDataHeader *header;
    const AVDOVIColorMetadata *color;
    if (!dovi || !metadata)
        return;

    header = av_dovi_get_header(metadata);
    color = av_dovi_get_color(metadata);
    if (header->disable_residual_flag) {
        pl_map_dovi_metadata(dovi, metadata);

        out_frame->repr.dovi = dovi;
        out_frame->repr.sys = PL_COLOR_SYSTEM_DOLBYVISION;
        out_frame->color.primaries = PL_COLOR_PRIM_BT_2020;
        out_frame->color.transfer = PL_COLOR_TRC_PQ;
        // source_{min,max}_pq are 12-bit PQ values; rescale to nits
        out_frame->color.hdr.min_luma =
            pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, color->source_min_pq / 4095.0f);
        out_frame->color.hdr.max_luma =
            pl_hdr_rescale(PL_HDR_PQ, PL_HDR_NITS, color->source_max_pq / 4095.0f);
    }
}
#endif // PL_HAVE_LAV_DOLBY_VISION

// Like pl_frame_from_avframe, but also (re-)creates GPU textures of the
// right size/format for the frame's planes. Returns false on failure.
PL_LIBAV_API bool pl_frame_recreate_from_avframe(pl_gpu gpu,
                                                 struct pl_frame *out,
                                                 pl_tex tex[4],
                                                 const AVFrame *frame)
{
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
    struct pl_plane_data data[4] = {0};
    int planes;

    pl_frame_from_avframe(out, frame);
    planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format);
    if (!planes)
        return false;

    for (int p = 0; p < planes; p++) {
        bool is_chroma = p == 1 || p == 2; // matches
lavu logic + data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0); + data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0); + + if (!pl_recreate_plane(gpu, &out->planes[p], &tex[p], &data[p])) + return false; + } + + return true; +} + +static void pl_avframe_free_cb(void *priv) +{ + AVFrame *frame = priv; + av_frame_free(&frame); +} + +#define PL_MAGIC0 0xfb5b3b8b +#define PL_MAGIC1 0xee659f6d + +struct pl_avalloc { + uint32_t magic[2]; + pl_gpu gpu; + pl_buf buf; +}; + +// Attached to `pl_frame.user_data` for mapped AVFrames +struct pl_avframe_priv { + AVFrame *avframe; + struct pl_dovi_metadata dovi; // backing storage for per-frame dovi metadata + pl_tex planar; // for planar vulkan textures +}; + +static void pl_fix_hwframe_sample_depth(struct pl_frame *out, const AVFrame *frame) +{ + const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; + pl_fmt fmt = out->planes[0].texture->params.format; + struct pl_bit_encoding *bits = &out->repr.bits; + + bits->sample_depth = fmt->component_depth[0]; + + switch (hwfc->sw_format) { + case AV_PIX_FMT_P010: bits->bit_shift = 6; break; + default: break; + } +} + +static bool pl_map_avframe_drm(pl_gpu gpu, struct pl_frame *out, + const AVFrame *frame) +{ + const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data; + const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format); + const AVDRMFrameDescriptor *drm = (AVDRMFrameDescriptor *) frame->data[0]; + assert(frame->format == AV_PIX_FMT_DRM_PRIME); + if (!(gpu->import_caps.tex & PL_HANDLE_DMA_BUF)) + return false; + + assert(drm->nb_layers >= out->num_planes); + for (int n = 0; n < out->num_planes; n++) { + const AVDRMLayerDescriptor *layer = &drm->layers[n]; + const AVDRMPlaneDescriptor *plane = &layer->planes[0]; + const AVDRMObjectDescriptor *object = &drm->objects[plane->object_index]; + pl_fmt fmt = pl_find_fourcc(gpu, layer->format); + bool is_chroma = 
n == 1 || n == 2; + if (!fmt || !pl_fmt_has_modifier(fmt, object->format_modifier)) + return false; + + assert(layer->nb_planes == 1); // we only support planar formats + assert(plane->pitch >= 0); // definitely requires special handling + out->planes[n].texture = pl_tex_create(gpu, pl_tex_params( + .w = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0), + .h = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0), + .format = fmt, + .sampleable = true, + .blit_src = fmt->caps & PL_FMT_CAP_BLITTABLE, + .import_handle = PL_HANDLE_DMA_BUF, + .shared_mem = { + .handle.fd = object->fd, + .size = object->size, + .offset = plane->offset, + .drm_format_mod = object->format_modifier, + .stride_w = plane->pitch, + }, + )); + if (!out->planes[n].texture) + return false; + } + + pl_fix_hwframe_sample_depth(out, frame); + return true; +} + +// Derive a DMABUF from any other hwaccel format, and map that instead +static bool pl_map_avframe_derived(pl_gpu gpu, struct pl_frame *out, + const AVFrame *frame) +{ + const int flags = AV_HWFRAME_MAP_READ | AV_HWFRAME_MAP_DIRECT; + struct pl_avframe_priv *priv = out->user_data; + AVFrame *derived = av_frame_alloc(); + derived->width = frame->width; + derived->height = frame->height; + derived->format = AV_PIX_FMT_DRM_PRIME; + derived->hw_frames_ctx = av_buffer_ref(frame->hw_frames_ctx); + if (av_hwframe_map(derived, frame, flags) < 0) + goto error; + if (av_frame_copy_props(derived, frame) < 0) + goto error; + if (!pl_map_avframe_drm(gpu, out, derived)) + goto error; + + av_frame_free(&priv->avframe); + priv->avframe = derived; + return true; + +error: + av_frame_free(&derived); + return false; +} + +#ifdef PL_HAVE_LAV_VULKAN +static bool pl_acquire_avframe(pl_gpu gpu, struct pl_frame *frame) +{ + const struct pl_avframe_priv *priv = frame->user_data; + AVHWFramesContext *hwfc = (void *) priv->avframe->hw_frames_ctx->data; + AVVulkanFramesContext *vkfc = hwfc->hwctx; + AVVkFrame *vkf = (AVVkFrame *) 
priv->avframe->data[0];

#ifdef PL_HAVE_LAV_VULKAN_V2
    // Newer lavu exposes explicit per-frame locking; required around access
    // to the AVVkFrame's semaphores/layouts.
    vkfc->lock_frame(hwfc, vkf);
#else
    (void) vkfc;
#endif

    for (int n = 0; n < frame->num_planes; n++) {
        pl_vulkan_release_ex(gpu, pl_vulkan_release_params(
            .tex = priv->planar ? priv->planar : frame->planes[n].texture,
            .layout = vkf->layout[n],
            .qf = VK_QUEUE_FAMILY_IGNORED,
            .semaphore = {
                .sem = vkf->sem[n],
                .value = vkf->sem_value[n],
            },
        ));
        // Multi-planar images are wrapped as a single texture; one
        // release covers all planes.
        if (priv->planar)
            break;
    }

    return true;
}

// `pl_frame.release` callback: returns ownership of the images to FFmpeg.
// For each plane, libplacebo signals sem_value+1 when done; the recorded
// sem_value is only bumped if the hold succeeded (hence `+= !!ok`), and the
// stored image layout is updated via `out_layout`.
static void pl_release_avframe(pl_gpu gpu, struct pl_frame *frame)
{
    const struct pl_avframe_priv *priv = frame->user_data;
    AVHWFramesContext *hwfc = (void *) priv->avframe->hw_frames_ctx->data;
    AVVulkanFramesContext *vkfc = hwfc->hwctx;
    AVVkFrame *vkf = (AVVkFrame *) priv->avframe->data[0];

    for (int n = 0; n < frame->num_planes; n++) {
        int ok = pl_vulkan_hold_ex(gpu, pl_vulkan_hold_params(
            .tex = priv->planar ? priv->planar : frame->planes[n].texture,
            .out_layout = &vkf->layout[n],
            .qf = VK_QUEUE_FAMILY_IGNORED,
            .semaphore = {
                .sem = vkf->sem[n],
                .value = vkf->sem_value[n] + 1,
            },
        ));

        vkf->access[n] = 0;
        vkf->sem_value[n] += !!ok;
        if (priv->planar)
            break;
    }

#ifdef PL_HAVE_LAV_VULKAN_V2
    vkfc->unlock_frame(hwfc, vkf);
#else
    (void) vkfc;
#endif
}

// Maps an AV_PIX_FMT_VULKAN frame zero-copy by wrapping the AVVkFrame's
// VkImages as pl_tex. Handles both per-plane images and single multi-planar
// images (the latter stored in `priv->planar`, with subplane views exposed
// as the frame's planes). Returns false if the pl_gpu is not Vulkan-backed.
static bool pl_map_avframe_vulkan(pl_gpu gpu, struct pl_frame *out,
                                  const AVFrame *frame)
{
    const AVHWFramesContext *hwfc = (AVHWFramesContext *) frame->hw_frames_ctx->data;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(hwfc->sw_format);
    const AVVulkanFramesContext *vkfc = hwfc->hwctx;
    AVVkFrame *vkf = (AVVkFrame *) frame->data[0];
    struct pl_avframe_priv *priv = out->user_data;
    pl_vulkan vk = pl_vulkan_get(gpu);

#ifdef PL_HAVE_LAV_VULKAN_V2
    const VkFormat *vk_fmt = vkfc->format;
#else
    const VkFormat *vk_fmt = av_vkfmt_from_pixfmt(hwfc->sw_format);
#endif

    assert(frame->format == AV_PIX_FMT_VULKAN);
    priv->planar = NULL;
    if (!vk)
        return false;

    for (int n = 0; n < out->num_planes; n++) {
        struct pl_plane *plane = &out->planes[n];
        bool chroma = n == 1 || n == 2;
        int num_subplanes;
        assert(vk_fmt[n]);

        // Note: uses the hwfc (allocated) dimensions, not the cropped frame
        // dimensions, since the VkImage spans the full allocation.
        plane->texture = pl_vulkan_wrap(gpu, pl_vulkan_wrap_params(
            .image = vkf->img[n],
            .width = AV_CEIL_RSHIFT(hwfc->width, chroma ? desc->log2_chroma_w : 0),
            .height = AV_CEIL_RSHIFT(hwfc->height, chroma ? desc->log2_chroma_h : 0),
            .format = vk_fmt[n],
            .usage = vkfc->usage,
        ));
        if (!plane->texture)
            return false;

        num_subplanes = plane->texture->params.format->num_planes;
        if (num_subplanes) {
            // Single multi-planar VkImage: expose its subplane views as the
            // frame's planes and stop iterating.
            assert(num_subplanes == out->num_planes);
            priv->planar = plane->texture;
            for (int i = 0; i < num_subplanes; i++)
                out->planes[i].texture = priv->planar->planes[i];
            break;
        }
    }

    out->acquire = pl_acquire_avframe;
    out->release = pl_release_avframe;
    pl_fix_hwframe_sample_depth(out, frame);
    return true;
}

// Destroys the wrapped multi-planar texture (if any). Per-plane wrapped
// textures are cleaned up by the generic hwaccel path in pl_unmap_avframe.
static void pl_unmap_avframe_vulkan(pl_gpu gpu, struct pl_frame *frame)
{
    struct pl_avframe_priv *priv = frame->user_data;
    if (priv->planar) {
        pl_tex_destroy(gpu, &priv->planar);
        for (int n = 0; n < frame->num_planes; n++)
            frame->planes[n].texture = NULL;
    }
}
#endif

// Maps an AVFrame to a pl_frame: zero-copy for supported hwaccel formats
// (DRM_PRIME, VAAPI via DMA-BUF derivation, Vulkan), otherwise uploads the
// software frame into the caller-provided backing textures `params->tex`.
// On failure the partially-initialized frame is unmapped again.
PL_LIBAV_API bool pl_map_avframe_ex(pl_gpu gpu, struct pl_frame *out,
                                    const struct pl_avframe_params *params)
{
    const AVFrame *frame = params->frame;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(frame->format);
    struct pl_plane_data data[4] = {0};
    pl_tex *tex = params->tex;
    int planes;

    struct pl_avframe_priv *priv = malloc(sizeof(*priv));
    if (!priv)
        goto error;

    pl_frame_from_avframe(out, frame);
    priv->avframe = av_frame_clone(frame);
    out->user_data = priv;

#ifdef PL_HAVE_LAV_DOLBY_VISION
    if (params->map_dovi) {
        AVFrameSideData *sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_METADATA);
        if (sd) {
            const AVDOVIMetadata *metadata = (AVDOVIMetadata *) sd->data;
            const AVDOVIRpuDataHeader *header = av_dovi_get_header(metadata);
            // Only automatically map
DoVi RPUs that don't require an EL
            if (header->disable_residual_flag)
                pl_frame_map_avdovi_metadata(out, &priv->dovi, metadata);
        }

#ifdef PL_HAVE_LIBDOVI
        sd = av_frame_get_side_data(frame, AV_FRAME_DATA_DOVI_RPU_BUFFER);
        if (sd)
            pl_hdr_metadata_from_dovi_rpu(&out->color.hdr, sd->buf->data, sd->buf->size);
#endif // PL_HAVE_LIBDOVI
    }

#endif // PL_HAVE_LAV_DOLBY_VISION

    // Zero-copy hwaccel mapping paths; these return directly on success
    // and do not require backing textures.
    switch (frame->format) {
    case AV_PIX_FMT_DRM_PRIME:
        if (!pl_map_avframe_drm(gpu, out, frame))
            goto error;
        return true;

    case AV_PIX_FMT_VAAPI:
        if (!pl_map_avframe_derived(gpu, out, frame))
            goto error;
        return true;

#ifdef PL_HAVE_LAV_VULKAN
    case AV_PIX_FMT_VULKAN:
        if (!pl_map_avframe_vulkan(gpu, out, frame))
            goto error;
        return true;
#endif

    default: break;
    }

    // Backing textures are required from this point onwards
    if (!tex)
        goto error;

    planes = pl_plane_data_from_pixfmt(data, &out->repr.bits, frame->format);
    if (!planes)
        goto error;

    for (int p = 0; p < planes; p++) {
        AVBufferRef *buf = av_frame_get_plane_buffer((AVFrame *) frame, p);
        struct pl_avalloc *alloc = buf ? av_buffer_get_opaque(buf) : NULL;
        bool is_chroma = p == 1 || p == 2; // matches lavu logic

        data[p].width = AV_CEIL_RSHIFT(frame->width, is_chroma ? desc->log2_chroma_w : 0);
        data[p].height = AV_CEIL_RSHIFT(frame->height, is_chroma ? desc->log2_chroma_h : 0);
        if (frame->linesize[p] < 0) {
            // Negative linesize: data pointer references the last row; point
            // at the first row in memory and mark the plane as flipped.
            data[p].pixels = frame->data[p] + frame->linesize[p] * (data[p].height - 1);
            data[p].row_stride = -frame->linesize[p];
            out->planes[p].flipped = true;
        } else {
            data[p].pixels = frame->data[p];
            data[p].row_stride = frame->linesize[p];
        }

        // Probe for frames allocated by pl_get_buffer2
        if (alloc && alloc->magic[0] == PL_MAGIC0 && alloc->magic[1] == PL_MAGIC1) {
            // Upload directly from the (host-mapped) GPU buffer instead of
            // from host memory.
            data[p].buf = alloc->buf;
            data[p].buf_offset = (uintptr_t) data[p].pixels - (uintptr_t) alloc->buf->data;
            data[p].pixels = NULL;
        } else if (gpu->limits.callbacks) {
            // Use asynchronous upload if possible; the per-plane clone keeps
            // the source data alive until the upload completes.
            data[p].callback = pl_avframe_free_cb;
            data[p].priv = av_frame_clone(frame);
        }

        if (!pl_upload_plane(gpu, &out->planes[p], &tex[p], &data[p])) {
            av_frame_free((AVFrame **) &data[p].priv);
            goto error;
        }

        out->planes[p].texture = tex[p];
    }

    return true;

error:
    pl_unmap_avframe(gpu, out);
    return false;
}

// Backwards compatibility with previous versions of this API.
PL_LIBAV_API bool pl_map_avframe(pl_gpu gpu, struct pl_frame *out_frame,
                                 pl_tex tex[4], const AVFrame *avframe)
{
    return pl_map_avframe_ex(gpu, out_frame, &(struct pl_avframe_params) {
        .frame = avframe,
        .tex = tex,
    });
}

// Releases all resources associated with a frame previously mapped by
// pl_map_avframe_ex. Safe to call on a partially-mapped frame (used as the
// error path of pl_map_avframe_ex). Zeroes *frame afterwards.
PL_LIBAV_API void pl_unmap_avframe(pl_gpu gpu, struct pl_frame *frame)
{
    struct pl_avframe_priv *priv = frame->user_data;
    const AVPixFmtDescriptor *desc;
    if (!priv)
        goto done;

#ifdef PL_HAVE_LAV_VULKAN
    if (priv->avframe->format == AV_PIX_FMT_VULKAN)
        pl_unmap_avframe_vulkan(gpu, frame);
#endif

    // For hwaccel formats the textures were created/wrapped by the mapping
    // code and are owned by the frame; software-upload textures belong to
    // the caller (params->tex) and are left alone.
    desc = av_pix_fmt_desc_get(priv->avframe->format);
    if (desc->flags & AV_PIX_FMT_FLAG_HWACCEL) {
        for (int i = 0; i < 4; i++)
            pl_tex_destroy(gpu, &frame->planes[i].texture);
    }

    av_frame_free(&priv->avframe);
    free(priv);

done:
    memset(frame, 0, sizeof(*frame)); // sanity
}

PL_LIBAV_API AVFrame *pl_get_mapped_avframe(const struct pl_frame *frame)
{
    struct pl_avframe_priv *priv = frame->user_data;
    return priv->avframe;
}

// Transfer-completion callback for pl_download_avframe; flips the per-plane
// completion flag polled below.
static void pl_done_cb(void *priv)
{
    bool *status = priv;
    *status = true;
}

// Downloads the textures of a (rendered) pl_frame into a pre-allocated
// software AVFrame. All planes but the last are downloaded asynchronously
// and polled to completion afterwards.
PL_LIBAV_API bool pl_download_avframe(pl_gpu gpu,
                                      const struct pl_frame *frame,
                                      AVFrame *out_frame)
{
    bool done[4] = {0};
    if (frame->num_planes != av_pix_fmt_count_planes(out_frame->format))
        return false;

    for (int p = 0; p < frame->num_planes; p++) {
        bool ok = pl_tex_download(gpu, pl_tex_transfer_params(
            .tex = frame->planes[p].texture,
            .row_pitch = out_frame->linesize[p],
            .ptr = out_frame->data[p],
            // Use synchronous transfer for the last plane
            .callback = (p+1) < frame->num_planes ? pl_done_cb : NULL,
            .priv = &done[p],
        ));

        if (!ok)
            return false;
    }

    for (int p = 0; p < frame->num_planes - 1; p++) {
        while (!done[p])
            pl_tex_poll(gpu, frame->planes[p].texture, UINT64_MAX);
    }

    return true;
}

#define PL_DIV_UP(x, y) (((x) + (y) - 1) / (y))
#define PL_ALIGN(x, align) ((align) ? PL_DIV_UP(x, align) * (align) : (x))
#define PL_MAX(x, y) ((x) > (y) ? (x) : (y))
#define PL_LCM(x, y) ((x) * ((y) / av_gcd(x, y)))

// AVBufferRef free callback for buffers created by pl_get_buffer2; destroys
// the backing GPU buffer and the allocation record.
static inline void pl_avalloc_free(void *opaque, uint8_t *data)
{
    struct pl_avalloc *alloc = opaque;
    assert(alloc->magic[0] == PL_MAGIC0);
    assert(alloc->magic[1] == PL_MAGIC1);
    assert(alloc->buf->data == data);
    pl_buf_destroy(alloc->gpu, &alloc->buf);
    free(alloc);
}

// AVCodecContext.get_buffer2 implementation that allocates frame planes
// inside host-mapped GPU buffers, enabling direct-from-buffer uploads in
// pl_map_avframe_ex. `avctx->opaque` must point to a pl_gpu (or be NULL).
// Falls back to avcodec_default_get_buffer2 whenever direct rendering is
// not possible.
PL_LIBAV_API int pl_get_buffer2(AVCodecContext *avctx, AVFrame *pic, int flags)
{
    int alignment[AV_NUM_DATA_POINTERS];
    int width = pic->width;
    int height = pic->height;
    size_t planesize[4];
    int ret = 0;

    pl_gpu *pgpu = avctx->opaque;
    pl_gpu gpu = pgpu ? *pgpu : NULL;
    struct pl_plane_data data[4];
    struct pl_avalloc *alloc;
    const AVPixFmtDescriptor *desc = av_pix_fmt_desc_get(pic->format);
    int planes = pl_plane_data_from_pixfmt(data, NULL, pic->format);

    // Sanitize frame structs
    memset(pic->data, 0, sizeof(pic->data));
    memset(pic->linesize, 0, sizeof(pic->linesize));
    memset(pic->buf, 0, sizeof(pic->buf));
    pic->extended_data = pic->data;
    pic->extended_buf = NULL;

    if (!(avctx->codec->capabilities & AV_CODEC_CAP_DR1) || !planes)
        goto fallback;
    if (!gpu || !gpu->limits.thread_safe || !gpu->limits.max_mapped_size ||
        !gpu->limits.host_cached)
    {
        goto fallback;
    }

    avcodec_align_dimensions2(avctx, &width, &height, alignment);
    if ((ret = av_image_fill_linesizes(pic->linesize, pic->format, width)))
        return ret;

    // Round each plane's pitch up to satisfy both the codec's and the GPU's
    // transfer alignment requirements.
    for (int p = 0; p < planes; p++) {
        alignment[p] = PL_LCM(alignment[p], gpu->limits.align_tex_xfer_pitch);
        alignment[p] = PL_LCM(alignment[p], gpu->limits.align_tex_xfer_offset);
        alignment[p] = PL_LCM(alignment[p], data[p].pixel_stride);
        pic->linesize[p] = PL_ALIGN(pic->linesize[p], alignment[p]);
    }

#if LIBAVUTIL_VERSION_INT >= AV_VERSION_INT(56, 56, 100)
    ret = av_image_fill_plane_sizes(planesize, pic->format, height, (ptrdiff_t[4]) {
        pic->linesize[0], pic->linesize[1], pic->linesize[2], pic->linesize[3],
    });
    if (ret < 0)
        return ret;
#else
uint8_t *ptrs[4], * const base = (uint8_t *) 0x10000; + ret = av_image_fill_pointers(ptrs, pic->format, height, base, pic->linesize); + if (ret < 0) + return ret; + for (int p = 0; p < 4; p++) + planesize[p] = (uintptr_t) ptrs[p] - (uintptr_t) base; +#endif + + for (int p = 0; p < planes; p++) { + const size_t buf_size = planesize[p] + alignment[p]; + if (buf_size > gpu->limits.max_mapped_size) { + av_frame_unref(pic); + goto fallback; + } + + alloc = malloc(sizeof(*alloc)); + if (!alloc) { + av_frame_unref(pic); + return AVERROR(ENOMEM); + } + + *alloc = (struct pl_avalloc) { + .magic = { PL_MAGIC0, PL_MAGIC1 }, + .gpu = gpu, + .buf = pl_buf_create(gpu, pl_buf_params( + .size = buf_size, + .memory_type = PL_BUF_MEM_HOST, + .host_mapped = true, + .storable = desc->flags & AV_PIX_FMT_FLAG_BE, + )), + }; + + if (!alloc->buf) { + free(alloc); + av_frame_unref(pic); + return AVERROR(ENOMEM); + } + + pic->data[p] = (uint8_t *) PL_ALIGN((uintptr_t) alloc->buf->data, alignment[p]); + pic->buf[p] = av_buffer_create(alloc->buf->data, buf_size, pl_avalloc_free, alloc, 0); + if (!pic->buf[p]) { + pl_buf_destroy(gpu, &alloc->buf); + free(alloc); + av_frame_unref(pic); + return AVERROR(ENOMEM); + } + } + + return 0; + +fallback: + return avcodec_default_get_buffer2(avctx, pic, flags); +} + +#undef PL_MAGIC0 +#undef PL_MAGIC1 +#undef PL_ALIGN +#undef PL_MAX + +#endif // LIBPLACEBO_LIBAV_H_ diff --git a/src/include/libplacebo/utils/upload.h b/src/include/libplacebo/utils/upload.h new file mode 100644 index 0000000..9e8d436 --- /dev/null +++ b/src/include/libplacebo/utils/upload.h @@ -0,0 +1,153 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. 
+ * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_UPLOAD_H_ +#define LIBPLACEBO_UPLOAD_H_ + +#include <stdint.h> + +#include <libplacebo/gpu.h> +#include <libplacebo/renderer.h> + +PL_API_BEGIN + +// This file contains a utility function to assist in uploading data from host +// memory to a texture. In particular, the texture will be suitable for use as +// a `pl_plane`. + +// Description of the host representation of an image plane +struct pl_plane_data { + enum pl_fmt_type type; // meaning of the data (must not be UINT or SINT) + int width, height; // dimensions of the plane + int component_size[4]; // size in bits of each coordinate + int component_pad[4]; // ignored bits preceding each component + int component_map[4]; // semantic meaning of each component (pixel order) + size_t pixel_stride; // offset in bytes between pixels (required) + size_t row_stride; // offset in bytes between rows (optional) + bool swapped; // pixel data is endian-swapped (non-native) + + // Similar to `pl_tex_transfer_params`, you can either upload from a raw + // pointer address, or a buffer + offset. Again, the use of these two + // mechanisms is mutually exclusive. + // + // 1. Uploading from host memory + const void *pixels; // the actual data underlying this plane + + // 2. 
Uploading from a buffer (requires `pl_gpu_limits.buf_transfer`) + pl_buf buf; // the buffer to use + size_t buf_offset; // offset of data within buffer, must be a + // multiple of `pixel_stride` as well as of 4 + + // Similar to `pl_tex_transfer_params.callback`, this allows turning the + // upload of a plane into an asynchronous upload. The same notes apply. + void (*callback)(void *priv); + void *priv; + + // Note: When using this together with `pl_frame`, there is some amount of + // overlap between `component_pad` and `pl_color_repr.bits`. Some key + // differences between the two: + // + // - the bits from `component_pad` are ignored; whereas the superfluous bits + // in a `pl_color_repr` must be 0. + // - the `component_pad` exists to align the component size and placement + // with the capabilities of GPUs; the `pl_color_repr` exists to control + // the semantics of the color samples on a finer granularity. + // - the `pl_color_repr` applies to the color sample as a whole, and + // therefore applies to all planes; the `component_pad` can be different + // for each plane. + // - `component_pad` interacts with float textures by moving the actual + // float in memory. `pl_color_repr` interacts with float data as if + // the float was converted from an integer under full range semantics. + // + // To help establish the motivating difference, a typical example of a use + // case would be yuv420p10. Since 10-bit GPU texture support is limited, + // and working with non-byte-aligned pixels is awkward in general, the + // convention is to represent yuv420p10 as 16-bit samples with either the + // high or low bits set to 0. In this scenario, the `component_size` of the + // `pl_plane_data` and `pl_bit_encoding.sample_depth` would be 16, while + // the `pl_bit_encoding.color_depth` would be 10 (and additionally, the + // `pl_bit_encoding.bit_shift` would be either 0 or 6, depending on + // whether the low or the high bits are used). 
+ // + // On the contrary, something like a packed, 8-bit XBGR format (where the + // X bits are ignored and may contain garbage) would set `component_pad[0]` + // to 8, and the component_size[0:2] (respectively) to 8 as well. + // + // As a general rule of thumb, for maximum compatibility, you should try + // and align component_size/component_pad to multiples of 8 and explicitly + // clear any remaining superfluous bits (+ use `pl_color_repr.bits` to + // ensure they're decoded correctly). You should also try to align the + // `pixel_stride` to a power of two. +}; + +// Fills in the `component_size`, `component_pad` and `component_map` fields +// based on the supplied mask for each component (in semantic order, i.e. +// RGBA). Each element of `mask` must have a contiguous range of set bits. +PL_API void pl_plane_data_from_mask(struct pl_plane_data *data, uint64_t mask[4]); + +// Fills in the `component_size`, `component_pad` and `component_map` fields +// based on the supplied sizes (in bits) and shift of each component (in +// semantic order). +// +// Similar to `pl_plane_data_from_mask` but not limited to 64-bit pixels. +PL_API void pl_plane_data_from_comps(struct pl_plane_data *data, int size[4], + int shift[4]); + +// Helper function to take a `pl_plane_data` struct and try and improve its +// alignment to make it more likely to correspond to a real `pl_fmt`. It does +// this by attempting to round each component up to the nearest byte boundary. +// This relies on the assumption (true in practice) that superfluous bits of +// byte-misaligned formats are explicitly set to 0. +// +// The resulting shift must be consistent across all components, in which case +// it's returned in `out_bits`. If no alignment was possible, `out_bits` is set +// to {0}, and this function returns false. 
+PL_API bool pl_plane_data_align(struct pl_plane_data *data, struct pl_bit_encoding *out_bits); + +// Helper function to find a suitable `pl_fmt` based on a pl_plane_data's +// requirements. This is called internally by `pl_upload_plane`, but it's +// exposed to users both as a convenience and so they may pre-emptively check +// if a format would be supported without actually having to attempt the upload. +PL_API pl_fmt pl_plane_find_fmt(pl_gpu gpu, int out_map[4], const struct pl_plane_data *data); + +// Upload an image plane to a texture, and output the resulting `pl_plane` +// struct to `out_plane` (optional). `tex` must be a valid pointer to a texture +// (or NULL), which will be destroyed and reinitialized if it does not already +// exist or is incompatible. Returns whether successful. +// +// The resulting texture is guaranteed to be `sampleable`, and it will also try +// and maximize compatibility with the other `pl_renderer` requirements +// (blittable, linear filterable, etc.). +// +// Note: `out_plane->shift_x/y` and `out_plane->flipped` are left +// uninitialized, and should be set explicitly by the user. +PL_API bool pl_upload_plane(pl_gpu gpu, struct pl_plane *out_plane, + pl_tex *tex, const struct pl_plane_data *data); + +// Like `pl_upload_plane`, but only creates an uninitialized texture object +// rather than actually performing an upload. This can be useful to, for +// example, prepare textures to be used as the target of rendering. +// +// The resulting texture is guaranteed to be `renderable`, and it will also try +// to maximize compatibility with the other `pl_renderer` requirements +// (blittable, storable, etc.). 
+PL_API bool pl_recreate_plane(pl_gpu gpu, struct pl_plane *out_plane, + pl_tex *tex, const struct pl_plane_data *data); + +PL_API_END + +#endif // LIBPLACEBO_UPLOAD_H_ diff --git a/src/include/libplacebo/vulkan.h b/src/include/libplacebo/vulkan.h new file mode 100644 index 0000000..4e5db95 --- /dev/null +++ b/src/include/libplacebo/vulkan.h @@ -0,0 +1,638 @@ +/* + * This file is part of libplacebo. + * + * libplacebo is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * libplacebo is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with libplacebo. If not, see <http://www.gnu.org/licenses/>. + */ + +#ifndef LIBPLACEBO_VULKAN_H_ +#define LIBPLACEBO_VULKAN_H_ + +#include <vulkan/vulkan.h> +#include <libplacebo/gpu.h> +#include <libplacebo/swapchain.h> + +PL_API_BEGIN + +#define PL_VK_MIN_VERSION VK_API_VERSION_1_2 + +// Structure representing a VkInstance. Using this is not required. +typedef const struct pl_vk_inst_t { + VkInstance instance; + + // The Vulkan API version supported by this VkInstance. + uint32_t api_version; + + // The associated vkGetInstanceProcAddr pointer. + PFN_vkGetInstanceProcAddr get_proc_addr; + + // The instance extensions that were successfully enabled, including + // extensions enabled by libplacebo internally. May contain duplicates. + const char * const *extensions; + int num_extensions; + + // The instance layers that were successfully enabled, including + // layers enabled by libplacebo internally. May contain duplicates. 
+ const char * const *layers; + int num_layers; +} *pl_vk_inst; + +struct pl_vk_inst_params { + // If set, enable the debugging and validation layers. These should + // generally be lightweight and relatively harmless to enable. + bool debug; + + // If set, also enable GPU-assisted verification and best practices + // layers. (Note: May cause substantial slowdown and/or result in lots of + // false positive spam) + bool debug_extra; + + // If nonzero, restricts the Vulkan API version to be at most this. This + // is only really useful for explicitly testing backwards compatibility. + uint32_t max_api_version; + + // Pointer to a user-provided `vkGetInstanceProcAddr`. If this is NULL, + // libplacebo will use the directly linked version (if available). + PFN_vkGetInstanceProcAddr get_proc_addr; + + // Enables extra instance extensions. Instance creation will fail if these + // extensions are not all supported. The user may use this to enable e.g. + // windowing system integration. + const char * const *extensions; + int num_extensions; + + // Enables extra optional instance extensions. These are opportunistically + // enabled if supported by the device, but otherwise skipped. + const char * const *opt_extensions; + int num_opt_extensions; + + // Enables extra layers. Instance creation will fail if these layers are + // not all supported. + // + // NOTE: Layers needed for required/optional extensions are automatically + // enabled. The user does not specifically need to enable layers related + // to extension support. + const char * const *layers; + int num_layers; + + // Enables extra optional layers. These are opportunistically enabled if + // supported by the platform, but otherwise skipped. + const char * const *opt_layers; + int num_opt_layers; +}; + +#define pl_vk_inst_params(...) (&(struct pl_vk_inst_params) { __VA_ARGS__ }) +PL_API extern const struct pl_vk_inst_params pl_vk_inst_default_params; + +// Helper function to simplify instance creation. 
The user could also bypass
// these helpers and do it manually, but this function is provided as a
// convenience. It also sets up a debug callback which forwards all vulkan
// messages to the `pl_log` callback.
PL_API pl_vk_inst pl_vk_inst_create(pl_log log, const struct pl_vk_inst_params *params);
PL_API void pl_vk_inst_destroy(pl_vk_inst *inst);

struct pl_vulkan_queue {
    uint32_t index; // Queue family index
    uint32_t count; // Queue family count
};

// Structure representing the actual vulkan device and associated GPU instance
typedef const struct pl_vulkan_t *pl_vulkan;
struct pl_vulkan_t {
    pl_gpu gpu;

    // The vulkan objects in use. The user may use this for their own purposes,
    // but please note that the lifetime is tied to the lifetime of the
    // pl_vulkan object, and must not be destroyed by the user. Note that the
    // created vulkan device may have any number of queues and queue family
    // assignments; so using it for queue submission commands is ill-advised.
    VkInstance instance;
    VkPhysicalDevice phys_device;
    VkDevice device;

    // The associated vkGetInstanceProcAddr pointer.
    PFN_vkGetInstanceProcAddr get_proc_addr;

    // The Vulkan API version supported by this VkPhysicalDevice.
    uint32_t api_version;

    // The device extensions that were successfully enabled, including
    // extensions enabled by libplacebo internally. May contain duplicates.
    const char * const *extensions;
    int num_extensions;

    // The device features that were enabled at device creation time.
    //
    // Note: Whenever a feature flag is ambiguous between several alternative
    // locations, for completeness' sake, we include both.
    const VkPhysicalDeviceFeatures2 *features;

    // The explicit queue families we are using to provide a given capability.
+ struct pl_vulkan_queue queue_graphics; // provides VK_QUEUE_GRAPHICS_BIT + struct pl_vulkan_queue queue_compute; // provides VK_QUEUE_COMPUTE_BIT + struct pl_vulkan_queue queue_transfer; // provides VK_QUEUE_TRANSFER_BIT + + // Functions for locking a queue. These must be used to lock VkQueues for + // submission or other related operations when sharing the VkDevice between + // multiple threads, Using this on queue families or indices not contained + // in `queues` is undefined behavior. + void (*lock_queue)(pl_vulkan vk, uint32_t qf, uint32_t qidx); + void (*unlock_queue)(pl_vulkan vk, uint32_t qf, uint32_t qidx); + + // --- Deprecated fields + + // These are the same active queue families and their queue counts in list + // form. This list does not contain duplicates, nor any extra queues + // enabled at device creation time. Deprecated in favor of querying + // `vkGetPhysicalDeviceQueueFamilyProperties` directly. + const struct pl_vulkan_queue *queues PL_DEPRECATED; + int num_queues PL_DEPRECATED; +}; + +struct pl_vulkan_params { + // The vulkan instance. Optional, if NULL then libplacebo will internally + // create a VkInstance with the settings from `instance_params`. + // + // Note: The VkInstance provided by the user *MUST* be created with a + // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. + VkInstance instance; + + // Pointer to `vkGetInstanceProcAddr`. If this is NULL, libplacebo will + // use the directly linked version (if available). + // + // Note: This overwrites the same value from `instance_params`. + PFN_vkGetInstanceProcAddr get_proc_addr; + + // Configures the settings used for creating an internal vulkan instance. + // May be NULL. Ignored if `instance` is set. + const struct pl_vk_inst_params *instance_params; + + // When choosing the device, rule out all devices that don't support + // presenting to this surface. When creating a device, enable all extensions + // needed to ensure we can present to this surface. Optional. 
Only legal + // when specifying an existing VkInstance to use. + VkSurfaceKHR surface; + + // --- Physical device selection options + + // The vulkan physical device. May be set by the caller to indicate the + // physical device to use. Otherwise, libplacebo will pick the "best" + // available GPU, based on the advertised device type. (i.e., it will + // prefer discrete GPUs over integrated GPUs). Only legal when specifying + // an existing VkInstance to use. + VkPhysicalDevice device; + + // When choosing the device, only choose a device with this exact name. + // This overrides `allow_software`. No effect if `device` is set. Note: A + // list of devices and their names are logged at level PL_LOG_INFO. + const char *device_name; + + // When choosing the device, only choose a device with this exact UUID. + // This overrides `allow_software` and `device_name`. No effect if `device` + // is set. + uint8_t device_uuid[16]; + + // When choosing the device, controls whether or not to also allow software + // GPUs. No effect if `device` or `device_name` are set. + bool allow_software; + + // --- Logical device creation options + + // Controls whether or not to allow asynchronous transfers, using transfer + // queue families, if supported by the device. This can be significantly + // faster and more power efficient, and also allows streaming uploads in + // parallel with rendering commands. Enabled by default. + bool async_transfer; + + // Controls whether or not to allow asynchronous compute, using dedicated + // compute queue families, if supported by the device. On some devices, + // these can allow the GPU to schedule compute shaders in parallel with + // fragment shaders. Enabled by default. + bool async_compute; + + // Limits the number of queues to use. If left as 0, libplacebo will use as + // many queues as the device supports. 
Multiple queues can result in + // improved efficiency when submitting multiple commands that can entirely + // or partially execute in parallel. Defaults to 1, since using more queues + // can actually decrease performance. + // + // Note: libplacebo will always *create* logical devices with all available + // queues for a given QF enabled, regardless of this setting. + int queue_count; + + // Bitmask of extra queue families to enable. If set, then *all* queue + // families matching *any* of these flags will be enabled at device + // creation time. Setting this to VK_QUEUE_FLAG_BITS_MAX_ENUM effectively + // enables all queue families supported by the device. + VkQueueFlags extra_queues; + + // Enables extra device extensions. Device creation will fail if these + // extensions are not all supported. The user may use this to enable e.g. + // interop extensions. + const char * const *extensions; + int num_extensions; + + // Enables extra optional device extensions. These are opportunistically + // enabled if supported by the device, but otherwise skipped. + const char * const *opt_extensions; + int num_opt_extensions; + + // Optional extra features to enable at device creation time. These are + // opportunistically enabled if supported by the physical device, but + // otherwise kept disabled. + const VkPhysicalDeviceFeatures2 *features; + + // --- Misc/debugging options + + // Restrict specific features to e.g. work around driver bugs, or simply + // for testing purposes + int max_glsl_version; // limit the maximum GLSL version + uint32_t max_api_version; // limit the maximum vulkan API version +}; + +// Default/recommended parameters. Should generally be safe and efficient. +#define PL_VULKAN_DEFAULTS \ + .async_transfer = true, \ + .async_compute = true, \ + /* enabling multiple queues often decreases perf */ \ + .queue_count = 1, + +#define pl_vulkan_params(...) 
(&(struct pl_vulkan_params) { PL_VULKAN_DEFAULTS __VA_ARGS__ }) +PL_API extern const struct pl_vulkan_params pl_vulkan_default_params; + +// Creates a new vulkan device based on the given parameters and initializes +// a new GPU. If `params` is left as NULL, it defaults to +// &pl_vulkan_default_params. +// +// Thread-safety: Safe +PL_API pl_vulkan pl_vulkan_create(pl_log log, const struct pl_vulkan_params *params); + +// Destroys the vulkan device and all associated objects, except for the +// VkInstance provided by the user. +// +// Note that all resources allocated from this vulkan object (e.g. via the +// `vk->ra` or using `pl_vulkan_create_swapchain`) *must* be explicitly +// destroyed by the user before calling this. +// +// Also note that this function will block until all in-flight GPU commands are +// finished processing. You can avoid this by manually calling `pl_gpu_finish` +// before `pl_vulkan_destroy`. +PL_API void pl_vulkan_destroy(pl_vulkan *vk); + +// For a `pl_gpu` backed by `pl_vulkan`, this function can be used to retrieve +// the underlying `pl_vulkan`. Returns NULL for any other type of `gpu`. +PL_API pl_vulkan pl_vulkan_get(pl_gpu gpu); + +struct pl_vulkan_device_params { + // The instance to use. Required! + // + // Note: The VkInstance provided by the user *must* be created with a + // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher. + VkInstance instance; + + // Mirrored from `pl_vulkan_params`. All of these fields are optional. + PFN_vkGetInstanceProcAddr get_proc_addr; + VkSurfaceKHR surface; + const char *device_name; + uint8_t device_uuid[16]; + bool allow_software; +}; + +#define pl_vulkan_device_params(...) (&(struct pl_vulkan_device_params) { __VA_ARGS__ }) + +// Helper function to choose the best VkPhysicalDevice, given a VkInstance. +// This uses the same logic as `pl_vulkan_create` uses internally. If no +// matching device was found, this returns VK_NULL_HANDLE. 
PL_API VkPhysicalDevice pl_vulkan_choose_device(pl_log log,
                              const struct pl_vulkan_device_params *params);

// Parameters for `pl_vulkan_create_swapchain`. Unset fields take their
// documented defaults when the compound-literal macro below is used.
struct pl_vulkan_swapchain_params {
    // The surface to use for rendering. Required, the user is in charge of
    // creating this. Must belong to the same VkInstance as `vk->instance`.
    VkSurfaceKHR surface;

    // The preferred presentation mode. See the vulkan documentation for more
    // information about these. If the device/surface combination does not
    // support this mode, libplacebo will fall back to VK_PRESENT_MODE_FIFO_KHR.
    //
    // Warning: Leaving this zero-initialized is the same as having specified
    // VK_PRESENT_MODE_IMMEDIATE_KHR, which is probably not what the user
    // wants!
    VkPresentModeKHR present_mode;

    // Allow up to N in-flight frames. This essentially controls how many
    // rendering commands may be queued up at the same time. See the
    // documentation for `pl_swapchain_get_latency` for more information. For
    // vulkan specifically, we are only able to wait until the GPU has finished
    // rendering a frame - we are unable to wait until the display has actually
    // finished displaying it. So this only provides a rough guideline.
    // Optional, defaults to 3.
    int swapchain_depth;

    // This suppresses automatic recreation of the swapchain when any call
    // returns VK_SUBOPTIMAL_KHR. Normally, libplacebo will recreate the
    // swapchain internally on the next `pl_swapchain_start_frame`. If enabled,
    // clients are assumed to take care of swapchain recreations themselves, by
    // calling `pl_swapchain_resize` as appropriate. libplacebo will tolerate
    // the "suboptimal" status indefinitely.
    bool allow_suboptimal;

    // Disable high-bit (10 or more) SDR formats. May help work around buggy
    // drivers which don't dither properly when outputting high bit depth
    // SDR backbuffers to 8-bit screens.
    bool disable_10bit_sdr;
};

// Convenience constructor: builds a pointer to a stack-allocated params
// struct; any fields not named in `...` are zero-initialized.
#define pl_vulkan_swapchain_params(...) (&(struct pl_vulkan_swapchain_params) { __VA_ARGS__ })

// Creates a new vulkan swapchain based on an existing VkSurfaceKHR. Using this
// function requires that the vulkan device was created with the
// VK_KHR_swapchain extension. The easiest way of accomplishing this is to set
// the `pl_vulkan_params.surface` explicitly at creation time.
PL_API pl_swapchain pl_vulkan_create_swapchain(pl_vulkan vk,
                              const struct pl_vulkan_swapchain_params *params);

// This will return true if the vulkan swapchain is internally detected
// as being suboptimal (VK_SUBOPTIMAL_KHR). This might be of use to clients
// who have `params->allow_suboptimal` enabled.
PL_API bool pl_vulkan_swapchain_suboptimal(pl_swapchain sw);

// Vulkan interop API, for sharing a single VkDevice (and associated vulkan
// resources) directly with the API user. The use of this API is a bit sketchy
// and requires careful communication of Vulkan API state.

struct pl_vulkan_import_params {
    // The vulkan instance. Required.
    //
    // Note: The VkInstance provided by the user *must* be created with a
    // VkApplicationInfo.apiVersion of PL_VK_MIN_VERSION or higher.
    VkInstance instance;

    // Pointer to `vkGetInstanceProcAddr`. If this is NULL, libplacebo will
    // use the directly linked version (if available).
    PFN_vkGetInstanceProcAddr get_proc_addr;

    // The physical device selected by the user. Required.
    VkPhysicalDevice phys_device;

    // The logical device created by the user. Required.
    VkDevice device;

    // --- Logical device parameters

    // List of all device-level extensions that were enabled. (Instance-level
    // extensions need not be re-specified here, since it's guaranteed that any
    // instance-level extensions that device-level extensions depend on were
    // enabled at the instance level)
    const char * const *extensions;
    int num_extensions;

    // Enabled queue families. At least `queue_graphics` is required.
    //
    // It's okay for multiple queue families to be specified with the same
    // index, e.g. in the event that a dedicated compute queue also happens to
    // be the dedicated transfer queue.
    //
    // It's also okay to leave the queue struct as {0} in the event that no
    // dedicated queue exists for a given operation type. libplacebo will
    // automatically fall back to using e.g. the graphics queue instead.
    struct pl_vulkan_queue queue_graphics; // must support VK_QUEUE_GRAPHICS_BIT
    struct pl_vulkan_queue queue_compute;  // must support VK_QUEUE_COMPUTE_BIT
    struct pl_vulkan_queue queue_transfer; // must support VK_QUEUE_TRANSFER_BIT

    // Enabled VkPhysicalDeviceFeatures. The device *must* be created with
    // all of the features in `pl_vulkan_required_features` enabled.
    const VkPhysicalDeviceFeatures2 *features;

    // Functions for locking a queue. If set, these will be used instead of
    // libplacebo's internal functions for `pl_vulkan.(un)lock_queue`.
    void (*lock_queue)(void *ctx, uint32_t qf, uint32_t qidx);
    void (*unlock_queue)(void *ctx, uint32_t qf, uint32_t qidx);
    void *queue_ctx;

    // --- Misc/debugging options

    // Restrict specific features to e.g. work around driver bugs, or simply
    // for testing purposes. See `pl_vulkan_params` for a description of these.
    int max_glsl_version;
    uint32_t max_api_version;
};

#define pl_vulkan_import_params(...) (&(struct pl_vulkan_import_params) { __VA_ARGS__ })

// For purely informative reasons, this contains a list of extensions and
// device features that libplacebo *can* make use of. These are all strictly
// optional, but provide a hint to the API user as to what might be worth
// enabling at device creation time.
//
// Note: This also includes physical device features provided by extensions.
// They are all provided using extension-specific features structs, rather
// than the more general purpose VkPhysicalDeviceVulkan11Features etc.
PL_API extern const char * const pl_vulkan_recommended_extensions[];
PL_API extern const int pl_vulkan_num_recommended_extensions;
PL_API extern const VkPhysicalDeviceFeatures2 pl_vulkan_recommended_features;

// A list of device features that are required by libplacebo. These
// *must* be provided by imported Vulkan devices.
//
// Note: `pl_vulkan_recommended_features` does not include this list.
PL_API extern const VkPhysicalDeviceFeatures2 pl_vulkan_required_features;

// Import an existing VkDevice instead of creating a new one, and wrap it into
// a `pl_vulkan` abstraction. It's safe to `pl_vulkan_destroy` this, which will
// destroy application state related to libplacebo but leave the underlying
// VkDevice intact.
PL_API pl_vulkan pl_vulkan_import(pl_log log, const struct pl_vulkan_import_params *params);

// Parameters for `pl_vulkan_wrap`, describing an externally created VkImage.
struct pl_vulkan_wrap_params {
    // The image itself. It *must* be usable concurrently by all of the queue
    // family indices listed in `pl_vulkan->queues`. Note that this requires
    // the use of VK_SHARING_MODE_CONCURRENT if `pl_vulkan->num_queues` is
    // greater than 1. If this is difficult to achieve for the user, then
    // `async_transfer` / `async_compute` should be turned off, which
    // guarantees the use of only one queue family.
    VkImage image;

    // Which aspect of `image` to wrap. Only useful for wrapping individual
    // sub-planes of planar images. If left as 0, it defaults to the entire
    // image (i.e. the union of VK_IMAGE_ASPECT_PLANE_N_BIT for planar formats,
    // and VK_IMAGE_ASPECT_COLOR_BIT otherwise).
    VkImageAspectFlags aspect;

    // The image's dimensions (unused dimensions must be 0)
    int width;
    int height;
    int depth;

    // The image's format. libplacebo will try to map this to an equivalent
    // pl_fmt. If no compatible pl_fmt is found, wrapping will fail.
    VkFormat format;

    // The usage flags the image was created with. libplacebo will set the
    // pl_tex capabilities to include whatever it can, as determined by the set
    // of enabled usage flags.
    VkImageUsageFlags usage;

    // See `pl_tex_params`
    void *user_data;
    pl_debug_tag debug_tag;
};

// Convenience constructor; defaults `debug_tag` to the call site's PL_DEBUG_TAG.
#define pl_vulkan_wrap_params(...) (&(struct pl_vulkan_wrap_params) {   \
        .debug_tag = PL_DEBUG_TAG,                                      \
        __VA_ARGS__                                                     \
    })

// Wraps an external VkImage into a pl_tex abstraction. By default, the image
// is considered "held" by the user and must be released before calling any
// pl_tex_* API calls on it (see `pl_vulkan_release`).
//
// This wrapper can be destroyed by simply calling `pl_tex_destroy` on it,
// which will not destroy the underlying VkImage. If a pl_tex wrapper is
// destroyed while an image is not currently being held by the user, that
// image is left in an undefined state.
//
// Wrapping the same VkImage multiple times is undefined behavior, as is trying
// to wrap an image belonging to a different VkDevice than the one in use by
// `gpu`.
//
// This function may fail, in which case it returns NULL.
PL_API pl_tex pl_vulkan_wrap(pl_gpu gpu, const struct pl_vulkan_wrap_params *params);

// Analogous to `pl_vulkan_wrap`, this function takes any `pl_tex` (including
// ones created by `pl_tex_create`) and unwraps it to expose the underlying
// VkImage to the user. Unlike `pl_vulkan_wrap`, this `pl_tex` is *not*
// considered held after calling this function - the user must explicitly
// `pl_vulkan_hold` before accessing the VkImage.
//
// `out_format` and `out_flags` will be updated to hold the VkImage's
// format and usage flags. (Optional)
PL_API VkImage pl_vulkan_unwrap(pl_gpu gpu, pl_tex tex,
                                VkFormat *out_format, VkImageUsageFlags *out_flags);

// Represents a vulkan semaphore/value pair (for compatibility with timeline
// semaphores). When using normal, binary semaphores, `value` may be ignored.
typedef struct pl_vulkan_sem {
    VkSemaphore sem;
    uint64_t value;
} pl_vulkan_sem;

struct pl_vulkan_hold_params {
    // The Vulkan image to hold. It will be marked as held. Attempting to
    // perform any pl_tex_* operation (except pl_tex_destroy) on a held image
    // is undefined behavior.
    pl_tex tex;

    // The layout to transition the image to when holding. Alternatively, a
    // pointer to receive the current image layout. If `out_layout` is
    // provided, `layout` is ignored.
    VkImageLayout layout;
    VkImageLayout *out_layout;

    // The queue family index to transition the image to. This can be used with
    // VK_QUEUE_FAMILY_EXTERNAL to transition the image to an external API. As
    // a special case, if set to VK_QUEUE_FAMILY_IGNORED, libplacebo will not
    // transition the image, even if this image was not set up for concurrent
    // usage. Ignored for concurrent images.
    uint32_t qf;

    // The semaphore to fire when the image is available for use. (Required)
    pl_vulkan_sem semaphore;
};

#define pl_vulkan_hold_params(...) (&(struct pl_vulkan_hold_params) { __VA_ARGS__ })

// "Hold" a shared image, transferring control over the image to the user.
// Returns whether successful.
PL_API bool pl_vulkan_hold_ex(pl_gpu gpu, const struct pl_vulkan_hold_params *params);

struct pl_vulkan_release_params {
    // The image to be released. It must be marked as "held". Performing any
    // operation on the VkImage underlying this `pl_tex` while it is not being
    // held by the user is undefined behavior.
    pl_tex tex;

    // The current layout of the image at the point in time when `semaphore`
    // fires, or if no semaphore is specified, at the time of call.
    VkImageLayout layout;

    // The queue family index to transition the image to. This can be used with
    // VK_QUEUE_FAMILY_EXTERNAL to transition the image from an external API. As
    // a special case, if set to VK_QUEUE_FAMILY_IGNORED, libplacebo will not
    // transition the image, even if this image was not set up for concurrent
    // usage. Ignored for concurrent images.
    uint32_t qf;

    // The semaphore to wait on before libplacebo will actually use or modify
    // the image. (Optional)
    //
    // Note: the lifetime of `semaphore` is indeterminate, and destroying it
    // while the texture is still depending on that semaphore is undefined
    // behavior.
    //
    // Technically, the only way to be sure that it's safe to free is to use
    // `pl_gpu_finish()` or similar (e.g. `pl_vulkan_destroy` or
    // `vkDeviceWaitIdle`) after another operation involving `tex` has been
    // emitted (or the texture has been destroyed).
    //
    // Warning: If `tex` is a planar image (`pl_fmt.num_planes > 0`), and
    // `semaphore` is specified, it *must* be a timeline semaphore! Failure to
    // respect this will result in undefined behavior. This warning does not
    // apply to individual planes (as exposed by `pl_tex.planes`).
    pl_vulkan_sem semaphore;
};

#define pl_vulkan_release_params(...) (&(struct pl_vulkan_release_params) { __VA_ARGS__ })

// "Release" a shared image, transferring control to libplacebo.
PL_API void pl_vulkan_release_ex(pl_gpu gpu, const struct pl_vulkan_release_params *params);

struct pl_vulkan_sem_params {
    // The type of semaphore to create.
    VkSemaphoreType type;

    // For VK_SEMAPHORE_TYPE_TIMELINE, sets the initial timeline value.
    uint64_t initial_value;

    // If set, exports this VkSemaphore to the handle given in `out_handle`.
    // The user takes over ownership, and should manually close it before
    // destroying this VkSemaphore (via `pl_vulkan_sem_destroy`).
    enum pl_handle_type export_handle;
    union pl_handle *out_handle;

    // Optional debug tag to identify this semaphore.
    pl_debug_tag debug_tag;
};

// Convenience constructor; defaults `debug_tag` to the call site's PL_DEBUG_TAG.
#define pl_vulkan_sem_params(...) (&(struct pl_vulkan_sem_params) {     \
        .debug_tag = PL_DEBUG_TAG,                                      \
        __VA_ARGS__                                                     \
    })

// Helper functions to create and destroy vulkan semaphores. Returns
// VK_NULL_HANDLE on failure.
PL_API VkSemaphore pl_vulkan_sem_create(pl_gpu gpu, const struct pl_vulkan_sem_params *params);
PL_API void pl_vulkan_sem_destroy(pl_gpu gpu, VkSemaphore *semaphore);

// Backwards-compatibility wrappers for older versions of the API. New code
// should prefer `pl_vulkan_hold_ex` / `pl_vulkan_release_ex` instead.
PL_DEPRECATED PL_API bool pl_vulkan_hold(pl_gpu gpu, pl_tex tex, VkImageLayout layout,
                                         pl_vulkan_sem sem_out);
PL_DEPRECATED PL_API bool pl_vulkan_hold_raw(pl_gpu gpu, pl_tex tex, VkImageLayout *out_layout,
                                             pl_vulkan_sem sem_out);
PL_DEPRECATED PL_API void pl_vulkan_release(pl_gpu gpu, pl_tex tex, VkImageLayout layout,
                                            pl_vulkan_sem sem_in);

PL_API_END

#endif // LIBPLACEBO_VULKAN_H_